From e22f3a992f0aef3334f04ff27312180a11b91a30 Mon Sep 17 00:00:00 2001 From: Andrey Borodin Date: Thu, 8 May 2025 21:31:28 +0500 Subject: [PATCH 1/6] Prevent object access hook from accesing not installed namespace (#2161) Currently we cannot install Age to shared_preload_libraries if pg_cron is installed. To prevent following error we must bail out early. postgres=# set backtrace_functions to 'get_namespace_oid'; SET postgres=# create extension pg_cron ; 2025-04-15 16:59:49.867 +05 [30402] ERROR: schema "ag_catalog" does not exist 2025-04-15 16:59:49.867 +05 [30402] BACKTRACE: 2 postgres 0x0000000102401ab0 get_namespace_oid + 204 3 age.so 0x0000000103285cd0 ag_catalog_namespace_id + 28 4 age.so 0x00000001032846fc ag_relation_id + 32 5 age.so 0x00000001032efe9c search_label_relation_cache_miss + 84 6 age.so 0x00000001032efe30 search_label_relation_cache + 100 7 age.so 0x00000001032842f4 object_access + 384 8 postgres 0x000000010240a7a0 RunObjectDropHook + 136 9 postgres 0x00000001023ee85c deleteOneObject + 108 10 postgres 0x00000001023eb860 deleteObjectsInList + 476 11 postgres 0x00000001023eba14 performMultipleDeletions + 316 12 postgres 0x0000000102560244 ATPostAlterTypeCleanup + 2144 13 postgres 0x0000000102559fb4 ATRewriteCatalogs + 516 14 postgres 0x00000001025543a8 ATController + 284 15 postgres 0x00000001025541bc AlterTable + 96 16 postgres 0x00000001028b8240 ProcessUtilitySlow + 1812 17 postgres 0x00000001028b600c standard_ProcessUtility + 3684 18 age.so 0x00000001032844f8 ag_ProcessUtility_hook + 200 19 postgres 0x00000001028b516c ProcessUtility + 392 20 postgres 0x000000010250e5b4 execute_sql_string + 812 21 postgres 0x000000010250d438 execute_extension_script + 2264 22 postgres 0x000000010250b330 ApplyExtensionUpdates + 1320 23 postgres 0x0000000102507954 CreateExtensionInternal + 1896 24 postgres 0x0000000102506ea4 CreateExtension + 1152 25 postgres 0x00000001028b8ed4 ProcessUtilitySlow + 5032 26 postgres 0x00000001028b600c standard_ProcessUtility + 3684 27 age.so 0x00000001032844f8 ag_ProcessUtility_hook + 200 28 postgres 0x00000001028b516c ProcessUtility + 392 29 postgres 0x00000001028b4768 PortalRunUtility + 232 30 postgres 0x00000001028b3660 PortalRunMulti + 756 31 postgres 0x00000001028b2abc PortalRun + 1008 32 postgres 0x00000001028ad870 exec_simple_query + 1436 33 postgres 0x00000001028ac990 PostgresMain + 2472 34 postgres 0x00000001027a49ac report_fork_failure_to_client + 0 35 postgres 0x00000001027a3e54 BackendStartup + 520 36 postgres 0x00000001027a29f0 ServerLoop + 812 37 postgres 0x000000010279fe0c PostmasterMain + 6484 38 postgres 0x000000010266acd0 startup_hacks + 0 39 dyld 0x000000018a3ab154 start + 2476 --- src/backend/catalog/ag_catalog.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/backend/catalog/ag_catalog.c b/src/backend/catalog/ag_catalog.c index 9c070fc6e..edb080a5a 100644 --- a/src/backend/catalog/ag_catalog.c +++ b/src/backend/catalog/ag_catalog.c @@ -20,6 +20,7 @@ #include "postgres.h" #include "catalog/dependency.h" +#include "catalog/namespace.h" #include "catalog/objectaccess.h" #include "catalog/pg_class_d.h" #include "catalog/pg_namespace_d.h" @@ -176,6 +177,13 @@ static void object_access(ObjectAccessType access, Oid class_id, Oid object_id, if (access != OAT_DROP) return; + /* + * Age might be installed into shared_preload_libraries before extension is + * created. In this case we must bail out from this hook. + */ + if (!OidIsValid(get_namespace_oid("ag_catalog", true))) + return; + drop_arg = arg; /* From cee9517428023f365511902b2a44fcbe1f64f0ce Mon Sep 17 00:00:00 2001 From: Moontasir Mahmood <53787290+Munmud@users.noreply.github.com> Date: Sun, 18 May 2025 15:56:39 +0600 Subject: [PATCH 2/6] Fix CSV import for edge with one property (#2175) - start_index marks where property fields begin. For edges, it's 4. If start_index >= total_fields, create empty properties; otherwise, parse the properties. --- src/backend/utils/load/age_load.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c index 00ee56f5a..1658ac306 100644 --- a/src/backend/utils/load/age_load.c +++ b/src/backend/utils/load/age_load.c @@ -157,7 +157,7 @@ agtype* create_agtype_from_list_i(char **header, char **fields, agtype_in_state result; size_t i; - if (start_index + 1 == fields_len) + if (start_index >= fields_len) { return create_empty_agtype(); } From 35a131f116ba20371e5466559be696a0fd5e6038 Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Wed, 9 Jul 2025 20:17:51 +0500 Subject: [PATCH 3/6] Add support for operators in cypher query (#2172) - Fixed some operator signatures in .sql - Added support for PG operators in cypher. Some hardcoded operators are removed, since they are now covered by the general operator handling. - Added full typecast syntax that allows for type modifiers. - These changes also improve interoperability with other extensions, as reflected in the regression tests. - Added a new function to check if graph_oid exists. --- age--1.5.0--y.y.y.sql | 118 ++++++- regress/expected/cypher_match.out | 6 +- regress/expected/cypher_vle.out | 6 +- regress/expected/pgvector.out | 508 +++++++++++++++++++++++++--- regress/expected/scan.out | 8 +- regress/sql/pgvector.sql | 252 ++++++++++++-- regress/sql/scan.sql | 2 +- sql/agtype_coercions.sql | 2 +- sql/agtype_exists.sql | 26 +- sql/agtype_operators.sql | 16 +- sql/agtype_string.sql | 6 + src/backend/catalog/ag_graph.c | 13 + src/backend/catalog/ag_label.c | 15 + src/backend/nodes/cypher_outfuncs.c | 2 +- src/backend/parser/ag_scanner.l | 217 ++++++++---- src/backend/parser/cypher_analyze.c | 9 +- src/backend/parser/cypher_expr.c | 251 +++++++------- src/backend/parser/cypher_gram.y | 168 ++++++--- src/backend/parser/cypher_parser.c | 19 +- src/backend/utils/adt/agtype.c | 12 +- src/include/catalog/ag_graph.h | 1 + src/include/nodes/cypher_nodes.h | 2 +- src/include/parser/ag_scanner.h | 10 +- src/include/parser/cypher_kwlist.h | 1 + 24 files changed, 1297 insertions(+), 373 deletions(-) diff --git a/age--1.5.0--y.y.y.sql b/age--1.5.0--y.y.y.sql index 6b7560aa2..d04a6a24b 100644 --- a/age--1.5.0--y.y.y.sql +++ b/age--1.5.0--y.y.y.sql @@ -41,8 +41,8 @@ CREATE OPERATOR @>> ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contains_top_level, COMMUTATOR = '<<@', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_contained_by_top_level(agtype, agtype) @@ -58,17 +58,114 @@ CREATE OPERATOR <<@ ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contained_by_top_level, COMMUTATOR = '@>>', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); +/* + * We have to drop and recreate the operators, because + * commutator is not modifiable using ALTER OPERATOR. + */ +ALTER EXTENSION age + DROP OPERATOR ? (agtype, agtype); +ALTER EXTENSION age + DROP OPERATOR ? (agtype, text); +ALTER EXTENSION age + DROP OPERATOR ?| (agtype, agtype); +ALTER EXTENSION age + DROP OPERATOR ?| (agtype, text[]); +ALTER EXTENSION age + DROP OPERATOR ?& (agtype, agtype[]); +ALTER EXTENSION age + DROP OPERATOR ?& (agtype, text); + +DROP OPERATOR ? (agtype, agtype), ? (agtype, text), + ?| (agtype, agtype), ?| (agtype, text[]), + ?& (agtype, agtype[]), ?& (agtype, text); + +CREATE OPERATOR ? ( + LEFTARG = agtype, + RIGHTARG = agtype, + FUNCTION = ag_catalog.agtype_exists_agtype, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +CREATE OPERATOR ? ( + LEFTARG = agtype, + RIGHTARG = text, + FUNCTION = ag_catalog.agtype_exists, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +CREATE OPERATOR ?| ( + LEFTARG = agtype, + RIGHTARG = agtype, + FUNCTION = ag_catalog.agtype_exists_any_agtype, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +CREATE OPERATOR ?| ( + LEFTARG = agtype, + RIGHTARG = text[], + FUNCTION = ag_catalog.agtype_exists_any, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +CREATE OPERATOR ?& ( + LEFTARG = agtype, + RIGHTARG = agtype, + FUNCTION = ag_catalog.agtype_exists_all_agtype, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +CREATE OPERATOR ?& ( + LEFTARG = agtype, + RIGHTARG = text[], + FUNCTION = ag_catalog.agtype_exists_all, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +ALTER EXTENSION age + ADD OPERATOR ? (agtype, agtype); +ALTER EXTENSION age + ADD OPERATOR ? (agtype, text); +ALTER EXTENSION age + ADD OPERATOR ?| (agtype, agtype); +ALTER EXTENSION age + ADD OPERATOR ?| (agtype, text[]); +ALTER EXTENSION age + ADD OPERATOR ?& (agtype, agtype[]); +ALTER EXTENSION age + ADD OPERATOR ?& (agtype, text); + +ALTER OPERATOR @> (agtype, agtype) + SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); + +ALTER OPERATOR @> (agtype, agtype) + SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); + +ALTER OPERATOR <@ (agtype, agtype) + SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); + +ALTER OPERATOR <@ (agtype, agtype) + SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); + /* * Since there is no option to add or drop operator from class, * we have to drop and recreate the whole operator class. * Reference: https://www.postgresql.org/docs/current/sql-alteropclass.html */ -DROP OPERATOR CLASS ag_catalog.gin_agtype_ops; +ALTER EXTENSION age + DROP OPERATOR CLASS ag_catalog.gin_agtype_ops USING gin; + +DROP OPERATOR CLASS ag_catalog.gin_agtype_ops USING gin; CREATE OPERATOR CLASS ag_catalog.gin_agtype_ops DEFAULT FOR TYPE agtype USING gin AS @@ -89,6 +186,9 @@ DEFAULT FOR TYPE agtype USING gin AS internal, internal, internal), STORAGE text; +ALTER EXTENSION age + ADD OPERATOR CLASS ag_catalog.gin_agtype_ops USING gin; + -- this function went from variadic "any" to just "any" type CREATE OR REPLACE FUNCTION ag_catalog.age_tostring("any") RETURNS agtype @@ -148,4 +248,10 @@ PARALLEL SAFE AS 'MODULE_PATHNAME'; CREATE CAST (agtype[] AS agtype) - WITH FUNCTION ag_catalog.agtype_array_to_agtype(agtype[]); \ No newline at end of file + WITH FUNCTION ag_catalog.agtype_array_to_agtype(agtype[]); + +CREATE OPERATOR =~ ( + LEFTARG = agtype, + RIGHTARG = agtype, + FUNCTION = ag_catalog.age_eq_tilde +); diff --git a/regress/expected/cypher_match.out b/regress/expected/cypher_match.out index e25584788..e83ba3b93 100644 --- a/regress/expected/cypher_match.out +++ b/regress/expected/cypher_match.out @@ -2407,22 +2407,22 @@ SELECT * FROM cypher('cypher_match', $$ MATCH (a {name:a.name}) MATCH (a {age:a. SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship}]->(b) RETURN p $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship, years: u.years}]->(b) RETURN p $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ MATCH p=(a {name:a.name})-[u {relationship: u.relationship}]->(b {age:b.age}) RETURN p $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ CREATE () WITH * MATCH (x{n0:x.n1}) RETURN 0 $$) as (a agtype); diff --git a/regress/expected/cypher_vle.out b/regress/expected/cypher_vle.out index b3cada60a..9cbb3420c 100644 --- a/regress/expected/cypher_vle.out +++ b/regress/expected/cypher_vle.out @@ -726,8 +726,8 @@ SELECT prepend_node('list01', 'b'); SELECT * FROM show_list_use_vle('list01'); node ----------------------------------------------------------------------------------- - {"id": 1407374883553281, "label": "node", "properties": {"content": "a"}}::vertex {"id": 1407374883553282, "label": "node", "properties": {"content": "b"}}::vertex + {"id": 1407374883553281, "label": "node", "properties": {"content": "a"}}::vertex (2 rows) -- prepend a node 'c' @@ -741,9 +741,9 @@ SELECT prepend_node('list01', 'c'); SELECT * FROM show_list_use_vle('list01'); node ----------------------------------------------------------------------------------- - {"id": 1407374883553281, "label": "node", "properties": {"content": "a"}}::vertex - {"id": 1407374883553282, "label": "node", "properties": {"content": "b"}}::vertex {"id": 1407374883553283, "label": "node", "properties": {"content": "c"}}::vertex + {"id": 1407374883553282, "label": "node", "properties": {"content": "b"}}::vertex + {"id": 1407374883553281, "label": "node", "properties": {"content": "a"}}::vertex (3 rows) DROP FUNCTION show_list_use_vle; diff --git a/regress/expected/pgvector.out b/regress/expected/pgvector.out index f1bd53ed4..bbc558349 100644 --- a/regress/expected/pgvector.out +++ b/regress/expected/pgvector.out @@ -61,6 +61,58 @@ SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n spa {1:1.22,2:2.22,3:3.33}/3 (1 row) +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector); + n +------------------ + [1.22,2.22,3.33] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec); + n +--------------------------------- + [1.2197266,2.2207031,3.3300781] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec); + n +-------------------------- + {1:1.22,2:2.22,3:3.33}/3 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector(3)); + n +------------------ + [1.22,2.22,3.33] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec(3)); + n +--------------------------------- + [1.2197266,2.2207031,3.3300781] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec(3)); + n +-------------------------- + {1:1.22,2:2.22,3:3.33}/3 +(1 row) + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector(2)); +ERROR: expected 2 dimensions, not 3 +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec(2)); +ERROR: expected 2 dimensions, not 3 +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec(2)); +ERROR: expected 2 dimensions, not 3 +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n vector(4)); +ERROR: expected 4 dimensions, not 3 +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n halfvec(4)); +ERROR: expected 4 dimensions, not 3 +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n sparsevec(4)); +ERROR: expected 4 dimensions, not 3 +-- +-- Test functions +-- SELECT * FROM cypher('graph', $$ RETURN l2_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); n ----- @@ -121,32 +173,186 @@ SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4)::text $ [2, 3, 4, 5] (1 row) -SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit); +SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit(3)); n ----- 111 (1 row) +-- +-- Test operators +-- +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector + [1,2,4]::vector $$) AS (n vector); + n +--------- + [2,4,7] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector - [1,2,4]::vector $$) AS (n vector); + n +---------- + [0,0,-1] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector * [1,2,4]::vector $$) AS (n vector); + n +---------- + [1,4,12] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector || [1,2,4]::vector $$) AS (n vector); + n +--------------- + [1,2,3,1,2,4] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <#> [1,2,4]::vector $$) AS (n agtype); + n +------- + -17.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <=> [1,2,4]::vector $$) AS (n agtype); + n +--------------------- + 0.00853986601633272 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <+> [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +-- +-- Due to issues with pattern matching syntax, '-' is not allowed +-- as an operator character, so we have to use the OPERATOR syntax. +-- +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (`<->`) [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +-- Using OPERATOR () syntax +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (+) [1,2,4]::vector $$) AS (n vector); + n +--------- + [2,4,7] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (-) [1,2,4]::vector $$) AS (n vector); + n +---------- + [0,0,-1] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (*) [1,2,4]::vector $$) AS (n vector); + n +---------- + [1,4,12] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (||) [1,2,4]::vector $$) AS (n vector); + n +--------------- + [1,2,3,1,2,4] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (`<->`) [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<#>) [1,2,4]::vector $$) AS (n agtype); + n +------- + -17.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<=>) [1,2,4]::vector $$) AS (n agtype); + n +--------------------- + 0.00853986601633272 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<+>) [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.+) [1,2,4]::vector $$) AS (n vector); + n +--------- + [2,4,7] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.-) [1,2,4]::vector $$) AS (n vector); + n +---------- + [0,0,-1] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.*) [1,2,4]::vector $$) AS (n vector); + n +---------- + [1,4,12] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.||) [1,2,4]::vector $$) AS (n vector); + n +--------------- + [1,2,3,1,2,4] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.`<->`) [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<#>) [1,2,4]::vector $$) AS (n agtype); + n +------- + -17.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<=>) [1,2,4]::vector $$) AS (n agtype); + n +--------------------- + 0.00853986601633272 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<+>) [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +-- -- An example usage +-- SELECT * FROM cypher('graph', $$ - CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: "[-0.07594558, 0.04081754, 0.29592122, -0.11921061]"}), - (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: "[0.30228977, -0.22839354, 0.35070436, 0.01262819]"}), - (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: "[ 0.12240622, -0.29752459, 0.22620453, 0.24454723]"}), - (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: "[ 0.34717246, -0.13820869, 0.29214213, 0.08090488]"}), - (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: "[ 0.03923657, 0.39284106, -0.20927092, -0.17770818]"}), - (:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: "[-0.29302418, -0.39615033, -0.23393948, -0.09601383]"}), - (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic Marine is sent to the moon Pandora, where he becomes torn between following orders and protecting the world he feels is his home.", embedding: "[-0.13663386, 0.00635589, -0.03038832, -0.08252723]"}), - (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: "[ 0.27215557, -0.1479577, -0.09972772, -0.08234394]"}), - (:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: "[ 0.21560573, -0.07505179, -0.01331814, 0.13403069]"}), - (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: "[ 0.24008012, 0.44954908, -0.30905488, 0.15195407]"}), - (:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: "[-0.17471036, 0.14695261, -0.06272433, -0.21795064]"}), - (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: "[-0.33280967, 0.07733926, 0.11015328, 0.53382836]"}), - (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: "[-0.21629286, 0.31114665, 0.08303899, 0.46199759]"}), - (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: "[-0.11528205, -0.0208782, -0.0735215, 0.14327449]"}), - (:Movie {title: "The Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: "[ 0.33666933, 0.18040994, -0.01075103, -0.11117851]"}), - (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: "[ 0.34698868, 0.06439331, 0.06232323, -0.19534876]"}), - (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: "[ 0.01794725, -0.11434246, -0.46831815, -0.01049593]"}), - (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: "[ 0.00546514, -0.37005171, -0.42612838, 0.07968612]"}) + CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: [-0.07594558, 0.04081754, 0.29592122, -0.11921061]}), + (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: [0.30228977, -0.22839354, 0.35070436, 0.01262819]}), + (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: [ 0.12240622, -0.29752459, 0.22620453, 0.24454723]}), + (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: [ 0.34717246, -0.13820869, 0.29214213, 0.08090488]}), + (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: [ 0.03923657, 0.39284106, -0.20927092, -0.17770818]}), + (:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: [-0.29302418, -0.39615033, -0.23393948, -0.09601383]}), + (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic Marine is sent to the moon Pandora, where he becomes torn between following orders and protecting the world he feels is his home.", embedding: [-0.13663386, 0.00635589, -0.03038832, -0.08252723]}), + (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: [ 0.27215557, -0.1479577, -0.09972772, -0.08234394]}), + (:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: [ 0.21560573, -0.07505179, -0.01331814, 0.13403069]}), + (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: [ 0.24008012, 0.44954908, -0.30905488, 0.15195407]}), + (:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: [-0.17471036, 0.14695261, -0.06272433, -0.21795064]}), + (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: [-0.33280967, 0.07733926, 0.11015328, 0.53382836]}), + (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: [-0.21629286, 0.31114665, 0.08303899, 0.46199759]}), + (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: [-0.11528205, -0.0208782, -0.0735215, 0.14327449]}), + (:Movie {title: "The Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: [ 0.33666933, 0.18040994, -0.01075103, -0.11117851]}), + (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: [ 0.34698868, 0.06439331, 0.06232323, -0.19534876]}), + (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: [ 0.01794725, -0.11434246, -0.46831815, -0.01049593]}), + (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: [ 0.00546514, -0.37005171, -0.42612838, 0.07968612]}) $$) AS (result agtype); result -------- @@ -201,7 +407,20 @@ SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, vector_dims(m.e -- Get top 4 most similar movies to The Terminator using cosine distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) - RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) + ASC LIMIT 4 +$$) AS (title agtype); + title +------------------------------ + "The Terminator" + "Terminator 2: Judgment Day" + "Minority Report" + "Blade Runner" +(4 rows) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY m.embedding::vector <=> search.embedding::vector + ASC LIMIT 4 $$) AS (title agtype); title ------------------------------ @@ -213,7 +432,20 @@ $$) AS (title agtype); -- Get top 4 most similar movies to The Matrix using cosine distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) - RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector <=> search.embedding::vector + ASC LIMIT 4 $$) AS (title agtype); title ---------------------------- @@ -224,27 +456,27 @@ $$) AS (title agtype); (4 rows) -- l2 norm of the embedding -SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=(l2_normalize(m.embedding))::text return m.title, m.embedding $$) AS (title agtype, embedding agtype); - title | embedding -------------------------------+---------------------------------------------------- - "The Matrix" | "[-0.22980669,0.12351139,0.89543957,-0.36072403]" - "The Matrix Reloaded" | "[0.58534974,-0.44225806,0.6790991,0.024453051]" - "The Matrix Revolutions" | "[0.26431033,-0.6424414,0.4884408,0.528048]" - "The Matrix Resurrections" | "[0.72151977,-0.28723562,0.60715157,0.16814256]" - "Inception" | "[0.08159459,0.81693435,-0.43519026,-0.3695538]" - "Interstellar" | "[-0.5290723,-0.71527255,-0.4223914,-0.17335857]" - "Avatar" | "[-0.84023285,0.039085682,-0.18687363,-0.507503]" - "Blade Runner" | "[0.81074023,-0.44075987,-0.29708475,-0.2452992]" - "Blade Runner 2049" | "[0.8134027,-0.28314334,-0.05024454,0.50564945]" - "Minority Report" | "[0.39031598,0.7308651,-0.5024533,0.24704295]" - "Total Recall" | "[-0.54291505,0.4566574,-0.19491677,-0.67728484]" - "Elysium" | "[-0.517338,0.12022049,0.17122844,0.82981277]" - "Gattaca" | "[-0.35853538,0.51576865,0.13764863,0.765825]" - "The Fifth Element" | "[-0.5788842,-0.10483904,-0.36918527,0.7194471]" - "The Terminator" | "[0.84599304,0.45333964,-0.02701552,-0.27937278]" - "Terminator 2: Judgment Day" | "[0.8501332,0.15776564,0.15269388,-0.4786106]" - "Jurassic Park" | "[0.037194606,-0.23696794,-0.9705615,-0.02175219]" - "The Avengers" | "[0.009587915,-0.6492101,-0.7475897,0.13979948]" +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=l2_normalize(m.embedding)::agtype return m.title, m.embedding $$) AS (title agtype, embedding agtype); + title | embedding +------------------------------+----------------------------------------------------- + "The Matrix" | [-0.22980669, 0.12351139, 0.89543957, -0.36072403] + "The Matrix Reloaded" | [0.58534974, -0.44225806, 0.6790991, 0.024453051] + "The Matrix Revolutions" | [0.26431033, -0.6424414, 0.4884408, 0.528048] + "The Matrix Resurrections" | [0.72151977, -0.28723562, 0.60715157, 0.16814256] + "Inception" | [0.08159459, 0.81693435, -0.43519026, -0.3695538] + "Interstellar" | [-0.5290723, -0.71527255, -0.4223914, -0.17335857] + "Avatar" | [-0.84023285, 0.039085682, -0.18687363, -0.507503] + "Blade Runner" | [0.81074023, -0.44075987, -0.29708475, -0.2452992] + "Blade Runner 2049" | [0.8134027, -0.28314334, -0.05024454, 0.50564945] + "Minority Report" | [0.39031598, 0.7308651, -0.5024533, 0.24704295] + "Total Recall" | [-0.54291505, 0.4566574, -0.19491677, -0.67728484] + "Elysium" | [-0.517338, 0.12022049, 0.17122844, 0.82981277] + "Gattaca" | [-0.35853538, 0.51576865, 0.13764863, 0.765825] + "The Fifth Element" | [-0.5788842, -0.10483904, -0.36918527, 0.7194471] + "The Terminator" | [0.84599304, 0.45333964, -0.02701552, -0.27937278] + "Terminator 2: Judgment Day" | [0.8501332, 0.15776564, 0.15269388, -0.4786106] + "Jurassic Park" | [0.037194606, -0.23696794, -0.9705615, -0.02175219] + "The Avengers" | [0.009587915, -0.6492101, -0.7475897, 0.13979948] (18 rows) -- Get top 4 most similar movies to The Terminator using l2 distance @@ -259,6 +491,18 @@ $$) AS (title agtype); "Blade Runner" (4 rows) +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY m.embedding::vector OPERATOR (`<->`) search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + title +------------------------------ + "The Terminator" + "Terminator 2: Judgment Day" + "Minority Report" + "Blade Runner" +(4 rows) + -- Get top 4 most similar movies to The Matrix using l2 distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 @@ -271,6 +515,186 @@ $$) AS (title agtype); "Total Recall" (4 rows) +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector OPERATOR (`<->`) search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +-- +-- Test vector index +-- +-- This function will be used to check if index scan +-- is used successfully. We cannot simply have EXPLAIN +-- in the upcoming queries because it produces some +-- hardcoded oids in sort node, which may change in +-- future and break the tests. +CREATE OR REPLACE FUNCTION plan_has_index_scan(sql text) +RETURNS boolean +LANGUAGE plpgsql AS +$$ +DECLARE + plan_lines text[]; + plan_text text; +BEGIN + EXECUTE format('EXPLAIN (FORMAT JSON, COSTS OFF) %s', sql) INTO plan_text; + + -- Return true if 'Index Scan' appears anywhere + RETURN position('"Index Scan"' in plan_text) > 0; +END; +$$; +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector(4) <=> search.embedding::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +-- The index expression below matches the expression +-- seen in the EXPLAIN plan of above query +DO $$ +DECLARE + graph_oid oid; +BEGIN + SELECT graphid INTO graph_oid + FROM ag_catalog.ag_graph + WHERE name = 'graph'; + + EXECUTE format($f$ + CREATE INDEX movie_vector_idx ON graph."Movie" + USING hnsw ((( + agtype_access_operator( + VARIADIC ARRAY[ + _agtype_build_vertex(id, _label_name(%L::oid, id), properties), + '"embedding"'::agtype + ] + )::text + )::vector(4)) vector_cosine_ops); + $f$, graph_oid); +END; +$$; +-- Disable seqscan just to test the index +SET enable_seqscan = off; +SELECT plan_has_index_scan($f$ + SELECT * FROM cypher('graph', $$ + MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 + $$) AS (title agtype); +$f$); + plan_has_index_scan +--------------------- + t +(1 row) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +DROP INDEX graph.movie_vector_idx; +SET enable_seqscan = on; +-- Test a direct implicit cast +CREATE CAST (agtype AS vector) + WITH INOUT AS implicit; +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding <=> search.embedding + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding OPERATOR (`<->`) search.embedding + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +DO $$ +DECLARE + graph_oid oid; +BEGIN + SELECT graphid INTO graph_oid + FROM ag_catalog.ag_graph + WHERE name = 'graph'; + + EXECUTE format($f$ + CREATE INDEX movie_vector_idx ON graph."Movie" + USING hnsw (( + agtype_access_operator( + VARIADIC ARRAY[ + _agtype_build_vertex(id, _label_name(%L::oid, id), properties), + '"embedding"'::agtype + ] + )::vector(4)) vector_cosine_ops); + $f$, graph_oid); +END; +$$; +-- Disable seqscan just to test the index +SET enable_seqscan = off; +SELECT plan_has_index_scan($f$ + SELECT * FROM cypher('graph', $$ + MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 + $$) AS (title agtype); +$f$); + plan_has_index_scan +--------------------- + t +(1 row) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +SET enable_seqscan = on; +-- +-- Clean up +-- +DROP FUNCTION plan_has_index_scan(text); +DROP CAST (agtype AS vector); SELECT drop_graph('graph', true); NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to table graph._ag_label_vertex diff --git a/regress/expected/scan.out b/regress/expected/scan.out index d96d80049..d8105a053 100644 --- a/regress/expected/scan.out +++ b/regress/expected/scan.out @@ -52,7 +52,7 @@ RETURN 0 " LINE 2: /* unterminated /* comment ^ --- recover syntax highlighting */ +-- recover syntax highlighting */ */ -- -- single-line comment -- @@ -208,9 +208,9 @@ $$) AS t(a agtype, b agtype); SELECT * FROM cypher('scan', $$ RETURN 0xF~ $$) AS t(a int); -ERROR: unexpected character at or near "~" -LINE 2: RETURN 0xF~ - ^ +ERROR: syntax error at end of input +LINE 3: $$) AS t(a int); + ^ -- an invalid character after the leading "0x" SELECT * FROM cypher('scan', $$ RETURN 0x~ diff --git a/regress/sql/pgvector.sql b/regress/sql/pgvector.sql index 816d6eb9f..677e78586 100644 --- a/regress/sql/pgvector.sql +++ b/regress/sql/pgvector.sql @@ -39,6 +39,26 @@ SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n vec SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n halfvec); SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n sparsevec); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec); + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector(3)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec(3)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec(3)); + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector(2)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec(2)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec(2)); + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n vector(4)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n halfvec(4)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n sparsevec(4)); + +-- +-- Test functions +-- SELECT * FROM cypher('graph', $$ RETURN l2_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); SELECT * FROM cypher('graph', $$ RETURN inner_product("[1,2,3]", "[1,2,4]") $$) AS (n agtype); SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); @@ -49,28 +69,65 @@ SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]") $$) AS (n vector SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]")::text $$) AS (n agtype); SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4) $$) AS (n vector); SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4)::text $$) AS (n agtype); -SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit); +SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit(3)); + +-- +-- Test operators +-- +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector + [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector - [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector * [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector || [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <#> [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <=> [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <+> [1,2,4]::vector $$) AS (n agtype); +-- +-- Due to issues with pattern matching syntax, '-' is not allowed +-- as an operator character, so we have to use the OPERATOR syntax. +-- +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (`<->`) [1,2,4]::vector $$) AS (n agtype); +-- Using OPERATOR () syntax +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (+) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (-) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (*) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (||) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (`<->`) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<#>) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<=>) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<+>) [1,2,4]::vector $$) AS (n agtype); + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.+) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.-) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.*) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.||) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.`<->`) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<#>) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<=>) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<+>) [1,2,4]::vector $$) AS (n agtype); + +-- -- An example usage +-- SELECT * FROM cypher('graph', $$ - CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: "[-0.07594558, 0.04081754, 0.29592122, -0.11921061]"}), - (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: "[0.30228977, -0.22839354, 0.35070436, 0.01262819]"}), - (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: "[ 0.12240622, -0.29752459, 0.22620453, 0.24454723]"}), - (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: "[ 0.34717246, -0.13820869, 0.29214213, 0.08090488]"}), - (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: "[ 0.03923657, 0.39284106, -0.20927092, -0.17770818]"}), - (:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: "[-0.29302418, -0.39615033, -0.23393948, -0.09601383]"}), - (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic Marine is sent to the moon Pandora, where he becomes torn between following orders and protecting the world he feels is his home.", embedding: "[-0.13663386, 0.00635589, -0.03038832, -0.08252723]"}), - (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: "[ 0.27215557, -0.1479577, -0.09972772, -0.08234394]"}), - (:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: "[ 0.21560573, -0.07505179, -0.01331814, 0.13403069]"}), - (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: "[ 0.24008012, 0.44954908, -0.30905488, 0.15195407]"}), - (:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: "[-0.17471036, 0.14695261, -0.06272433, -0.21795064]"}), - (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: "[-0.33280967, 0.07733926, 0.11015328, 0.53382836]"}), - (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: "[-0.21629286, 0.31114665, 0.08303899, 0.46199759]"}), - (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: "[-0.11528205, -0.0208782, -0.0735215, 0.14327449]"}), - (:Movie {title: "The Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: "[ 0.33666933, 0.18040994, -0.01075103, -0.11117851]"}), - (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: "[ 0.34698868, 0.06439331, 0.06232323, -0.19534876]"}), - (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: "[ 0.01794725, -0.11434246, -0.46831815, -0.01049593]"}), - (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: "[ 0.00546514, -0.37005171, -0.42612838, 0.07968612]"}) + CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: [-0.07594558, 0.04081754, 0.29592122, -0.11921061]}), + (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: [0.30228977, -0.22839354, 0.35070436, 0.01262819]}), + (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: [ 0.12240622, -0.29752459, 0.22620453, 0.24454723]}), + (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: [ 0.34717246, -0.13820869, 0.29214213, 0.08090488]}), + (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: [ 0.03923657, 0.39284106, -0.20927092, -0.17770818]}), + (:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: [-0.29302418, -0.39615033, -0.23393948, -0.09601383]}), + (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic Marine is sent to the moon Pandora, where he becomes torn between following orders and protecting the world he feels is his home.", embedding: [-0.13663386, 0.00635589, -0.03038832, -0.08252723]}), + (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: [ 0.27215557, -0.1479577, -0.09972772, -0.08234394]}), + (:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: [ 0.21560573, -0.07505179, -0.01331814, 0.13403069]}), + (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: [ 0.24008012, 0.44954908, -0.30905488, 0.15195407]}), + (:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: [-0.17471036, 0.14695261, -0.06272433, -0.21795064]}), + (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: [-0.33280967, 0.07733926, 0.11015328, 0.53382836]}), + (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: [-0.21629286, 0.31114665, 0.08303899, 0.46199759]}), + (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: [-0.11528205, -0.0208782, -0.0735215, 0.14327449]}), + (:Movie {title: "The Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: [ 0.33666933, 0.18040994, -0.01075103, -0.11117851]}), + (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: [ 0.34698868, 0.06439331, 0.06232323, -0.19534876]}), + (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: [ 0.01794725, -0.11434246, -0.46831815, -0.01049593]}), + (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: [ 0.00546514, -0.37005171, -0.42612838, 0.07968612]}) $$) AS (result agtype); SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, (m.embedding)::vector $$) AS (title agtype, embedding vector); @@ -79,23 +136,174 @@ SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, vector_dims(m.e -- Get top 4 most similar movies to The Terminator using cosine distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) - RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) + ASC LIMIT 4 $$) AS (title agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY m.embedding::vector <=> search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + -- Get top 4 most similar movies to The Matrix using cosine distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) - RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) + ASC LIMIT 4 $$) AS (title agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector <=> search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + -- l2 norm of the embedding -SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=(l2_normalize(m.embedding))::text return m.title, m.embedding $$) AS (title agtype, embedding agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=l2_normalize(m.embedding)::agtype return m.title, m.embedding $$) AS (title agtype, embedding agtype); -- Get top 4 most similar movies to The Terminator using l2 distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 $$) AS (title agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY m.embedding::vector OPERATOR (`<->`) search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + -- Get top 4 most similar movies to The Matrix using l2 distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 $$) AS (title agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector OPERATOR (`<->`) search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + +-- +-- Test vector index +-- + +-- This function will be used to check if index scan +-- is used successfully. We cannot simply have EXPLAIN +-- in the upcoming queries because it produces some +-- hardcoded oids in sort node, which may change in +-- future and break the tests. +CREATE OR REPLACE FUNCTION plan_has_index_scan(sql text) +RETURNS boolean +LANGUAGE plpgsql AS +$$ +DECLARE + plan_lines text[]; + plan_text text; +BEGIN + EXECUTE format('EXPLAIN (FORMAT JSON, COSTS OFF) %s', sql) INTO plan_text; + + -- Return true if 'Index Scan' appears anywhere + RETURN position('"Index Scan"' in plan_text) > 0; +END; +$$; + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector(4) <=> search.embedding::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + +-- The index expression below matches the expression +-- seen in the EXPLAIN plan of above query +DO $$ +DECLARE + graph_oid oid; +BEGIN + SELECT graphid INTO graph_oid + FROM ag_catalog.ag_graph + WHERE name = 'graph'; + + EXECUTE format($f$ + CREATE INDEX movie_vector_idx ON graph."Movie" + USING hnsw ((( + agtype_access_operator( + VARIADIC ARRAY[ + _agtype_build_vertex(id, _label_name(%L::oid, id), properties), + '"embedding"'::agtype + ] + )::text + )::vector(4)) vector_cosine_ops); + $f$, graph_oid); +END; +$$; + +-- Disable seqscan just to test the index +SET enable_seqscan = off; +SELECT plan_has_index_scan($f$ + SELECT * FROM cypher('graph', $$ + MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 + $$) AS (title agtype); +$f$); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + +DROP INDEX graph.movie_vector_idx; +SET enable_seqscan = on; + +-- Test a direct implicit cast +CREATE CAST (agtype AS vector) + WITH INOUT AS implicit; + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding <=> search.embedding + ASC LIMIT 4 +$$) AS (title agtype); + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding OPERATOR (`<->`) search.embedding + ASC LIMIT 4 +$$) AS (title agtype); + +DO $$ +DECLARE + graph_oid oid; +BEGIN + SELECT graphid INTO graph_oid + FROM ag_catalog.ag_graph + WHERE name = 'graph'; + + EXECUTE format($f$ + CREATE INDEX movie_vector_idx ON graph."Movie" + USING hnsw (( + agtype_access_operator( + VARIADIC ARRAY[ + _agtype_build_vertex(id, _label_name(%L::oid, id), properties), + '"embedding"'::agtype + ] + )::vector(4)) vector_cosine_ops); + $f$, graph_oid); +END; +$$; + +-- Disable seqscan just to test the index +SET enable_seqscan = off; +SELECT plan_has_index_scan($f$ + SELECT * FROM cypher('graph', $$ + MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 + $$) AS (title agtype); +$f$); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + +SET enable_seqscan = on; +-- +-- Clean up +-- +DROP FUNCTION plan_has_index_scan(text); +DROP CAST (agtype AS vector); SELECT drop_graph('graph', true); DROP EXTENSION vector CASCADE; \ No newline at end of file diff --git a/regress/sql/scan.sql b/regress/sql/scan.sql index 840a822f2..4d35fe0fe 100644 --- a/regress/sql/scan.sql +++ b/regress/sql/scan.sql @@ -41,7 +41,7 @@ SELECT * FROM cypher('scan', $$ /* unterminated /* comment RETURN 0 $$) AS t(a int); --- recover syntax highlighting */ +-- recover syntax highlighting */ */ -- -- single-line comment diff --git a/sql/agtype_coercions.sql b/sql/agtype_coercions.sql index c7895fabc..933375fc1 100644 --- a/sql/agtype_coercions.sql +++ b/sql/agtype_coercions.sql @@ -78,7 +78,7 @@ AS 'MODULE_PATHNAME'; CREATE CAST (float8 AS agtype) WITH FUNCTION ag_catalog.float8_to_agtype(float8); --- agtype -> float8 (exmplicit) +-- agtype -> float8 (explicit) CREATE FUNCTION ag_catalog.agtype_to_float8(agtype) RETURNS float8 LANGUAGE c diff --git a/sql/agtype_exists.sql b/sql/agtype_exists.sql index fe6150d69..441af1755 100644 --- a/sql/agtype_exists.sql +++ b/sql/agtype_exists.sql @@ -32,9 +32,8 @@ CREATE OPERATOR ? ( LEFTARG = agtype, RIGHTARG = text, FUNCTION = ag_catalog.agtype_exists, - COMMUTATOR = '?', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_agtype(agtype, agtype) @@ -49,9 +48,8 @@ CREATE OPERATOR ? ( LEFTARG = agtype, RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_exists_agtype, - COMMUTATOR = '?', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_any(agtype, text[]) @@ -66,8 +64,8 @@ CREATE OPERATOR ?| ( LEFTARG = agtype, RIGHTARG = text[], FUNCTION = ag_catalog.agtype_exists_any, - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_any_agtype(agtype, agtype) @@ -82,8 +80,8 @@ CREATE OPERATOR ?| ( LEFTARG = agtype, RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_exists_any_agtype, - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_all(agtype, text[]) @@ -98,8 +96,8 @@ CREATE OPERATOR ?& ( LEFTARG = agtype, RIGHTARG = text[], FUNCTION = ag_catalog.agtype_exists_all, - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_all_agtype(agtype, agtype) @@ -114,6 +112,6 @@ CREATE OPERATOR ?& ( LEFTARG = agtype, RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_exists_all_agtype, - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); diff --git a/sql/agtype_operators.sql b/sql/agtype_operators.sql index 3fbc52f33..36fedfe80 100644 --- a/sql/agtype_operators.sql +++ b/sql/agtype_operators.sql @@ -33,8 +33,8 @@ CREATE OPERATOR @> ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contains, COMMUTATOR = '<@', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_contained_by(agtype, agtype) @@ -50,8 +50,8 @@ CREATE OPERATOR <@ ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contained_by, COMMUTATOR = '@>', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_contains_top_level(agtype, agtype) @@ -67,8 +67,8 @@ CREATE OPERATOR @>> ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contains_top_level, COMMUTATOR = '<<@', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_contained_by_top_level(agtype, agtype) @@ -84,6 +84,6 @@ CREATE OPERATOR <<@ ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contained_by_top_level, COMMUTATOR = '@>>', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); \ No newline at end of file diff --git a/sql/agtype_string.sql b/sql/agtype_string.sql index e7485769d..0d7b2013f 100644 --- a/sql/agtype_string.sql +++ b/sql/agtype_string.sql @@ -52,6 +52,12 @@ CREATE FUNCTION ag_catalog.age_eq_tilde(agtype, agtype) PARALLEL SAFE AS 'MODULE_PATHNAME'; +CREATE OPERATOR =~ ( + LEFTARG = agtype, + RIGHTARG = agtype, + FUNCTION = ag_catalog.age_eq_tilde +); + CREATE FUNCTION ag_catalog.age_is_valid_label_name(agtype) RETURNS boolean LANGUAGE c diff --git a/src/backend/catalog/ag_graph.c b/src/backend/catalog/ag_graph.c index d03e08126..56e5cf982 100644 --- a/src/backend/catalog/ag_graph.c +++ b/src/backend/catalog/ag_graph.c @@ -184,3 +184,16 @@ char *get_graph_namespace_name(const char *graph_name) { return get_namespace_name(get_graph_namespace(graph_name)); } + +bool graph_namespace_exists(Oid graph_oid) +{ + graph_cache_data *cache_data; + + cache_data = search_graph_namespace_cache(graph_oid); + if (cache_data) + { + return true; + } + + return false; +} diff --git a/src/backend/catalog/ag_label.c b/src/backend/catalog/ag_label.c index d7ba7e7e2..897baa586 100644 --- a/src/backend/catalog/ag_label.c +++ b/src/backend/catalog/ag_label.c @@ -199,6 +199,13 @@ Datum _label_name(PG_FUNCTION_ARGS) } graph = PG_GETARG_OID(0); + + /* Check if the graph OID is valid */ + if (!graph_namespace_exists(graph)) + { + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("graph with oid %u does not exist", graph))); + } label_id = (int32)(((uint64)AG_GETARG_GRAPHID(1)) >> ENTRY_ID_BITS); @@ -206,6 +213,14 @@ Datum _label_name(PG_FUNCTION_ARGS) label_name = NameStr(label_cache->name); + /* If label_name is not found, error out */ + if (label_name == NULL) + { + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("label with id %d does not exist in graph %u", + label_id, graph))); + } + if (IS_AG_DEFAULT_LABEL(label_name)) PG_RETURN_CSTRING(""); diff --git a/src/backend/nodes/cypher_outfuncs.c b/src/backend/nodes/cypher_outfuncs.c index 7aebbc312..22bcdb979 100644 --- a/src/backend/nodes/cypher_outfuncs.c +++ b/src/backend/nodes/cypher_outfuncs.c @@ -319,7 +319,7 @@ void out_cypher_typecast(StringInfo str, const ExtensibleNode *node) DEFINE_AG_NODE(cypher_typecast); WRITE_NODE_FIELD(expr); - WRITE_STRING_FIELD(typecast); + WRITE_NODE_FIELD(typname); WRITE_LOCATION_FIELD(location); } diff --git a/src/backend/parser/ag_scanner.l b/src/backend/parser/ag_scanner.l index 45ccdac3b..d5d72b926 100644 --- a/src/backend/parser/ag_scanner.l +++ b/src/backend/parser/ag_scanner.l @@ -124,7 +124,7 @@ whitespace [\t\n\v\f\r ]+ * Therefore, the rule has been modified so that it can match such comments. */ %x mlcomment -mlcstart "/*" +mlcstart \/\*{op_chars}* mlcchars [^*]+|\*+ mlcstop \*+\/ slcomment "//"[^\n\r]* @@ -228,20 +228,15 @@ param \${id} * These are tokens that are used as operators and language constructs in * Cypher, and some of them are structural characters in JSON. */ -left_contains "<@" -right_contains "@>" -any_exists "?|" -all_exists "?&" -concat "||" -access_path "#>" lt_gt "<>" lt_eq "<=" gt_eq ">=" dot_dot ".." plus_eq "+=" -eq_tilde "=~" typecast "::" -self [?%()*+,\-./:;<=>[\]^{|}] +self [%()*+,\-./:;<=>[\]^{|}] +op_chars [\!\@\#\^\&\|\~\?\+\*\/\%\<\>\=] +operator {op_chars}+ other . @@ -339,6 +334,11 @@ ag_token token; /* update location in case of unterminated comment */ update_location(); BEGIN(mlcomment); + yyless(2); +} + +{mlcstart} { + yyless(2); } {mlcchars} { @@ -649,54 +649,6 @@ ag_token token; return token; } -{concat} { - update_location(); - token.type = AG_TOKEN_CONCAT; - token.value.s = yytext; - token.location = get_location(); - return token; -} - -{access_path} { - update_location(); - token.type = AG_TOKEN_ACCESS_PATH; - token.value.s = yytext; - token.location = get_location(); - return token; -} - -{any_exists} { - update_location(); - token.type = AG_TOKEN_ANY_EXISTS; - token.value.s = yytext; - token.location = get_location(); - return token; -} - -{left_contains} { - update_location(); - token.type = AG_TOKEN_LEFT_CONTAINS; - token.value.s = yytext; - token.location = get_location(); - return token; -} - -{right_contains} { - update_location(); - token.type = AG_TOKEN_RIGHT_CONTAINS; - token.value.s = yytext; - token.location = get_location(); - return token; -} - -{all_exists} { - update_location(); - token.type = AG_TOKEN_ALL_EXISTS; - token.value.s = yytext; - token.location = get_location(); - return token; -} - {lt_gt} { update_location(); token.type = AG_TOKEN_LT_GT; @@ -737,26 +689,163 @@ ag_token token; return token; } -{eq_tilde} { +{typecast} { update_location(); - token.type = AG_TOKEN_EQ_TILDE; + token.type = AG_TOKEN_TYPECAST; token.value.s = yytext; token.location = get_location(); return token; } -{typecast} { +{self} { update_location(); - token.type = AG_TOKEN_TYPECAST; + token.type = AG_TOKEN_CHAR; + token.value.c = yytext[0]; + token.location = get_location(); + return token; +} + +{op_chars} { + update_location(); + token.type = AG_TOKEN_OP; token.value.s = yytext; token.location = get_location(); return token; } -{self} { +{operator} { + /* Borrowed from PG and adjusted for our scanner */ + + /* + * Check for embedded slash-star or slash-slash; those + * are comment starts, so operator must stop there. + * Note that slash-star or slash-slash at the first + * character will match a prior rule, not this one. + */ + int nchars = yyleng; + char *slashstar = strstr(yytext, "/*"); + char *slashslash = strstr(yytext, "//"); + + if (slashstar && slashslash) + { + /* if both appear, take the first one */ + if (slashstar > slashslash) + slashstar = slashslash; + } + else if (!slashstar) + slashstar = slashslash; + if (slashstar) + nchars = slashstar - yytext; + + /* + * For SQL compatibility, '+' and '-' cannot be the + * last char of a multi-char operator unless the operator + * contains chars that are not in SQL operators. + * The idea is to lex '=-' as two operators, but not + * to forbid operator names like '?-' that could not be + * sequences of SQL operators. + */ + if (nchars > 1 && + (yytext[nchars - 1] == '+' || + yytext[nchars - 1] == '-')) + { + int ic; + + for (ic = nchars - 2; ic >= 0; ic--) + { + char c = yytext[ic]; + if (c == '~' || c == '!' || c == '@' || + c == '#' || c == '^' || c == '&' || + c == '|' || c == '`' || c == '?' || + c == '%') + break; + } + if (ic < 0) + { + /* + * didn't find a qualifying character, so remove + * all trailing [+-] + */ + do { + nchars--; + } while (nchars > 1 && + (yytext[nchars - 1] == '+' || + yytext[nchars - 1] == '-')); + } + } + update_location(); - token.type = AG_TOKEN_CHAR; - token.value.c = yytext[0]; + + if (nchars < yyleng) + { + /* Strip the unwanted chars from the token */ + yyless(nchars); + /* + * If what we have left is only one char, and it's + * one of the characters matching "self", then + * return it as a character token the same way + * that the "self" rule would have. + */ + if (nchars == 1 && + strchr("%()*+,-./:;<=>[\\]^{|}", yytext[0])) + { + token.type = AG_TOKEN_CHAR; + token.value.c = yytext[0]; + token.location = get_location(); + return token; + } + + /* + * Likewise, if what we have left is two chars, and + * those match the tokens ">=", "<=", "=>", "<>" or + * "!=", then we must return the appropriate token + * rather than the generic Op. + */ + if (nchars == 2) + { + if (yytext[0] == '>' && yytext[1] == '=') + token.type = AG_TOKEN_GT_EQ; + else if (yytext[0] == '<' && yytext[1] == '=') + token.type = AG_TOKEN_LT_EQ; + else if (yytext[0] == '<' && yytext[1] == '>') + token.type = AG_TOKEN_LT_GT; + else if (yytext[0] == '+' && yytext[1] == '=') + token.type = AG_TOKEN_PLUS_EQ; + /* + * These operators (!=, =>) are not allowed as user-defined + * operators in PG because they are reserved as valid tokens + * with predefined semantics. As a result, we also reject + * them here. However, if a specific use case arises, we + * could allow them with custom handling. + */ + else if ((yytext[0] == '!' && yytext[1] == '=') || + (yytext[0] == '=' && yytext[1] == '>')) + ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), + scan_errmsg("unexpected character"), + scan_errposition())); + else + goto handle_generic_op; + + token.value.s = yytext; + token.location = get_location(); + return token; + } + } + +handle_generic_op: + /* + * Complain if operator is too long. Unlike the case + * for identifiers, we make this an error not a notice- + * and-truncate, because the odds are we are looking at + * a syntactic mistake anyway. + */ + if (nchars >= NAMEDATALEN) + ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), + scan_errmsg("operator too long"), + scan_errposition())); + + token.type = AG_TOKEN_OP; + token.value.s = yytext; token.location = get_location(); return token; } diff --git a/src/backend/parser/cypher_analyze.c b/src/backend/parser/cypher_analyze.c index 4bba1044c..d53fba3ef 100644 --- a/src/backend/parser/cypher_analyze.c +++ b/src/backend/parser/cypher_analyze.c @@ -1055,15 +1055,20 @@ static Query *analyze_cypher_and_coerce(List *stmt, RangeTblFunction *rtfunc, TargetEntry *te = lfirst(lt); Node *expr = (Node *)te->expr; Oid current_type; + int32 current_typmod; Oid target_type; + int32 target_typmod; Assert(!te->resjunk); current_type = exprType(expr); + current_typmod = exprTypmod(expr); target_type = lfirst_oid(lc2); - if (current_type != target_type) + target_typmod = lfirst_int(lc3); + + if ((current_type != target_type) || + (current_typmod != target_typmod)) { - int32 target_typmod = lfirst_int(lc3); Node *new_expr; /* diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c index 19bc71d42..390bfb392 100644 --- a/src/backend/parser/cypher_expr.c +++ b/src/backend/parser/cypher_expr.c @@ -106,16 +106,16 @@ static List *cast_agtype_args_to_target_type(cypher_parsestate *cpstate, Form_pg_proc procform, List *fargs, Oid *target_types); -static Node *cast_to_target_type(cypher_parsestate *cpstate, Node *expr, - Oid source_oid, Oid target_oid); static Node *wrap_text_output_to_agtype(cypher_parsestate *cpstate, FuncExpr *fexpr); static Form_pg_proc get_procform(FuncCall *fn, bool err_not_found); static char *get_mapped_extension(Oid func_oid); static bool is_extension_external(char *extension); -static bool is_pgvector_datatype(char *typename); static char *construct_age_function_name(char *funcname); static bool function_exists(char *funcname, char *extension); +static Node *coerce_expr_flexible(ParseState *pstate, Node *expr, + Oid source_oid, Oid target_oid, + int32 t_typemod, bool error_out); /* transform a cypher expression */ Node *transform_cypher_expr(cypher_parsestate *cpstate, Node *expr, @@ -1540,6 +1540,7 @@ static Node *transform_cypher_typecast(cypher_parsestate *cpstate, List *fname; FuncCall *fnode; ParseState *pstate; + TypeName *target_typ; /* verify input parameter */ Assert (cpstate != NULL); @@ -1548,98 +1549,137 @@ static Node *transform_cypher_typecast(cypher_parsestate *cpstate, /* create the qualified function name, schema first */ fname = list_make1(makeString("ag_catalog")); pstate = &cpstate->pstate; - - /* append the name of the requested typecast function */ - if (pg_strcasecmp(ctypecast->typecast, "edge") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_EDGE)); - } - else if (pg_strcasecmp(ctypecast->typecast, "path") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PATH)); - } - else if (pg_strcasecmp(ctypecast->typecast, "vertex") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_VERTEX)); - } - else if (pg_strcasecmp(ctypecast->typecast, "numeric") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_NUMERIC)); - } - else if (pg_strcasecmp(ctypecast->typecast, "float") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_FLOAT)); - } - else if (pg_strcasecmp(ctypecast->typecast, "int") == 0 || - pg_strcasecmp(ctypecast->typecast, "integer") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_INT)); - } - else if (pg_strcasecmp(ctypecast->typecast, "pg_float8") == 0) + target_typ = ctypecast->typname; + + if (list_length(target_typ->names) == 1) { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_FLOAT8)); - } - else if (pg_strcasecmp(ctypecast->typecast, "pg_bigint") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_BIGINT)); - } - else if ((pg_strcasecmp(ctypecast->typecast, "bool") == 0 || - pg_strcasecmp(ctypecast->typecast, "boolean") == 0)) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_BOOL)); - } - else if (pg_strcasecmp(ctypecast->typecast, "pg_text") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_TEXT)); + char *typecast = strVal(linitial(target_typ->names)); + + /* append the name of the requested typecast function */ + if (pg_strcasecmp(typecast, "edge") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_EDGE)); + } + else if (pg_strcasecmp(typecast, "path") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PATH)); + } + else if (pg_strcasecmp(typecast, "vertex") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_VERTEX)); + } + else if (pg_strcasecmp(typecast, "numeric") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_NUMERIC)); + } + else if (pg_strcasecmp(typecast, "float") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_FLOAT)); + } + else if (pg_strcasecmp(typecast, "int") == 0 || + pg_strcasecmp(typecast, "integer") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_INT)); + } + else if (pg_strcasecmp(typecast, "pg_float8") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_FLOAT8)); + } + else if (pg_strcasecmp(typecast, "pg_bigint") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_BIGINT)); + } + else if ((pg_strcasecmp(typecast, "bool") == 0 || + pg_strcasecmp(typecast, "boolean") == 0)) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_BOOL)); + } + else if (pg_strcasecmp(typecast, "pg_text") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_TEXT)); + } + else + { + goto fallback_coercion; + } + + /* make a function call node */ + fnode = makeFuncCall(fname, list_make1(ctypecast->expr), COERCE_SQL_SYNTAX, + ctypecast->location); + + /* return the transformed function */ + return transform_FuncCall(cpstate, fnode); } - else if (is_pgvector_datatype(ctypecast->typecast)) + +fallback_coercion: { - TypeName *target_typname; Oid source_oid; Oid target_oid; + int32 t_typmod = -1; Node *expr; /* transform the expr before casting */ expr = transform_cypher_expr_recurse(cpstate, ctypecast->expr); - /* get the source and target oids */ - target_typname = makeTypeNameFromNameList(list_make1( - makeString(ctypecast->typecast))); - target_oid = typenameTypeId(pstate, target_typname); + typenameTypeIdAndMod(pstate, target_typ, &target_oid, &t_typmod); source_oid = exprType(expr); - if (source_oid == AGTYPEOID) - { - /* - * Cast to text and then to target type, since we cant - * directly cast agtype to pgvector datatypes. - */ - expr = cast_to_target_type(cpstate, expr, source_oid, TEXTOID); - expr = cast_to_target_type(cpstate, expr, TEXTOID, target_oid); - } - else - { - /* try a direct cast, it will error out if not possible */ - expr = cast_to_target_type(cpstate, expr, source_oid, target_oid); - } + /* errors out if cast not possible */ + expr = coerce_expr_flexible(pstate, expr, source_oid, target_oid, + t_typmod, true); return expr; } - /* if none was found, error out */ - else +} + +/* + * Helper function to coerce an expression to the target type. If + * no direct cast exists, it attempts to cast through text if the + * source or target type is agtype. This improves interoperability + * with types from other extensions. + */ +static Node *coerce_expr_flexible(ParseState *pstate, Node *expr, + Oid source_oid, Oid target_oid, + int32 t_typmod, bool error_out) +{ + const Oid text_oid = TEXTOID; + Node *result; + + if (expr == NULL) + return NULL; + + /* Try a direct cast */ + result = coerce_to_target_type(pstate, expr, source_oid, target_oid, + t_typmod, COERCION_EXPLICIT, + COERCE_EXPLICIT_CAST, -1); + if (result != NULL) + return result; + + /* Try cast via TEXT if either side is AGTYPE */ + if (source_oid == AGTYPEOID || target_oid == AGTYPEOID) + { + Node *to_text = coerce_to_target_type(pstate, expr, source_oid, text_oid, + -1, COERCION_EXPLICIT, + COERCE_EXPLICIT_CAST, -1); + if (to_text != NULL) + { + result = coerce_to_target_type(pstate, to_text, text_oid, target_oid, + t_typmod, COERCION_EXPLICIT, + COERCE_EXPLICIT_CAST, -1); + if (result != NULL) + return result; + } + } + + if (error_out) { ereport(ERROR, (errmsg_internal("typecast \'%s\' not supported", - ctypecast->typecast))); + format_type_be(target_oid)))); } - /* make a function call node */ - fnode = makeFuncCall(fname, list_make1(ctypecast->expr), COERCE_SQL_SYNTAX, - ctypecast->location); - - /* return the transformed function */ - return transform_FuncCall(cpstate, fnode); + return NULL; } static Node *transform_external_ext_FuncCall(cypher_parsestate *cpstate, @@ -1704,7 +1744,6 @@ static List *cast_agtype_args_to_target_type(cypher_parsestate *cpstate, char *funcname = NameStr(procform->proname); int nargs = procform->pronargs; ListCell *lc = NULL; - int i = 0; /* verify the length of args are same */ if (list_length(fargs) != nargs) @@ -1718,67 +1757,20 @@ static List *cast_agtype_args_to_target_type(cypher_parsestate *cpstate, /* iterate through the function's args */ foreach (lc, fargs) { - char *target_typname; Node *expr = lfirst(lc); Oid source_oid = exprType(expr); - Oid target_oid = target_types[i]; - - /* get the typename from target_oid */ - target_typname = format_type_be(target_oid); + Oid target_oid = target_types[foreach_current_index(lc)]; - /* cast the agtype to the target type */ - if (source_oid == AGTYPEOID && is_pgvector_datatype(target_typname)) - { - /* - * There is no cast from agtype to vector, so we first - * cast agtype to text and then text to vector. - */ - expr = cast_to_target_type(cpstate, expr, source_oid, TEXTOID); - expr = cast_to_target_type(cpstate, expr, TEXTOID, target_oid); - } - /* additional casts can be added here for other types */ - else - { - /* try a direct cast, it will error out if not possible */ - expr = cast_to_target_type(cpstate, expr, source_oid, target_oid); - } + /* errors out if cast not possible */ + expr = coerce_expr_flexible(&cpstate->pstate, expr, source_oid, + target_oid, -1, true); lfirst(lc) = expr; - i++; } return fargs; } -/* - * Cast an input type to an output type, error out if not possible. - * Thanks to Taha for this idea. - */ -static Node *cast_to_target_type(cypher_parsestate *cpstate, Node *expr, - Oid source_oid, Oid target_oid) -{ - ParseState *pstate = &cpstate->pstate; - - /* can we cast from source to target oid? */ - if (can_coerce_type(1, &source_oid, &target_oid, COERCION_EXPLICIT)) - { - /* coerce the source to the target */ - expr = coerce_type(pstate, expr, source_oid, target_oid, -1, - COERCION_EXPLICIT, COERCE_EXPLICIT_CAST, -1); - } - /* error out if we can't cast */ - else - { - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_FUNCTION), - errmsg("cannot cast type %s to %s", format_type_be(source_oid), - format_type_be(target_oid)))); - } - - /* return the casted expression */ - return expr; -} - /* * Due to issues with creating a cast from text to agtype, we need to wrap a * function that outputs text with text_to_agtype. @@ -1912,13 +1904,6 @@ static bool is_extension_external(char *extension) (pg_strcasecmp(extension, "age") != 0)); } -static bool is_pgvector_datatype(char *typename) -{ - return (pg_strcasecmp(typename, "vector") || - pg_strcasecmp(typename, "halfvec") || - pg_strcasecmp(typename, "sparsevec")); -} - /* Returns age_ prefiexed lower case function name */ static char *construct_age_function_name(char *funcname) { diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index 94b1c1fff..1a3d2167c 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -69,11 +69,11 @@ %token IDENTIFIER %token PARAMETER %token BQIDENT +%token OP %token CHAR /* operators that have more than 1 character */ -%token NOT_EQ LT_EQ GT_EQ DOT_DOT TYPECAST PLUS_EQ EQ_TILDE CONCAT -%token ACCESS_PATH LEFT_CONTAINS RIGHT_CONTAINS ANY_EXISTS ALL_EXISTS +%token NOT_EQ LT_EQ GT_EQ DOT_DOT TYPECAST PLUS_EQ /* keywords in alphabetical order */ %token ALL ANALYZE AND AS ASC ASCENDING @@ -86,7 +86,7 @@ LIMIT MATCH MERGE NOT NULL_P - OPTIONAL OR ORDER + OPERATOR OPTIONAL OR ORDER REMOVE RETURN SET SKIP STARTS THEN TRUE_P @@ -168,10 +168,18 @@ /* names */ %type property_key_name var_name var_name_opt label_name -%type symbolic_name schema_name +%type symbolic_name schema_name type_name %type reserved_keyword safe_keywords conflicted_keywords %type func_name +/* types */ +%type generic_type +%type opt_type_modifiers + +/* operator */ +%type all_op math_op +%type qual_op any_operator + /* precedence: lowest to highest */ %left UNION %left OR @@ -179,8 +187,8 @@ %left XOR %right NOT %left '=' NOT_EQ '<' LT_EQ '>' GT_EQ -%left '@' '|' '&' '?' LEFT_CONTAINS RIGHT_CONTAINS ANY_EXISTS ALL_EXISTS -%left '+' '-' CONCAT +%left '+' '-' +%left OP OPERATOR %left '*' '/' '%' %left '^' %nonassoc IN IS @@ -235,7 +243,7 @@ static Node *make_bool_const(bool b, int location); static Node *make_null_const(int location); /* typecast */ -static Node *make_typecast_expr(Node *expr, char *typecast, int location); +static Node *make_typecast_expr(Node *expr, Node *typname, int location); /* functions */ static Node *make_function_expr(List *func_name, List *exprs, int location); @@ -513,7 +521,6 @@ yield_item: } ; - semicolon_opt: /* empty */ | ';' @@ -1578,33 +1585,13 @@ expr: { $$ = build_comparison_expression($1, $3, ">=", @2); } - | expr LEFT_CONTAINS expr - { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "<@", $1, $3, @2); - } - | expr RIGHT_CONTAINS expr - { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "@>", $1, $3, @2); - } - | expr '?' expr %prec '.' + | expr qual_op expr %prec OP { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "?", $1, $3, @2); + $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, $3, @2); } - | expr ANY_EXISTS expr + | qual_op expr %prec OP { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "?|", $1, $3, @2); - } - | expr ALL_EXISTS expr - { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "?&", $1, $3, @2); - } - | expr CONCAT expr - { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "||", $1, $3, @2); - } - | expr ACCESS_PATH expr - { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "#>", $1, $3, @2); + $$ = (Node *) makeA_Expr(AEXPR_OP, $1, NULL, $2, @1); } | expr '+' expr { @@ -1696,11 +1683,6 @@ expr: $$ = (Node *)n; } - | expr EQ_TILDE expr - { - $$ = make_function_expr(list_make1(makeString("eq_tilde")), - list_make2($1, $3), @2); - } | expr '[' expr ']' { A_Indices *i; @@ -1791,6 +1773,31 @@ expr: $$ = append_indirection($1, (Node*)string); } + /* allow indirection with a typecast */ + else if ((IsA($1, ColumnRef) || IsA($1, A_Indirection)) && + (IsA($3, ExtensibleNode) && + is_ag_node($3, cypher_typecast))) + { + cypher_typecast *tc = (cypher_typecast *)$3; + + if (IsA(tc->expr, ColumnRef)) + { + ColumnRef *cr = (ColumnRef *)tc->expr; + List *fields = cr->fields; + String *string = linitial(fields); + + tc->expr = append_indirection($1, (Node *)string); + + $$ = (Node *)tc; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid indirection syntax"), + ag_scanner_errposition(@1, scanner))); + } + } else if (IsA($1, FuncCall) && IsA($3, A_Indirection)) { ereport(ERROR, @@ -1814,7 +1821,7 @@ expr: { $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "->", $1, $4, @2); } - | expr TYPECAST symbolic_name + | expr TYPECAST generic_type { $$ = make_typecast_expr($1, $3, @2); } @@ -2354,6 +2361,10 @@ label_name: schema_name ; +type_name: + schema_name + ; + symbolic_name: IDENTIFIER ; @@ -2372,6 +2383,82 @@ reserved_keyword: | conflicted_keywords ; +/* + * types + */ +generic_type: + type_name opt_type_modifiers + { + TypeName *typname; + + typname = makeTypeName($1); + typname->typmods = $2; + typname->location = @1; + + $$ = (Node *) typname; + } + ; + +opt_type_modifiers: + '(' expr_list ')' + { + $$ = $2; + } + | /* empty */ + { + $$ = NIL; + } + ; + +/* + * operators + */ +any_operator: + all_op + { + $$ = list_make1(makeString($1)); + } + | symbolic_name + { + $$ = list_make1(makeString($1)); + } + | schema_name '.' any_operator + { + $$ = lcons(makeString($1), $3); + } + ; + +all_op: + OP + | math_op + ; + +math_op: + '+' { $$ = "+"; } + | '-' { $$ = "-"; } + | '*' { $$ = "*"; } + | '/' { $$ = "/"; } + | '%' { $$ = "%"; } + | '^' { $$ = "^"; } + | '<' { $$ = "<"; } + | '>' { $$ = ">"; } + | '=' { $$ = "="; } + | LT_EQ { $$ = "<="; } + | GT_EQ { $$ = ">="; } + | NOT_EQ { $$ = "<>"; } + ; + +qual_op: + OP + { + $$ = list_make1(makeString($1)); + } + | OPERATOR '(' any_operator ')' + { + $$ = $3; + } + ; + /* * All keywords need to be copied and properly terminated with a null before * using them, pnstrdup effectively does this for us. @@ -2406,6 +2493,7 @@ safe_keywords: | MATCH { $$ = pnstrdup($1, 6); } | MERGE { $$ = pnstrdup($1, 6); } | NOT { $$ = pnstrdup($1, 3); } + | OPERATOR { $$ = pnstrdup($1, 8); } | OPTIONAL { $$ = pnstrdup($1, 8); } | OR { $$ = pnstrdup($1, 2); } | ORDER { $$ = pnstrdup($1, 5); } @@ -2674,13 +2762,13 @@ static Node *make_null_const(int location) /* * typecast */ -static Node *make_typecast_expr(Node *expr, char *typecast, int location) +static Node *make_typecast_expr(Node *expr, Node *typname, int location) { cypher_typecast *node; node = make_ag_node(cypher_typecast); node->expr = expr; - node->typecast = typecast; + node->typname = (TypeName *) typname; node->location = location; return (Node *)node; diff --git a/src/backend/parser/cypher_parser.c b/src/backend/parser/cypher_parser.c index d2b64ffef..ebf46b44d 100644 --- a/src/backend/parser/cypher_parser.c +++ b/src/backend/parser/cypher_parser.c @@ -44,15 +44,9 @@ int cypher_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, ag_scanner_t scanner) DOT_DOT, TYPECAST, PLUS_EQ, - EQ_TILDE, - LEFT_CONTAINS, - RIGHT_CONTAINS, - ACCESS_PATH, - ANY_EXISTS, - ALL_EXISTS, - CONCAT, CHAR, - BQIDENT + BQIDENT, + OP }; ag_token token; @@ -68,6 +62,7 @@ int cypher_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, ag_scanner_t scanner) break; case AG_TOKEN_DECIMAL: case AG_TOKEN_STRING: + case AG_TOKEN_OP: lvalp->string = pstrdup(token.value.s); break; case AG_TOKEN_IDENTIFIER: @@ -115,14 +110,6 @@ int cypher_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, ag_scanner_t scanner) case AG_TOKEN_GT_EQ: case AG_TOKEN_DOT_DOT: case AG_TOKEN_PLUS_EQ: - case AG_TOKEN_EQ_TILDE: - case AG_TOKEN_ACCESS_PATH: - case AG_TOKEN_ALL_EXISTS: - case AG_TOKEN_ANY_EXISTS: - case AG_TOKEN_LEFT_CONTAINS: - case AG_TOKEN_RIGHT_CONTAINS: - case AG_TOKEN_CONCAT: - break; case AG_TOKEN_TYPECAST: break; case AG_TOKEN_CHAR: diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index 20ee92baf..ef07657a4 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -1116,7 +1116,10 @@ char *agtype_to_cstring_indent(StringInfo out, agtype_container *in, } /* - * common worker for above two functions + * Common worker for above two functions. + * If extend is set to true, the function will append + * ::vertex, ::edge or ::path based on the type of + * container. */ static char *agtype_to_cstring_worker(StringInfo out, agtype_container *in, int estimated_len, bool indent, @@ -3213,9 +3216,10 @@ Datum agtype_to_text(PG_FUNCTION_ARGS) /* check that we have a scalar value */ if (!AGT_ROOT_IS_SCALAR(arg_agt)) { - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("agtype argument must resolve to a scalar value"))); + char *out; + out = agtype_to_cstring(NULL, &arg_agt->root, VARSIZE(arg_agt)); + PG_FREE_IF_COPY(arg_agt, 0); + return CStringGetTextDatum(out); } /* get the arg parameter */ diff --git a/src/include/catalog/ag_graph.h b/src/include/catalog/ag_graph.h index 432954561..f86efd885 100644 --- a/src/include/catalog/ag_graph.h +++ b/src/include/catalog/ag_graph.h @@ -39,6 +39,7 @@ void update_graph_name(const Name graph_name, const Name new_name); uint32 get_graph_oid(const char *graph_name); char *get_graph_namespace_name(const char *graph_name); +bool graph_namespace_exists(Oid graph_oid); List *get_graphnames(void); void drop_graphs(List *graphnames); diff --git a/src/include/nodes/cypher_nodes.h b/src/include/nodes/cypher_nodes.h index 0b5fec1c4..7698772e2 100644 --- a/src/include/nodes/cypher_nodes.h +++ b/src/include/nodes/cypher_nodes.h @@ -483,7 +483,7 @@ typedef struct cypher_typecast { ExtensibleNode extensible; Node *expr; - char *typecast; + TypeName *typname; int location; } cypher_typecast; diff --git a/src/include/parser/ag_scanner.h b/src/include/parser/ag_scanner.h index 3dd89abd3..4cef63288 100644 --- a/src/include/parser/ag_scanner.h +++ b/src/include/parser/ag_scanner.h @@ -45,15 +45,9 @@ typedef enum ag_token_type AG_TOKEN_DOT_DOT, AG_TOKEN_TYPECAST, AG_TOKEN_PLUS_EQ, - AG_TOKEN_EQ_TILDE, - AG_TOKEN_LEFT_CONTAINS, - AG_TOKEN_RIGHT_CONTAINS, - AG_TOKEN_ACCESS_PATH, - AG_TOKEN_ANY_EXISTS, - AG_TOKEN_ALL_EXISTS, - AG_TOKEN_CONCAT, AG_TOKEN_CHAR, - AG_TOKEN_BQIDENT + AG_TOKEN_BQIDENT, + AG_TOKEN_OP } ag_token_type; /* diff --git a/src/include/parser/cypher_kwlist.h b/src/include/parser/cypher_kwlist.h index ce48f2887..e4c4437ba 100644 --- a/src/include/parser/cypher_kwlist.h +++ b/src/include/parser/cypher_kwlist.h @@ -29,6 +29,7 @@ PG_KEYWORD("match", MATCH, RESERVED_KEYWORD) PG_KEYWORD("merge", MERGE, RESERVED_KEYWORD) PG_KEYWORD("not", NOT, RESERVED_KEYWORD) PG_KEYWORD("null", NULL_P, RESERVED_KEYWORD) +PG_KEYWORD("operator", OPERATOR, RESERVED_KEYWORD) PG_KEYWORD("optional", OPTIONAL, RESERVED_KEYWORD) PG_KEYWORD("or", OR, RESERVED_KEYWORD) PG_KEYWORD("order", ORDER, RESERVED_KEYWORD) From 1b107edc75b157489900e22e33f2e6caac4a607d Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Wed, 13 Aug 2025 19:51:08 +0500 Subject: [PATCH 4/6] Add missing dependency in jdbc driver (#2206) - This was also failing CI. The issue is described here https://github.com/gradle/gradle/issues/33950 --- drivers/jdbc/lib/build.gradle.kts | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/jdbc/lib/build.gradle.kts b/drivers/jdbc/lib/build.gradle.kts index 2ba529ec1..0b63bc5a6 100644 --- a/drivers/jdbc/lib/build.gradle.kts +++ b/drivers/jdbc/lib/build.gradle.kts @@ -36,6 +36,7 @@ dependencies { testImplementation("org.junit.jupiter:junit-jupiter-api:5.9.3") testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine") + testRuntimeOnly("org.junit.platform:junit-platform-launcher") testImplementation("org.testcontainers:testcontainers:1.18.0") testImplementation("org.postgresql:postgresql:42.6.0") From a1bb68855da8bc7bd3dc8aee3fc793cd8f90c750 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Wed, 13 Aug 2025 08:13:29 -0700 Subject: [PATCH 5/6] Fix issue 2201: unexpected empty string behavior (#2203) This PR fixes the issue of some string functions returning NULL instead of the empty string. The string functions affected and corrected are - reverse, toupper, tolower, rtrim, ltrim, trim, right, left, substring, and replace. Added additional regression tests. Corrected previous tests. modified: regress/expected/expr.out modified: regress/sql/expr.sql modified: src/backend/utils/adt/agtype.c --- regress/expected/expr.out | 245 ++++++++++++++++++++++++++++----- regress/sql/expr.sql | 80 +++++++++-- src/backend/utils/adt/agtype.c | 42 ------ 3 files changed, 281 insertions(+), 86 deletions(-) diff --git a/regress/expected/expr.out b/regress/expected/expr.out index 53d737c8c..9185b1eab 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -3927,6 +3927,41 @@ SELECT * FROM age_reverse('gnirts a si siht'::cstring); "this is a string" (1 row) +-- should return empty string +SELECT * FROM age_reverse(''); + age_reverse +------------- + "" +(1 row) + +SELECT * FROM age_reverse(''::text); + age_reverse +------------- + "" +(1 row) + +SELECT * FROM age_reverse(''::cstring); + age_reverse +------------- + "" +(1 row) + +SELECT * FROM cypher('expr', $$ + RETURN reverse('') +$$) AS (result agtype); + result +-------- + "" +(1 row) + +SELECT * FROM cypher('expr', $$ + RETURN reverse("") +$$) AS (result agtype); + result +-------- + "" +(1 row) + -- should return null SELECT * FROM cypher('expr', $$ RETURN reverse(null) @@ -4104,6 +4139,75 @@ SELECT * FROM age_tolower('CSTRING'::cstring); "cstring" (1 row) +-- should return empty string +SELECT * FROM age_toupper(''); + age_toupper +------------- + "" +(1 row) + +SELECT * FROM age_toupper(''::text); + age_toupper +------------- + "" +(1 row) + +SELECT * FROM age_toupper(''::cstring); + age_toupper +------------- + "" +(1 row) + +SELECT * FROM cypher('expr', $$ + RETURN toupper('') +$$) AS (result agtype); + result +-------- + "" +(1 row) + +SELECT * FROM cypher('expr', $$ + RETURN toupper("") +$$) AS (result agtype); + result +-------- + "" +(1 row) + +SELECT * FROM age_tolower(''); + age_tolower +------------- + "" +(1 row) + +SELECT * FROM age_tolower(''::text); + age_tolower +------------- + "" +(1 row) + +SELECT * FROM age_tolower(''::cstring); + age_tolower +------------- + "" +(1 row) + +SELECT * FROM cypher('expr', $$ + RETURN tolower('') +$$) AS (result agtype); + result +-------- + "" +(1 row) + +SELECT * FROM cypher('expr', $$ + RETURN tolower("") +$$) AS (result agtype); + result +-------- + "" +(1 row) + -- should return null SELECT * FROM cypher('expr', $$ RETURN toUpper(null) @@ -4211,6 +4315,73 @@ SELECT * FROM age_trim(' string '); "string" (1 row) +-- should return empty string +SELECT * FROM cypher('expr', $$ + RETURN lTrim('') +$$) AS (results agtype); + results +--------- + "" +(1 row) + +SELECT * FROM cypher('expr', $$ + RETURN rTrim('') +$$) AS (results agtype); + results +--------- + "" +(1 row) + +SELECT * FROM cypher('expr', $$ + RETURN trim('') +$$) AS (results agtype); + results +--------- + "" +(1 row) + +SELECT * FROM cypher('expr', $$ + RETURN lTrim("") +$$) AS (results agtype); + results +--------- + "" +(1 row) + +SELECT * FROM cypher('expr', $$ + RETURN rTrim("") +$$) AS (results agtype); + results +--------- + "" +(1 row) + +SELECT * FROM cypher('expr', $$ + RETURN trim("") +$$) AS (results agtype); + results +--------- + "" +(1 row) + +SELECT * FROM age_ltrim(''); + age_ltrim +----------- + "" +(1 row) + +SELECT * FROM age_rtrim(''); + age_rtrim +----------- + "" +(1 row) + +SELECT * FROM age_trim(''); + age_trim +---------- + "" +(1 row) + -- should return null SELECT * FROM cypher('expr', $$ RETURN lTrim(null) @@ -4322,15 +4493,16 @@ $$) AS (results agtype); "123" (1 row) --- should return null +-- should return empty string SELECT * FROM cypher('expr', $$ RETURN left("123456789", 0) $$) AS (results agtype); results --------- - + "" (1 row) +-- should return null SELECT * FROM cypher('expr', $$ RETURN left(null, 1) $$) AS (results agtype); @@ -4401,15 +4573,16 @@ $$) AS (results agtype); "789" (1 row) --- should return null +-- should return empty string SELECT * FROM cypher('expr', $$ RETURN right("123456789", 0) $$) AS (results agtype); results --------- - + "" (1 row) +-- should return null SELECT * FROM cypher('expr', $$ RETURN right(null, 1) $$) AS (results agtype); @@ -4508,6 +4681,13 @@ SELECT * FROM age_substring('0123456789', 1); "123456789" (1 row) +-- should return empty string +SELECT * FROM age_substring('0123456789', 0, 0); + age_substring +--------------- + "" +(1 row) + -- should return null SELECT * FROM cypher('expr', $$ RETURN substring(null, null, null) @@ -4747,33 +4927,52 @@ $$) AS (results agtype); "ababab" (1 row) --- should return null +-- should return empty string SELECT * FROM cypher('expr', $$ - RETURN replace(null, null, null) + RETURN replace("", "", "") $$) AS (results agtype); results --------- - + "" (1 row) SELECT * FROM cypher('expr', $$ - RETURN replace("Hello", null, null) + RETURN replace("Hello", "Hello", "") $$) AS (results agtype); results --------- - + "" (1 row) SELECT * FROM cypher('expr', $$ - RETURN replace("Hello", "", null) + RETURN replace("", "Hello", "Mellow") $$) AS (results agtype); results --------- - + "" +(1 row) + +SELECT * FROM age_replace('', '', ''); + age_replace +------------- + "" (1 row) +SELECT * FROM age_replace('Hello', 'Hello', ''); + age_replace +------------- + "" +(1 row) + +SELECT * FROM age_replace('', 'Hello', 'Mellow'); + age_replace +------------- + "" +(1 row) + +-- should return null SELECT * FROM cypher('expr', $$ - RETURN replace("", "", "") + RETURN replace(null, null, null) $$) AS (results agtype); results --------- @@ -4781,7 +4980,7 @@ $$) AS (results agtype); (1 row) SELECT * FROM cypher('expr', $$ - RETURN replace("Hello", "Hello", "") + RETURN replace("Hello", null, null) $$) AS (results agtype); results --------- @@ -4789,7 +4988,7 @@ $$) AS (results agtype); (1 row) SELECT * FROM cypher('expr', $$ - RETURN replace("", "Hello", "Mellow") + RETURN replace("Hello", "", null) $$) AS (results agtype); results --------- @@ -4814,24 +5013,6 @@ SELECT * FROM age_replace('Hello', '', null); (1 row) -SELECT * FROM age_replace('', '', ''); - age_replace -------------- - -(1 row) - -SELECT * FROM age_replace('Hello', 'Hello', ''); - age_replace -------------- - -(1 row) - -SELECT * FROM age_replace('', 'Hello', 'Mellow'); - age_replace -------------- - -(1 row) - -- should fail SELECT * FROM cypher('expr', $$ RETURN replace() diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql index c3ddee862..3ce192047 100644 --- a/regress/sql/expr.sql +++ b/regress/sql/expr.sql @@ -1669,6 +1669,16 @@ $$) AS (results agtype); SELECT * FROM age_reverse('gnirts a si siht'); SELECT * FROM age_reverse('gnirts a si siht'::text); SELECT * FROM age_reverse('gnirts a si siht'::cstring); +-- should return empty string +SELECT * FROM age_reverse(''); +SELECT * FROM age_reverse(''::text); +SELECT * FROM age_reverse(''::cstring); +SELECT * FROM cypher('expr', $$ + RETURN reverse('') +$$) AS (result agtype); +SELECT * FROM cypher('expr', $$ + RETURN reverse("") +$$) AS (result agtype); -- should return null SELECT * FROM cypher('expr', $$ RETURN reverse(null) @@ -1742,6 +1752,25 @@ SELECT * FROM age_toupper('text'::text); SELECT * FROM age_toupper('cstring'::cstring); SELECT * FROM age_tolower('TEXT'::text); SELECT * FROM age_tolower('CSTRING'::cstring); +-- should return empty string +SELECT * FROM age_toupper(''); +SELECT * FROM age_toupper(''::text); +SELECT * FROM age_toupper(''::cstring); +SELECT * FROM cypher('expr', $$ + RETURN toupper('') +$$) AS (result agtype); +SELECT * FROM cypher('expr', $$ + RETURN toupper("") +$$) AS (result agtype); +SELECT * FROM age_tolower(''); +SELECT * FROM age_tolower(''::text); +SELECT * FROM age_tolower(''::cstring); +SELECT * FROM cypher('expr', $$ + RETURN tolower('') +$$) AS (result agtype); +SELECT * FROM cypher('expr', $$ + RETURN tolower("") +$$) AS (result agtype); -- should return null SELECT * FROM cypher('expr', $$ RETURN toUpper(null) @@ -1783,6 +1812,28 @@ $$) AS (results agtype); SELECT * FROM age_ltrim(' string '); SELECT * FROM age_rtrim(' string '); SELECT * FROM age_trim(' string '); +-- should return empty string +SELECT * FROM cypher('expr', $$ + RETURN lTrim('') +$$) AS (results agtype); +SELECT * FROM cypher('expr', $$ + RETURN rTrim('') +$$) AS (results agtype); +SELECT * FROM cypher('expr', $$ + RETURN trim('') +$$) AS (results agtype); +SELECT * FROM cypher('expr', $$ + RETURN lTrim("") +$$) AS (results agtype); +SELECT * FROM cypher('expr', $$ + RETURN rTrim("") +$$) AS (results agtype); +SELECT * FROM cypher('expr', $$ + RETURN trim("") +$$) AS (results agtype); +SELECT * FROM age_ltrim(''); +SELECT * FROM age_rtrim(''); +SELECT * FROM age_trim(''); -- should return null SELECT * FROM cypher('expr', $$ RETURN lTrim(null) @@ -1829,10 +1880,11 @@ $$) AS (results agtype); SELECT * FROM cypher('expr', $$ RETURN left("123456789", 3) $$) AS (results agtype); --- should return null +-- should return empty string SELECT * FROM cypher('expr', $$ RETURN left("123456789", 0) $$) AS (results agtype); +-- should return null SELECT * FROM cypher('expr', $$ RETURN left(null, 1) $$) AS (results agtype); @@ -1861,10 +1913,11 @@ $$) AS (results agtype); SELECT * FROM cypher('expr', $$ RETURN right("123456789", 3) $$) AS (results agtype); --- should return null +-- should return empty string SELECT * FROM cypher('expr', $$ RETURN right("123456789", 0) $$) AS (results agtype); +-- should return null SELECT * FROM cypher('expr', $$ RETURN right(null, 1) $$) AS (results agtype); @@ -1901,6 +1954,8 @@ SELECT * FROM cypher('expr', $$ $$) AS (results agtype); SELECT * FROM age_substring('0123456789', 3, 2); SELECT * FROM age_substring('0123456789', 1); +-- should return empty string +SELECT * FROM age_substring('0123456789', 0, 0); -- should return null SELECT * FROM cypher('expr', $$ RETURN substring(null, null, null) @@ -2002,31 +2057,32 @@ $$) AS (results agtype); SELECT * FROM cypher('expr', $$ RETURN replace("ababab", "ab", "ab") $$) AS (results agtype); --- should return null +-- should return empty string SELECT * FROM cypher('expr', $$ - RETURN replace(null, null, null) + RETURN replace("", "", "") $$) AS (results agtype); SELECT * FROM cypher('expr', $$ - RETURN replace("Hello", null, null) + RETURN replace("Hello", "Hello", "") $$) AS (results agtype); SELECT * FROM cypher('expr', $$ - RETURN replace("Hello", "", null) + RETURN replace("", "Hello", "Mellow") $$) AS (results agtype); +SELECT * FROM age_replace('', '', ''); +SELECT * FROM age_replace('Hello', 'Hello', ''); +SELECT * FROM age_replace('', 'Hello', 'Mellow'); +-- should return null SELECT * FROM cypher('expr', $$ - RETURN replace("", "", "") + RETURN replace(null, null, null) $$) AS (results agtype); SELECT * FROM cypher('expr', $$ - RETURN replace("Hello", "Hello", "") + RETURN replace("Hello", null, null) $$) AS (results agtype); SELECT * FROM cypher('expr', $$ - RETURN replace("", "Hello", "Mellow") + RETURN replace("Hello", "", null) $$) AS (results agtype); SELECT * FROM age_replace(null, null, null); SELECT * FROM age_replace('Hello', null, null); SELECT * FROM age_replace('Hello', '', null); -SELECT * FROM age_replace('', '', ''); -SELECT * FROM age_replace('Hello', 'Hello', ''); -SELECT * FROM age_replace('', 'Hello', 'Mellow'); -- should fail SELECT * FROM cypher('expr', $$ RETURN replace() diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index ef07657a4..fa6362952 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -7697,12 +7697,6 @@ Datum age_reverse(PG_FUNCTION_ARGS) string = text_to_cstring(text_string); string_len = strlen(string); - /* if we have an empty string, return null */ - if (string_len == 0) - { - PG_RETURN_NULL(); - } - /* build the result */ agtv_result.type = AGTV_STRING; agtv_result.val.string.val = string; @@ -7782,10 +7776,6 @@ Datum age_toupper(PG_FUNCTION_ARGS) agtv_value->type))); } - /* if we have an empty string, return null */ - if (string_len == 0) - PG_RETURN_NULL(); - /* allocate the new string */ result = palloc0(string_len); @@ -7872,10 +7862,6 @@ Datum age_tolower(PG_FUNCTION_ARGS) agtv_value->type))); } - /* if we have an empty string, return null */ - if (string_len == 0) - PG_RETURN_NULL(); - /* allocate the new string */ result = palloc0(string_len); @@ -7970,10 +7956,6 @@ Datum age_rtrim(PG_FUNCTION_ARGS) string = text_to_cstring(text_string); string_len = strlen(string); - /* if we have an empty string, return null */ - if (string_len == 0) - PG_RETURN_NULL(); - /* build the result */ agtv_result.type = AGTV_STRING; agtv_result.val.string.val = string; @@ -8061,10 +8043,6 @@ Datum age_ltrim(PG_FUNCTION_ARGS) string = text_to_cstring(text_string); string_len = strlen(string); - /* if we have an empty string, return null */ - if (string_len == 0) - PG_RETURN_NULL(); - /* build the result */ agtv_result.type = AGTV_STRING; agtv_result.val.string.val = string; @@ -8152,10 +8130,6 @@ Datum age_trim(PG_FUNCTION_ARGS) string = text_to_cstring(text_string); string_len = strlen(string); - /* if we have an empty string, return null */ - if (string_len == 0) - PG_RETURN_NULL(); - /* build the result */ agtv_result.type = AGTV_STRING; agtv_result.val.string.val = string; @@ -8293,10 +8267,6 @@ Datum age_right(PG_FUNCTION_ARGS) string = text_to_cstring(text_string); string_len = strlen(string); - /* if we have an empty string, return null */ - if (string_len == 0) - PG_RETURN_NULL(); - /* build the result */ agtv_result.type = AGTV_STRING; agtv_result.val.string.val = string; @@ -8434,10 +8404,6 @@ Datum age_left(PG_FUNCTION_ARGS) string = text_to_cstring(text_string); string_len = strlen(string); - /* if we have an empty string, return null */ - if (string_len == 0) - PG_RETURN_NULL(); - /* build the result */ agtv_result.type = AGTV_STRING; agtv_result.val.string.val = string; @@ -8603,10 +8569,6 @@ Datum age_substring(PG_FUNCTION_ARGS) string = text_to_cstring(text_string); string_len = strlen(string); - /* if we have an empty string, return null */ - if (string_len == 0) - PG_RETURN_NULL(); - /* build the result */ agtv_result.type = AGTV_STRING; agtv_result.val.string.val = string; @@ -8861,10 +8823,6 @@ Datum age_replace(PG_FUNCTION_ARGS) string = text_to_cstring(text_result); string_len = strlen(string); - /* if we have an empty string, return null */ - if (string_len == 0) - PG_RETURN_NULL(); - /* build the result */ agtv_result.type = AGTV_STRING; agtv_result.val.string.val = string; From e1f05f9da3b59ed468450bdc66ff630e81f0478a Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Mon, 18 Aug 2025 07:50:17 -0700 Subject: [PATCH 6/6] fix issue 2205: left doesn't catch overflow (#2207) Fixed issue 2205 where large int values aren't detected. The following functions were fixed - left, right, and substring modified: regress/expected/expr.out modified: regress/sql/expr.sql modified: src/backend/utils/adt/agtype.c Added regression tests. --- regress/expected/expr.out | 48 +++++++++++ regress/sql/expr.sql | 36 +++++++++ src/backend/utils/adt/agtype.c | 143 ++++++++++++++++++++++++++++++--- 3 files changed, 216 insertions(+), 11 deletions(-) diff --git a/regress/expected/expr.out b/regress/expected/expr.out index 9185b1eab..9b5dd0a16 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -4556,6 +4556,18 @@ ERROR: function age_left() does not exist LINE 1: SELECT * FROM age_left(); ^ HINT: No function matches the given name and argument types. You might need to add explicit type casts. +SELECT * FROM cypher('expr', $$ + RETURN left('abcdef', -2147483648) +$$) AS (result agtype); +ERROR: left() negative values are not supported for length +SELECT * FROM cypher('expr', $$ + RETURN left('abcdef', -2147483649) +$$) AS (result agtype); +ERROR: left() length value is out of INT range +SELECT * FROM cypher('expr', $$ + RETURN left('abcdef', 2147483649) +$$) AS (result agtype); +ERROR: left() length value is out of INT range --right() SELECT * FROM cypher('expr', $$ RETURN right("123456789", 1) @@ -4636,6 +4648,18 @@ ERROR: function age_right() does not exist LINE 1: SELECT * FROM age_right(); ^ HINT: No function matches the given name and argument types. You might need to add explicit type casts. +SELECT * FROM cypher('expr', $$ + RETURN right('abcdef', -2147483648) +$$) AS (result agtype); +ERROR: right() negative values are not supported for length +SELECT * FROM cypher('expr', $$ + RETURN right('abcdef', -2147483649) +$$) AS (result agtype); +ERROR: right() length value is out of INT range +SELECT * FROM cypher('expr', $$ + RETURN right('abcdef', 2147483649) +$$) AS (result agtype); +ERROR: right() length value is out of INT range -- substring() SELECT * FROM cypher('expr', $$ RETURN substring("0123456789", 0, 1) @@ -4731,6 +4755,30 @@ SELECT * FROM age_substring(null, 1); (1 row) +SELECT * FROM cypher('expr', $$ + RETURN substring('abcdef', -2147483648, 0) +$$) AS (result agtype); +ERROR: substring() negative values are not supported for offset or length +SELECT * FROM cypher('expr', $$ + RETURN substring('abcdef', -2147483649, 0) +$$) AS (result agtype); +ERROR: substring() parameter value is out of INT range +SELECT * FROM cypher('expr', $$ + RETURN substring('abcdef', 2147483649, 0) +$$) AS (result agtype); +ERROR: substring() parameter value is out of INT range +SELECT * FROM cypher('expr', $$ + RETURN substring('abcdef', 0, -2147483648) +$$) AS (result agtype); +ERROR: substring() negative values are not supported for offset or length +SELECT * FROM cypher('expr', $$ + RETURN substring('abcdef', 0, -2147483649) +$$) AS (result agtype); +ERROR: substring() parameter value is out of INT range +SELECT * FROM cypher('expr', $$ + RETURN substring('abcdef', 0, 2147483649) +$$) AS (result agtype); +ERROR: substring() parameter value is out of INT range -- should fail SELECT * FROM cypher('expr', $$ RETURN substring("123456789", null) diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql index 3ce192047..28936c404 100644 --- a/regress/sql/expr.sql +++ b/regress/sql/expr.sql @@ -1906,6 +1906,15 @@ $$) AS (results agtype); SELECT * FROM age_left('123456789', null); SELECT * FROM age_left('123456789', -1); SELECT * FROM age_left(); +SELECT * FROM cypher('expr', $$ + RETURN left('abcdef', -2147483648) +$$) AS (result agtype); +SELECT * FROM cypher('expr', $$ + RETURN left('abcdef', -2147483649) +$$) AS (result agtype); +SELECT * FROM cypher('expr', $$ + RETURN left('abcdef', 2147483649) +$$) AS (result agtype); --right() SELECT * FROM cypher('expr', $$ RETURN right("123456789", 1) @@ -1939,6 +1948,15 @@ $$) AS (results agtype); SELECT * FROM age_right('123456789', null); SELECT * FROM age_right('123456789', -1); SELECT * FROM age_right(); +SELECT * FROM cypher('expr', $$ + RETURN right('abcdef', -2147483648) +$$) AS (result agtype); +SELECT * FROM cypher('expr', $$ + RETURN right('abcdef', -2147483649) +$$) AS (result agtype); +SELECT * FROM cypher('expr', $$ + RETURN right('abcdef', 2147483649) +$$) AS (result agtype); -- substring() SELECT * FROM cypher('expr', $$ RETURN substring("0123456789", 0, 1) @@ -1969,6 +1987,24 @@ $$) AS (results agtype); SELECT * FROM age_substring(null, null, null); SELECT * FROM age_substring(null, null); SELECT * FROM age_substring(null, 1); +SELECT * FROM cypher('expr', $$ + RETURN substring('abcdef', -2147483648, 0) +$$) AS (result agtype); +SELECT * FROM cypher('expr', $$ + RETURN substring('abcdef', -2147483649, 0) +$$) AS (result agtype); +SELECT * FROM cypher('expr', $$ + RETURN substring('abcdef', 2147483649, 0) +$$) AS (result agtype); +SELECT * FROM cypher('expr', $$ + RETURN substring('abcdef', 0, -2147483648) +$$) AS (result agtype); +SELECT * FROM cypher('expr', $$ + RETURN substring('abcdef', 0, -2147483649) +$$) AS (result agtype); +SELECT * FROM cypher('expr', $$ + RETURN substring('abcdef', 0, 2147483649) +$$) AS (result agtype); -- should fail SELECT * FROM cypher('expr', $$ RETURN substring("123456789", null) diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index fa6362952..a38ac857f 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -8150,7 +8150,7 @@ Datum age_right(PG_FUNCTION_ARGS) agtype_value agtv_result; text *text_string = NULL; char *string = NULL; - int string_len; + int64 string_len; Oid type; /* extract argument values */ @@ -8158,18 +8158,21 @@ Datum age_right(PG_FUNCTION_ARGS) /* check number of args */ if (nargs != 2) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("right() invalid number of arguments"))); - + } /* check for a null string */ if (nargs < 0 || nulls[0]) + { PG_RETURN_NULL(); - + } /* check for a null length */ if (nulls[1]) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("right() length parameter cannot be null"))); - + } /* right() supports text, cstring, or the agtype string input */ arg = args[0]; type = types[0]; @@ -8177,13 +8180,19 @@ Datum age_right(PG_FUNCTION_ARGS) if (type != AGTYPEOID) { if (type == CSTRINGOID) + { text_string = cstring_to_text(DatumGetCString(arg)); + } else if (type == TEXTOID) + { text_string = DatumGetTextPP(arg); + } else + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("right() unsupported argument type %d", type))); + } } else { @@ -8194,21 +8203,29 @@ Datum age_right(PG_FUNCTION_ARGS) agt_arg = DATUM_GET_AGTYPE_P(arg); if (!AGT_ROOT_IS_SCALAR(agt_arg)) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("right() only supports scalar arguments"))); + } agtv_value = get_ith_agtype_value_from_container(&agt_arg->root, 0); /* check for agtype null */ if (agtv_value->type == AGTV_NULL) + { PG_RETURN_NULL(); + } if (agtv_value->type == AGTV_STRING) + { text_string = cstring_to_text_with_len(agtv_value->val.string.val, agtv_value->val.string.len); + } else + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("right() unsupported argument agtype %d", agtv_value->type))); + } } /* right() only supports integer and agtype integer for the second parameter. */ @@ -8218,14 +8235,22 @@ Datum age_right(PG_FUNCTION_ARGS) if (type != AGTYPEOID) { if (type == INT2OID) + { string_len = (int64) DatumGetInt16(arg); + } else if (type == INT4OID) + { string_len = (int64) DatumGetInt32(arg); + } else if (type == INT8OID) + { string_len = (int64) DatumGetInt64(arg); + } else + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("right() unsupported argument type %d", type))); + } } else { @@ -8236,21 +8261,30 @@ Datum age_right(PG_FUNCTION_ARGS) agt_arg = DATUM_GET_AGTYPE_P(arg); if (!AGT_ROOT_IS_SCALAR(agt_arg)) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("right() only supports scalar arguments"))); + } agtv_value = get_ith_agtype_value_from_container(&agt_arg->root, 0); /* no need to check for agtype null because it is an error if found */ if (agtv_value->type != AGTV_INTEGER) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("right() unsupported argument agtype %d", agtv_value->type))); + } string_len = agtv_value->val.int_value; } - /* negative values are not supported in the opencypher spec */ + /* out of range and negative values are not supported in the opencypher spec */ + if (string_len > INT_MAX || string_len < INT_MIN) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("right() length value is out of INT range"))); + } if (string_len < 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("right() negative values are not supported for length"))); @@ -8287,7 +8321,7 @@ Datum age_left(PG_FUNCTION_ARGS) agtype_value agtv_result; text *text_string = NULL; char *string = NULL; - int string_len; + int64 string_len; Oid type; /* extract argument values */ @@ -8295,17 +8329,23 @@ Datum age_left(PG_FUNCTION_ARGS) /* check number of args */ if (nargs != 2) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("left() invalid number of arguments"))); + } /* check for a null string */ if (nargs < 0 || nulls[0]) + { PG_RETURN_NULL(); + } /* check for a null length */ if (nulls[1]) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("left() length parameter cannot be null"))); + } /* left() supports text, cstring, or the agtype string input */ arg = args[0]; @@ -8314,13 +8354,19 @@ Datum age_left(PG_FUNCTION_ARGS) if (type != AGTYPEOID) { if (type == CSTRINGOID) + { text_string = cstring_to_text(DatumGetCString(arg)); + } else if (type == TEXTOID) + { text_string = DatumGetTextPP(arg); + } else + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("left() unsupported argument type %d", type))); + } } else { @@ -8331,21 +8377,29 @@ Datum age_left(PG_FUNCTION_ARGS) agt_arg = DATUM_GET_AGTYPE_P(arg); if (!AGT_ROOT_IS_SCALAR(agt_arg)) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("left() only supports scalar arguments"))); + } agtv_value = get_ith_agtype_value_from_container(&agt_arg->root, 0); /* check for agtype null */ if (agtv_value->type == AGTV_NULL) + { PG_RETURN_NULL(); + } if (agtv_value->type == AGTV_STRING) + { text_string = cstring_to_text_with_len(agtv_value->val.string.val, agtv_value->val.string.len); + } else + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("left() unsupported argument agtype %d", agtv_value->type))); + } } /* left() only supports integer and agtype integer for the second parameter. */ @@ -8355,14 +8409,22 @@ Datum age_left(PG_FUNCTION_ARGS) if (type != AGTYPEOID) { if (type == INT2OID) + { string_len = (int64) DatumGetInt16(arg); + } else if (type == INT4OID) + { string_len = (int64) DatumGetInt32(arg); + } else if (type == INT8OID) + { string_len = (int64) DatumGetInt64(arg); + } else + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("left() unsupported argument type %d", type))); + } } else { @@ -8373,24 +8435,37 @@ Datum age_left(PG_FUNCTION_ARGS) agt_arg = DATUM_GET_AGTYPE_P(arg); if (!AGT_ROOT_IS_SCALAR(agt_arg)) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("left() only supports scalar arguments"))); + } agtv_value = get_ith_agtype_value_from_container(&agt_arg->root, 0); /* no need to check for agtype null because it is an error if found */ if (agtv_value->type != AGTV_INTEGER) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("left() unsupported argument agtype %d", agtv_value->type))); + } string_len = agtv_value->val.int_value; } - /* negative values are not supported in the opencypher spec */ + /* out of range and negative values are not supported in the opencypher spec */ + if (string_len > INT_MAX || string_len < INT_MIN) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("left() length value is out of INT range"))); + } + if (string_len < 0) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("left() negative values are not supported for length"))); + } + /* * We need the string as a text string so that we can let PG deal with @@ -8424,7 +8499,7 @@ Datum age_substring(PG_FUNCTION_ARGS) agtype_value agtv_result; text *text_string = NULL; char *string = NULL; - int param; + int64 param; int string_start = 0; int string_len = 0; int i; @@ -8435,19 +8510,24 @@ Datum age_substring(PG_FUNCTION_ARGS) /* check number of args */ if (nargs < 2 || nargs > 3) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("substring() invalid number of arguments"))); + } /* check for null */ if (nargs < 0 || nulls[0]) + { PG_RETURN_NULL(); + } /* neither offset or length can be null if there is a valid string */ if ((nargs == 2 && nulls[1]) || (nargs == 3 && nulls[2])) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("substring() offset or length cannot be null"))); - + } /* substring() supports text, cstring, or the agtype string input */ arg = args[0]; type = types[0]; @@ -8455,13 +8535,19 @@ Datum age_substring(PG_FUNCTION_ARGS) if (type != AGTYPEOID) { if (type == CSTRINGOID) + { text_string = cstring_to_text(DatumGetCString(arg)); + } else if (type == TEXTOID) + { text_string = DatumGetTextPP(arg); + } else + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("substring() unsupported argument type %d", type))); + } } else { @@ -8472,21 +8558,29 @@ Datum age_substring(PG_FUNCTION_ARGS) agt_arg = DATUM_GET_AGTYPE_P(arg); if (!AGT_ROOT_IS_SCALAR(agt_arg)) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("substring() only supports scalar arguments"))); + } agtv_value = get_ith_agtype_value_from_container(&agt_arg->root, 0); /* check for agtype null */ if (agtv_value->type == AGTV_NULL) + { PG_RETURN_NULL(); + } if (agtv_value->type == AGTV_STRING) + { text_string = cstring_to_text_with_len(agtv_value->val.string.val, agtv_value->val.string.len); + } else + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("substring() unsupported argument agtype %d", agtv_value->type))); + } } /* @@ -8501,15 +8595,23 @@ Datum age_substring(PG_FUNCTION_ARGS) if (type != AGTYPEOID) { if (type == INT2OID) + { param = (int64) DatumGetInt16(arg); + } else if (type == INT4OID) + { param = (int64) DatumGetInt32(arg); + } else if (type == INT8OID) + { param = (int64) DatumGetInt64(arg); + } else + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("substring() unsupported argument type %d", type))); + } } else { @@ -8520,30 +8622,46 @@ Datum age_substring(PG_FUNCTION_ARGS) agt_arg = DATUM_GET_AGTYPE_P(arg); if (!AGT_ROOT_IS_SCALAR(agt_arg)) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("substring() only supports scalar arguments"))); - + } agtv_value = get_ith_agtype_value_from_container(&agt_arg->root, 0); /* no need to check for agtype null because it is an error if found */ if (agtv_value->type != AGTV_INTEGER) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("substring() unsupported argument agtype %d", agtv_value->type))); + } param = agtv_value->val.int_value; } + /* out of range values are not supported in the opencypher spec */ + if (param > INT_MAX || param < INT_MIN) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("substring() parameter value is out of INT range"))); + } + if (i == 1) + { string_start = param; + } if (i == 2) + { string_len = param; + } } /* negative values are not supported in the opencypher spec */ if (string_start < 0 || string_len < 0) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("substring() negative values are not supported for offset or length"))); + } /* cypher substring is 0 based while PG's is 1 based */ string_start += 1; @@ -8555,16 +8673,19 @@ Datum age_substring(PG_FUNCTION_ARGS) /* if optional length is left out */ if (nargs == 2) + { text_string = DatumGetTextPP(DirectFunctionCall2(text_substr_no_len, PointerGetDatum(text_string), Int64GetDatum(string_start))); + } /* if length is given */ else + { text_string = DatumGetTextPP(DirectFunctionCall3(text_substr, PointerGetDatum(text_string), Int64GetDatum(string_start), Int64GetDatum(string_len))); - + } /* convert it back to a cstring */ string = text_to_cstring(text_string); string_len = strlen(string);