From 80b77cb378a2a96a3daffe32365822752a8349b3 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sun, 10 Dec 2023 23:08:35 -0800 Subject: [PATCH] Fix issue 1219 - MERGE not seeing previous clause var (#1441) Fixed issue 1219 where MERGE did not see the previous clause's variable. This description is a bit misleading as the transform phase did see the variable and was able to use it. However, the planner phase removed the variable by replacing it with a NULL Const. This caused MERGE to see a NULL Const for the previous tuple, generating incorrect results. However, this only occurred for very specific cases. Fx: MATCH (x) MERGE (y {id: id(x)}) -- worked MATCH (x) MERGE (y {id: id(x)}) RETURN y -- didn't MATCH (x) MERGE (y {id: id(x)}) RETURN x, y -- worked The change impacted no current regression tests and involved wrapping all explicitly defined variables' target entries with a volatile wrapper. Added new regression tests. --- regress/expected/cypher_merge.out | 110 ++++++++++++++++++++++ regress/sql/cypher_merge.sql | 30 ++++++ src/backend/nodes/cypher_outfuncs.c | 4 + src/backend/optimizer/cypher_createplan.c | 10 +- src/backend/optimizer/cypher_pathnode.c | 2 +- src/backend/parser/cypher_clause.c | 68 ++++++++++++- src/backend/parser/cypher_gram.y | 5 + src/include/nodes/cypher_nodes.h | 3 + 8 files changed, 222 insertions(+), 10 deletions(-) diff --git a/regress/expected/cypher_merge.out b/regress/expected/cypher_merge.out index 2d6c2ef08..7427c8e71 100644 --- a/regress/expected/cypher_merge.out +++ b/regress/expected/cypher_merge.out @@ -1072,6 +1072,116 @@ SELECT * FROM cypher('cypher_merge', $$ MERGE p=(x:r)-[y:E]->(p)-[x]->(y) $$) AS ERROR: variable "p" is for a path LINE 1: ...M cypher('cypher_merge', $$ MERGE p=(x:r)-[y:E]->(p)-[x]->(y... ^ +-- issue 1219 +SELECT * FROM create_graph('issue_1219'); +NOTICE: graph "issue_1219" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('issue_1219', $$ CREATE (x:Label1{arr:[1,2,3,4]}) RETURN x $$) as (a agtype); + a +----------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Label1", "properties": {"arr": [1, 2, 3, 4]}}::vertex +(1 row) + +SELECT * FROM cypher('issue_1219', $$ + MATCH (x:Label1{arr:[1,2,3,4]}) MERGE (y:Label2{key1:2, key2:x.arr, key3:3}) RETURN y +$$) as (result agtype); + result +----------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842625, "label": "Label2", "properties": {"key1": 2, "key2": [1, 2, 3, 4], "key3": 3}}::vertex +(1 row) + +SELECT * FROM cypher('issue_1219', $$ + MATCH (x:Label1{arr:[1,2,3,4]}) MERGE (y:Label2{key2:id(x)}) RETURN y +$$) as (result agtype); + result +---------------------------------------------------------------------------------------------- + {"id": 1125899906842626, "label": "Label2", "properties": {"key2": 844424930131969}}::vertex +(1 row) + +SELECT * FROM cypher('issue_1219', $$ MATCH (x) RETURN x $$) as (a agtype); + a +----------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Label1", "properties": {"arr": [1, 2, 3, 4]}}::vertex + {"id": 1125899906842625, "label": "Label2", "properties": {"key1": 2, "key2": [1, 2, 3, 4], "key3": 3}}::vertex + {"id": 1125899906842626, "label": "Label2", "properties": {"key2": 844424930131969}}::vertex +(3 rows) + +-- these shouldn't create more +SELECT * FROM cypher('issue_1219', $$ + MATCH (x:Label1{arr:[1,2,3,4]}) MERGE (y:Label2{key1:2, key2:x.arr, key3:3}) RETURN y +$$) as (result agtype); + result +----------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842625, "label": "Label2", "properties": {"key1": 2, "key2": [1, 2, 3, 4], "key3": 3}}::vertex +(1 row) + +SELECT * FROM cypher('issue_1219', $$ + MATCH (x:Label1{arr:[1,2,3,4]}) MERGE (y:Label2{key2:id(x)}) RETURN y +$$) as (result agtype); + result +---------------------------------------------------------------------------------------------- + {"id": 1125899906842626, "label": "Label2", "properties": {"key2": 844424930131969}}::vertex +(1 row) + +SELECT * FROM cypher('issue_1219', $$ MATCH (x) RETURN x $$) as (a agtype); + a +----------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Label1", "properties": {"arr": [1, 2, 3, 4]}}::vertex + {"id": 1125899906842625, "label": "Label2", "properties": {"key1": 2, "key2": [1, 2, 3, 4], "key3": 3}}::vertex + {"id": 1125899906842626, "label": "Label2", "properties": {"key2": 844424930131969}}::vertex +(3 rows) + +-- create a path +SELECT * FROM cypher('issue_1219', $$ + CREATE (u:Label1{name: "John"})-[e:knows]->(v:Label1{name: "Jane"}) +$$) as (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('issue_1219', $$ + MATCH (u:Label1{name:"John"})-[e:knows]->(v:Label1{name: "Jane"}) + MERGE (y:Label2{start_id:id(u), edge_id:id(e), end_id:id(v)}) RETURN y +$$) as (result agtype); + result +---------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842627, "label": "Label2", "properties": {"end_id": 844424930131971, "edge_id": 1407374883553281, "start_id": 844424930131970}}::vertex +(1 row) + +SELECT * FROM cypher('issue_1219', $$ MATCH (x) RETURN x $$) as (result agtype); + result +---------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Label1", "properties": {"arr": [1, 2, 3, 4]}}::vertex + {"id": 844424930131970, "label": "Label1", "properties": {"name": "John"}}::vertex + {"id": 844424930131971, "label": "Label1", "properties": {"name": "Jane"}}::vertex + {"id": 1125899906842625, "label": "Label2", "properties": {"key1": 2, "key2": [1, 2, 3, 4], "key3": 3}}::vertex + {"id": 1125899906842626, "label": "Label2", "properties": {"key2": 844424930131969}}::vertex + {"id": 1125899906842627, "label": "Label2", "properties": {"end_id": 844424930131971, "edge_id": 1407374883553281, "start_id": 844424930131970}}::vertex +(6 rows) + +SELECT * FROM cypher('issue_1219', $$ MATCH ()-[e]->() RETURN e $$) as (result agtype); + result +---------------------------------------------------------------------------------------------------------------------------- + {"id": 1407374883553281, "label": "knows", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge +(1 row) + +SELECT drop_graph('issue_1219', true); +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table issue_1219._ag_label_vertex +drop cascades to table issue_1219._ag_label_edge +drop cascades to table issue_1219."Label1" +drop cascades to table issue_1219."Label2" +drop cascades to table issue_1219.knows +NOTICE: graph "issue_1219" has been dropped + drop_graph +------------ + +(1 row) + --clean up SELECT * FROM cypher('cypher_merge', $$MATCH (n) DETACH DELETE n $$) AS (a agtype); a diff --git a/regress/sql/cypher_merge.sql b/regress/sql/cypher_merge.sql index 9d2a11d67..81d965a22 100644 --- a/regress/sql/cypher_merge.sql +++ b/regress/sql/cypher_merge.sql @@ -524,6 +524,36 @@ SELECT * FROM cypher('cypher_merge', $$ MERGE p=(x:r)-[y:E]->(x)-[p]->(x) $$) AS SELECT * FROM cypher('cypher_merge', $$ MERGE p=(x:r)-[y:E]->(x)-[p:E]->(x) $$) AS (x agtype); SELECT * FROM cypher('cypher_merge', $$ MERGE p=(x:r)-[y:E]->(p)-[x]->(y) $$) AS (x agtype); +-- issue 1219 +SELECT * FROM create_graph('issue_1219'); +SELECT * FROM cypher('issue_1219', $$ CREATE (x:Label1{arr:[1,2,3,4]}) RETURN x $$) as (a agtype); +SELECT * FROM cypher('issue_1219', $$ + MATCH (x:Label1{arr:[1,2,3,4]}) MERGE (y:Label2{key1:2, key2:x.arr, key3:3}) RETURN y +$$) as (result agtype); +SELECT * FROM cypher('issue_1219', $$ + MATCH (x:Label1{arr:[1,2,3,4]}) MERGE (y:Label2{key2:id(x)}) RETURN y +$$) as (result agtype); +SELECT * FROM cypher('issue_1219', $$ MATCH (x) RETURN x $$) as (a agtype); +-- these shouldn't create more +SELECT * FROM cypher('issue_1219', $$ + MATCH (x:Label1{arr:[1,2,3,4]}) MERGE (y:Label2{key1:2, key2:x.arr, key3:3}) RETURN y +$$) as (result agtype); +SELECT * FROM cypher('issue_1219', $$ + MATCH (x:Label1{arr:[1,2,3,4]}) MERGE (y:Label2{key2:id(x)}) RETURN y +$$) as (result agtype); +SELECT * FROM cypher('issue_1219', $$ MATCH (x) RETURN x $$) as (a agtype); +-- create a path +SELECT * FROM cypher('issue_1219', $$ + CREATE (u:Label1{name: "John"})-[e:knows]->(v:Label1{name: "Jane"}) +$$) as (result agtype); +SELECT * FROM cypher('issue_1219', $$ + MATCH (u:Label1{name:"John"})-[e:knows]->(v:Label1{name: "Jane"}) + MERGE (y:Label2{start_id:id(u), edge_id:id(e), end_id:id(v)}) RETURN y +$$) as (result agtype); +SELECT * FROM cypher('issue_1219', $$ MATCH (x) RETURN x $$) as (result agtype); +SELECT * FROM cypher('issue_1219', $$ MATCH ()-[e]->() RETURN e $$) as (result agtype); +SELECT drop_graph('issue_1219', true); + --clean up SELECT * FROM cypher('cypher_merge', $$MATCH (n) DETACH DELETE n $$) AS (a agtype); diff --git a/src/backend/nodes/cypher_outfuncs.c b/src/backend/nodes/cypher_outfuncs.c index 8ca1df0e4..ca9baafd7 100644 --- a/src/backend/nodes/cypher_outfuncs.c +++ b/src/backend/nodes/cypher_outfuncs.c @@ -194,6 +194,8 @@ void out_cypher_path(StringInfo str, const ExtensibleNode *node) DEFINE_AG_NODE(cypher_path); WRITE_NODE_FIELD(path); + WRITE_STRING_FIELD(var_name); + WRITE_STRING_FIELD(parsed_var_name); WRITE_LOCATION_FIELD(location); } @@ -203,6 +205,7 @@ void out_cypher_node(StringInfo str, const ExtensibleNode *node) DEFINE_AG_NODE(cypher_node); WRITE_STRING_FIELD(name); + WRITE_STRING_FIELD(parsed_name); WRITE_STRING_FIELD(label); WRITE_STRING_FIELD(parsed_label); WRITE_NODE_FIELD(props); @@ -215,6 +218,7 @@ void out_cypher_relationship(StringInfo str, const ExtensibleNode *node) DEFINE_AG_NODE(cypher_relationship); WRITE_STRING_FIELD(name); + WRITE_STRING_FIELD(parsed_name); WRITE_STRING_FIELD(label); WRITE_STRING_FIELD(parsed_label); WRITE_NODE_FIELD(props); diff --git a/src/backend/optimizer/cypher_createplan.c b/src/backend/optimizer/cypher_createplan.c index 9e0863423..414469859 100644 --- a/src/backend/optimizer/cypher_createplan.c +++ b/src/backend/optimizer/cypher_createplan.c @@ -125,7 +125,7 @@ Plan *plan_cypher_set_path(PlannerInfo *root, RelOptInfo *rel, } /* - * Coverts the Scan node representing the delete clause + * Coverts the Scan node representing the DELETE clause * to the delete Plan node */ Plan *plan_cypher_delete_path(PlannerInfo *root, RelOptInfo *rel, @@ -171,7 +171,7 @@ Plan *plan_cypher_delete_path(PlannerInfo *root, RelOptInfo *rel, // child plan nodes are here, Postgres processed them for us. cs->custom_plans = custom_plans; cs->custom_exprs = NIL; - // transfer delete metadata needed by the delete clause. + // transfer delete metadata needed by the DELETE clause. cs->custom_private = best_path->custom_private; /* * the scan list of the delete node's children, used for ScanTupleSlot @@ -186,7 +186,7 @@ Plan *plan_cypher_delete_path(PlannerInfo *root, RelOptInfo *rel, } /* - * Coverts the Scan node representing the delete clause + * Coverts the Scan node representing the MERGE clause * to the merge Plan node */ Plan *plan_cypher_merge_path(PlannerInfo *root, RelOptInfo *rel, @@ -209,7 +209,7 @@ Plan *plan_cypher_merge_path(PlannerInfo *root, RelOptInfo *rel, cs->scan.plan.plan_node_id = 0; // Set later in set_plan_refs /* - * the scan list of the delete node, used for its ScanTupleSlot used + * the scan list of the merge node, used for its ScanTupleSlot used * by its parent in the execution phase. */ cs->scan.plan.targetlist = tlist; @@ -232,7 +232,7 @@ Plan *plan_cypher_merge_path(PlannerInfo *root, RelOptInfo *rel, // child plan nodes are here, Postgres processed them for us. cs->custom_plans = custom_plans; cs->custom_exprs = NIL; - // transfer delete metadata needed by the delete clause. + // transfer delete metadata needed by the MERGE clause. cs->custom_private = best_path->custom_private; /* * the scan list of the merge node's children, used for ScanTupleSlot diff --git a/src/backend/optimizer/cypher_pathnode.c b/src/backend/optimizer/cypher_pathnode.c index 4e04b752c..50fb93706 100644 --- a/src/backend/optimizer/cypher_pathnode.c +++ b/src/backend/optimizer/cypher_pathnode.c @@ -152,7 +152,7 @@ CustomPath *create_cypher_delete_path(PlannerInfo *root, RelOptInfo *rel, } /* - * Creates a Delete Path. Makes the original path a child of the new + * Creates a merge path. Makes the original path a child of the new * path. We leave it to the caller to replace the pathlist of the rel. */ CustomPath *create_cypher_merge_path(PlannerInfo *root, RelOptInfo *rel, diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index f3b99209b..90f5547bf 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -269,7 +269,8 @@ static Node *transform_clause_for_join(cypher_parsestate *cpstate, Alias* alias); static cypher_clause *convert_merge_to_match(cypher_merge *merge); static void -transform_cypher_merge_mark_tuple_position(List *target_list, +transform_cypher_merge_mark_tuple_position(cypher_parsestate *cpstate, + List *target_list, cypher_create_path *path); static cypher_target_node *get_referenced_variable(ParseState *pstate, Node *node, @@ -6194,7 +6195,7 @@ static Query *transform_cypher_merge(cypher_parsestate *cpstate, * For the metadata need to create paths, find the tuple position that * will represent the entity in the execution phase. */ - transform_cypher_merge_mark_tuple_position(query->targetList, + transform_cypher_merge_mark_tuple_position(cpstate, query->targetList, merge_path); } @@ -6350,7 +6351,7 @@ transform_merge_make_lateral_join(cypher_parsestate *cpstate, Query *query, * For the metadata need to create paths, find the tuple position that * will represent the entity in the execution phase. */ - transform_cypher_merge_mark_tuple_position(query->targetList, + transform_cypher_merge_mark_tuple_position(cpstate, query->targetList, merge_path); return merge_path; @@ -6362,7 +6363,8 @@ transform_merge_make_lateral_join(cypher_parsestate *cpstate, Query *query, * function to keep the optimizer from removing the TargetEntry. */ static void -transform_cypher_merge_mark_tuple_position(List *target_list, +transform_cypher_merge_mark_tuple_position(cypher_parsestate *cpstate, + List *target_list, cypher_create_path *path) { ListCell *lc = NULL; @@ -6398,6 +6400,64 @@ transform_cypher_merge_mark_tuple_position(List *target_list, // Mark the tuple position the target_node is for. node->tuple_position = te->resno; } + + /* Iterate through the entities wrapping Var nodes with the volatile + * wrapper, if not already done. + * + * NOTE: add_volatile_wrapper function will not wrap itself so the following + * is safe to do. + * + * TODO: This ideally needs to be rewritten using a walker, to be more + * selective. However, walkers are tricky and take time to set up. For + * now, we brute force it. It is already restricted to explicitly + * named variables. + * + * TODO: We need to understand why add_volatile_wrapper is needed. Meaning, + * we need to understand why variables are removed by the function + * remove_unused_subquery_outputs. It "appears" that some linkage may + * not be set up properly, not allowing the PG logic to see that a + * variable is used from a previous clause. Right now, the volatile + * wrapper will suffice, but it is still a hack imho. + * + * TODO: There may be other locations where something similar may need to be + * done. This needs to be researched. + */ + foreach (lc, cpstate->entities) + { + transform_entity *te = lfirst(lc); + Node *node = (Node*) te->entity.node; + char *name = NULL; + + if (is_ag_node(node, cypher_node)) + { + name = te->entity.node->parsed_name; + } + else if (is_ag_node(node, cypher_relationship)) + { + name = te->entity.rel->parsed_name; + } + else if (is_ag_node(node, cypher_path)) + { + name = te->entity.path->parsed_var_name; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("unexpected transform_entity entity type"))); + } + + /* node needs to have a parsed_name, meaning a name from the query */ + if (name != NULL) + { + TargetEntry *tle = findTarget(target_list, name); + + if (tle != NULL && IsA(tle->expr, Var)) + { + tle->expr = add_volatile_wrapper(tle->expr); + } + } + } } /* diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index 26ea970db..9afe8fb00 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -1206,6 +1206,7 @@ path: p = (cypher_path *)$3; p->var_name = $1; + p->parsed_var_name = $1; p->location = @1; $$ = (Node *)p; @@ -1221,6 +1222,7 @@ anonymous_path: n = make_ag_node(cypher_path); n->path = $1; n->var_name = NULL; + n->parsed_var_name = NULL; n->location = @1; $$ = (Node *)n; @@ -1271,6 +1273,7 @@ path_node: n = make_ag_node(cypher_node); n->name = $2; + n->parsed_name = $2; n->label = $3; n->parsed_label = $3; n->props = $4; @@ -1317,6 +1320,7 @@ path_relationship_body: n = make_ag_node(cypher_relationship); n->name = $2; + n->parsed_name = $2; n->label = $3; n->parsed_label = $3; n->varlen = $4; @@ -1331,6 +1335,7 @@ path_relationship_body: n = make_ag_node(cypher_relationship); n->name = NULL; + n->parsed_name = NULL; n->label = NULL; n->parsed_label = NULL; n->varlen = NULL; diff --git a/src/include/nodes/cypher_nodes.h b/src/include/nodes/cypher_nodes.h index 9fbf27b67..b1aeb78b3 100644 --- a/src/include/nodes/cypher_nodes.h +++ b/src/include/nodes/cypher_nodes.h @@ -133,6 +133,7 @@ typedef struct cypher_path ExtensibleNode extensible; List *path; // [ node ( , relationship , node , ... ) ] char *var_name; + char *parsed_var_name; int location; } cypher_path; @@ -141,6 +142,7 @@ typedef struct cypher_node { ExtensibleNode extensible; char *name; + char *parsed_name; char *label; char *parsed_label; Node *props; // map or parameter @@ -159,6 +161,7 @@ typedef struct cypher_relationship { ExtensibleNode extensible; char *name; + char *parsed_name; char *label; char *parsed_label; Node *props; // map or parameter