From 74aa5c7917e6f837d4ea27e35c2c22fd93a83a3d Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Thu, 10 Aug 2023 13:20:42 +0800 Subject: [PATCH] Loosen the restriction on outer paths that have a Motion of a parallel plan We drop all outer paths that have a Motion of a parallel plan to avoid deadlock when mixing parallel-aware hashjoin with parallel-oblivious paths. However, that check is tied to enable_parallel, which is stricter than needed. It's possible to keep such a path when enable_parallel is on and enable_parallel_hash is off, because we can be sure that there is no parallel-aware hashjoin and, therefore, no deadlock issue like the one above. By tying the restriction to enable_parallel_hash as well, such a parallel-oblivious plan becomes possible. explain(costs off) select * from t1 right join t2 on t1.b = t2.a; QUERY PLAN ------------------------------------------------------------------ Gather Motion 6:1 (slice1; segments: 6) -> Hash Left Join Hash Cond: (t2.a = t1.b) -> Redistribute Motion 6:6 (slice2; segments: 6) Hash Key: t2.a Hash Module: 3 -> Parallel Seq Scan on t2 -> Hash -> Redistribute Motion 3:6 (slice3; segments: 3) Hash Key: t1.b Hash Module: 3 -> Seq Scan on t1 Optimizer: Postgres query optimizer (13 rows) Authored-by: Zhang Mingli avamingli@gmail.com --- src/backend/optimizer/util/pathnode.c | 7 +++-- src/test/regress/expected/gp_parallel.out | 33 +++++++++++++++++++++++ src/test/regress/sql/gp_parallel.sql | 17 ++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index d8050576bf8..709d2496015 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -4483,9 +4483,12 @@ create_hashjoin_path(PlannerInfo *root, /* * For parallel hash, it is motionHazard. If there are parallel hash join on outside child, * not use parallel hash. - * CBDB_PARALLEL_FIXME: At least, should not have impact on non-parallel path generation. 
+ * CBDB_PARALLEL_FIXME: + * At least, should not have impact on non-parallel path generation and when there are no + * parallel-aware paths. */ - if (enable_parallel && outer_path->barrierHazard && !parallel_hash) + if (enable_parallel && enable_parallel_hash && + outer_path->barrierHazard && !parallel_hash) return NULL; if (parallel_hash && outer_path->barrierHazard) diff --git a/src/test/regress/expected/gp_parallel.out b/src/test/regress/expected/gp_parallel.out index 23bba396d3d..af9aa5f0b8b 100644 --- a/src/test/regress/expected/gp_parallel.out +++ b/src/test/regress/expected/gp_parallel.out @@ -1847,6 +1847,39 @@ abort; -- -- End of Test locus after eliding mtion node. -- +-- +-- Test outer path has Motion of parallel plan. +-- +begin; +create table t1(a int, b int) with(parallel_workers=3); +create table t2(b int, a int) with(parallel_workers=2); +insert into t1 select i, i+1 from generate_series(1, 10) i; +insert into t2 select i, i+1 from generate_series(1, 5) i; +analyze t1; +analyze t2; +set local optimizer=off; +set local enable_parallel=on; +set local enable_parallel_hash=off; +set local max_parallel_workers_per_gather= 4; +explain(costs off) select * from t1 right join t2 on t1.b = t2.a; + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 6:1 (slice1; segments: 6) + -> Hash Left Join + Hash Cond: (t2.a = t1.b) + -> Redistribute Motion 6:6 (slice2; segments: 6) + Hash Key: t2.a + Hash Module: 3 + -> Parallel Seq Scan on t2 + -> Hash + -> Redistribute Motion 3:6 (slice3; segments: 3) + Hash Key: t1.b + Hash Module: 3 + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(13 rows) + +abort; -- start_ignore drop schema test_parallel cascade; -- end_ignore diff --git a/src/test/regress/sql/gp_parallel.sql b/src/test/regress/sql/gp_parallel.sql index 6feaeaa4117..39cae6465c2 100644 --- a/src/test/regress/sql/gp_parallel.sql +++ b/src/test/regress/sql/gp_parallel.sql @@ -561,6 +561,23 @@ abort; -- End of 
Test locus after eliding mtion node. -- +-- +-- Test outer path has Motion of parallel plan. +-- +begin; +create table t1(a int, b int) with(parallel_workers=3); +create table t2(b int, a int) with(parallel_workers=2); +insert into t1 select i, i+1 from generate_series(1, 10) i; +insert into t2 select i, i+1 from generate_series(1, 5) i; +analyze t1; +analyze t2; +set local optimizer=off; +set local enable_parallel=on; +set local enable_parallel_hash=off; +set local max_parallel_workers_per_gather= 4; +explain(costs off) select * from t1 right join t2 on t1.b = t2.a; +abort; + -- start_ignore drop schema test_parallel cascade; -- end_ignore