From a766ee8711cdeb7ccec994661d80042553a62d1a Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Mon, 23 Oct 2023 20:34:59 +0800 Subject: [PATCH] Add parallel semi join cases Parallel semi joins are already enabled, but there are no regression test cases for them. Add cases for: Parallel-aware Hash Semi Join Parallel-oblivious Hash Semi Join Parallel Merge Semi Join Parallel Nested Loop Semi Join Authored-by: Zhang Mingli avamingli@gmail.com --- src/test/regress/expected/gp_parallel.out | 101 ++++++++++++++++++++++ src/test/regress/sql/gp_parallel.sql | 29 +++++++ 2 files changed, 130 insertions(+) diff --git a/src/test/regress/expected/gp_parallel.out b/src/test/regress/expected/gp_parallel.out index b9c6db2e4a4..c4ce05995e1 100644 --- a/src/test/regress/expected/gp_parallel.out +++ b/src/test/regress/expected/gp_parallel.out @@ -2235,6 +2235,107 @@ explain(costs off) create table ctas_aoco using ao_column as select sum(a.c2) as Optimizer: Postgres query optimizer (11 rows) +abort; +-- +-- Parallel Semi Join +-- +begin; +set local optimizer=off; +set local enable_parallel=on; +set local force_parallel_mode =1 ; +set local min_parallel_table_scan_size = 0; +create table semi_t1 (c1 integer) with(parallel_workers=2) distributed randomly; +create table semi_t2 (c2 integer) with(parallel_workers=2) distributed randomly; +insert into semi_t1 values (generate_series (1,20000)); +insert into semi_t2 values (generate_series (1,10000)); +analyze semi_t1; +analyze semi_t2; +-- Parallel-aware Hash Semi Join +explain(costs off) select c1 from semi_t1 where not c1 >=all (select c2 from semi_t2 where c2 = c1); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 6:1 (slice1; segments: 6) + -> Parallel Hash Semi Join + Hash Cond: (semi_t1.c1 = semi_t2.c2) + Join Filter: (semi_t1.c1 < semi_t2.c2) + -> Redistribute Motion 6:6 (slice2; segments: 6) + Hash Key: semi_t1.c1 + Hash Module: 3 + -> Parallel Seq Scan on semi_t1 + -> Parallel Hash + -> Redistribute Motion 6:6 
(slice3; segments: 6) + Hash Key: semi_t2.c2 + Hash Module: 3 + -> Parallel Seq Scan on semi_t2 + Optimizer: Postgres query optimizer +(14 rows) + +-- Parallel-oblivious Hash Semi Join +set local enable_parallel_hash = off; +explain(costs off) select c1 from semi_t1 where not c1 >=all (select c2 from semi_t2 where c2 = c1); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 6:1 (slice1; segments: 6) + -> Hash Semi Join + Hash Cond: (semi_t1.c1 = semi_t2.c2) + Join Filter: (semi_t1.c1 < semi_t2.c2) + -> Redistribute Motion 6:6 (slice2; segments: 6) + Hash Key: semi_t1.c1 + Hash Module: 3 + -> Parallel Seq Scan on semi_t1 + -> Hash + -> Redistribute Motion 3:6 (slice3; segments: 3) + Hash Key: semi_t2.c2 + Hash Module: 3 + -> Seq Scan on semi_t2 + Optimizer: Postgres query optimizer +(14 rows) + +-- Parallel Merge Semi Join +set local enable_hashjoin = off; +explain(costs off) select c1 from semi_t1 where not c1 >=all (select c2 from semi_t2 where c2 = c1); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 6:1 (slice1; segments: 6) + -> Merge Semi Join + Merge Cond: (semi_t1.c1 = semi_t2.c2) + Join Filter: (semi_t1.c1 < semi_t2.c2) + -> Sort + Sort Key: semi_t1.c1 + -> Redistribute Motion 6:6 (slice2; segments: 6) + Hash Key: semi_t1.c1 + Hash Module: 3 + -> Parallel Seq Scan on semi_t1 + -> Sort + Sort Key: semi_t2.c2 + -> Redistribute Motion 3:6 (slice3; segments: 3) + Hash Key: semi_t2.c2 + Hash Module: 3 + -> Seq Scan on semi_t2 + Optimizer: Postgres query optimizer +(17 rows) + +set local enable_mergejoin = off; +set local enable_nestloop = on; +-- Parallel Nested Loop Semi Join +explain(costs off) select c1 from semi_t1 where not c1 >=all (select c2 from semi_t2 where c2 = c1); + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 6:1 (slice1; segments: 6) + -> Nested Loop Semi Join + Join Filter: ((semi_t1.c1 < 
semi_t2.c2) AND (semi_t1.c1 = semi_t2.c2)) + -> Redistribute Motion 6:6 (slice2; segments: 6) + Hash Key: semi_t1.c1 + Hash Module: 3 + -> Parallel Seq Scan on semi_t1 + -> Materialize + -> Redistribute Motion 3:6 (slice3; segments: 3) + Hash Key: semi_t2.c2 + Hash Module: 3 + -> Seq Scan on semi_t2 + Optimizer: Postgres query optimizer +(13 rows) + abort; -- start_ignore drop schema test_parallel cascade; diff --git a/src/test/regress/sql/gp_parallel.sql b/src/test/regress/sql/gp_parallel.sql index f7d319058a4..ec67077f7a5 100644 --- a/src/test/regress/sql/gp_parallel.sql +++ b/src/test/regress/sql/gp_parallel.sql @@ -708,6 +708,35 @@ explain(costs off) create table ctas_ao using ao_row as select sum(a.c2) as c2, explain(costs off) create table ctas_aoco using ao_column as select sum(a.c2) as c2, avg(b.c1) as c1 from t_p2 a join t_p2 b on a.c1 = b.c1 distributed by(c2); abort; +-- +-- Parallel Semi Join +-- +begin; +set local optimizer=off; +set local enable_parallel=on; +set local force_parallel_mode =1 ; +set local min_parallel_table_scan_size = 0; +create table semi_t1 (c1 integer) with(parallel_workers=2) distributed randomly; +create table semi_t2 (c2 integer) with(parallel_workers=2) distributed randomly; +insert into semi_t1 values (generate_series (1,20000)); +insert into semi_t2 values (generate_series (1,10000)); +analyze semi_t1; +analyze semi_t2; + +-- Parallel-aware Hash Semi Join +explain(costs off) select c1 from semi_t1 where not c1 >=all (select c2 from semi_t2 where c2 = c1); +-- Parallel-oblivious Hash Semi Join +set local enable_parallel_hash = off; +explain(costs off) select c1 from semi_t1 where not c1 >=all (select c2 from semi_t2 where c2 = c1); +-- Parallel Merge Semi Join +set local enable_hashjoin = off; +explain(costs off) select c1 from semi_t1 where not c1 >=all (select c2 from semi_t2 where c2 = c1); +set local enable_mergejoin = off; +set local enable_nestloop = on; +-- Parallel Nested Loop Semi Join +explain(costs off) select c1 
from semi_t1 where not c1 >=all (select c2 from semi_t2 where c2 = c1); +abort; + -- start_ignore drop schema test_parallel cascade; -- end_ignore