From 81f0ee275bb83882773306fc893a610c52e3c01a Mon Sep 17 00:00:00 2001
From: Leonid Borchuk
Date: Sun, 2 Feb 2025 00:29:03 +0300
Subject: [PATCH] Do not call gporca for simple queries

---
 src/backend/optimizer/plan/planner.c          | 61 +++++++++++++++++-
 src/backend/utils/misc/guc_gp.c               | 12 ++++
 src/include/utils/guc.h                       |  1 +
 src/include/utils/unsync_guc_name.h           |  1 +
 src/test/regress/expected/gporca.out          | 61 ++++++++++++++++++
 .../regress/expected/gporca_optimizer.out     | 62 +++++++++++++++++++
 src/test/regress/sql/gporca.sql               | 10 +++
 7 files changed, 207 insertions(+), 1 deletion(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 16efdb2ed80..04d9235a27d 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -183,6 +183,7 @@ static List *preprocess_groupclause(PlannerInfo *root, List *force);
 static List *extract_rollup_sets(List *groupingSets);
 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
 static void standard_qp_callback(PlannerInfo *root, void *extra);
+static bool enabled_for_optimizer(Query *parse);
 static double get_number_of_groups(PlannerInfo *root,
 								   double path_rows,
 								   grouping_sets_data *gd,
@@ -342,6 +343,63 @@ planner(Query *parse, const char *query_string, int cursorOptions,
 	return result;
 }
 
+/*
+ * enabled_for_optimizer
+ *	  Decide whether a query is complex enough to be planned by ORCA.
+ *
+ * Returns true when ORCA should be tried and false when the query is simple
+ * enough to go straight to the Postgres planner.
+ *
+ * Setting optimizer_relations_threshold to 0 disables the check.  Otherwise
+ * ORCA is tried when this query level has aggregates, window functions,
+ * sublinks, CTEs or DISTINCT ON, when any range table entry is LATERAL or a
+ * subquery, or when the range table holds more than
+ * optimizer_relations_threshold plain relations.
+ */
+static bool
+enabled_for_optimizer(Query *parse)
+{
+	int			num_relations = 0;
+	ListCell   *lc;
+
+	/* A threshold of zero turns the shortcut off: always try ORCA. */
+	if (optimizer_relations_threshold == 0)
+		return true;
+
+	/* Any of these features makes the query non-trivial. */
+	if (parse->hasAggs ||
+		parse->hasWindowFuncs ||
+		parse->hasSubLinks ||
+		parse->hasRecursive ||
+		parse->hasDistinctOn ||
+		parse->hasModifyingCTE ||
+		parse->cteList != NIL)
+		return true;
+
+	foreach(lc, parse->rtable)
+	{
+		RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
+
+		if (rte->lateral)
+			return true;
+
+		/*
+		 * The has* flags tested above describe this query level only, so a
+		 * subquery in FROM may hide arbitrary complexity.  Be conservative
+		 * and let ORCA plan it.
+		 */
+		if (rte->rtekind == RTE_SUBQUERY)
+			return true;
+
+		/* Only plain relations count; joins and other RTE kinds do not. */
+		if (rte->rtekind == RTE_RELATION &&
+			++num_relations > optimizer_relations_threshold)
+			return true;
+	}
+
+	return false;
+}
+
 PlannedStmt *
 standard_planner(Query *parse, const char *query_string, int cursorOptions,
 				 ParamListInfo boundParams)
@@ -373,11 +431,12 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
 	 *
 	 * PARALLEL RETRIEVE CURSOR is not supported by ORCA yet.
 	 */
 	if (optimizer &&
 		GP_ROLE_DISPATCH == Gp_role &&
 		IS_QUERY_DISPATCHER() &&
 		(cursorOptions & CURSOR_OPT_SKIP_FOREIGN_PARTITIONS) == 0 &&
-		(cursorOptions & CURSOR_OPT_PARALLEL_RETRIEVE) == 0)
+		(cursorOptions & CURSOR_OPT_PARALLEL_RETRIEVE) == 0 &&
+		enabled_for_optimizer(parse))
 	{
 
 #ifdef USE_ORCA
diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c
index b50f55d1024..313f9ede259 100644
--- a/src/backend/utils/misc/guc_gp.c
+++ b/src/backend/utils/misc/guc_gp.c
@@ -380,6 +380,7 @@ bool		optimizer_enable_derive_stats_all_groups;
 int			optimizer_segments;
 int			optimizer_penalize_broadcast_threshold;
 double		optimizer_cost_threshold;
+int			optimizer_relations_threshold;
 double		optimizer_nestloop_factor;
 double		optimizer_sort_factor;
 double		optimizer_spilling_mem_threshold;
@@ -4363,6 +4364,17 @@ struct config_int ConfigureNamesInt_gp[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"optimizer_relations_threshold", PGC_USERSET, DEVELOPER_OPTIONS,
+			gettext_noop("Maximal number of relations in a query that is planned without gporca, 0 means always try to use gporca"),
+			NULL,
+			GUC_NOT_IN_SAMPLE
+		},
+		&optimizer_relations_threshold,
+		0, 0, INT_MAX,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"memory_profiler_dataset_size", PGC_USERSET, DEVELOPER_OPTIONS,
 			gettext_noop("Set the size in GB"),
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index a6d3e9e3c84..cf66c20a282 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -559,6 +559,7 @@ extern bool optimizer_enable_derive_stats_all_groups;
 extern int optimizer_segments;
 extern int optimizer_penalize_broadcast_threshold;
 extern double optimizer_cost_threshold;
+extern int optimizer_relations_threshold;
 extern double optimizer_nestloop_factor;
 extern double optimizer_sort_factor;
 extern double optimizer_spilling_mem_threshold;
diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h
index 6f8e02e6f0c..8d00b464ceb 100644
--- a/src/include/utils/unsync_guc_name.h
+++ b/src/include/utils/unsync_guc_name.h
@@ -379,6 +379,7 @@
 		"optimizer_control",
 		"optimizer_cost_model",
 		"optimizer_cost_threshold",
+		"optimizer_relations_threshold",
 		"optimizer_cte_inlining",
 		"optimizer_cte_inlining_bound",
 		"optimizer_damping_factor_filter",
diff --git a/src/test/regress/expected/gporca.out b/src/test/regress/expected/gporca.out
index 0d83a50c178..c7c6d5e0479 100644
--- a/src/test/regress/expected/gporca.out
+++ b/src/test/regress/expected/gporca.out
@@ -14933,3 +14933,64 @@ SELECT CAST(a AS TEXT[]) FROM array_coerceviaio;
 (1 row)
 
 ---------------------------------------------------------------------------------
+-- Test do not use ORCA when optimizer_relations_threshold is set
+create table ort(a int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+explain insert into ort values(1); + QUERY PLAN +------------------------------------------------- + Insert on ort (cost=0.00..0.03 rows=0 width=0) + -> Result (cost=0.00..0.01 rows=1 width=4) + Optimizer: Postgres query optimizer +(3 rows) + +set optimizer_relations_threshold = 1; +explain insert into ort values(1); + QUERY PLAN +------------------------------------------------- + Insert on ort (cost=0.00..0.03 rows=0 width=0) + -> Result (cost=0.00..0.01 rows=1 width=4) + Optimizer: Postgres query optimizer +(3 rows) + +explain select * from ort a join ort b on a.a = b.a; + QUERY PLAN +----------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=756.25..284554.25 rows=9273690 width=8) + -> Hash Join (cost=756.25..160905.05 rows=3091230 width=8) + Hash Cond: (a.a = b.a) + -> Seq Scan on ort a (cost=0.00..355.00 rows=32100 width=4) + -> Hash (cost=355.00..355.00 rows=32100 width=4) + -> Seq Scan on ort b (cost=0.00..355.00 rows=32100 width=4) + Optimizer: Postgres query optimizer +(7 rows) + +set optimizer_relations_threshold = 2; +explain select count(a.a) from ort a join ort b on a.a = b.a; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=168633.18..168633.19 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=168633.12..168633.18 rows=3 width=8) + -> Partial Aggregate (cost=168633.12..168633.14 rows=1 width=8) + -> Hash Join (cost=756.25..160905.05 rows=3091230 width=4) + Hash Cond: (a.a = b.a) + -> Seq Scan on ort a (cost=0.00..355.00 rows=32100 width=4) + -> Hash (cost=355.00..355.00 rows=32100 width=4) + -> Seq Scan on ort b (cost=0.00..355.00 rows=32100 width=4) + Optimizer: Postgres query optimizer +(9 rows) + +explain select * from ort a join ort b on a.a = b.a; + QUERY PLAN +----------------------------------------------------------------------------------------- + Gather Motion 
3:1 (slice1; segments: 3) (cost=756.25..284554.25 rows=9273690 width=8) + -> Hash Join (cost=756.25..160905.05 rows=3091230 width=8) + Hash Cond: (a.a = b.a) + -> Seq Scan on ort a (cost=0.00..355.00 rows=32100 width=4) + -> Hash (cost=355.00..355.00 rows=32100 width=4) + -> Seq Scan on ort b (cost=0.00..355.00 rows=32100 width=4) + Optimizer: Postgres query optimizer +(7 rows) + +drop table ort; diff --git a/src/test/regress/expected/gporca_optimizer.out b/src/test/regress/expected/gporca_optimizer.out index 6eb308c4164..9337befb698 100644 --- a/src/test/regress/expected/gporca_optimizer.out +++ b/src/test/regress/expected/gporca_optimizer.out @@ -15004,3 +15004,65 @@ SELECT CAST(a AS TEXT[]) FROM array_coerceviaio; (1 row) --------------------------------------------------------------------------------- +-- Test do not use ORCA when optimizer_relations_threshold is set +create table ort(a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+explain insert into ort values(1); + QUERY PLAN +------------------------------------------------------------ + Insert on ort (cost=0.00..0.01 rows=1 width=4) + -> Result (cost=0.00..0.00 rows=1 width=8) + -> Result (cost=0.00..0.00 rows=1 width=4) + -> Result (cost=0.00..0.00 rows=1 width=1) + Optimizer: Pivotal Optimizer (GPORCA) +(5 rows) + +set optimizer_relations_threshold = 1; +explain insert into ort values(1); + QUERY PLAN +------------------------------------------------- + Insert on ort (cost=0.00..0.03 rows=0 width=0) + -> Result (cost=0.00..0.01 rows=1 width=4) + Optimizer: Postgres query optimizer +(3 rows) + +explain select * from ort a join ort b on a.a = b.a; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=8) + -> Hash Join (cost=0.00..862.00 rows=1 width=8) + Hash Cond: (ort.a = ort_1.a) + -> Seq Scan on ort (cost=0.00..431.00 rows=1 width=4) + -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Seq Scan on ort ort_1 (cost=0.00..431.00 rows=1 width=4) + Optimizer: Pivotal Optimizer (GPORCA) +(7 rows) + +set optimizer_relations_threshold = 2; +explain select count(a.a) from ort a join ort b on a.a = b.a; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..862.00 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=4) + -> Hash Join (cost=0.00..862.00 rows=1 width=4) + Hash Cond: (ort.a = ort_1.a) + -> Seq Scan on ort (cost=0.00..431.00 rows=1 width=4) + -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Seq Scan on ort ort_1 (cost=0.00..431.00 rows=1 width=4) + Optimizer: Pivotal Optimizer (GPORCA) +(8 rows) + +explain select * from ort a join ort b on a.a = b.a; + QUERY PLAN +----------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) 
(cost=756.25..284554.25 rows=9273690 width=8) + -> Hash Join (cost=756.25..160905.05 rows=3091230 width=8) + Hash Cond: (a.a = b.a) + -> Seq Scan on ort a (cost=0.00..355.00 rows=32100 width=4) + -> Hash (cost=355.00..355.00 rows=32100 width=4) + -> Seq Scan on ort b (cost=0.00..355.00 rows=32100 width=4) + Optimizer: Postgres query optimizer +(7 rows) + +drop table ort; diff --git a/src/test/regress/sql/gporca.sql b/src/test/regress/sql/gporca.sql index a659d5695c6..fb72970dd6f 100644 --- a/src/test/regress/sql/gporca.sql +++ b/src/test/regress/sql/gporca.sql @@ -3706,6 +3706,16 @@ INSERT INTO array_coerceviaio values(ARRAY[1, 2, 3]); EXPLAIN SELECT CAST(a AS TEXT[]) FROM array_coerceviaio; SELECT CAST(a AS TEXT[]) FROM array_coerceviaio; --------------------------------------------------------------------------------- +-- Test do not use ORCA when optimizer_relations_threshold is set +create table ort(a int); +explain insert into ort values(1); +set optimizer_relations_threshold = 1; +explain insert into ort values(1); +explain select * from ort a join ort b on a.a = b.a; +set optimizer_relations_threshold = 2; +explain select count(a.a) from ort a join ort b on a.a = b.a; +explain select * from ort a join ort b on a.a = b.a; +drop table ort; -- start_ignore DROP SCHEMA orca CASCADE;