From bab5d6498b5be58e778af83181521b0118dda20c Mon Sep 17 00:00:00 2001 From: GongXun Date: Fri, 20 Jun 2025 11:46:14 +0800 Subject: [PATCH] perf: Improve hash agg performance in large-scale data situations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the TPC-DS tests, we observed an issue where CloudberryDB (CBDB) performed worse than Greenplum (GPDB) when the query plan generated a multi-stage aggregation (e.g., TPC-DS query 04). The phenomenon showed that the deduplication effect of CBDB’s **Streaming Partial HashAggregate** was significantly worse. As a result, the **Finalize HashAggregate** operator in CBDB processed significantly more data compared to GPDB under the same dataset. Example plan from CBDB: Gather Motion 32:1 (slice1; segments: 32) (cost=0.00..19988.81 rows=1800000 width=76) -> Finalize HashAggregate (cost=0.00..19663.99 rows=56250 width=81) Group Key: customer_gp.c_customer_id, customer_gp.c_first_name, customer_gp.c_last_name, customer_gp.c_preferred_cust_flag, customer_gp.c_birth_country, customer_gp.c_login, customer_gp.c_email_address, date_dim_gp.d_year -> Redistribute Motion 32:32 (slice2; segments: 32) (cost=0.00..19603.35 rows=56250 width=81) Hash Key: customer_gp.c_customer_id, customer_gp.c_first_name, customer_gp.c_last_name, customer_gp.c_preferred_cust_flag, customer_gp.c_birth_country, customer_gp.c_login, customer_gp.c_email_address, date_dim_gp.d_year -> Streaming Partial HashAggregate (cost=0.00..19589.09 rows=56250 width=81) Group Key: customer_gp.c_customer_id, customer_gp.c_first_name, customer_gp.c_last_name, customer_gp.c_preferred_cust_flag, customer_gp.c_birth_country, customer_gp.c_login, customer_gp.c_email_address, date_dim_gp.d_year -> Hash Join (cost=0.00..12346.24 rows=6935137 width=95) Hash Cond: (store_sales_gp.ss_customer_sk = customer_gp.c_customer_sk) ... ... 
Upon further investigation, we found that the **`NumericAggState`** structure in CloudberryDB contained two additional fields compared to Greenplum:

```c
int64 pInfcount; /* count of +Inf values */
int64 nInfcount; /* count of -Inf values */
```

These fields were introduced in PostgreSQL 14 to support +/- Inf values for numeric types. Consequently, the size of the NumericAggState structure increased from 128 bytes to 144 bytes. In the Streaming Partial HashAggregate, a NumericAggState structure is created for each grouping key to track statistics for numeric types. This results in CBDB allocating 16 more bytes per grouping key compared to GPDB. This additional memory allocation contributes to the observed performance difference. To address this issue, we need to adjust aggstate->hash_mem_limit. Inspired by PostgreSQL's handling of hash_mem_limit, we introduce a scaling factor called hash_mem_multiplier. PostgreSQL 15 raised its default to 2.0 to ensure better memory utilization during hash-based aggregation operations; following that direction, this patch raises the default value of hash_mem_multiplier from 1.0 to 1.5 (see the changes to globals.c and guc.c). 
https://www.postgresql.org/message-id/flat/CAH2-Wzndc_ROk6CY-bC6p9O53q974Y0Ey4WX8jcPbuTZYM4Q3A%40mail.gmail.com --- .../test/regress/expected/groupingsets.out | 2 ++ .../expected/groupingsets_optimizer.out | 2 ++ .../src/test/regress/expected/rowsecurity.out | 10 +++---- .../expected/rowsecurity_optimizer.out | 10 +++---- .../src/test/regress/sql/groupingsets.sql | 2 ++ src/backend/executor/nodeAgg.c | 27 ++++++++++++++++++- src/backend/utils/init/globals.c | 2 +- src/backend/utils/misc/guc.c | 2 +- src/test/regress/expected/cte_prune.out | 18 ++++++------- src/test/regress/expected/explain_format.out | 2 ++ .../expected/explain_format_optimizer.out | 2 ++ src/test/regress/expected/groupingsets.out | 2 ++ .../expected/groupingsets_optimizer.out | 2 ++ src/test/regress/sql/explain_format.sql | 2 ++ src/test/regress/sql/groupingsets.sql | 2 ++ .../expected/groupingsets.out | 9 +++---- .../expected/workfile/hashagg_spill.out | 2 ++ .../sql/workfile/hashagg_spill.sql | 2 ++ 18 files changed, 73 insertions(+), 27 deletions(-) diff --git a/contrib/pax_storage/src/test/regress/expected/groupingsets.out b/contrib/pax_storage/src/test/regress/expected/groupingsets.out index 5dd8bb8b53e..25a26d506f8 100644 --- a/contrib/pax_storage/src/test/regress/expected/groupingsets.out +++ b/contrib/pax_storage/src/test/regress/expected/groupingsets.out @@ -1784,6 +1784,7 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou (2 rows) -- test the knapsack +set hash_mem_multiplier = 1; set enable_indexscan = false; set work_mem = '64kB'; explain (costs off) @@ -1863,6 +1864,7 @@ explain (costs off) Optimizer: Postgres query optimizer (15 rows) +reset hash_mem_multiplier; -- check collation-sensitive matching between grouping expressions -- (similar to a check for aggregates, but there are additional code -- paths for GROUPING, so check again here) diff --git a/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out 
b/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out index 9923ea610d2..b2ac9ce9ddc 100644 --- a/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out @@ -1847,6 +1847,7 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou (2 rows) -- test the knapsack +set hash_mem_multiplier = 1; set enable_indexscan = false; set work_mem = '64kB'; explain (costs off) @@ -2017,6 +2018,7 @@ explain (costs off) Optimizer: Pivotal Optimizer (GPORCA) (50 rows) +reset hash_mem_multiplier; -- check collation-sensitive matching between grouping expressions -- (similar to a check for aggregates, but there are additional code -- paths for GROUPING, so check again here) diff --git a/contrib/pax_storage/src/test/regress/expected/rowsecurity.out b/contrib/pax_storage/src/test/regress/expected/rowsecurity.out index 4cd81f77e1e..f1a68caafaf 100644 --- a/contrib/pax_storage/src/test/regress/expected/rowsecurity.out +++ b/contrib/pax_storage/src/test/regress/expected/rowsecurity.out @@ -1878,15 +1878,15 @@ AND f_leak(t2_1.b) AND f_leak(t2_2.b) RETURNING *, t2_1, t2_2; -> Update on t2 t2_1 -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) -> Hash Join - Hash Cond: (t2_2.b = t2_1.b) + Hash Cond: (t2_1.b = t2_2.b) -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: t2_2.b - -> Seq Scan on t2 t2_2 + Hash Key: t2_1.b + -> Seq Scan on t2 t2_1 Filter: ((a = 3) AND ((a % 2) = 1) AND f_leak(b)) -> Hash -> Redistribute Motion 3:3 (slice4; segments: 3) - Hash Key: t2_1.b - -> Seq Scan on t2 t2_1 + Hash Key: t2_2.b + -> Seq Scan on t2 t2_2 Filter: ((a = 3) AND ((a % 2) = 1) AND f_leak(b)) Optimizer: Postgres query optimizer (15 rows) diff --git a/contrib/pax_storage/src/test/regress/expected/rowsecurity_optimizer.out b/contrib/pax_storage/src/test/regress/expected/rowsecurity_optimizer.out index da828356739..c26777d8711 100644 --- 
a/contrib/pax_storage/src/test/regress/expected/rowsecurity_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/rowsecurity_optimizer.out @@ -1878,15 +1878,15 @@ AND f_leak(t2_1.b) AND f_leak(t2_2.b) RETURNING *, t2_1, t2_2; -> Update on t2 t2_1 -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) -> Hash Join - Hash Cond: (t2_2.b = t2_1.b) + Hash Cond: (t2_1.b = t2_2.b) -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: t2_2.b - -> Seq Scan on t2 t2_2 + Hash Key: t2_1.b + -> Seq Scan on t2 t2_1 Filter: ((a = 3) AND ((a % 2) = 1) AND f_leak(b)) -> Hash -> Redistribute Motion 3:3 (slice4; segments: 3) - Hash Key: t2_1.b - -> Seq Scan on t2 t2_1 + Hash Key: t2_2.b + -> Seq Scan on t2 t2_2 Filter: ((a = 3) AND ((a % 2) = 1) AND f_leak(b)) Optimizer: Postgres query optimizer (15 rows) diff --git a/contrib/pax_storage/src/test/regress/sql/groupingsets.sql b/contrib/pax_storage/src/test/regress/sql/groupingsets.sql index 835a3cff623..294a52c2036 100644 --- a/contrib/pax_storage/src/test/regress/sql/groupingsets.sql +++ b/contrib/pax_storage/src/test/regress/sql/groupingsets.sql @@ -518,6 +518,7 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou -- test the knapsack +set hash_mem_multiplier = 1; set enable_indexscan = false; set work_mem = '64kB'; explain (costs off) @@ -541,6 +542,7 @@ explain (costs off) count(*) from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two); +reset hash_mem_multiplier; -- check collation-sensitive matching between grouping expressions -- (similar to a check for aggregates, but there are additional code -- paths for GROUPING, so check again here) diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 1c49e426d2d..7513d11102f 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -268,6 +268,7 @@ #include "utils/dynahash.h" #include "utils/expandeddatum.h" #include "utils/faultinjector.h" +#include 
"utils/guc.h" #include "utils/logtape.h" #include "utils/lsyscache.h" #include "utils/memutils.h" @@ -1870,6 +1871,8 @@ hash_agg_set_limits(AggState *aggstate, double hashentrysize, double input_group strict_memlimit = operator_mem; } + strict_memlimit = strict_memlimit * hash_mem_multiplier; + /* if not expected to spill, use all of work_mem */ if (input_groups * hashentrysize < strict_memlimit * 1024L) { @@ -2092,6 +2095,26 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions) } } } + +#ifdef USE_ASSERT_CHECKING + if (unlikely(DEBUG1 >= log_min_messages) && aggstate->streaming) + { + for (int setno = 0; setno < aggstate->num_hashes; setno++) + { + AggStatePerHash perhash = &aggstate->perhash[setno]; + if (perhash->chain_count > 0) + { + elog(DEBUG1, "hash_agg_update_metrics streaming: setno=%d, chain_length_total=%.1f, chain_length_max=%d, bucket_used=%lu, bucket_total=%lu, num_expansions=%u", + setno, + (double)perhash->chain_length_total / perhash->chain_count, + perhash->chain_length_max, + perhash->bucket_used, + perhash->bucket_total, + perhash->num_expansions); + } + } + } +#endif } /* @@ -2148,6 +2171,8 @@ hash_choose_num_partitions(AggState *aggstate, double input_groups, double hashe strict_memlimit = operator_mem; } + strict_memlimit = strict_memlimit * hash_mem_multiplier; + /* * Avoid creating so many partitions that the memory requirements of the * open partition files are greater than 1/4 of hash_mem. 
@@ -2276,7 +2301,7 @@ lookup_hash_entries(AggState *aggstate) initialize_hash_entry(aggstate, hashtable, entry); pergroup[setno] = entry->additional; } - else + else if (!aggstate->streaming) { HashAggSpill *spill = &aggstate->hash_spills[setno]; TupleTableSlot *slot = aggstate->tmpcontext->ecxt_outertuple; diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index b187da7e849..aad67dbb2f5 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -145,7 +145,7 @@ int max_statement_mem = 2048000; * do not enforce per-query memory limit */ int gp_vmem_limit_per_query = 0; -double hash_mem_multiplier = 1.0; +double hash_mem_multiplier = 1.5; int maintenance_work_mem = 65536; int max_parallel_maintenance_workers = 2; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index fae7f8a0e5c..d8cdf027df5 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -3828,7 +3828,7 @@ static struct config_real ConfigureNamesReal[] = GUC_EXPLAIN }, &hash_mem_multiplier, - 1.0, 1.0, 1000.0, + 1.5, 1.0, 1000.0, NULL, NULL, NULL }, diff --git a/src/test/regress/expected/cte_prune.out b/src/test/regress/expected/cte_prune.out index 1b54f443487..a76bc064ce2 100644 --- a/src/test/regress/expected/cte_prune.out +++ b/src/test/regress/expected/cte_prune.out @@ -1230,14 +1230,14 @@ LIMIT 10; -> HashAggregate (cost=102580688686565152.00..102596190789768272.00 rows=247107000000000 width=48) Output: cup.c, cup.d, cup.e, t.d, t.b Group Key: cup.c, cup.d, cup.e, t.d, t.b - Planned Partitions: 256 + Planned Partitions: 512 -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=93196181903903024.00..102459992361252656.00 rows=741321000000000 width=48) Output: cup.c, cup.d, cup.e, t.d, t.b Hash Key: cup.c, cup.d, cup.e, t.d, t.b -> HashAggregate (cost=93196181903903024.00..102445165941252656.00 rows=741321000000000 width=48) Output: cup.c, cup.d, cup.e, t.d, t.b Group Key: cup.c, cup.d, cup.e, t.d, 
t.b - Planned Partitions: 256 + Planned Partitions: 512 -> Nested Loop (cost=67221234716.02..7598699218584692.00 rows=525742695955889984 width=48) Output: cup.c, cup.d, cup.e, t.d, t.b -> Broadcast Motion 3:3 (slice4; segments: 3) (cost=679.75..228161.25 rows=7413210 width=8) @@ -1443,20 +1443,20 @@ select * from where country.percentage = countrylanguage.percentage order by countrylanguage.COUNTRY,country.language LIMIT 40; QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Limit (cost=1051.23..1051.63 rows=28 width=202) + Limit (cost=1051.23..1051.62 rows=28 width=202) Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage, country.code, country.country, country.language, country.isofficial, country.percentage - -> Gather Motion 3:1 (slice1; segments: 3) (cost=1051.23..1051.63 rows=28 width=202) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=1051.23..1051.62 rows=28 width=202) Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage, country.code, country.country, country.language, country.isofficial, country.percentage Merge Key: country.country, countrylanguage_2.language - -> Limit (cost=1051.23..1051.26 rows=9 width=202) + -> Limit (cost=1051.23..1051.25 rows=9 width=202) Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage, country.code, country.country, country.language, country.isofficial, country.percentage - -> Sort (cost=1051.23..1051.26 rows=9 width=202) + -> Sort (cost=1051.23..1051.25 rows=9 width=202) Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, 
countrylanguage_2.isofficial, countrylanguage_2.percentage, country.code, country.country, country.language, country.isofficial, country.percentage Sort Key: country.country, countrylanguage_2.language -> Hash Join (cost=846.99..1051.08 rows=9 width=202) Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage, country.code, country.country, country.language, country.isofficial, country.percentage Hash Cond: (country.percentage = countrylanguage_2.percentage) - -> Hash Join (cost=436.98..637.91 rows=245 width=85) + -> Hash Join (cost=436.98..637.90 rows=245 width=85) Output: country.code, country.country, country.language, country.isofficial, country.percentage Hash Cond: (countrylanguage.countrycode = country.code) -> Seq Scan on cte_prune.countrylanguage (cost=0.00..165.33 rows=13133 width=16) @@ -1490,9 +1490,9 @@ where country.percentage = countrylanguage.percentage order by countrylanguage.C -> Seq Scan on cte_prune.country country_1 (cost=0.00..77.33 rows=3 width=52) Output: country_1.code, country_1.name, country_1.capital Filter: (country_1.continent = 'Europe'::text) - -> Hash (cost=409.67..409.67 rows=28 width=117) + -> Hash (cost=409.66..409.66 rows=28 width=117) Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage - -> Broadcast Motion 3:3 (slice5; segments: 3) (cost=227.29..409.67 rows=28 width=117) + -> Broadcast Motion 3:3 (slice5; segments: 3) (cost=227.29..409.66 rows=28 width=117) Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage -> Hash Join (cost=227.29..409.20 rows=9 width=117) Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage diff --git a/src/test/regress/expected/explain_format.out 
b/src/test/regress/expected/explain_format.out index c8c452cc781..a57a1fd55fc 100644 --- a/src/test/regress/expected/explain_format.out +++ b/src/test/regress/expected/explain_format.out @@ -599,6 +599,7 @@ ANALYZE test_src_tbl; set optimizer_enable_hashagg = on; SET statement_mem = '1000kB'; -- Hashagg with spilling +set hash_mem_multiplier = 1; CREATE TABLE test_hashagg_spill AS SELECT a, COUNT(DISTINCT b) AS b FROM test_src_tbl GROUP BY a; NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Greenplum Database data distribution key for this table. @@ -631,6 +632,7 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur -- The planner generates multiple hash tables but ORCA uses Shared Scan. -- flaky test -- EXPLAIN (analyze, costs off) SELECT a, avg(b) AS b FROM test_src_tbl GROUP BY grouping sets ((a), (b)); +reset hash_mem_multiplier; RESET optimizer_enable_hashagg; RESET statement_mem; -- Cleanup diff --git a/src/test/regress/expected/explain_format_optimizer.out b/src/test/regress/expected/explain_format_optimizer.out index 459a466c993..f778c3f2048 100644 --- a/src/test/regress/expected/explain_format_optimizer.out +++ b/src/test/regress/expected/explain_format_optimizer.out @@ -546,6 +546,7 @@ ANALYZE test_src_tbl; set optimizer_enable_hashagg = on; SET statement_mem = '1000kB'; -- Hashagg with spilling +set hash_mem_multiplier = 1; CREATE TABLE test_hashagg_spill AS SELECT a, COUNT(DISTINCT b) AS b FROM test_src_tbl GROUP BY a; NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. @@ -576,6 +577,7 @@ NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entr -- The planner generates multiple hash tables but ORCA uses Shared Scan. 
-- flaky test -- EXPLAIN (analyze, costs off) SELECT a, avg(b) AS b FROM test_src_tbl GROUP BY grouping sets ((a), (b)); +reset hash_mem_multiplier; RESET optimizer_enable_hashagg; RESET statement_mem; -- Cleanup diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index ae84ced2484..f7eefa2c8eb 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -1861,6 +1861,7 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou (2 rows) -- test the knapsack +set hash_mem_multiplier = 1; set enable_indexscan = false; set work_mem = '64kB'; explain (costs off) @@ -1940,6 +1941,7 @@ explain (costs off) Optimizer: Postgres query optimizer (15 rows) +reset hash_mem_multiplier; -- check collation-sensitive matching between grouping expressions -- (similar to a check for aggregates, but there are additional code -- paths for GROUPING, so check again here) diff --git a/src/test/regress/expected/groupingsets_optimizer.out b/src/test/regress/expected/groupingsets_optimizer.out index d59a83ae9cf..39fad5a1c8a 100644 --- a/src/test/regress/expected/groupingsets_optimizer.out +++ b/src/test/regress/expected/groupingsets_optimizer.out @@ -1924,6 +1924,7 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou (2 rows) -- test the knapsack +set hash_mem_multiplier = 1; set enable_indexscan = false; set work_mem = '64kB'; explain (costs off) @@ -2094,6 +2095,7 @@ explain (costs off) Optimizer: Pivotal Optimizer (GPORCA) (50 rows) +reset hash_mem_multiplier; -- check collation-sensitive matching between grouping expressions -- (similar to a check for aggregates, but there are additional code -- paths for GROUPING, so check again here) diff --git a/src/test/regress/sql/explain_format.sql b/src/test/regress/sql/explain_format.sql index e04cba90947..229fcdfe6fe 100644 --- a/src/test/regress/sql/explain_format.sql +++ 
b/src/test/regress/sql/explain_format.sql @@ -128,6 +128,7 @@ set optimizer_enable_hashagg = on; SET statement_mem = '1000kB'; -- Hashagg with spilling +set hash_mem_multiplier = 1; CREATE TABLE test_hashagg_spill AS SELECT a, COUNT(DISTINCT b) AS b FROM test_src_tbl GROUP BY a; EXPLAIN (analyze, costs off) SELECT a, COUNT(DISTINCT b) AS b FROM test_src_tbl GROUP BY a; @@ -139,6 +140,7 @@ SELECT a, avg(b) AS b FROM test_src_tbl GROUP BY grouping sets ((a), (b)); -- flaky test -- EXPLAIN (analyze, costs off) SELECT a, avg(b) AS b FROM test_src_tbl GROUP BY grouping sets ((a), (b)); +reset hash_mem_multiplier; RESET optimizer_enable_hashagg; RESET statement_mem; diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql index 054a299b56f..6907b5f1f55 100644 --- a/src/test/regress/sql/groupingsets.sql +++ b/src/test/regress/sql/groupingsets.sql @@ -544,6 +544,7 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou -- test the knapsack +set hash_mem_multiplier = 1; set enable_indexscan = false; set work_mem = '64kB'; explain (costs off) @@ -567,6 +568,7 @@ explain (costs off) count(*) from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two); +reset hash_mem_multiplier; -- check collation-sensitive matching between grouping expressions -- (similar to a check for aggregates, but there are additional code -- paths for GROUPING, so check again here) diff --git a/src/test/singlenode_regress/expected/groupingsets.out b/src/test/singlenode_regress/expected/groupingsets.out index 36872ea7a29..83d6890b95d 100644 --- a/src/test/singlenode_regress/expected/groupingsets.out +++ b/src/test/singlenode_regress/expected/groupingsets.out @@ -2069,14 +2069,13 @@ select g100, g10, sum(g::numeric), count(*), max(g::text) from gs_data_1 group by cube (g1000, g100,g10); QUERY PLAN ------------------------------------- - GroupAggregate + MixedAggregate + Hash Key: g100 + Hash Key: g100, g10 Group Key: 
g1000, g100, g10 Group Key: g1000, g100 Group Key: g1000 Group Key: () - Sort Key: g100, g10 - Group Key: g100, g10 - Group Key: g100 Sort Key: g10, g1000 Group Key: g10, g1000 Group Key: g10 @@ -2084,7 +2083,7 @@ from gs_data_1 group by cube (g1000, g100,g10); Sort Key: g1000, g100, g10 -> Seq Scan on gs_data_1 Optimizer: Postgres query optimizer -(15 rows) +(14 rows) create table gs_hash_1 as select g100, g10, sum(g::numeric), count(*), max(g::text) diff --git a/src/test/singlenode_regress/expected/workfile/hashagg_spill.out b/src/test/singlenode_regress/expected/workfile/hashagg_spill.out index 4a61fc97090..cc98d3a785a 100644 --- a/src/test/singlenode_regress/expected/workfile/hashagg_spill.out +++ b/src/test/singlenode_regress/expected/workfile/hashagg_spill.out @@ -112,6 +112,7 @@ reset optimizer_force_multistage_agg; CREATE TABLE hashagg_spill(col1 numeric, col2 int); INSERT INTO hashagg_spill SELECT id, 1 FROM generate_series(1,20000) id; ANALYZE hashagg_spill; +set hash_mem_multiplier = 1; SET statement_mem='1000kB'; SET gp_workfile_compression = OFF; select * from hashagg_spill.is_workfile_created('explain (analyze, verbose) SELECT avg(col2) col2 FROM hashagg_spill GROUP BY col1 HAVING(sum(col1)) < 0;'); @@ -131,6 +132,7 @@ select * from hashagg_spill.is_workfile_created('explain (analyze, verbose) SELE CREATE TABLE spill_temptblspace (a numeric); SET temp_tablespaces=pg_default; INSERT INTO spill_temptblspace SELECT avg(col2) col2 FROM hashagg_spill GROUP BY col1 HAVING(sum(col1)) < 0; +reset hash_mem_multiplier; RESET temp_tablespaces; RESET statement_mem; RESET gp_workfile_compression; diff --git a/src/test/singlenode_regress/sql/workfile/hashagg_spill.sql b/src/test/singlenode_regress/sql/workfile/hashagg_spill.sql index 6f22b57a99a..7cbcd11b8ec 100644 --- a/src/test/singlenode_regress/sql/workfile/hashagg_spill.sql +++ b/src/test/singlenode_regress/sql/workfile/hashagg_spill.sql @@ -85,6 +85,7 @@ reset optimizer_force_multistage_agg; CREATE TABLE 
hashagg_spill(col1 numeric, col2 int); INSERT INTO hashagg_spill SELECT id, 1 FROM generate_series(1,20000) id; ANALYZE hashagg_spill; +set hash_mem_multiplier = 1; SET statement_mem='1000kB'; SET gp_workfile_compression = OFF; select * from hashagg_spill.is_workfile_created('explain (analyze, verbose) SELECT avg(col2) col2 FROM hashagg_spill GROUP BY col1 HAVING(sum(col1)) < 0;'); @@ -95,6 +96,7 @@ select * from hashagg_spill.is_workfile_created('explain (analyze, verbose) SELE CREATE TABLE spill_temptblspace (a numeric); SET temp_tablespaces=pg_default; INSERT INTO spill_temptblspace SELECT avg(col2) col2 FROM hashagg_spill GROUP BY col1 HAVING(sum(col1)) < 0; +reset hash_mem_multiplier; RESET temp_tablespaces; RESET statement_mem; RESET gp_workfile_compression;