From eb6df8cbd4039997c07ee61c735b2c94ebbb444d Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Tue, 24 Oct 2023 23:02:16 +0800 Subject: [PATCH] Correct shared hash table rows estimation The subpath's row count is already set correctly regardless of its locus, so keep this code consistent with upstream. For shared hash table size estimation, we need the total number of rows, so we need to undo the division. Authored-by: Zhang Mingli avamingli@gmail.com --- src/backend/optimizer/path/costsize.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 2122f54ff25..48727a544e1 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -4075,30 +4075,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, * number, so we need to undo the division. */ if (parallel_hash) - { - /* - * GPDB - * For GP style parallel, inner path's locus could be ReplicatedWorkers. - * - * Join - * / \ - * Outer ParallelHash - * \ - * ParallelBroadcastMotion - * \ - * origin_inner - * - * In this case, inner_path.rows has already taken parallel into account. - * We shouldn't plus parallel_divisor again, else the estimation of Hash - * Table will be much more than the size it really is. - * The side-effect will lead to: - * 1. Estimate or allocate much more memory for shared Hash Table. - * 2. Use MergeJoin instead of HashJoin if planner recognize inner table - * is too big. - */ - if(!CdbPathLocus_IsReplicatedWorkers(inner_path->locus)) - inner_path_rows_total *= get_parallel_divisor(inner_path); - } + inner_path_rows_total *= get_parallel_divisor(inner_path); /* * Get hash table size that executor would use for inner relation.