From 393cc7edf7bc528f29c466e8e5379d246762aa28 Mon Sep 17 00:00:00 2001 From: xy720 <22125576+xy720@users.noreply.github.com> Date: Sat, 15 Nov 2025 18:07:39 +0800 Subject: [PATCH] [fix](mtmx) Fix coredump because common expr push down to scannode which belongs to an agg materialized view (#58038) ### What problem does this PR solve? Issue Number: close #58037 Problem Summary: ``` #0 0x00007f9aca4a3f8c in __pthread_kill_implementation () from /lib64/libc.so.6 #1 0x00007f9aca454a26 in raise () from /lib64/libc.so.6 #2 0x00007f9aca43d87c in abort () from /lib64/libc.so.6 #3 0x0000561dc3d1ea1d in ?? () #4 0x0000561dc3d1105a in google::LogMessage::Fail() () #5 0x0000561dc3d14146 in google::LogMessage::SendToLog() () #6 0x0000561dc3d10b90 in google::LogMessage::Flush() () #7 0x0000561dc3d14989 in google::LogMessageFatal::~LogMessageFatal() () #8 0x0000561db854c996 in assert_cast const&, (TypeCheckOnRelease)1, doris::vectorized::IColumn const&>(doris::vectorized::IColumn const&)::{lambda(auto:1&&)#1}::operator()(doris::vectorized::IColumn const&) const (this=this@entry=0x7f9658ccc1f8, from=...) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/common/assert_cast.h:58 #9 0x0000561db854c7d7 in assert_cast const&, (TypeCheckOnRelease)1, doris::vectorized::IColumn const&> (from=...) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/common/assert_cast.h:73 #10 0x0000561db854bb0b in doris::vectorized::ColumnStr::compare_at (this=0x7f957a14e2c0, n=1159288, m=6, rhs_=...) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/columns/column_string.h:526 #11 0x0000561dbe108c6b in doris::vectorized::GenericComparisonImpl >::vector_constant (a=..., b=..., c=...) 
at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/functions/functions_comparison.h:112 #12 doris::vectorized::FunctionComparison::execute_generic_identical_types ( this=, block=..., result=result@entry=10, c0=0x7f957a14e2c0, c1=) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/functions/functions_comparison.h:506 #13 0x0000561dbdf9e97e in doris::vectorized::FunctionComparison::execute_generic ( this=0x7f96d6fb1b90, block=..., result=10, c0=..., c1=...) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/functions/functions_comparison.h:517 #14 doris::vectorized::FunctionComparison::execute_impl (this=0x7f96d6fb1b90, context=, block=..., arguments=..., result=10, input_rows_count=104) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/functions/functions_comparison.h:707 #15 0x0000561dbdcf1b8f in doris::vectorized::DefaultExecutable::execute_impl (this=, context=0x6, block=..., arguments=..., result=1, input_rows_count=104) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/functions/function.h:472 #16 0x0000561dbeea76ae in doris::vectorized::PreparedFunctionImpl::_execute_skipped_constant_deal (this=this@entry=0x7f99f62a65d0, context=context@entry=0x7f99f6442b00, block=..., args=..., result=result@entry=10, input_rows_count=104, dry_run=) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/functions/function.cpp:121 #17 0x0000561dbeea4ce8 in doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns (this=0x7f99f62a65d0, context=0x7f99f6442b00, block=..., args=..., result=10, input_rows_count=104, dry_run=) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/functions/function.cpp:246 #18 doris::vectorized::PreparedFunctionImpl::default_implementation_for_nulls (this=this@entry=0x7f99f62a65d0, context=context@entry=0x7f99f6442b00, block=..., args=..., result=result@entry=10, input_rows_count=104, dry_run=, executed=0x7f9658ccc666) at 
/data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/functions/function.cpp:218 #19 0x0000561dbeea4e9c in doris::vectorized::PreparedFunctionImpl::_execute_skipped_constant_deal (this=0x7f99f62a65d0, context=0x7f99f6442b00, block=..., args=..., result=10, input_rows_count=, dry_run=) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/functions/function.cpp:112 #20 doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns (this=0x7f99f62a65d0, context=0x7f99f6442b00, block=..., args=..., result=10, input_rows_count=104, dry_run=) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/functions/function.cpp:246 #21 0x0000561dbeea4f66 in doris::vectorized::PreparedFunctionImpl::execute (this=0x11b078, context=0x6, block=..., args=..., result=1, input_rows_count=104, dry_run=) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/functions/function.cpp:252 #22 0x0000561dbdcf1500 in doris::vectorized::IFunctionBase::execute (this=, context=0x7f99f6442b00, block=..., arguments=..., result=10, input_rows_count=104, dry_run=) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/functions/function.h:195 --Type for more, q to quit, c to continue without paging--c #23 0x0000561dbdceccad in doris::vectorized::VectorizedFnCall::_do_execute (this=0x7f96f0fec510, context=0x7f957f09cdf0, block=0x7f957b02a3b0, result_column_id=0x7f9658ccca14, args=...) 
at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/exprs/vectorized_fn_call.cpp:197 #24 0x0000561dbdced2c6 in doris::vectorized::VectorizedFnCall::execute (this=0x11b078, context=0x6, block=0x7f9aca4a3f8c <__pthread_kill_implementation+268>, result_column_id=0x7f9658ccbe10) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/exprs/vectorized_fn_call.cpp:212 #25 0x0000561dbdd1e51b in doris::vectorized::VExprContext::execute (this=0x7f957f09cdf0, block=0x7f9aca4a3f8c <__pthread_kill_implementation+268>, block@entry=0x7f957b02a3b0, result_column_id=result_column_id@entry=0x7f9658ccca14) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/exprs/vexpr_context.cpp:55 #26 0x0000561dbdd1fcb5 in doris::vectorized::VExprContext::execute_conjuncts (ctxs=..., filters=filters@entry=0x0, accept_null=false, block=block@entry=0x7f957b02a3b0, result_filter=result_filter@entry=0x7f9658ccccc0, can_filter_all=0x7f9658cccbc7) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/exprs/vexpr_context.cpp:174 #27 0x0000561dbdd2131f in doris::vectorized::VExprContext::execute_conjuncts_and_filter_block (ctxs=..., block=0x7f957b02a3b0, columns_to_filter=..., column_to_keep=6, filter=...) 
at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/exprs/vexpr_context.cpp:354 #28 0x0000561db8f49450 in doris::segment_v2::SegmentIterator::_execute_common_expr (this=this@entry=0x7f954e20a000, sel_rowid_idx=0x7f955c0d2000, selected_size=@0x7f9658ccce6e: 104, block=block@entry=0x7f957b02a3b0) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2338 #29 0x0000561db8f482f8 in doris::segment_v2::SegmentIterator::_next_batch_internal (this=0x7f954e20a000, block=0x7f957b02a3b0) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2230 #30 0x0000561db8f45212 in doris::segment_v2::SegmentIterator::next_batch(doris::vectorized::Block*)::$_0::operator()() const ( this=) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:1953 #31 doris::segment_v2::SegmentIterator::next_batch (this=0x7f954e20a000, block=0x6) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:1952 #32 0x0000561db8ee49bc in doris::segment_v2::LazyInitSegmentIterator::next_batch (this=0x7f953bc71f80, block=0x7f957b02a3b0) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.h:44 #33 0x0000561db8dab844 in doris::BetaRowsetReader::next_block (this=0x7f9a4e215800, block=0x7f957b02a3b0) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/olap/rowset/beta_rowset_reader.cpp:377 #34 0x0000561dc2c9413d in doris::vectorized::VCollectIterator::Level0Iterator::_refresh (this=0x7f953ba137a0) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/olap/vcollect_iterator.h:256 #35 doris::vectorized::VCollectIterator::Level0Iterator::refresh_current_row (this=this@entry=0x7f953ba137a0) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/olap/vcollect_iterator.cpp:509 #36 0x0000561dc2c93bf4 in doris::vectorized::VCollectIterator::Level0Iterator::init (this=0x7f953ba137a0, 
get_data_by_ref=) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/olap/vcollect_iterator.cpp:461 #37 0x0000561dc2c91002 in doris::vectorized::VCollectIterator::build_heap (this=0x7f957a52bb30, rs_readers=...) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/olap/vcollect_iterator.cpp:125 #38 0x0000561dc2c7e1f2 in doris::vectorized::BlockReader::_init_collect_iter (this=this@entry=0x7f957a52b400, read_params=...) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/olap/block_reader.cpp:153 #39 0x0000561dc2c7f191 in doris::vectorized::BlockReader::init (this=, read_params=...) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/olap/block_reader.cpp:226 #40 0x0000561dc3937869 in doris::vectorized::NewOlapScanner::open (this=0x7f9a56270210, state=) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/exec/scan/new_olap_scanner.cpp:252 #41 0x0000561dbdcc5413 in doris::vectorized::ScannerScheduler::_scanner_scan (ctx=..., scan_task=...) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:221 #42 0x0000561dbdcc62bd in doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const (this=) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:154 #43 doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::{lambda()#1}::operator()() const (this=0x7f954e25d3c0) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:153 #44 std::__invoke_impl, std::shared_ptr)::$_1::operator()() const::{lambda()#1}&>(std::__invoke_other, doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::{lambda()#1}&) (__f=...) 
at /data/home/lambxu/installs/ldb_toolchain_bak/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:61 #45 std::__invoke_r, std::shared_ptr)::$_1::operator()() const::{lambda()#1}&>(doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::{lambda()#1}&) (__fn=...) at /data/home/lambxu/installs/ldb_toolchain_bak/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:111 #46 std::_Function_handler, std::shared_ptr)::$_1::operator()() const::{lambda()#1}>::_M_invoke(std::_Any_data const&) (__functor=...) at /data/home/lambxu/installs/ldb_toolchain_bak/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291 #47 0x0000561db962137a in doris::ThreadPool::dispatch_thread (this=0x7f9a50f9d380) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/util/threadpool.cpp:602 #48 0x0000561db96159a1 in std::function::operator()() const (this=0x11a791) at /data/home/lambxu/installs/ldb_toolchain_bak/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:560 #49 doris::Thread::supervise_thread (arg=0x7f969f569ce0) at /data/home/lambxu/work/git/doris-3.1/doris/be/src/util/thread.cpp:498 #50 0x00007f9aca4a2215 in start_thread () from /lib64/libc.so.6 #51 0x00007f9aca524bdc in clone3 () from /lib64/libc.so.6 ``` Assume that columns 0,1,2,3,4 are the key columns of an AGG mv. Because PreAgg is OFF at the scan node, the block will contain all key columns so that data can be merged in the storage layer. If we select columns 0,1 with columns 3,4 in the where clause, then the slot ids should be 0,1,3,4, and the column ids in the conjuncts are the indexes into the slot ids (which are 2 and 3). But the plan uses the key type of the base table, which is DUP key, treating the AGG mv as a DUP mv, so these conjuncts are pushed down to the scan node which belongs to an AGG mv, and they will pick the wrong columns 2 and 3 (which should be 4 and 5) in the block to execute. 
So we should use the key type of mv but not the key type of base table. ### Release note None ### Check List (For Author) - Test - [x] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason - Behavior changed: - [x] No. - [ ] Yes. - Does this need documentation? - [x] No. - [ ] Yes. ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label --- .../apache/doris/planner/OlapScanNode.java | 6 +- ...aterialized_view_common_expr_push_down.out | 5 ++ ...rialized_view_common_expr_push_down.groovy | 76 +++++++++++++++++++ 3 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 regression-test/data/rollup_p0/test_materialized_view_common_expr_push_down.out create mode 100644 regression-test/suites/rollup_p0/test_materialized_view_common_expr_push_down.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index 3da774f2f8b5da..f1a71fd8e042d8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -1215,7 +1215,11 @@ protected void toThrift(TPlanNode msg) { if (annSortLimit != -1) { msg.olap_scan_node.setAnnSortLimit(annSortLimit); } - msg.olap_scan_node.setKeyType(olapTable.getKeysType().toThrift()); + if (selectedIndexId != -1) { + msg.olap_scan_node.setKeyType(olapTable.getIndexMetaByIndexId(selectedIndexId).getKeysType().toThrift()); + } else { + msg.olap_scan_node.setKeyType(olapTable.getKeysType().toThrift()); + } String tableName = olapTable.getName(); if (selectedIndexId != -1) { tableName = tableName + "(" + 
getSelectedIndexName() + ")"; diff --git a/regression-test/data/rollup_p0/test_materialized_view_common_expr_push_down.out b/regression-test/data/rollup_p0/test_materialized_view_common_expr_push_down.out new file mode 100644 index 00000000000000..891d84e9ae60e9 --- /dev/null +++ b/regression-test/data/rollup_p0/test_materialized_view_common_expr_push_down.out @@ -0,0 +1,5 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +100 +100 + diff --git a/regression-test/suites/rollup_p0/test_materialized_view_common_expr_push_down.groovy b/regression-test/suites/rollup_p0/test_materialized_view_common_expr_push_down.groovy new file mode 100644 index 00000000000000..31359db5fdd903 --- /dev/null +++ b/regression-test/suites/rollup_p0/test_materialized_view_common_expr_push_down.groovy @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+suite("test_materialized_view_common_expr_push_down") { // covers issue #58037: common-expr push-down onto an MV scan
+    def baseTable = "test_base_tbl"
+    def mvTable = "test_mv"
+    // Returns column 8 of the newest SHOW ALTER TABLE MATERIALIZED VIEW row; the poll loop below treats it as the job state.
+    def getJobState = { tableName ->
+        def jobStateResult = sql """ SHOW ALTER TABLE MATERIALIZED VIEW WHERE TableName='${tableName}' ORDER BY CreateTime DESC LIMIT 1; """
+        return jobStateResult[0][8]
+    }
+    // Enable the push-down path that this test exercises.
+    sql """set enable_common_expr_pushdown = true"""
+    // The base table is DUPLICATE KEY, while the view created below groups by all five of its columns.
+    sql "DROP TABLE IF EXISTS ${baseTable}"
+    sql """
+        CREATE TABLE `${baseTable}` (
+        `companyId` bigint NULL,
+        `jobId` bigint NULL,
+        `province` text NULL,
+        `vCallerId` bigint NULL DEFAULT "0",
+        `aCallerId` bigint NULL DEFAULT "-1"
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`companyId`)
+        DISTRIBUTED BY HASH(`companyId`) BUCKETS 8
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    sql """
+        CREATE materialized VIEW ${mvTable} AS SELECT
+        companyId as company_id,
+        jobId as job_id,
+        province as p,
+        vCallerId as v_caller_id,
+        aCallerId as a_caller_id
+        FROM ${baseTable}
+        GROUP BY company_id, job_id, p, v_caller_id, a_caller_id;
+    """
+    int max_try_secs = 60 // counts polls, not seconds: every unfinished poll below sleeps 2s
+    while (max_try_secs--) {
+        String res = getJobState(baseTable)
+        if (res == "FINISHED" || res == "CANCELLED") {
+            assertEquals("FINISHED", res) // a CANCELLED job fails the test here
+            sleep(3000) // NOTE(review): presumably gives the new index time to settle before it is queried -- confirm
+            break
+        } else {
+            Thread.sleep(2000)
+            if (max_try_secs < 1) { // that was the last poll: report the state and fail
+                println "test timeout," + "state:" + res
+                assertEquals("FINISHED",res)
+            }
+        }
+    }
+    // Two rows that differ only in province (the third column).
+    sql "insert into ${baseTable} values(100,100,'北京',3,3)"
+    sql "insert into ${baseTable} values(100,100,'广东',3,3)"
+    // Read through the MV index, projecting company_id only while filtering on v_caller_id and an if() over a_caller_id.
+    qt_sql """ select company_id from ${baseTable} index ${mvTable} where v_caller_id = 3 and if(`a_caller_id` is null,-1, `a_caller_id`) = 3 """
+    // Remove the base table at the end of the run.
+    sql "DROP TABLE ${baseTable} FORCE;"
+}