From 875d9fbafc0c934d0a870223f5da410c65756023 Mon Sep 17 00:00:00 2001 From: Xin Liao Date: Sat, 8 Feb 2025 14:24:51 +0800 Subject: [PATCH] [Fix](load) Reset memtable immediately after insert failure to prevent crash (#47610) ### What problem does this PR solve? *** Query id: 5447701417c13e4e-cea25b10f284c6a5 *** *** is nereids: 0 *** *** tablet id: 1738818748602 *** *** Aborted at 1738820047 (unix time) try "date -d @1738820047" if you are using GNU date *** *** Current BE git commitID: 512681c238 *** *** SIGSEGV invalid permissions for mapped object (@0x7f112a5df53f) received by PID 6310 (TID 6765 OR 0x7f1384ed3640) from PID 710800703; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /home/zcp/repo_center/doris_branch-3.0/doris/be/src/common/signal_handler.h:421 1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in /usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so 2# JVM_handle_linux_signal in /usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so 3# 0x00007F14815CC520 in /lib/x86_64-linux-gnu/libc.so.6 4# doris::vectorized::ColumnVector::insert_indices_from(doris::vectorized::IColumn const&, unsigned int const*, unsigned int const*) at /home/zcp/repo_center/doris_branch-3.0/doris/be/src/vec/columns/column_vector.cpp:323 5# doris::vectorized::MutableBlock::add_rows(doris::vectorized::Block const*, unsigned int const*, unsigned int const*, std::vector > const*) at /home/zcp/repo_center/doris_branch-3.0/doris/be/src/vec/core/block.cpp:1036 6# doris::MemTable::_put_into_output(doris::vectorized::Block&) at /home/zcp/repo_center/doris_branch-3.0/doris/be/src/olap/memtable.cpp:257 7# doris::MemTable::_to_block(std::unique_ptr >*) at /home/zcp/repo_center/doris_branch-3.0/doris/be/src/olap/memtable.cpp:513 8# doris::MemTable::to_block(std::unique_ptr >*) at /home/zcp/repo_center/doris_branch-3.0/doris/be/src/olap/memtable.cpp:532 9# doris::FlushToken::_do_flush_memtable(doris::MemTable*, int, long*) at /home/zcp/repo_center/doris_branch-3.0/doris/be/src/olap/memtable_flush_executor.cpp:144 10# doris::FlushToken::_flush_memtable(std::shared_ptr, int, long) in /mnt/hdd01/PERFORMANCE_ENV/be/lib/doris_be 11# doris::MemtableFlushTask::run() at /home/zcp/repo_center/doris_branch-3.0/doris/be/src/olap/memtable_flush_executor.cpp:60 12# doris::ThreadPool::dispatch_thread() in /mnt/hdd01/PERFORMANCE_ENV/be/lib/doris_be 13# doris::Thread::supervise_thread(void*) at /home/zcp/repo_center/doris_branch-3.0/doris/be/src/util/thread.cpp:499 14# start_thread at ./nptl/pthread_create.c:442 15# 0x00007F14816B0850 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83 Problem Summary: - When memtable insert fails (e.g., due to memory allocation failure during add_rows), the memtable is left in an inconsistent state - Under memory pressure, the system might trigger a flush operation on this failed memtable, leading to crashes Solution: - Reset memtable immediately after insert failure --- be/src/olap/memtable_writer.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/be/src/olap/memtable_writer.cpp b/be/src/olap/memtable_writer.cpp index e8123c48eccd29..15548e8c89e80b 100644 --- a/be/src/olap/memtable_writer.cpp +++ b/be/src/olap/memtable_writer.cpp @@ -115,7 +115,20 @@ Status MemTableWriter::write(const vectorized::Block* block, } _total_received_rows += row_idxs.size(); - RETURN_IF_ERROR(_mem_table->insert(block, row_idxs)); + auto st = _mem_table->insert(block, row_idxs); + + // Reset memtable immediately after insert failure to prevent potential flush operations. + // This is a defensive measure because: + // 1. When insert fails (e.g., memory allocation failure during add_rows), + // the memtable is in an inconsistent state and should not be flushed + // 2. However, memory pressure might trigger a flush operation on this failed memtable + // 3. By resetting here, we ensure the failed memtable won't be included in any subsequent flush, + // thus preventing potential crashes + if (!st.ok()) [[unlikely]] { + std::lock_guard l(_mem_table_ptr_lock); + _mem_table.reset(); + return st; + } if (UNLIKELY(_mem_table->need_agg() && config::enable_shrink_memory)) { _mem_table->shrink_memtable_by_agg();