From a0fbd4a542daa4ea9091fff9ac206288109d60a0 Mon Sep 17 00:00:00 2001 From: abmdocrt Date: Wed, 26 Feb 2025 17:56:07 +0800 Subject: [PATCH] [Fix](compaction) Fix nullptr when executing cold compaction (#48329) The cold compaction did not set the compaction policy in the initial phase, causing a null pointer exception when logging later. This issue occurs infrequently; if the tablet has previously done any compaction or calculated a compaction score, the compaction policy will be set automatically, allowing cold compaction to run normally. However, if a newly created tablet undergoes cold compaction immediately, this problem will arise. This PR fixes the issue. --- be/src/olap/olap_server.cpp | 6 + be/src/olap/tablet.cpp | 5 + ...old_data_compaction_fault_injection.groovy | 134 ++++++++++++++++++ 3 files changed, 145 insertions(+) create mode 100644 regression-test/suites/fault_injection_p0/test_cold_data_compaction_fault_injection.groovy diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index 267adc5781f1b3..338d23c9fcc8ba 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -1460,6 +1460,12 @@ void StorageEngine::_cold_data_compaction_producer_callback() { << t->tablet_id(); return; } + if (t->get_cumulative_compaction_policy() == nullptr || + t->get_cumulative_compaction_policy()->name() != + t->tablet_meta()->compaction_policy()) { + t->set_cumulative_compaction_policy(_cumulative_compaction_policies.at( + t->tablet_meta()->compaction_policy())); + } auto st = compaction->prepare_compact(); if (!st.ok()) { diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 217990315b1b38..9c99255abdecf5 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -1043,6 +1043,11 @@ uint32_t Tablet::_calc_cumulative_compaction_score( if (cumulative_compaction_policy == nullptr) [[unlikely]] { return 0; } + DBUG_EXECUTE_IF("Tablet._calc_cumulative_compaction_score.return", { + LOG_WARNING("Tablet._calc_cumulative_compaction_score.return") + .tag("tablet id", tablet_id()); + return 0; + }); #ifndef BE_TEST if (_cumulative_compaction_policy == nullptr || _cumulative_compaction_policy->name() != cumulative_compaction_policy->name()) { diff --git a/regression-test/suites/fault_injection_p0/test_cold_data_compaction_fault_injection.groovy b/regression-test/suites/fault_injection_p0/test_cold_data_compaction_fault_injection.groovy new file mode 100644 index 00000000000000..3f173edace52a3 --- /dev/null +++ b/regression-test/suites/fault_injection_p0/test_cold_data_compaction_fault_injection.groovy @@ -0,0 +1,134 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import com.amazonaws.services.s3.model.ListObjectsRequest +import java.util.function.Supplier + +suite("test_cold_data_compaction_fault_injection", "nonConcurrent") { + def retryUntilTimeout = { int timeoutSecond, Supplier closure -> + long start = System.currentTimeMillis() + while (true) { + if (closure.get()) { + return + } else { + if (System.currentTimeMillis() - start > timeoutSecond * 1000) { + throw new RuntimeException("" + + "Operation timeout, maybe you need to check " + + "remove_unused_remote_files_interval_sec and " + + "cold_data_compaction_interval_sec in be.conf") + } else { + sleep(10_000) + } + } + } + } + def tabletName = "test_cold_data_compaction_fault_injection" + + String suffix = UUID.randomUUID().hashCode().abs().toString() + String s3Prefix = "regression/cold_data_compaction/${suffix}" + multi_sql """ + DROP TABLE IF EXISTS ${tabletName} force; + DROP STORAGE POLICY IF EXISTS test_policy_${suffix}; + DROP RESOURCE IF EXISTS 'remote_s3_${suffix}'; + + CREATE RESOURCE "remote_s3_${suffix}" + PROPERTIES + ( + "type" = "s3", + "s3.endpoint" = "${getS3Endpoint()}", + "s3.region" = "${getS3Region()}", + "s3.bucket" = "${getS3BucketName()}", + "s3.root.path" = "${s3Prefix}", + "s3.access_key" = "${getS3AK()}", + "s3.secret_key" = "${getS3SK()}", + "s3.connection.maximum" = "50", + "s3.connection.request.timeout" = "3000", + "s3.connection.timeout" = "1000" + ); + CREATE STORAGE POLICY test_policy_${suffix} + PROPERTIES( + "storage_resource" = "remote_s3_${suffix}", + "cooldown_ttl" = "5" + ); + CREATE TABLE IF NOT EXISTS ${tabletName} + ( + k1 BIGINT, + k2 LARGEINT, + v1 VARCHAR(2048) + ) + DISTRIBUTED BY HASH (k1) BUCKETS 1 + PROPERTIES( + "storage_policy" = "test_policy_${suffix}", + "disable_auto_compaction" = "true", + "replication_num" = "1" + ); + """ + + // insert 5 RowSets + multi_sql """ + insert into ${tabletName} values(1, 1, 'Tom'); + insert into ${tabletName} values(2, 2, 'Jelly'); + insert into ${tabletName} values(3, 3, 'Spike'); + insert into ${tabletName} values(4, 4, 'Tyke'); + insert into ${tabletName} values(5, 5, 'Tuffy'); + """ + + // wait until files upload to S3 + retryUntilTimeout(900, { + def res = sql_return_maparray "show data from ${tabletName}" + String size = "" + String remoteSize = "" + for (final def line in res) { + if (tabletName.equals(line.TableName)) { + size = line.Size + remoteSize = line.RemoteSize + break + } + } + logger.info("waiting for data to be uploaded to S3: ${tabletName}'s local data size: ${size}, remote data size: ${remoteSize}") + return size.startsWith("0") && !remoteSize.startsWith("0") + }) + + String tabletId = sql_return_maparray("show tablets from ${tabletName}")[0].TabletId + // check number of remote files + def filesBeforeCompaction = getS3Client().listObjects( + new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix + "/data/${tabletId}")).getObjectSummaries() + + // 5 RowSets + 1 meta + assertEquals(6, filesBeforeCompaction.size()) + + try { + GetDebugPoint().clearDebugPointsForAllBEs() + GetDebugPoint().enableDebugPointForAllBEs("Tablet._calc_cumulative_compaction_score.return") + // trigger cold data compaction + sql """alter table ${tabletName} set ("disable_auto_compaction" = "false")""" + + // wait until compaction finish + retryUntilTimeout(900, { + def filesAfterCompaction = getS3Client().listObjects( + new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix+ "/data/${tabletId}")).getObjectSummaries() + logger.info("${tabletName}'s remote file number is ${filesAfterCompaction.size()}") + // 1 RowSet + 1 meta + return filesAfterCompaction.size() == 7 + }) + + sql "drop table ${tabletName} force" + } finally { + GetDebugPoint().disableDebugPointForAllBEs("Tablet._calc_cumulative_compaction_score.return") + GetDebugPoint().clearDebugPointsForAllBEs() + } +}