From e7c7dafc0fea1e4be81152fcd1c763fdc3e4a708 Mon Sep 17 00:00:00 2001 From: yagagagaga Date: Thu, 27 Feb 2025 21:10:18 +0800 Subject: [PATCH 1/6] [fix](cold hot separation) Fix the issue that files on the remote storage are not deleted after triggering cold data compaction. (#48109) (cherry picked from commit 3d5575e05e11911683653df716ef93d9585fcc9c) --- be/src/olap/olap_server.cpp | 2 +- .../doris/regression/suite/Suite.groovy | 104 ++++++++------ regression-test/pipeline/p0/conf/be.conf | 3 + .../cold_data_compaction.groovy | 132 ++++++++++++++++++ 4 files changed, 197 insertions(+), 44 deletions(-) create mode 100644 regression-test/suites/cold_heat_separation/cold_data_compaction.groovy diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index 014213c4694598..51040f130b5c1e 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -1304,7 +1304,7 @@ void StorageEngine::do_remove_unused_remote_files() { } cooldown_meta_id = t->tablet_meta()->cooldown_meta_id(); } - auto [cooldown_replica_id, cooldown_term] = t->cooldown_conf(); + auto [cooldown_term, cooldown_replica_id] = t->cooldown_conf(); if (cooldown_replica_id != t->replica_id()) { return; } diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy index 322b15c70d7507..b075b766d8102d 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy @@ -17,6 +17,12 @@ package org.apache.doris.regression.suite +import com.amazonaws.auth.AWSStaticCredentialsProvider +import com.amazonaws.auth.BasicAWSCredentials +import com.amazonaws.client.builder.AwsClientBuilder +import com.amazonaws.services.s3.AmazonS3 +import com.amazonaws.services.s3.AmazonS3ClientBuilder + import com.google.common.collect.Maps import com.google.common.util.concurrent.Futures import com.google.common.util.concurrent.ListenableFuture @@ -85,6 +91,8 @@ class Suite implements GroovyInterceptable { final List lazyCheckExceptions = new Vector<>() final List lazyCheckFutures = new Vector<>() + private AmazonS3 s3Client = null + Suite(String name, String group, SuiteContext context, SuiteCluster cluster) { this.name = name this.group = group @@ -421,7 +429,7 @@ class Suite implements GroovyInterceptable { } } - def sql_return_maparray_impl(String sqlStr, Connection conn = null) { + def sql_return_maparray_impl(String sqlStr, Connection conn = null) { logger.info("Execute sql: ${sqlStr}".toString()) if (conn == null) { conn = context.getConnection() @@ -439,7 +447,7 @@ class Suite implements GroovyInterceptable { // which cannot be handled by maps and will result in an error directly. 
Set uniqueSet = new HashSet<>() Set duplicates = new HashSet<>() - + for (String str : columnNames) { if (uniqueSet.contains(str)) { duplicates.add(str) @@ -448,7 +456,7 @@ class Suite implements GroovyInterceptable { } } if (!duplicates.isEmpty()) { - def errorMessage = "${sqlStr} returns duplicates headers: ${duplicates}" + def errorMessage = "${sqlStr} returns duplicates headers: ${duplicates}" throw new Exception(errorMessage) } @@ -607,7 +615,7 @@ class Suite implements GroovyInterceptable { } long getTableVersion(long dbId, String tableName) { - def result = sql_return_maparray """show proc '/dbs/${dbId}'""" + def result = sql_return_maparray """show proc '/dbs/${dbId}'""" for (def res : result) { if(res.TableName.equals(tableName)) { log.info(res.toString()) @@ -639,31 +647,31 @@ class Suite implements GroovyInterceptable { } AtomicBoolean isFirst = new AtomicBoolean(true) String sql = list.stream() - .map({ row -> - StringBuilder sb = new StringBuilder("SELECT ") - if (row instanceof List) { - if (isFirst.get()) { - String columns = row.withIndex().collect({ column, index -> - "${toSelectString(column)} AS c${index + 1}" - }).join(", ") - sb.append(columns) - isFirst.set(false) - } else { - String columns = row.collect({ column -> - "${toSelectString(column)}" - }).join(", ") - sb.append(columns) - } - } else { - if (isFirst.get()) { - sb.append(toSelectString(row)).append(" AS c1") - isFirst.set(false) + .map({ row -> + StringBuilder sb = new StringBuilder("SELECT ") + if (row instanceof List) { + if (isFirst.get()) { + String columns = row.withIndex().collect({ column, index -> + "${toSelectString(column)} AS c${index + 1}" + }).join(", ") + sb.append(columns) + isFirst.set(false) + } else { + String columns = row.collect({ column -> + "${toSelectString(column)}" + }).join(", ") + sb.append(columns) + } } else { - sb.append(toSelectString(row)) + if (isFirst.get()) { + sb.append(toSelectString(row)).append(" AS c1") + isFirst.set(false) + } else { + sb.append(toSelectString(row)) + } } - } - return sb.toString() - }).collect(Collectors.joining("\nUNION ALL\n")) + return sb.toString() + }).collect(Collectors.joining("\nUNION ALL\n")) return sql } @@ -907,6 +915,16 @@ class Suite implements GroovyInterceptable { return s3Url } + synchronized AmazonS3 getS3Client() { + if (s3Client == null) { + def credentials = new BasicAWSCredentials(getS3AK(), getS3SK()) + def endpointConfiguration = new AwsClientBuilder.EndpointConfiguration(getS3Endpoint(), getS3Region()) + s3Client = AmazonS3ClientBuilder.standard().withEndpointConfiguration(endpointConfiguration) + .withCredentials(new AWSStaticCredentialsProvider(credentials)).build() + } + return s3Client + } + void scpFiles(String username, String host, String files, String filePath, boolean fromDst=true) { String cmd = "scp -r ${username}@${host}:${files} ${filePath}" if (!fromDst) { @@ -983,7 +1001,7 @@ class Suite implements GroovyInterceptable { } void getBackendIpHeartbeatPort(Map backendId_to_backendIP, - Map backendId_to_backendHeartbeatPort) { + Map backendId_to_backendHeartbeatPort) { List> backends = sql("show backends"); logger.info("Content of backends: ${backends}") for (List backend : backends) { @@ -1061,7 +1079,7 @@ class Suite implements GroovyInterceptable { } void getBackendIpHttpAndBrpcPort(Map backendId_to_backendIP, - Map backendId_to_backendHttpPort, Map backendId_to_backendBrpcPort) { + Map backendId_to_backendHttpPort, Map backendId_to_backendBrpcPort) { List> backends = sql("show backends"); for (List backend : 
backends) { @@ -1282,16 +1300,16 @@ class Suite implements GroovyInterceptable { String errorMsg = null try { errorMsg = OutputUtils.checkOutput(expectCsvResults, realResults.iterator(), - { row -> OutputUtils.toCsvString(row as List) }, - { row -> OutputUtils.toCsvString(row) }, - "Check tag '${tag}' failed", meta) + { row -> OutputUtils.toCsvString(row as List) }, + { row -> OutputUtils.toCsvString(row) }, + "Check tag '${tag}' failed", meta) } catch (Throwable t) { throw new IllegalStateException("Check tag '${tag}' failed, sql:\n${arg}", t) } if (errorMsg != null) { String csvRealResult = realResults.stream() - .map {row -> OutputUtils.toCsvString(row)} - .collect(Collectors.joining("\n")) + .map {row -> OutputUtils.toCsvString(row)} + .collect(Collectors.joining("\n")) def outputFilePath = context.outputFile.getCanonicalPath().substring(context.config.dataPath.length() + 1) def line = expectCsvResults.currentLine() logger.warn("expect results in file: ${outputFilePath}, line: ${line}\nrealResults:\n" + csvRealResult) @@ -1586,14 +1604,14 @@ class Suite implements GroovyInterceptable { logger.info("result expected: " + resultExpected.toString()) String errorMsg = OutputUtils.checkOutput(resultExpected.iterator(), resultByFoldConstant.iterator(), - { row -> OutputUtils.toCsvString(row as List) }, - { row -> OutputUtils.toCsvString(row) }, - "check output failed", meta) + { row -> OutputUtils.toCsvString(row as List) }, + { row -> OutputUtils.toCsvString(row) }, + "check output failed", meta) } String getJobName(String dbName, String mtmvName) { String showMTMV = "select JobName from mv_infos('database'='${dbName}') where Name = '${mtmvName}'"; - logger.info(showMTMV) + logger.info(showMTMV) List> result = sql(showMTMV) logger.info("result: " + result.toString()) if (result.isEmpty()) { @@ -1848,7 +1866,7 @@ class Suite implements GroovyInterceptable { sql(" memo plan ${query_sql}") check { result -> boolean success = !result.contains("${mv_name} chose") && !result.contains("${mv_name} not chose") - && !result.contains("${mv_name} fail") + && !result.contains("${mv_name} fail") Assert.assertEquals(true, success) } } @@ -1887,7 +1905,7 @@ class Suite implements GroovyInterceptable { // is_partition_statistics_ready is the bool value which identifying if partition row count is valid or not // if true, check if chosen by cbo or doesn't check void mv_rewrite_success(query_sql, mv_name, sync_cbo_rewrite = enable_sync_mv_cost_based_rewrite(), - is_partition_statistics_ready = true) { + is_partition_statistics_ready = true) { logger.info("query_sql = " + query_sql + ", mv_name = " + mv_name + ", sync_cbo_rewrite = " +sync_cbo_rewrite + ", is_partition_statistics_ready = " + is_partition_statistics_ready) if (!is_partition_statistics_ready) { @@ -1913,7 +1931,7 @@ class Suite implements GroovyInterceptable { // is_partition_statistics_ready is the bool value which identifying if partition row count is valid or not // if true, check if chosen by cbo or doesn't check void mv_rewrite_all_success( query_sql, mv_names, sync_cbo_rewrite = enable_sync_mv_cost_based_rewrite(), - is_partition_statistics_ready = true) { + is_partition_statistics_ready = true) { logger.info("query_sql = " + query_sql + ", mv_names = " + mv_names + ", sync_cbo_rewrite = " +sync_cbo_rewrite + ", is_partition_statistics_ready = " + is_partition_statistics_ready) if (!is_partition_statistics_ready) { @@ -1957,7 +1975,7 @@ class Suite implements GroovyInterceptable { // is_partition_statistics_ready is the bool value which 
identifying if partition row count is valid or not // if true, check if chosen by cbo or doesn't check void mv_rewrite_any_success(query_sql, mv_names, sync_cbo_rewrite = enable_sync_mv_cost_based_rewrite(), - is_partition_statistics_ready = true) { + is_partition_statistics_ready = true) { logger.info("query_sql = " + query_sql + ", mv_names = " + mv_names + ", sync_cbo_rewrite = " +sync_cbo_rewrite + ", is_partition_statistics_ready = " + is_partition_statistics_ready) if (!is_partition_statistics_ready) { @@ -1999,7 +2017,7 @@ class Suite implements GroovyInterceptable { // multi mv part in rewrite process, all rewrte success without check if chosen by cbo // sync_cbo_rewrite is the bool value which control sync mv is use cbo based mv rewrite void mv_rewrite_all_success_without_check_chosen(query_sql, mv_names, - sync_cbo_rewrite = enable_sync_mv_cost_based_rewrite()){ + sync_cbo_rewrite = enable_sync_mv_cost_based_rewrite()){ logger.info("query_sql = " + query_sql + ", mv_names = " + mv_names) if (!sync_cbo_rewrite) { explain { diff --git a/regression-test/pipeline/p0/conf/be.conf b/regression-test/pipeline/p0/conf/be.conf index 486e01c3bd2ef1..b6cf8fb2792084 100644 --- a/regression-test/pipeline/p0/conf/be.conf +++ b/regression-test/pipeline/p0/conf/be.conf @@ -92,5 +92,8 @@ enable_missing_rows_correctness_check=true crash_in_memory_tracker_inaccurate = true enable_brpc_connection_check=true +remove_unused_remote_files_interval_sec=60 +cold_data_compaction_interval_sec=60 + # This feature has bug, so by default is false, only open it in pipeline to observe enable_parquet_page_index=true diff --git a/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy b/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy new file mode 100644 index 00000000000000..bf9e33e7528759 --- /dev/null +++ b/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy @@ -0,0 +1,132 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+import com.amazonaws.services.s3.model.ListObjectsRequest
+import java.util.function.Supplier
+
+suite("test_cold_data_compaction") {
+    def retryUntilTimeout = { int timeoutSecond, Supplier<Boolean> closure ->
+        long start = System.currentTimeMillis()
+        while (true) {
+            if (closure.get()) {
+                return
+            } else {
+                if (System.currentTimeMillis() - start > timeoutSecond * 1000) {
+                    throw new RuntimeException("" +
+                            "Operation timeout, maybe you need to check " +
+                            "remove_unused_remote_files_interval_sec and " +
+                            "cold_data_compaction_interval_sec in be.conf")
+                } else {
+                    sleep(10_000)
+                }
+            }
+        }
+    }
+
+    String suffix = UUID.randomUUID().hashCode().abs().toString()
+    String s3Prefix = "regression/cold_data_compaction"
+    multi_sql """
+        DROP TABLE IF EXISTS t_recycle_in_s3;
+        DROP STORAGE POLICY IF EXISTS test_policy_${suffix};
+        DROP RESOURCE IF EXISTS 'remote_s3_${suffix}';
+
+        CREATE RESOURCE "remote_s3_${suffix}"
+        PROPERTIES
+        (
+            "type" = "s3",
+            "s3.endpoint" = "${getS3Endpoint()}",
+            "s3.region" = "${getS3Region()}",
+            "s3.bucket" = "${getS3BucketName()}",
+            "s3.root.path" = "${s3Prefix}",
+            "s3.access_key" = "${getS3AK()}",
+            "s3.secret_key" = "${getS3SK()}",
+            "s3.connection.maximum" = "50",
+            "s3.connection.request.timeout" = "3000",
+            "s3.connection.timeout" = "1000"
+        );
+        CREATE STORAGE POLICY test_policy_${suffix}
+        PROPERTIES(
+            "storage_resource" = "remote_s3_${suffix}",
+            "cooldown_ttl" = "5"
+        );
+        CREATE TABLE IF NOT EXISTS t_recycle_in_s3
+        (
+            k1 BIGINT,
+            k2 LARGEINT,
+            v1 VARCHAR(2048)
+        )
+        DISTRIBUTED BY HASH (k1) BUCKETS 1
+        PROPERTIES(
+            "storage_policy" = "test_policy_${suffix}",
+            "disable_auto_compaction" = "true",
+            "replication_num" = "1"
+        );
+    """
+
+    // insert 5 RowSets
+    multi_sql """
+        insert into t_recycle_in_s3 values(1, 1, 'Tom');
+        insert into t_recycle_in_s3 values(2, 2, 'Jelly');
+        insert into t_recycle_in_s3 values(3, 3, 'Spike');
+        insert into t_recycle_in_s3 values(4, 4, 'Tyke');
+        insert into t_recycle_in_s3 values(5, 5, 'Tuffy');
+    """
+
+    // wait until files upload to S3
+    retryUntilTimeout(1800, {
+        def res = sql_return_maparray "show data from t_recycle_in_s3"
+        String size = ""
+        String remoteSize = ""
+        for (final def line in res) {
+            if ("t_recycle_in_s3".equals(line.TableName)) {
+                size = line.Size
+                remoteSize = line.RemoteSize
+                break
+            }
+        }
+        logger.info("waiting for data to be uploaded to S3: t_recycle_in_s3's local data size: ${size}, remote data size: ${remoteSize}")
+        return size.startsWith("0") && !remoteSize.startsWith("0")
+    })
+
+    String tabletId = sql_return_maparray("show tablets from t_recycle_in_s3")[0].TabletId
+    // check number of remote files
+    def filesBeforeCompaction = getS3Client().listObjects(
+            new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix + "/data/${tabletId}")).getObjectSummaries()
+
+    // 5 RowSets + 1 meta
+    assertEquals(6, filesBeforeCompaction.size())
+
+    // trigger cold data compaction
+    sql """alter table t_recycle_in_s3 set ("disable_auto_compaction" = "false")"""
+
+    // wait until compaction finish
+    retryUntilTimeout(1800, {
+        def filesAfterCompaction = getS3Client().listObjects(
+                new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix+ "/data/${tabletId}")).getObjectSummaries()
+        logger.info("t_recycle_in_s3's remote file number is ${filesAfterCompaction.size()}")
+        // 1 RowSet + 1 meta
+        return filesAfterCompaction.size() == 2
+    })
+
+    sql "drop table t_recycle_in_s3 force"
+    retryUntilTimeout(1800, {
+        def filesAfterDrop = getS3Client().listObjects(
+ new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix+ "/data/${tabletId}")).getObjectSummaries() + logger.info("after drop t_recycle_in_s3, remote file number is ${filesAfterDrop.size()}") + return filesAfterDrop.size() == 0 + }) +} From 52e094bcc91197ec1b1b1577ac6932790a2781bc Mon Sep 17 00:00:00 2001 From: yagagagaga Date: Thu, 27 Feb 2025 23:49:38 +0800 Subject: [PATCH 2/6] rollback --- .../doris/regression/suite/Suite.groovy | 86 +++++++++---------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy index b075b766d8102d..1d3d1580b62d21 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy @@ -429,7 +429,7 @@ class Suite implements GroovyInterceptable { } } - def sql_return_maparray_impl(String sqlStr, Connection conn = null) { + def sql_return_maparray_impl(String sqlStr, Connection conn = null) { logger.info("Execute sql: ${sqlStr}".toString()) if (conn == null) { conn = context.getConnection() @@ -447,7 +447,7 @@ class Suite implements GroovyInterceptable { // which cannot be handled by maps and will result in an error directly. Set uniqueSet = new HashSet<>() Set duplicates = new HashSet<>() - + for (String str : columnNames) { if (uniqueSet.contains(str)) { duplicates.add(str) @@ -456,7 +456,7 @@ class Suite implements GroovyInterceptable { } } if (!duplicates.isEmpty()) { - def errorMessage = "${sqlStr} returns duplicates headers: ${duplicates}" + def errorMessage = "${sqlStr} returns duplicates headers: ${duplicates}" throw new Exception(errorMessage) } @@ -615,7 +615,7 @@ class Suite implements GroovyInterceptable { } long getTableVersion(long dbId, String tableName) { - def result = sql_return_maparray """show proc '/dbs/${dbId}'""" + def result = sql_return_maparray """show proc '/dbs/${dbId}'""" for (def res : result) { if(res.TableName.equals(tableName)) { log.info(res.toString()) @@ -647,31 +647,31 @@ class Suite implements GroovyInterceptable { } AtomicBoolean isFirst = new AtomicBoolean(true) String sql = list.stream() - .map({ row -> - StringBuilder sb = new StringBuilder("SELECT ") - if (row instanceof List) { - if (isFirst.get()) { - String columns = row.withIndex().collect({ column, index -> - "${toSelectString(column)} AS c${index + 1}" - }).join(", ") - sb.append(columns) - isFirst.set(false) - } else { - String columns = row.collect({ column -> - "${toSelectString(column)}" - }).join(", ") - sb.append(columns) - } + .map({ row -> + StringBuilder sb = new StringBuilder("SELECT ") + if (row instanceof List) { + if (isFirst.get()) { + String columns = row.withIndex().collect({ column, index -> + "${toSelectString(column)} AS c${index + 1}" + }).join(", ") + sb.append(columns) + isFirst.set(false) } else { - if (isFirst.get()) { - sb.append(toSelectString(row)).append(" AS c1") - isFirst.set(false) - } else { - sb.append(toSelectString(row)) - } + String columns = row.collect({ column -> + "${toSelectString(column)}" + }).join(", ") + sb.append(columns) } - return sb.toString() - }).collect(Collectors.joining("\nUNION ALL\n")) + } else { + if (isFirst.get()) { + sb.append(toSelectString(row)).append(" AS c1") + isFirst.set(false) + } else { + sb.append(toSelectString(row)) + } + } + return sb.toString() + 
}).collect(Collectors.joining("\nUNION ALL\n")) return sql } @@ -1001,7 +1001,7 @@ class Suite implements GroovyInterceptable { } void getBackendIpHeartbeatPort(Map backendId_to_backendIP, - Map backendId_to_backendHeartbeatPort) { + Map backendId_to_backendHeartbeatPort) { List> backends = sql("show backends"); logger.info("Content of backends: ${backends}") for (List backend : backends) { @@ -1079,7 +1079,7 @@ class Suite implements GroovyInterceptable { } void getBackendIpHttpAndBrpcPort(Map backendId_to_backendIP, - Map backendId_to_backendHttpPort, Map backendId_to_backendBrpcPort) { + Map backendId_to_backendHttpPort, Map backendId_to_backendBrpcPort) { List> backends = sql("show backends"); for (List backend : backends) { @@ -1300,16 +1300,16 @@ class Suite implements GroovyInterceptable { String errorMsg = null try { errorMsg = OutputUtils.checkOutput(expectCsvResults, realResults.iterator(), - { row -> OutputUtils.toCsvString(row as List) }, - { row -> OutputUtils.toCsvString(row) }, - "Check tag '${tag}' failed", meta) + { row -> OutputUtils.toCsvString(row as List) }, + { row -> OutputUtils.toCsvString(row) }, + "Check tag '${tag}' failed", meta) } catch (Throwable t) { throw new IllegalStateException("Check tag '${tag}' failed, sql:\n${arg}", t) } if (errorMsg != null) { String csvRealResult = realResults.stream() - .map {row -> OutputUtils.toCsvString(row)} - .collect(Collectors.joining("\n")) + .map {row -> OutputUtils.toCsvString(row)} + .collect(Collectors.joining("\n")) def outputFilePath = context.outputFile.getCanonicalPath().substring(context.config.dataPath.length() + 1) def line = expectCsvResults.currentLine() logger.warn("expect results in file: ${outputFilePath}, line: ${line}\nrealResults:\n" + csvRealResult) @@ -1604,14 +1604,14 @@ class Suite implements GroovyInterceptable { logger.info("result expected: " + resultExpected.toString()) String errorMsg = OutputUtils.checkOutput(resultExpected.iterator(), resultByFoldConstant.iterator(), - { row -> OutputUtils.toCsvString(row as List) }, - { row -> OutputUtils.toCsvString(row) }, - "check output failed", meta) + { row -> OutputUtils.toCsvString(row as List) }, + { row -> OutputUtils.toCsvString(row) }, + "check output failed", meta) } String getJobName(String dbName, String mtmvName) { String showMTMV = "select JobName from mv_infos('database'='${dbName}') where Name = '${mtmvName}'"; - logger.info(showMTMV) + logger.info(showMTMV) List> result = sql(showMTMV) logger.info("result: " + result.toString()) if (result.isEmpty()) { @@ -1866,7 +1866,7 @@ class Suite implements GroovyInterceptable { sql(" memo plan ${query_sql}") check { result -> boolean success = !result.contains("${mv_name} chose") && !result.contains("${mv_name} not chose") - && !result.contains("${mv_name} fail") + && !result.contains("${mv_name} fail") Assert.assertEquals(true, success) } } @@ -1905,7 +1905,7 @@ class Suite implements GroovyInterceptable { // is_partition_statistics_ready is the bool value which identifying if partition row count is valid or not // if true, check if chosen by cbo or doesn't check void mv_rewrite_success(query_sql, mv_name, sync_cbo_rewrite = enable_sync_mv_cost_based_rewrite(), - is_partition_statistics_ready = true) { + is_partition_statistics_ready = true) { logger.info("query_sql = " + query_sql + ", mv_name = " + mv_name + ", sync_cbo_rewrite = " +sync_cbo_rewrite + ", is_partition_statistics_ready = " + is_partition_statistics_ready) if (!is_partition_statistics_ready) { @@ -1931,7 +1931,7 @@ class Suite 
implements GroovyInterceptable { // is_partition_statistics_ready is the bool value which identifying if partition row count is valid or not // if true, check if chosen by cbo or doesn't check void mv_rewrite_all_success( query_sql, mv_names, sync_cbo_rewrite = enable_sync_mv_cost_based_rewrite(), - is_partition_statistics_ready = true) { + is_partition_statistics_ready = true) { logger.info("query_sql = " + query_sql + ", mv_names = " + mv_names + ", sync_cbo_rewrite = " +sync_cbo_rewrite + ", is_partition_statistics_ready = " + is_partition_statistics_ready) if (!is_partition_statistics_ready) { @@ -1975,7 +1975,7 @@ class Suite implements GroovyInterceptable { // is_partition_statistics_ready is the bool value which identifying if partition row count is valid or not // if true, check if chosen by cbo or doesn't check void mv_rewrite_any_success(query_sql, mv_names, sync_cbo_rewrite = enable_sync_mv_cost_based_rewrite(), - is_partition_statistics_ready = true) { + is_partition_statistics_ready = true) { logger.info("query_sql = " + query_sql + ", mv_names = " + mv_names + ", sync_cbo_rewrite = " +sync_cbo_rewrite + ", is_partition_statistics_ready = " + is_partition_statistics_ready) if (!is_partition_statistics_ready) { @@ -2017,7 +2017,7 @@ class Suite implements GroovyInterceptable { // multi mv part in rewrite process, all rewrte success without check if chosen by cbo // sync_cbo_rewrite is the bool value which control sync mv is use cbo based mv rewrite void mv_rewrite_all_success_without_check_chosen(query_sql, mv_names, - sync_cbo_rewrite = enable_sync_mv_cost_based_rewrite()){ + sync_cbo_rewrite = enable_sync_mv_cost_based_rewrite()){ logger.info("query_sql = " + query_sql + ", mv_names = " + mv_names) if (!sync_cbo_rewrite) { explain { From 2917fd1a658fbd14beda66e90fc3a37e964c3a77 Mon Sep 17 00:00:00 2001 From: yagagagaga Date: Fri, 7 Mar 2025 15:09:57 +0800 Subject: [PATCH 3/6] add S3 dependency --- regression-test/framework/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/regression-test/framework/pom.xml b/regression-test/framework/pom.xml index ea4866787e0613..8d921ce15cf134 100644 --- a/regression-test/framework/pom.xml +++ b/regression-test/framework/pom.xml @@ -74,6 +74,7 @@ under the License. 4.0.19 4.9.3 2.8.0 + 1.11.95 17.0.0 @@ -265,6 +266,11 @@ under the License. 
         </dependency>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-java-sdk-s3</artifactId>
+            <version>${aws-java-sdk-s3.version}</version>
+        </dependency>
         <dependency>
             <groupId>org.apache.hive</groupId>
             <artifactId>hive-jdbc</artifactId>

From a13233dc52dfeefafcb2e49542aac489114e09a3 Mon Sep 17 00:00:00 2001
From: yagagagaga
Date: Tue, 11 Mar 2025 23:20:44 +0800
Subject: [PATCH 4/6] adjust timeout

---
 .../suites/cold_heat_separation/cold_data_compaction.groovy | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy b/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy
index bf9e33e7528759..d32a3b74d1c0ef 100644
--- a/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy
+++ b/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy
@@ -87,7 +87,7 @@ suite("test_cold_data_compaction") {
     """
 
     // wait until files upload to S3
-    retryUntilTimeout(1800, {
+    retryUntilTimeout(3600, {
         def res = sql_return_maparray "show data from t_recycle_in_s3"
         String size = ""
         String remoteSize = ""
@@ -114,7 +114,7 @@ suite("test_cold_data_compaction") {
     sql """alter table t_recycle_in_s3 set ("disable_auto_compaction" = "false")"""
 
     // wait until compaction finish
-    retryUntilTimeout(1800, {
+    retryUntilTimeout(3600, {
         def filesAfterCompaction = getS3Client().listObjects(
                 new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix+ "/data/${tabletId}")).getObjectSummaries()
         logger.info("t_recycle_in_s3's remote file number is ${filesAfterCompaction.size()}")
@@ -123,7 +123,7 @@ suite("test_cold_data_compaction") {
     })
 
     sql "drop table t_recycle_in_s3 force"
-    retryUntilTimeout(1800, {
+    retryUntilTimeout(3600, {
         def filesAfterDrop = getS3Client().listObjects(
                 new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix+ "/data/${tabletId}")).getObjectSummaries()
         logger.info("after drop t_recycle_in_s3, remote file number is ${filesAfterDrop.size()}")
         return filesAfterDrop.size() == 0
     })

From d125741cce0fa41db6b6487f0c346e24ffe567e9 Mon Sep 17 00:00:00 2001
From: yagagagaga
Date: Wed, 12 Mar 2025 11:01:22 +0800
Subject: [PATCH 5/6] set be.conf disable_auto_compaction = false

---
 .../suites/cold_heat_separation/cold_data_compaction.groovy | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy b/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy
index d32a3b74d1c0ef..6a191d6f7983b6 100644
--- a/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy
+++ b/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy
@@ -112,6 +112,10 @@ suite("test_cold_data_compaction") {
 
     // trigger cold data compaction
     sql """alter table t_recycle_in_s3 set ("disable_auto_compaction" = "false")"""
+    def v = get_be_param("disable_auto_compaction").values().toArray()[0].toString()
+    if ("true" == v) {
+        set_be_param("disable_auto_compaction", "false")
+    }
 
     // wait until compaction finish
     retryUntilTimeout(3600, {

From 7d03e9af3ee168ef8fec396de3d41005081cb13e Mon Sep 17 00:00:00 2001
From: yagagagaga
Date: Wed, 12 Mar 2025 11:02:53 +0800
Subject: [PATCH 6/6] set be.conf disable_auto_compaction = false

---
 .../suites/cold_heat_separation/cold_data_compaction.groovy | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy b/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy
index 6a191d6f7983b6..b59a803e9ec43f 100644
--- a/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy
+++ b/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy
@@ -126,6 +126,10 @@ suite("test_cold_data_compaction") {
         return filesAfterCompaction.size() == 2
     })
 
+    if ("true" == v) {
+        set_be_param("disable_auto_compaction", "true")
+    }
+
    sql "drop table t_recycle_in_s3 force"
    retryUntilTimeout(3600, {
        def filesAfterDrop = getS3Client().listObjects(
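
Reviewer notes on the series (the snippets below are illustrative sketches, not part of the patches):

Note on PATCH 1/6: the entire bug is the order of a structured binding. Tablet::cooldown_conf() returns the pair as (cooldown_term, cooldown_replica_id); the old code bound the names in the opposite order, so cooldown_replica_id actually held the term, the `cooldown_replica_id != t->replica_id()` guard fired for every tablet, and do_remove_unused_remote_files() returned before deleting anything on S3. The same positional hazard is easy to show in the test framework's own language; a minimal Groovy sketch with hypothetical values (3 for the term, 10045 for the replica id), not framework API:

    // Stand-in for Tablet::cooldown_conf(): returns [term, replicaId] in that order.
    def cooldownConf = { -> [3L, 10045L] }
    long replicaId = 10045L

    // Buggy binding (pre-patch): names swapped relative to the producer's order.
    def (cooldownReplicaId, cooldownTerm) = cooldownConf()
    assert cooldownReplicaId == 3L         // silently holds the term
    assert cooldownReplicaId != replicaId  // guard always trips -> early return, no GC

    // Fixed binding (post-patch): names mirror the producer's order.
    def (cooldownTerm2, cooldownReplicaId2) = cooldownConf()
    assert cooldownReplicaId2 == replicaId // the cooldown replica proceeds to delete files

Because both elements are integer-typed, no compiler can catch the swap; returning a small named struct instead of a pair would have prevented it.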
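Note on the new Suite.getS3Client() helper (PATCH 1/6, with the dependency added in PATCH 3/6): it lazily builds a single AWS SDK v1 client from the suite's existing S3 config helpers. A usage sketch for counting one tablet's remote objects from any suite body; the prefix and tablet id 10045 are hypothetical:

    import com.amazonaws.services.s3.model.ListObjectsRequest

    // List everything under <s3.root.path>/data/<tabletId>; for this test a
    // tablet only ever holds a handful of objects, so one page is enough.
    def summaries = getS3Client().listObjects(
            new ListObjectsRequest()
                    .withBucketName(getS3BucketName())
                    .withPrefix("regression/cold_data_compaction/data/10045"))
            .getObjectSummaries()
    summaries.each { logger.info("remote file: ${it.key} (${it.size} bytes)") }

One caveat: listObjects returns at most 1,000 keys per call. That is fine here (at most six objects per tablet: five single-insert RowSets plus one cooldown meta before compaction, two after, zero after the forced drop), but a scan over many tablets would need to loop on ObjectListing.isTruncated() via listNextBatchOfObjects().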
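Note on the polling and the two knobs added to regression-test/pipeline/p0/conf/be.conf: both the remote-file GC and cold data compaction are background schedulers, so the suite can only observe their effect by polling. With remove_unused_remote_files_interval_sec=60 and cold_data_compaction_interval_sec=60 each condition should normally flip within a couple of minutes; the 1800s budget (raised to 3600s in PATCH 4/6) is headroom for slow pipelines. The helper itself is a plain poll-or-timeout loop; a usage sketch against a hypothetical condition:

    // Poll every 10s until the closure yields true, or give up after 120s.
    // A Groovy closure coerces to the java.util.function.Supplier<Boolean> parameter.
    retryUntilTimeout(120, {
        def rows = sql_return_maparray "show data from t_recycle_in_s3"
        // hypothetical condition: local size reported as 0 once data has cooled down
        return rows.any { "t_recycle_in_s3" == it.TableName && it.Size.startsWith("0") }
    })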
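Note on PATCH 5/6 and 6/6: together they save the BE-level disable_auto_compaction flag, force it off so the BE actually schedules cold data compaction (evidently the table-level ALTER alone was not enough in pipelines where the BE flag is true, which is what these two patches address), and restore it after the compaction check. A condensed sketch of the combined effect; the try/finally below is an editorial hardening suggestion that would also survive a failed wait, which the patches as written do not:

    // Save the current BE-level flag (get_be_param returns a map keyed by backend).
    def saved = get_be_param("disable_auto_compaction").values().toArray()[0].toString()
    if ("true" == saved) {
        set_be_param("disable_auto_compaction", "false")    // let the BE schedule compaction
    }
    try {
        retryUntilTimeout(3600, {
            // ... wait for the remote file count to drop to 2, as in the suite ...
            true
        })
    } finally {
        if ("true" == saved) {
            set_be_param("disable_auto_compaction", "true") // put the cluster back as found
        }
    }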
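Note on the suite's setup block: cooldown is driven entirely by DDL — an S3 RESOURCE, a STORAGE POLICY with cooldown_ttl = "5" (RowSets migrate to S3 about five seconds after creation), and a table bound to that policy with auto compaction disabled so the five RowSets survive until the test flips the flag; the ${suffix} naming keeps reruns from colliding with leftover resources. For local debugging, cooldown progress can be watched from SQL alone, without S3 access; a sketch:

    // RemoteSize grows from 0B as RowSets cool down to S3; Size shrinks toward 0,
    // which is exactly the condition the suite's first retryUntilTimeout() encodes.
    def data = sql_return_maparray "show data from t_recycle_in_s3"
    data.findAll { "t_recycle_in_s3" == it.TableName }.each {
        logger.info("local=${it.Size} remote=${it.RemoteSize}")
    }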