diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp
index 014213c4694598..51040f130b5c1e 100644
--- a/be/src/olap/olap_server.cpp
+++ b/be/src/olap/olap_server.cpp
@@ -1304,7 +1304,7 @@ void StorageEngine::do_remove_unused_remote_files() {
}
cooldown_meta_id = t->tablet_meta()->cooldown_meta_id();
}
- auto [cooldown_replica_id, cooldown_term] = t->cooldown_conf();
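+        // cooldown_conf() returns the cooldown term first and the replica id second;
+        // the old binding order swapped the two values, so the replica-id check below
+        // compared against the term instead of the actual cooldown replica id.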
+ auto [cooldown_term, cooldown_replica_id] = t->cooldown_conf();
if (cooldown_replica_id != t->replica_id()) {
return;
}
diff --git a/regression-test/framework/pom.xml b/regression-test/framework/pom.xml
index ea4866787e0613..8d921ce15cf134 100644
--- a/regression-test/framework/pom.xml
+++ b/regression-test/framework/pom.xml
@@ -74,6 +74,7 @@ under the License.
4.0.19
4.9.3
2.8.0
+        <aws-java-sdk-s3.version>1.11.95</aws-java-sdk-s3.version>
17.0.0
@@ -265,6 +266,11 @@ under the License.
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-java-sdk-s3</artifactId>
+            <version>${aws-java-sdk-s3.version}</version>
+        </dependency>
         <dependency>
             <groupId>org.apache.hive</groupId>
             <artifactId>hive-jdbc</artifactId>
diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
index 2f3a979945ea92..fa5d226f332060 100644
--- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
+++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
@@ -17,6 +17,12 @@
package org.apache.doris.regression.suite
+import com.amazonaws.auth.AWSStaticCredentialsProvider
+import com.amazonaws.auth.BasicAWSCredentials
+import com.amazonaws.client.builder.AwsClientBuilder
+import com.amazonaws.services.s3.AmazonS3
+import com.amazonaws.services.s3.AmazonS3ClientBuilder
+
import com.google.common.collect.Maps
import com.google.common.util.concurrent.Futures
import com.google.common.util.concurrent.ListenableFuture
@@ -85,6 +91,8 @@ class Suite implements GroovyInterceptable {
final List lazyCheckExceptions = new Vector<>()
final List lazyCheckFutures = new Vector<>()
+ private AmazonS3 s3Client = null
+
Suite(String name, String group, SuiteContext context, SuiteCluster cluster) {
this.name = name
this.group = group
@@ -907,6 +915,16 @@ class Suite implements GroovyInterceptable {
return s3Url
}
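+    // Lazily build a single shared AmazonS3 client (AWS SDK v1) from the suite's
+    // S3 endpoint, region and credentials; synchronized so concurrent callers
+    // always get the same instance.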
+ synchronized AmazonS3 getS3Client() {
+ if (s3Client == null) {
+ def credentials = new BasicAWSCredentials(getS3AK(), getS3SK())
+ def endpointConfiguration = new AwsClientBuilder.EndpointConfiguration(getS3Endpoint(), getS3Region())
+ s3Client = AmazonS3ClientBuilder.standard().withEndpointConfiguration(endpointConfiguration)
+ .withCredentials(new AWSStaticCredentialsProvider(credentials)).build()
+ }
+ return s3Client
+ }
+
void scpFiles(String username, String host, String files, String filePath, boolean fromDst=true) {
String cmd = "scp -r ${username}@${host}:${files} ${filePath}"
if (!fromDst) {
diff --git a/regression-test/pipeline/p0/conf/be.conf b/regression-test/pipeline/p0/conf/be.conf
index 486e01c3bd2ef1..b6cf8fb2792084 100644
--- a/regression-test/pipeline/p0/conf/be.conf
+++ b/regression-test/pipeline/p0/conf/be.conf
@@ -92,5 +92,8 @@ enable_missing_rows_correctness_check=true
crash_in_memory_tracker_inaccurate = true
enable_brpc_connection_check=true
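+# Shorten the cold-data background task intervals so the cold_heat_separation
+# cases finish quickly instead of waiting for the default schedules.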
+remove_unused_remote_files_interval_sec=60
+cold_data_compaction_interval_sec=60
+
# This feature has a bug, so it is false by default; only enable it in the pipeline to observe
enable_parquet_page_index=true
diff --git a/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy b/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy
new file mode 100644
index 00000000000000..b59a803e9ec43f
--- /dev/null
+++ b/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import com.amazonaws.services.s3.model.ListObjectsRequest
+import java.util.function.Supplier
+
+suite("test_cold_data_compaction") {
+ def retryUntilTimeout = { int timeoutSecond, Supplier closure ->
+ long start = System.currentTimeMillis()
+ while (true) {
+ if (closure.get()) {
+ return
+ } else {
+ if (System.currentTimeMillis() - start > timeoutSecond * 1000) {
+ throw new RuntimeException("" +
+ "Operation timeout, maybe you need to check " +
+ "remove_unused_remote_files_interval_sec and " +
+ "cold_data_compaction_interval_sec in be.conf")
+ } else {
+ sleep(10_000)
+ }
+ }
+ }
+ }
+
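+    // unique suffix keeps the resource and storage policy names from clashing between runs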
+ String suffix = UUID.randomUUID().hashCode().abs().toString()
+ String s3Prefix = "regression/cold_data_compaction"
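+    // Set up an S3 resource, a storage policy that cools data down after 5 seconds,
+    // and a single-bucket table bound to that policy, with auto compaction disabled
+    // so the test controls when compaction happens.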
+ multi_sql """
+ DROP TABLE IF EXISTS t_recycle_in_s3;
+ DROP STORAGE POLICY IF EXISTS test_policy_${suffix};
+ DROP RESOURCE IF EXISTS 'remote_s3_${suffix}';
+
+ CREATE RESOURCE "remote_s3_${suffix}"
+ PROPERTIES
+ (
+ "type" = "s3",
+ "s3.endpoint" = "${getS3Endpoint()}",
+ "s3.region" = "${getS3Region()}",
+ "s3.bucket" = "${getS3BucketName()}",
+ "s3.root.path" = "${s3Prefix}",
+ "s3.access_key" = "${getS3AK()}",
+ "s3.secret_key" = "${getS3SK()}",
+ "s3.connection.maximum" = "50",
+ "s3.connection.request.timeout" = "3000",
+ "s3.connection.timeout" = "1000"
+ );
+ CREATE STORAGE POLICY test_policy_${suffix}
+ PROPERTIES(
+ "storage_resource" = "remote_s3_${suffix}",
+ "cooldown_ttl" = "5"
+ );
+ CREATE TABLE IF NOT EXISTS t_recycle_in_s3
+ (
+ k1 BIGINT,
+ k2 LARGEINT,
+ v1 VARCHAR(2048)
+ )
+ DISTRIBUTED BY HASH (k1) BUCKETS 1
+ PROPERTIES(
+ "storage_policy" = "test_policy_${suffix}",
+ "disable_auto_compaction" = "true",
+ "replication_num" = "1"
+ );
+ """
+
+ // insert 5 RowSets
+ multi_sql """
+ insert into t_recycle_in_s3 values(1, 1, 'Tom');
+ insert into t_recycle_in_s3 values(2, 2, 'Jelly');
+ insert into t_recycle_in_s3 values(3, 3, 'Spike');
+ insert into t_recycle_in_s3 values(4, 4, 'Tyke');
+ insert into t_recycle_in_s3 values(5, 5, 'Tuffy');
+ """
+
+    // wait until the data has been uploaded to S3
+ retryUntilTimeout(3600, {
+ def res = sql_return_maparray "show data from t_recycle_in_s3"
+ String size = ""
+ String remoteSize = ""
+ for (final def line in res) {
+ if ("t_recycle_in_s3".equals(line.TableName)) {
+ size = line.Size
+ remoteSize = line.RemoteSize
+ break
+ }
+ }
+ logger.info("waiting for data to be uploaded to S3: t_recycle_in_s3's local data size: ${size}, remote data size: ${remoteSize}")
+ return size.startsWith("0") && !remoteSize.startsWith("0")
+ })
+
+ String tabletId = sql_return_maparray("show tablets from t_recycle_in_s3")[0].TabletId
+ // check number of remote files
+ def filesBeforeCompaction = getS3Client().listObjects(
+ new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix + "/data/${tabletId}")).getObjectSummaries()
+
+ // 5 RowSets + 1 meta
+ assertEquals(6, filesBeforeCompaction.size())
+
+ // trigger cold data compaction
+ sql """alter table t_recycle_in_s3 set ("disable_auto_compaction" = "false")"""
+ def v = get_be_param("disable_auto_compaction").values().toArray()[0].toString()
+ if ("true" == v) {
+ set_be_param("disable_auto_compaction", "false")
+ }
+
+ // wait until compaction finish
+ retryUntilTimeout(3600, {
+ def filesAfterCompaction = getS3Client().listObjects(
+                new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix + "/data/${tabletId}")).getObjectSummaries()
+ logger.info("t_recycle_in_s3's remote file number is ${filesAfterCompaction.size()}")
+ // 1 RowSet + 1 meta
+ return filesAfterCompaction.size() == 2
+ })
+
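+    // restore the original BE-level disable_auto_compaction setting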
+ if ("true" == v) {
+ set_be_param("disable_auto_compaction", "true")
+ }
+
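+    // After the table is dropped, the cooldowned files should eventually be removed
+    // from S3 by the unused-remote-file removal task.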
+ sql "drop table t_recycle_in_s3 force"
+ retryUntilTimeout(3600, {
+ def filesAfterDrop = getS3Client().listObjects(
+                new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix + "/data/${tabletId}")).getObjectSummaries()
+ logger.info("after drop t_recycle_in_s3, remote file number is ${filesAfterDrop.size()}")
+ return filesAfterDrop.size() == 0
+ })
+}