2 changes: 1 addition & 1 deletion be/src/olap/olap_server.cpp
@@ -1285,7 +1285,7 @@ void StorageEngine::do_remove_unused_remote_files() {
         }
         cooldown_meta_id = t->tablet_meta()->cooldown_meta_id();
     }
-    auto [cooldown_replica_id, cooldown_term] = t->cooldown_conf();
+    auto [cooldown_term, cooldown_replica_id] = t->cooldown_conf();
     if (cooldown_replica_id != t->replica_id()) {
         return;
     }
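This one-line reorder is the heart of the PR: `cooldown_conf()` returns the cooldown term first and the replica id second, but the old structured binding named the variables in the opposite order. The `cooldown_replica_id != t->replica_id()` guard was therefore comparing the term against the replica id, which almost always mismatched, so `do_remove_unused_remote_files()` bailed out for virtually every tablet and unused remote files were never cleaned up.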
18 changes: 18 additions & 0 deletions regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
@@ -17,6 +17,12 @@
 
 package org.apache.doris.regression.suite
 
+import com.amazonaws.auth.AWSStaticCredentialsProvider
+import com.amazonaws.auth.BasicAWSCredentials
+import com.amazonaws.client.builder.AwsClientBuilder
+import com.amazonaws.services.s3.AmazonS3
+import com.amazonaws.services.s3.AmazonS3ClientBuilder
+
 import static java.util.concurrent.TimeUnit.SECONDS
 
 import com.google.common.base.Strings
@@ -96,6 +102,8 @@ class Suite implements GroovyInterceptable {
     final List<Future> lazyCheckFutures = new Vector<>()
     static Boolean isTrinoConnectorDownloaded = false
 
+    private AmazonS3 s3Client = null
+
     Suite(String name, String group, SuiteContext context, SuiteCluster cluster) {
         this.name = name
         this.group = group
@@ -984,6 +992,16 @@ class Suite implements GroovyInterceptable {
         return s3Url
     }
 
+    synchronized AmazonS3 getS3Client() {
+        if (s3Client == null) {
+            def credentials = new BasicAWSCredentials(getS3AK(), getS3SK())
+            def endpointConfiguration = new AwsClientBuilder.EndpointConfiguration(getS3Endpoint(), getS3Region())
+            s3Client = AmazonS3ClientBuilder.standard().withEndpointConfiguration(endpointConfiguration)
+                    .withCredentials(new AWSStaticCredentialsProvider(credentials)).build()
+        }
+        return s3Client
+    }
+
     String getJdbcPassword() {
         String sk = context.config.otherConfigs.get("jdbcPassword");
         return sk
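The new `getS3Client()` helper lazily builds a single AWS SDK v1 client from the suite's S3 config helpers (`getS3AK()`, `getS3SK()`, `getS3Endpoint()`, `getS3Region()`) and caches it behind `synchronized`, so every call site in a suite shares one client and connection pool. A minimal sketch of how a suite can use it, assuming `com.amazonaws.services.s3.model.ListObjectsRequest` is imported and `"some/prefix"` is an illustrative key prefix:

    // Count the objects under a prefix, e.g. to verify that an upload happened.
    def listing = getS3Client().listObjects(
            new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix("some/prefix"))
    logger.info("found ${listing.getObjectSummaries().size()} objects under some/prefix")

Note that `listObjects` returns at most one page of results (up to 1000 keys by default), which is plenty for the small file counts these tests check.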
3 changes: 3 additions & 0 deletions regression-test/pipeline/p0/conf/be.conf
@@ -74,5 +74,8 @@ crash_in_memory_tracker_inaccurate = true
 # enable download small files in batch, see apache/doris#45061 for details
 enable_batch_download = true
 
+remove_unused_remote_files_interval_sec=60
+cold_data_compaction_interval_sec=60
+
 # This feature has bug, so by default is false, only open it in pipeline to observe
 enable_parquet_page_index=true
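The two new settings shorten the BE's background schedules for removing unused remote files and for cold data compaction to 60 seconds, so the new regression test below can observe both effects well within its 1800-second polling timeout; its timeout message points at exactly these two keys.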
132 changes: 132 additions & 0 deletions test_cold_data_compaction.groovy (new file)
@@ -0,0 +1,132 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import com.amazonaws.services.s3.model.ListObjectsRequest
import java.util.function.Supplier

suite("test_cold_data_compaction") {
def retryUntilTimeout = { int timeoutSecond, Supplier<Boolean> closure ->
long start = System.currentTimeMillis()
while (true) {
if (closure.get()) {
return
} else {
if (System.currentTimeMillis() - start > timeoutSecond * 1000) {
throw new RuntimeException("" +
"Operation timeout, maybe you need to check " +
"remove_unused_remote_files_interval_sec and " +
"cold_data_compaction_interval_sec in be.conf")
} else {
sleep(10_000)
}
}
}
}

String suffix = UUID.randomUUID().hashCode().abs().toString()
String s3Prefix = "regression/cold_data_compaction"
    multi_sql """
        DROP TABLE IF EXISTS t_recycle_in_s3;
        DROP STORAGE POLICY IF EXISTS test_policy_${suffix};
        DROP RESOURCE IF EXISTS 'remote_s3_${suffix}';

        CREATE RESOURCE "remote_s3_${suffix}"
        PROPERTIES
        (
            "type" = "s3",
            "s3.endpoint" = "${getS3Endpoint()}",
            "s3.region" = "${getS3Region()}",
            "s3.bucket" = "${getS3BucketName()}",
            "s3.root.path" = "${s3Prefix}",
            "s3.access_key" = "${getS3AK()}",
            "s3.secret_key" = "${getS3SK()}",
            "s3.connection.maximum" = "50",
            "s3.connection.request.timeout" = "3000",
            "s3.connection.timeout" = "1000"
        );
        CREATE STORAGE POLICY test_policy_${suffix}
        PROPERTIES(
            "storage_resource" = "remote_s3_${suffix}",
            "cooldown_ttl" = "5"
        );
        CREATE TABLE IF NOT EXISTS t_recycle_in_s3
        (
            k1 BIGINT,
            k2 LARGEINT,
            v1 VARCHAR(2048)
        )
        DISTRIBUTED BY HASH (k1) BUCKETS 1
        PROPERTIES(
            "storage_policy" = "test_policy_${suffix}",
            "disable_auto_compaction" = "true",
            "replication_num" = "1"
        );
    """

    // insert 5 times; with auto compaction disabled, each insert creates one RowSet
    multi_sql """
        insert into t_recycle_in_s3 values(1, 1, 'Tom');
        insert into t_recycle_in_s3 values(2, 2, 'Jelly');
        insert into t_recycle_in_s3 values(3, 3, 'Spike');
        insert into t_recycle_in_s3 values(4, 4, 'Tyke');
        insert into t_recycle_in_s3 values(5, 5, 'Tuffy');
    """

    // wait until the data has been uploaded to S3: local size drops to 0 while remote size becomes non-zero
    retryUntilTimeout(1800, {
        def res = sql_return_maparray "show data from t_recycle_in_s3"
        String size = ""
        String remoteSize = ""
        for (final def line in res) {
            if ("t_recycle_in_s3".equals(line.TableName)) {
                size = line.Size
                remoteSize = line.RemoteSize
                break
            }
        }
        logger.info("waiting for data to be uploaded to S3: t_recycle_in_s3's local data size: ${size}, remote data size: ${remoteSize}")
        return size.startsWith("0") && !remoteSize.startsWith("0")
    })

    String tabletId = sql_return_maparray("show tablets from t_recycle_in_s3")[0].TabletId
    // check the number of remote files
    def filesBeforeCompaction = getS3Client().listObjects(
            new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix + "/data/${tabletId}")).getObjectSummaries()

    // 5 RowSets + 1 meta
    assertEquals(6, filesBeforeCompaction.size())

    // trigger cold data compaction
    sql """alter table t_recycle_in_s3 set ("disable_auto_compaction" = "false")"""

    // wait until the compaction finishes
    retryUntilTimeout(1800, {
        def filesAfterCompaction = getS3Client().listObjects(
                new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix + "/data/${tabletId}")).getObjectSummaries()
        logger.info("t_recycle_in_s3's remote file number is ${filesAfterCompaction.size()}")
        // 1 RowSet + 1 meta
        return filesAfterCompaction.size() == 2
    })

sql "drop table t_recycle_in_s3 force"
retryUntilTimeout(1800, {
def filesAfterDrop = getS3Client().listObjects(
new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix+ "/data/${tabletId}")).getObjectSummaries()
logger.info("after drop t_recycle_in_s3, remote file number is ${filesAfterDrop.size()}")
return filesAfterDrop.size() == 0
})
}
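Taken together, the test exercises the full cold-data lifecycle: cooldown upload (local size reaches zero), cold data compaction (6 remote files shrink to 2), and cleanup after the force drop (file count reaches 0). The last two stages depend on unused remote files actually being removed, which is the path repaired by the olap_server.cpp fix above.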