2 changes: 1 addition & 1 deletion be/src/olap/olap_server.cpp
@@ -1304,7 +1304,7 @@ void StorageEngine::do_remove_unused_remote_files() {
}
cooldown_meta_id = t->tablet_meta()->cooldown_meta_id();
}
- auto [cooldown_replica_id, cooldown_term] = t->cooldown_conf();
+ auto [cooldown_term, cooldown_replica_id] = t->cooldown_conf();
if (cooldown_replica_id != t->replica_id()) {
return;
}
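The one-line fix above corrects the structured-binding order: the corrected code binds `[cooldown_term, cooldown_replica_id]`, i.e. the pair comes back as (term, replica_id), so the old order loaded the term into `cooldown_replica_id` and the subsequent `cooldown_replica_id != t->replica_id()` check compared the wrong value. A minimal sketch of the pitfall, using a hypothetical stand-in for `cooldown_conf()` (names, types, and values are illustrative only):

```cpp
#include <cstdint>
#include <iostream>
#include <utility>

// Hypothetical stand-in for Tablet::cooldown_conf(); the corrected binding above
// implies the pair is returned as (term, replica_id).
std::pair<int64_t, int64_t> cooldown_conf() {
    return {/*term=*/3, /*replica_id=*/10042};
}

int main() {
    // Structured-binding names are purely positional; they are not matched by name.
    auto [replica_id_swapped, term_swapped] = cooldown_conf(); // old, buggy order
    auto [term, replica_id] = cooldown_conf();                 // fixed order
    std::cout << "buggy read: replica_id=" << replica_id_swapped << " term=" << term_swapped << '\n'
              << "fixed read: replica_id=" << replica_id << " term=" << term << '\n';
    return 0;
}
```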
6 changes: 6 additions & 0 deletions regression-test/framework/pom.xml
@@ -74,6 +74,7 @@ under the License.
<groovy.version>4.0.19</groovy.version>
<antlr.version>4.9.3</antlr.version>
<hadoop.version>2.8.0</hadoop.version>
<aws-java-sdk-s3.version>1.11.95</aws-java-sdk-s3.version>
<!-- Arrow 18 only supports jdk17 -->
<arrow.version>17.0.0</arrow.version>
</properties>
@@ -265,6 +266,11 @@ under the License.
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-s3</artifactId>
<version>${aws-java-sdk-s3.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
18 changes: 18 additions & 0 deletions regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
@@ -17,6 +17,12 @@

package org.apache.doris.regression.suite

import com.amazonaws.auth.AWSStaticCredentialsProvider
import com.amazonaws.auth.BasicAWSCredentials
import com.amazonaws.client.builder.AwsClientBuilder
import com.amazonaws.services.s3.AmazonS3
import com.amazonaws.services.s3.AmazonS3ClientBuilder

import com.google.common.collect.Maps
import com.google.common.util.concurrent.Futures
import com.google.common.util.concurrent.ListenableFuture
@@ -85,6 +91,8 @@ class Suite implements GroovyInterceptable {
final List<Throwable> lazyCheckExceptions = new Vector<>()
final List<Future> lazyCheckFutures = new Vector<>()

private AmazonS3 s3Client = null

Suite(String name, String group, SuiteContext context, SuiteCluster cluster) {
this.name = name
this.group = group
@@ -907,6 +915,16 @@ class Suite implements GroovyInterceptable {
return s3Url
}

synchronized AmazonS3 getS3Client() {
if (s3Client == null) {
def credentials = new BasicAWSCredentials(getS3AK(), getS3SK())
def endpointConfiguration = new AwsClientBuilder.EndpointConfiguration(getS3Endpoint(), getS3Region())
s3Client = AmazonS3ClientBuilder.standard().withEndpointConfiguration(endpointConfiguration)
.withCredentials(new AWSStaticCredentialsProvider(credentials)).build()
}
return s3Client
}

void scpFiles(String username, String host, String files, String filePath, boolean fromDst=true) {
String cmd = "scp -r ${username}@${host}:${files} ${filePath}"
if (!fromDst) {
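The new `getS3Client()` helper lazily builds a single `AmazonS3` client per suite, guarded by `synchronized`, from the existing `getS3AK()`, `getS3SK()`, `getS3Endpoint()` and `getS3Region()` accessors, so suites can inspect remote storage directly. A minimal usage sketch, not part of this PR; the suite name and object prefix below are placeholders:

```groovy
import com.amazonaws.services.s3.model.ListObjectsRequest

suite("s3_client_usage_sketch") {
    // Reuse the lazily created client to count remote objects under a placeholder prefix.
    def summaries = getS3Client().listObjects(
            new ListObjectsRequest()
                    .withBucketName(getS3BucketName())
                    .withPrefix("regression/some_prefix")).getObjectSummaries()
    logger.info("found ${summaries.size()} objects under the prefix")
}
```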
3 changes: 3 additions & 0 deletions regression-test/pipeline/p0/conf/be.conf
@@ -92,5 +92,8 @@ enable_missing_rows_correctness_check=true
crash_in_memory_tracker_inaccurate = true
enable_brpc_connection_check=true

remove_unused_remote_files_interval_sec=60
cold_data_compaction_interval_sec=60

# This feature has bug, so by default is false, only open it in pipeline to observe
enable_parquet_page_index=true
@@ -0,0 +1,140 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import com.amazonaws.services.s3.model.ListObjectsRequest
import java.util.function.Supplier

suite("test_cold_data_compaction") {
def retryUntilTimeout = { int timeoutSecond, Supplier<Boolean> closure ->
long start = System.currentTimeMillis()
while (true) {
if (closure.get()) {
return
} else {
if (System.currentTimeMillis() - start > timeoutSecond * 1000) {
throw new RuntimeException("" +
"Operation timeout, maybe you need to check " +
"remove_unused_remote_files_interval_sec and " +
"cold_data_compaction_interval_sec in be.conf")
} else {
sleep(10_000)
}
}
}
}

String suffix = UUID.randomUUID().hashCode().abs().toString()
String s3Prefix = "regression/cold_data_compaction"
multi_sql """
DROP TABLE IF EXISTS t_recycle_in_s3;
DROP STORAGE POLICY IF EXISTS test_policy_${suffix};
DROP RESOURCE IF EXISTS 'remote_s3_${suffix}';

CREATE RESOURCE "remote_s3_${suffix}"
PROPERTIES
(
"type" = "s3",
"s3.endpoint" = "${getS3Endpoint()}",
"s3.region" = "${getS3Region()}",
"s3.bucket" = "${getS3BucketName()}",
"s3.root.path" = "${s3Prefix}",
"s3.access_key" = "${getS3AK()}",
"s3.secret_key" = "${getS3SK()}",
"s3.connection.maximum" = "50",
"s3.connection.request.timeout" = "3000",
"s3.connection.timeout" = "1000"
);
CREATE STORAGE POLICY test_policy_${suffix}
PROPERTIES(
"storage_resource" = "remote_s3_${suffix}",
"cooldown_ttl" = "5"
);
CREATE TABLE IF NOT EXISTS t_recycle_in_s3
(
k1 BIGINT,
k2 LARGEINT,
v1 VARCHAR(2048)
)
DISTRIBUTED BY HASH (k1) BUCKETS 1
PROPERTIES(
"storage_policy" = "test_policy_${suffix}",
"disable_auto_compaction" = "true",
"replication_num" = "1"
);
"""

// insert 5 RowSets
multi_sql """
insert into t_recycle_in_s3 values(1, 1, 'Tom');
insert into t_recycle_in_s3 values(2, 2, 'Jelly');
insert into t_recycle_in_s3 values(3, 3, 'Spike');
insert into t_recycle_in_s3 values(4, 4, 'Tyke');
insert into t_recycle_in_s3 values(5, 5, 'Tuffy');
"""

// wait until files upload to S3
retryUntilTimeout(3600, {
def res = sql_return_maparray "show data from t_recycle_in_s3"
String size = ""
String remoteSize = ""
for (final def line in res) {
if ("t_recycle_in_s3".equals(line.TableName)) {
size = line.Size
remoteSize = line.RemoteSize
break
}
}
logger.info("waiting for data to be uploaded to S3: t_recycle_in_s3's local data size: ${size}, remote data size: ${remoteSize}")
return size.startsWith("0") && !remoteSize.startsWith("0")
})

String tabletId = sql_return_maparray("show tablets from t_recycle_in_s3")[0].TabletId
// check number of remote files
def filesBeforeCompaction = getS3Client().listObjects(
new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix + "/data/${tabletId}")).getObjectSummaries()

// 5 RowSets + 1 meta
assertEquals(6, filesBeforeCompaction.size())

// trigger cold data compaction
sql """alter table t_recycle_in_s3 set ("disable_auto_compaction" = "false")"""
def v = get_be_param("disable_auto_compaction").values().toArray()[0].toString()
if ("true" == v) {
set_be_param("disable_auto_compaction", "false")
}

// wait until compaction finish
retryUntilTimeout(3600, {
def filesAfterCompaction = getS3Client().listObjects(
new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix+ "/data/${tabletId}")).getObjectSummaries()
logger.info("t_recycle_in_s3's remote file number is ${filesAfterCompaction.size()}")
// 1 RowSet + 1 meta
return filesAfterCompaction.size() == 2
})

if ("true" == v) {
set_be_param("disable_auto_compaction", "true")
}

sql "drop table t_recycle_in_s3 force"
retryUntilTimeout(3600, {
def filesAfterDrop = getS3Client().listObjects(
new ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix+ "/data/${tabletId}")).getObjectSummaries()
logger.info("after drop t_recycle_in_s3, remote file number is ${filesAfterDrop.size()}")
return filesAfterDrop.size() == 0
})
}