Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion be/src/util/block_compression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1022,7 +1022,10 @@ class Bzip2BlockCompression : public BlockCompressionCodec {

// Returns the output-buffer size a caller must reserve before compressing
// `len` bytes with bzip2.
//
// The bound is twice the input length plus 50 bytes. The 50 is an estimated
// fixed overhead for bzip2: when the input is very small, `len * 2` alone is
// too small and BZ2_bzBuffToBuffCompress returns BZ_OUTBUFF_FULL.
//
// TODO: confirm this bound against the worst-case output size recommended by
// the bzip2 manual.
size_t max_compressed_len(size_t len) override {
    return len * 2 + 50;
}
};

Expand Down
60 changes: 60 additions & 0 deletions regression-test/data/export_p0/test_outfile_csv_compress.out
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,66 @@ c2 text Yes false \N NONE
c1 text Yes false \N NONE
c2 text Yes false \N NONE

-- !select --
1 2

-- !select --
1 1

-- !select --
c1 text Yes false \N NONE
c2 text Yes false \N NONE

-- !select --
1 2

-- !select --
1 1

-- !select --
c1 text Yes false \N NONE
c2 text Yes false \N NONE

-- !select --
1 2

-- !select --
1 1

-- !select --
c1 text Yes false \N NONE
c2 text Yes false \N NONE

-- !select --
1 2

-- !select --
1 1

-- !select --
c1 text Yes false \N NONE
c2 text Yes false \N NONE

-- !select --
1 2

-- !select --
1 1

-- !select --
c1 text Yes false \N NONE
c2 text Yes false \N NONE

-- !select --
1 2

-- !select --
1 1

-- !select --
c1 text Yes false \N NONE
c2 text Yes false \N NONE

-- !select --
__dummy_col text Yes false \N NONE

Original file line number Diff line number Diff line change
Expand Up @@ -1275,7 +1275,7 @@ false 1 1 1 10 1.1 10.1 3951 01/31/10 1 2010-01-31T12:01:13.500 2010 1
true 0 0 0 0 0.0 0 3950 01/31/10 0 2010-01-31T12:00:13.500 2010 1

-- !schema_1 --
1 7706 1 155190 17.00 21168.23 0.04 0.02 N O 1996-03-13 1996-02-12 1996-03-22 DELIVER IN PERSON TRUCK egular courts above the cn beijing
1 638 6 15635 32.00 49620.16 0.07 0.02 N O 1996-01-30 1996-02-07 1996-02-03 DELIVER IN PERSON MAIL arefully slyly ex cn beijing

-- !schema_2 --
6374628540732951412 -77 -65 -70 -107 -215 65 0 -526 -1309 3750 8827 -19795 34647 57042 -1662 -138248 -890685 -228568 1633079 -2725524 6163040 -10491702 697237 74565050 127767368 93532213 -209675435 -32116110 -3624917040 -2927805617 15581947241 21893441661 24075494509 -116822110531 -59683724667 -146210393388 114424524398 1341560771667 -1638742564263 520137948334 -2927347587131 7415137351179 -7963937754617 52157548982266 140803519083304 -294675355729619 -868076759504942 181128508165910 -91753231238823 -3511241416682881 -11545256318348796 -1952917510863468 -5161099825338866 -59726090170689781 287170105829528178 607326725526282735 1253194074103207461 -162443950414676064 -2964036188567341159 2602201580810990248 5581917084094110764 111739292249520611 -315687754593838642 -2804420462762366976 -2078683524
Expand Down
47 changes: 47 additions & 0 deletions regression-test/suites/export_p0/test_outfile_csv_compress.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,17 @@ suite("test_outfile_csv_compress", "p0") {
for (int i = 0; i < 20; i++) {
sql """ insert into ${table_name} select id + ${i}, concat(name, id + ${i}) from ${table_name};"""
}

// small table
sql """ DROP TABLE IF EXISTS small_${table_name} """
sql """
CREATE TABLE IF NOT EXISTS small_${table_name} (
`id` int,
`name` int
)
DISTRIBUTED BY HASH(name) PROPERTIES("replication_num" = "1");
"""
sql """INSERT INTO small_${table_name} values(1, 2);"""
}

def table_name = "test_outfile_csv_compress"
Expand Down Expand Up @@ -96,6 +107,42 @@ suite("test_outfile_csv_compress", "p0") {
"""
}

// Small-table round trip: for every supported compression type, export the
// one-row table small_${table_name} and read the result back through the s3()
// table-valued function with the matching "compress_type".
// Each iteration runs three checks: the row content, the row count, and the
// schema reported by `desc function s3(...)`.
for (String compression_type: ["plain", "gz", "bz2", "snappyblock", "lz4block", "zstd"]) {
def small = "small_${table_name}"
// NOTE(review): csv_outfile_result is defined elsewhere in this suite; presumably it
// runs the OUTFILE export and returns the resulting file URL -- confirm against the helper.
def outfile_url = csv_outfile_result(small, compression_type);
// Debug output of the HTTP-style URL that is queried below.
// The substring drops the first (5 + bucket.length()) characters and the last character of
// outfile_url; presumably that is the "s3://<bucket>" prefix and a trailing wildcard -- TODO confirm.
print("http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(), outfile_url.length() - 1)}0.")
// Check 1: the exported rows, ordered so the recorded output is deterministic.
qt_select """ select c1, c2 from s3(
"uri" = "http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(), outfile_url.length() - 1)}*",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "csv",
"provider" = "${getS3Provider()}",
"region" = "${region}",
"compress_type" = "${compression_type}"
) order by c1, c2 limit 10;
"""
// Check 2: the number of non-null values in each exported column.
qt_select """ select count(c1), count(c2) from s3(
"uri" = "http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(), outfile_url.length() - 1)}*",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "csv",
"provider" = "${getS3Provider()}",
"region" = "${region}",
"compress_type" = "${compression_type}"
);
"""
// Check 3: the column schema the s3() function reports for the exported file.
qt_select """desc function s3(
"uri" = "http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(), outfile_url.length() - 1)}*",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "csv",
"provider" = "${getS3Provider()}",
"region" = "${region}",
"compress_type" = "${compression_type}"
);
"""
}

// test invalid compression_type
test {
sql """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,6 @@ suite("test_hive_get_schema_from_table", "external_docker,hive,external_docker_h
test_col_topn("month")
}





// test get schema from table
for (String hivePrefix : ["hive2", "hive3"]) {
String catalog_name = "test_${hivePrefix}_get_schema"
Expand Down Expand Up @@ -96,7 +92,7 @@ suite("test_hive_get_schema_from_table", "external_docker,hive,external_docker_h
test_topn()
test_topn_abs()

order_qt_schema_1 """select * from ${catalog_name}.${ex_db_name}.parquet_partition_table order by l_orderkey limit 1;"""
order_qt_schema_1 """select * from ${catalog_name}.${ex_db_name}.parquet_partition_table order by l_orderkey, l_partkey limit 1;"""
order_qt_schema_2 """select * from ${catalog_name}.${ex_db_name}.parquet_delta_binary_packed order by int_value limit 1;"""
order_qt_schema_3 """select * from ${catalog_name}.${ex_db_name}.parquet_alltypes_tiny_pages order by id desc limit 5;"""
order_qt_schema_4 """select * from ${catalog_name}.${ex_db_name}.orc_all_types_partition order by bigint_col desc limit 3;"""
Expand Down
Loading