From 094b36af97db7e4b8bd6dfb6223a1ecc9e94f480 Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Wed, 3 Sep 2025 04:25:53 +0000 Subject: [PATCH 1/4] PAX: Support LZ4 compression for table columns PAX only supports zlib and zstd compression for column values. This commit adds lz4 support for pax table columns. --- .../pax_storage/src/cpp/access/paxc_rel_options.cc | 4 ++++ .../pax_storage/src/cpp/access/paxc_rel_options.h | 1 + .../src/cpp/storage/columns/pax_column_test.cc | 6 ++++++ .../src/cpp/storage/columns/pax_compress.cc | 12 +++++++++--- contrib/pax_storage/src/cpp/storage/proto/pax.proto | 1 + 5 files changed, 21 insertions(+), 3 deletions(-) diff --git a/contrib/pax_storage/src/cpp/access/paxc_rel_options.cc b/contrib/pax_storage/src/cpp/access/paxc_rel_options.cc index 647cb5743cf..5de1b14cd97 100644 --- a/contrib/pax_storage/src/cpp/access/paxc_rel_options.cc +++ b/contrib/pax_storage/src/cpp/access/paxc_rel_options.cc @@ -50,6 +50,10 @@ static const relopt_compress_type_mapping kSelfRelCompressMap[] = { pax::ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_ZSTD}, {ColumnEncoding_Kind_COMPRESS_ZLIB_STR, pax::ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_ZLIB}, +#ifdef USE_LZ4 + {ColumnEncoding_Kind_COMPRESS_LZ4_STR, + pax::ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_LZ4}, +#endif }; typedef struct { diff --git a/contrib/pax_storage/src/cpp/access/paxc_rel_options.h b/contrib/pax_storage/src/cpp/access/paxc_rel_options.h index 4e813f38c40..e6c29363ab1 100644 --- a/contrib/pax_storage/src/cpp/access/paxc_rel_options.h +++ b/contrib/pax_storage/src/cpp/access/paxc_rel_options.h @@ -41,6 +41,7 @@ namespace paxc { #define ColumnEncoding_Kind_DICTIONARY_STR "dict" #define ColumnEncoding_Kind_COMPRESS_ZSTD_STR "zstd" #define ColumnEncoding_Kind_COMPRESS_ZLIB_STR "zlib" +#define ColumnEncoding_Kind_COMPRESS_LZ4_STR "lz4" #define STORAGE_FORMAT_TYPE_PORC "porc" #define STORAGE_FORMAT_TYPE_PORC_VEC "porc_vec" diff --git 
a/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc index b26fdff65bf..f39e453cfee 100644 --- a/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc @@ -798,6 +798,9 @@ INSTANTIATE_TEST_SUITE_P( PaxColumnEncodingTestCombine, PaxColumnCompressTest, testing::Combine(testing::Values(16, 32, 64), testing::Values(ColumnEncoding_Kind_NO_ENCODED, +#ifdef USE_LZ4 + ColumnEncoding_Kind_COMPRESS_LZ4, +#endif ColumnEncoding_Kind_COMPRESS_ZSTD, ColumnEncoding_Kind_COMPRESS_ZLIB))); @@ -805,6 +808,9 @@ INSTANTIATE_TEST_SUITE_P( PaxColumnEncodingTestCombine, PaxNonFixedColumnCompressTest, testing::Combine(testing::Values(16, 32, 64), testing::Values(ColumnEncoding_Kind_NO_ENCODED, +#ifdef USE_LZ4 + ColumnEncoding_Kind_COMPRESS_LZ4, +#endif ColumnEncoding_Kind_COMPRESS_ZSTD, ColumnEncoding_Kind_COMPRESS_ZLIB), testing::Values(true, false), diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc index 87a34cbb6d7..94087da502c 100644 --- a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc @@ -50,6 +50,10 @@ std::shared_ptr PaxCompressor::CreateBlockCompressor( compressor = std::make_shared(); break; } + case ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_LZ4: { + compressor = std::make_shared(); + break; + } case ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED: { CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError, fmt("Invalid compress type %d", @@ -230,9 +234,11 @@ size_t PaxLZ4Compressor::GetCompressBound(size_t src_len) { } size_t PaxLZ4Compressor::Compress(void *dst_buff, size_t dst_cap, - void *src_buff, size_t src_len, int /*lvl*/) { - return LZ4_compress_default((char *)src_buff, (char *)dst_buff, src_len, - dst_cap); + void *src_buff, size_t src_len, int 
lvl) { + // acceleration is oppsite meaning of compress level + int acceleration = 19 - lvl; + return LZ4_compress_fast((char *)src_buff, (char *)dst_buff, src_len, + dst_cap, acceleration); } size_t PaxLZ4Compressor::Decompress(void *dst_buff, size_t dst_len, diff --git a/contrib/pax_storage/src/cpp/storage/proto/pax.proto b/contrib/pax_storage/src/cpp/storage/proto/pax.proto index 3e25710027d..765d3e0f8a5 100644 --- a/contrib/pax_storage/src/cpp/storage/proto/pax.proto +++ b/contrib/pax_storage/src/cpp/storage/proto/pax.proto @@ -37,6 +37,7 @@ message ColumnEncoding { COMPRESS_ZLIB = 4; // use ZLIB to compress DICTIONARY = 5; // use dict-endoing + COMPRESS_LZ4 = 6; // use lz4 to compress } optional Kind kind = 1; From ae4b7f8f662c0b49f0f7b8b032d3ac4b590a94af Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Wed, 3 Sep 2025 05:53:29 +0000 Subject: [PATCH 2/4] map compress level to acceleration for lz4 --- contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc index 94087da502c..36e95ae37fe 100644 --- a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc @@ -235,8 +235,11 @@ size_t PaxLZ4Compressor::GetCompressBound(size_t src_len) { size_t PaxLZ4Compressor::Compress(void *dst_buff, size_t dst_cap, void *src_buff, size_t src_len, int lvl) { +#define LZ4_MAX_ACC 65536 // acceleration is oppsite meaning of compress level + // map [19, 0] to [0, LZ4_MAX_ACC] int acceleration = 19 - lvl; + acceleration = (int)(acceleration * LZ4_MAX_ACC / 20.0); return LZ4_compress_fast((char *)src_buff, (char *)dst_buff, src_len, dst_cap, acceleration); } From 3f77c367bc936c8a1159ed8762cf4d09996ebcac Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Thu, 4 Sep 2025 01:13:38 +0000 Subject: [PATCH 3/4] restrict acceleration to range [0, 3] --- 
.../pax_storage/src/cpp/storage/columns/pax_compress.cc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc index 36e95ae37fe..480dd9f0f4d 100644 --- a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc @@ -235,11 +235,9 @@ size_t PaxLZ4Compressor::GetCompressBound(size_t src_len) { size_t PaxLZ4Compressor::Compress(void *dst_buff, size_t dst_cap, void *src_buff, size_t src_len, int lvl) { -#define LZ4_MAX_ACC 65536 - // acceleration is oppsite meaning of compress level - // map [19, 0] to [0, LZ4_MAX_ACC] - int acceleration = 19 - lvl; - acceleration = (int)(acceleration * LZ4_MAX_ACC / 20.0); + // acceleration affects compression speed, the larger acceleration value, + // the less compression ratio. + int acceleration = (20 - lvl) / 6; return LZ4_compress_fast((char *)src_buff, (char *)dst_buff, src_len, dst_cap, acceleration); } From e08f7ed613c2ea0953fd044a4efa582282d8f7c1 Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Thu, 4 Sep 2025 01:22:52 +0000 Subject: [PATCH 4/4] add macro control --- contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc index 480dd9f0f4d..f4bae52ea7d 100644 --- a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc @@ -50,10 +50,12 @@ std::shared_ptr PaxCompressor::CreateBlockCompressor( compressor = std::make_shared(); break; } +#ifdef USE_LZ4 case ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_LZ4: { compressor = std::make_shared(); break; } +#endif case ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED: { CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError, 
fmt("Invalid compress type %d",