diff --git a/be/src/gutil/endian.h b/be/src/gutil/endian.h index 4bc04e1e303eb5..f78480b3cf5fec 100644 --- a/be/src/gutil/endian.h +++ b/be/src/gutil/endian.h @@ -60,8 +60,8 @@ inline unsigned __int128 gbswap_128(unsigned __int128 host_int) { } inline wide::UInt256 gbswap_256(wide::UInt256 host_int) { - wide::UInt256 result{gbswap_64(host_int.items[3]), gbswap_64(host_int.items[2]), - gbswap_64(host_int.items[1]), gbswap_64(host_int.items[0])}; + wide::UInt256 result {gbswap_64(host_int.items[3]), gbswap_64(host_int.items[2]), + gbswap_64(host_int.items[1]), gbswap_64(host_int.items[0])}; return result; } @@ -136,6 +136,9 @@ class LittleEndian { static unsigned __int128 FromHost128(unsigned __int128 x) { return x; } static unsigned __int128 ToHost128(unsigned __int128 x) { return x; } + static wide::UInt256 FromHost256(wide::UInt256 x) { return x; } + static wide::UInt256 ToHost256(wide::UInt256 x) { return x; } + static bool IsLittleEndian() { return true; } #elif defined IS_BIG_ENDIAN @@ -149,6 +152,12 @@ class LittleEndian { static uint64 FromHost64(uint64 x) { return gbswap_64(x); } static uint64 ToHost64(uint64 x) { return gbswap_64(x); } + static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); } + static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); } + + static wide::UInt256 FromHost256(wide::UInt256 x) { return gbswap_256(x); } + static wide::UInt256 ToHost256(wide::UInt256 x) { return gbswap_256(x); } + static bool IsLittleEndian() { return false; } #endif /* ENDIAN */ diff --git a/be/src/util/bit_util.h b/be/src/util/bit_util.h index 44b391f44dae34..504b0b27428190 100644 --- a/be/src/util/bit_util.h +++ b/be/src/util/bit_util.h @@ -20,6 +20,9 @@ #pragma once +#include + +#include "vec/core/wide_integer.h" #ifndef __APPLE__ #include #endif @@ -209,7 +212,11 @@ class BitUtil { template static T big_endian_to_host(T value) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { + return BigEndian::ToHost256(value); + } else if constexpr (std::is_same_v) { + return BigEndian::ToHost256(value); + } else if constexpr (std::is_same_v) { return BigEndian::ToHost128(value); } else if constexpr (std::is_same_v) { return BigEndian::ToHost128(value); diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp index 2fb0afea82ae8a..0a5ef2913dd940 100644 --- a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp +++ b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp @@ -19,6 +19,7 @@ #include +#include "runtime/define_primitive_type.h" #include "vec/columns/column_nullable.h" namespace doris::vectorized::parquet { const cctz::time_zone ConvertParams::utc0 = cctz::utc_time_zone(); @@ -27,7 +28,8 @@ const cctz::time_zone ConvertParams::utc0 = cctz::utc_time_zone(); M(TYPE_DECIMALV2) \ M(TYPE_DECIMAL32) \ M(TYPE_DECIMAL64) \ - M(TYPE_DECIMAL128I) + M(TYPE_DECIMAL128I) \ + M(TYPE_DECIMAL256) bool PhysicalToLogicalConverter::is_parquet_native_type(PrimitiveType type) { switch (type) { @@ -50,6 +52,7 @@ bool PhysicalToLogicalConverter::is_decimal_type(doris::PrimitiveType type) { case TYPE_DECIMAL32: case TYPE_DECIMAL64: case TYPE_DECIMAL128I: + case TYPE_DECIMAL256: case TYPE_DECIMALV2: return true; default: diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h b/be/src/vec/exec/format/parquet/parquet_column_convert.h index 91b81121aa4303..cf6f8aa13fa1d1 100644 --- a/be/src/vec/exec/format/parquet/parquet_column_convert.h +++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h @@ -20,6 +20,7 @@ #include #include "vec/core/types.h" +#include "vec/core/wide_integer.h" #include "vec/data_types/data_type_factory.hpp" #include "vec/exec/format/column_type_convert.h" #include "vec/exec/format/format_common.h" @@ -401,7 +402,23 @@ class FixedSizeToDecimal : public PhysicalToLogicalConverter { M(13, int128_t) \ M(14, int128_t) \ M(15, int128_t) \ - M(16, int128_t) + M(16, int128_t) \ + M(17, wide::Int256) \ + M(18, wide::Int256) \ + M(19, wide::Int256) \ + M(20, wide::Int256) \ + M(21, wide::Int256) \ + M(22, wide::Int256) \ + M(23, wide::Int256) \ + M(24, wide::Int256) \ + M(25, wide::Int256) \ + M(26, wide::Int256) \ + M(27, wide::Int256) \ + M(28, wide::Int256) \ + M(29, wide::Int256) \ + M(30, wide::Int256) \ + M(31, wide::Int256) \ + M(32, wide::Int256) switch (_type_length) { APPLY_FOR_DECIMALS() diff --git a/be/test/util/bit_util_test.cpp b/be/test/util/bit_util_test.cpp index 514daafa604745..fd3bee0143223a 100644 --- a/be/test/util/bit_util_test.cpp +++ b/be/test/util/bit_util_test.cpp @@ -21,7 +21,6 @@ #include #include -#include #include "gtest/gtest_pred_impl.h" @@ -48,4 +47,22 @@ TEST(BitUtil, Popcount) { EXPECT_EQ(BitUtil::popcount_no_hw(0), 0); } +TEST(BitUtil, BigEndianToHost) { + uint16_t v16 = 0x1234; + uint32_t v32 = 0x12345678; + uint64_t v64 = 0x123456789abcdef0; + unsigned __int128 v128 = ((__int128)0x123456789abcdef0LL << 64) | 0x123456789abcdef0LL; + wide::UInt256 v256 = + wide::UInt256(0x123456789abcdef0) << 192 | wide::UInt256(0x123456789abcdef0) << 128 | + wide::UInt256(0x123456789abcdef0) << 64 | wide::UInt256(0x123456789abcdef0); + EXPECT_EQ(BitUtil::big_endian_to_host(v16), 0x3412); + EXPECT_EQ(BitUtil::big_endian_to_host(v32), 0x78563412); + EXPECT_EQ(BitUtil::big_endian_to_host(v64), 0xf0debc9a78563412); + EXPECT_EQ(BitUtil::big_endian_to_host(v128), + ((__int128)0xf0debc9a78563412LL << 64) | 0xf0debc9a78563412LL); + EXPECT_EQ(BitUtil::big_endian_to_host(v256), + wide::UInt256(0xf0debc9a78563412) << 192 | wide::UInt256(0xf0debc9a78563412) << 128 | + wide::UInt256(0xf0debc9a78563412) << 64 | wide::UInt256(0xf0debc9a78563412)); +} + } // namespace doris diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/hdfs_tvf/test_parquet_decimal256.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/hdfs_tvf/test_parquet_decimal256.parquet new file mode 100644 index 00000000000000..323ded32160e00 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/hdfs_tvf/test_parquet_decimal256.parquet differ diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out index 3f12b44858136c..e850e38a237b06 100644 --- a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out +++ b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out @@ -221,6 +221,13 @@ 19 Supplier#000000019 edZT3es,nBFD8lBXTGeTl 24 34-278-310-2731 6150.38 refully final foxes across the dogged theodolites sleep slyly abou 20 Supplier#000000020 iybAE,RmTymrZVYaFZva2SH,j 3 13-715-945-6730 530.82 n, ironic ideas would nag blithely about the slyly regular accounts. silent, expr +-- !parquet_decimal256 -- +1 99999999999999999999999999999999999999.99999999999999999999999999999999999999 +2 -99999999999999999999999999999999999999.99999999999999999999999999999999999999 +3 1E-38 +4 -1E-38 +5 0E-38 + -- !orc -- 1 goldenrod lavender spring chocolate lace Manufacturer#1 Brand#13 PROMO BURNISHED COPPER 7 JUMBO PKG 901.00 ly. slyly ironi 2 blush thistle blue yellow saddle Manufacturer#1 Brand#13 LARGE BRUSHED BRASS 1 LG CASE 902.00 lar accounts amo diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy index 8c4028bfefe021..02bda4ec0ddc1b 100644 --- a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy +++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy @@ -108,6 +108,14 @@ suite("test_hdfs_tvf","external,hive,tvf,external_docker") { "hadoop.username" = "${hdfsUserName}", "format" = "${format}") order by s_suppkey limit 20; """ + // test parquet decimal256 + uri = "${defaultFS}" + "/user/doris/preinstalled_data/hdfs_tvf/test_parquet_decimal256.parquet" + format = "parquet" + qt_parquet_decimal256 """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}") order by id; """ + // test orc uri = "${defaultFS}" + "/user/doris/preinstalled_data/hdfs_tvf/test_orc.snappy.orc" format = "orc"