From 1d1247680aca58e26436eaf68a7691bf9f26bb98 Mon Sep 17 00:00:00 2001 From: zhangstar333 Date: Thu, 14 Nov 2024 11:05:15 +0800 Subject: [PATCH 1/3] [Bug](function) fix cut_ipv6 function error about modify the input column data --- be/src/vec/functions/function_ip.h | 14 +++++++++----- .../ip_functions/test_cut_ipv6_function.out | 5 +++++ .../ip_functions/test_cut_ipv6_function.groovy | 16 ++++++++++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/be/src/vec/functions/function_ip.h b/be/src/vec/functions/function_ip.h index 69fd2a8914890e..23ea92985a084d 100644 --- a/be/src/vec/functions/function_ip.h +++ b/be/src/vec/functions/function_ip.h @@ -1291,14 +1291,17 @@ class FunctionCutIPv6 : public IFunction { offsets_res.resize(input_rows_count); auto* begin = reinterpret_cast(chars_res.data()); auto* pos = begin; + unsigned char ipv6_address_data[IPV6_BINARY_LENGTH]; for (size_t i = 0; i < input_rows_count; ++i) { auto ipv6_idx = index_check_const(i, ipv6_const); auto bytes_to_cut_for_ipv6_idx = index_check_const(i, bytes_to_cut_for_ipv6_const); auto bytes_to_cut_for_ipv4_idx = index_check_const(i, bytes_to_cut_for_ipv4_const); - auto* address = const_cast( - reinterpret_cast(&ipv6_addr_column_data[ipv6_idx])); + memcpy(ipv6_address_data, + reinterpret_cast(&ipv6_addr_column_data[ipv6_idx]), + IPV6_BINARY_LENGTH); + Int8 bytes_to_cut_for_ipv6_count = to_cut_for_ipv6_bytes_column_data[bytes_to_cut_for_ipv6_idx]; Int8 bytes_to_cut_for_ipv4_count = @@ -1318,9 +1321,10 @@ class FunctionCutIPv6 : public IFunction { get_name()); } - UInt8 bytes_to_cut_count = is_ipv4_mapped(address) ? bytes_to_cut_for_ipv4_count - : bytes_to_cut_for_ipv6_count; - cut_address(address, pos, bytes_to_cut_count); + UInt8 bytes_to_cut_count = is_ipv4_mapped(ipv6_address_data) + ? 
bytes_to_cut_for_ipv4_count + : bytes_to_cut_for_ipv6_count; + cut_address(ipv6_address_data, pos, bytes_to_cut_count); offsets_res[i] = cast_set(pos - begin); } diff --git a/regression-test/data/query_p0/sql_functions/ip_functions/test_cut_ipv6_function.out b/regression-test/data/query_p0/sql_functions/ip_functions/test_cut_ipv6_function.out index c549c1533cd1ab..3d17c87cb94a72 100644 --- a/regression-test/data/query_p0/sql_functions/ip_functions/test_cut_ipv6_function.out +++ b/regression-test/data/query_p0/sql_functions/ip_functions/test_cut_ipv6_function.out @@ -1,3 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this -- !sql -- :: ::ffff:0.0.0.0 :: ::ffff:192.168.0.0 @@ -17,3 +18,7 @@ ffff:ffff:ffff:ffff:: :: -- !sql -- \N + +-- !sql -- +0 182a:556f:6665:4fb1:a0f0:40ff:3af2:7ad3 + diff --git a/regression-test/suites/query_p0/sql_functions/ip_functions/test_cut_ipv6_function.groovy b/regression-test/suites/query_p0/sql_functions/ip_functions/test_cut_ipv6_function.groovy index 099705e2383acc..25eb010403cdcf 100644 --- a/regression-test/suites/query_p0/sql_functions/ip_functions/test_cut_ipv6_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/ip_functions/test_cut_ipv6_function.groovy @@ -53,4 +53,20 @@ suite("test_cut_ipv6_function") { qt_sql "select cut_ipv6(to_ipv6('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D'), 0, NULL)" sql "DROP TABLE test_cut_ipv6_function" + + sql """ DROP TABLE IF EXISTS test_cutipv6 """ + sql """ + CREATE TABLE `test_cutipv6` ( + `pk` int NOT NULL, + `col_ipv6_undef_signed` ipv6 NULL + ) ENGINE=OLAP + UNIQUE KEY(`pk`) + DISTRIBUTED BY HASH(`pk`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql """ insert into test_cutipv6 values(0,"182a:556f:6665:4fb1:a0f0:40ff:3af2:7ad3"); """ + qt_sql "select pk,col_ipv6_undef_signed from test_cutipv6 where cut_ipv6(col_ipv6_undef_signed, 4, 7) != 
'182a:556f:6665:4fb1:a0f0:40ff:3af2:7ad3';" + } \ No newline at end of file From 2e67a392b98d71ba7a88fe29272b260491a21e2f Mon Sep 17 00:00:00 2001 From: zhangstar333 Date: Mon, 18 Nov 2024 10:15:47 +0800 Subject: [PATCH 2/3] update --- be/src/vec/functions/function_ip.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/be/src/vec/functions/function_ip.h b/be/src/vec/functions/function_ip.h index 23ea92985a084d..e9896703c638d8 100644 --- a/be/src/vec/functions/function_ip.h +++ b/be/src/vec/functions/function_ip.h @@ -1324,6 +1324,9 @@ class FunctionCutIPv6 : public IFunction { UInt8 bytes_to_cut_count = is_ipv4_mapped(ipv6_address_data) ? bytes_to_cut_for_ipv4_count : bytes_to_cut_for_ipv6_count; + // the current function logic is processed in big endian manner + // But ipv6 in doris is stored in little-endian byte order + // so transfer to big-endian byte order first cut_address(ipv6_address_data, pos, bytes_to_cut_count); offsets_res[i] = cast_set(pos - begin); } From 614efab93d123e7748cdebb2bc23a263f814a5ff Mon Sep 17 00:00:00 2001 From: zhangstar333 Date: Mon, 18 Nov 2024 10:40:52 +0800 Subject: [PATCH 3/3] updat2 --- be/src/vec/functions/function_ip.h | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/be/src/vec/functions/function_ip.h b/be/src/vec/functions/function_ip.h index e9896703c638d8..b90d1b2bcf9434 100644 --- a/be/src/vec/functions/function_ip.h +++ b/be/src/vec/functions/function_ip.h @@ -1291,16 +1291,16 @@ class FunctionCutIPv6 : public IFunction { offsets_res.resize(input_rows_count); auto* begin = reinterpret_cast(chars_res.data()); auto* pos = begin; - unsigned char ipv6_address_data[IPV6_BINARY_LENGTH]; for (size_t i = 0; i < input_rows_count; ++i) { auto ipv6_idx = index_check_const(i, ipv6_const); auto bytes_to_cut_for_ipv6_idx = index_check_const(i, bytes_to_cut_for_ipv6_const); auto bytes_to_cut_for_ipv4_idx = index_check_const(i, bytes_to_cut_for_ipv4_const); - - memcpy(ipv6_address_data, - 
reinterpret_cast(&ipv6_addr_column_data[ipv6_idx]), - IPV6_BINARY_LENGTH); + // The logic of this function works in big-endian byte order, but IPv6 values in + // Doris are stored in little-endian byte order and must be converted first. + // Do the conversion on a local copy so the input column data is never modified. + auto val_128 = ipv6_addr_column_data[ipv6_idx]; + auto* address = reinterpret_cast(&val_128); Int8 bytes_to_cut_for_ipv6_count = to_cut_for_ipv6_bytes_column_data[bytes_to_cut_for_ipv6_idx]; @@ -1321,13 +1321,9 @@ class FunctionCutIPv6 : public IFunction { get_name()); } - UInt8 bytes_to_cut_count = is_ipv4_mapped(ipv6_address_data) - ? bytes_to_cut_for_ipv4_count - : bytes_to_cut_for_ipv6_count; - // the current function logic is processed in big endian manner - // But ipv6 in doris is stored in little-endian byte order - // so transfer to big-endian byte order first - cut_address(ipv6_address_data, pos, bytes_to_cut_count); + UInt8 bytes_to_cut_count = is_ipv4_mapped(address) ? bytes_to_cut_for_ipv4_count + : bytes_to_cut_for_ipv6_count; + cut_address(address, pos, bytes_to_cut_count); offsets_res[i] = cast_set(pos - begin); }