From 6cbd55ee6016905b90e971904bbbeb54cd6922c5 Mon Sep 17 00:00:00 2001 From: Mryange <2319153948@qq.com> Date: Thu, 27 Jun 2024 07:05:08 +0800 Subject: [PATCH 1/3] opt --- be/src/vec/functions/function_string.h | 132 ++++++++++++++++--------- 1 file changed, 83 insertions(+), 49 deletions(-) diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index bc15803fe4589e..0bdea19ccfc745 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "common/compiler_util.h" // IWYU pragma: keep @@ -70,6 +71,7 @@ #include "vec/data_types/data_type.h" #include "vec/functions/round.h" #include "vec/io/io_helper.h" +#include "vec/utils/template_helpers.hpp" #ifndef USE_LIBCPP #include @@ -2277,70 +2279,102 @@ class FunctionStringParseUrl : public IFunction { res_offsets.resize(input_rows_count); size_t argument_size = arguments.size(); - bool has_key = argument_size >= 3; + const bool has_key = argument_size >= 3; std::vector argument_columns(argument_size); + std::vector col_const(argument_size); for (size_t i = 0; i < argument_size; ++i) { - argument_columns[i] = - block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); + std::tie(argument_columns[i], col_const[i]) = + unpack_if_const(block.get_by_position(arguments[i]).column); } - const auto* url_col = check_and_get_column(argument_columns[0].get()); - const auto* part_col = check_and_get_column(argument_columns[1].get()); - const ColumnString* key_col = nullptr; - if (has_key) { - key_col = check_and_get_column(argument_columns[2].get()); - } + const auto* url_col = assert_cast(argument_columns[0].get()); + const auto* part_col = assert_cast(argument_columns[1].get()); + const bool part_const = col_const[1]; + std::vector url_parts; + const int part_nums = part_const ? 1 : input_rows_count; - if (!url_col || !part_col || (has_key && !key_col)) { - return Status::InternalError("Not supported input arguments types"); + url_parts.resize(part_nums); + for (int i = 0; i < part_nums; i++) { + StringRef part = part_col->get_data_at(i); + UrlParser::UrlPart url_part = UrlParser::get_url_part(part); + if (url_part == UrlParser::INVALID) { + return Status::RuntimeError("Invalid URL part: {}\n{}", + std::string(part.data, part.size), + "(Valid URL parts are 'PROTOCOL', 'HOST', " + "'PATH', 'REF', 'AUTHORITY', " + "'FILE', 'USERINFO', 'PORT' and 'QUERY')"); + } + url_parts[i] = url_part; } - for (size_t i = 0; i < input_rows_count; ++i) { - if (null_map_data[i]) { + if (has_key) { + const bool url_const = col_const[0]; + const bool key_const = col_const[2]; + const auto* key_col = assert_cast(argument_columns[2].get()); + RETURN_IF_ERROR(std::visit( + [&](auto url_const, auto part_const, auto key_const) { + return vector_parse_key( + url_col, url_parts, key_col, input_rows_count, null_map_data, + res_chars, res_offsets); + }, + vectorized::make_bool_variant(url_const), + vectorized::make_bool_variant(part_const), + vectorized::make_bool_variant(key_const))); + } else { + const bool url_const = col_const[0]; + RETURN_IF_ERROR(std::visit( + [&](auto url_const, auto part_const) { + return vector_parse(url_col, url_parts, + input_rows_count, null_map_data, + res_chars, res_offsets); + }, + vectorized::make_bool_variant(url_const), + vectorized::make_bool_variant(part_const))); + } + block.get_by_position(result).column = + ColumnNullable::create(std::move(res), std::move(null_map)); + return Status::OK(); + } + template + static Status vector_parse(const ColumnString* url_col, + std::vector& url_parts, const int size, + ColumnUInt8::Container& null_map_data, + ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) { + for (size_t i = 0; i < size; ++i) { + UrlParser::UrlPart& url_part = url_parts[part_const ? 0 : i]; + StringRef url_val = url_col->get_data_at(url_const ? 0 : i); + StringRef parse_res; + if (UrlParser::parse_url(url_val, url_part, &parse_res)) { + StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, + res_chars, res_offsets); + } else { StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); continue; } - - auto part = part_col->get_data_at(i); - StringRef p(const_cast(part.data), part.size); - UrlParser::UrlPart url_part = UrlParser::get_url_part(p); - StringRef url_key; - if (has_key) { - auto key = key_col->get_data_at(i); - url_key = StringRef(const_cast(key.data), key.size); - } - - auto source = url_col->get_data_at(i); - StringRef url_val(const_cast(source.data), source.size); - + } + return Status::OK(); + } + template + static Status vector_parse_key(const ColumnString* url_col, + std::vector& url_parts, + const ColumnString* key_col, const int size, + ColumnUInt8::Container& null_map_data, + ColumnString::Chars& res_chars, + ColumnString::Offsets& res_offsets) { + for (size_t i = 0; i < size; ++i) { + UrlParser::UrlPart& url_part = url_parts[part_const ? 0 : i]; + StringRef url_val = url_col->get_data_at(url_const ? 0 : i); + StringRef url_key = key_col->get_data_at(key_const ? 0 : i); StringRef parse_res; - bool success = false; - if (has_key) { - success = UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res); + if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { + StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, + res_chars, res_offsets); } else { - success = UrlParser::parse_url(url_val, url_part, &parse_res); - } - - if (!success) { - // url is malformed, or url_part is invalid. - if (url_part == UrlParser::INVALID) { - return Status::RuntimeError("Invalid URL part: {}\n{}", - std::string(part.data, part.size), - "(Valid URL parts are 'PROTOCOL', 'HOST', " - "'PATH', 'REF', 'AUTHORITY', " - "'FILE', 'USERINFO', 'PORT' and 'QUERY')"); - } else { - StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); - continue; - } + StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); + continue; } - - StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, - res_chars, res_offsets); } - block.get_by_position(result).column = - ColumnNullable::create(std::move(res), std::move(null_map)); return Status::OK(); } }; From fe296164b19c65e43855bbb50272a11cd810cf54 Mon Sep 17 00:00:00 2001 From: Mryange <2319153948@qq.com> Date: Thu, 27 Jun 2024 12:39:30 +0800 Subject: [PATCH 2/3] cr --- be/src/vec/columns/column_const.h | 9 +++++++++ be/src/vec/functions/function_string.h | 15 ++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/be/src/vec/columns/column_const.h b/be/src/vec/columns/column_const.h index 2a67b319e424db..2dad8cc6945783 100644 --- a/be/src/vec/columns/column_const.h +++ b/be/src/vec/columns/column_const.h @@ -67,6 +67,15 @@ template T index_check_const(T arg, bool constancy) noexcept { return constancy ? 0 : arg; } +template + requires std::is_integral_v +constexpr T index_check_const(T arg) noexcept { + if constexpr (is_const) { + return 0; + } else { + return arg; + } +} /* * @return first : data_column_ptr for ColumnConst, itself otherwise. diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 0bdea19ccfc745..446993c8a57270 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -2272,14 +2273,14 @@ class FunctionStringParseUrl : public IFunction { size_t result, size_t input_rows_count) const override { auto null_map = ColumnUInt8::create(input_rows_count, 0); auto& null_map_data = null_map->get_data(); - + DCHECK_GE(3, arguments.size()); auto res = ColumnString::create(); auto& res_offsets = res->get_offsets(); auto& res_chars = res->get_chars(); res_offsets.resize(input_rows_count); size_t argument_size = arguments.size(); - const bool has_key = argument_size >= 3; + const bool has_key = argument_size == 3; std::vector argument_columns(argument_size); std::vector col_const(argument_size); @@ -2342,8 +2343,8 @@ class FunctionStringParseUrl : public IFunction { ColumnUInt8::Container& null_map_data, ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) { for (size_t i = 0; i < size; ++i) { - UrlParser::UrlPart& url_part = url_parts[part_const ? 0 : i]; - StringRef url_val = url_col->get_data_at(url_const ? 0 : i); + UrlParser::UrlPart& url_part = url_parts[index_check_const(i)]; + StringRef url_val = url_col->get_data_at(index_check_const(i)); StringRef parse_res; if (UrlParser::parse_url(url_val, url_part, &parse_res)) { StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, @@ -2363,9 +2364,9 @@ class FunctionStringParseUrl : public IFunction { ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) { for (size_t i = 0; i < size; ++i) { - UrlParser::UrlPart& url_part = url_parts[part_const ? 0 : i]; - StringRef url_val = url_col->get_data_at(url_const ? 0 : i); - StringRef url_key = key_col->get_data_at(key_const ? 0 : i); + UrlParser::UrlPart& url_part = url_parts[index_check_const(i)]; + StringRef url_val = url_col->get_data_at(index_check_const(i)); + StringRef url_key = key_col->get_data_at(index_check_const(i)); StringRef parse_res; if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, From 9656bd5b833ff8a6a96d2aaae518643cf1204589 Mon Sep 17 00:00:00 2001 From: Mryange <2319153948@qq.com> Date: Thu, 27 Jun 2024 18:07:51 +0800 Subject: [PATCH 3/3] remove log --- be/src/vec/functions/function_string.h | 1 - 1 file changed, 1 deletion(-) diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 446993c8a57270..b764058a63dde4 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -17,7 +17,6 @@ #pragma once -#include #include #include #include