Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions be/src/vec/columns/column_const.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ template <typename T>
T index_check_const(T arg, bool constancy) noexcept {
return constancy ? 0 : arg;
}
template <bool is_const, typename T>
requires std::is_integral_v<T>
constexpr T index_check_const(T arg) noexcept {
if constexpr (is_const) {
return 0;
} else {
return arg;
}
}

/*
* @return first : data_column_ptr for ColumnConst, itself otherwise.
Expand Down
134 changes: 84 additions & 50 deletions be/src/vec/functions/function_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <tuple>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>

#include "common/compiler_util.h" // IWYU pragma: keep
Expand Down Expand Up @@ -70,6 +71,7 @@
#include "vec/data_types/data_type.h"
#include "vec/functions/round.h"
#include "vec/io/io_helper.h"
#include "vec/utils/template_helpers.hpp"

#ifndef USE_LIBCPP
#include <memory_resource>
Expand Down Expand Up @@ -2270,77 +2272,109 @@ class FunctionStringParseUrl : public IFunction {
size_t result, size_t input_rows_count) const override {
auto null_map = ColumnUInt8::create(input_rows_count, 0);
auto& null_map_data = null_map->get_data();

DCHECK_GE(3, arguments.size());
auto res = ColumnString::create();
auto& res_offsets = res->get_offsets();
auto& res_chars = res->get_chars();
res_offsets.resize(input_rows_count);

size_t argument_size = arguments.size();
bool has_key = argument_size >= 3;
const bool has_key = argument_size == 3;

std::vector<ColumnPtr> argument_columns(argument_size);
std::vector<UInt8> col_const(argument_size);
for (size_t i = 0; i < argument_size; ++i) {
argument_columns[i] =
block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
std::tie(argument_columns[i], col_const[i]) =
unpack_if_const(block.get_by_position(arguments[i]).column);
}

const auto* url_col = check_and_get_column<ColumnString>(argument_columns[0].get());
const auto* part_col = check_and_get_column<ColumnString>(argument_columns[1].get());
const ColumnString* key_col = nullptr;
if (has_key) {
key_col = check_and_get_column<ColumnString>(argument_columns[2].get());
}
const auto* url_col = assert_cast<const ColumnString*>(argument_columns[0].get());
const auto* part_col = assert_cast<const ColumnString*>(argument_columns[1].get());
const bool part_const = col_const[1];
std::vector<UrlParser::UrlPart> url_parts;
const int part_nums = part_const ? 1 : input_rows_count;

if (!url_col || !part_col || (has_key && !key_col)) {
return Status::InternalError("Not supported input arguments types");
url_parts.resize(part_nums);
for (int i = 0; i < part_nums; i++) {
StringRef part = part_col->get_data_at(i);
UrlParser::UrlPart url_part = UrlParser::get_url_part(part);
if (url_part == UrlParser::INVALID) {
return Status::RuntimeError("Invalid URL part: {}\n{}",
std::string(part.data, part.size),
"(Valid URL parts are 'PROTOCOL', 'HOST', "
"'PATH', 'REF', 'AUTHORITY', "
"'FILE', 'USERINFO', 'PORT' and 'QUERY')");
}
url_parts[i] = url_part;
}

for (size_t i = 0; i < input_rows_count; ++i) {
if (null_map_data[i]) {
if (has_key) {
const bool url_const = col_const[0];
const bool key_const = col_const[2];
const auto* key_col = assert_cast<const ColumnString*>(argument_columns[2].get());
RETURN_IF_ERROR(std::visit(
[&](auto url_const, auto part_const, auto key_const) {
return vector_parse_key<url_const, part_const, key_const>(
url_col, url_parts, key_col, input_rows_count, null_map_data,
res_chars, res_offsets);
},
vectorized::make_bool_variant(url_const),
vectorized::make_bool_variant(part_const),
vectorized::make_bool_variant(key_const)));
} else {
const bool url_const = col_const[0];
RETURN_IF_ERROR(std::visit(
[&](auto url_const, auto part_const) {
return vector_parse<url_const, part_const>(url_col, url_parts,
input_rows_count, null_map_data,
res_chars, res_offsets);
},
vectorized::make_bool_variant(url_const),
vectorized::make_bool_variant(part_const)));
}
block.get_by_position(result).column =
ColumnNullable::create(std::move(res), std::move(null_map));
return Status::OK();
}
template <bool url_const, bool part_const>
static Status vector_parse(const ColumnString* url_col,
std::vector<UrlParser::UrlPart>& url_parts, const int size,
ColumnUInt8::Container& null_map_data,
ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) {
for (size_t i = 0; i < size; ++i) {
UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
StringRef parse_res;
if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
res_chars, res_offsets);
} else {
StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
continue;
}

auto part = part_col->get_data_at(i);
StringRef p(const_cast<char*>(part.data), part.size);
UrlParser::UrlPart url_part = UrlParser::get_url_part(p);
StringRef url_key;
if (has_key) {
auto key = key_col->get_data_at(i);
url_key = StringRef(const_cast<char*>(key.data), key.size);
}

auto source = url_col->get_data_at(i);
StringRef url_val(const_cast<char*>(source.data), source.size);

}
return Status::OK();
}
template <bool url_const, bool part_const, bool key_const>
static Status vector_parse_key(const ColumnString* url_col,
std::vector<UrlParser::UrlPart>& url_parts,
const ColumnString* key_col, const int size,
ColumnUInt8::Container& null_map_data,
ColumnString::Chars& res_chars,
ColumnString::Offsets& res_offsets) {
for (size_t i = 0; i < size; ++i) {
UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
StringRef parse_res;
bool success = false;
if (has_key) {
success = UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res);
if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
res_chars, res_offsets);
} else {
success = UrlParser::parse_url(url_val, url_part, &parse_res);
}

if (!success) {
// url is malformed, or url_part is invalid.
if (url_part == UrlParser::INVALID) {
return Status::RuntimeError("Invalid URL part: {}\n{}",
std::string(part.data, part.size),
"(Valid URL parts are 'PROTOCOL', 'HOST', "
"'PATH', 'REF', 'AUTHORITY', "
"'FILE', 'USERINFO', 'PORT' and 'QUERY')");
} else {
StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
continue;
}
StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
continue;
}

StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
res_chars, res_offsets);
}
block.get_by_position(result).column =
ColumnNullable::create(std::move(res), std::move(null_map));
return Status::OK();
}
};
Expand Down