Skip to content
Merged
6 changes: 3 additions & 3 deletions be/src/exprs/function_filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace doris {
class FunctionFilter {
public:
FunctionFilter(bool opposite, const std::string& col_name, doris::FunctionContext* fn_ctx,
doris::StringVal string_param)
doris::StringRef string_param)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: variable has incomplete type 'doris::StringRef' [clang-diagnostic-error]

                   doris::StringRef string_param)
                                    ^

be/src/udf/udf.h:33: forward declaration of 'doris::StringRef'

struct StringRef;
       ^

: _opposite(opposite),
_col_name(col_name),
_fn_ctx(fn_ctx),
Expand All @@ -36,8 +36,8 @@ class FunctionFilter {
std::string _col_name;
// The lifetime of these pointers is controlled by the scan node.
doris::FunctionContext* _fn_ctx;
doris::StringVal
_string_param; // only one param from conjunct, because now only support like predicate
// Only one param from the conjunct, because only the LIKE predicate is supported for now.
doris::StringRef _string_param;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: field has incomplete type 'doris::StringRef' [clang-diagnostic-error]

    doris::StringRef _string_param;
                     ^

be/src/udf/udf.h:33: forward declaration of 'doris::StringRef'

struct StringRef;
       ^

};

} // namespace doris
6 changes: 3 additions & 3 deletions be/src/exprs/math_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ double MathFunctions::my_double_round(double value, int64_t dec, bool dec_unsign
return tmp2;
}

StringVal MathFunctions::decimal_to_base(FunctionContext* ctx, int64_t src_num, int8_t dest_base) {
StringRef MathFunctions::decimal_to_base(FunctionContext* ctx, int64_t src_num, int8_t dest_base) {
// Max number of digits of any base (base 2 gives max digits), plus sign.
const size_t max_digits = sizeof(uint64_t) * 8 + 1;
char buf[max_digits];
Expand All @@ -127,8 +127,8 @@ StringVal MathFunctions::decimal_to_base(FunctionContext* ctx, int64_t src_num,
buf[buf_index] = '-';
++result_len;
}
StringVal result = ctx->create_temp_string_val(result_len);
memcpy(result.ptr, buf + max_digits - result_len, result_len);
StringRef result = ctx->create_temp_string_val(result_len);
memcpy(const_cast<char*>(result.data), buf + max_digits - result_len, result_len);
return result;
}

Expand Down
3 changes: 2 additions & 1 deletion be/src/exprs/math_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <stdint.h>

#include "util/string_parser.hpp"
#include "vec/common/string_ref.h"

namespace doris {

Expand All @@ -32,7 +33,7 @@ class MathFunctions {

// Converts src_num from decimal to dest_base
// and returns the result as a string.
static doris::StringVal decimal_to_base(doris::FunctionContext* ctx, int64_t src_num,
static doris::StringRef decimal_to_base(doris::FunctionContext* ctx, int64_t src_num,
int8_t dest_base);

// Converts src_num representing a number in src_base but encoded in decimal
Expand Down
18 changes: 9 additions & 9 deletions be/src/exprs/string_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@
namespace doris {

// This function sets options in the RE2 library before pattern matching.
bool StringFunctions::set_re2_options(const StringVal& match_parameter, std::string* error_str,
bool StringFunctions::set_re2_options(const StringRef& match_parameter, std::string* error_str,
re2::RE2::Options* opts) {
for (int i = 0; i < match_parameter.len; i++) {
char match = match_parameter.ptr[i];
for (int i = 0; i < match_parameter.size; i++) {
char match = match_parameter.data[i];
switch (match) {
case 'i':
opts->set_case_sensitive(false);
Expand All @@ -62,26 +62,26 @@ bool StringFunctions::set_re2_options(const StringVal& match_parameter, std::str
}

// Compiles pattern into re, which the caller owns. If the pattern cannot be compiled,
// re is reset to empty and error_str receives the reason.
bool StringFunctions::compile_regex(const StringVal& pattern, std::string* error_str,
const StringVal& match_parameter,
bool StringFunctions::compile_regex(const StringRef& pattern, std::string* error_str,
const StringRef& match_parameter,
std::unique_ptr<re2::RE2>& re) {
re2::StringPiece pattern_sp(reinterpret_cast<char*>(pattern.ptr), pattern.len);
re2::StringPiece pattern_sp(pattern.data, pattern.size);
re2::RE2::Options options;
// Disable error logging in case e.g. every row causes an error
options.set_log_errors(false);
// ATTN(cmy): do not set it, or the lazy mode of regex won't work. See Doris #6587
// Return the leftmost longest match (rather than the first match).
// options.set_longest_match(true);
options.set_dot_nl(true);
if (!match_parameter.is_null &&
if (match_parameter.size > 0 &&
!StringFunctions::set_re2_options(match_parameter, error_str, &options)) {
return false;
}
re.reset(new re2::RE2(pattern_sp, options));
if (!re->ok()) {
std::stringstream ss;
ss << "Could not compile regexp pattern: "
<< std::string(reinterpret_cast<char*>(pattern.ptr), pattern.len) << std::endl
ss << "Could not compile regexp pattern: " << std::string(pattern.data, pattern.size)
<< std::endl
<< "Error: " << re->error();
*error_str = ss.str();
re.reset();
Expand Down
6 changes: 3 additions & 3 deletions be/src/exprs/string_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ namespace doris {

class StringFunctions {
public:
static bool set_re2_options(const doris::StringVal& match_parameter, std::string* error_str,
static bool set_re2_options(const doris::StringRef& match_parameter, std::string* error_str,
re2::RE2::Options* opts);

// Compiles pattern into re, which the caller owns. If the pattern cannot be compiled,
// re is reset to empty and error_str receives the reason.
static bool compile_regex(const StringVal& pattern, std::string* error_str,
const StringVal& match_parameter, std::unique_ptr<re2::RE2>& re);
static bool compile_regex(const StringRef& pattern, std::string* error_str,
const StringRef& match_parameter, std::unique_ptr<re2::RE2>& re);
};
} // namespace doris
183 changes: 83 additions & 100 deletions be/src/olap/like_column_predicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,127 +23,68 @@

namespace doris {

template <>
LikeColumnPredicate<true>::LikeColumnPredicate(bool opposite, uint32_t column_id,
doris::FunctionContext* fn_ctx, doris::StringVal val)
: ColumnPredicate(column_id, opposite), pattern(reinterpret_cast<char*>(val.ptr), val.len) {
_state = reinterpret_cast<StateType*>(
fn_ctx->get_function_state(doris::FunctionContext::THREAD_LOCAL));
_state->search_state.clone(_like_state);
}

template <>
LikeColumnPredicate<false>::LikeColumnPredicate(bool opposite, uint32_t column_id,
doris::FunctionContext* fn_ctx,
doris::StringVal val)
LikeColumnPredicate::LikeColumnPredicate(bool opposite, uint32_t column_id,
doris::FunctionContext* fn_ctx, doris::StringRef val)
: ColumnPredicate(column_id, opposite), pattern(val) {
_state = reinterpret_cast<StateType*>(
fn_ctx->get_function_state(doris::FunctionContext::THREAD_LOCAL));
_state->search_state.clone(_like_state);
}

template <bool is_vectorized>
void LikeColumnPredicate<is_vectorized>::evaluate_vec(const vectorized::IColumn& column,
uint16_t size, bool* flags) const {
void LikeColumnPredicate::evaluate_vec(const vectorized::IColumn& column, uint16_t size,
bool* flags) const {
_evaluate_vec<false>(column, size, flags);
}

template <bool is_vectorized>
void LikeColumnPredicate<is_vectorized>::evaluate_and_vec(const vectorized::IColumn& column,
uint16_t size, bool* flags) const {
void LikeColumnPredicate::evaluate_and_vec(const vectorized::IColumn& column, uint16_t size,
bool* flags) const {
_evaluate_vec<true>(column, size, flags);
}

template <bool is_vectorized>
uint16_t LikeColumnPredicate<is_vectorized>::evaluate(const vectorized::IColumn& column,
uint16_t* sel, uint16_t size) const {
uint16_t LikeColumnPredicate::evaluate(const vectorized::IColumn& column, uint16_t* sel,
uint16_t size) const {
uint16_t new_size = 0;
if constexpr (is_vectorized) {
if (column.is_nullable()) {
auto* nullable_col =
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
auto& null_map_data = nullable_col->get_null_map_column().get_data();
auto& nested_col = nullable_col->get_nested_column();
if (nested_col.is_column_dictionary()) {
auto* nested_col_ptr = vectorized::check_and_get_column<
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col);
auto& data_array = nested_col_ptr->get_data();
if (!nullable_col->has_null()) {
for (uint16_t i = 0; i != size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[idx]);
unsigned char flag = 0;
(_state->scalar_function)(
const_cast<vectorized::LikeSearchState*>(&_like_state),
StringRef(cell_value.data, cell_value.size), pattern, &flag);
new_size += _opposite ^ flag;
}
} else {
for (uint16_t i = 0; i != size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
if (null_map_data[idx]) {
new_size += _opposite;
continue;
}

StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[idx]);
unsigned char flag = 0;
(_state->scalar_function)(
const_cast<vectorized::LikeSearchState*>(&_like_state),
StringRef(cell_value.data, cell_value.size), pattern, &flag);
new_size += _opposite ^ flag;
}
if (column.is_nullable()) {
auto* nullable_col = vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
auto& null_map_data = nullable_col->get_null_map_column().get_data();
auto& nested_col = nullable_col->get_nested_column();
if (nested_col.is_column_dictionary()) {
auto* nested_col_ptr = vectorized::check_and_get_column<
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col);
auto& data_array = nested_col_ptr->get_data();
if (!nullable_col->has_null()) {
for (uint16_t i = 0; i != size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[idx]);
unsigned char flag = 0;
(_state->scalar_function)(
const_cast<vectorized::LikeSearchState*>(&_like_state),
StringRef(cell_value.data, cell_value.size), pattern, &flag);
new_size += _opposite ^ flag;
}
} else {
auto* str_col = vectorized::check_and_get_column<
vectorized::PredicateColumnType<TYPE_STRING>>(nested_col);
if (!nullable_col->has_null()) {
vectorized::ColumnUInt8::Container res(size, 0);
(_state->predicate_like_function)(
const_cast<vectorized::LikeSearchState*>(&_like_state), *str_col,
pattern, res, sel, size);
for (uint16_t i = 0; i != size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
new_size += _opposite ^ res[i];
}
} else {
for (uint16_t i = 0; i != size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
if (null_map_data[idx]) {
new_size += _opposite;
continue;
}

StringRef cell_value = str_col->get_data()[idx];
unsigned char flag = 0;
(_state->scalar_function)(
const_cast<vectorized::LikeSearchState*>(&_like_state),
StringRef(cell_value.data, cell_value.size), pattern, &flag);
new_size += _opposite ^ flag;
}
}
}
} else {
if (column.is_column_dictionary()) {
auto* nested_col_ptr = vectorized::check_and_get_column<
vectorized::ColumnDictionary<vectorized::Int32>>(column);
auto& data_array = nested_col_ptr->get_data();
for (uint16_t i = 0; i != size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
if (null_map_data[idx]) {
new_size += _opposite;
continue;
}

StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[idx]);
unsigned char flag = 0;
(_state->scalar_function)(
const_cast<vectorized::LikeSearchState*>(&_like_state),
StringRef(cell_value.data, cell_value.size), pattern, &flag);
new_size += _opposite ^ flag;
}
} else {
auto* str_col = vectorized::check_and_get_column<
vectorized::PredicateColumnType<TYPE_STRING>>(column);
}
} else {
auto* str_col =
vectorized::check_and_get_column<vectorized::PredicateColumnType<TYPE_STRING>>(
nested_col);
if (!nullable_col->has_null()) {
vectorized::ColumnUInt8::Container res(size, 0);
(_state->predicate_like_function)(
const_cast<vectorized::LikeSearchState*>(&_like_state), *str_col, pattern,
Expand All @@ -153,13 +94,55 @@ uint16_t LikeColumnPredicate<is_vectorized>::evaluate(const vectorized::IColumn&
sel[new_size] = idx;
new_size += _opposite ^ res[i];
}
} else {
for (uint16_t i = 0; i != size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
if (null_map_data[idx]) {
new_size += _opposite;
continue;
}

StringRef cell_value = str_col->get_data()[idx];
unsigned char flag = 0;
(_state->scalar_function)(
const_cast<vectorized::LikeSearchState*>(&_like_state),
StringRef(cell_value.data, cell_value.size), pattern, &flag);
new_size += _opposite ^ flag;
}
}
}
} else {
if (column.is_column_dictionary()) {
auto* nested_col_ptr = vectorized::check_and_get_column<
vectorized::ColumnDictionary<vectorized::Int32>>(column);
auto& data_array = nested_col_ptr->get_data();
for (uint16_t i = 0; i != size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[idx]);
unsigned char flag = 0;
(_state->scalar_function)(const_cast<vectorized::LikeSearchState*>(&_like_state),
StringRef(cell_value.data, cell_value.size), pattern,
&flag);
new_size += _opposite ^ flag;
}
} else {
auto* str_col =
vectorized::check_and_get_column<vectorized::PredicateColumnType<TYPE_STRING>>(
column);
vectorized::ColumnUInt8::Container res(size, 0);
(_state->predicate_like_function)(
const_cast<vectorized::LikeSearchState*>(&_like_state), *str_col, pattern, res,
sel, size);
for (uint16_t i = 0; i != size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
new_size += _opposite ^ res[i];
}
}
}
return new_size;
}

template class LikeColumnPredicate<true>;
template class LikeColumnPredicate<false>;

} //namespace doris
Loading