Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions be/src/vec/functions/match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,72 @@ Status FunctionMatchRegexp::execute_match(FunctionContext* context, const std::s
return Status::OK();
}

/// Evaluates MATCH_PHRASE_EDGE row by row without using the inverted index.
///
/// The query string is tokenized once via analyse_query_str_token(). For each
/// of the `input_rows_count` rows, the tokens produced by analyse_data_token()
/// are scanned with a sliding window as wide as the query token list, and
/// `result[i]` is set to true on the first window that matches:
///   * one query token: it may occur anywhere inside the data token;
///   * several query tokens: the first must be a suffix of its data token, the
///     last must be a prefix of its data token, and every token in between
///     must be equal to its data token.
///
/// Rows that do not match are never written, so `result` keeps whatever value
/// the caller initialised it with. If the query yields no tokens the function
/// logs at debug level and returns OK without touching `result`.
///
/// @return the status of check(context, name) if that fails, otherwise
///         Status::OK().
Status FunctionMatchPhraseEdge::execute_match(
        FunctionContext* context, const std::string& column_name,
        const std::string& match_query_str, size_t input_rows_count, const ColumnString* string_col,
        InvertedIndexCtx* inverted_index_ctx, const ColumnArray::Offsets64* array_offsets,
        ColumnUInt8::Container& result) const {
    RETURN_IF_ERROR(check(context, name));

    std::vector<std::string> query_tokens =
            analyse_query_str_token(inverted_index_ctx, match_query_str, column_name);
    if (query_tokens.empty()) {
        VLOG_DEBUG << fmt::format(
                "token parser result is empty for query, "
                "please check your query: '{}' and index parser: '{}'",
                match_query_str,
                inverted_index_parser_type_to_string(inverted_index_ctx->parser_type));
        return Status::OK();
    }

    // Tests the query tokens against the window of `tokens` beginning at
    // `start`. The caller guarantees start + query_tokens.size() <= tokens.size().
    const auto window_matches = [&query_tokens](const auto& tokens, size_t start) {
        if (query_tokens.size() == 1) {
            // A lone token has no neighbours to anchor it: substring match.
            return tokens[start].find(query_tokens[0]) != std::string::npos;
        }
        const size_t last = query_tokens.size() - 1;
        for (size_t k = 0; k <= last; ++k) {
            const std::string& data_token = tokens[start + k];
            const std::string& query_token = query_tokens[k];
            bool token_ok = false;
            if (k == 0) {
                // Leading edge: the phrase may start in the middle of a token.
                token_ok = data_token.ends_with(query_token);
            } else if (k == last) {
                // Trailing edge: the phrase may stop in the middle of a token.
                token_ok = data_token.starts_with(query_token);
            } else {
                token_ok = (data_token == query_token);
            }
            if (!token_ok) {
                return false;
            }
        }
        return true;
    };

    int32_t current_src_array_offset = 0;
    for (size_t i = 0; i < input_rows_count; i++) {
        const auto data_tokens = analyse_data_token(column_name, inverted_index_ctx, string_col, i,
                                                    array_offsets, current_src_array_offset);

        // Compare sizes before subtracting so the unsigned difference cannot
        // wrap and no narrowing to a signed type is needed.
        if (data_tokens.size() < query_tokens.size()) {
            continue;
        }
        const size_t window_count = data_tokens.size() - query_tokens.size() + 1;

        for (size_t j = 0; j < window_count; j++) {
            if (window_matches(data_tokens, j)) {
                result[i] = true;
                break;
            }
        }
    }

    return Status::OK();
}

void register_function_match(SimpleFunctionFactory& factory) {
factory.register_function<FunctionMatchAny>();
factory.register_function<FunctionMatchAll>();
Expand Down
5 changes: 1 addition & 4 deletions be/src/vec/functions/match.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,7 @@ class FunctionMatchPhraseEdge : public FunctionMatchBase {
const std::string& match_query_str, size_t input_rows_count,
const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const override {
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
"FunctionMatchPhraseEdge not support execute_match");
}
ColumnUInt8::Container& result) const override;
};

} // namespace doris::vectorized
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,27 @@
-- !sql --
6

-- !sql --
0

-- !sql --
874

-- !sql --
150

-- !sql --
20

-- !sql --
0

-- !sql --
874

-- !sql --
150

-- !sql --
20

Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,102 @@ suite("test_index_match_phrase_edge", "nonConcurrent"){
} finally {
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
}

// Two tables with identical columns are created for the phrase-edge queries below.
// The only schema difference is that indexTbName2 declares an inverted index on
// `request`, while indexTbName3 declares no index at all.
def indexTbName2 = "test_index_match_phrase_edge2"
def indexTbName3 = "test_index_match_phrase_edge3"

// Drop any tables left over from a previous run so the CREATE statements succeed.
sql "DROP TABLE IF EXISTS ${indexTbName2}"
sql "DROP TABLE IF EXISTS ${indexTbName3}"

// Table with an inverted index on `request` (english parser, support_phrase = true).
sql """
CREATE TABLE ${indexTbName2} (
`@timestamp` int(11) NULL COMMENT "",
`clientip` varchar(20) NULL COMMENT "",
`request` text NULL COMMENT "",
`status` int(11) NULL COMMENT "",
`size` int(11) NULL COMMENT "",
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`@timestamp`)
COMMENT "OLAP"
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
"""

// Same columns, but no index on `request`.
sql """
CREATE TABLE ${indexTbName3} (
`@timestamp` int(11) NULL COMMENT "",
`clientip` varchar(20) NULL COMMENT "",
`request` text NULL COMMENT "",
`status` int(11) NULL COMMENT "",
`size` int(11) NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`@timestamp`)
COMMENT "OLAP"
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
"""

// Stream-loads a file into a table and validates the load result.
//
// Parameters (positional):
//   table_name            - target table passed to streamLoad
//   label                 - label prefix; a random UUID is appended to it
//   read_flag             - value sent as the 'read_json_by_line' header
//   format_flag           - value sent as the 'format' header
//   file_name             - file handed to streamLoad
//   ignore_failure        - when true and expected_succ_rows < 0, the check
//                           callback returns immediately without asserting
//   expected_succ_rows    - when >= 0, 'max_filter_ratio' is set to '1' and
//                           NumberLoadedRows is compared against this value
//   load_to_single_tablet - accepted but not referenced in this closure body
def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
expected_succ_rows = -1, load_to_single_tablet = 'true' ->

// Load the JSON data.
streamLoad {
table "${table_name}"

// Set the HTTP request header parameters.
set 'label', label + "_" + UUID.randomUUID().toString()
set 'read_json_by_line', read_flag
set 'format', format_flag
file file_name // JSON file to import
time 10000 // limit inflight 10s
if (expected_succ_rows >= 0) {
set 'max_filter_ratio', '1'
}

// Declaring a check callback replaces the default check condition,
// so every condition that matters must be verified here.
check { result, exception, startTime, endTime ->
// Best-effort mode: skip all validation.
if (ignore_failure && expected_succ_rows < 0) { return }
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("success", json.Status.toLowerCase())
if (expected_succ_rows >= 0) {
// The caller pinned an exact row count.
assertEquals(json.NumberLoadedRows, expected_succ_rows)
} else {
// Otherwise every row must be either loaded or unselected, and something must have loaded.
assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows)
assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
}
}
}
}

// Load the same data file into both tables, then run the same four
// match_phrase_edge queries against each of them.
try {
load_httplogs_data.call(indexTbName2, indexTbName2, 'true', 'json', 'documents-1000.json')
load_httplogs_data.call(indexTbName3, indexTbName3, 'true', 'json', 'documents-1000.json')

sql "sync"
sql """ set enable_common_expr_pushdown = true; """

// Queries on the table that has the inverted index run with the
// "VMatchPredicate.execute" debug point enabled.
// NOTE(review): presumably the debug point makes the BE fail if the predicate
// is evaluated without the index — confirm against the BE source.
GetDebugPoint().enableDebugPointForAllBEs("VMatchPredicate.execute")
qt_sql """ select count() from ${indexTbName2} where request match_phrase_edge ''; """
qt_sql """ select count() from ${indexTbName2} where request match_phrase_edge 'age'; """
qt_sql """ select count() from ${indexTbName2} where request match_phrase_edge 'es/na'; """
qt_sql """ select count() from ${indexTbName2} where request match_phrase_edge 'ets/images/ti'; """
// The debug point is switched off before querying the table without an index.
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")

qt_sql """ select count() from ${indexTbName3} where request match_phrase_edge ''; """
qt_sql """ select count() from ${indexTbName3} where request match_phrase_edge 'age'; """
qt_sql """ select count() from ${indexTbName3} where request match_phrase_edge 'es/na'; """
qt_sql """ select count() from ${indexTbName3} where request match_phrase_edge 'ets/images/ti'; """
} finally {
// Make sure the debug point is disabled even if a query above throws.
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
}
}