Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 16 additions & 17 deletions be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
Original file line number Diff line number Diff line change
@@ -227,7 +227,7 @@ Status InvertedIndexReader::match_index_search(
InvertedIndexQueryType query_type, const InvertedIndexQueryInfo& query_info,
const FulltextIndexSearcherPtr& index_searcher,
const std::shared_ptr<roaring::Roaring>& term_match_bitmap) {
TQueryOptions queryOptions = runtime_state->query_options();
const auto& queryOptions = runtime_state->query_options();
try {
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer);
auto query = QueryFactory::create(query_type, index_searcher, queryOptions, io_ctx);
@@ -262,13 +262,14 @@ Status FullTextIndexReader::query(const io::IOContext* io_ctx, OlapReaderStatist
VLOG_DEBUG << column_name << " begin to search the fulltext index from clucene, query_str ["
<< search_str << "]";

const auto& queryOptions = runtime_state->query_options();
try {
InvertedIndexQueryInfo query_info;
InvertedIndexQueryCache::CacheKey cache_key;
auto index_file_key = _inverted_index_file_reader->get_index_file_cache_key(&_index_meta);

// terms
if (query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY) {
cache_key = {index_file_key, column_name, query_type, search_str};
query_info.terms.emplace_back(search_str);
} else {
if (query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY) {
@@ -291,6 +292,7 @@ Status FullTextIndexReader::query(const io::IOContext* io_ctx, OlapReaderStatist
query_info.terms = inverted_index::InvertedIndexAnalyzer::get_analyse_result(
reader.get(), analyzer.get(), column_name, query_type);
}

if (query_info.terms.empty()) {
auto msg = fmt::format(
"token parser result is empty for query, "
@@ -304,22 +306,20 @@ Status FullTextIndexReader::query(const io::IOContext* io_ctx, OlapReaderStatist
}
}

std::unique_ptr<lucene::search::Query> query;
// field_name
query_info.field_name = StringUtil::string_to_wstring(column_name);

if (query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY ||
query_type == InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY ||
query_type == InvertedIndexQueryType::MATCH_PHRASE_EDGE_QUERY ||
query_type == InvertedIndexQueryType::MATCH_ALL_QUERY ||
query_type == InvertedIndexQueryType::EQUAL_QUERY ||
query_type == InvertedIndexQueryType::MATCH_ANY_QUERY) {
std::string str_tokens = join(query_info.terms, " ");
if (query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY) {
str_tokens += " " + std::to_string(query_info.slop);
str_tokens += " " + std::to_string(query_info.ordered);
}
cache_key = {index_file_key, column_name, query_type, str_tokens};
// cache_key
std::string str_tokens = join(query_info.terms, " ");
if (query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY) {
str_tokens += " " + std::to_string(query_info.slop);
str_tokens += " " + std::to_string(query_info.ordered);
} else if (query_type == InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY ||
query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY) {
str_tokens += " " + std::to_string(queryOptions.inverted_index_max_expansions);
}
cache_key = {index_file_key, column_name, query_type, std::move(str_tokens)};

auto* cache = InvertedIndexQueryCache::instance();
InvertedIndexQueryCacheHandle cache_handler;

@@ -328,12 +328,11 @@ Status FullTextIndexReader::query(const io::IOContext* io_ctx, OlapReaderStatist
if (cache_status.ok()) {
return Status::OK();
}
FulltextIndexSearcherPtr* searcher_ptr = nullptr;

InvertedIndexCacheHandle inverted_index_cache_handle;
RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, io_ctx, stats));
auto searcher_variant = inverted_index_cache_handle.get_index_searcher();
searcher_ptr = std::get_if<FulltextIndexSearcherPtr>(&searcher_variant);
auto* searcher_ptr = std::get_if<FulltextIndexSearcherPtr>(&searcher_variant);
if (searcher_ptr != nullptr) {
term_match_bitmap = std::make_shared<roaring::Roaring>();
RETURN_IF_ERROR(match_index_search(io_ctx, stats, runtime_state, query_type, query_info,
Expand Down
Original file line number Diff line number Diff line change
@@ -11,3 +11,9 @@
-- !sql --
6

-- !sql --
0

-- !sql --
1

Original file line number Diff line number Diff line change
@@ -20,3 +20,9 @@
-- !sql --
0

-- !sql --
4

-- !sql --
377

Original file line number Diff line number Diff line change
@@ -47,6 +47,8 @@ suite("test_index_match_phrase_prefix_1", "nonConcurrent"){
sql """ INSERT INTO ${indexTbName1} VALUES (6, "O1704361998540E2Cemx9S 123456789", "O1704361998540E2Cemx9S 123456789", "O1704361998540E2Cemx9S 123456789"); """
sql """ INSERT INTO ${indexTbName1} VALUES (7, "O1704361998540E2Cemx9S*123456789", "O1704361998540E2Cemx9S*123456789", "O1704361998540E2Cemx9S*123456789"); """

sql """ INSERT INTO ${indexTbName1} VALUES (1, "", "s1", ""), (2, "", "s2", ""), (3, "", "s3", ""), (4, "", "s4", ""), (5, "", "tv s5", ""); """

try {
sql "sync"
sql """ set enable_common_expr_pushdown = true; """
@@ -58,7 +60,14 @@ suite("test_index_match_phrase_prefix_1", "nonConcurrent"){
qt_sql """ select count() from ${indexTbName1} where c match_phrase_prefix 'O1704361998540E2Cemx9S=123456789'; """
qt_sql """ select count() from ${indexTbName1} where d match_phrase_prefix 'O1704361998540E2Cemx9S=123456789'; """

sql """ set inverted_index_max_expansions = 3; """
qt_sql """ select count() from ${indexTbName1} where c match_phrase_prefix 'tv s'; """

sql """ set inverted_index_max_expansions = 5; """
qt_sql """ select count() from ${indexTbName1} where c match_phrase_prefix 'tv s'; """

} finally {
sql """ set inverted_index_max_expansions = 50; """
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
}
}
Original file line number Diff line number Diff line change
@@ -90,7 +90,14 @@ suite("test_index_match_regexp", "nonConcurrent"){
qt_sql """ select count() from test_index_match_regexp where request match_regexp '.*tickets.*'; """
qt_sql """ select count() from test_index_match_regexp where request match_regexp 'nonexistence'; """

sql """ set inverted_index_max_expansions = 1; """
qt_sql """ select count() from test_index_match_regexp where request match_regexp 'b'; """

sql """ set inverted_index_max_expansions = 50; """
qt_sql """ select count() from test_index_match_regexp where request match_regexp 'b'; """

} finally {
sql """ set inverted_index_max_expansions = 50; """
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
}
}
Loading