Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions be/src/olap/rowset/segment_v2/segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,19 @@ Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key,
if (tablet_index) {
opts.need_bloom_filter = true;
opts.is_ngram_bf_index = true;
opts.gram_size = tablet_index->get_gram_size();
opts.gram_bf_size = tablet_index->get_gram_bf_size();
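// The index properties are read as int32_t but stored in narrower fields (uint8_t and uint16_t),
// so validate their ranges before assigning to avoid a silent narrowing conversion.
// NOTE(review): if gram_size is the uint8_t field, 256 does not fit (max 255) — confirm the upper bound.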
auto gram_size = tablet_index->get_gram_size();
auto gram_bf_size = tablet_index->get_gram_bf_size();
if (gram_size > 256 || gram_size < 1) {
return Status::NotSupported("Do not support ngram bloom filter for ngram_size: ",
gram_size);
}
if (gram_bf_size > 65535 || gram_bf_size < 64) {
return Status::NotSupported("Do not support ngram bloom filter for bf_size: ",
gram_bf_size);
}
opts.gram_size = gram_size;
opts.gram_bf_size = gram_bf_size;
}

opts.need_bitmap_index = column.has_bitmap_index();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,8 @@ public void checkColumn(Column column, KeysType keysType, boolean enableUniqueKe
if (ngramSize > 256 || ngramSize < 1) {
throw new AnalysisException("gram_size should be integer and less than 256");
}
if (bfSize > 65536 || bfSize < 64) {
throw new AnalysisException("bf_size should be integer and between 64 and 65536");
if (bfSize > 65535 || bfSize < 64) {
throw new AnalysisException("bf_size should be integer and between 64 and 65535");
}
} catch (NumberFormatException e) {
throw new AnalysisException("invalid ngram properties:" + e.getMessage(), e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,51 @@ suite("test_ngram_bloomfilter_index") {
qt_select_eq_3 "SELECT * FROM ${tableName} WHERE http_url = '/%/7212503657802320699%' ORDER BY key_id"
qt_select_in_3 "SELECT * FROM ${tableName} WHERE http_url IN ('/%/7212503657802320699%') ORDER BY key_id"
qt_select_like_3 "SELECT * FROM ${tableName} WHERE http_url like '/%/7212503657802320699%' ORDER BY key_id"

// Case: bf_size = 65536 is out of range, so CREATE TABLE with this NGRAM_BF index must be rejected.
def tableName2 = 'test_ngram_bloomfilter_index2'
sql "DROP TABLE IF EXISTS ${tableName2}"
test {
sql """
CREATE TABLE IF NOT EXISTS ${tableName2} (
`key_id` bigint(20) NULL COMMENT '',
`category` varchar(200) NULL COMMENT '',
`https_url` varchar(300) NULL COMMENT '',
`hostname` varchar(300) NULL,
`http_url` text NULL COMMENT '',
`url_path` varchar(2000) NULL COMMENT '',
`cnt` bigint(20) NULL COMMENT '',
`host_flag` boolean NULL COMMENT '',
INDEX idx_ngrambf (`http_url`) USING NGRAM_BF PROPERTIES("gram_size" = "2", "bf_size" = "65536")
) ENGINE=OLAP
DUPLICATE KEY(`key_id`, `category`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`key_id`) BUCKETS 3
PROPERTIES("replication_num" = "1");
"""
exception "bf_size should be integer and between 64 and 65535"
}

def tableName3 = 'test_ngram_bloomfilter_index3'
sql "DROP TABLE IF EXISTS ${tableName3}"
sql """
CREATE TABLE IF NOT EXISTS ${tableName3} (
`key_id` bigint(20) NULL COMMENT '',
`category` varchar(200) NULL COMMENT '',
`https_url` varchar(300) NULL COMMENT '',
`hostname` varchar(300) NULL,
`http_url` text NULL COMMENT '',
`url_path` varchar(2000) NULL COMMENT '',
`cnt` bigint(20) NULL COMMENT '',
`host_flag` boolean NULL COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`key_id`, `category`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`key_id`) BUCKETS 3
PROPERTIES("replication_num" = "1");
"""
test {
sql """ALTER TABLE ${tableName3} ADD INDEX idx_http_url(http_url) USING NGRAM_BF PROPERTIES("gram_size"="3", "bf_size"="65536") COMMENT 'http_url ngram_bf index'"""
exception "bf_size should be integer and between 64 and 65535"
}
}