Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 5 additions & 8 deletions be/src/olap/task/index_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -612,17 +612,14 @@ Status IndexBuilder::_add_nullable(const std::string& column_name,
// [size, offset_ptr, item_data_ptr, item_nullmap_ptr]
const auto* data_ptr = reinterpret_cast<const uint64_t*>(*ptr);
// total number length
auto element_cnt = size_t((unsigned long)(*data_ptr));
auto offset_data = *(data_ptr + 1);
const auto* offsets_ptr = (const uint8_t*)offset_data;
try {
if (element_cnt > 0) {
auto data = *(data_ptr + 2);
auto nested_null_map = *(data_ptr + 3);
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
field->get_sub_field(0)->size(), reinterpret_cast<const void*>(data),
reinterpret_cast<const uint8_t*>(nested_null_map), offsets_ptr, num_rows));
}
auto data = *(data_ptr + 2);
auto nested_null_map = *(data_ptr + 3);
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
field->get_sub_field(0)->size(), reinterpret_cast<const void*>(data),
reinterpret_cast<const uint8_t*>(nested_null_map), offsets_ptr, num_rows));
DBUG_EXECUTE_IF("IndexBuilder::_add_nullable_add_array_values_error", {
_CLTHROWA(CL_ERR_IO, "debug point: _add_nullable_add_array_values_error");
})
Expand Down
74 changes: 74 additions & 0 deletions be/test/olap/rowset/segment_v2/inverted_index_array_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,79 @@ class InvertedIndexArrayTest : public testing::Test {
}
}

void test_array_all_null(std::string_view rowset_id, int seg_id, Field* field) {
EXPECT_TRUE(field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY);
std::string index_path_prefix {InvertedIndexDescriptor::get_index_file_path_prefix(
local_segment_path(kTestDir, rowset_id, seg_id))};
int index_id = 26034;
std::string index_path =
InvertedIndexDescriptor::get_index_file_path_v1(index_path_prefix, index_id, "");
auto fs = io::global_local_filesystem();

auto index_meta_pb = std::make_unique<TabletIndexPB>();
index_meta_pb->set_index_type(IndexType::INVERTED);
index_meta_pb->set_index_id(index_id);
index_meta_pb->set_index_name("index_inverted_arr_all_null");
index_meta_pb->clear_col_unique_id();
index_meta_pb->add_col_unique_id(0);

TabletIndex idx_meta;
idx_meta.init_from_pb(*index_meta_pb.get());
auto index_file_writer = std::make_unique<InvertedIndexFileWriter>(
fs, index_path_prefix, std::string {rowset_id}, seg_id,
InvertedIndexStorageFormatPB::V1);
std::unique_ptr<segment_v2::InvertedIndexColumnWriter> _inverted_index_builder = nullptr;
EXPECT_EQ(InvertedIndexColumnWriter::create(field, &_inverted_index_builder,
index_file_writer.get(), &idx_meta),
Status::OK());

// Construct inner array type: DataTypeArray(DataTypeNullable(DataTypeString))
vectorized::DataTypePtr inner_string_type = std::make_shared<vectorized::DataTypeNullable>(
std::make_shared<vectorized::DataTypeString>());
vectorized::DataTypePtr array_type =
std::make_shared<vectorized::DataTypeArray>(inner_string_type);
// To support outer array null values, wrap it in a Nullable type
vectorized::DataTypePtr final_type =
std::make_shared<vectorized::DataTypeNullable>(array_type);

vectorized::MutableColumnPtr col = final_type->create_column();
col->insert(vectorized::Null());
col->insert(vectorized::Null());

vectorized::ColumnPtr column_array = std::move(col);
vectorized::ColumnWithTypeAndName type_and_name(column_array, final_type, "arr1");

vectorized::Block block;
block.insert(type_and_name);

TabletSchemaSPtr tablet_schema = create_schema_with_array();
vectorized::OlapBlockDataConvertor convertor(tablet_schema.get(), {0});
convertor.set_source_content(&block, 0, block.rows());

auto [st, accessor] = convertor.convert_column_data(0);
EXPECT_EQ(st, Status::OK());
const auto* data_ptr = reinterpret_cast<const uint64_t*>(accessor->get_data());
const auto* offsets_ptr = reinterpret_cast<const uint8_t*>(data_ptr[1]);
const void* item_data = reinterpret_cast<const void*>(data_ptr[2]);
const auto* item_nullmap = reinterpret_cast<const uint8_t*>(data_ptr[3]);
const auto* null_map = accessor->get_nullmap();

auto field_size = field->get_sub_field(0)->size();
st = _inverted_index_builder->add_array_values(field_size, item_data, item_nullmap,
offsets_ptr, block.rows());
EXPECT_EQ(st, Status::OK());
st = _inverted_index_builder->add_array_nulls(null_map, block.rows());
EXPECT_EQ(st, Status::OK());

EXPECT_EQ(_inverted_index_builder->finish(), Status::OK());
EXPECT_EQ(index_file_writer->close(), Status::OK());

std::vector<int> expected_null_bitmap = {0, 1};
ExpectedDocMap expected {};
check_terms_stats(index_path_prefix, &expected, expected_null_bitmap,
InvertedIndexStorageFormatPB::V1, &idx_meta);
}

private:
static void build_slices(vectorized::PaddedPODArray<Slice>& slices,
const vectorized::ColumnPtr& column_array, size_t num_strings) {
Expand Down Expand Up @@ -1007,6 +1080,7 @@ TEST_F(InvertedIndexArrayTest, ComplexNullCases) {
Field* field = FieldFactory::create(arrayTabletColumn);
test_null_write("complex_null", 0, field);
test_null_write_v2("complex_null_v2", 0, field);
test_array_all_null("complex_null_all_null", 0, field);
delete field;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --

-- !sql --

-- !sql --
1 \N
2 \N

-- !sql --

-- !sql --

-- !sql --
1 \N
2 \N

Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,9 @@ suite("test_add_index_for_arr") {

// query without inverted index
// query rows with array_contains
def sql_query_name1 = sql "select id, name[1], description[1] from my_test_array where array_contains(name,'text7')"
def sql_query_name1 = sql "select id, name[1], description[1] from my_test_array where array_contains(name,'text7') order by id"
// query rows with !array_contains
def sql_query_name2 = sql "select id, name[1], description[1] from my_test_array where !array_contains(name,'text7')"
def sql_query_name2 = sql "select id, name[1], description[1] from my_test_array where !array_contains(name,'text7') order by id"

// add index for name
sql "ALTER TABLE my_test_array ADD INDEX name_idx (name) USING INVERTED;"
Expand All @@ -122,9 +122,9 @@ suite("test_add_index_for_arr") {
// query with inverted index
sql "set enable_inverted_index_query=true"
// query rows with array_contains
def sql_query_name1_inverted = sql "select id, name[1], description[1] from my_test_array where array_contains(name,'text7')"
def sql_query_name1_inverted = sql "select id, name[1], description[1] from my_test_array where array_contains(name,'text7') order by id"
// query rows with !array_contains
def sql_query_name2_inverted = sql "select id, name[1], description[1] from my_test_array where !array_contains(name,'text7')"
def sql_query_name2_inverted = sql "select id, name[1], description[1] from my_test_array where !array_contains(name,'text7') order by id"

// check result for query without inverted index and with inverted index
def size1 = sql_query_name1.size();
Expand All @@ -147,9 +147,38 @@ suite("test_add_index_for_arr") {
sql "drop index name_idx on my_test_array"
wait_for_latest_op_on_table_finish("my_test_array", timeout)

def sql_query_name1_without_inverted = sql "select id, name[1], description[1] from my_test_array where array_contains(name,'text7')"
def sql_query_name2_without_inverted = sql "select id, name[1], description[1] from my_test_array where !array_contains(name,'text7')"
def sql_query_name1_without_inverted = sql "select id, name[1], description[1] from my_test_array where array_contains(name,'text7') order by id"
def sql_query_name2_without_inverted = sql "select id, name[1], description[1] from my_test_array where !array_contains(name,'text7') order by id"

assertEquals(sql_query_name1.size(), sql_query_name1_without_inverted.size())
assertEquals(sql_query_name2.size(), sql_query_name2_without_inverted.size())
}

def table_name = "test_add_index_for_arr_all_null"
sql "DROP TABLE IF EXISTS ${table_name}"
sql """
CREATE TABLE IF NOT EXISTS ${table_name} (
`id` int(11) NULL,
`name` ARRAY<text> NULL,
)
DUPLICATE KEY(`id`)
DISTRIBUTED BY HASH(`id`) BUCKETS 1
properties("replication_num" = "1");
"""

sql "insert into ${table_name} values (1, null), (2, null)"
sql "ALTER TABLE ${table_name} ADD INDEX name_idx (name) USING INVERTED;"
wait_for_latest_op_on_table_finish("${table_name}", timeout)
// build the index on name so that the existing name data can use the inverted index
if (!isCloudMode()) {
sql "BUILD INDEX name_idx ON ${table_name}"
wait_for_build_index_on_partition_finish("${table_name}", timeout)
}

qt_sql "select /*+SET_VAR(enable_inverted_index_query=true)*/ * from ${table_name} where array_contains(name, 'text7') order by id"
qt_sql "select /*+SET_VAR(enable_inverted_index_query=true)*/ * from ${table_name} where !array_contains(name, 'text7') order by id"
qt_sql "select /*+SET_VAR(enable_inverted_index_query=true)*/ * from ${table_name} where name is null order by id"

qt_sql "select /*+SET_VAR(enable_inverted_index_query=false)*/ * from ${table_name} where array_contains(name, 'text7') order by id"
qt_sql "select /*+SET_VAR(enable_inverted_index_query=false)*/ * from ${table_name} where !array_contains(name, 'text7') order by id"
qt_sql "select /*+SET_VAR(enable_inverted_index_query=false)*/ * from ${table_name} where name is null order by id"
}
Loading