diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp index 84f2345bb83e69..85f76651046904 100644 --- a/be/src/olap/task/index_builder.cpp +++ b/be/src/olap/task/index_builder.cpp @@ -612,17 +612,14 @@ Status IndexBuilder::_add_nullable(const std::string& column_name, // [size, offset_ptr, item_data_ptr, item_nullmap_ptr] const auto* data_ptr = reinterpret_cast(*ptr); // total number length - auto element_cnt = size_t((unsigned long)(*data_ptr)); auto offset_data = *(data_ptr + 1); const auto* offsets_ptr = (const uint8_t*)offset_data; try { - if (element_cnt > 0) { - auto data = *(data_ptr + 2); - auto nested_null_map = *(data_ptr + 3); - RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values( - field->get_sub_field(0)->size(), reinterpret_cast(data), - reinterpret_cast(nested_null_map), offsets_ptr, num_rows)); - } + auto data = *(data_ptr + 2); + auto nested_null_map = *(data_ptr + 3); + RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values( + field->get_sub_field(0)->size(), reinterpret_cast(data), + reinterpret_cast(nested_null_map), offsets_ptr, num_rows)); DBUG_EXECUTE_IF("IndexBuilder::_add_nullable_add_array_values_error", { _CLTHROWA(CL_ERR_IO, "debug point: _add_nullable_add_array_values_error"); }) diff --git a/be/test/olap/rowset/segment_v2/inverted_index_array_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index_array_test.cpp index 4473dc39a05bf3..06e1559f091b2c 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index_array_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index_array_test.cpp @@ -942,6 +942,79 @@ class InvertedIndexArrayTest : public testing::Test { } } + void test_array_all_null(std::string_view rowset_id, int seg_id, Field* field) { + EXPECT_TRUE(field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY); + std::string index_path_prefix {InvertedIndexDescriptor::get_index_file_path_prefix( + local_segment_path(kTestDir, rowset_id, seg_id))}; + int index_id = 26034; + std::string index_path = + InvertedIndexDescriptor::get_index_file_path_v1(index_path_prefix, index_id, ""); + auto fs = io::global_local_filesystem(); + + auto index_meta_pb = std::make_unique(); + index_meta_pb->set_index_type(IndexType::INVERTED); + index_meta_pb->set_index_id(index_id); + index_meta_pb->set_index_name("index_inverted_arr_all_null"); + index_meta_pb->clear_col_unique_id(); + index_meta_pb->add_col_unique_id(0); + + TabletIndex idx_meta; + idx_meta.init_from_pb(*index_meta_pb.get()); + auto index_file_writer = std::make_unique( + fs, index_path_prefix, std::string {rowset_id}, seg_id, + InvertedIndexStorageFormatPB::V1); + std::unique_ptr _inverted_index_builder = nullptr; + EXPECT_EQ(InvertedIndexColumnWriter::create(field, &_inverted_index_builder, + index_file_writer.get(), &idx_meta), + Status::OK()); + + // Construct inner array type: DataTypeArray(DataTypeNullable(DataTypeString)) + vectorized::DataTypePtr inner_string_type = std::make_shared( + std::make_shared()); + vectorized::DataTypePtr array_type = + std::make_shared(inner_string_type); + // To support outer array null values, wrap it in a Nullable type + vectorized::DataTypePtr final_type = + std::make_shared(array_type); + + vectorized::MutableColumnPtr col = final_type->create_column(); + col->insert(vectorized::Null()); + col->insert(vectorized::Null()); + + vectorized::ColumnPtr column_array = std::move(col); + vectorized::ColumnWithTypeAndName type_and_name(column_array, final_type, "arr1"); + + vectorized::Block block; + block.insert(type_and_name); + + TabletSchemaSPtr tablet_schema = create_schema_with_array(); + vectorized::OlapBlockDataConvertor convertor(tablet_schema.get(), {0}); + convertor.set_source_content(&block, 0, block.rows()); + + auto [st, accessor] = convertor.convert_column_data(0); + EXPECT_EQ(st, Status::OK()); + const auto* data_ptr = reinterpret_cast(accessor->get_data()); + const auto* offsets_ptr = reinterpret_cast(data_ptr[1]); + const void* item_data = reinterpret_cast(data_ptr[2]); + const auto* item_nullmap = reinterpret_cast(data_ptr[3]); + const auto* null_map = accessor->get_nullmap(); + + auto field_size = field->get_sub_field(0)->size(); + st = _inverted_index_builder->add_array_values(field_size, item_data, item_nullmap, + offsets_ptr, block.rows()); + EXPECT_EQ(st, Status::OK()); + st = _inverted_index_builder->add_array_nulls(null_map, block.rows()); + EXPECT_EQ(st, Status::OK()); + + EXPECT_EQ(_inverted_index_builder->finish(), Status::OK()); + EXPECT_EQ(index_file_writer->close(), Status::OK()); + + std::vector expected_null_bitmap = {0, 1}; + ExpectedDocMap expected {}; + check_terms_stats(index_path_prefix, &expected, expected_null_bitmap, + InvertedIndexStorageFormatPB::V1, &idx_meta); + } + private: static void build_slices(vectorized::PaddedPODArray& slices, const vectorized::ColumnPtr& column_array, size_t num_strings) { @@ -1006,6 +1079,7 @@ TEST_F(InvertedIndexArrayTest, ComplexNullCases) { Field* field = FieldFactory::create(arrayTabletColumn); test_null_write("complex_null", 0, field); test_null_write_v2("complex_null_v2", 0, field); + test_array_all_null("complex_null_all_null", 0, field); delete field; } diff --git a/regression-test/data/inverted_index_p0/array_contains/test_add_index_for_arr.out b/regression-test/data/inverted_index_p0/array_contains/test_add_index_for_arr.out new file mode 100644 index 00000000000000..9bb146c0df551b --- /dev/null +++ b/regression-test/data/inverted_index_p0/array_contains/test_add_index_for_arr.out @@ -0,0 +1,17 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- + +-- !sql -- + +-- !sql -- +1 \N +2 \N + +-- !sql -- + +-- !sql -- + +-- !sql -- +1 \N +2 \N + diff --git a/regression-test/suites/inverted_index_p0/array_contains/test_add_index_for_arr.groovy b/regression-test/suites/inverted_index_p0/array_contains/test_add_index_for_arr.groovy index 6f3e772dd08151..78bec2d11b0d5f 100644 --- a/regression-test/suites/inverted_index_p0/array_contains/test_add_index_for_arr.groovy +++ b/regression-test/suites/inverted_index_p0/array_contains/test_add_index_for_arr.groovy @@ -106,9 +106,9 @@ suite("test_add_index_for_arr") { // query without inverted index // query rows with array_contains - def sql_query_name1 = sql "select id, name[1], description[1] from my_test_array where array_contains(name,'text7')" + def sql_query_name1 = sql "select id, name[1], description[1] from my_test_array where array_contains(name,'text7') order by id" // query rows with !array_contains - def sql_query_name2 = sql "select id, name[1], description[1] from my_test_array where !array_contains(name,'text7')" + def sql_query_name2 = sql "select id, name[1], description[1] from my_test_array where !array_contains(name,'text7') order by id" // add index for name sql "ALTER TABLE my_test_array ADD INDEX name_idx (name) USING INVERTED;" @@ -122,9 +122,9 @@ suite("test_add_index_for_arr") { // query with inverted index sql "set enable_inverted_index_query=true" // query rows with array_contains - def sql_query_name1_inverted = sql "select id, name[1], description[1] from my_test_array where array_contains(name,'text7')" + def sql_query_name1_inverted = sql "select id, name[1], description[1] from my_test_array where array_contains(name,'text7') order by id" // query rows with !array_contains - def sql_query_name2_inverted = sql "select id, name[1], description[1] from my_test_array where !array_contains(name,'text7')" + def sql_query_name2_inverted = sql "select id, name[1], description[1] from my_test_array where !array_contains(name,'text7') order by id" // check result for query without inverted index and with inverted index def size1 = sql_query_name1.size(); @@ -147,9 +147,38 @@ suite("test_add_index_for_arr") { sql "drop index name_idx on my_test_array" wait_for_latest_op_on_table_finish("my_test_array", timeout) - def sql_query_name1_without_inverted = sql "select id, name[1], description[1] from my_test_array where array_contains(name,'text7')" - def sql_query_name2_without_inverted = sql "select id, name[1], description[1] from my_test_array where !array_contains(name,'text7')" + def sql_query_name1_without_inverted = sql "select id, name[1], description[1] from my_test_array where array_contains(name,'text7') order by id" + def sql_query_name2_without_inverted = sql "select id, name[1], description[1] from my_test_array where !array_contains(name,'text7') order by id" assertEquals(sql_query_name1.size(), sql_query_name1_without_inverted.size()) assertEquals(sql_query_name2.size(), sql_query_name2_without_inverted.size()) -} + + def table_name = "test_add_index_for_arr_all_null" + sql "DROP TABLE IF EXISTS ${table_name}" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + `id` int(11) NULL, + `name` ARRAY NULL, + ) + DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + properties("replication_num" = "1"); + """ + + sql "insert into ${table_name} values (1, null), (2, null)" + sql "ALTER TABLE ${table_name} ADD INDEX name_idx (name) USING INVERTED;" + wait_for_latest_op_on_table_finish("${table_name}", timeout) + // build index for name that name data can using inverted index + if (!isCloudMode()) { + sql "BUILD INDEX name_idx ON ${table_name}" + wait_for_build_index_on_partition_finish("${table_name}", timeout) + } + + qt_sql "select /*+SET_VAR(enable_inverted_index_query=true)*/ * from ${table_name} where array_contains(name, 'text7') order by id" + qt_sql "select /*+SET_VAR(enable_inverted_index_query=true)*/ * from ${table_name} where !array_contains(name, 'text7') order by id" + qt_sql "select /*+SET_VAR(enable_inverted_index_query=true)*/ * from ${table_name} where name is null order by id" + + qt_sql "select /*+SET_VAR(enable_inverted_index_query=false)*/ * from ${table_name} where array_contains(name, 'text7') order by id" + qt_sql "select /*+SET_VAR(enable_inverted_index_query=false)*/ * from ${table_name} where !array_contains(name, 'text7') order by id" + qt_sql "select /*+SET_VAR(enable_inverted_index_query=false)*/ * from ${table_name} where name is null order by id" +} \ No newline at end of file