Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions be/src/olap/rowset/segment_v2/column_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,9 @@ Status ScalarColumnWriter::init() {
return Status::OK();
}
Status add_nulls(uint32_t count) override { return Status::OK(); }
Status add_array_nulls(uint32_t row_id) override { return Status::OK(); }
// No-op implementation: both arguments are ignored and success is reported
// unconditionally.
Status add_array_nulls(const uint8_t* null_map, size_t num_rows) override {
return Status::OK();
}
Status finish() override { return Status::OK(); }
int64_t size() const override { return 0; }
void close_on_error() override {}
Expand Down Expand Up @@ -951,11 +953,7 @@ Status ArrayColumnWriter::append_nullable(const uint8_t* null_map, const uint8_t
RETURN_IF_ERROR(append_data(ptr, num_rows));
if (is_nullable()) {
if (_opts.need_inverted_index) {
for (int row_id = 0; row_id < num_rows; row_id++) {
if (null_map[row_id] == 1) {
RETURN_IF_ERROR(_inverted_index_builder->add_array_nulls(row_id));
}
}
RETURN_IF_ERROR(_inverted_index_builder->add_array_nulls(null_map, num_rows));
}
RETURN_IF_ERROR(_null_writer->append_data(&null_map, num_rows));
}
Expand Down
52 changes: 38 additions & 14 deletions be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,8 +303,26 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
return Status::OK();
}

Status add_array_nulls(uint32_t row_id) override {
_null_bitmap.add(row_id);
// Records the null rows of one batch in _null_bitmap.
//
// null_map supplies one byte per row; a byte equal to 1 marks that row as null.
// num_rows is the number of entries readable from null_map.
// Always returns Status::OK(); an empty batch or a missing map is a no-op.
Status add_array_nulls(const uint8_t* null_map, size_t num_rows) override {
    DCHECK(_rid >= num_rows);
    if (null_map == nullptr || num_rows == 0) {
        return Status::OK();
    }

    // Per the original author's note, a batch's data is added before its nulls,
    // so _rid has already moved past these rows. Stepping back by num_rows gives
    // the row id of the first row in this batch.
    const auto first_row = static_cast<uint32_t>(_rid - num_rows);

    std::vector<uint32_t> null_row_ids;
    null_row_ids.reserve(num_rows / 8);
    for (size_t pos = 0; pos != num_rows; ++pos) {
        if (null_map[pos] != 1) {
            continue;
        }
        null_row_ids.push_back(first_row + static_cast<uint32_t>(pos));
    }

    // Hand all collected ids to the bitmap in a single bulk insert.
    if (null_row_ids.empty()) {
        return Status::OK();
    }
    _null_bitmap.addMany(null_row_ids.size(), null_row_ids.data());
    return Status::OK();
}

Expand Down Expand Up @@ -378,8 +396,9 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
return Status::OK();
}

Status add_array_values(size_t field_size, const void* value_ptr, const uint8_t* null_map,
const uint8_t* offsets_ptr, size_t count) override {
Status add_array_values(size_t field_size, const void* value_ptr,
const uint8_t* nested_null_map, const uint8_t* offsets_ptr,
size_t count) override {
DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_array_values_count_is_zero",
{ count = 0; })
if (count == 0) {
Expand All @@ -404,7 +423,7 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
lucene::document::Field* new_field = nullptr;
CL_NS(analysis)::TokenStream* ts = nullptr;
for (auto j = start_off; j < start_off + array_elem_size; ++j) {
if (null_map[j] == 1) {
if (nested_null_map && nested_null_map[j] == 1) {
continue;
}
auto* v = (Slice*)((const uint8_t*)value_ptr + j * field_size);
Expand Down Expand Up @@ -500,7 +519,7 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
for (int i = 0; i < count; ++i) {
auto array_elem_size = offsets[i + 1] - offsets[i];
for (size_t j = start_off; j < start_off + array_elem_size; ++j) {
if (null_map[j] == 1) {
if (nested_null_map && nested_null_map[j] == 1) {
continue;
}
const CppType* p = &reinterpret_cast<const CppType*>(value_ptr)[j];
Expand All @@ -520,7 +539,8 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_array_values_field_is_nullptr",
{ _field = nullptr; })
DBUG_EXECUTE_IF(
"InvertedIndexColumnWriterImpl::add_array_values_index_writer_is_nullptr",
"InvertedIndexColumnWriterImpl::add_array_values_index_writer_is_"
"nullptr",
{ _index_writer = nullptr; })
if (_field == nullptr || _index_writer == nullptr) {
LOG(ERROR) << "field or index writer is null in inverted index writer.";
Expand Down Expand Up @@ -582,9 +602,10 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
std::string new_value;
size_t value_length = sizeof(CppType);

DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_value_bkd_writer_add_throw_error", {
_CLTHROWA(CL_ERR_IllegalArgument, ("packedValue should be length=xxx"));
});
DBUG_EXECUTE_IF(
"InvertedIndexColumnWriterImpl::add_value_bkd_writer_add_throw_"
"error",
{ _CLTHROWA(CL_ERR_IllegalArgument, ("packedValue should be length=xxx")); });

_value_key_coder->full_encode_ascending(&value, &new_value);
_bkd_writer->add((const uint8_t*)new_value.c_str(), value_length, _rid);
Expand Down Expand Up @@ -643,8 +664,8 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
_bkd_writer->finish(data_out.get(), index_out.get()),
int(field_type));
} else {
LOG(WARNING)
<< "Inverted index writer create output error occurred: nullptr";
LOG(WARNING) << "Inverted index writer create output error "
"occurred: nullptr";
_CLTHROWA(CL_ERR_IO, "Create output error with nullptr");
}
} else if constexpr (field_is_slice_type(field_type)) {
Expand All @@ -653,9 +674,12 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
InvertedIndexDescriptor::get_temporary_null_bitmap_file_name()));
write_null_bitmap(null_bitmap_out.get());
DBUG_EXECUTE_IF(
"InvertedIndexWriter._throw_clucene_error_in_fulltext_writer_close", {
"InvertedIndexWriter._throw_clucene_error_in_fulltext_"
"writer_close",
{
_CLTHROWA(CL_ERR_IO,
"debug point: test throw error in fulltext index writer");
"debug point: test throw error in fulltext "
"index writer");
});
}
} catch (CLuceneError& e) {
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/inverted_index_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class InvertedIndexColumnWriter {
size_t count) = 0;

virtual Status add_nulls(uint32_t count) = 0;
virtual Status add_array_nulls(uint32_t row_id) = 0;
virtual Status add_array_nulls(const uint8_t* null_map, size_t num_rows) = 0;

virtual Status finish() = 0;

Expand Down
41 changes: 18 additions & 23 deletions be/src/olap/task/index_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -589,9 +589,9 @@ Status IndexBuilder::_write_inverted_index_data(TabletSchemaSPtr tablet_schema,
return converted_result.first;
}
const auto* ptr = (const uint8_t*)converted_result.second->get_data();
if (converted_result.second->get_nullmap()) {
RETURN_IF_ERROR(_add_nullable(column_name, writer_sign, field.get(),
converted_result.second->get_nullmap(), &ptr,
const auto* null_map = converted_result.second->get_nullmap();
if (null_map) {
RETURN_IF_ERROR(_add_nullable(column_name, writer_sign, field.get(), null_map, &ptr,
block->rows()));
} else {
RETURN_IF_ERROR(_add_data(column_name, writer_sign, field.get(), &ptr, block->rows()));
Expand All @@ -606,18 +606,6 @@ Status IndexBuilder::_add_nullable(const std::string& column_name,
const std::pair<int64_t, int64_t>& index_writer_sign,
Field* field, const uint8_t* null_map, const uint8_t** ptr,
size_t num_rows) {
size_t offset = 0;
auto next_run_step = [&]() {
size_t step = 1;
for (auto i = offset + 1; i < num_rows; ++i) {
if (null_map[offset] == null_map[i]) {
step++;
} else {
break;
}
}
return step;
};
// TODO: need to process null data for inverted index
if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
DCHECK(field->get_sub_field_count() == 1);
Expand All @@ -638,20 +626,27 @@ Status IndexBuilder::_add_nullable(const std::string& column_name,
DBUG_EXECUTE_IF("IndexBuilder::_add_nullable_add_array_values_error", {
_CLTHROWA(CL_ERR_IO, "debug point: _add_nullable_add_array_values_error");
})
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_nulls(null_map,
num_rows));
} catch (const std::exception& e) {
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
"CLuceneError occured: {}", e.what());
}
// we should refresh nullmap for array
for (int row_id = 0; row_id < num_rows; row_id++) {
if (null_map && null_map[row_id] == 1) {
RETURN_IF_ERROR(
_inverted_index_builders[index_writer_sign]->add_array_nulls(row_id));
}
}

return Status::OK();
}

size_t offset = 0;
auto next_run_step = [&]() {
size_t step = 1;
for (auto i = offset + 1; i < num_rows; ++i) {
if (null_map[offset] == null_map[i]) {
step++;
} else {
break;
}
}
return step;
};
try {
do {
auto step = next_run_step();
Expand Down
Loading
Loading