Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions cpp/src/arrow/array/builder_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,12 +239,6 @@ class DictionaryBuilderBase : public ArrayBuilder {
Status Resize(int64_t capacity) override {
ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
capacity = std::max(capacity, kMinBuilderCapacity);

if (capacity_ == 0) {
// Initialize hash table
// XXX should we let the user pass additional size heuristics?
delta_offset_ = 0;
}
ARROW_RETURN_NOT_OK(indices_builder_.Resize(capacity));
capacity_ = indices_builder_.capacity();
return Status::OK();
Expand All @@ -266,6 +260,10 @@ class DictionaryBuilderBase : public ArrayBuilder {

// Update internals for further uses of this DictionaryBuilder
delta_offset_ = memo_table_->size();

// ARROW-6861: Manually set capacity/length/null count until we can fix up
// Reset to not reset the memo table in ARROW-6869
capacity_ = length_ = null_count_ = 0;
indices_builder_.Reset();

return Status::OK();
Expand Down Expand Up @@ -380,6 +378,7 @@ class DictionaryBuilder : public internal::DictionaryBuilderBase<AdaptiveIntBuil
const uint8_t* valid_bytes = NULLPTR) {
int64_t null_count_before = this->indices_builder_.null_count();
ARROW_RETURN_NOT_OK(this->indices_builder_.AppendValues(values, length, valid_bytes));
this->capacity_ = this->indices_builder_.capacity();
this->length_ += length;
this->null_count_ += this->indices_builder_.null_count() - null_count_before;
return Status::OK();
Expand All @@ -402,6 +401,7 @@ class Dictionary32Builder : public internal::DictionaryBuilderBase<Int32Builder,
const uint8_t* valid_bytes = NULLPTR) {
int64_t null_count_before = this->indices_builder_.null_count();
ARROW_RETURN_NOT_OK(this->indices_builder_.AppendValues(values, length, valid_bytes));
this->capacity_ = this->indices_builder_.capacity();
this->length_ += length;
this->null_count_ += this->indices_builder_.null_count() - null_count_before;
return Status::OK();
Expand Down
51 changes: 51 additions & 0 deletions cpp/src/arrow/array_dict_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,57 @@ TYPED_TEST(TestDictionaryBuilder, Dictionary32_BasicPrimitive) {
ASSERT_TRUE(expected.Equals(result));
}

TYPED_TEST(TestDictionaryBuilder, FinishResetBehavior) {
// ARROW-6861
using c_type = typename TypeParam::c_type;
auto type = std::make_shared<TypeParam>();

Dictionary32Builder<TypeParam> builder;

ASSERT_OK(builder.Append(static_cast<c_type>(1)));
ASSERT_OK(builder.AppendNull());
ASSERT_OK(builder.Append(static_cast<c_type>(1)));
ASSERT_OK(builder.Append(static_cast<c_type>(2)));

// Properties from indices_builder propagated
ASSERT_LT(0, builder.capacity());
ASSERT_LT(0, builder.null_count());
ASSERT_EQ(4, builder.length());

std::shared_ptr<Array> result;
ASSERT_OK(builder.Finish(&result));

// Everything reset
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should you try an actual re-use of the builder below? In case some internal state (e.g. the memo) is not properly reinitialized.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, no problem.

ASSERT_EQ(0, builder.capacity());
ASSERT_EQ(0, builder.length());
ASSERT_EQ(0, builder.null_count());

// Use the builder again
ASSERT_OK(builder.Append(static_cast<c_type>(3)));
ASSERT_OK(builder.AppendNull());
ASSERT_OK(builder.Append(static_cast<c_type>(4)));

ASSERT_OK(builder.Finish(&result));

// Dictionary has 4 elements because the dictionary memo was not reset. This
// behavior will change after ARROW-6869
ASSERT_EQ(2, static_cast<const DictionaryArray&>(*result).dictionary()->length());
}

TEST(TestDictionaryBuilderAdHoc, AppendIndicesUpdateCapacity) {
DictionaryBuilder<Int32Type> builder;
Dictionary32Builder<Int32Type> builder32;

std::vector<int32_t> indices_i32 = {0, 1, 2};
std::vector<int64_t> indices_i64 = {0, 1, 2};

ASSERT_OK(builder.AppendIndices(indices_i64.data(), 3));
ASSERT_OK(builder32.AppendIndices(indices_i32.data(), 3));

ASSERT_LT(0, builder.capacity());
ASSERT_LT(0, builder32.capacity());
}

TEST(TestStringDictionaryBuilder, Basic) {
// Build the dictionary Array
StringDictionaryBuilder builder;
Expand Down