Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 146 additions & 0 deletions cpp/src/arrow/array-list-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
namespace arrow {

using internal::checked_cast;
using internal::checked_pointer_cast;

// ----------------------------------------------------------------------
// List tests
Expand Down Expand Up @@ -340,6 +341,151 @@ TEST_F(TestListArray, TestBuilderPreserveFieleName) {
ASSERT_EQ("counts", type.value_field()->name());
}

// ----------------------------------------------------------------------
// Map tests

// Fixture for the map tests below. SetUp() creates a MapBuilder for
// map(utf8(), int32()) through MakeBuilder; Done() finishes the builder and
// stores the result as a MapArray.
class TestMapArray : public TestBuilder {
 public:
  void SetUp() {
    TestBuilder::SetUp();

    key_type_ = utf8();
    value_type_ = int32();
    type_ = map(key_type_, value_type_);

    // MakeBuilder hands back a type-erased builder; downcast it to MapBuilder.
    std::unique_ptr<ArrayBuilder> generic_builder;
    ASSERT_OK(MakeBuilder(pool_, type_, &generic_builder));
    builder_ = checked_pointer_cast<MapBuilder>(std::move(generic_builder));
  }

  void Done() {
    std::shared_ptr<Array> finished;
    FinishAndCheckPadding(builder_.get(), &finished);
    result_ = std::dynamic_pointer_cast<MapArray>(finished);
  }

 protected:
  std::shared_ptr<DataType> value_type_;
  std::shared_ptr<DataType> key_type_;

  std::shared_ptr<MapBuilder> builder_;
  std::shared_ptr<MapArray> result_;
};

// Checks Equals() and RangeEquals() on map arrays: two arrays built from the
// same offsets/keys/items must compare equal, and a third array built from
// different contents must not.
TEST_F(TestMapArray, Equality) {
  auto& key_builder = checked_cast<StringBuilder&>(*builder_->key_builder());
  auto& item_builder = checked_cast<Int32Builder&>(*builder_->item_builder());

  // Contents used for the two arrays that should compare equal
  std::vector<int32_t> equal_offsets = {0, 1, 2, 5, 6, 7, 8, 10};
  std::vector<util::string_view> equal_keys = {"a", "a", "a", "b", "c",
                                               "a", "a", "a", "a", "b"};
  std::vector<int32_t> equal_values = {1, 2, 3, 4, 5, 2, 2, 2, 5, 6};

  // Contents used for the array that should differ
  std::vector<int32_t> unequal_offsets = {0, 1, 4, 7};
  std::vector<util::string_view> unequal_keys = {"a", "a", "b", "c", "a", "b", "c"};
  std::vector<int32_t> unequal_values = {1, 2, 2, 2, 3, 4, 5};

  std::shared_ptr<Array> array, equal_array, unequal_array;

  // Build the "equal" contents twice, once into each output
  for (std::shared_ptr<Array>* target : {&array, &equal_array}) {
    ASSERT_OK(builder_->AppendValues(equal_offsets.data(), equal_offsets.size()));
    for (const util::string_view& key : equal_keys) {
      ASSERT_OK(key_builder.Append(key));
    }
    ASSERT_OK(item_builder.AppendValues(equal_values.data(), equal_values.size()));
    ASSERT_OK(builder_->Finish(target));
  }

  // Build the differing array
  ASSERT_OK(builder_->AppendValues(unequal_offsets.data(), unequal_offsets.size()));
  for (const util::string_view& key : unequal_keys) {
    ASSERT_OK(key_builder.Append(key));
  }
  ASSERT_OK(item_builder.AppendValues(unequal_values.data(), unequal_values.size()));
  ASSERT_OK(builder_->Finish(&unequal_array));

  // Whole-array equality
  EXPECT_TRUE(array->Equals(array));
  EXPECT_TRUE(array->Equals(equal_array));
  EXPECT_TRUE(equal_array->Equals(array));
  EXPECT_FALSE(equal_array->Equals(unequal_array));
  EXPECT_FALSE(unequal_array->Equals(equal_array));

  // Range equality against the differing array
  EXPECT_TRUE(array->RangeEquals(0, 1, 0, unequal_array));
  EXPECT_FALSE(array->RangeEquals(0, 2, 0, unequal_array));
  EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_array));
  EXPECT_TRUE(array->RangeEquals(2, 3, 2, unequal_array));
}

// Builds a map(int16(), int16()) array through MapBuilder (two populated maps,
// one null map, one empty map) and compares it with a MapArray assembled
// directly from offsets, keys, items and a validity bitmap.
TEST_F(TestMapArray, BuildingIntToInt) {
  auto map_type = map(int16(), int16());

  // Expected components
  auto expected_keys = ArrayFromJSON(int16(), R"([
0, 1, 2, 3, 4, 5,
0, 1, 2, 3, 4, 5
])");
  auto expected_items = ArrayFromJSON(int16(), R"([
1, 1, 2, 3, 5, 8,
null, null, 0, 1, null, 2
])");
  auto expected_offsets =
      ArrayFromJSON(int32(), "[0, 6, 6, 12, 12]")->data()->buffers[1];
  auto expected_null_bitmap =
      ArrayFromJSON(boolean(), "[1, 0, 1, 1]")->data()->buffers[1];

  MapArray expected(map_type, 4, expected_offsets, expected_keys, expected_items,
                    expected_null_bitmap, 1, 0);

  // Builder under test
  auto key_builder = std::make_shared<Int16Builder>();
  auto item_builder = std::make_shared<Int16Builder>();
  MapBuilder map_builder(default_memory_pool(), key_builder, item_builder);

  // First map: six entries, all items valid
  ASSERT_OK(map_builder.Append());
  ASSERT_OK(key_builder->AppendValues({0, 1, 2, 3, 4, 5}));
  ASSERT_OK(item_builder->AppendValues({1, 1, 2, 3, 5, 8}));

  // Second map: null
  ASSERT_OK(map_builder.AppendNull());

  // Third map: six entries, some items null
  ASSERT_OK(map_builder.Append());
  ASSERT_OK(key_builder->AppendValues({0, 1, 2, 3, 4, 5}));
  ASSERT_OK(item_builder->AppendValues({-1, -1, 0, 1, -1, 2}, {0, 0, 1, 1, 0, 1}));

  // Fourth map: no entries appended
  ASSERT_OK(map_builder.Append());

  std::shared_ptr<Array> built;
  ASSERT_OK(map_builder.Finish(&built));
  ASSERT_OK(ValidateArray(*built));

  ASSERT_ARRAYS_EQUAL(*built, expected);
}

// Builds a map(utf8(), int32()) array entry by entry through MapBuilder and
// compares it with a MapArray assembled directly from offsets, keys, items and
// a validity bitmap.
TEST_F(TestMapArray, BuildingStringToInt) {
  auto map_type = map(utf8(), int32());

  // Expected components
  std::vector<int32_t> offsets = {0, 2, 2, 3, 3};
  auto expected_keys = ArrayFromJSON(utf8(), R"(["joe", "mark", "cap"])");
  auto expected_values = ArrayFromJSON(int32(), "[0, null, 8]");
  std::shared_ptr<Buffer> expected_null_bitmap;
  ASSERT_OK(
      BitUtil::BytesToBits({1, 0, 1, 1}, default_memory_pool(), &expected_null_bitmap));

  MapArray expected(map_type, 4, Buffer::Wrap(offsets), expected_keys, expected_values,
                    expected_null_bitmap, 1);

  // Builder under test
  auto key_builder = std::make_shared<StringBuilder>();
  auto item_builder = std::make_shared<Int32Builder>();
  MapBuilder map_builder(default_memory_pool(), key_builder, item_builder);

  // First map: {"joe": 0, "mark": null}
  ASSERT_OK(map_builder.Append());
  ASSERT_OK(key_builder->Append("joe"));
  ASSERT_OK(item_builder->Append(0));
  ASSERT_OK(key_builder->Append("mark"));
  ASSERT_OK(item_builder->AppendNull());

  // Second map: null
  ASSERT_OK(map_builder.AppendNull());

  // Third map: {"cap": 8}
  ASSERT_OK(map_builder.Append());
  ASSERT_OK(key_builder->Append("cap"));
  ASSERT_OK(item_builder->Append(8));

  // Fourth map: no entries appended
  ASSERT_OK(map_builder.Append());

  std::shared_ptr<Array> built;
  ASSERT_OK(map_builder.Finish(&built));
  ASSERT_OK(ValidateArray(*built));

  ASSERT_ARRAYS_EQUAL(*built, expected);
}

// ----------------------------------------------------------------------
// FixedSizeList tests

Expand Down
89 changes: 81 additions & 8 deletions cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -201,10 +201,7 @@ BooleanArray::BooleanArray(int64_t length, const std::shared_ptr<Buffer>& data,
// ----------------------------------------------------------------------
// ListArray

// Construct a ListArray from ArrayData: debug-asserts that the data's type id
// is LIST, then hands the data to SetData.
ListArray::ListArray(const std::shared_ptr<ArrayData>& data) {
DCHECK_EQ(data->type->id(), Type::LIST);
SetData(data);
}
// Construct a ListArray from ArrayData; all setup is delegated to SetData.
ListArray::ListArray(const std::shared_ptr<ArrayData>& data) {
  this->SetData(data);
}

ListArray::ListArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& value_offsets,
Expand Down Expand Up @@ -275,6 +272,8 @@ Status ListArray::FromArrays(const Array& offsets, const Array& values, MemoryPo
void ListArray::SetData(const std::shared_ptr<ArrayData>& data) {
this->Array::SetData(data);
DCHECK_EQ(data->buffers.size(), 2);
DCHECK(data->type->id() == Type::LIST);
list_type_ = checked_cast<const ListType*>(data->type.get());

auto value_offsets = data->buffers[1];
raw_value_offsets_ = value_offsets == nullptr
Expand All @@ -285,16 +284,47 @@ void ListArray::SetData(const std::shared_ptr<ArrayData>& data) {
values_ = MakeArray(data_->child_data[0]);
}

// Return this array's data type downcast to ListType.
const ListType* ListArray::list_type() const {
  const DataType* untyped = data_->type.get();
  return checked_cast<const ListType*>(untyped);
}

std::shared_ptr<DataType> ListArray::value_type() const {
return list_type()->value_type();
}

std::shared_ptr<Array> ListArray::values() const { return values_; }

// ----------------------------------------------------------------------
// MapArray

// Construct a MapArray from ArrayData; all setup is delegated to SetData.
MapArray::MapArray(const std::shared_ptr<ArrayData>& data) {
  this->SetData(data);
}

// Construct a MapArray from its parts.
//
// The keys and values arrays become the two children of a pair ArrayData
// (typed as the map type's first child), which in turn becomes the single child
// of the map's ArrayData holding the validity bitmap and offsets.
MapArray::MapArray(const std::shared_ptr<DataType>& type, int64_t length,
                   const std::shared_ptr<Buffer>& offsets,
                   const std::shared_ptr<Array>& keys,
                   const std::shared_ptr<Array>& values,
                   const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
                   int64_t offset) {
  const auto key_data = keys->data();
  const auto item_data = values->data();
  const auto pair_type = type->children()[0]->type();

  // The pair ArrayData is given one null buffer and a null count of 0
  auto pair_data = ArrayData::Make(pair_type, key_data->length, {nullptr},
                                   {key_data, item_data}, 0, offset);

  auto map_data = ArrayData::Make(type, length, {null_bitmap, offsets}, {pair_data},
                                  null_count, offset);
  SetData(map_data);
}

void MapArray::SetData(const std::shared_ptr<ArrayData>& data) {
DCHECK_EQ(data->type->id(), Type::MAP);
auto pair_data = data->child_data[0];
DCHECK_EQ(pair_data->type->id(), Type::STRUCT);
DCHECK_EQ(pair_data->null_count, 0);
DCHECK_EQ(pair_data->child_data.size(), 2);
DCHECK_EQ(pair_data->child_data[0]->null_count, 0);

auto pair_list_data = data->Copy();
pair_list_data->type = list(pair_data->type);
this->ListArray::SetData(pair_list_data);
data_->type = data->type;

keys_ = MakeArray(pair_data->child_data[0]);
items_ = MakeArray(pair_data->child_data[1]);
}

// ----------------------------------------------------------------------
// FixedSizeListArray

Expand Down Expand Up @@ -904,6 +934,49 @@ struct ValidateVisitor {
return ValidateOffsets(array);
}

// Validate a MapArray.
//
// Returns Status::Invalid when the length is negative, when a non-empty array
// has no offsets buffer, when the offsets buffer is too small for the length,
// when the keys or values child is null or fails ValidateArray, or when the
// final offset differs from the length of the values or keys child. Otherwise
// returns the result of ValidateOffsets.
Status Visit(const MapArray& array) {
  if (array.length() < 0) {
    return Status::Invalid("Length was negative");
  }

  auto value_offsets = array.value_offsets();
  if (array.length() && !value_offsets) {
    return Status::Invalid("value_offsets_ was null");
  }
  // The check above lets a zero-length array through with a null offsets
  // buffer, so guard the dereference.
  // NOTE(review): value_offset() and ValidateOffsets() below are not visible
  // here; confirm they tolerate a null offsets buffer on an empty array.
  if (value_offsets &&
      value_offsets->size() / static_cast<int>(sizeof(int32_t)) < array.length()) {
    return Status::Invalid("offset buffer size (bytes): ", value_offsets->size(),
                           " isn't large enough for length: ", array.length());
  }

  if (!array.keys()) {
    return Status::Invalid("keys was null");
  }
  // Validate the keys child itself (not values()), matching the error message.
  const Status key_valid = ValidateArray(*array.keys());
  if (!key_valid.ok()) {
    return Status::Invalid("key array invalid: ", key_valid.ToString());
  }

  if (!array.values()) {
    return Status::Invalid("values was null");
  }
  const Status values_valid = ValidateArray(*array.values());
  if (!values_valid.ok()) {
    return Status::Invalid("values array invalid: ", values_valid.ToString());
  }

  // The last offset must match the length of both child arrays
  const int32_t last_offset = array.value_offset(array.length());
  if (array.values()->length() != last_offset) {
    return Status::Invalid("Final offset invariant not equal to values length: ",
                           last_offset, "!=", array.values()->length());
  }
  if (array.keys()->length() != last_offset) {
    return Status::Invalid("Final offset invariant not equal to keys length: ",
                           last_offset, "!=", array.keys()->length());
  }

  return ValidateOffsets(array);
}

Status Visit(const FixedSizeListArray& array) {
if (array.length() < 0) {
return Status::Invalid("Length was negative");
Expand Down
39 changes: 38 additions & 1 deletion cpp/src/arrow/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ class ARROW_EXPORT ListArray : public Array {
static Status FromArrays(const Array& offsets, const Array& values, MemoryPool* pool,
std::shared_ptr<Array>* out);

const ListType* list_type() const;
const ListType* list_type() const { return list_type_; }

/// \brief Return array object containing the list's values
std::shared_ptr<Array> values() const;
Expand All @@ -521,13 +521,50 @@ class ARROW_EXPORT ListArray : public Array {
}

protected:
// this constructor defers SetData to a derived array class
ListArray() = default;
void SetData(const std::shared_ptr<ArrayData>& data);
const int32_t* raw_value_offsets_;

private:
const ListType* list_type_;
std::shared_ptr<Array> values_;
};

// ----------------------------------------------------------------------
// MapArray

/// Concrete Array class for map data
///
/// NB: "value" in this context refers to a pair of a key and the corresponding item
class ARROW_EXPORT MapArray : public ListArray {
 public:
  using TypeClass = MapType;

  /// \brief Construct a MapArray from ArrayData
  explicit MapArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct a MapArray from its offsets, child arrays and validity bitmap
  ///
  /// \param[in] type the map data type
  /// \param[in] length the number of maps in the array
  /// \param[in] value_offsets buffer of offsets delimiting each map's entries
  /// \param[in] keys array of all keys (exposed through keys())
  /// \param[in] values array of all mapped items (exposed through items())
  /// \param[in] null_bitmap validity bitmap, may be null
  /// \param[in] null_count number of null maps
  /// \param[in] offset offset forwarded to the underlying ArrayData
  MapArray(const std::shared_ptr<DataType>& type, int64_t length,
           const std::shared_ptr<Buffer>& value_offsets,
           const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& values,
           const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
           int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Return the cached MapType pointer for this array
  const MapType* map_type() const { return map_type_; }

  /// \brief Return array object containing all map keys
  std::shared_ptr<Array> keys() const { return keys_; }

  /// \brief Return array object containing all mapped items
  std::shared_ptr<Array> items() const { return items_; }

 protected:
  void SetData(const std::shared_ptr<ArrayData>& data);

 private:
  // In-class initializer so map_type() never returns an indeterminate pointer
  const MapType* map_type_ = NULLPTR;
  std::shared_ptr<Array> keys_, items_;
};

// ----------------------------------------------------------------------
// FixedSizeListArray

Expand Down
Loading