Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 20 additions & 55 deletions cpp/src/arrow/ipc/adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -309,66 +309,31 @@ class RecordBatchWriter : public ArrayVisitor {
return Status::OK();
}

Status Visit(const Int8Array& array) override {
return VisitFixedWidth<Int8Array>(array);
}

Status Visit(const Int16Array& array) override {
return VisitFixedWidth<Int16Array>(array);
}

Status Visit(const Int32Array& array) override {
return VisitFixedWidth<Int32Array>(array);
}

Status Visit(const Int64Array& array) override {
return VisitFixedWidth<Int64Array>(array);
}

Status Visit(const UInt8Array& array) override {
return VisitFixedWidth<UInt8Array>(array);
}

Status Visit(const UInt16Array& array) override {
return VisitFixedWidth<UInt16Array>(array);
}

Status Visit(const UInt32Array& array) override {
return VisitFixedWidth<UInt32Array>(array);
}

Status Visit(const UInt64Array& array) override {
return VisitFixedWidth<UInt64Array>(array);
}

Status Visit(const HalfFloatArray& array) override {
return VisitFixedWidth<HalfFloatArray>(array);
}

Status Visit(const FloatArray& array) override {
return VisitFixedWidth<FloatArray>(array);
}

Status Visit(const DoubleArray& array) override {
return VisitFixedWidth<DoubleArray>(array);
}
#define VISIT_FIXED_WIDTH(TYPE) \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a sidenote: When someone has a minute, we should think of how we could do this using std::enable_if instead of macros. I expect this to be much more compact.

Status Visit(const TYPE& array) override { return VisitFixedWidth<TYPE>(array); }

VISIT_FIXED_WIDTH(Int8Array);
VISIT_FIXED_WIDTH(Int16Array);
VISIT_FIXED_WIDTH(Int32Array);
VISIT_FIXED_WIDTH(Int64Array);
VISIT_FIXED_WIDTH(UInt8Array);
VISIT_FIXED_WIDTH(UInt16Array);
VISIT_FIXED_WIDTH(UInt32Array);
VISIT_FIXED_WIDTH(UInt64Array);
VISIT_FIXED_WIDTH(HalfFloatArray);
VISIT_FIXED_WIDTH(FloatArray);
VISIT_FIXED_WIDTH(DoubleArray);
VISIT_FIXED_WIDTH(DateArray);
VISIT_FIXED_WIDTH(Date32Array);
VISIT_FIXED_WIDTH(TimeArray);
VISIT_FIXED_WIDTH(TimestampArray);

#undef VISIT_FIXED_WIDTH

Status Visit(const StringArray& array) override { return VisitBinary(array); }

Status Visit(const BinaryArray& array) override { return VisitBinary(array); }

Status Visit(const DateArray& array) override {
return VisitFixedWidth<DateArray>(array);
}

Status Visit(const TimeArray& array) override {
return VisitFixedWidth<TimeArray>(array);
}

Status Visit(const TimestampArray& array) override {
return VisitFixedWidth<TimestampArray>(array);
}

Status Visit(const ListArray& array) override {
std::shared_ptr<Buffer> value_offsets;
RETURN_NOT_OK(GetZeroBasedValueOffsets<ListArray>(array, &value_offsets));
Expand Down
38 changes: 0 additions & 38 deletions cpp/src/arrow/ipc/feather-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -344,44 +344,6 @@ TEST_F(TestTableWriter, PrimitiveRoundTrip) {
ASSERT_EQ("f1", col->name());
}

Status MakeDictionaryFlat(std::shared_ptr<RecordBatch>* out) {
const int64_t length = 6;

std::vector<bool> is_valid = {true, true, false, true, true, true};
std::shared_ptr<Array> dict1, dict2;

std::vector<std::string> dict1_values = {"foo", "bar", "baz"};
std::vector<std::string> dict2_values = {"foo", "bar", "baz", "qux"};

ArrayFromVector<StringType, std::string>(dict1_values, &dict1);
ArrayFromVector<StringType, std::string>(dict2_values, &dict2);

auto f0_type = arrow::dictionary(arrow::int32(), dict1);
auto f1_type = arrow::dictionary(arrow::int8(), dict1);
auto f2_type = arrow::dictionary(arrow::int32(), dict2);

std::shared_ptr<Array> indices0, indices1, indices2;
std::vector<int32_t> indices0_values = {1, 2, -1, 0, 2, 0};
std::vector<int8_t> indices1_values = {0, 0, 2, 2, 1, 1};
std::vector<int32_t> indices2_values = {3, 0, 2, 1, 0, 2};

ArrayFromVector<Int32Type, int32_t>(is_valid, indices0_values, &indices0);
ArrayFromVector<Int8Type, int8_t>(is_valid, indices1_values, &indices1);
ArrayFromVector<Int32Type, int32_t>(is_valid, indices2_values, &indices2);

auto a0 = std::make_shared<DictionaryArray>(f0_type, indices0);
auto a1 = std::make_shared<DictionaryArray>(f1_type, indices1);
auto a2 = std::make_shared<DictionaryArray>(f2_type, indices2);

// construct batch
std::shared_ptr<Schema> schema(new Schema(
{field("dict1", f0_type), field("sparse", f1_type), field("dense", f2_type)}));

std::vector<std::shared_ptr<Array>> arrays = {a0, a1, a2};
out->reset(new RecordBatch(schema, length, arrays));
return Status::OK();
}

TEST_F(TestTableWriter, CategoryRoundtrip) {
std::shared_ptr<RecordBatch> batch;
ASSERT_OK(MakeDictionaryFlat(&batch));
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/ipc/ipc-adapter-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ INSTANTIATE_TEST_CASE_P(
RoundTripTests, TestRecordBatchParam,
::testing::Values(&MakeIntRecordBatch, &MakeStringTypesRecordBatch,
&MakeNonNullRecordBatch, &MakeZeroLengthRecordBatch, &MakeListRecordBatch,
&MakeDeeplyNestedList, &MakeStruct, &MakeUnion, &MakeDictionary));
&MakeDeeplyNestedList, &MakeStruct, &MakeUnion, &MakeDictionary, &MakeDates,
&MakeTimestamps, &MakeTimes));

void TestGetRecordBatchSize(std::shared_ptr<RecordBatch> batch) {
ipc::MockOutputStream mock;
Expand Down
97 changes: 97 additions & 0 deletions cpp/src/arrow/ipc/test-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,103 @@ Status MakeDictionary(std::shared_ptr<RecordBatch>* out) {
return Status::OK();
}

Status MakeDictionaryFlat(std::shared_ptr<RecordBatch>* out) {
const int64_t length = 6;

std::vector<bool> is_valid = {true, true, false, true, true, true};
std::shared_ptr<Array> dict1, dict2;

std::vector<std::string> dict1_values = {"foo", "bar", "baz"};
std::vector<std::string> dict2_values = {"foo", "bar", "baz", "qux"};

ArrayFromVector<StringType, std::string>(dict1_values, &dict1);
ArrayFromVector<StringType, std::string>(dict2_values, &dict2);

auto f0_type = arrow::dictionary(arrow::int32(), dict1);
auto f1_type = arrow::dictionary(arrow::int8(), dict1);
auto f2_type = arrow::dictionary(arrow::int32(), dict2);

std::shared_ptr<Array> indices0, indices1, indices2;
std::vector<int32_t> indices0_values = {1, 2, -1, 0, 2, 0};
std::vector<int8_t> indices1_values = {0, 0, 2, 2, 1, 1};
std::vector<int32_t> indices2_values = {3, 0, 2, 1, 0, 2};

ArrayFromVector<Int32Type, int32_t>(is_valid, indices0_values, &indices0);
ArrayFromVector<Int8Type, int8_t>(is_valid, indices1_values, &indices1);
ArrayFromVector<Int32Type, int32_t>(is_valid, indices2_values, &indices2);

auto a0 = std::make_shared<DictionaryArray>(f0_type, indices0);
auto a1 = std::make_shared<DictionaryArray>(f1_type, indices1);
auto a2 = std::make_shared<DictionaryArray>(f2_type, indices2);

// construct batch
std::shared_ptr<Schema> schema(new Schema(
{field("dict1", f0_type), field("sparse", f1_type), field("dense", f2_type)}));

std::vector<std::shared_ptr<Array>> arrays = {a0, a1, a2};
out->reset(new RecordBatch(schema, length, arrays));
return Status::OK();
}

Status MakeDates(std::shared_ptr<RecordBatch>* out) {
std::vector<bool> is_valid = {true, true, true, false, true, true, true};
auto f0 = field("f0", date32());
auto f1 = field("f1", date());
std::shared_ptr<Schema> schema(new Schema({f0, f1}));

std::vector<int64_t> date_values = {1489269000000, 1489270000000, 1489271000000,
1489272000000, 1489272000000, 1489273000000};
std::vector<int32_t> date32_values = {0, 1, 2, 3, 4, 5, 6};

std::shared_ptr<Array> date_array, date32_array;
ArrayFromVector<DateType, int64_t>(is_valid, date_values, &date_array);
ArrayFromVector<Date32Type, int32_t>(is_valid, date32_values, &date32_array);

std::vector<std::shared_ptr<Array>> arrays = {date32_array, date_array};
*out = std::make_shared<RecordBatch>(schema, date_array->length(), arrays);
return Status::OK();
}

Status MakeTimestamps(std::shared_ptr<RecordBatch>* out) {
std::vector<bool> is_valid = {true, true, true, false, true, true, true};
auto f0 = field("f0", timestamp(TimeUnit::MILLI));
auto f1 = field("f1", timestamp(TimeUnit::NANO));
auto f2 = field("f2", timestamp("US/Los_Angeles", TimeUnit::SECOND));
std::shared_ptr<Schema> schema(new Schema({f0, f1, f2}));

std::vector<int64_t> ts_values = {1489269000000, 1489270000000, 1489271000000,
1489272000000, 1489272000000, 1489273000000};

std::shared_ptr<Array> a0, a1, a2;
ArrayFromVector<TimestampType, int64_t>(f0->type, is_valid, ts_values, &a0);
ArrayFromVector<TimestampType, int64_t>(f1->type, is_valid, ts_values, &a1);
ArrayFromVector<TimestampType, int64_t>(f2->type, is_valid, ts_values, &a2);

ArrayVector arrays = {a0, a1, a2};
*out = std::make_shared<RecordBatch>(schema, a0->length(), arrays);
return Status::OK();
}

Status MakeTimes(std::shared_ptr<RecordBatch>* out) {
std::vector<bool> is_valid = {true, true, true, false, true, true, true};
auto f0 = field("f0", time(TimeUnit::MILLI));
auto f1 = field("f1", time(TimeUnit::NANO));
auto f2 = field("f2", time(TimeUnit::SECOND));
std::shared_ptr<Schema> schema(new Schema({f0, f1, f2}));

std::vector<int64_t> ts_values = {1489269000000, 1489270000000, 1489271000000,
1489272000000, 1489272000000, 1489273000000};

std::shared_ptr<Array> a0, a1, a2;
ArrayFromVector<TimeType, int64_t>(f0->type, is_valid, ts_values, &a0);
ArrayFromVector<TimeType, int64_t>(f1->type, is_valid, ts_values, &a1);
ArrayFromVector<TimeType, int64_t>(f2->type, is_valid, ts_values, &a2);

ArrayVector arrays = {a0, a1, a2};
*out = std::make_shared<RecordBatch>(schema, a0->length(), arrays);
return Status::OK();
}

} // namespace ipc
} // namespace arrow

Expand Down
18 changes: 18 additions & 0 deletions cpp/src/arrow/test-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@

namespace arrow {

using ArrayVector = std::vector<std::shared_ptr<Array>>;

namespace test {

template <typename T>
Expand Down Expand Up @@ -232,6 +234,22 @@ class TestBase : public ::testing::Test {
MemoryPool* pool_;
};

template <typename TYPE, typename C_TYPE>
void ArrayFromVector(const std::shared_ptr<DataType>& type,
const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values,
std::shared_ptr<Array>* out) {
MemoryPool* pool = default_memory_pool();
typename TypeTraits<TYPE>::BuilderType builder(pool, type);
for (size_t i = 0; i < values.size(); ++i) {
if (is_valid[i]) {
ASSERT_OK(builder.Append(values[i]));
} else {
ASSERT_OK(builder.AppendNull());
}
}
ASSERT_OK(builder.Finish(out));
}

template <typename TYPE, typename C_TYPE>
void ArrayFromVector(const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values,
std::shared_ptr<Array>* out) {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ struct ARROW_EXPORT Date32Type : public FixedWidthType {

Date32Type() : FixedWidthType(Type::DATE32) {}

int bit_width() const override { return static_cast<int>(sizeof(c_type) * 8); }
int bit_width() const override { return static_cast<int>(sizeof(c_type) * 4); }

Status Accept(TypeVisitor* visitor) const override;
std::string ToString() const override;
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/type_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ struct TypeTraits<Int64Type> {
template <>
struct TypeTraits<DateType> {
using ArrayType = DateArray;
// using BuilderType = DateBuilder;
using BuilderType = DateBuilder;

static inline int64_t bytes_required(int64_t elements) {
return elements * sizeof(int64_t);
Expand All @@ -145,7 +145,7 @@ struct TypeTraits<Date32Type> {
template <>
struct TypeTraits<TimestampType> {
using ArrayType = TimestampArray;
// using BuilderType = TimestampBuilder;
using BuilderType = TimestampBuilder;

static inline int64_t bytes_required(int64_t elements) {
return elements * sizeof(int64_t);
Expand All @@ -156,7 +156,7 @@ struct TypeTraits<TimestampType> {
template <>
struct TypeTraits<TimeType> {
using ArrayType = TimeArray;
// using BuilderType = TimestampBuilder;
using BuilderType = TimeBuilder;

static inline int64_t bytes_required(int64_t elements) {
return elements * sizeof(int64_t);
Expand Down