diff --git a/.gitignore b/.gitignore index a00cbba065a..29e0a85a58d 100644 --- a/.gitignore +++ b/.gitignore @@ -23,4 +23,5 @@ *.so *.dylib .build_cache_dir +.vscode MANIFEST diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc index 284bb57a02b..32f1f90ad08 100644 --- a/cpp/src/arrow/array.cc +++ b/cpp/src/arrow/array.cc @@ -493,6 +493,7 @@ Status MakePrimitiveArray(const std::shared_ptr& type, int64_t length, MAKE_PRIMITIVE_ARRAY_CASE(DOUBLE, DoubleArray); MAKE_PRIMITIVE_ARRAY_CASE(TIME, Int64Array); MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP, TimestampArray); + MAKE_PRIMITIVE_ARRAY_CASE(DATE, DateArray); default: return Status::NotImplemented(type->ToString()); } diff --git a/cpp/src/arrow/ipc/ipc-adapter-test.cc b/cpp/src/arrow/ipc/ipc-adapter-test.cc index 89993638932..289c95d3e40 100644 --- a/cpp/src/arrow/ipc/ipc-adapter-test.cc +++ b/cpp/src/arrow/ipc/ipc-adapter-test.cc @@ -175,7 +175,9 @@ INSTANTIATE_TEST_CASE_P( RoundTripTests, TestRecordBatchParam, ::testing::Values(&MakeIntRecordBatch, &MakeStringTypesRecordBatch, &MakeNonNullRecordBatch, &MakeZeroLengthRecordBatch, &MakeListRecordBatch, - &MakeDeeplyNestedList, &MakeStruct, &MakeUnion, &MakeDictionary)); + &MakeDeeplyNestedList, &MakeStruct, &MakeUnion, &MakeDictionary, + &MakeDateRecordBatch, // &MakeTimeRecordBatch, +)); void TestGetRecordBatchSize(std::shared_ptr batch) { ipc::MockOutputStream mock; diff --git a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc index 0458b85f007..0c485deac6b 100644 --- a/cpp/src/arrow/ipc/json-internal.cc +++ b/cpp/src/arrow/ipc/json-internal.cc @@ -521,6 +521,20 @@ class JsonArrayWriter : public ArrayVisitor { Status Visit(const BinaryArray& array) override { return WriteVarBytes(array); } + Status Visit(const DateArray& array) override { return WritePrimitive(array); } + + Status Visit(const TimeArray& array) override { return WritePrimitive(array); } + + Status Visit(const TimestampArray& array) override { return WritePrimitive(array); } + + Status Visit(const IntervalArray& array) override { + return Status::NotImplemented("interval"); + } + + Status Visit(const DecimalArray& array) override { + return Status::NotImplemented("decimal"); + } + Status Visit(const ListArray& array) override { WriteValidityField(array); WriteIntegerField("OFFSET", array.raw_value_offsets(), array.length() + 1); @@ -829,7 +843,10 @@ class JsonArrayReader { template typename std::enable_if::value || - std::is_base_of::value, + std::is_base_of::value || + std::is_base_of::value || + std::is_base_of::value || + std::is_base_of::value, Status>::type ReadArray(const RjObject& json_array, int32_t length, const std::vector& is_valid, const std::shared_ptr& type, std::shared_ptr* array) { @@ -939,6 +956,7 @@ class JsonArrayReader { return Status::OK(); } + template typename std::enable_if::value, Status>::type ReadArray( const RjObject& json_array, int32_t length, const std::vector& is_valid, @@ -1081,9 +1099,9 @@ class JsonArrayReader { TYPE_CASE(DoubleType); TYPE_CASE(StringType); TYPE_CASE(BinaryType); - NOT_IMPLEMENTED_CASE(DATE); - NOT_IMPLEMENTED_CASE(TIMESTAMP); - NOT_IMPLEMENTED_CASE(TIME); + TYPE_CASE(DateType); + TYPE_CASE(TimestampType); + TYPE_CASE(TimeType); NOT_IMPLEMENTED_CASE(INTERVAL); TYPE_CASE(ListType); TYPE_CASE(StructType); diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index 17a3a5fafe6..ebe6ee59048 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -78,6 +78,22 @@ static Status FloatFromFlatuffer( return Status::OK(); } +static inline TimeUnit FlatbufferToTimeUnit(flatbuf::TimeUnit unit) { + switch (unit) { + case flatbuf::TimeUnit_SECOND: + return TimeUnit::SECOND; + case flatbuf::TimeUnit_MILLISECOND: + return TimeUnit::MILLI; + case flatbuf::TimeUnit_MICROSECOND: + return TimeUnit::MICRO; + case flatbuf::TimeUnit_NANOSECOND: + return TimeUnit::NANO; + } + + return TimeUnit::SECOND; // Default +} + + // Forward declaration static Status FieldToFlatbuffer(FBB& fbb, const std::shared_ptr& field, DictionaryMemo* dictionary_memo, FieldOffset* offset); @@ -165,11 +181,31 @@ static Status UnionToFlatBuffer(FBB& fbb, const std::shared_ptr& type, return Status::OK(); } +static inline flatbuf::TimeUnit TimeUnitToFlatbuffer(TimeUnit unit) { + switch (unit) { + case TimeUnit::SECOND: + return flatbuf::TimeUnit_SECOND; + case TimeUnit::MILLI: + return flatbuf::TimeUnit_MILLISECOND; + case TimeUnit::MICRO: + return flatbuf::TimeUnit_MICROSECOND; + case TimeUnit::NANO: + return flatbuf::TimeUnit_NANOSECOND; + } + + return flatbuf::TimeUnit_SECOND; // Default +} + +#define TIME_TO_FB(fbb, unit, type) \ + flatbuf::Create ## type(fbb, TimeUnitToFlatbuffer(unit)).Union(); + + #define INT_TO_FB_CASE(BIT_WIDTH, IS_SIGNED) \ *out_type = flatbuf::Type_Int; \ *offset = IntToFlatbuffer(fbb, BIT_WIDTH, IS_SIGNED); \ break; + static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data, const std::vector>& children, std::shared_ptr* out) { switch (type) { @@ -190,7 +226,30 @@ static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data, *out = boolean(); return Status::OK(); case flatbuf::Type_Decimal: - case flatbuf::Type_Timestamp: + return Status::NotImplemented("Type Decimal is not implemented"); + case flatbuf::Type_Date: + *out = date(); + return Status::OK(); + case flatbuf::Type_Time: { + auto unit = static_cast(type_data)->unit(); + if ((unit < flatbuf::TimeUnit_MIN) || (unit > flatbuf::TimeUnit_MAX)) { + std::stringstream ss; + ss << "Unknown TimeUnit: " << unit << std::endl; + return Status::Invalid(ss.str()); + } + *out = time(FlatbufferToTimeUnit(unit)); + return Status::OK(); + } + case flatbuf::Type_Timestamp: { + auto unit = static_cast(type_data)->unit(); + if ((unit < flatbuf::TimeUnit_MIN) || (unit > flatbuf::TimeUnit_MAX)) { + std::stringstream ss; + ss << "Unknown TimeUnit: " << unit << std::endl; + return Status::Invalid(ss.str()); + } + *out = timestamp(FlatbufferToTimeUnit(unit)); + return Status::OK(); + } case flatbuf::Type_List: if (children.size() != 1) { return Status::Invalid("List must have exactly 1 child field"); @@ -292,6 +351,22 @@ static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr& type, case Type::UNION: *out_type = flatbuf::Type_Union; return UnionToFlatBuffer(fbb, type, children, dictionary_memo, offset); + case Type::DATE: + *out_type = flatbuf::Type_Date; + *offset = flatbuf::CreateDate(fbb).Union(); + break; + case Type::TIME: { + auto& unit = static_cast(*type).unit; + *out_type = flatbuf::Type_Time; + *offset = TIME_TO_FB(fbb, unit, Time); + } + break; + case Type::TIMESTAMP: { + auto& unit = static_cast(*type).unit; + *out_type = flatbuf::Type_Timestamp; + *offset = TIME_TO_FB(fbb, unit, Timestamp); + } + break; default: *out_type = flatbuf::Type_NONE; // Make clang-tidy happy std::stringstream ss; diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h index dc823662ee1..ee48a87c5be 100644 --- a/cpp/src/arrow/ipc/test-common.h +++ b/cpp/src/arrow/ipc/test-common.h @@ -425,6 +425,36 @@ Status MakeDictionary(std::shared_ptr* out) { return Status::OK(); } +Status MakeDateRecordBatch(std::shared_ptr* out) { + // Make the schema + auto f0 = field("f0", date()); + std::shared_ptr schema(new Schema({f0})); + + // Example data + std::shared_ptr date_array; + std::vector is_valid{true, true, false, true, true}; + std::vector values{0, -7, 636390, 706397, 736390}; + ArrayFromVector(is_valid, values, &date_array); + + out->reset(new RecordBatch(schema, values.size(), {date_array})); + return Status::OK(); +} + +Status MakeTimeRecordBatch(std::shared_ptr* out) { + // Make the schema + auto f0 = field("t0", timestamp(TimeUnit::MILLI)); + std::shared_ptr schema(new Schema({f0})); + + // Example data + std::shared_ptr time_array; + std::vector is_valid{true, true, false, true, true}; + std::vector values{0, -27, 390, 7097, 36390}; + ArrayFromVector(is_valid, values, &time_array); + + out->reset(new RecordBatch(schema, values.size(), {time_array})); + return Status::OK(); +} + } // namespace ipc } // namespace arrow diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc index 7e69e42800e..5808adcfbd0 100644 --- a/cpp/src/arrow/pretty_print.cc +++ b/cpp/src/arrow/pretty_print.cc @@ -145,13 +145,11 @@ class ArrayPrinter : public ArrayVisitor { Status Visit(const BinaryArray& array) override { return WriteVarBytes(array); } - Status Visit(const DateArray& array) override { return Status::NotImplemented("date"); } + Status Visit(const DateArray& array) override { return WritePrimitive(array); } - Status Visit(const TimeArray& array) override { return Status::NotImplemented("time"); } + Status Visit(const TimeArray& array) override { return WritePrimitive(array); } - Status Visit(const TimestampArray& array) override { - return Status::NotImplemented("timestamp"); - } + Status Visit(const TimestampArray& array) override { return WritePrimitive(array); } Status Visit(const IntervalArray& array) override { return Status::NotImplemented("interval"); diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 9b1ab3288eb..5a84f027ee9 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -67,7 +67,7 @@ struct Type { // Variable-length bytes (no guarantee of UTF8-ness) BINARY, - // By default, int32 days since the UNIX epoch + // By default, int64 days since the UNIX epoch DATE, // Exact timestamp encoded with int64 since UNIX epoch @@ -439,7 +439,12 @@ struct ARROW_EXPORT DateType : public FixedWidthType { static std::string name() { return "date"; } }; -enum class TimeUnit : char { SECOND = 0, MILLI = 1, MICRO = 2, NANO = 3 }; +enum class TimeUnit : char { + SECOND = 0, + MILLI = 1, + MICRO = 2, + NANO = 3, +}; struct ARROW_EXPORT TimeType : public FixedWidthType { static constexpr Type::type type_id = Type::TIME; diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h index fc4ad3d87d8..42e7586ef5a 100644 --- a/cpp/src/arrow/type_fwd.h +++ b/cpp/src/arrow/type_fwd.h @@ -88,21 +88,12 @@ _NUMERIC_TYPE_DECL(UInt64); _NUMERIC_TYPE_DECL(HalfFloat); _NUMERIC_TYPE_DECL(Float); _NUMERIC_TYPE_DECL(Double); +_NUMERIC_TYPE_DECL(Date); +_NUMERIC_TYPE_DECL(Time); +_NUMERIC_TYPE_DECL(Timestamp); #undef _NUMERIC_TYPE_DECL -struct DateType; -using DateArray = NumericArray; -using DateBuilder = NumericBuilder; - -struct TimeType; -using TimeArray = NumericArray; -using TimeBuilder = NumericBuilder; - -struct TimestampType; -using TimestampArray = NumericArray; -using TimestampBuilder = NumericBuilder; - struct IntervalType; using IntervalArray = NumericArray; @@ -125,6 +116,7 @@ std::shared_ptr ARROW_EXPORT float64(); std::shared_ptr ARROW_EXPORT utf8(); std::shared_ptr ARROW_EXPORT binary(); std::shared_ptr ARROW_EXPORT date(); +std::shared_ptr ARROW_EXPORT time(); } // namespace arrow diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index d6687c11bcf..c21fa4bf6d4 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -121,7 +121,7 @@ struct TypeTraits { template <> struct TypeTraits { using ArrayType = DateArray; - // using BuilderType = DateBuilder; + using BuilderType = DateBuilder; static inline int64_t bytes_required(int64_t elements) { return elements * sizeof(int64_t); @@ -133,7 +133,7 @@ struct TypeTraits { template <> struct TypeTraits { using ArrayType = TimestampArray; - // using BuilderType = TimestampBuilder; + using BuilderType = TimestampBuilder; static inline int64_t bytes_required(int64_t elements) { return elements * sizeof(int64_t); @@ -144,7 +144,7 @@ struct TypeTraits { template <> struct TypeTraits { using ArrayType = TimeArray; - // using BuilderType = TimestampBuilder; + using BuilderType = TimeBuilder; static inline int64_t bytes_required(int64_t elements) { return elements * sizeof(int64_t);