diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc index 7ab61f59f55..d13fa1e1081 100644 --- a/cpp/src/arrow/array.cc +++ b/cpp/src/arrow/array.cc @@ -148,6 +148,7 @@ template class NumericArray; template class NumericArray; template class NumericArray; template class NumericArray; +template class NumericArray; template class NumericArray; template class NumericArray; template class NumericArray; diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h index 1a4a9237a1f..26d53f7d758 100644 --- a/cpp/src/arrow/array.h +++ b/cpp/src/arrow/array.h @@ -468,6 +468,7 @@ extern template class ARROW_EXPORT NumericArray; extern template class ARROW_EXPORT NumericArray; extern template class ARROW_EXPORT NumericArray; extern template class ARROW_EXPORT NumericArray; +extern template class ARROW_EXPORT NumericArray; #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index 493b5e7ccab..1d94dbaa0e9 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -199,6 +199,7 @@ template class PrimitiveBuilder; template class PrimitiveBuilder; template class PrimitiveBuilder; template class PrimitiveBuilder; +template class PrimitiveBuilder; template class PrimitiveBuilder; template class PrimitiveBuilder; template class PrimitiveBuilder; @@ -411,6 +412,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, BUILDER_CASE(INT32, Int32Builder); BUILDER_CASE(UINT64, UInt64Builder); BUILDER_CASE(INT64, Int64Builder); + BUILDER_CASE(DATE, DateBuilder); BUILDER_CASE(TIMESTAMP, TimestampBuilder); BUILDER_CASE(BOOL, BooleanBuilder); diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index 7162d31d246..205139849b4 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -220,6 +220,7 @@ using Int16Builder = NumericBuilder; using Int32Builder = NumericBuilder; using Int64Builder = NumericBuilder; using TimestampBuilder = NumericBuilder; +using DateBuilder = NumericBuilder; using HalfFloatBuilder = NumericBuilder; using FloatBuilder = NumericBuilder; diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index 5b172e41f68..4748cc3c04a 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -88,6 +88,10 @@ std::string StructType::ToString() const { return s.str(); } +std::string DateType::ToString() const { + return std::string("date"); +} + std::string UnionType::ToString() const { std::stringstream s; diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 8637081acd9..73005707c9e 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -413,14 +413,14 @@ struct ARROW_EXPORT UnionType : public DataType { struct ARROW_EXPORT DateType : public FixedWidthType { static constexpr Type::type type_id = Type::DATE; - using c_type = int32_t; + using c_type = int64_t; DateType() : FixedWidthType(Type::DATE) {} int bit_width() const override { return sizeof(c_type) * 8; } Status Accept(TypeVisitor* visitor) const override; - std::string ToString() const override { return name(); } + std::string ToString() const override; static std::string name() { return "date"; } }; diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h index 6d660f4fdee..a9db32df54d 100644 --- a/cpp/src/arrow/type_fwd.h +++ b/cpp/src/arrow/type_fwd.h @@ -87,13 +87,15 @@ _NUMERIC_TYPE_DECL(Double); #undef _NUMERIC_TYPE_DECL struct DateType; -class DateArray; +using DateArray = NumericArray; +using DateBuilder = NumericBuilder; struct TimeType; class TimeArray; struct TimestampType; using TimestampArray = NumericArray; +using TimestampBuilder = NumericBuilder; struct IntervalType; using IntervalArray = NumericArray; diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index 3aaec0bd593..5616018d934 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -90,6 +90,14 @@ struct TypeTraits { static inline int bytes_required(int elements) { return elements * sizeof(int64_t); } }; +template <> +struct TypeTraits { + using ArrayType = DateArray; + // using BuilderType = DateBuilder; + + static inline int bytes_required(int elements) { return elements * sizeof(int64_t); } +}; + template <> struct TypeTraits { using ArrayType = TimestampArray; diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 5af93fb5865..39ba4c72e7d 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -45,6 +45,7 @@ from pyarrow.schema import (null, bool_, int8, int16, int32, int64, uint8, uint16, uint32, uint64, + timestamp, date, float_, double, string, list_, struct, field, DataType, Field, Schema, schema) diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx index 6c862751fc2..8b0603996e3 100644 --- a/python/pyarrow/array.pyx +++ b/python/pyarrow/array.pyx @@ -212,6 +212,10 @@ cdef class UInt64Array(NumericArray): pass +cdef class DateArray(NumericArray): + pass + + cdef class FloatArray(NumericArray): pass @@ -239,6 +243,7 @@ cdef dict _array_classes = { Type_INT16: Int16Array, Type_INT32: Int32Array, Type_INT64: Int64Array, + Type_DATE: DateArray, Type_FLOAT: FloatArray, Type_DOUBLE: DoubleArray, Type_LIST: ListArray, @@ -278,7 +283,7 @@ def from_pylist(object list_obj, DataType type=None): if type is None: check_status(pyarrow.ConvertPySequence(list_obj, &sp_array)) else: - raise NotImplementedError + raise NotImplementedError() return box_arrow_array(sp_array) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 15781ced443..419dd74846c 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -39,11 +39,18 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: Type_DOUBLE" arrow::Type::DOUBLE" Type_TIMESTAMP" arrow::Type::TIMESTAMP" + Type_DATE" arrow::Type::DATE" Type_STRING" arrow::Type::STRING" Type_LIST" arrow::Type::LIST" Type_STRUCT" arrow::Type::STRUCT" + enum TimeUnit" arrow::TimeUnit": + TimeUnit_SECOND" arrow::TimeUnit::SECOND" + TimeUnit_MILLI" arrow::TimeUnit::MILLI" + TimeUnit_MICRO" arrow::TimeUnit::MICRO" + TimeUnit_NANO" arrow::TimeUnit::NANO" + cdef cppclass CDataType" arrow::DataType": Type type @@ -74,6 +81,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CStringType" arrow::StringType"(CDataType): pass + cdef cppclass CTimestampType" arrow::TimestampType"(CDataType): + TimeUnit unit + cdef cppclass CField" arrow::Field": c_string name shared_ptr[CDataType] type @@ -132,6 +142,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CInt64Array" arrow::Int64Array"(CArray): int64_t Value(int i) + cdef cppclass CDateArray" arrow::DateArray"(CArray): + int64_t Value(int i) + + cdef cppclass CTimestampArray" arrow::TimestampArray"(CArray): + int64_t Value(int i) + cdef cppclass CFloatArray" arrow::FloatArray"(CArray): float Value(int i) diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx index 0d391e5f26b..847d2019066 100644 --- a/python/pyarrow/scalar.pyx +++ b/python/pyarrow/scalar.pyx @@ -20,6 +20,9 @@ from pyarrow.schema cimport DataType, box_data_type from pyarrow.compat import frombytes import pyarrow.schema as schema +import datetime + + NA = None cdef class NAType(Scalar): @@ -120,6 +123,32 @@ cdef class UInt64Value(ArrayValue): return ap.Value(self.index) +cdef class DateValue(ArrayValue): + + def as_py(self): + cdef CDateArray* ap = self.sp_array.get() + return datetime.date.fromtimestamp(ap.Value(self.index) / 1000) + + +cdef class TimestampValue(ArrayValue): + + def as_py(self): + cdef: + CTimestampArray* ap = self.sp_array.get() + CTimestampType* dtype = ap.type().get() + int64_t val = ap.Value(self.index) + + if dtype.unit == TimeUnit_SECOND: + return datetime.datetime.utcfromtimestamp(val) + elif dtype.unit == TimeUnit_MILLI: + return datetime.datetime.utcfromtimestamp(float(val) / 1000) + elif dtype.unit == TimeUnit_MICRO: + return datetime.datetime.utcfromtimestamp(float(val) / 1000000) + else: + # TimeUnit_NANO + raise NotImplementedError("Cannot convert nanosecond timestamps to datetime.datetime") + + cdef class FloatValue(ArrayValue): def as_py(self): @@ -184,6 +213,8 @@ cdef dict _scalar_classes = { Type_INT16: Int16Value, Type_INT32: Int32Value, Type_INT64: Int64Value, + Type_DATE: DateValue, + Type_TIMESTAMP: TimestampValue, Type_FLOAT: FloatValue, Type_DOUBLE: DoubleValue, Type_LIST: ListValue, diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx index e0badb97641..d05ac9ebc01 100644 --- a/python/pyarrow/schema.pyx +++ b/python/pyarrow/schema.pyx @@ -164,6 +164,7 @@ cdef set PRIMITIVE_TYPES = set([ Type_UINT16, Type_INT16, Type_UINT32, Type_INT32, Type_UINT64, Type_INT64, + Type_TIMESTAMP, Type_DATE, Type_FLOAT, Type_DOUBLE]) def null(): @@ -196,6 +197,12 @@ def uint64(): def int64(): return primitive_type(Type_INT64) +def timestamp(): + return primitive_type(Type_TIMESTAMP) + +def date(): + return primitive_type(Type_DATE) + def float_(): return primitive_type(Type_FLOAT) diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 8937f8db694..3ffee3e8272 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -18,6 +18,7 @@ from pyarrow.compat import unittest import pyarrow +import datetime class TestConvertList(unittest.TestCase): @@ -70,6 +71,33 @@ def test_string(self): assert arr.null_count == 1 assert arr.type == pyarrow.string() + def test_date(self): + data = [datetime.date(2000, 1, 1), None, datetime.date(1970, 1, 1), datetime.date(2040, 2, 26)] + arr = pyarrow.from_pylist(data) + assert len(arr) == 4 + assert arr.type == pyarrow.date() + assert arr.null_count == 1 + assert arr[0].as_py() == datetime.date(2000, 1, 1) + assert arr[1].as_py() is None + assert arr[2].as_py() == datetime.date(1970, 1, 1) + assert arr[3].as_py() == datetime.date(2040, 2, 26) + + def test_timestamp(self): + data = [ + datetime.datetime(2007, 7, 13, 1, 23, 34, 123456), + None, + datetime.datetime(2006, 1, 13, 12, 34, 56, 432539), + datetime.datetime(2010, 8, 13, 5, 46, 57, 437699) + ] + arr = pyarrow.from_pylist(data) + assert len(arr) == 4 + assert arr.type == pyarrow.timestamp() + assert arr.null_count == 1 + assert arr[0].as_py() == datetime.datetime(2007, 7, 13, 1, 23, 34, 123456) + assert arr[1].as_py() is None + assert arr[2].as_py() == datetime.datetime(2006, 1, 13, 12, 34, 56, 432539) + assert arr[3].as_py() == datetime.datetime(2010, 8, 13, 5, 46, 57, 437699) + def test_mixed_nesting_levels(self): pyarrow.from_pylist([1, 2, None]) pyarrow.from_pylist([[1], [2], None]) diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index b527ca7e808..cf50f3d1c2c 100644 --- a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +import datetime import unittest import numpy as np @@ -204,6 +205,20 @@ def test_timestamps_notimezone_nulls(self): }) self._check_pandas_roundtrip(df, timestamps_to_ms=False) + def test_date(self): + df = pd.DataFrame({ + 'date': [ + datetime.date(2000, 1, 1), + None, + datetime.date(1970, 1, 1), + datetime.date(2040, 2, 26) + ]}) + table = A.from_pandas_dataframe(df) + result = table.to_pandas() + expected = df.copy() + expected['date'] = pd.to_datetime(df['date']) + tm.assert_frame_equal(result, expected) + # def test_category(self): # repeats = 1000 # values = [b'foo', None, u'bar', 'qux', np.nan] diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc index ac2f533c408..e0cb7c20be3 100644 --- a/python/src/pyarrow/adapters/builtin.cc +++ b/python/src/pyarrow/adapters/builtin.cc @@ -16,6 +16,7 @@ // under the License. #include +#include #include #include "pyarrow/adapters/builtin.h" @@ -24,6 +25,7 @@ #include "arrow/status.h" #include "pyarrow/helpers.h" +#include "pyarrow/util/datetime.h" using arrow::ArrayBuilder; using arrow::DataType; @@ -55,6 +57,8 @@ class ScalarVisitor { none_count_(0), bool_count_(0), int_count_(0), + date_count_(0), + timestamp_count_(0), float_count_(0), string_count_(0) {} @@ -68,6 +72,10 @@ class ScalarVisitor { ++float_count_; } else if (IsPyInteger(obj)) { ++int_count_; + } else if (PyDate_CheckExact(obj)) { + ++date_count_; + } else if (PyDateTime_CheckExact(obj)) { + ++timestamp_count_; } else if (IsPyBaseString(obj)) { ++string_count_; } else { @@ -82,6 +90,10 @@ class ScalarVisitor { } else if (int_count_) { // TODO(wesm): tighter type later return INT64; + } else if (date_count_) { + return DATE; + } else if (timestamp_count_) { + return TIMESTAMP_US; } else if (bool_count_) { return BOOL; } else if (string_count_) { @@ -100,6 +112,8 @@ class ScalarVisitor { int64_t none_count_; int64_t bool_count_; int64_t int_count_; + int64_t date_count_; + int64_t timestamp_count_; int64_t float_count_; int64_t string_count_; @@ -297,6 +311,56 @@ class Int64Converter : public TypedConverter { } }; +class DateConverter : public TypedConverter { + public: + Status AppendData(PyObject* seq) override { + Py_ssize_t size = PySequence_Size(seq); + RETURN_NOT_OK(typed_builder_->Reserve(size)); + for (int64_t i = 0; i < size; ++i) { + OwnedRef item(PySequence_GetItem(seq, i)); + if (item.obj() == Py_None) { + typed_builder_->AppendNull(); + } else { + PyDateTime_Date* pydate = reinterpret_cast(item.obj()); + typed_builder_->Append(PyDate_to_ms(pydate)); + } + } + return Status::OK(); + } +}; + +class TimestampConverter : public TypedConverter { + public: + Status AppendData(PyObject* seq) override { + Py_ssize_t size = PySequence_Size(seq); + RETURN_NOT_OK(typed_builder_->Reserve(size)); + for (int64_t i = 0; i < size; ++i) { + OwnedRef item(PySequence_GetItem(seq, i)); + if (item.obj() == Py_None) { + typed_builder_->AppendNull(); + } else { + PyDateTime_DateTime* pydatetime = reinterpret_cast(item.obj()); + struct tm datetime = {0}; + datetime.tm_year = PyDateTime_GET_YEAR(pydatetime) - 1900; + datetime.tm_mon = PyDateTime_GET_MONTH(pydatetime) - 1; + datetime.tm_mday = PyDateTime_GET_DAY(pydatetime); + datetime.tm_hour = PyDateTime_DATE_GET_HOUR(pydatetime); + datetime.tm_min = PyDateTime_DATE_GET_MINUTE(pydatetime); + datetime.tm_sec = PyDateTime_DATE_GET_SECOND(pydatetime); + int us = PyDateTime_DATE_GET_MICROSECOND(pydatetime); + RETURN_IF_PYERROR(); + struct tm epoch = {0}; + epoch.tm_year = 70; + epoch.tm_mday = 1; + // Microseconds since the epoch + int64_t val = lrint(difftime(mktime(&datetime), mktime(&epoch))) * 1000000 + us; + typed_builder_->Append(val); + } + } + return Status::OK(); + } +}; + class DoubleConverter : public TypedConverter { public: Status AppendData(PyObject* seq) override { @@ -379,6 +443,10 @@ std::shared_ptr GetConverter(const std::shared_ptr& type return std::make_shared(); case Type::INT64: return std::make_shared(); + case Type::DATE: + return std::make_shared(); + case Type::TIMESTAMP: + return std::make_shared(); case Type::DOUBLE: return std::make_shared(); case Type::STRING: @@ -409,6 +477,7 @@ Status ListConverter::Init(const std::shared_ptr& builder) { Status ConvertPySequence(PyObject* obj, std::shared_ptr* out) { std::shared_ptr type; int64_t size; + PyDateTime_IMPORT; RETURN_NOT_OK(InferArrowType(obj, &size, &type)); // Handle NA / NullType case diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc index 64b70869519..f8dff6d8241 100644 --- a/python/src/pyarrow/adapters/pandas.cc +++ b/python/src/pyarrow/adapters/pandas.cc @@ -35,6 +35,7 @@ #include "pyarrow/common.h" #include "pyarrow/config.h" +#include "pyarrow/util/datetime.h" namespace pyarrow { @@ -167,6 +168,28 @@ class ArrowSerializer { private: Status ConvertData(); + Status ConvertDates(std::shared_ptr* out) { + PyAcquireGIL lock; + + PyObject** objects = reinterpret_cast(PyArray_DATA(arr_)); + arrow::TypePtr string_type(new arrow::DateType()); + arrow::DateBuilder date_builder(pool_, string_type); + RETURN_NOT_OK(date_builder.Resize(length_)); + + Status s; + PyObject* obj; + for (int64_t i = 0; i < length_; ++i) { + obj = objects[i]; + if (PyDate_CheckExact(obj)) { + PyDateTime_Date* pydate = reinterpret_cast(obj); + date_builder.Append(PyDate_to_ms(pydate)); + } else { + date_builder.AppendNull(); + } + } + return date_builder.Finish(out); + } + Status ConvertObjectStrings(std::shared_ptr* out) { PyAcquireGIL lock; @@ -369,6 +392,10 @@ inline Status ArrowSerializer::Convert(std::shared_ptr* out) // TODO: mask not supported here const PyObject** objects = reinterpret_cast(PyArray_DATA(arr_)); + { + PyAcquireGIL lock; + PyDateTime_IMPORT; + } for (int64_t i = 0; i < length_; ++i) { if (PyObject_is_null(objects[i])) { @@ -377,6 +404,8 @@ inline Status ArrowSerializer::Convert(std::shared_ptr* out) return ConvertObjectStrings(out); } else if (PyBool_Check(objects[i])) { return ConvertBooleans(out); + } else if (PyDate_CheckExact(objects[i])) { + return ConvertDates(out); } else { return Status::TypeError("unhandled python type"); } @@ -547,6 +576,17 @@ struct arrow_traits { typedef typename npy_traits::value_type T; }; +template <> +struct arrow_traits { + static constexpr int npy_type = NPY_DATETIME; + static constexpr bool supports_nulls = true; + static constexpr int64_t na_value = std::numeric_limits::min(); + static constexpr bool is_boolean = false; + static constexpr bool is_pandas_numeric_not_nullable = false; + static constexpr bool is_pandas_numeric_nullable = true; + typedef typename npy_traits::value_type T; +}; + template <> struct arrow_traits { static constexpr int npy_type = NPY_OBJECT; @@ -567,24 +607,28 @@ static inline PyObject* make_pystring(const uint8_t* data, int32_t length) { inline void set_numpy_metadata(int type, DataType* datatype, PyArrayObject* out) { if (type == NPY_DATETIME) { - auto timestamp_type = static_cast(datatype); - // We only support ms resolution at the moment PyArray_Descr* descr = PyArray_DESCR(out); auto date_dtype = reinterpret_cast(descr->c_metadata); + if (datatype->type == arrow::Type::TIMESTAMP) { + auto timestamp_type = static_cast(datatype); - switch (timestamp_type->unit) { - case arrow::TimestampType::Unit::SECOND: - date_dtype->meta.base = NPY_FR_s; - break; - case arrow::TimestampType::Unit::MILLI: - date_dtype->meta.base = NPY_FR_ms; - break; - case arrow::TimestampType::Unit::MICRO: - date_dtype->meta.base = NPY_FR_us; - break; - case arrow::TimestampType::Unit::NANO: - date_dtype->meta.base = NPY_FR_ns; - break; + switch (timestamp_type->unit) { + case arrow::TimestampType::Unit::SECOND: + date_dtype->meta.base = NPY_FR_s; + break; + case arrow::TimestampType::Unit::MILLI: + date_dtype->meta.base = NPY_FR_ms; + break; + case arrow::TimestampType::Unit::MICRO: + date_dtype->meta.base = NPY_FR_us; + break; + case arrow::TimestampType::Unit::NANO: + date_dtype->meta.base = NPY_FR_ns; + break; + } + } else { + // datatype->type == arrow::Type::DATE + date_dtype->meta.base = NPY_FR_D; } } } @@ -666,7 +710,7 @@ class ArrowDeserializer { template inline typename std::enable_if< - arrow_traits::is_pandas_numeric_nullable, Status>::type + (T2 != arrow::Type::DATE) & arrow_traits::is_pandas_numeric_nullable, Status>::type ConvertValues(const std::shared_ptr& data) { typedef typename arrow_traits::T T; size_t chunk_offset = 0; @@ -697,6 +741,32 @@ class ArrowDeserializer { return Status::OK(); } + template + inline typename std::enable_if< + T2 == arrow::Type::DATE, Status>::type + ConvertValues(const std::shared_ptr& data) { + typedef typename arrow_traits::T T; + size_t chunk_offset = 0; + + RETURN_NOT_OK(AllocateOutput(arrow_traits::npy_type)); + + for (int c = 0; c < data->num_chunks(); c++) { + const std::shared_ptr arr = data->chunk(c); + auto prim_arr = static_cast(arr.get()); + auto in_values = reinterpret_cast(prim_arr->data()->data()); + auto out_values = reinterpret_cast(PyArray_DATA(out_)) + chunk_offset; + + for (int64_t i = 0; i < arr->length(); ++i) { + // There are 1000 * 60 * 60 * 24 = 86400000ms in a day + out_values[i] = arr->IsNull(i) ? arrow_traits::na_value : in_values[i] / 86400000; + } + + chunk_offset += arr->length(); + } + + return Status::OK(); + } + // Integer specialization template inline typename std::enable_if< @@ -879,6 +949,7 @@ Status ConvertColumnToPandas(const std::shared_ptr& col, PyObject* py_re FROM_ARROW_CASE(FLOAT); FROM_ARROW_CASE(DOUBLE); FROM_ARROW_CASE(STRING); + FROM_ARROW_CASE(DATE); FROM_ARROW_CASE(TIMESTAMP); default: return Status::NotImplemented("Arrow type reading not implemented"); diff --git a/python/src/pyarrow/helpers.cc b/python/src/pyarrow/helpers.cc index 08003aabf9f..af927448493 100644 --- a/python/src/pyarrow/helpers.cc +++ b/python/src/pyarrow/helpers.cc @@ -33,6 +33,8 @@ const std::shared_ptr INT8 = std::make_shared(); const std::shared_ptr INT16 = std::make_shared(); const std::shared_ptr INT32 = std::make_shared(); const std::shared_ptr INT64 = std::make_shared(); +const std::shared_ptr DATE = std::make_shared(); +const std::shared_ptr TIMESTAMP_US = std::make_shared(TimeUnit::MICRO); const std::shared_ptr FLOAT = std::make_shared(); const std::shared_ptr DOUBLE = std::make_shared(); const std::shared_ptr STRING = std::make_shared(); @@ -54,6 +56,10 @@ std::shared_ptr GetPrimitiveType(Type::type type) { GET_PRIMITIVE_TYPE(INT32, Int32Type); GET_PRIMITIVE_TYPE(UINT64, UInt64Type); GET_PRIMITIVE_TYPE(INT64, Int64Type); + GET_PRIMITIVE_TYPE(DATE, DateType); + case Type::TIMESTAMP: + return TIMESTAMP_US; + break; GET_PRIMITIVE_TYPE(BOOL, BooleanType); GET_PRIMITIVE_TYPE(FLOAT, FloatType); GET_PRIMITIVE_TYPE(DOUBLE, DoubleType); diff --git a/python/src/pyarrow/helpers.h b/python/src/pyarrow/helpers.h index fa9c713b0c2..e714bba5db4 100644 --- a/python/src/pyarrow/helpers.h +++ b/python/src/pyarrow/helpers.h @@ -38,6 +38,8 @@ extern const std::shared_ptr INT8; extern const std::shared_ptr INT16; extern const std::shared_ptr INT32; extern const std::shared_ptr INT64; +extern const std::shared_ptr DATE; +extern const std::shared_ptr TIMESTAMP_US; extern const std::shared_ptr FLOAT; extern const std::shared_ptr DOUBLE; extern const std::shared_ptr STRING; diff --git a/python/src/pyarrow/util/datetime.h b/python/src/pyarrow/util/datetime.h new file mode 100644 index 00000000000..b67accc388f --- /dev/null +++ b/python/src/pyarrow/util/datetime.h @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef PYARROW_UTIL_DATETIME_H +#define PYARROW_UTIL_DATETIME_H + +#include +#include + +namespace pyarrow { + +inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) { + struct tm date = {0}; + date.tm_year = PyDateTime_GET_YEAR(pydate) - 1900; + date.tm_mon = PyDateTime_GET_MONTH(pydate) - 1; + date.tm_mday = PyDateTime_GET_DAY(pydate); + struct tm epoch = {0}; + epoch.tm_year = 70; + epoch.tm_mday = 1; + // Milliseconds since the epoch + return lrint(difftime(mktime(&date), mktime(&epoch)) * 1000); +} + +} // namespace pyarrow + +#endif // PYARROW_UTIL_DATETIME_H