Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ template class NumericArray<Int16Type>;
// Explicit instantiation definitions: NumericArray is instantiated here for
// each of the listed value types.
template class NumericArray<Int32Type>;
template class NumericArray<Int64Type>;
template class NumericArray<TimestampType>;
template class NumericArray<DateType>;
template class NumericArray<HalfFloatType>;
template class NumericArray<FloatType>;
template class NumericArray<DoubleType>;
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,7 @@ extern template class ARROW_EXPORT NumericArray<HalfFloatType>;
// Explicit instantiation declarations: `extern template` suppresses implicit
// instantiation of these NumericArray specializations in translation units
// that see this header.
extern template class ARROW_EXPORT NumericArray<FloatType>;
extern template class ARROW_EXPORT NumericArray<DoubleType>;
extern template class ARROW_EXPORT NumericArray<TimestampType>;
extern template class ARROW_EXPORT NumericArray<DateType>;

#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ template class PrimitiveBuilder<Int8Type>;
// Explicit instantiation definitions: PrimitiveBuilder is instantiated here
// for each of the listed value types.
template class PrimitiveBuilder<Int16Type>;
template class PrimitiveBuilder<Int32Type>;
template class PrimitiveBuilder<Int64Type>;
template class PrimitiveBuilder<DateType>;
template class PrimitiveBuilder<TimestampType>;
template class PrimitiveBuilder<HalfFloatType>;
template class PrimitiveBuilder<FloatType>;
Expand Down Expand Up @@ -411,6 +412,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
BUILDER_CASE(INT32, Int32Builder);
BUILDER_CASE(UINT64, UInt64Builder);
BUILDER_CASE(INT64, Int64Builder);
BUILDER_CASE(DATE, DateBuilder);
BUILDER_CASE(TIMESTAMP, TimestampBuilder);

BUILDER_CASE(BOOL, BooleanBuilder);
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ using Int16Builder = NumericBuilder<Int16Type>;
// Short alias names for NumericBuilder specialized on each value type.
using Int32Builder = NumericBuilder<Int32Type>;
using Int64Builder = NumericBuilder<Int64Type>;
using TimestampBuilder = NumericBuilder<TimestampType>;
using DateBuilder = NumericBuilder<DateType>;

using HalfFloatBuilder = NumericBuilder<HalfFloatType>;
using FloatBuilder = NumericBuilder<FloatType>;
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ std::string StructType::ToString() const {
return s.str();
}

// String form of the date type: always the literal "date".
std::string DateType::ToString() const {
  return "date";
}

std::string UnionType::ToString() const {
std::stringstream s;

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,14 +413,14 @@ struct ARROW_EXPORT UnionType : public DataType {
// Fixed-width logical type identified by Type::DATE.
// NOTE(review): this hunk is rendered without +/- markers, so for `c_type`
// and `ToString()` both the old and the new line of the change appear;
// presumably only the second line of each pair exists in the final header —
// confirm against the real file.
struct ARROW_EXPORT DateType : public FixedWidthType {
static constexpr Type::type type_id = Type::DATE;

using c_type = int32_t;
using c_type = int64_t;

DateType() : FixedWidthType(Type::DATE) {}

// Width in bits of one stored value: sizeof(c_type) * 8.
int bit_width() const override { return sizeof(c_type) * 8; }

Status Accept(TypeVisitor* visitor) const override;
std::string ToString() const override { return name(); }
std::string ToString() const override;
// Fixed type name, "date".
static std::string name() { return "date"; }
};

Expand Down
4 changes: 3 additions & 1 deletion cpp/src/arrow/type_fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,15 @@ _NUMERIC_TYPE_DECL(Double);
#undef _NUMERIC_TYPE_DECL

// Forward declarations and aliases for the date, time, timestamp and interval
// types, their arrays and (where present) their builders.
// NOTE(review): `class DateArray;` and `using DateArray = ...` both appear
// because this hunk is rendered without +/- markers; presumably only the
// alias remains in the final header — confirm.
struct DateType;
class DateArray;
using DateArray = NumericArray<DateType>;
using DateBuilder = NumericBuilder<DateType>;

struct TimeType;
class TimeArray;

struct TimestampType;
using TimestampArray = NumericArray<TimestampType>;
using TimestampBuilder = NumericBuilder<TimestampType>;

struct IntervalType;
using IntervalArray = NumericArray<IntervalType>;
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/type_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ struct TypeTraits<Int64Type> {
static inline int bytes_required(int elements) { return elements * sizeof(int64_t); }
};

// Traits for DateType: the array class holding its values and the number of
// bytes needed for a given element count.
template <>
struct TypeTraits<DateType> {
  using ArrayType = DateArray;
  // using BuilderType = DateBuilder;

  // Bytes needed to store `elements` values of sizeof(int64_t) bytes each.
  static inline int bytes_required(int elements) {
    return sizeof(int64_t) * elements;
  }
};

template <>
struct TypeTraits<TimestampType> {
using ArrayType = TimestampArray;
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from pyarrow.schema import (null, bool_,
int8, int16, int32, int64,
uint8, uint16, uint32, uint64,
timestamp, date,
float_, double, string,
list_, struct, field,
DataType, Field, Schema, schema)
Expand Down
7 changes: 6 additions & 1 deletion python/pyarrow/array.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,10 @@ cdef class UInt64Array(NumericArray):
pass


# Empty NumericArray subclass for date arrays; it adds no members of its own.
cdef class DateArray(NumericArray):
pass


# Empty NumericArray subclass for float arrays; it adds no members of its own.
cdef class FloatArray(NumericArray):
pass

Expand Down Expand Up @@ -239,6 +243,7 @@ cdef dict _array_classes = {
Type_INT16: Int16Array,
Type_INT32: Int32Array,
Type_INT64: Int64Array,
Type_DATE: DateArray,
Type_FLOAT: FloatArray,
Type_DOUBLE: DoubleArray,
Type_LIST: ListArray,
Expand Down Expand Up @@ -278,7 +283,7 @@ def from_pylist(object list_obj, DataType type=None):
if type is None:
check_status(pyarrow.ConvertPySequence(list_obj, &sp_array))
else:
raise NotImplementedError
raise NotImplementedError()

return box_arrow_array(sp_array)

Expand Down
16 changes: 16 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,18 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
Type_DOUBLE" arrow::Type::DOUBLE"

Type_TIMESTAMP" arrow::Type::TIMESTAMP"
Type_DATE" arrow::Type::DATE"
Type_STRING" arrow::Type::STRING"

Type_LIST" arrow::Type::LIST"
Type_STRUCT" arrow::Type::STRUCT"

# Declaration of the C++ enum arrow::TimeUnit and its four members; the quoted
# text after each Cython name is the C++ name it maps to.
enum TimeUnit" arrow::TimeUnit":
TimeUnit_SECOND" arrow::TimeUnit::SECOND"
TimeUnit_MILLI" arrow::TimeUnit::MILLI"
TimeUnit_MICRO" arrow::TimeUnit::MICRO"
TimeUnit_NANO" arrow::TimeUnit::NANO"

cdef cppclass CDataType" arrow::DataType":
Type type

Expand Down Expand Up @@ -74,6 +81,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CStringType" arrow::StringType"(CDataType):
pass

# Declaration of arrow::TimestampType (a CDataType) exposing its `unit`
# member, which is a TimeUnit.
cdef cppclass CTimestampType" arrow::TimestampType"(CDataType):
TimeUnit unit

cdef cppclass CField" arrow::Field":
c_string name
shared_ptr[CDataType] type
Expand Down Expand Up @@ -132,6 +142,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CInt64Array" arrow::Int64Array"(CArray):
int64_t Value(int i)

# Declaration of arrow::DateArray; Value(i) is declared to return the element
# at index i as int64_t.
cdef cppclass CDateArray" arrow::DateArray"(CArray):
int64_t Value(int i)

# Declaration of arrow::TimestampArray; Value(i) is declared to return the
# element at index i as int64_t.
cdef cppclass CTimestampArray" arrow::TimestampArray"(CArray):
int64_t Value(int i)

cdef cppclass CFloatArray" arrow::FloatArray"(CArray):
float Value(int i)

Expand Down
31 changes: 31 additions & 0 deletions python/pyarrow/scalar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ from pyarrow.schema cimport DataType, box_data_type
from pyarrow.compat import frombytes
import pyarrow.schema as schema

import datetime


NA = None

cdef class NAType(Scalar):
Expand Down Expand Up @@ -120,6 +123,32 @@ cdef class UInt64Value(ArrayValue):
return ap.Value(self.index)


cdef class DateValue(ArrayValue):
    """Scalar view of a single element of a date array."""

    def as_py(self):
        """Return the element as a ``datetime.date``.

        The stored integer is interpreted as milliseconds since
        1970-01-01 (the same scale the previous ``/ 1000`` conversion to
        seconds implied).
        """
        cdef CDateArray* ap = <CDateArray*> self.sp_array.get()
        # Use epoch + timedelta rather than date.fromtimestamp():
        # fromtimestamp() converts in the machine's local time zone, so the
        # resulting date could be off by one day depending on where the code
        # runs, and it can reject pre-epoch values on some platforms.
        # Adding a timedelta to a date only uses the (floored) day count.
        return (datetime.date(1970, 1, 1) +
                datetime.timedelta(milliseconds=ap.Value(self.index)))


cdef class TimestampValue(ArrayValue):
    """Scalar view of a single element of a timestamp array."""

    def as_py(self):
        """Return the element as a naive ``datetime.datetime``.

        The stored integer is scaled according to the ``unit`` of the
        array's timestamp type (seconds, milliseconds or microseconds).

        Raises
        ------
        NotImplementedError
            If the unit is nanoseconds.
        """
        cdef:
            CTimestampArray* ap = <CTimestampArray*> self.sp_array.get()
            CTimestampType* dtype = <CTimestampType*>ap.type().get()
            int64_t val = ap.Value(self.index)

        # Exact integer arithmetic from the epoch: going through float and
        # utcfromtimestamp() makes the sub-second part depend on the
        # interpreter's rounding of the float, and negative values can be
        # rejected on some platforms.
        epoch = datetime.datetime(1970, 1, 1)

        if dtype.unit == TimeUnit_SECOND:
            return epoch + datetime.timedelta(seconds=val)
        elif dtype.unit == TimeUnit_MILLI:
            return epoch + datetime.timedelta(milliseconds=val)
        elif dtype.unit == TimeUnit_MICRO:
            return epoch + datetime.timedelta(microseconds=val)
        else:
            # TimeUnit_NANO
            raise NotImplementedError(
                "Cannot convert nanosecond timestamps to datetime.datetime")


cdef class FloatValue(ArrayValue):

def as_py(self):
Expand Down Expand Up @@ -184,6 +213,8 @@ cdef dict _scalar_classes = {
Type_INT16: Int16Value,
Type_INT32: Int32Value,
Type_INT64: Int64Value,
Type_DATE: DateValue,
Type_TIMESTAMP: TimestampValue,
Type_FLOAT: FloatValue,
Type_DOUBLE: DoubleValue,
Type_LIST: ListValue,
Expand Down
7 changes: 7 additions & 0 deletions python/pyarrow/schema.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ cdef set PRIMITIVE_TYPES = set([
Type_UINT16, Type_INT16,
Type_UINT32, Type_INT32,
Type_UINT64, Type_INT64,
Type_TIMESTAMP, Type_DATE,
Type_FLOAT, Type_DOUBLE])

def null():
Expand Down Expand Up @@ -196,6 +197,12 @@ def uint64():
def int64():
    """Return the data type built by primitive_type() for Type_INT64."""
    int64_type = primitive_type(Type_INT64)
    return int64_type

def timestamp():
    """Return the data type built by primitive_type() for Type_TIMESTAMP."""
    timestamp_type = primitive_type(Type_TIMESTAMP)
    return timestamp_type

def date():
    """Return the data type built by primitive_type() for Type_DATE."""
    date_type = primitive_type(Type_DATE)
    return date_type

def float_():
    """Return the data type built by primitive_type() for Type_FLOAT."""
    float_type = primitive_type(Type_FLOAT)
    return float_type

Expand Down
28 changes: 28 additions & 0 deletions python/pyarrow/tests/test_convert_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pyarrow.compat import unittest
import pyarrow

import datetime

class TestConvertList(unittest.TestCase):

Expand Down Expand Up @@ -70,6 +71,33 @@ def test_string(self):
assert arr.null_count == 1
assert arr.type == pyarrow.string()

def test_date(self):
    # A list of dates with one null must come back as a date-typed array
    # whose elements convert to equal datetime.date objects.
    y2k = datetime.date(2000, 1, 1)
    epoch = datetime.date(1970, 1, 1)
    after_2038 = datetime.date(2040, 2, 26)

    arr = pyarrow.from_pylist([y2k, None, epoch, after_2038])

    assert len(arr) == 4
    assert arr.type == pyarrow.date()
    assert arr.null_count == 1
    assert arr[0].as_py() == y2k
    assert arr[1].as_py() is None
    assert arr[2].as_py() == epoch
    assert arr[3].as_py() == after_2038

def test_timestamp(self):
    # A list of datetimes with one null must come back as a timestamp-typed
    # array whose elements convert to equal datetime.datetime objects,
    # microseconds included.
    first = datetime.datetime(2007, 7, 13, 1, 23, 34, 123456)
    second = datetime.datetime(2006, 1, 13, 12, 34, 56, 432539)
    third = datetime.datetime(2010, 8, 13, 5, 46, 57, 437699)

    arr = pyarrow.from_pylist([first, None, second, third])

    assert len(arr) == 4
    assert arr.type == pyarrow.timestamp()
    assert arr.null_count == 1
    assert arr[0].as_py() == first
    assert arr[1].as_py() is None
    assert arr[2].as_py() == second
    assert arr[3].as_py() == third

def test_mixed_nesting_levels(self):
pyarrow.from_pylist([1, 2, None])
pyarrow.from_pylist([[1], [2], None])
Expand Down
15 changes: 15 additions & 0 deletions python/pyarrow/tests/test_convert_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

import datetime
import unittest

import numpy as np
Expand Down Expand Up @@ -204,6 +205,20 @@ def test_timestamps_notimezone_nulls(self):
})
self._check_pandas_roundtrip(df, timestamps_to_ms=False)

def test_date(self):
    # A column of datetime.date objects (with one null) is converted to a
    # table and back; the result must equal the input with the column
    # passed through pd.to_datetime.
    dates = [
        datetime.date(2000, 1, 1),
        None,
        datetime.date(1970, 1, 1),
        datetime.date(2040, 2, 26),
    ]
    df = pd.DataFrame({'date': dates})

    result = A.from_pandas_dataframe(df).to_pandas()

    expected = df.copy()
    expected['date'] = pd.to_datetime(df['date'])
    tm.assert_frame_equal(result, expected)

# def test_category(self):
# repeats = 1000
# values = [b'foo', None, u'bar', 'qux', np.nan]
Expand Down
Loading