diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py
index 9495f0c..b4a43a3 100644
--- a/db_dtypes/__init__.py
+++ b/db_dtypes/__init__.py
@@ -125,7 +125,8 @@ def to_numpy(self, dtype="object"):
 
     def __arrow_array__(self, type=None):
         return pyarrow.array(
-            self.to_numpy(), type=type if type is not None else pyarrow.time64("ns"),
+            self.to_numpy(dtype="object"),  # explicit; same as to_numpy's default
+            type=type if type is not None else pyarrow.time64("ns"),
         )
 
 
diff --git a/tests/unit/test_arrow.py b/tests/unit/test_arrow.py
new file mode 100644
index 0000000..dd0aed7
--- /dev/null
+++ b/tests/unit/test_arrow.py
@@ -0,0 +1,163 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime as dt
+
+import pandas
+import pyarrow
+import pytest
+
+# Imported for its side effect: registers the "date" and "time" dtypes.
+import db_dtypes  # noqa
+
+
+@pytest.mark.parametrize(  # (pandas Series, pyarrow array it should convert to)
+    ("series", "expected"),
+    (  # date cases expect date32; time cases expect time64("ns")
+        (pandas.Series([], dtype="date"), pyarrow.array([], type=pyarrow.date32())),
+        (  # every element missing
+            pandas.Series([None, None, None], dtype="date"),
+            pyarrow.array([None, None, None], type=pyarrow.date32()),
+        ),
+        (  # a null between two dates
+            pandas.Series(
+                [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="date"
+            ),
+            pyarrow.array(
+                [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)],
+                type=pyarrow.date32(),
+            ),
+        ),
+        (  # NOTE(review): presumably the datetime64[ns] boundary days; confirm
+            pandas.Series(
+                [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
+                dtype="date",
+            ),
+            pyarrow.array(
+                [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
+                type=pyarrow.date32(),
+            ),
+        ),
+        (pandas.Series([], dtype="time"), pyarrow.array([], type=pyarrow.time64("ns"))),
+        (  # every element missing
+            pandas.Series([None, None, None], dtype="time"),
+            pyarrow.array([None, None, None], type=pyarrow.time64("ns")),
+        ),
+        (  # midnight, a null, and the last microsecond of the day
+            pandas.Series(
+                [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], dtype="time"
+            ),
+            pyarrow.array(
+                [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)],
+                type=pyarrow.time64("ns"),
+            ),
+        ),
+        (  # no nulls
+            pandas.Series(
+                [
+                    dt.time(0, 0, 0, 0),
+                    dt.time(12, 30, 15, 125_000),
+                    dt.time(23, 59, 59, 999_999),
+                ],
+                dtype="time",
+            ),
+            pyarrow.array(
+                [
+                    dt.time(0, 0, 0, 0),
+                    dt.time(12, 30, 15, 125_000),
+                    dt.time(23, 59, 59, 999_999),
+                ],
+                type=pyarrow.time64("ns"),
+            ),
+        ),
+    ),
+)
+def test_to_arrow(series, expected):  # conversion without an explicit Arrow type
+    array = pyarrow.array(series)  # no type argument is passed here
+    assert array.equals(expected)
+
+
+@pytest.mark.parametrize(  # same inputs, converted to the type of ``expected``
+    ("series", "expected"),
+    (  # date cases target date64; time cases target time32("ms") or time64("us")
+        (pandas.Series([], dtype="date"), pyarrow.array([], type=pyarrow.date64())),
+        (  # every element missing
+            pandas.Series([None, None, None], dtype="date"),
+            pyarrow.array([None, None, None], type=pyarrow.date64()),
+        ),
+        (  # a null between two dates
+            pandas.Series(
+                [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="date"
+            ),
+            pyarrow.array(
+                [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)],
+                type=pyarrow.date64(),
+            ),
+        ),
+        (  # NOTE(review): presumably the datetime64[ns] boundary days; confirm
+            pandas.Series(
+                [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
+                dtype="date",
+            ),
+            pyarrow.array(
+                [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
+                type=pyarrow.date64(),
+            ),
+        ),
+        (pandas.Series([], dtype="time"), pyarrow.array([], type=pyarrow.time32("ms"))),
+        (  # every element missing
+            pandas.Series([None, None, None], dtype="time"),
+            pyarrow.array([None, None, None], type=pyarrow.time32("ms")),
+        ),
+        (  # 999_000 us is a whole number of milliseconds
+            pandas.Series(
+                [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)], dtype="time"
+            ),
+            pyarrow.array(
+                [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)],
+                type=pyarrow.time32("ms"),
+            ),
+        ),
+        (  # 999_999 us is not a whole number of milliseconds; target is "us"
+            pandas.Series(
+                [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], dtype="time"
+            ),
+            pyarrow.array(
+                [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)],
+                type=pyarrow.time64("us"),
+            ),
+        ),
+        (  # no nulls
+            pandas.Series(
+                [
+                    dt.time(0, 0, 0, 0),
+                    dt.time(12, 30, 15, 125_000),
+                    dt.time(23, 59, 59, 999_999),
+                ],
+                dtype="time",
+            ),
+            pyarrow.array(
+                [
+                    dt.time(0, 0, 0, 0),
+                    dt.time(12, 30, 15, 125_000),
+                    dt.time(23, 59, 59, 999_999),
+                ],
+                type=pyarrow.time64("us"),
+            ),
+        ),
+    ),
+)
+def test_to_arrow_w_arrow_type(series, expected):  # caller-chosen Arrow type
+    array = pyarrow.array(series, type=expected.type)  # reuse the target's type
+    assert array.equals(expected)
diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py
index eca3a31..118458e 100644
--- a/tests/unit/test_dtypes.py
+++ b/tests/unit/test_dtypes.py
@@ -15,7 +15,6 @@
 import datetime
 
 import packaging.version
-import pyarrow.lib
 import pytest
 
 pd = pytest.importorskip("pandas")
@@ -670,13 +669,3 @@ def test_bad_time_parsing(value, error):
 def test_bad_date_parsing(value, error):
     with pytest.raises(ValueError, match=error):
         _cls("date")([value])
-
-
-@for_date_and_time
-def test_date___arrow__array__(dtype):
-    a = _make_one(dtype)
-    ar = a.__arrow_array__()
-    assert isinstance(
-        ar, pyarrow.Date32Array if dtype == "date" else pyarrow.Time64Array,
-    )
-    assert [v.as_py() for v in ar] == list(a)