From c3a11fb8015a4bf00d120a44d62fdfbff8be48da Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Sun, 16 Oct 2022 07:58:20 +0100 Subject: [PATCH 1/9] ENH: Improve Arrays --- pandas-stubs/_libs/tslibs/period.pyi | 4 +- pandas-stubs/core/arrays/boolean.pyi | 14 +- pandas-stubs/core/arrays/integer.pyi | 122 ++++- pandas-stubs/core/arrays/interval.pyi | 10 +- pandas-stubs/core/arrays/period.pyi | 10 +- pandas-stubs/core/arrays/sparse/array.pyi | 6 +- pandas-stubs/core/arrays/string_.pyi | 16 +- pandas-stubs/core/construction.pyi | 16 +- tests/test_arrays.py | 551 ++++++++++++++++++++++ 9 files changed, 718 insertions(+), 31 deletions(-) create mode 100644 tests/test_arrays.py diff --git a/pandas-stubs/_libs/tslibs/period.pyi b/pandas-stubs/_libs/tslibs/period.pyi index e0f93e54c..ec51c81f2 100644 --- a/pandas-stubs/_libs/tslibs/period.pyi +++ b/pandas-stubs/_libs/tslibs/period.pyi @@ -2,11 +2,13 @@ from typing import Any class IncompatibleFrequency(ValueError): ... +from pandas._libs.tslibs.offsets import BaseOffset + class Period: def __init__( self, value: Any = ..., - freqstr: Any = ..., + freq: str | BaseOffset | None = ..., ordinal: Any = ..., year: Any = ..., month: int = ..., diff --git a/pandas-stubs/core/arrays/boolean.pyi b/pandas-stubs/core/arrays/boolean.pyi index 935948cc6..db9ca8b82 100644 --- a/pandas-stubs/core/arrays/boolean.pyi +++ b/pandas-stubs/core/arrays/boolean.pyi @@ -1,4 +1,5 @@ import numpy as np +from pandas.core.arrays import ExtensionArray from pandas._typing import ( Scalar, @@ -21,16 +22,15 @@ class BooleanDtype(ExtensionDtype): def construct_array_type(cls) -> type_t[BooleanArray]: ... def __from_arrow__(self, array): ... -def coerce_to_array(values, mask=..., copy: bool = ...): ... - class BooleanArray(BaseMaskedArray): def __init__( self, values: np.ndarray, mask: np.ndarray, copy: bool = ... ) -> None: ... + def __setitem__(self, key: int | np.ndarray | slice, value: object) -> None: ... @property def dtype(self): ... 
- def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): ... - def __setitem__(self, key, value) -> None: ... - def astype(self, dtype, copy: bool = ...): ... - def any(self, skipna: bool = ..., **kwargs): ... - def all(self, skipna: bool = ..., **kwargs): ... + def astype( + self, dtype: str | np.dtype, copy: bool = ... + ) -> np.ndarray | ExtensionArray: ... + def any(self, skipna: bool = ..., **kwargs) -> bool: ... + def all(self, skipna: bool = ..., **kwargs) -> bool: ... diff --git a/pandas-stubs/core/arrays/integer.pyi b/pandas-stubs/core/arrays/integer.pyi index 30046a98a..5cc0be3fc 100644 --- a/pandas-stubs/core/arrays/integer.pyi +++ b/pandas-stubs/core/arrays/integer.pyi @@ -1,3 +1,23 @@ +from typing import ( + Literal, + Sequence, + overload, +) + +import numpy as np +import pandas as pd +from pandas.arrays import ( + BooleanArray, + DatetimeArray, + StringArray, +) + +from pandas._libs.missing import NAType +from pandas._typing import ( + Dtype, + npt, +) + from pandas.core.dtypes.base import ExtensionDtype as ExtensionDtype from .masked import BaseMaskedArray @@ -18,15 +38,103 @@ class _IntegerDtype(ExtensionDtype): def construct_array_type(cls): ... def __from_arrow__(self, array): ... -def safe_cast(values, dtype, copy): ... -def coerce_to_array(values, dtype, mask=..., copy: bool = ...): ... - class IntegerArray(BaseMaskedArray): def dtype(self): ... - def __init__(self, values, mask, copy: bool = ...) -> None: ... - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): ... - def __setitem__(self, key, value) -> None: ... - def astype(self, dtype, copy: bool = ...): ... + def __init__( + self, + values: npt.NDArray[np.integer], + mask: npt.NDArray[np.bool_] | Sequence[bool], + copy: bool = ..., + ) -> None: ... + @overload # type: ignore[override] + def __setitem__(self, key: int, value: float | NAType) -> None: ... 
+ @overload + def __setitem__( + self, + key: Sequence[int] | slice | npt.NDArray[np.integer], + value: float | NAType | Sequence[float | NAType] | npt.NDArray[np.integer], + ) -> None: ... + @overload # type: ignore[override] + def __getitem__(self, item: int) -> int | NAType: ... + @overload + def __getitem__( + self, item: slice | list[int] | npt.NDArray[np.integer] + ) -> IntegerArray: ... + # Note: the ignores are needed below due to types being subclasses, + # e.g., float32 and float64 or bool, int, float, complex + @overload + def astype( # type: ignore[misc] + self, dtype: Literal["str"] | type[str] | np.str_ + ) -> npt.NDArray[np.str_]: ... + @overload + def astype( # type: ignore[misc] + self, dtype: type[bool] | Literal["bool"] | type[np.bool_], copy: bool = ... + ) -> npt.NDArray[np.bool_]: ... + @overload + def astype( + self, + dtype: type[int] + | Literal["i1", "i2", "i4", "i8", "int8", "int16", "int32", "int64"] + | type[np.int8] + | type[np.int16] + | type[np.int32] + | type[np.int64], + copy: bool = ..., + ) -> npt.NDArray[np.signedinteger]: ... + @overload + def astype( + self, + dtype: Literal["u1", "u2", "u4", "u8", "uint8", "uint16", "uint32", "uint64"] + | type[np.uint8] + | type[np.uint16] + | type[np.uint32] + | type[np.uint64], + copy: bool = ..., + ) -> npt.NDArray[np.unsignedinteger]: ... + @overload + def astype( # type: ignore[misc] + self, dtype: Literal["f4", "float32"] | type[np.float32] + ) -> npt.NDArray[np.float32]: ... + @overload + def astype( + self, dtype: type[float] | Literal["float", "float64", "f8"] | type[np.float64] + ) -> npt.NDArray[np.float64]: ... + @overload + def astype( # type: ignore[misc] + self, dtype: Literal["complex64", "c8"] | type[np.complex64] + ) -> npt.NDArray[np.complex64]: ... + @overload + def astype( + self, + dtype: type[complex] + | Literal["complex", "complex128", "c16"] + | type[np.complex128], + ) -> npt.NDArray[np.complex128]: ... 
+ @overload + def astype(self, dtype: Literal["boolean"] | pd.BooleanDtype) -> BooleanArray: ... + @overload + def astype( + self, + dtype: Literal[ + "Int8", "Int16", "Int32", "Int64", "UInt8", "UInt16", "UInt32", "UInt64" + ] + | pd.Int8Dtype + | pd.Int16Dtype + | pd.Int32Dtype + | pd.Int64Dtype + | pd.UInt8Dtype + | pd.UInt16Dtype + | pd.UInt32Dtype + | pd.UInt64Dtype, + ) -> IntegerArray: ... + @overload + def astype(self, dtype: Literal["string"] | pd.StringDtype) -> StringArray: ... + @overload + def astype( + self, dtype: type[np.datetime64] | Literal["M8[ns]"] + ) -> npt.NDArray[np.datetime64]: ... + @overload + def astype(self, dtype: pd.DatetimeTZDtype) -> DatetimeArray: ... class Int8Dtype(_IntegerDtype): ... class Int16Dtype(_IntegerDtype): ... diff --git a/pandas-stubs/core/arrays/interval.pyi b/pandas-stubs/core/arrays/interval.pyi index 1b63ed724..c2036b19d 100644 --- a/pandas-stubs/core/arrays/interval.pyi +++ b/pandas-stubs/core/arrays/interval.pyi @@ -17,13 +17,17 @@ class IntervalArray(IntervalMixin, ExtensionArray): cls, data, closed=..., dtype=..., copy: bool = ..., verify_integrity: bool = ... ): ... @classmethod - def from_breaks(cls, breaks, closed: str = ..., copy: bool = ..., dtype=...): ... + def from_breaks( + cls, breaks, closed: str = ..., copy: bool = ..., dtype=... + ) -> IntervalArray: ... @classmethod def from_arrays( cls, left, right, closed: str = ..., copy: bool = ..., dtype=... - ): ... + ) -> IntervalArray: ... @classmethod - def from_tuples(cls, data, closed: str = ..., copy: bool = ..., dtype=...): ... + def from_tuples( + cls, data, closed: str = ..., copy: bool = ..., dtype=... + ) -> IntervalArray: ... def __iter__(self): ... def __len__(self) -> int: ... def __getitem__(self, value): ... 
diff --git a/pandas-stubs/core/arrays/period.pyi b/pandas-stubs/core/arrays/period.pyi index 081baeb94..ea0fac180 100644 --- a/pandas-stubs/core/arrays/period.pyi +++ b/pandas-stubs/core/arrays/period.pyi @@ -6,10 +6,11 @@ from pandas.core.arrays.datetimelike import ( DatetimeLikeArrayMixin, ) -from pandas._libs.tslibs import Timestamp -from pandas._libs.tslibs.period import Period as Period +from pandas._libs.tslibs import Period +from pandas._libs.tslibs.timestamps import Timestamp +from pandas._typing import npt as npt -from pandas.tseries.offsets import Tick as Tick +from pandas.tseries.offsets import Tick class PeriodArray(DatetimeLikeArrayMixin, DatelikeOps): __array_priority__: int = ... @@ -43,11 +44,8 @@ class PeriodArray(DatetimeLikeArrayMixin, DatelikeOps): def asfreq(self, freq: str | None = ..., how: str = ...) -> Period: ... def astype(self, dtype, copy: bool = ...): ... -def raise_on_incompatible(left, right): ... def period_array( data: Sequence[Period | None], freq: str | Tick | None = ..., copy: bool = ..., ) -> PeriodArray: ... -def validate_dtype_freq(dtype, freq): ... -def dt64arr_to_periodarr(data, freq, tz=...): ... diff --git a/pandas-stubs/core/arrays/sparse/array.pyi b/pandas-stubs/core/arrays/sparse/array.pyi index c447b97a5..eb0650919 100644 --- a/pandas-stubs/core/arrays/sparse/array.pyi +++ b/pandas-stubs/core/arrays/sparse/array.pyi @@ -5,6 +5,8 @@ from pandas.core.arrays import ( ) from pandas.core.base import PandasObject +from pandas._libs.sparse import SparseIndex + class SparseArray(PandasObject, ExtensionArray, ExtensionOpsMixin): def __init__( self, @@ -20,9 +22,9 @@ class SparseArray(PandasObject, ExtensionArray, ExtensionOpsMixin): def __array__(self, dtype=..., copy=...) -> np.ndarray: ... def __setitem__(self, key, value) -> None: ... @property - def sp_index(self): ... + def sp_index(self) -> SparseIndex: ... @property - def sp_values(self): ... + def sp_values(self) -> np.ndarray: ... @property def dtype(self): ... 
@property diff --git a/pandas-stubs/core/arrays/string_.pyi b/pandas-stubs/core/arrays/string_.pyi index 700510120..a6d060b4e 100644 --- a/pandas-stubs/core/arrays/string_.pyi +++ b/pandas-stubs/core/arrays/string_.pyi @@ -1,6 +1,13 @@ +from typing import Sequence + +import numpy as np +import pandas as pd from pandas.core.arrays import PandasArray -from pandas._typing import type_t +from pandas._typing import ( + AnyArrayLike, + type_t, +) from pandas.core.dtypes.base import ExtensionDtype @@ -14,7 +21,12 @@ class StringDtype(ExtensionDtype): def __from_arrow__(self, array): ... class StringArray(PandasArray): - def __init__(self, values, copy: bool = ...) -> None: ... + def __init__( + self, + # Also pd.NA and np.nan but not possible it seems + values: AnyArrayLike | Sequence[str | None], + copy: bool = ..., + ) -> None: ... def __arrow_array__(self, type=...): ... def __setitem__(self, key, value) -> None: ... def fillna(self, value=..., method=..., limit=...): ... diff --git a/pandas-stubs/core/construction.pyi b/pandas-stubs/core/construction.pyi index 3cd517da3..02d855ac5 100644 --- a/pandas-stubs/core/construction.pyi +++ b/pandas-stubs/core/construction.pyi @@ -1,20 +1,30 @@ -from typing import Sequence +from typing import ( + Sequence, + Union, +) import numpy as np +import pandas as pd from pandas.core.indexes.api import Index from pandas.core.series import Series from pandas._typing import ( ArrayLike, Dtype, + npt, ) from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ABCExtensionArray def array( - data: Sequence[object], - dtype: str | np.dtype | ExtensionDtype | None = ..., + # str is forbidden even though Sequence[object] allows "abc" + data: npt.NDArray | Sequence[object] | pd.Index | pd.Series, + dtype: str + | np.dtype[np.generic] + | ExtensionDtype + | type[Union[str, bool, float, int]] + | None = ..., copy: bool = ..., ) -> ABCExtensionArray: ... def extract_array(obj, extract_numpy: bool = ...): ... 
diff --git a/tests/test_arrays.py b/tests/test_arrays.py new file mode 100644 index 000000000..b0bfe528d --- /dev/null +++ b/tests/test_arrays.py @@ -0,0 +1,551 @@ +from typing import Type + +import numpy as np +import numpy.typing as npt +import pandas as pd +from pandas.arrays import ( + BooleanArray, + DatetimeArray, + IntegerArray, + IntervalArray, + PandasArray, + PeriodArray, + SparseArray, + StringArray, + TimedeltaArray, +) +from pandas.core.arrays.base import ExtensionArray +from typing_extensions import assert_type + +from pandas._libs.sparse import ( + BlockIndex, + IntIndex, + SparseIndex, +) + +from tests import check + +from pandas.tseries.offsets import Day + +LIST_MASK = [False, True, False, False, False, False, False, False, True, False] +ARRAY_MASK = np.array(LIST_MASK) + + +def test_integer_array() -> None: + ints = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + non_null_int_arr = IntegerArray(ints, mask=np.array([False] * 10)) + int_arr = IntegerArray(ints, mask=ARRAY_MASK) + check(assert_type(int_arr, IntegerArray), IntegerArray) + check( + assert_type(IntegerArray(ints, mask=ARRAY_MASK, copy=True), IntegerArray), + IntegerArray, + ) + + nulled_ints = [1, 2, 3, 4, 5, 6, 7, 8, None, 10] + check( + assert_type(pd.array(nulled_ints, dtype="UInt8"), Type[ExtensionArray]), + IntegerArray, + ) + check( + assert_type(pd.array(nulled_ints, dtype=pd.UInt8Dtype()), Type[ExtensionArray]), + IntegerArray, + ) + check( + assert_type(pd.array(nulled_ints, dtype=float), Type[ExtensionArray]), + PandasArray, + ) + check(assert_type(pd.array(ints, dtype=int), Type[ExtensionArray]), PandasArray) + + check(assert_type(int_arr.astype("Int64"), IntegerArray), IntegerArray) + check(assert_type(int_arr.astype("UInt8"), IntegerArray), IntegerArray) + check( + assert_type( + int_arr.astype(pd.BooleanDtype()), + BooleanArray, + ), + BooleanArray, + ) + check(assert_type(int_arr.astype(float), npt.NDArray[np.float64]), np.ndarray) + 
check(assert_type(int_arr.astype(np.float64), npt.NDArray[np.float64]), np.ndarray) + non_null_int_arr.astype(bool) + non_null_int_arr.astype(np.bool_) + non_null_int_arr.astype("bool") + + non_null_int_arr.astype(int) + non_null_int_arr.astype("i1") + non_null_int_arr.astype("i2") + non_null_int_arr.astype("i4") + non_null_int_arr.astype("i8") + non_null_int_arr.astype("int8") + non_null_int_arr.astype("int16") + non_null_int_arr.astype("int32") + non_null_int_arr.astype("int64") + non_null_int_arr.astype(np.int8) + non_null_int_arr.astype(np.int16) + non_null_int_arr.astype(np.int32) + non_null_int_arr.astype(np.int64) + + non_null_int_arr.astype("u1") + non_null_int_arr.astype("u2") + non_null_int_arr.astype("u4") + non_null_int_arr.astype("u8") + non_null_int_arr.astype("uint8") + non_null_int_arr.astype("uint16") + non_null_int_arr.astype("uint32") + non_null_int_arr.astype("uint64") + non_null_int_arr.astype(np.uint8) + non_null_int_arr.astype(np.uint16) + non_null_int_arr.astype(np.uint32) + non_null_int_arr.astype(np.uint64) + + non_null_int_arr.astype(np.float32) + non_null_int_arr.astype("float32") + + non_null_int_arr.astype(float) + non_null_int_arr.astype("float") + non_null_int_arr.astype("float64") + non_null_int_arr.astype(np.float64) + + non_null_int_arr.astype(np.complex64) + non_null_int_arr.astype("float64") + + non_null_int_arr.astype("c8") + non_null_int_arr.astype("complex64") + non_null_int_arr.astype(np.complex128) + + non_null_int_arr.astype(complex) + non_null_int_arr.astype("complex") + non_null_int_arr.astype("c16") + non_null_int_arr.astype("complex128") + non_null_int_arr.astype(np.complex128) + + non_null_int_arr.astype("M8[ns]") + non_null_int_arr.astype(np.datetime64) + + non_null_int_arr.astype(str) + + int_arr.astype("boolean") + int_arr.astype(pd.BooleanDtype()) + + int_arr.astype("Int8") + int_arr.astype("Int16") + int_arr.astype("Int32") + int_arr.astype("Int64") + int_arr.astype("UInt8") + int_arr.astype("UInt16") + 
int_arr.astype("UInt32") + int_arr.astype("UInt64") + int_arr.astype(pd.Int8Dtype()) + int_arr.astype(pd.Int16Dtype()) + int_arr.astype(pd.Int32Dtype()) + int_arr.astype(pd.Int64Dtype()) + int_arr.astype(pd.UInt8Dtype()) + int_arr.astype(pd.UInt16Dtype()) + int_arr.astype(pd.UInt32Dtype()) + int_arr.astype(pd.UInt64Dtype()) + + int_arr.astype("string") + int_arr.astype(pd.StringDtype()) + + int_arr.astype(pd.DatetimeTZDtype(tz="UTC")) + + # TODO: Test get/set item + + +def test_string_array() -> None: + strings = np.array(["a", "b", "c", "d", "e", "f", "g", "h", None, "j"]) + check(assert_type(StringArray(strings, copy=False), StringArray), StringArray) + check(assert_type(StringArray(strings, copy=True), StringArray), StringArray) + + strings_list = strings.tolist() + check( + assert_type(pd.array(strings_list, dtype="string"), Type[ExtensionArray]), + StringArray, + ) + check( + assert_type(pd.array(strings, dtype="string"), Type[ExtensionArray]), + StringArray, + ) + check(assert_type(pd.array(strings, dtype=str), Type[ExtensionArray]), PandasArray) + check(assert_type(pd.array(strings), Type[ExtensionArray]), StringArray) + + +def test_boolean_array() -> None: + bools = np.array([True, False, True, False, True, False, True, False, True, False]) + check(assert_type(BooleanArray(bools, mask=ARRAY_MASK), BooleanArray), BooleanArray) + check( + assert_type(BooleanArray(bools, mask=ARRAY_MASK, copy=True), BooleanArray), + BooleanArray, + ) + + nulled_bools = [True, False, True, False, True, False, True, False, None, False] + check(assert_type(pd.array(nulled_bools), Type[ExtensionArray]), BooleanArray) + check( + assert_type(pd.array(nulled_bools, dtype="bool"), Type[ExtensionArray]), + PandasArray, + ) + check( + assert_type(pd.array(nulled_bools, dtype=bool), Type[ExtensionArray]), + PandasArray, + ) + check( + assert_type( + pd.array(nulled_bools, dtype=pd.BooleanDtype()), Type[ExtensionArray] + ), + BooleanArray, + ) + + +def test_period_array() -> None: + p1 = 
pd.Period("2000-01-01", freq="D") + p2 = pd.Period("2000-01-02", freq="D") + pa = PeriodArray(pd.Series([p1, p2])) + check(assert_type(pa, PeriodArray), PeriodArray) + check(assert_type(PeriodArray(pd.Index([p1, p2])), PeriodArray), PeriodArray) + int_arr: npt.NDArray[np.int_] = np.ndarray([0, 1, 2]) + check(assert_type(PeriodArray(int_arr, freq="D"), PeriodArray), PeriodArray) + check( + assert_type(PeriodArray(np.ndarray([0, 1, 2]), freq=Day()), PeriodArray), + PeriodArray, + ) + check(assert_type(PeriodArray(pa), PeriodArray), PeriodArray) + dt = pd.PeriodDtype(freq="D") + period_idx = pd.Index([p1, p2]) + check( + assert_type(PeriodArray(period_idx, dtype=dt, copy=False), PeriodArray), + PeriodArray, + ) + + check( + assert_type( + PeriodArray(period_idx, dtype=dt, freq="D", copy=False), PeriodArray + ), + PeriodArray, + ) + + check(assert_type(pd.array([p1, p2]), Type[ExtensionArray]), PeriodArray) + check( + assert_type(pd.array([p1, p2], dtype="period[D]"), Type[ExtensionArray]), + PeriodArray, + ) + + +def test_datetime_array() -> None: + values = [pd.Timestamp("2000-1-1"), pd.Timestamp("2000-1-2")] + check( + assert_type( + DatetimeArray( + pd.Index(values), dtype=np.dtype("M8[ns]"), freq="D", copy=False + ), + DatetimeArray, + ), + DatetimeArray, + ) + check( + assert_type( + DatetimeArray( + pd.Series(values), dtype=np.dtype("M8[ns]"), freq="D", copy=False + ), + DatetimeArray, + ), + DatetimeArray, + ) + np_values = np.array([np.datetime64(1, "ns"), np.datetime64(2, "ns")]) + dta = DatetimeArray(np_values) + check(assert_type(DatetimeArray(dta), DatetimeArray), DatetimeArray) + data = np.array([1, 2, 3], dtype="M8[ns]") + check( + assert_type( + DatetimeArray(data, copy=False, dtype=pd.DatetimeTZDtype(tz="US/Central")), + DatetimeArray, + ), + DatetimeArray, + ) + + check(assert_type(pd.array(data), Type[ExtensionArray]), DatetimeArray) + check(assert_type(pd.array(np_values), Type[ExtensionArray]), DatetimeArray) + + +def 
test_interval_array_construction() -> None: + ia = IntervalArray([pd.Interval(0, 1), pd.Interval(1, 2)]) + check(assert_type(IntervalArray(ia), IntervalArray), IntervalArray) + check( + assert_type( + IntervalArray([pd.Interval(0, 1), pd.Interval(1, 2)], closed="left"), + IntervalArray, + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray([pd.Interval(0, 1), pd.Interval(1, 2)], closed="right"), + IntervalArray, + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray([pd.Interval(0, 1), pd.Interval(1, 2)], closed="both"), + IntervalArray, + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray([pd.Interval(0, 1), pd.Interval(1, 2)], closed="neither"), + IntervalArray, + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray( + [pd.Interval(0, 1), pd.Interval(1, 2)], + closed="neither", + verify_integrity=True, + ), + IntervalArray, + ), + IntervalArray, + ) + + check( + assert_type(IntervalArray.from_arrays([0, 1], [1, 2]), IntervalArray), + IntervalArray, + ) + check( + assert_type( + IntervalArray.from_arrays(np.array([0, 1]), np.array([1, 2])), IntervalArray + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray.from_arrays(pd.Series([0, 1]), pd.Series([1, 2])), + IntervalArray, + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray.from_arrays(pd.Index([0, 1]), pd.Index([1, 2])), IntervalArray + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray.from_arrays([0, 1], [1, 2], closed="left", copy=False), + IntervalArray, + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray.from_arrays( + [0, 1], [1, 2], closed="right", dtype=pd.IntervalDtype("int64") + ), + IntervalArray, + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray.from_arrays( + [0, 1], [1, 2], closed="right", dtype=pd.IntervalDtype(float) + ), + IntervalArray, + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray.from_arrays([0, 1], [1, 2], closed="both"), IntervalArray + ), + IntervalArray, 
+ ) + check( + assert_type( + IntervalArray.from_arrays([0, 1], [1, 2], closed="neither"), IntervalArray + ), + IntervalArray, + ) + + breaks = [0, 1, 2, 3, 4.5] + check(assert_type(IntervalArray.from_breaks(breaks), IntervalArray), IntervalArray) + check( + assert_type( + IntervalArray.from_breaks(np.array(breaks), copy=False), IntervalArray + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray.from_breaks(pd.Series(breaks), closed="left"), IntervalArray + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray.from_breaks(pd.Index(breaks), closed="right"), IntervalArray + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray.from_breaks(pd.Index(breaks), closed="both"), IntervalArray + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray.from_breaks(pd.Index(breaks), closed="neither"), IntervalArray + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray.from_breaks(pd.Index(breaks), dtype=pd.IntervalDtype(float)), + IntervalArray, + ), + IntervalArray, + ) + + +def test_integer_array_attrib_props() -> None: + ia = IntervalArray([pd.Interval(0, 1), pd.Interval(1, 2)]) + + ia.left + ia.right + ia.closed + ia.mid + ia.length + ia.is_empty + ia.is_non_overlapping_monotonic + + ia.contains(0.5) + ia.overlaps(pd.Interval(0.5, 1.0)) + ia.set_closed("right") + ia.set_closed("left") + ia.set_closed("both") + ia.set_closed("neither") + ia.to_tuples(True) + ia.to_tuples(False) + + +def test_timedelta_array() -> None: + td1, td2 = pd.Timedelta("1 days"), pd.Timedelta("2 days") + tda = TimedeltaArray(np.array([1, 2], dtype="timedelta64[ns]")) + check(assert_type(tda, TimedeltaArray), TimedeltaArray) + + tda = TimedeltaArray(np.array([1, 2], dtype="timedelta64[ns]"), copy=False) + tds = pd.Series([td1, td2]) + tdi = pd.Index([td1, td2]) + + check(assert_type(tda, TimedeltaArray), TimedeltaArray) + check(assert_type(TimedeltaArray(tds, freq="D"), TimedeltaArray), TimedeltaArray) + check(assert_type(TimedeltaArray(tds, 
freq=Day()), TimedeltaArray), TimedeltaArray) + check(assert_type(TimedeltaArray(tdi), TimedeltaArray), TimedeltaArray) + check(assert_type(TimedeltaArray(tda), TimedeltaArray), TimedeltaArray) + + check( + assert_type( + TimedeltaArray(tds, dtype=np.dtype("timedelta64[ns]")), TimedeltaArray + ), + TimedeltaArray, + ) + check( + assert_type( + TimedeltaArray(tds, dtype=np.dtype("timedelta64[ns]")), TimedeltaArray + ), + TimedeltaArray, + ) + + check( + assert_type( + pd.array(np.array([1, 2], dtype="timedelta64[ns]")), Type[ExtensionArray] + ), + TimedeltaArray, + ) + check(assert_type(pd.array(tdi), Type[ExtensionArray]), TimedeltaArray) + check(assert_type(pd.array(tds, copy=False), Type[ExtensionArray]), TimedeltaArray) + + +def test_sparse_array() -> None: + ints = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] + nulled_ints = [1, 2, 3, 4, 5, 6, 7, 8, 9, np.nan] + zero_ints = [0, 2, 3, 4, 5, 6, 0, 8, 9, 0] + + check(assert_type(SparseArray(3.0, fill_value=np.nan), SparseArray), SparseArray) + check( + assert_type( + SparseArray(nulled_ints, fill_value=np.nan), + SparseArray, + ), + SparseArray, + ) + sa = SparseArray(zero_ints, fill_value=0) + check(assert_type(sa.sp_index, SparseIndex), IntIndex) + check( + assert_type(SparseArray(sa.sp_values, sparse_index=sa.sp_index), SparseArray), + SparseArray, + ) + sa_block = SparseArray(zero_ints, fill_value=0, kind="block") + check(assert_type(sa_block.sp_index, SparseIndex), BlockIndex) + check( + assert_type( + SparseArray(sa_block.sp_values, sparse_index=sa_block.sp_index), SparseArray + ), + SparseArray, + ) + + check( + assert_type( + SparseArray( + [True, False, False, False, False, False, False, True, False, False], + fill_value=False, + ), + SparseArray, + ), + SparseArray, + ) + check( + assert_type( + SparseArray( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), + pd.NaT, + pd.NaT, + ], + fill_value=pd.NaT, + ), + SparseArray, + ), + SparseArray, + ) + check( + assert_type( + 
SparseArray([pd.Timedelta(days=1), pd.NaT, pd.NaT], fill_value=pd.NaT), + SparseArray, + ), + SparseArray, + ) + + check( + assert_type( + SparseArray(nulled_ints, kind="integer", copy=False), + SparseArray, + ), + SparseArray, + ) + check( + assert_type( + SparseArray(nulled_ints, kind="block", copy=True), + SparseArray, + ), + SparseArray, + ) + check(assert_type(SparseArray(ints, dtype="i4"), SparseArray), SparseArray) + check(assert_type(SparseArray(ints, dtype="int32"), SparseArray), SparseArray) + check(assert_type(SparseArray(ints, dtype=np.int16), SparseArray), SparseArray) From d1d767502a7089c313cff6a0e942354380035ba7 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Sun, 16 Oct 2022 22:09:31 +0100 Subject: [PATCH 2/9] ENH: Improve boolean array --- pandas-stubs/core/arrays/boolean.pyi | 31 ++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/pandas-stubs/core/arrays/boolean.pyi b/pandas-stubs/core/arrays/boolean.pyi index db9ca8b82..a4122c2f4 100644 --- a/pandas-stubs/core/arrays/boolean.pyi +++ b/pandas-stubs/core/arrays/boolean.pyi @@ -1,8 +1,17 @@ +from typing import ( + Sequence, + Union, + overload, +) + import numpy as np from pandas.core.arrays import ExtensionArray +from typing_extensions import TypeAlias +from pandas._libs.missing import NAType from pandas._typing import ( Scalar, + npt, type_t, ) @@ -22,11 +31,29 @@ class BooleanDtype(ExtensionDtype): def construct_array_type(cls) -> type_t[BooleanArray]: ... def __from_arrow__(self, array): ... +_ScalarType: TypeAlias = Union[bool, np.bool_, NAType, None] +_ArrayKey: TypeAlias = Union[Sequence[int], npt.NDArray[np.integer], slice] + class BooleanArray(BaseMaskedArray): def __init__( - self, values: np.ndarray, mask: np.ndarray, copy: bool = ... + self, + values: npt.NDArray[np.bool_], + mask: npt.NDArray[np.bool_], + copy: bool = ..., + ) -> None: ...
+ # Ignore overrides since more specific than super type + @overload # type: ignore[override] + def __setitem__(self, key: int, value: _ScalarType) -> None: ... + @overload + def __setitem__( + self, + key: _ArrayKey, + value: _ScalarType | Sequence[bool | NAType | None] | npt.NDArray[np.bool_], ) -> None: ... - def __setitem__(self, key: int | np.ndarray | slice, value: object) -> None: ... + @overload # type: ignore[override] + def __getitem__(self, item: int) -> bool | NAType: ... + @overload + def __getitem__(self, item: _ArrayKey) -> BooleanArray: ... @property def dtype(self): ... def astype( From 79dd892057994fa15800c4614052b85364c28729 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Sun, 16 Oct 2022 22:14:05 +0100 Subject: [PATCH 3/9] BUG: Correct StringArray init --- pandas-stubs/core/arrays/integer.pyi | 5 +---- pandas-stubs/core/arrays/string_.pyi | 9 ++++----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pandas-stubs/core/arrays/integer.pyi b/pandas-stubs/core/arrays/integer.pyi index 5cc0be3fc..91699e56a 100644 --- a/pandas-stubs/core/arrays/integer.pyi +++ b/pandas-stubs/core/arrays/integer.pyi @@ -13,10 +13,7 @@ from pandas.arrays import ( ) from pandas._libs.missing import NAType -from pandas._typing import ( - Dtype, - npt, -) +from pandas._typing import npt from pandas.core.dtypes.base import ExtensionDtype as ExtensionDtype diff --git a/pandas-stubs/core/arrays/string_.pyi b/pandas-stubs/core/arrays/string_.pyi index a6d060b4e..aace44d2d 100644 --- a/pandas-stubs/core/arrays/string_.pyi +++ b/pandas-stubs/core/arrays/string_.pyi @@ -1,11 +1,9 @@ -from typing import Sequence - import numpy as np import pandas as pd from pandas.core.arrays import PandasArray from pandas._typing import ( - AnyArrayLike, + npt, type_t, ) @@ -23,8 +21,9 @@ class StringDtype(ExtensionDtype): class StringArray(PandasArray): def __init__( self, - # Also pd.NA and np.nan but not possible it seems - values: AnyArrayLike | Sequence[str | None], + 
values: npt.NDArray[np.str_] + | npt.NDArray[np.string_] + | npt.NDArray[np.object_], copy: bool = ..., ) -> None: ... def __arrow_array__(self, type=...): ... From 538992d51b24f2022dc63232bccab7ee5b7770d1 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Sun, 16 Oct 2022 23:02:41 +0100 Subject: [PATCH 4/9] TST: Make code tests --- tests/test_arrays.py | 62 ++++++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/tests/test_arrays.py b/tests/test_arrays.py index b0bfe528d..943f458db 100644 --- a/tests/test_arrays.py +++ b/tests/test_arrays.py @@ -414,22 +414,52 @@ def test_interval_array_construction() -> None: def test_integer_array_attrib_props() -> None: ia = IntervalArray([pd.Interval(0, 1), pd.Interval(1, 2)]) - ia.left - ia.right - ia.closed - ia.mid - ia.length - ia.is_empty - ia.is_non_overlapping_monotonic - - ia.contains(0.5) - ia.overlaps(pd.Interval(0.5, 1.0)) - ia.set_closed("right") - ia.set_closed("left") - ia.set_closed("both") - ia.set_closed("neither") - ia.to_tuples(True) - ia.to_tuples(False) + check(assert_type(ia.left, pd.Int64Index), pd.Int64Index) + check(assert_type(ia.right, pd.Int64Index), pd.Int64Index) + check(assert_type(ia.closed, str), str) + check(assert_type(ia.mid, pd.Float64Index), pd.Float64Index) + check(assert_type(ia.length, pd.Int64Index), pd.Int64Index) + check(assert_type(ia.is_empty, npt.NDArray[np.bool_]), np.ndarray) + check(assert_type(ia.is_non_overlapping_monotonic, bool), bool) + + check(assert_type(ia.contains(0.5), npt.NDArray[np.bool_]), np.ndarray) + check( + assert_type(ia.overlaps(pd.Interval(0.5, 1.0)), npt.NDArray[np.bool_]), + np.ndarray, + ) + check(assert_type(ia.set_closed("right"), IntervalArray), IntervalArray) + check(assert_type(ia.set_closed("left"), IntervalArray), IntervalArray) + check(assert_type(ia.set_closed("both"), IntervalArray), IntervalArray) + check(assert_type(ia.set_closed("neither"), IntervalArray), IntervalArray) + 
check(assert_type(ia.to_tuples(True), npt.NDArray[np.object_]), np.ndarray) + check(assert_type(ia.to_tuples(False), npt.NDArray[np.object_]), np.ndarray) + + ia_float = IntervalArray([pd.Interval(0, 1.5), pd.Interval(1, 2)]) + check(assert_type(ia_float.left, pd.Float64Index), pd.Float64Index) + check(assert_type(ia_float.right, pd.Float64Index), pd.Float64Index) + check(assert_type(ia_float.length, pd.Float64Index), pd.Float64Index) + + ia_ts = IntervalArray( + [ + pd.Interval(pd.Timestamp("2018-01-01"), pd.Timestamp("2018-01-02")), + pd.Interval(pd.Timestamp("2018-01-02"), pd.Timestamp("2018-01-03")), + ] + ) + check(assert_type(ia_ts.left, pd.DatetimeIndex), pd.DatetimeIndex) + check(assert_type(ia_ts.right, pd.DatetimeIndex), pd.DatetimeIndex) + check(assert_type(ia_ts.mid, pd.DatetimeIndex), pd.DatetimeIndex) + check(assert_type(ia_ts.length, pd.TimedeltaIndex), pd.TimedeltaIndex) + + ia_td = IntervalArray( + [ + pd.Interval(pd.Timedelta("1 days"), pd.Timedelta("2 days")), + pd.Interval(pd.Timedelta("2 days"), pd.Timedelta("3 days")), + ] + ) + check(assert_type(ia_td.left, pd.TimedeltaIndex), pd.TimedeltaIndex) + check(assert_type(ia_td.right, pd.TimedeltaIndex), pd.TimedeltaIndex) + check(assert_type(ia_td.mid, pd.TimedeltaIndex), pd.TimedeltaIndex) + check(assert_type(ia_td.length, pd.TimedeltaIndex), pd.TimedeltaIndex) def test_timedelta_array() -> None: From 224b79889906703e3f349bdc9033444eb65873fe Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 17 Oct 2022 07:32:02 +0100 Subject: [PATCH 5/9] ENH: Small improvements in IntervalArray --- pandas-stubs/_libs/interval.pyi | 4 ++-- pandas-stubs/core/arrays/interval.pyi | 14 +++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/pandas-stubs/_libs/interval.pyi b/pandas-stubs/_libs/interval.pyi index df4e56304..89b7242fc 100644 --- a/pandas-stubs/_libs/interval.pyi +++ b/pandas-stubs/_libs/interval.pyi @@ -53,8 +53,6 @@ class IntervalMixin: def open_left(self) -> bool: ... 
@property def open_right(self) -> bool: ... - @property - def is_empty(self) -> bool: ... class Interval(IntervalMixin, Generic[_OrderableT]): @property @@ -164,3 +162,5 @@ class IntervalTree(IntervalMixin): @property def is_monotonic_increasing(self) -> bool: ... def clear_mapping(self) -> None: ... + @property + def is_empty(self) -> bool: ... diff --git a/pandas-stubs/core/arrays/interval.pyi b/pandas-stubs/core/arrays/interval.pyi index c2036b19d..2e02a0bb9 100644 --- a/pandas-stubs/core/arrays/interval.pyi +++ b/pandas-stubs/core/arrays/interval.pyi @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd from pandas import Index from pandas.core.arrays.base import ExtensionArray as ExtensionArray @@ -6,7 +7,10 @@ from pandas._libs.interval import ( Interval as Interval, IntervalMixin as IntervalMixin, ) -from pandas._typing import Axis +from pandas._typing import ( + Axis, + npt, +) from pandas.core.dtypes.generic import ABCExtensionArray @@ -56,7 +60,7 @@ class IntervalArray(IntervalMixin, ExtensionArray): @property def right(self) -> Index: ... @property - def closed(self) -> bool: ... + def closed(self) -> str: ... def set_closed(self, closed): ... @property def length(self) -> Index: ... @@ -68,5 +72,9 @@ class IntervalArray(IntervalMixin, ExtensionArray): def __arrow_array__(self, type=...): ... def to_tuples(self, na_tuple: bool = ...): ... def repeat(self, repeats, axis: Axis | None = ...): ... - def contains(self, other): ... + def contains( + self, other: float | pd.Timestamp | pd.Timedelta + ) -> npt.NDArray[np.bool_]: ... def overlaps(self, other: Interval) -> bool: ... + @property + def is_empty(self) -> npt.NDArray[np.bool_]: ... 
From ff026509dcd6d5509b9cbea7dc131e05db62f50b Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Wed, 19 Oct 2022 16:34:52 +0100 Subject: [PATCH 6/9] ENH: Improve boolean array --- pandas-stubs/core/arrays/boolean.pyi | 4 ++-- pandas-stubs/core/arrays/masked.pyi | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas-stubs/core/arrays/boolean.pyi b/pandas-stubs/core/arrays/boolean.pyi index a4122c2f4..29a5a7c97 100644 --- a/pandas-stubs/core/arrays/boolean.pyi +++ b/pandas-stubs/core/arrays/boolean.pyi @@ -55,9 +55,9 @@ class BooleanArray(BaseMaskedArray): @overload def __getitem__(self, item: _ArrayKey) -> BooleanArray: ... @property - def dtype(self): ... + def dtype(self) -> ExtensionDtype: ... def astype( - self, dtype: str | np.dtype, copy: bool = ... + self, dtype: npt.DTypeLike | ExtensionDtype, copy: bool = ... ) -> np.ndarray | ExtensionArray: ... def any(self, skipna: bool = ..., **kwargs) -> bool: ... def all(self, skipna: bool = ..., **kwargs) -> bool: ... diff --git a/pandas-stubs/core/arrays/masked.pyi b/pandas-stubs/core/arrays/masked.pyi index a1212531f..0b41f5ade 100644 --- a/pandas-stubs/core/arrays/masked.pyi +++ b/pandas-stubs/core/arrays/masked.pyi @@ -1,4 +1,5 @@ import numpy as np +from pandas import Series from pandas.core.arrays import ( ExtensionArray as ExtensionArray, ExtensionOpsMixin, @@ -23,9 +24,9 @@ class BaseMaskedArray(ExtensionArray, ExtensionOpsMixin): __array_priority__: int = ... def __array__(self, dtype=...) -> np.ndarray: ... def __arrow_array__(self, type=...): ... - def isna(self): ... + def isna(self) -> npt.NDArray[np.bool_]: ... @property def nbytes(self) -> int: ... def take(self, indexer, allow_fill: bool = ..., fill_value=...): ... - def copy(self): ... - def value_counts(self, dropna: bool = ...): ... + def copy(self) -> BaseMaskedArray: ... + def value_counts(self, dropna: bool = ...) -> Series[int]: ... 
From 67248e0fbf3d348a01143ad5d8226e55f63c5167 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Wed, 19 Oct 2022 18:49:58 +0100 Subject: [PATCH 7/9] ENH: More array improvements --- pandas-stubs/core/arrays/boolean.pyi | 5 ++- pandas-stubs/core/arrays/string_.pyi | 50 ++++++++++++++++++--- pandas-stubs/core/arrays/timedeltas.pyi | 4 -- tests/test_arrays.py | 58 +++++++++++++++++++++++++ 4 files changed, 107 insertions(+), 10 deletions(-) diff --git a/pandas-stubs/core/arrays/boolean.pyi b/pandas-stubs/core/arrays/boolean.pyi index 29a5a7c97..1f7c7016f 100644 --- a/pandas-stubs/core/arrays/boolean.pyi +++ b/pandas-stubs/core/arrays/boolean.pyi @@ -48,7 +48,10 @@ class BooleanArray(BaseMaskedArray): def __setitem__( self, key: _ArrayKey, - value: _ScalarType | Sequence[bool | NAType | None] | npt.NDArray[np.bool_], + value: _ScalarType + | Sequence[bool | NAType | None] + | npt.NDArray[np.bool_] + | BooleanArray, ) -> None: ... @overload # type: ignore[override] def __getitem__(self, item: int) -> bool | NAType: ... diff --git a/pandas-stubs/core/arrays/string_.pyi b/pandas-stubs/core/arrays/string_.pyi index aace44d2d..ba9cc3e09 100644 --- a/pandas-stubs/core/arrays/string_.pyi +++ b/pandas-stubs/core/arrays/string_.pyi @@ -1,7 +1,18 @@ +from typing import ( + Sequence, + Union, + overload, +) + import numpy as np import pandas as pd -from pandas.core.arrays import PandasArray +from pandas.core.arrays import ( + ExtensionArray, + PandasArray, +) +from typing_extensions import TypeAlias +from pandas._libs.missing import NAType from pandas._typing import ( npt, type_t, @@ -18,6 +29,9 @@ class StringDtype(ExtensionDtype): def construct_array_type(cls) -> type_t[StringArray]: ... def __from_arrow__(self, array): ... 
+_ScalarType: TypeAlias = Union[str, NAType, None] +_ArrayKey: TypeAlias = Union[Sequence[int], npt.NDArray[np.integer], slice] + class StringArray(PandasArray): def __init__( self, @@ -26,8 +40,34 @@ class StringArray(PandasArray): | npt.NDArray[np.object_], copy: bool = ..., ) -> None: ... + @property + def na_value(self) -> NAType: ... def __arrow_array__(self, type=...): ... - def __setitem__(self, key, value) -> None: ... - def fillna(self, value=..., method=..., limit=...): ... - def astype(self, dtype, copy: bool = ...): ... - def value_counts(self, dropna: bool = ...): ... + @overload # type: ignore[override] + def __setitem__(self, key: int, value: _ScalarType) -> None: ... + @overload + def __setitem__( + self, + key: _ArrayKey, + value: _ScalarType + | Sequence[str | NAType | None] + | npt.NDArray[np.str_] + | npt.NDArray[np.string_] + | StringArray, + ) -> None: ... + @overload # type: ignore[override] + def __getitem__(self, item: int) -> str | NAType: ... + @overload + def __getitem__(self, item: _ArrayKey) -> StringArray: ... + @overload + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: ... + @overload + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: ... + def min( + self, axis: int | None = ..., skipna: bool = ..., **kwargs + ) -> str | NAType: ... + def max( + self, axis: int | None = ..., skipna: bool = ..., **kwargs + ) -> str | NAType: ... + def value_counts(self, dropna: bool = ...) -> pd.Series[int]: ... + def memory_usage(self, deep: bool = ...) -> int: ... diff --git a/pandas-stubs/core/arrays/timedeltas.pyi b/pandas-stubs/core/arrays/timedeltas.pyi index 699f8b0f7..c1b5c1a41 100644 --- a/pandas-stubs/core/arrays/timedeltas.pyi +++ b/pandas-stubs/core/arrays/timedeltas.pyi @@ -63,7 +63,3 @@ class TimedeltaArray(DatetimeLikeArrayMixin, TimelikeOps): nanoseconds: int = ... @property def components(self) -> int: ... 
- -def sequence_to_td64ns(data, copy: bool = ..., unit: str = ..., errors: str = ...): ... -def ints_to_td64ns(data, unit: str = ...): ... -def objects_to_td64ns(data, unit: str = ..., errors: str = ...): ... diff --git a/tests/test_arrays.py b/tests/test_arrays.py index 943f458db..4e4240b9d 100644 --- a/tests/test_arrays.py +++ b/tests/test_arrays.py @@ -499,6 +499,64 @@ def test_timedelta_array() -> None: check(assert_type(pd.array(tdi), Type[ExtensionArray]), TimedeltaArray) check(assert_type(pd.array(tds, copy=False), Type[ExtensionArray]), TimedeltaArray) + # check(assert_type(tda.microseconds,npt.NDArray[np.int_]), np.ndarray) + # check(assert_type(tda.nanoseconds,npt.NDArray[np.int_]), np.ndarray) + # check(assert_type(tda.days,npt.NDArray[np.int_]), np.ndarray) + # check(assert_type(tda.seconds,npt.NDArray[np.int_]), np.ndarray) + # + # all + # any + # check(assert_type(tda.argmax(), int), int) + # check(assert_type(tda.argmin(), int), int) + # check(assert_type(argsort, npt.NDArray[npt.intp]), np.ndarray) + # check(assert_type(tda.asi8, npt.NDArray[npt.intp]), np.ndarray) + # + # astype + # ceil + # components + # copy + # delete + # dropna + # dtype + # equals + # factorize + # fillna + # floor + # freq + # freqstr + # inferred_freq + # insert + # isin + # isna + # map + # max + # mean + # median + # min + # nbytes + # ndim + # ravel + # repeat + # reshape + # resolution + # round + # searchsorted + # shape + # shift + # size + # std + # sum + # swapaxes + # take + # check(assert_type(tda.to_numpy(), npt.NDArray[np.timedelta64]), np.ndarray) + # check(assert_type(tda.to_pytimedelta(), npt.NDArray[np.object_]), np.ndarray) + # tsa.tolist() + # check(assert_type(tda.total_seconds(), npt.NDArray[np.float64]),np.ndarray) + # transpose + # unique + # value_counts + # view + def test_sparse_array() -> None: ints = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] From 9cb0923ab419e2a113e0c3ce2fa033202f50627e Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 25 Nov 2022 
19:17:14 +0000 Subject: [PATCH 8/9] Improve interval --- pandas-stubs/_libs/interval.pyi | 2 + pandas-stubs/core/arrays/interval.pyi | 60 +++++++++--- tests/test_arrays.py | 134 +++++++++++++++++--------- 3 files changed, 137 insertions(+), 59 deletions(-) diff --git a/pandas-stubs/_libs/interval.pyi b/pandas-stubs/_libs/interval.pyi index afa1b0b27..303a8bd32 100644 --- a/pandas-stubs/_libs/interval.pyi +++ b/pandas-stubs/_libs/interval.pyi @@ -193,6 +193,8 @@ class Interval(IntervalMixin, Generic[_OrderableT]): def __ne__(self, other: Series[_OrderableT]) -> Series[bool]: ... # type: ignore[misc] @overload def __ne__(self, other: object) -> Literal[True]: ... + @property + def is_empty(self) -> bool: ... class IntervalTree(IntervalMixin): def __init__( diff --git a/pandas-stubs/core/arrays/interval.pyi b/pandas-stubs/core/arrays/interval.pyi index 2e02a0bb9..00240e3b2 100644 --- a/pandas-stubs/core/arrays/interval.pyi +++ b/pandas-stubs/core/arrays/interval.pyi @@ -1,3 +1,11 @@ +import datetime as dt +from typing import ( + Generic, + Literal, + Sequence, + overload, +) + import numpy as np import pandas as pd from pandas import Index @@ -9,21 +17,51 @@ from pandas._libs.interval import ( ) from pandas._typing import ( Axis, + IntervalT, npt, ) from pandas.core.dtypes.generic import ABCExtensionArray -class IntervalArray(IntervalMixin, ExtensionArray): +class IntervalArray(IntervalMixin, ExtensionArray, Generic[IntervalT]): ndim: int = ... can_hold_na: bool = ... def __new__( - cls, data, closed=..., dtype=..., copy: bool = ..., verify_integrity: bool = ... - ): ... + cls, + data: Sequence[IntervalT] | IntervalArray[IntervalT], + closed: Literal["left", "right", "both", "neither"] = ..., + dtype: pd.IntervalDtype = ..., + copy: bool = ..., + verify_integrity: bool = ..., + ) -> IntervalArray[IntervalT]: ... + @overload @classmethod def from_breaks( - cls, breaks, closed: str = ..., copy: bool = ..., dtype=... - ) -> IntervalArray: ... 
+ cls, breaks: Sequence[int], closed: str = ..., copy: bool = ..., dtype=... + ) -> IntervalArray[Interval[int]]: ... + @overload + @classmethod + def from_breaks( + cls, breaks: Sequence[float], closed: str = ..., copy: bool = ..., dtype=... + ) -> IntervalArray[Interval[float]]: ... + @overload + @classmethod + def from_breaks( + cls, + breaks: Sequence[pd.Timestamp | np.datetime64 | dt.datetime], + closed: str = ..., + copy: bool = ..., + dtype=..., + ) -> IntervalArray[Interval[pd.Timestamp]]: ... + @overload + @classmethod + def from_breaks( + cls, + breaks: Sequence[pd.Timedelta | np.timedelta64 | dt.timedelta], + closed: str = ..., + copy: bool = ..., + dtype=..., + ) -> IntervalArray[Interval[pd.Timedelta]]: ... @classmethod def from_arrays( cls, left, right, closed: str = ..., copy: bool = ..., dtype=... @@ -32,10 +70,10 @@ class IntervalArray(IntervalMixin, ExtensionArray): def from_tuples( cls, data, closed: str = ..., copy: bool = ..., dtype=... ) -> IntervalArray: ... - def __iter__(self): ... + def __iter__(self) -> IntervalT: ... def __len__(self) -> int: ... - def __getitem__(self, value): ... - def __setitem__(self, key, value) -> None: ... + def __getitem__(self, value: IntervalT): ... + def __setitem__(self, key: int, value: IntervalT) -> None: ... def __eq__(self, other): ... def __ne__(self, other): ... def fillna(self, value=..., method=..., limit=...): ... @@ -61,7 +99,7 @@ class IntervalArray(IntervalMixin, ExtensionArray): def right(self) -> Index: ... @property def closed(self) -> str: ... - def set_closed(self, closed): ... + def set_closed(self, closed) -> IntervalArray[IntervalT]: ... @property def length(self) -> Index: ... @property @@ -70,11 +108,11 @@ class IntervalArray(IntervalMixin, ExtensionArray): def is_non_overlapping_monotonic(self) -> bool: ... def __array__(self, dtype=...) -> np.ndarray: ... def __arrow_array__(self, type=...): ... - def to_tuples(self, na_tuple: bool = ...): ... 
+ def to_tuples(self, na_tuple: bool = ...) -> npt.NDArray[np.object_]: ... def repeat(self, repeats, axis: Axis | None = ...): ... def contains( self, other: float | pd.Timestamp | pd.Timedelta ) -> npt.NDArray[np.bool_]: ... - def overlaps(self, other: Interval) -> bool: ... + def overlaps(self, other: Interval) -> npt.NDArray[np.bool_]: ... @property def is_empty(self) -> npt.NDArray[np.bool_]: ... diff --git a/tests/test_arrays.py b/tests/test_arrays.py index 4e4240b9d..42be03e68 100644 --- a/tests/test_arrays.py +++ b/tests/test_arrays.py @@ -65,8 +65,16 @@ def test_integer_array() -> None: ), BooleanArray, ) - check(assert_type(int_arr.astype(float), npt.NDArray[np.float64]), np.ndarray) - check(assert_type(int_arr.astype(np.float64), npt.NDArray[np.float64]), np.ndarray) + check( + assert_type(int_arr.astype(float), npt.NDArray[np.float64]), + np.ndarray, + np.float64, + ) + check( + assert_type(int_arr.astype(np.float64), npt.NDArray[np.float64]), + np.ndarray, + np.float64, + ) non_null_int_arr.astype(bool) non_null_int_arr.astype(np.bool_) non_null_int_arr.astype("bool") @@ -268,32 +276,34 @@ def test_datetime_array() -> None: def test_interval_array_construction() -> None: ia = IntervalArray([pd.Interval(0, 1), pd.Interval(1, 2)]) - check(assert_type(IntervalArray(ia), IntervalArray), IntervalArray) + check( + assert_type(IntervalArray(ia), "IntervalArray[pd.Interval[int]]"), IntervalArray + ) check( assert_type( IntervalArray([pd.Interval(0, 1), pd.Interval(1, 2)], closed="left"), - IntervalArray, + "IntervalArray[pd.Interval[int]]", ), IntervalArray, ) check( assert_type( IntervalArray([pd.Interval(0, 1), pd.Interval(1, 2)], closed="right"), - IntervalArray, + "IntervalArray[pd.Interval[int]]", ), IntervalArray, ) check( assert_type( IntervalArray([pd.Interval(0, 1), pd.Interval(1, 2)], closed="both"), - IntervalArray, + "IntervalArray[pd.Interval[int]]", ), IntervalArray, ) check( assert_type( IntervalArray([pd.Interval(0, 1), pd.Interval(1, 2)], 
closed="neither"), - IntervalArray, + "IntervalArray[pd.Interval[int]]", ), IntervalArray, ) @@ -304,38 +314,42 @@ def test_interval_array_construction() -> None: closed="neither", verify_integrity=True, ), - IntervalArray, + "IntervalArray[pd.Interval[int]]", ), IntervalArray, ) check( - assert_type(IntervalArray.from_arrays([0, 1], [1, 2]), IntervalArray), + assert_type( + IntervalArray.from_arrays([0, 1], [1, 2]), "IntervalArray[pd.Interval[int]]" + ), IntervalArray, ) check( assert_type( - IntervalArray.from_arrays(np.array([0, 1]), np.array([1, 2])), IntervalArray + IntervalArray.from_arrays(np.array([0, 1]), np.array([1, 2])), + "IntervalArray[pd.Interval[int]]", ), IntervalArray, ) check( assert_type( IntervalArray.from_arrays(pd.Series([0, 1]), pd.Series([1, 2])), - IntervalArray, + "IntervalArray[pd.Interval[int]]", ), IntervalArray, ) check( assert_type( - IntervalArray.from_arrays(pd.Index([0, 1]), pd.Index([1, 2])), IntervalArray + IntervalArray.from_arrays(pd.Index([0, 1]), pd.Index([1, 2])), + "IntervalArray[pd.Interval[int]]", ), IntervalArray, ) check( assert_type( IntervalArray.from_arrays([0, 1], [1, 2], closed="left", copy=False), - IntervalArray, + "IntervalArray[pd.Interval[int]]", ), IntervalArray, ) @@ -344,7 +358,7 @@ def test_interval_array_construction() -> None: IntervalArray.from_arrays( [0, 1], [1, 2], closed="right", dtype=pd.IntervalDtype("int64") ), - IntervalArray, + "IntervalArray[pd.Interval[int]]", ), IntervalArray, ) @@ -353,59 +367,71 @@ def test_interval_array_construction() -> None: IntervalArray.from_arrays( [0, 1], [1, 2], closed="right", dtype=pd.IntervalDtype(float) ), - IntervalArray, + "IntervalArray[pd.Interval[int]]", ), IntervalArray, ) check( assert_type( - IntervalArray.from_arrays([0, 1], [1, 2], closed="both"), IntervalArray + IntervalArray.from_arrays([0, 1], [1, 2], closed="both"), + "IntervalArray[pd.Interval[int]]", ), IntervalArray, ) check( assert_type( - IntervalArray.from_arrays([0, 1], [1, 2], 
closed="neither"), IntervalArray + IntervalArray.from_arrays([0, 1], [1, 2], closed="neither"), + "IntervalArray[pd.Interval[int]]", ), IntervalArray, ) breaks = [0, 1, 2, 3, 4.5] - check(assert_type(IntervalArray.from_breaks(breaks), IntervalArray), IntervalArray) check( assert_type( - IntervalArray.from_breaks(np.array(breaks), copy=False), IntervalArray + IntervalArray.from_breaks(breaks), "IntervalArray[pd.Interval[float]]" + ), + IntervalArray, + ) + check( + assert_type( + IntervalArray.from_breaks(np.array(breaks), copy=False), + "IntervalArray[pd.Interval[float]]", ), IntervalArray, ) check( assert_type( - IntervalArray.from_breaks(pd.Series(breaks), closed="left"), IntervalArray + IntervalArray.from_breaks(pd.Series(breaks), closed="left"), + "IntervalArray[pd.Interval[float]]", ), IntervalArray, ) check( assert_type( - IntervalArray.from_breaks(pd.Index(breaks), closed="right"), IntervalArray + IntervalArray.from_breaks(pd.Index(breaks), closed="right"), + "IntervalArray[pd.Interval[float]]", ), IntervalArray, ) check( assert_type( - IntervalArray.from_breaks(pd.Index(breaks), closed="both"), IntervalArray + IntervalArray.from_breaks(pd.Index(breaks), closed="both"), + "IntervalArray[pd.Interval[float]]", ), IntervalArray, ) check( assert_type( - IntervalArray.from_breaks(pd.Index(breaks), closed="neither"), IntervalArray + IntervalArray.from_breaks(pd.Index(breaks), closed="neither"), + "IntervalArray[pd.Interval[float]]", ), IntervalArray, ) check( assert_type( IntervalArray.from_breaks(pd.Index(breaks), dtype=pd.IntervalDtype(float)), - IntervalArray, + "IntervalArray[pd.Interval[float]]", ), IntervalArray, ) @@ -413,31 +439,43 @@ def test_interval_array_construction() -> None: def test_integer_array_attrib_props() -> None: ia = IntervalArray([pd.Interval(0, 1), pd.Interval(1, 2)]) - - check(assert_type(ia.left, pd.Int64Index), pd.Int64Index) - check(assert_type(ia.right, pd.Int64Index), pd.Int64Index) + check(assert_type(ia.left, pd.Index), 
pd.Int64Index) + check(assert_type(ia.right, pd.Index), pd.Int64Index) check(assert_type(ia.closed, str), str) - check(assert_type(ia.mid, pd.Float64Index), pd.Float64Index) - check(assert_type(ia.length, pd.Int64Index), pd.Int64Index) - check(assert_type(ia.is_empty, npt.NDArray[np.bool_]), np.ndarray) + check(assert_type(ia.mid, pd.Index), pd.Float64Index) + check(assert_type(ia.length, pd.Index), pd.Int64Index) + check(assert_type(ia.is_empty, npt.NDArray[np.bool_]), np.ndarray, np.bool_) check(assert_type(ia.is_non_overlapping_monotonic, bool), bool) - check(assert_type(ia.contains(0.5), npt.NDArray[np.bool_]), np.ndarray) + check(assert_type(ia.contains(0.5), npt.NDArray[np.bool_]), np.ndarray, np.bool_) check( assert_type(ia.overlaps(pd.Interval(0.5, 1.0)), npt.NDArray[np.bool_]), np.ndarray, + np.bool_, + ) + check( + assert_type(ia.set_closed("right"), "IntervalArray[pd.Interval[int]]"), + IntervalArray, + ) + check( + assert_type(ia.set_closed("left"), "IntervalArray[pd.Interval[int]]"), + IntervalArray, + ) + check( + assert_type(ia.set_closed("both"), "IntervalArray[pd.Interval[int]]"), + IntervalArray, + ) + check( + assert_type(ia.set_closed("neither"), "IntervalArray[pd.Interval[int]]"), + IntervalArray, ) - check(assert_type(ia.set_closed("right"), IntervalArray), IntervalArray) - check(assert_type(ia.set_closed("left"), IntervalArray), IntervalArray) - check(assert_type(ia.set_closed("both"), IntervalArray), IntervalArray) - check(assert_type(ia.set_closed("neither"), IntervalArray), IntervalArray) - check(assert_type(ia.to_tuples(True), npt.NDArray[np.object_]), np.ndarray) - check(assert_type(ia.to_tuples(False), npt.NDArray[np.object_]), np.ndarray) + check(assert_type(ia.to_tuples(True), npt.NDArray[np.object_]), np.ndarray, tuple) + check(assert_type(ia.to_tuples(False), npt.NDArray[np.object_]), np.ndarray, tuple) ia_float = IntervalArray([pd.Interval(0, 1.5), pd.Interval(1, 2)]) - check(assert_type(ia_float.left, pd.Float64Index), 
pd.Float64Index) - check(assert_type(ia_float.right, pd.Float64Index), pd.Float64Index) - check(assert_type(ia_float.length, pd.Float64Index), pd.Float64Index) + check(assert_type(ia_float.left, pd.Index), pd.Float64Index) + check(assert_type(ia_float.right, pd.Index), pd.Float64Index) + check(assert_type(ia_float.length, pd.Index), pd.Float64Index) ia_ts = IntervalArray( [ @@ -445,10 +483,10 @@ def test_integer_array_attrib_props() -> None: pd.Interval(pd.Timestamp("2018-01-02"), pd.Timestamp("2018-01-03")), ] ) - check(assert_type(ia_ts.left, pd.DatetimeIndex), pd.DatetimeIndex) - check(assert_type(ia_ts.right, pd.DatetimeIndex), pd.DatetimeIndex) - check(assert_type(ia_ts.mid, pd.DatetimeIndex), pd.DatetimeIndex) - check(assert_type(ia_ts.length, pd.TimedeltaIndex), pd.TimedeltaIndex) + check(assert_type(ia_ts.left, pd.Index), pd.DatetimeIndex) + check(assert_type(ia_ts.right, pd.Index), pd.DatetimeIndex) + check(assert_type(ia_ts.mid, pd.Index), pd.DatetimeIndex) + check(assert_type(ia_ts.length, pd.Index), pd.TimedeltaIndex) ia_td = IntervalArray( [ @@ -456,10 +494,10 @@ def test_integer_array_attrib_props() -> None: pd.Interval(pd.Timedelta("2 days"), pd.Timedelta("3 days")), ] ) - check(assert_type(ia_td.left, pd.TimedeltaIndex), pd.TimedeltaIndex) - check(assert_type(ia_td.right, pd.TimedeltaIndex), pd.TimedeltaIndex) - check(assert_type(ia_td.mid, pd.TimedeltaIndex), pd.TimedeltaIndex) - check(assert_type(ia_td.length, pd.TimedeltaIndex), pd.TimedeltaIndex) + check(assert_type(ia_td.left, pd.Index), pd.TimedeltaIndex) + check(assert_type(ia_td.right, pd.Index), pd.TimedeltaIndex) + check(assert_type(ia_td.mid, pd.Index), pd.TimedeltaIndex) + check(assert_type(ia_td.length, pd.Index), pd.TimedeltaIndex) def test_timedelta_array() -> None: From 4c693f771b4dbfe2b46cea632a2e7178bfdf231e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Wed, 16 Aug 2023 21:37:52 -0400 Subject: [PATCH 9/9] make CI pass --- pandas-stubs/core/arrays/base.pyi 
| 4 +- pandas-stubs/core/arrays/boolean.pyi | 7 +-- pandas-stubs/core/arrays/integer.pyi | 5 +- pandas-stubs/core/arrays/interval.pyi | 78 +++++++++++++++++++++----- pandas-stubs/core/arrays/period.pyi | 1 - pandas-stubs/core/arrays/string_.pyi | 2 +- pandas-stubs/core/series.pyi | 9 ++- pyproject.toml | 2 +- tests/extension/decimal/array.py | 12 +++- tests/test_arrays.py | 81 +++++++++++++-------------- 10 files changed, 129 insertions(+), 72 deletions(-) diff --git a/pandas-stubs/core/arrays/base.pyi b/pandas-stubs/core/arrays/base.pyi index 7141f70ff..759919385 100644 --- a/pandas-stubs/core/arrays/base.pyi +++ b/pandas-stubs/core/arrays/base.pyi @@ -10,11 +10,11 @@ from pandas._typing import ( npt, ) -from pandas.core.dtypes.dtypes import ExtensionDtype as ExtensionDtype +from pandas.core.dtypes.dtypes import ExtensionDtype class ExtensionArray: def __getitem__(self, item) -> Any: ... - def __setitem__(self, key: int | slice | np.ndarray, value) -> None: ... + def __setitem__(self, key: int | slice | np.ndarray, value: Self) -> None: ... def __len__(self) -> int: ... def __iter__(self): ... def __contains__(self, item: object) -> bool | np.bool_: ... 
diff --git a/pandas-stubs/core/arrays/boolean.pyi b/pandas-stubs/core/arrays/boolean.pyi index 5b1daa8f2..2a29cfb1c 100644 --- a/pandas-stubs/core/arrays/boolean.pyi +++ b/pandas-stubs/core/arrays/boolean.pyi @@ -6,7 +6,7 @@ from typing import ( import numpy as np from pandas.core.arrays import ExtensionArray -from pandas.core.arrays.masked import BaseMaskedArray as BaseMaskedArray +from pandas.core.arrays.masked import BaseMaskedArray from typing_extensions import TypeAlias from pandas._libs.missing import NAType @@ -15,7 +15,7 @@ from pandas._typing import ( type_t, ) -from pandas.core.dtypes.base import ExtensionDtype as ExtensionDtype +from pandas.core.dtypes.base import ExtensionDtype class BooleanDtype(ExtensionDtype): na_value: ClassVar[NAType] @@ -44,7 +44,7 @@ class BooleanArray(BaseMaskedArray): | npt.NDArray[np.bool_] | BooleanArray, ) -> None: ... - @overload # type: ignore[override] + @overload def __getitem__(self, item: int) -> bool | NAType: ... @overload def __getitem__(self, item: _ArrayKey) -> BooleanArray: ... @@ -55,4 +55,3 @@ class BooleanArray(BaseMaskedArray): ) -> np.ndarray | ExtensionArray: ... def any(self, skipna: bool = ..., **kwargs) -> bool: ... def all(self, skipna: bool = ..., **kwargs) -> bool: ... - def __setitem__(self, key, value) -> None: ... diff --git a/pandas-stubs/core/arrays/integer.pyi b/pandas-stubs/core/arrays/integer.pyi index b4a9da2b1..d7c829ecf 100644 --- a/pandas-stubs/core/arrays/integer.pyi +++ b/pandas-stubs/core/arrays/integer.pyi @@ -12,7 +12,6 @@ from pandas.arrays import ( StringArray, ) from pandas.core.arrays.masked import BaseMaskedArray -from typing_extensions import Self from pandas._libs.missing import NAType from pandas._typing import npt @@ -26,7 +25,7 @@ class _IntegerDtype(ExtensionDtype): @property def itemsize(self) -> int: ... @classmethod - def construct_array_type(cls) -> type[Self]: ... + def construct_array_type(cls) -> type[IntegerArray]: ... def __from_arrow__(self, array): ... 
class IntegerArray(BaseMaskedArray): @@ -45,7 +44,7 @@ class IntegerArray(BaseMaskedArray): key: Sequence[int] | slice | npt.NDArray[np.integer], value: float | NAType | Sequence[float | NAType] | npt.NDArray[np.integer], ) -> None: ... - @overload # type: ignore[override] + @overload def __getitem__(self, item: int) -> int | NAType: ... @overload def __getitem__( diff --git a/pandas-stubs/core/arrays/interval.pyi b/pandas-stubs/core/arrays/interval.pyi index 889c5a5d8..8de395e65 100644 --- a/pandas-stubs/core/arrays/interval.pyi +++ b/pandas-stubs/core/arrays/interval.pyi @@ -12,12 +12,13 @@ from pandas import ( Index, Series, ) -from pandas.core.arrays.base import ExtensionArray as ExtensionArray +from pandas.core.arrays.base import ExtensionArray +from pandas.core.base import IndexOpsMixin from typing_extensions import Self from pandas._libs.interval import ( - Interval as Interval, - IntervalMixin as IntervalMixin, + Interval, + IntervalMixin, ) from pandas._typing import ( Axis, @@ -41,43 +42,90 @@ class IntervalArray(IntervalMixin, ExtensionArray, Generic[IntervalT]): @overload @classmethod def from_breaks( - cls, breaks: Sequence[int], closed: str = ..., copy: bool = ..., dtype=... + cls, + breaks: Sequence[int] | npt.NDArray[np.integer] | IndexOpsMixin[int], + closed: Literal["left", "right", "both", "neither"] = ..., + copy: bool = ..., + dtype: pd.IntervalDtype = ..., ) -> IntervalArray[Interval[int]]: ... @overload @classmethod def from_breaks( - cls, breaks: Sequence[float], closed: str = ..., copy: bool = ..., dtype=... + cls, + breaks: Sequence[float] | npt.NDArray[np.floating] | IndexOpsMixin[float], + closed: Literal["left", "right", "both", "neither"] = ..., + copy: bool = ..., + dtype: pd.IntervalDtype = ..., ) -> IntervalArray[Interval[float]]: ... 
@overload @classmethod def from_breaks( cls, - breaks: Sequence[pd.Timestamp | np.datetime64 | dt.datetime], - closed: str = ..., + breaks: Sequence[np.datetime64 | dt.datetime] | IndexOpsMixin[pd.Timestamp], + closed: Literal["left", "right", "both", "neither"] = ..., copy: bool = ..., - dtype=..., + dtype: pd.IntervalDtype = ..., ) -> IntervalArray[Interval[pd.Timestamp]]: ... @overload @classmethod def from_breaks( cls, - breaks: Sequence[pd.Timedelta | np.timedelta64 | dt.timedelta], - closed: str = ..., + breaks: Sequence[np.timedelta64 | dt.timedelta] | IndexOpsMixin[pd.Timedelta], + closed: Literal["left", "right", "both", "neither"] = ..., copy: bool = ..., - dtype=..., + dtype: pd.IntervalDtype = ..., ) -> IntervalArray[Interval[pd.Timedelta]]: ... + @overload @classmethod def from_arrays( - cls, left, right, closed: str = ..., copy: bool = ..., dtype=... - ) -> IntervalArray: ... + cls, + left: Sequence[int] | npt.NDArray[np.integer] | IndexOpsMixin[int], + right: Sequence[int] | npt.NDArray[np.integer] | IndexOpsMixin[int], + closed: Literal["left", "right", "both", "neither"] = ..., + copy: bool = ..., + dtype: pd.IntervalDtype = ..., + ) -> IntervalArray[Interval[int]]: ... + @overload + @classmethod + def from_arrays( + cls, + left: Sequence[float] | npt.NDArray[np.floating] | IndexOpsMixin[float], + right: Sequence[float] | npt.NDArray[np.floating] | IndexOpsMixin[float], + closed: Literal["left", "right", "both", "neither"] = ..., + copy: bool = ..., + dtype: pd.IntervalDtype = ..., + ) -> IntervalArray[Interval[float]]: ... + @overload + @classmethod + def from_arrays( + cls, + left: Sequence[np.datetime64 | dt.datetime] | pd.DatetimeIndex, + right: Sequence[np.datetime64 | dt.datetime] | pd.DatetimeIndex, + closed: Literal["left", "right", "both", "neither"] = ..., + copy: bool = ..., + dtype: pd.IntervalDtype = ..., + ) -> IntervalArray[Interval[pd.Timestamp]]: ... 
+ @overload + @classmethod + def from_arrays( + cls, + left: Sequence[np.timedelta64 | dt.timedelta] | pd.TimedeltaIndex, + right: Sequence[np.timedelta64 | dt.timedelta] | pd.TimedeltaIndex, + closed: Literal["left", "right", "both", "neither"] = ..., + copy: bool = ..., + dtype: pd.IntervalDtype = ..., + ) -> IntervalArray[Interval[pd.Timedelta]]: ... @classmethod def from_tuples( - cls, data, closed: str = ..., copy: bool = ..., dtype=... + cls, + data, + closed: Literal["left", "right", "both", "neither"] = ..., + copy: bool = ..., + dtype: pd.IntervalDtype = ..., ) -> IntervalArray: ... def __iter__(self) -> IntervalT: ... def __len__(self) -> int: ... def __getitem__(self, value: IntervalT): ... - def __setitem__(self, key: int, value: IntervalT) -> None: ... def __eq__(self, other): ... def __ne__(self, other): ... def fillna(self, value=..., method=..., limit=...): ... diff --git a/pandas-stubs/core/arrays/period.pyi b/pandas-stubs/core/arrays/period.pyi index 8bcb8efb5..5a3d0ed5b 100644 --- a/pandas-stubs/core/arrays/period.pyi +++ b/pandas-stubs/core/arrays/period.pyi @@ -8,7 +8,6 @@ from pandas.core.arrays.datetimelike import ( from pandas._libs.tslibs import Period from pandas._libs.tslibs.timestamps import Timestamp -from pandas._typing import npt as npt from pandas.tseries.offsets import Tick diff --git a/pandas-stubs/core/arrays/string_.pyi b/pandas-stubs/core/arrays/string_.pyi index 4951499ca..1063a6243 100644 --- a/pandas-stubs/core/arrays/string_.pyi +++ b/pandas-stubs/core/arrays/string_.pyi @@ -48,7 +48,7 @@ class StringArray(PandasArray): | npt.NDArray[np.string_] | StringArray, ) -> None: ... - @overload # type: ignore[override] + @overload def __getitem__(self, item: int) -> str | NAType: ... @overload def __getitem__(self, item: _ArrayKey) -> StringArray: ... 
diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index c1055c48e..43d4ebe98 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -221,6 +221,7 @@ class Series(IndexOpsMixin[S1], NDFrame): cls, data: DatetimeIndex | Sequence[Timestamp | np.datetime64 | datetime], index: Axes | None = ..., + *, dtype: TimestampDtypeArg = ..., name: Hashable | None = ..., copy: bool = ..., @@ -240,6 +241,7 @@ class Series(IndexOpsMixin[S1], NDFrame): cls, data: PeriodIndex, index: Axes | None = ..., + *, dtype: PeriodDtype = ..., name: Hashable | None = ..., copy: bool = ..., @@ -249,6 +251,7 @@ class Series(IndexOpsMixin[S1], NDFrame): cls, data: TimedeltaIndex | Sequence[Timedelta | np.timedelta64 | timedelta], index: Axes | None = ..., + *, dtype: TimedeltaDtypeArg = ..., name: Hashable | None = ..., copy: bool = ..., @@ -260,6 +263,7 @@ class Series(IndexOpsMixin[S1], NDFrame): | Interval[_OrderableT] | Sequence[Interval[_OrderableT]], index: Axes | None = ..., + *, dtype: Literal["Interval"] = ..., name: Hashable | None = ..., copy: bool = ..., @@ -268,8 +272,9 @@ class Series(IndexOpsMixin[S1], NDFrame): def __new__( cls, data: object | _ListLike | Series[S1] | dict[int, S1] | dict[_str, S1] | None, - dtype: type[S1], index: Axes | None = ..., + *, + dtype: type[S1], name: Hashable | None = ..., copy: bool = ..., ) -> Self: ... 
@@ -278,6 +283,7 @@ class Series(IndexOpsMixin[S1], NDFrame): cls, data: Series[S1] | dict[int, S1] | dict[_str, S1] = ..., index: Axes | None = ..., + *, dtype: Dtype = ..., name: Hashable | None = ..., copy: bool = ..., @@ -287,6 +293,7 @@ class Series(IndexOpsMixin[S1], NDFrame): cls, data: object | _ListLike | None = ..., index: Axes | None = ..., + *, dtype: Dtype = ..., name: Hashable | None = ..., copy: bool = ..., diff --git a/pyproject.toml b/pyproject.toml index a1e7fd27a..a0cb7d541 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ numpy = [ ] [tool.poetry.dev-dependencies] -mypy = "1.5.0" +mypy = "1.5.1" pandas = "2.0.3" pyarrow = ">=10.0.1" pytest = ">=7.1.2" diff --git a/tests/extension/decimal/array.py b/tests/extension/decimal/array.py index f29e73e58..b19aa2b46 100644 --- a/tests/extension/decimal/array.py +++ b/tests/extension/decimal/array.py @@ -3,6 +3,10 @@ import decimal import numbers import sys +from typing import ( + Sequence, + cast, +) import numpy as np import pandas as pd @@ -200,13 +204,15 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy=copy) - def __setitem__(self, key, value) -> None: + def __setitem__( + self, key, value: DecimalArray | decimal.Decimal | Sequence[decimal.Decimal] + ) -> None: if is_list_like(value): if is_scalar(key): raise ValueError("setting an array element with a sequence.") - value = [decimal.Decimal(v) for v in value] + value = [decimal.Decimal(v) for v in cast(Sequence[decimal.Decimal], value)] else: - value = decimal.Decimal(value) + value = decimal.Decimal(cast(decimal.Decimal, value)) key = check_array_indexer(self, key) self._data[key] = value diff --git a/tests/test_arrays.py b/tests/test_arrays.py index 42be03e68..af4ece7bf 100644 --- a/tests/test_arrays.py +++ b/tests/test_arrays.py @@ -1,5 +1,3 @@ -from typing import Type - import numpy as np import numpy.typing as npt import pandas as pd @@ -43,18 +41,18 @@ def test_integer_array() -> None: nulled_ints = [1, 
2, 3, 4, 5, 6, 7, 8, None, 10] check( - assert_type(pd.array(nulled_ints, dtype="UInt8"), Type[ExtensionArray]), + assert_type(pd.array(nulled_ints, dtype="UInt8"), ExtensionArray), IntegerArray, ) check( - assert_type(pd.array(nulled_ints, dtype=pd.UInt8Dtype()), Type[ExtensionArray]), + assert_type(pd.array(nulled_ints, dtype=pd.UInt8Dtype()), ExtensionArray), IntegerArray, ) check( - assert_type(pd.array(nulled_ints, dtype=float), Type[ExtensionArray]), + assert_type(pd.array(nulled_ints, dtype=float), ExtensionArray), PandasArray, ) - check(assert_type(pd.array(ints, dtype=int), Type[ExtensionArray]), PandasArray) + check(assert_type(pd.array(ints, dtype=int), ExtensionArray), PandasArray) check(assert_type(int_arr.astype("Int64"), IntegerArray), IntegerArray) check(assert_type(int_arr.astype("UInt8"), IntegerArray), IntegerArray) @@ -167,15 +165,15 @@ def test_string_array() -> None: strings_list = strings.tolist() check( - assert_type(pd.array(strings_list, dtype="string"), Type[ExtensionArray]), + assert_type(pd.array(strings_list, dtype="string"), ExtensionArray), StringArray, ) check( - assert_type(pd.array(strings, dtype="string"), Type[ExtensionArray]), + assert_type(pd.array(strings, dtype="string"), ExtensionArray), StringArray, ) - check(assert_type(pd.array(strings, dtype=str), Type[ExtensionArray]), PandasArray) - check(assert_type(pd.array(strings), Type[ExtensionArray]), StringArray) + check(assert_type(pd.array(strings, dtype=str), ExtensionArray), PandasArray) + check(assert_type(pd.array(strings), ExtensionArray), StringArray) def test_boolean_array() -> None: @@ -187,19 +185,17 @@ def test_boolean_array() -> None: ) nulled_bools = [True, False, True, False, True, False, True, False, None, False] - check(assert_type(pd.array(nulled_bools), Type[ExtensionArray]), BooleanArray) + check(assert_type(pd.array(nulled_bools), ExtensionArray), BooleanArray) check( - assert_type(pd.array(nulled_bools, dtype="bool"), Type[ExtensionArray]), + 
assert_type(pd.array(nulled_bools, dtype="bool"), ExtensionArray), PandasArray, ) check( - assert_type(pd.array(nulled_bools, dtype=bool), Type[ExtensionArray]), + assert_type(pd.array(nulled_bools, dtype=bool), ExtensionArray), PandasArray, ) check( - assert_type( - pd.array(nulled_bools, dtype=pd.BooleanDtype()), Type[ExtensionArray] - ), + assert_type(pd.array(nulled_bools, dtype=pd.BooleanDtype()), ExtensionArray), BooleanArray, ) @@ -231,9 +227,9 @@ def test_period_array() -> None: PeriodArray, ) - check(assert_type(pd.array([p1, p2]), Type[ExtensionArray]), PeriodArray) + check(assert_type(pd.array([p1, p2]), ExtensionArray), PeriodArray) check( - assert_type(pd.array([p1, p2], dtype="period[D]"), Type[ExtensionArray]), + assert_type(pd.array([p1, p2], dtype="period[D]"), ExtensionArray), PeriodArray, ) @@ -270,8 +266,8 @@ def test_datetime_array() -> None: DatetimeArray, ) - check(assert_type(pd.array(data), Type[ExtensionArray]), DatetimeArray) - check(assert_type(pd.array(np_values), Type[ExtensionArray]), DatetimeArray) + check(assert_type(pd.array(data), ExtensionArray), DatetimeArray) + check(assert_type(pd.array(np_values), ExtensionArray), DatetimeArray) def test_interval_array_construction() -> None: @@ -325,16 +321,20 @@ def test_interval_array_construction() -> None: ), IntervalArray, ) + left: npt.NDArray[np.integer] = np.array([0, 1]) + right: npt.NDArray[np.integer] = np.array([1, 2]) check( assert_type( - IntervalArray.from_arrays(np.array([0, 1]), np.array([1, 2])), + IntervalArray.from_arrays(left, right), "IntervalArray[pd.Interval[int]]", ), IntervalArray, ) check( assert_type( - IntervalArray.from_arrays(pd.Series([0, 1]), pd.Series([1, 2])), + IntervalArray.from_arrays( + pd.Series([0, 1], dtype=int), pd.Series([1, 2], dtype=int) + ), "IntervalArray[pd.Interval[int]]", ), IntervalArray, @@ -386,23 +386,25 @@ def test_interval_array_construction() -> None: IntervalArray, ) - breaks = [0, 1, 2, 3, 4.5] + breaks = [0.0, 1.0, 2.0, 3.0, 4.5] 
check( assert_type( IntervalArray.from_breaks(breaks), "IntervalArray[pd.Interval[float]]" ), IntervalArray, ) + breaks_np: npt.NDArray[np.floating] = np.array(breaks) check( assert_type( - IntervalArray.from_breaks(np.array(breaks), copy=False), + IntervalArray.from_breaks(breaks_np, copy=False), "IntervalArray[pd.Interval[float]]", ), IntervalArray, ) + test: pd.Series[float] = pd.Series(breaks, dtype=float) check( assert_type( - IntervalArray.from_breaks(pd.Series(breaks), closed="left"), + IntervalArray.from_breaks(test, closed="left"), "IntervalArray[pd.Interval[float]]", ), IntervalArray, @@ -439,11 +441,11 @@ def test_interval_array_construction() -> None: def test_integer_array_attrib_props() -> None: ia = IntervalArray([pd.Interval(0, 1), pd.Interval(1, 2)]) - check(assert_type(ia.left, pd.Index), pd.Int64Index) - check(assert_type(ia.right, pd.Index), pd.Int64Index) + check(assert_type(ia.left, pd.Index), pd.Index, np.intp) + check(assert_type(ia.right, pd.Index), pd.Index, np.intp) check(assert_type(ia.closed, str), str) - check(assert_type(ia.mid, pd.Index), pd.Float64Index) - check(assert_type(ia.length, pd.Index), pd.Int64Index) + check(assert_type(ia.mid, pd.Index), pd.Index, float) + check(assert_type(ia.length, pd.Index), pd.Index, np.intp) check(assert_type(ia.is_empty, npt.NDArray[np.bool_]), np.ndarray, np.bool_) check(assert_type(ia.is_non_overlapping_monotonic, bool), bool) @@ -472,10 +474,10 @@ def test_integer_array_attrib_props() -> None: check(assert_type(ia.to_tuples(True), npt.NDArray[np.object_]), np.ndarray, tuple) check(assert_type(ia.to_tuples(False), npt.NDArray[np.object_]), np.ndarray, tuple) - ia_float = IntervalArray([pd.Interval(0, 1.5), pd.Interval(1, 2)]) - check(assert_type(ia_float.left, pd.Index), pd.Float64Index) - check(assert_type(ia_float.right, pd.Index), pd.Float64Index) - check(assert_type(ia_float.length, pd.Index), pd.Float64Index) + ia_float = IntervalArray([pd.Interval(0.0, 1.5), pd.Interval(1.0, 2.0)]) + 
check(assert_type(ia_float.left, pd.Index), pd.Index, float) + check(assert_type(ia_float.right, pd.Index), pd.Index, float) + check(assert_type(ia_float.length, pd.Index), pd.Index, float) ia_ts = IntervalArray( [ @@ -530,12 +532,12 @@ def test_timedelta_array() -> None: check( assert_type( - pd.array(np.array([1, 2], dtype="timedelta64[ns]")), Type[ExtensionArray] + pd.array(np.array([1, 2], dtype="timedelta64[ns]")), ExtensionArray ), TimedeltaArray, ) - check(assert_type(pd.array(tdi), Type[ExtensionArray]), TimedeltaArray) - check(assert_type(pd.array(tds, copy=False), Type[ExtensionArray]), TimedeltaArray) + check(assert_type(pd.array(tdi), ExtensionArray), TimedeltaArray) + check(assert_type(pd.array(tds, copy=False), ExtensionArray), TimedeltaArray) # check(assert_type(tda.microseconds,npt.NDArray[np.int_]), np.ndarray) # check(assert_type(tda.nanoseconds,npt.NDArray[np.int_]), np.ndarray) @@ -627,7 +629,7 @@ def test_sparse_array() -> None: check( assert_type( SparseArray( - [True, False, False, False, False, False, False, True, False, False], + [True, False], fill_value=False, ), SparseArray, @@ -639,9 +641,6 @@ def test_sparse_array() -> None: SparseArray( [ pd.Timestamp("2011-01-01"), - pd.Timestamp("2011-01-02"), - pd.Timestamp("2011-01-03"), - pd.NaT, pd.NaT, ], fill_value=pd.NaT, @@ -652,7 +651,7 @@ def test_sparse_array() -> None: ) check( assert_type( - SparseArray([pd.Timedelta(days=1), pd.NaT, pd.NaT], fill_value=pd.NaT), + SparseArray([pd.Timedelta(days=1), pd.NaT], fill_value=pd.NaT), SparseArray, ), SparseArray,