diff --git a/src/fast_array_utils/stats/__init__.py b/src/fast_array_utils/stats/__init__.py
index b49f6ec..8010444 100644
--- a/src/fast_array_utils/stats/__init__.py
+++ b/src/fast_array_utils/stats/__init__.py
@@ -223,7 +223,7 @@ def _generic_op(
 ) -> NDArray[Any] | np.number[Any] | types.CupyArray | types.DaskArray:
     from ._generic_ops import generic_op
 
-    assert dtype is None or op in get_args(DtypeOps), f"`dtype` is not supported for operation '{op}'"
+    assert dtype is None or op in get_args(DtypeOps), f"`dtype` is not supported for operation {op!r}"
     validate_axis(x.ndim, axis)
     return generic_op(x, op, axis=axis, keep_cupy_as_array=keep_cupy_as_array, dtype=dtype)
 
diff --git a/src/fast_array_utils/stats/_generic_ops.py b/src/fast_array_utils/stats/_generic_ops.py
index d342517..ef833ed 100644
--- a/src/fast_array_utils/stats/_generic_ops.py
+++ b/src/fast_array_utils/stats/_generic_ops.py
@@ -8,7 +8,7 @@
 
 from .. import types
 from ._typing import DtypeOps
-from ._utils import _dask_inner
+from ._utils import _dask_inner, _dtype_kw
 
 
 if TYPE_CHECKING:
@@ -29,8 +29,8 @@ def _run_numpy_op(
     axis: Literal[0, 1] | None = None,
     dtype: DTypeLike | None = None,
 ) -> NDArray[Any] | np.number[Any] | types.CupyArray | types.DaskArray:
-    kwargs = {"dtype": dtype} if op in get_args(DtypeOps) else {}
-    return getattr(np, op)(x, axis=axis, **kwargs)  # type: ignore[no-any-return]
+    arr = cast("NDArray[Any] | np.number[Any] | types.CupyArray | types.CupyCOOMatrix | types.DaskArray", getattr(np, op)(x, axis=axis, **_dtype_kw(dtype, op)))
+    return arr.toarray() if isinstance(arr, types.CupyCOOMatrix) else arr
 
 
 @singledispatch
@@ -83,14 +83,15 @@ def _generic_op_cs(
 
     # just convert to sparse array, then `return x.{op}(dtype=dtype)`
     # https://github.com/scipy/scipy/issues/23768
-    kwargs = {"dtype": dtype} if op in get_args(DtypeOps) else {}
     if axis is None:
-        return cast("np.number[Any]", getattr(x.data, op)(**kwargs))
+        return cast("np.number[Any]", getattr(x.data, op)(**_dtype_kw(dtype, op)))
     if TYPE_CHECKING:  # scipy-stubs thinks e.g. "int64" is invalid, which isn’t true
         assert isinstance(dtype, np.dtype | type | None)
     # convert to array so dimensions collapse as expected
-    x = (sp.csr_array if x.format == "csr" else sp.csc_array)(x, **kwargs)  # type: ignore[call-overload]
-    return cast("NDArray[Any] | np.number[Any]", getattr(x, op)(axis=axis))
+    x = (sp.csr_array if x.format == "csr" else sp.csc_array)(x, **_dtype_kw(dtype, op))  # type: ignore[arg-type]
+    rv = cast("NDArray[Any] | types.coo_array | np.number[Any]", getattr(x, op)(axis=axis))
+    # old scipy versions’ sparray.{max,min}() return a 1×n/n×1 sparray here, so we squeeze
+    return rv.toarray().squeeze() if isinstance(rv, types.coo_array) else rv
 
 
 @generic_op.register(types.DaskArray)
diff --git a/src/fast_array_utils/stats/_typing.py b/src/fast_array_utils/stats/_typing.py
index e8b0b65..2ddc4c0 100644
--- a/src/fast_array_utils/stats/_typing.py
+++ b/src/fast_array_utils/stats/_typing.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MPL-2.0
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Literal, Protocol
+from typing import TYPE_CHECKING, Generic, Literal, Protocol, TypedDict, TypeVar
 
 import numpy as np
 
@@ -49,3 +49,10 @@ def __call__(
 NoDtypeOps = Literal["max", "min"]
 DtypeOps = Literal["sum"]
 Ops: TypeAlias = NoDtypeOps | DtypeOps
+
+
+_DT = TypeVar("_DT", bound="DTypeLike")
+
+
+class DTypeKw(TypedDict, Generic[_DT], total=False):
+    dtype: _DT
diff --git a/src/fast_array_utils/stats/_utils.py b/src/fast_array_utils/stats/_utils.py
index 7d1fcbb..1ca4065 100644
--- a/src/fast_array_utils/stats/_utils.py
+++ b/src/fast_array_utils/stats/_utils.py
@@ -2,12 +2,13 @@
 from __future__ import annotations
 
 from functools import partial
-from typing import TYPE_CHECKING, Literal, cast, get_args
+from typing import TYPE_CHECKING, Literal, TypeVar, cast, get_args
 
 import numpy as np
 from numpy.exceptions import AxisError
 
 from .. import types
+from ..typing import GpuArray
 from ._typing import DtypeOps
 
 
@@ -16,8 +17,8 @@
     from numpy.typing import DTypeLike, NDArray
 
-    from ..typing import CpuArray, GpuArray
-    from ._typing import Ops
+    from ..typing import CpuArray
+    from ._typing import DTypeKw, Ops
 
 
 ComplexAxis: TypeAlias = tuple[Literal[0], Literal[1]] | tuple[Literal[0, 1]] | Literal[0, 1] | None
 
@@ -65,13 +66,17 @@ def _dask_block(
     axis: ComplexAxis = None,
     dtype: DTypeLike | None = None,
     keepdims: bool = False,
+    computing_meta: bool = False,
 ) -> NDArray[Any] | types.CupyArray:
     from . import max, min, sum
 
+    if computing_meta:  # dask.blockwise doesn’t allow passing `meta` in, and the reductions below don’t handle a 0d matrix
+        return (types.CupyArray if isinstance(a, GpuArray) else np.ndarray)((), dtype or a.dtype)
+
     fns = {fn.__name__: fn for fn in (min, max, sum)}
     axis = _normalize_axis(axis, a.ndim)
-    rv = fns[op](a, axis=axis, dtype=dtype, keep_cupy_as_array=True)  # type: ignore[misc,call-overload]
+    rv = fns[op](a, axis=axis, keep_cupy_as_array=True, **_dtype_kw(dtype, op))  # type: ignore[call-overload]
     shape = _get_shape(rv, axis=axis, keepdims=keepdims)
     return cast("NDArray[Any] | types.CupyArray", rv.reshape(shape))
 
 
@@ -105,5 +110,12 @@ def _get_shape(a: NDArray[Any] | np.number[Any] | types.CupyArray, *, axis: Lite
             assert axis is not None
             return (1, a.size) if axis == 0 else (a.size, 1)
         case _:  # pragma: no cover
-            msg = f"{keepdims=}, {type(a)}"
+            msg = f"{keepdims=}, {a.ndim=}, {type(a)=}"
             raise AssertionError(msg)
+
+
+DT = TypeVar("DT", bound="DTypeLike")
+
+
+def _dtype_kw(dtype: DT | None, op: Ops) -> DTypeKw[DT]:
+    return {"dtype": dtype} if dtype is not None and op in get_args(DtypeOps) else {}
diff --git a/tests/test_stats.py b/tests/test_stats.py
index 54105b7..8d0a777 100644
--- a/tests/test_stats.py
+++ b/tests/test_stats.py
@@ -201,6 +201,20 @@ def test_sum_to_int(array_type: ArrayType[CpuArray | DiskArray | types.DaskArray
     np.testing.assert_array_equal(sum_, expected)
 
 
+@pytest.mark.array_type(skip=ATS_SPARSE_DS)
+@pytest.mark.parametrize("func", [stats.min, stats.max])
+def test_min_max(array_type: ArrayType[CpuArray | GpuArray | DiskArray | types.DaskArray], axis: Literal[0, 1] | None, func: StatFunNoDtype) -> None:
+    rng = np.random.default_rng(0)
+    np_arr = rng.random((100, 100))
+    arr = array_type(np_arr)
+
+    result = to_np_dense_checked(func(arr, axis=axis), axis, arr)
+
+    expected = (np.min if func is stats.min else np.max)(np_arr, axis=axis)
+    np.testing.assert_array_equal(result, expected)
+
+
+@pytest.mark.parametrize("func", [stats.sum, stats.min, stats.max])
 @pytest.mark.parametrize(
     "data",
     [
@@ -211,14 +225,15 @@ def test_sum_to_int(array_type: ArrayType[CpuArray | DiskArray | types.DaskArray
 )
 @pytest.mark.parametrize("axis", [0, 1])
 @pytest.mark.array_type(Flags.Dask)
-def test_sum_dask_shapes(array_type: ArrayType[types.DaskArray], axis: Literal[0, 1], data: list[list[int]]) -> None:
+def test_dask_shapes(array_type: ArrayType[types.DaskArray], axis: Literal[0, 1], data: list[list[int]], func: StatFunNoDtype) -> None:
     np_arr = np.array(data, dtype=np.float32)
     arr = array_type(np_arr)
     assert 1 in arr.chunksize, "This test is supposed to test 1×n and n×1 chunk sizes"
-    sum_ = cast("NDArray[Any] | types.CupyArray", stats.sum(arr, axis=axis).compute())
-    if isinstance(sum_, types.CupyArray):
-        sum_ = sum_.get()
-    np.testing.assert_almost_equal(np_arr.sum(axis=axis), sum_)
+    stat = cast("NDArray[Any] | types.CupyArray", func(arr, axis=axis).compute())
+    if isinstance(stat, types.CupyArray):
+        stat = stat.get()
+    np_func = getattr(np, func.__name__)
+    np.testing.assert_almost_equal(stat, np_func(np_arr, axis=axis))
 
 
 @pytest.mark.array_type(skip=ATS_SPARSE_DS)
diff --git a/typings/cupy/_core/core.pyi b/typings/cupy/_core/core.pyi
index 7d1bf96..2995181 100644
--- a/typings/cupy/_core/core.pyi
+++ b/typings/cupy/_core/core.pyi
@@ -3,7 +3,7 @@
 from types import EllipsisType
 from typing import Any, Literal, Self, overload
 
 import numpy as np
-from cupy.cuda import Stream
+from cupy.cuda import MemoryPointer, Stream
 from numpy._core.multiarray import flagsobj
 from numpy.typing import DTypeLike, NDArray
@@ -14,6 +14,15 @@ class ndarray:
     ndim: int
     flags: flagsobj
 
+    def __init__(
+        self,
+        shape: tuple[int, ...],
+        dtype: DTypeLike | None = ...,
+        memptr: MemoryPointer | None = None,
+        strides: tuple[int, ...] | None = None,
+        order: Literal["C", "F"] = "C",
+    ) -> None: ...
+
     # cupy-specific
     def get(
         self, stream: Stream | None = None, order: Literal["C", "F", "A"] = "C", out: NDArray[Any] | None = None, blocking: bool = True
diff --git a/typings/cupy/cuda.pyi b/typings/cupy/cuda.pyi
index 659d7ee..595d499 100644
--- a/typings/cupy/cuda.pyi
+++ b/typings/cupy/cuda.pyi
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: MPL-2.0
 class Stream: ...
+class MemoryPointer: ...
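
For readers skimming the patch, here is a minimal, self-contained sketch of the pattern that `DTypeKw` (in `_typing.py`) and `_dtype_kw` (in `_utils.py`) implement: a `total=False` TypedDict that is either empty or `{"dtype": ...}`, splatted into the reduction call so that only `sum` ever receives a `dtype` keyword. The non-generic `DTypeKw` and the `reduce_op` wrapper below are simplified illustrations written against plain numpy, not the library's actual code:

# Illustration only: simplified stand-ins for the generic `DTypeKw` in `_typing.py`
# and `_dtype_kw` in `_utils.py`; `reduce_op` is a toy wrapper, not library code.
from __future__ import annotations

from typing import Literal, TypeAlias, TypedDict, get_args

import numpy as np
from numpy.typing import DTypeLike, NDArray

NoDtypeOps: TypeAlias = Literal["max", "min"]  # np.max/np.min take no `dtype` keyword
DtypeOps: TypeAlias = Literal["sum"]  # np.sum does
Ops: TypeAlias = NoDtypeOps | DtypeOps


class DTypeKw(TypedDict, total=False):
    # `total=False` means the dict may be `{}`, so it can always be `**`-splatted
    dtype: DTypeLike


def _dtype_kw(dtype: DTypeLike | None, op: Ops) -> DTypeKw:
    # forward `dtype` only to operations that actually accept it
    return {"dtype": dtype} if dtype is not None and op in get_args(DtypeOps) else {}


def reduce_op(x: NDArray[np.generic], op: Ops, *, axis: int | None = None, dtype: DTypeLike | None = None):
    # `**{}` expands to no keyword arguments, so "max"/"min" never see `dtype`
    return getattr(np, op)(x, axis=axis, **_dtype_kw(dtype, op))


x = np.ones((2, 3), dtype=np.float32)
print(reduce_op(x, "sum", axis=0, dtype=np.int64))  # dtype forwarded to np.sum -> int64 result
print(reduce_op(x, "max", axis=0, dtype=np.int64))  # stray dtype (as dask reductions may pass) is not forwarded to np.max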