From 3845bf67af91cbd1156f2b19055dc9867ac8207f Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 21 Feb 2025 12:12:04 +0100 Subject: [PATCH 01/22] Less stringly typed --- src/testing/fast_array_utils/__init__.py | 191 ++++++++++++++--------- src/testing/fast_array_utils/pytest.py | 68 +++----- tests/test_stats.py | 9 +- tests/test_test_utils.py | 6 +- 4 files changed, 139 insertions(+), 135 deletions(-) diff --git a/src/testing/fast_array_utils/__init__.py b/src/testing/fast_array_utils/__init__.py index 01dcb79..aae3db8 100644 --- a/src/testing/fast_array_utils/__init__.py +++ b/src/testing/fast_array_utils/__init__.py @@ -3,7 +3,8 @@ from __future__ import annotations -import re +from dataclasses import dataclass +from functools import cache, cached_property from typing import TYPE_CHECKING import numpy as np @@ -37,48 +38,117 @@ def __call__( # noqa: D102 _DTypeLikeFloat64 = np.dtype[np.float64] | type[np.float64] -RE_ARRAY_QUAL = re.compile(r"(?P<mod>(?:\w+\.)*\w+)\.(?P<name>[^\[]+)(?:\[(?P<inner>[\w.]+)\])?") - - -def get_array_cls(qualname: str) -> type[Array]: # noqa: PLR0911 - """Get a supported array class by qualname.""" - m = RE_ARRAY_QUAL.fullmatch(qualname) - assert m - match m["mod"], m["name"], m["inner"]: - case "numpy", "ndarray", None: - return np.ndarray - case "scipy.sparse", ( - "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" - ) as cls_name, None: - import scipy.sparse - - return getattr(scipy.sparse, cls_name) # type: ignore[no-any-return] - case "cupy", "ndarray", None: - import cupy as cp - - return cp.ndarray # type: ignore[no-any-return] - case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: - import cupyx.scipy.sparse as cu_sparse - - return getattr(cu_sparse, cls_name) # type: ignore[no-any-return] - case "dask.array", cls_name, _: - if TYPE_CHECKING: - from dask.array.core import Array as DaskArray - else: - from dask.array import Array as DaskArray - - return DaskArray - case "h5py", "Dataset", _: - import h5py - - 
return h5py.Dataset # type: ignore[no-any-return] - case "zarr", "Array", _: - import zarr - - return zarr.Array - case _: - msg = f"Unknown array class: {qualname}" - raise ValueError(msg) +@dataclass +class ArrayType: + """Supported array type.""" + + mod: str + name: str + inner: ArrayType | None + + @classmethod + @cache + def from_qualname(cls, qualname: str, inner: str | None = None) -> ArrayType: + """Get a supported array type by qualname.""" + mod, name = qualname.rsplit(".", 1) + return cls(mod, name, ArrayType.from_qualname(inner) if inner else None) + + def __str__(self) -> str: # noqa: D105 + rv = f"{self.mod}.{self.name}" + return f"{rv}[{self.inner}]" if self.inner else rv + + @cached_property + def cls(self) -> type[Array]: # noqa: PLR0911 + """Get a supported array class by qualname.""" + match self.mod, self.name, self.inner: + case "numpy", "ndarray", None: + return np.ndarray + case "scipy.sparse", ( + "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" + ) as cls_name, None: + import scipy.sparse + + return getattr(scipy.sparse, cls_name) # type: ignore[no-any-return] + case "cupy", "ndarray", None: + import cupy as cp + + return cp.ndarray # type: ignore[no-any-return] + case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: + import cupyx.scipy.sparse as cu_sparse + + return getattr(cu_sparse, cls_name) # type: ignore[no-any-return] + case "dask.array", cls_name, _: + if TYPE_CHECKING: + from dask.array.core import Array as DaskArray + else: + from dask.array import Array as DaskArray + + return DaskArray + case "h5py", "Dataset", _: + import h5py + + return h5py.Dataset # type: ignore[no-any-return] + case "zarr", "Array", _: + import zarr + + return zarr.Array + case _: + msg = f"Unknown array class: {self}" + raise ValueError(msg) + + def random( + self, + shape: tuple[int, int], + *, + dtype: _DTypeLikeFloat32 | _DTypeLikeFloat64 | None, + gen: np.random.Generator | None = None, + ) -> Array: + """Create a random 
array.""" + gen = np.random.default_rng(gen) + + match self.mod, self.name, self.inner: + case "numpy", "ndarray", None: + return gen.random(shape, dtype=dtype or np.float64) + case "scipy.sparse", ( + "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" + ) as cls_name, None: + fmt, container = cls_name.split("_") + return random_mat(shape, format=fmt, container=container, dtype=dtype) # type: ignore[arg-type] + case "cupy", "ndarray", None: + raise NotImplementedError + case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: + raise NotImplementedError + case "dask.array", cls_name, _: + raise NotImplementedError + case "h5py", "Dataset", _: + raise NotImplementedError + case "zarr", "Array", _: + raise NotImplementedError + case _: + msg = f"Unknown array class: {self}" + raise ValueError(msg) + + +_SUPPORTED_TYPE_NAMES_IN_DASK = [ + "numpy.ndarray", + "scipy.sparse.csr_array", + "scipy.sparse.csc_array", + "scipy.sparse.csr_matrix", + "scipy.sparse.csc_matrix", +] +_SUPPORTED_TYPE_NAMES_OTHER = [ + "h5py.Dataset", + "zarr.Array", + "cupy.ndarray", + "cupyx.scipy.sparse.csr_matrix", + "cupyx.scipy.sparse.csc_matrix", +] +SUPPORTED_TYPES_IN_DASK = tuple(map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_IN_DASK)) +SUPPORTED_TYPES_DASK = tuple( + ArrayType.from_qualname("dask.array.Array", t) for t in _SUPPORTED_TYPE_NAMES_IN_DASK +) +SUPPORTED_TYPES_OTHER = tuple(map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_OTHER)) +SUPPORTED_TYPES = (*SUPPORTED_TYPES_IN_DASK, *SUPPORTED_TYPES_DASK, *SUPPORTED_TYPES_OTHER) def random_mat( @@ -100,38 +170,3 @@ def random_mat( if container == "matrix" else random_sparr(shape, density=density, format=format, dtype=dtype, random_state=gen) ) - - -def random_array( - qualname: str, - shape: tuple[int, int], - *, - dtype: _DTypeLikeFloat32 | _DTypeLikeFloat64 | None, - gen: np.random.Generator | None = None, -) -> Array: - """Create a random array.""" - gen = np.random.default_rng(gen) - - m = 
RE_ARRAY_QUAL.fullmatch(qualname) - assert m - match m["mod"], m["name"], m["inner"]: - case "numpy", "ndarray", None: - return gen.random(shape, dtype=dtype or np.float64) - case "scipy.sparse", ( - "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" - ) as cls_name, None: - fmt, container = cls_name.split("_") - return random_mat(shape, format=fmt, container=container, dtype=dtype) # type: ignore[arg-type] - case "cupy", "ndarray", None: - raise NotImplementedError - case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: - raise NotImplementedError - case "dask.array", cls_name, _: - raise NotImplementedError - case "h5py", "Dataset", _: - raise NotImplementedError - case "zarr", "Array", _: - raise NotImplementedError - case _: - msg = f"Unknown array class: {qualname}" - raise ValueError(msg) diff --git a/src/testing/fast_array_utils/pytest.py b/src/testing/fast_array_utils/pytest.py index 85c8e18..05161d1 100644 --- a/src/testing/fast_array_utils/pytest.py +++ b/src/testing/fast_array_utils/pytest.py @@ -12,7 +12,7 @@ from fast_array_utils import types -from . import get_array_cls +from . import SUPPORTED_TYPES, ArrayType if TYPE_CHECKING: @@ -22,8 +22,6 @@ from testing.fast_array_utils import ToArray - from . 
import Array - def _skip_if_no(dist: str) -> pytest.MarkDecorator: return pytest.mark.skipif(not find_spec(dist), reason=f"{dist} not installed") @@ -31,65 +29,37 @@ def _skip_if_no(dist: str) -> pytest.MarkDecorator: @pytest.fixture( scope="session", - params=[ - pytest.param("numpy.ndarray"), - pytest.param("scipy.sparse.csr_array", marks=_skip_if_no("scipy")), - pytest.param("scipy.sparse.csc_array", marks=_skip_if_no("scipy")), - pytest.param("scipy.sparse.csr_matrix", marks=_skip_if_no("scipy")), - pytest.param("scipy.sparse.csc_matrix", marks=_skip_if_no("scipy")), - pytest.param("dask.array.Array[numpy.ndarray]", marks=_skip_if_no("dask")), - pytest.param("dask.array.Array[scipy.sparse.csr_array]", marks=_skip_if_no("dask")), - pytest.param("dask.array.Array[scipy.sparse.csc_array]", marks=_skip_if_no("dask")), - pytest.param("dask.array.Array[scipy.sparse.csr_matrix]", marks=_skip_if_no("dask")), - pytest.param("dask.array.Array[scipy.sparse.csc_matrix]", marks=_skip_if_no("dask")), - pytest.param("h5py.Dataset", marks=_skip_if_no("h5py")), - pytest.param("zarr.Array", marks=_skip_if_no("zarr")), - pytest.param("cupy.ndarray", marks=_skip_if_no("cupy")), - pytest.param("cupyx.scipy.sparse.csr_matrix", marks=_skip_if_no("cupy")), - pytest.param("cupyx.scipy.sparse.csc_matrix", marks=_skip_if_no("cupy")), - ], + params=[pytest.param(t, marks=_skip_if_no(t.mod.split(".")[0])) for t in SUPPORTED_TYPES], ) -def array_cls_name(request: pytest.FixtureRequest) -> str: - """Fixture for a supported array class.""" - return cast(str, request.param) - - -@pytest.fixture(scope="session") -def array_cls(array_cls_name: str) -> type[Array]: +def array_type(request: pytest.FixtureRequest) -> ArrayType: """Fixture for a supported array class.""" - return get_array_cls(array_cls_name) + return cast(ArrayType, request.param) @pytest.fixture(scope="session") -def to_array( - request: pytest.FixtureRequest, array_cls: type[Array], array_cls_name: str -) -> ToArray: +def 
to_array(request: pytest.FixtureRequest, array_type: ArrayType) -> ToArray: """Fixture for conversion into a supported array.""" - return get_to_array(array_cls, array_cls_name, request) + return get_to_array(array_type, request) -def get_to_array( - array_cls: type[Array], - array_cls_name: str | None = None, - request: pytest.FixtureRequest | None = None, -) -> ToArray: +def get_to_array(array_type: ArrayType, request: pytest.FixtureRequest | None = None) -> ToArray: """Create a function to convert to a supported array.""" - if array_cls is np.ndarray: + if array_type.cls is np.ndarray: return np.asarray # type: ignore[return-value] - if array_cls is types.DaskArray: - assert array_cls_name is not None - return to_dask_array(array_cls_name) - if array_cls is types.H5Dataset: + if array_type.cls is types.DaskArray: + assert array_type.inner is not None + return to_dask_array(array_type.inner) + if array_type.cls is types.H5Dataset: assert request is not None return request.getfixturevalue("to_h5py_dataset") # type: ignore[no-any-return] - if array_cls is types.ZarrArray: + if array_type.cls is types.ZarrArray: return to_zarr_array - if array_cls is types.CupyArray: + if array_type.cls is types.CupyArray: import cupy as cu return cu.asarray # type: ignore[no-any-return] - return array_cls # type: ignore[return-value] + return array_type.cls # type: ignore[return-value] def _half_chunk_size(a: tuple[int, ...]) -> tuple[int, ...]: @@ -100,16 +70,16 @@ def half_rounded_up(x: int) -> int: return tuple(half_rounded_up(x) for x in a) -def to_dask_array(array_cls_name: str) -> ToArray: +def to_dask_array(array_type: ArrayType) -> ToArray: """Convert to a dask array.""" if TYPE_CHECKING: import dask.array.core as da else: import dask.array as da - inner_cls_name = array_cls_name.removeprefix("dask.array.Array[").removesuffix("]") - inner_cls = get_array_cls(inner_cls_name) - to_array_fn: ToArray = get_to_array(array_cls=inner_cls) + inner_cls = array_type.inner + assert 
inner_cls is not None + to_array_fn: ToArray = get_to_array(inner_cls) def to_dask_array(x: ArrayLike, *, dtype: DTypeLike | None = None) -> types.DaskArray: x = np.asarray(x, dtype=dtype) diff --git a/tests/test_stats.py b/tests/test_stats.py index e86aacb..3c0918d 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -7,7 +7,6 @@ import pytest from fast_array_utils import stats, types -from testing.fast_array_utils import random_array if TYPE_CHECKING: @@ -15,7 +14,7 @@ from pytest_codspeed import BenchmarkFixture - from testing.fast_array_utils import Array, ToArray + from testing.fast_array_utils import Array, ArrayType, ToArray DTypeIn = type[np.float32 | np.float64 | np.int32 | np.bool_] DTypeOut = type[np.float32 | np.float64 | np.int64] @@ -75,13 +74,13 @@ def test_sum( @pytest.mark.parametrize("dtype", [np.float32, np.float64]) # random only supports float def test_sum_benchmark( benchmark: BenchmarkFixture, - array_cls_name: str, + array_type: ArrayType, axis: Literal[0, 1, None], dtype: type[np.float32 | np.float64], ) -> None: try: - shape = (1_000, 1_000) if "sparse" in array_cls_name else (100, 100) - arr = random_array(array_cls_name, shape, dtype=dtype) + shape = (1_000, 1_000) if "sparse" in array_type.mod else (100, 100) + arr = array_type.random(shape, dtype=dtype) except NotImplementedError: pytest.skip("random_array not implemented for dtype") diff --git a/tests/test_test_utils.py b/tests/test_test_utils.py index 36d41c3..1dcc07a 100644 --- a/tests/test_test_utils.py +++ b/tests/test_test_utils.py @@ -12,13 +12,13 @@ if TYPE_CHECKING: from numpy.typing import DTypeLike - from testing.fast_array_utils import Array, ToArray + from testing.fast_array_utils import ArrayType, ToArray @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_conv(array_cls: type[Array], to_array: ToArray, dtype: DTypeLike) -> None: +def test_conv(array_type: ArrayType, to_array: ToArray, dtype: DTypeLike) -> None: arr = 
to_array(np.arange(12).reshape(3, 4), dtype=dtype) - assert isinstance(arr, array_cls) + assert isinstance(arr, array_type.cls) if isinstance(arr, types.DaskArray): arr = arr.compute() # type: ignore[no-untyped-call] elif isinstance(arr, types.CupyArray): From 6ddf3222b4df0c3dfabbfdea127c51db50d9b241 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 21 Feb 2025 12:54:43 +0100 Subject: [PATCH 02/22] move conversion --- src/testing/fast_array_utils/__init__.py | 83 ++++++++++++++++++++- src/testing/fast_array_utils/pytest.py | 95 ++++-------------------- tests/test_asarray.py | 6 +- tests/test_stats.py | 6 +- tests/test_test_utils.py | 6 +- 5 files changed, 104 insertions(+), 92 deletions(-) diff --git a/src/testing/fast_array_utils/__init__.py b/src/testing/fast_array_utils/__init__.py index aae3db8..1e76909 100644 --- a/src/testing/fast_array_utils/__init__.py +++ b/src/testing/fast_array_utils/__init__.py @@ -3,7 +3,7 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import KW_ONLY, dataclass from functools import cache, cached_property from typing import TYPE_CHECKING @@ -11,8 +11,10 @@ if TYPE_CHECKING: + from collections.abc import Callable from typing import Any, Literal, Protocol, SupportsFloat, TypeAlias + import h5py from numpy.typing import ArrayLike, DTypeLike, NDArray from fast_array_utils import types @@ -38,13 +40,23 @@ def __call__( # noqa: D102 _DTypeLikeFloat64 = np.dtype[np.float64] | type[np.float64] +@dataclass +class ConversionContext: + """Conversion context required for h5py.""" + + hdf5_file: h5py.File + get_ds_name: Callable[[], str] + + @dataclass class ArrayType: - """Supported array type.""" + """Supported array type with methods for conversion and random generation.""" mod: str name: str - inner: ArrayType | None + inner: ArrayType | None = None + _: KW_ONLY + conversion_context: ConversionContext | None = None @classmethod @cache @@ -128,6 +140,63 @@ def random( msg = f"Unknown array class: 
{self}" raise ValueError(msg) + def __call__(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Array: + """Create a function to convert to a supported array.""" + from fast_array_utils import types + + fn: ToArray + if self.cls is np.ndarray: + fn = np.asarray # type: ignore[assignment] + elif self.cls is types.DaskArray: + if self.inner is None: + msg = "Cannot convert to dask array without inner array type" + raise AssertionError(msg) + fn = self.to_dask_array + elif self.cls is types.H5Dataset: + fn = self.to_h5py_dataset + elif self.cls is types.ZarrArray: + fn = self.to_zarr_array + elif self.cls is types.CupyArray: + import cupy as cu + + fn = cu.asarray + else: + fn = self.cls # type: ignore[assignment] + + return fn(x, dtype=dtype) + + def to_dask_array(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.DaskArray: + """Convert to a dask array.""" + if TYPE_CHECKING: + import dask.array.core as da + else: + import dask.array as da + + assert self.inner is not None + + x = np.asarray(x, dtype=dtype) + return da.from_array(self.inner.__call__(x), _half_chunk_size(x.shape)) # type: ignore[no-untyped-call,no-any-return] + + def to_h5py_dataset( + self, x: ArrayLike, /, *, dtype: DTypeLike | None = None + ) -> types.H5Dataset: + """Convert to a h5py dataset.""" + if (ctx := self.conversion_context) is None: + msg = "`conversion_context` must be set for h5py" + raise RuntimeError(msg) + arr = np.asarray(x, dtype=dtype) + return ctx.hdf5_file.create_dataset(ctx.get_ds_name(), arr.shape, arr.dtype, data=arr) + + @staticmethod + def to_zarr_array(x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.ZarrArray: + """Convert to a zarr array.""" + import zarr + + arr = np.asarray(x, dtype=dtype) + za = zarr.create_array({}, shape=arr.shape, dtype=arr.dtype) + za[...] 
= arr + return za + _SUPPORTED_TYPE_NAMES_IN_DASK = [ "numpy.ndarray", @@ -170,3 +239,11 @@ def random_mat( if container == "matrix" else random_sparr(shape, density=density, format=format, dtype=dtype, random_state=gen) ) + + +def _half_chunk_size(a: tuple[int, ...]) -> tuple[int, ...]: + def half_rounded_up(x: int) -> int: + div, mod = divmod(x, 2) + return div + (mod > 0) + + return tuple(half_rounded_up(x) for x in a) diff --git a/src/testing/fast_array_utils/pytest.py b/src/testing/fast_array_utils/pytest.py index 05161d1..5883749 100644 --- a/src/testing/fast_array_utils/pytest.py +++ b/src/testing/fast_array_utils/pytest.py @@ -7,21 +7,14 @@ from importlib.util import find_spec from typing import TYPE_CHECKING, cast -import numpy as np import pytest -from fast_array_utils import types - -from . import SUPPORTED_TYPES, ArrayType +from . import SUPPORTED_TYPES, ArrayType, ConversionContext if TYPE_CHECKING: from collections.abc import Generator - from numpy.typing import ArrayLike, DTypeLike - - from testing.fast_array_utils import ToArray - def _skip_if_no(dist: str) -> pytest.MarkDecorator: return pytest.mark.skipif(not find_spec(dist), reason=f"{dist} not installed") @@ -29,92 +22,34 @@ def _skip_if_no(dist: str) -> pytest.MarkDecorator: @pytest.fixture( scope="session", - params=[pytest.param(t, marks=_skip_if_no(t.mod.split(".")[0])) for t in SUPPORTED_TYPES], + params=[ + pytest.param(t, id=str(t), marks=_skip_if_no(t.mod.split(".")[0])) for t in SUPPORTED_TYPES + ], ) def array_type(request: pytest.FixtureRequest) -> ArrayType: """Fixture for a supported array class.""" - return cast(ArrayType, request.param) - - -@pytest.fixture(scope="session") -def to_array(request: pytest.FixtureRequest, array_type: ArrayType) -> ToArray: - """Fixture for conversion into a supported array.""" - return get_to_array(array_type, request) - - -def get_to_array(array_type: ArrayType, request: pytest.FixtureRequest | None = None) -> ToArray: - """Create a function to 
convert to a supported array.""" - if array_type.cls is np.ndarray: - return np.asarray # type: ignore[return-value] - if array_type.cls is types.DaskArray: - assert array_type.inner is not None - return to_dask_array(array_type.inner) - if array_type.cls is types.H5Dataset: - assert request is not None - return request.getfixturevalue("to_h5py_dataset") # type: ignore[no-any-return] - if array_type.cls is types.ZarrArray: - return to_zarr_array - if array_type.cls is types.CupyArray: - import cupy as cu - - return cu.asarray # type: ignore[no-any-return] - - return array_type.cls # type: ignore[return-value] - - -def _half_chunk_size(a: tuple[int, ...]) -> tuple[int, ...]: - def half_rounded_up(x: int) -> int: - div, mod = divmod(x, 2) - return div + (mod > 0) + from fast_array_utils.types import H5Dataset - return tuple(half_rounded_up(x) for x in a) - - -def to_dask_array(array_type: ArrayType) -> ToArray: - """Convert to a dask array.""" - if TYPE_CHECKING: - import dask.array.core as da - else: - import dask.array as da - - inner_cls = array_type.inner - assert inner_cls is not None - to_array_fn: ToArray = get_to_array(inner_cls) - - def to_dask_array(x: ArrayLike, *, dtype: DTypeLike | None = None) -> types.DaskArray: - x = np.asarray(x, dtype=dtype) - return da.from_array(to_array_fn(x), _half_chunk_size(x.shape)) # type: ignore[no-untyped-call,no-any-return] - - return to_dask_array + at = cast(ArrayType, request.param) + if at.cls is H5Dataset: + at.conversion_context = request.getfixturevalue("conversion_context") + return at @pytest.fixture(scope="session") # worker_id for xdist since we don't want to override open files -def to_h5py_dataset( +def conversion_context( tmp_path_factory: pytest.TempPathFactory, worker_id: str = "serial", -) -> Generator[ToArray, None, None]: +) -> Generator[ConversionContext, None, None]: """Convert to a h5py dataset.""" import h5py tmp_path = tmp_path_factory.mktemp("backed_adata") tmp_path = tmp_path / 
f"test_{worker_id}.h5ad" - with h5py.File(tmp_path, "x") as f: + def get_ds_name() -> str: + return os.environ["PYTEST_CURRENT_TEST"].rsplit(":", 1)[-1].split(" ", 1)[0] - def to_h5py_dataset(x: ArrayLike, *, dtype: DTypeLike | None = None) -> types.H5Dataset: - arr = np.asarray(x, dtype=dtype) - test_name = os.environ["PYTEST_CURRENT_TEST"].rsplit(":", 1)[-1].split(" ", 1)[0] - return f.create_dataset(test_name, arr.shape, arr.dtype, data=arr) - - yield to_h5py_dataset - - -def to_zarr_array(x: ArrayLike, *, dtype: DTypeLike | None = None) -> types.ZarrArray: - """Convert to a zarr array.""" - import zarr - - arr = np.asarray(x, dtype=dtype) - za = zarr.create_array({}, shape=arr.shape, dtype=arr.dtype) - za[...] = arr - return za + with h5py.File(tmp_path, "x") as f: + yield ConversionContext(hdf5_file=f, get_ds_name=get_ds_name) diff --git a/tests/test_asarray.py b/tests/test_asarray.py index c7caa57..2b1395b 100644 --- a/tests/test_asarray.py +++ b/tests/test_asarray.py @@ -9,11 +9,11 @@ if TYPE_CHECKING: - from testing.fast_array_utils import ToArray + from testing.fast_array_utils import ArrayType -def test_asarray(to_array: ToArray) -> None: - x = to_array([[1, 2, 3], [4, 5, 6]]) +def test_asarray(array_type: ArrayType) -> None: + x = array_type([[1, 2, 3], [4, 5, 6]]) arr = asarray(x) assert isinstance(arr, np.ndarray) assert arr.shape == (2, 3) diff --git a/tests/test_stats.py b/tests/test_stats.py index 3c0918d..7f9f688 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -14,7 +14,7 @@ from pytest_codspeed import BenchmarkFixture - from testing.fast_array_utils import Array, ArrayType, ToArray + from testing.fast_array_utils import Array, ArrayType DTypeIn = type[np.float32 | np.float64 | np.int32 | np.bool_] DTypeOut = type[np.float32 | np.float64 | np.int64] @@ -36,13 +36,13 @@ def dtype_arg(request: pytest.FixtureRequest) -> DTypeOut | None: def test_sum( - to_array: ToArray, + array_type: ArrayType, dtype_in: DTypeIn, dtype_arg: DTypeOut | 
None, axis: Literal[0, 1, None], ) -> None: np_arr = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype_in) - arr = to_array(np_arr.copy()) + arr = array_type(np_arr.copy()) assert arr.dtype == dtype_in sum_: Array[Any] | np.floating = stats.sum(arr, axis=axis, dtype=dtype_arg) # type: ignore[type-arg,arg-type] diff --git a/tests/test_test_utils.py b/tests/test_test_utils.py index 1dcc07a..fa7fe86 100644 --- a/tests/test_test_utils.py +++ b/tests/test_test_utils.py @@ -12,12 +12,12 @@ if TYPE_CHECKING: from numpy.typing import DTypeLike - from testing.fast_array_utils import ArrayType, ToArray + from testing.fast_array_utils import ArrayType @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_conv(array_type: ArrayType, to_array: ToArray, dtype: DTypeLike) -> None: - arr = to_array(np.arange(12).reshape(3, 4), dtype=dtype) +def test_conv(array_type: ArrayType, dtype: DTypeLike) -> None: + arr = array_type(np.arange(12).reshape(3, 4), dtype=dtype) assert isinstance(arr, array_type.cls) if isinstance(arr, types.DaskArray): arr = arr.compute() # type: ignore[no-untyped-call] From 7c35ec78a359e28979bdea1ec2e519e212370a93 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Fri, 21 Feb 2025 12:57:30 +0100 Subject: [PATCH 03/22] freeze --- src/testing/fast_array_utils/__init__.py | 2 +- src/testing/fast_array_utils/pytest.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/testing/fast_array_utils/__init__.py b/src/testing/fast_array_utils/__init__.py index 1e76909..8bc4157 100644 --- a/src/testing/fast_array_utils/__init__.py +++ b/src/testing/fast_array_utils/__init__.py @@ -48,7 +48,7 @@ class ConversionContext: get_ds_name: Callable[[], str] -@dataclass +@dataclass(frozen=True) class ArrayType: """Supported array type with methods for conversion and random generation.""" diff --git a/src/testing/fast_array_utils/pytest.py b/src/testing/fast_array_utils/pytest.py index 5883749..b3714a9 100644 --- a/src/testing/fast_array_utils/pytest.py +++ b/src/testing/fast_array_utils/pytest.py @@ -3,6 +3,7 @@ from __future__ import annotations +import dataclasses import os from importlib.util import find_spec from typing import TYPE_CHECKING, cast @@ -32,7 +33,8 @@ def array_type(request: pytest.FixtureRequest) -> ArrayType: at = cast(ArrayType, request.param) if at.cls is H5Dataset: - at.conversion_context = request.getfixturevalue("conversion_context") + ctx = request.getfixturevalue("conversion_context") + at = dataclasses.replace(at, conversion_context=ctx) return at From e19ea7e374973bb80c3186aeff35b1ec4f3f24ca Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Fri, 21 Feb 2025 13:16:51 +0100 Subject: [PATCH 04/22] docs --- docs/conf.py | 7 +++++++ docs/index.rst | 7 ++++++- docs/testing.rst | 11 +++++++++++ pyproject.toml | 9 ++++++++- src/testing/fast_array_utils/__init__.py | 12 ++++++++++++ src/testing/fast_array_utils/pytest.py | 10 ++++++++-- 6 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 docs/testing.rst diff --git a/docs/conf.py b/docs/conf.py index abfd723..bd04676 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -32,6 +32,7 @@ "sphinx.ext.autodoc", "sphinx.ext.autosummary", "scanpydoc.elegant_typehints", + "sphinx_autofixture", ] # API documentation when building @@ -55,9 +56,11 @@ "np.dtype": "numpy.dtype", "np.number": "numpy.number", "np.integer": "numpy.integer", + "np.random.Generator": "numpy.random.Generator", "ArrayLike": "numpy.typing.ArrayLike", "DTypeLike": "numpy.typing.DTypeLike", "NDArray": "numpy.typing.NDArray", + "_pytest.fixtures.FixtureRequest": "pytest.FixtureRequest", **{ k: v for k_plain, v in { @@ -74,10 +77,14 @@ # If that doesn’t work, ignore them nitpick_ignore = { ("py:class", "fast_array_utils.types.T_co"), + ("py:class", "_DTypeLikeFloat32"), + ("py:class", "_DTypeLikeFloat64"), # sphinx bugs, should be covered by `autodoc_type_aliases` above + ("py:class", "Array"), ("py:class", "ArrayLike"), ("py:class", "DTypeLike"), ("py:class", "NDArray"), + ("py:class", "_pytest.fixtures.FixtureRequest"), } # Options for HTML output diff --git a/docs/index.rst b/docs/index.rst index 697b5c6..db58c7a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,10 +1,15 @@ ``fast_array_utils`` ==================== +.. toctree:: + :hidden: + + fast-array-utils + testing + .. automodule:: fast_array_utils :members: - ``fast_array_utils.conv`` ------------------------- diff --git a/docs/testing.rst b/docs/testing.rst new file mode 100644 index 0000000..0b17228 --- /dev/null +++ b/docs/testing.rst @@ -0,0 +1,11 @@ +``testing.fast_array_utils`` +============================ + +.. 
automodule:: testing.fast_array_utils + :members: + +``testing.fast_array_utils.pytest`` +----------------------------------- + +.. automodule:: testing.fast_array_utils.pytest + :members: diff --git a/pyproject.toml b/pyproject.toml index d14ded6..b7c6ea8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,14 @@ classifiers = [ ] dynamic = [ "description", "version" ] dependencies = [ "numba", "numpy" ] -optional-dependencies.doc = [ "furo", "scanpydoc>=0.15.2", "sphinx>=8", "sphinx-autodoc-typehints" ] +optional-dependencies.doc = [ + "furo", + "pytest", + "scanpydoc>=0.15.2", + "sphinx>=8", + "sphinx-autodoc-typehints", + "sphinx-autofixture", +] optional-dependencies.full = [ "dask", "fast-array-utils[sparse]", "h5py", "zarr" ] optional-dependencies.sparse = [ "scipy>=1.8" ] optional-dependencies.test = [ "coverage[toml]", "pytest", "pytest-codspeed" ] diff --git a/src/testing/fast_array_utils/__init__.py b/src/testing/fast_array_utils/__init__.py index 8bc4157..71a1d5f 100644 --- a/src/testing/fast_array_utils/__init__.py +++ b/src/testing/fast_array_utils/__init__.py @@ -40,6 +40,18 @@ def __call__( # noqa: D102 _DTypeLikeFloat64 = np.dtype[np.float64] | type[np.float64] +__all__ = [ + "SUPPORTED_TYPES", + "SUPPORTED_TYPES_DASK", + "SUPPORTED_TYPES_IN_DASK", + "SUPPORTED_TYPES_OTHER", + "Array", + "ArrayType", + "ConversionContext", + "ToArray", +] + + @dataclass class ConversionContext: """Conversion context required for h5py.""" diff --git a/src/testing/fast_array_utils/pytest.py b/src/testing/fast_array_utils/pytest.py index b3714a9..0d79a49 100644 --- a/src/testing/fast_array_utils/pytest.py +++ b/src/testing/fast_array_utils/pytest.py @@ -17,6 +17,9 @@ from collections.abc import Generator +__all__ = ["array_type", "conversion_context"] + + def _skip_if_no(dist: str) -> pytest.MarkDecorator: return pytest.mark.skipif(not find_spec(dist), reason=f"{dist} not installed") @@ -28,7 +31,7 @@ def _skip_if_no(dist: str) -> pytest.MarkDecorator: ], ) 
def array_type(request: pytest.FixtureRequest) -> ArrayType: - """Fixture for a supported array class.""" + """Fixture for a supported :class:`~testing.fast_array_utils.ArrayType`.""" from fast_array_utils.types import H5Dataset at = cast(ArrayType, request.param) @@ -44,7 +47,10 @@ def conversion_context( tmp_path_factory: pytest.TempPathFactory, worker_id: str = "serial", ) -> Generator[ConversionContext, None, None]: - """Convert to a h5py dataset.""" + """Fixture providing a :class:`~testing.fast_array_utils.ConversionContext`. + + Makes sure h5py works even when running tests in parallel. + """ import h5py tmp_path = tmp_path_factory.mktemp("backed_adata") From 5756fad0f28daaef5fec9a5188683a2af00424f4 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 21 Feb 2025 13:36:52 +0100 Subject: [PATCH 05/22] document all exported members --- docs/conf.py | 4 ++ src/testing/fast_array_utils/__init__.py | 50 ++++++++++++++++++------ src/testing/fast_array_utils/pytest.py | 5 ++- 3 files changed, 47 insertions(+), 12 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index bd04676..ad3318c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -39,6 +39,10 @@ nitpicky = True autosummary_generate = True autodoc_member_order = "bysource" +autodoc_default_options = { + "special-members": "__call__", # `True` adds too much stuff to dataclasses + "inherited-members": False, +} napoleon_google_docstring = False napoleon_numpy_docstring = True todo_include_todos = False diff --git a/src/testing/fast_array_utils/__init__.py b/src/testing/fast_array_utils/__init__.py index 71a1d5f..474c9f2 100644 --- a/src/testing/fast_array_utils/__init__.py +++ b/src/testing/fast_array_utils/__init__.py @@ -3,7 +3,7 @@ from __future__ import annotations -from dataclasses import KW_ONLY, dataclass +from dataclasses import KW_ONLY, dataclass, field from functools import cache, cached_property from typing import TYPE_CHECKING @@ -62,28 +62,44 @@ class ConversionContext: @dataclass(frozen=True) 
class ArrayType: - """Supported array type with methods for conversion and random generation.""" + """Supported array type with methods for conversion and random generation. + + Examples + -------- + >>> at = ArrayType("numpy", "ndarray") + >>> arr = at([1, 2, 3]) + >>> arr + array([1, 2, 3]) + >>> assert isinstance(arr, at.cls) + + """ mod: str + """Module name.""" name: str + """Array class name.""" inner: ArrayType | None = None + """Inner array type (e.g. for dask).""" + _: KW_ONLY - conversion_context: ConversionContext | None = None + + conversion_context: ConversionContext | None = field(default=None, compare=False) + """Conversion context required for converting to h5py.""" @classmethod @cache def from_qualname(cls, qualname: str, inner: str | None = None) -> ArrayType: - """Get a supported array type by qualname.""" + """Create from qualnames of type and inner type.""" mod, name = qualname.rsplit(".", 1) return cls(mod, name, ArrayType.from_qualname(inner) if inner else None) - def __str__(self) -> str: # noqa: D105 + def __repr__(self) -> str: # noqa: D105 rv = f"{self.mod}.{self.name}" return f"{rv}[{self.inner}]" if self.inner else rv @cached_property def cls(self) -> type[Array]: # noqa: PLR0911 - """Get a supported array class by qualname.""" + """Array class for :func:`isinstance` checks.""" match self.mod, self.name, self.inner: case "numpy", "ndarray", None: return np.ndarray @@ -153,7 +169,7 @@ def random( raise ValueError(msg) def __call__(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Array: - """Create a function to convert to a supported array.""" + """Convert to this array type.""" from fast_array_utils import types fn: ToArray @@ -224,12 +240,24 @@ def to_zarr_array(x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.Z "cupyx.scipy.sparse.csr_matrix", "cupyx.scipy.sparse.csc_matrix", ] -SUPPORTED_TYPES_IN_DASK = tuple(map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_IN_DASK)) -SUPPORTED_TYPES_DASK = tuple( 
+SUPPORTED_TYPES_IN_DASK: tuple[ArrayType, ...] = tuple( + map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_IN_DASK) +) +"""Supported array types that are valid inside dask arrays.""" +SUPPORTED_TYPES_DASK: tuple[ArrayType, ...] = tuple( ArrayType.from_qualname("dask.array.Array", t) for t in _SUPPORTED_TYPE_NAMES_IN_DASK ) -SUPPORTED_TYPES_OTHER = tuple(map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_OTHER)) -SUPPORTED_TYPES = (*SUPPORTED_TYPES_IN_DASK, *SUPPORTED_TYPES_DASK, *SUPPORTED_TYPES_OTHER) +"""Supported dask array types.""" +SUPPORTED_TYPES_OTHER: tuple[ArrayType, ...] = tuple( + map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_OTHER) +) +"""Supported array types that are not valid inside dask arrays.""" +SUPPORTED_TYPES: tuple[ArrayType, ...] = ( + *SUPPORTED_TYPES_IN_DASK, + *SUPPORTED_TYPES_DASK, + *SUPPORTED_TYPES_OTHER, +) +"""All supported array types.""" def random_mat( diff --git a/src/testing/fast_array_utils/pytest.py b/src/testing/fast_array_utils/pytest.py index 0d79a49..553761c 100644 --- a/src/testing/fast_array_utils/pytest.py +++ b/src/testing/fast_array_utils/pytest.py @@ -1,5 +1,8 @@ # SPDX-License-Identifier: MPL-2.0 -"""Testing utilities.""" +"""Pytest fixtures to get supported array types. + +Can be used as pytest plugin: ``pytest -p testing.fast_array_utils.pytest``. +""" from __future__ import annotations From 06d28f1bdc842c9e87c4b7b0f4acd4f5fd7ce15c Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Fri, 21 Feb 2025 17:04:56 +0100 Subject: [PATCH 06/22] avoid extra numpy coercion --- src/testing/fast_array_utils/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/testing/fast_array_utils/__init__.py b/src/testing/fast_array_utils/__init__.py index 474c9f2..e5b103b 100644 --- a/src/testing/fast_array_utils/__init__.py +++ b/src/testing/fast_array_utils/__init__.py @@ -202,8 +202,8 @@ def to_dask_array(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> t assert self.inner is not None - x = np.asarray(x, dtype=dtype) - return da.from_array(self.inner.__call__(x), _half_chunk_size(x.shape)) # type: ignore[no-untyped-call,no-any-return] + arr = self.inner(x, dtype=dtype) + return da.from_array(arr, _half_chunk_size(arr.shape)) # type: ignore[no-untyped-call,no-any-return] def to_h5py_dataset( self, x: ArrayLike, /, *, dtype: DTypeLike | None = None From 5c622e2d86d2c1ef16fbf5fe7fbb006fc2e46ee3 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 21 Feb 2025 17:25:30 +0100 Subject: [PATCH 07/22] enable cupy-in-dask --- src/testing/fast_array_utils/__init__.py | 57 ++++++++++++++++-------- src/testing/fast_array_utils/pytest.py | 13 +++--- 2 files changed, 47 insertions(+), 23 deletions(-) diff --git a/src/testing/fast_array_utils/__init__.py b/src/testing/fast_array_utils/__init__.py index e5b103b..a700b27 100644 --- a/src/testing/fast_array_utils/__init__.py +++ b/src/testing/fast_array_utils/__init__.py @@ -43,9 +43,10 @@ def __call__( # noqa: D102 __all__ = [ "SUPPORTED_TYPES", "SUPPORTED_TYPES_DASK", - "SUPPORTED_TYPES_IN_DASK", - "SUPPORTED_TYPES_OTHER", - "Array", + "SUPPORTED_TYPES_DISK", + "SUPPORTED_TYPES_MEM", + "SUPPORTED_TYPES_MEM_DENSE", + "SUPPORTED_TYPES_MEM_SPARSE", "ArrayType", "ConversionContext", "ToArray", @@ -226,36 +227,56 @@ def to_zarr_array(x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.Z return za -_SUPPORTED_TYPE_NAMES_IN_DASK = [ +_SUPPORTED_TYPE_NAMES_DISK = [ + 
"h5py.Dataset", + "zarr.Array", +] +_SUPPORTED_TYPE_NAMES_DENSE = [ "numpy.ndarray", + "cupy.ndarray", +] +_SUPPORTED_TYPE_NAMES_SPARSE = [ "scipy.sparse.csr_array", "scipy.sparse.csc_array", "scipy.sparse.csr_matrix", "scipy.sparse.csc_matrix", -] -_SUPPORTED_TYPE_NAMES_OTHER = [ - "h5py.Dataset", - "zarr.Array", - "cupy.ndarray", "cupyx.scipy.sparse.csr_matrix", "cupyx.scipy.sparse.csc_matrix", ] -SUPPORTED_TYPES_IN_DASK: tuple[ArrayType, ...] = tuple( - map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_IN_DASK) + +SUPPORTED_TYPES_DISK: tuple[ArrayType, ...] = tuple( + map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_DISK) +) +"""Supported array types that represent on-disk data + +These on-disk array types are not supported inside dask arrays. +""" + +SUPPORTED_TYPES_MEM_DENSE: tuple[ArrayType, ...] = tuple( + map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_DENSE) +) +"""Supported dense in-memory array types.""" + +SUPPORTED_TYPES_MEM_SPARSE: tuple[ArrayType, ...] = tuple( + map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_SPARSE) +) +"""Supported sparse in-memory array types.""" + +SUPPORTED_TYPES_MEM: tuple[ArrayType, ...] = ( + *SUPPORTED_TYPES_MEM_DENSE, + *SUPPORTED_TYPES_MEM_SPARSE, ) """Supported array types that are valid inside dask arrays.""" + SUPPORTED_TYPES_DASK: tuple[ArrayType, ...] = tuple( - ArrayType.from_qualname("dask.array.Array", t) for t in _SUPPORTED_TYPE_NAMES_IN_DASK + ArrayType("dask.array", ".Array", t) for t in SUPPORTED_TYPES_MEM ) """Supported dask array types.""" -SUPPORTED_TYPES_OTHER: tuple[ArrayType, ...] = tuple( - map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_OTHER) -) -"""Supported array types that are not valid inside dask arrays.""" + SUPPORTED_TYPES: tuple[ArrayType, ...] 
= ( - *SUPPORTED_TYPES_IN_DASK, + *SUPPORTED_TYPES_MEM, *SUPPORTED_TYPES_DASK, - *SUPPORTED_TYPES_OTHER, + *SUPPORTED_TYPES_DISK, ) """All supported array types.""" diff --git a/src/testing/fast_array_utils/pytest.py b/src/testing/fast_array_utils/pytest.py index 553761c..a246a20 100644 --- a/src/testing/fast_array_utils/pytest.py +++ b/src/testing/fast_array_utils/pytest.py @@ -23,15 +23,18 @@ __all__ = ["array_type", "conversion_context"] -def _skip_if_no(dist: str) -> pytest.MarkDecorator: - return pytest.mark.skipif(not find_spec(dist), reason=f"{dist} not installed") +def _skip_if_unimportable(array_type: ArrayType) -> pytest.MarkDecorator: + dist = None + skip = False + for t in (array_type, array_type.inner): + if t and not find_spec(dist := t.mod.split(".", 1)[0]): + skip = True + return pytest.mark.skipif(skip, reason=f"{dist} not installed") @pytest.fixture( scope="session", - params=[ - pytest.param(t, id=str(t), marks=_skip_if_no(t.mod.split(".")[0])) for t in SUPPORTED_TYPES - ], + params=[pytest.param(t, id=str(t), marks=_skip_if_unimportable(t)) for t in SUPPORTED_TYPES], ) def array_type(request: pytest.FixtureRequest) -> ArrayType: """Fixture for a supported :class:`~testing.fast_array_utils.ArrayType`.""" From 2bb951db3fef309b087fdf30a74d09981069b106 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Fri, 21 Feb 2025 17:32:40 +0100 Subject: [PATCH 08/22] fix including testing module --- pyproject.toml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b7c6ea8..037a4a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,12 +38,16 @@ urls.'Documentation' = "https://icb-fast-array-utils.readthedocs-hosted.com/" urls.'Issue Tracker' = "https://github.com/scverse/fast-array-utils/issues" urls.'Source Code' = "https://github.com/scverse/fast-array-utils" -[tool.hatch.metadata.hooks.docstring-description] - [tool.hatch.version] source = "vcs" raw-options = { local_scheme = "no-local-version" } # be able to publish dev version +# TODO: support setting main package in the plugin +# [tool.hatch.metadata.hooks.docstring-description] + +[tool.hatch.build.targets.wheel] +packages = [ "src/testing", "src/fast_array_utils" ] + [tool.hatch.envs.default] installer = "uv" From 80ce4fbb6d03a28ba0f57e2e3ad62173cfef147f Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Fri, 21 Feb 2025 17:45:05 +0100 Subject: [PATCH 09/22] fix docs --- docs/conf.py | 5 +++-- src/testing/fast_array_utils/__init__.py | 12 ++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index ad3318c..6cb116f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -40,8 +40,9 @@ autosummary_generate = True autodoc_member_order = "bysource" autodoc_default_options = { - "special-members": "__call__", # `True` adds too much stuff to dataclasses - "inherited-members": False, + "special-members": True, + # everything except __call__ really, to avoid having to write autosummary templates + "exclude-members": "__setattr__,__delattr__,__repr__,__eq__,__hash__,__weakref__,__init__", } napoleon_google_docstring = False napoleon_numpy_docstring = True diff --git a/src/testing/fast_array_utils/__init__.py b/src/testing/fast_array_utils/__init__.py index a700b27..2eab9e8 100644 --- a/src/testing/fast_array_utils/__init__.py +++ b/src/testing/fast_array_utils/__init__.py @@ -180,11 +180,11 @@ def __call__(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Array: if self.inner is None: msg = "Cannot convert to dask array without inner array type" raise AssertionError(msg) - fn = self.to_dask_array + fn = self._to_dask_array elif self.cls is types.H5Dataset: - fn = self.to_h5py_dataset + fn = self._to_h5py_dataset elif self.cls is types.ZarrArray: - fn = self.to_zarr_array + fn = self._to_zarr_array elif self.cls is types.CupyArray: import cupy as cu @@ -194,7 +194,7 @@ def __call__(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Array: return fn(x, dtype=dtype) - def to_dask_array(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.DaskArray: + def _to_dask_array(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.DaskArray: """Convert to a dask array.""" if TYPE_CHECKING: import dask.array.core as da @@ -206,7 +206,7 @@ def to_dask_array(self, x: ArrayLike, /, *, dtype: 
DTypeLike | None = None) -> t arr = self.inner(x, dtype=dtype) return da.from_array(arr, _half_chunk_size(arr.shape)) # type: ignore[no-untyped-call,no-any-return] - def to_h5py_dataset( + def _to_h5py_dataset( self, x: ArrayLike, /, *, dtype: DTypeLike | None = None ) -> types.H5Dataset: """Convert to a h5py dataset.""" @@ -217,7 +217,7 @@ def to_h5py_dataset( return ctx.hdf5_file.create_dataset(ctx.get_ds_name(), arr.shape, arr.dtype, data=arr) @staticmethod - def to_zarr_array(x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.ZarrArray: + def _to_zarr_array(x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.ZarrArray: """Convert to a zarr array.""" import zarr From b2a0241a2e119249ede2e5586fc003ca3f0d1e67 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 21 Feb 2025 17:51:40 +0100 Subject: [PATCH 10/22] fix dask array types --- src/testing/fast_array_utils/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/testing/fast_array_utils/__init__.py b/src/testing/fast_array_utils/__init__.py index 2eab9e8..c245e2c 100644 --- a/src/testing/fast_array_utils/__init__.py +++ b/src/testing/fast_array_utils/__init__.py @@ -269,7 +269,7 @@ def _to_zarr_array(x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types. """Supported array types that are valid inside dask arrays.""" SUPPORTED_TYPES_DASK: tuple[ArrayType, ...] = tuple( - ArrayType("dask.array", ".Array", t) for t in SUPPORTED_TYPES_MEM + ArrayType("dask.array", "Array", t) for t in SUPPORTED_TYPES_MEM ) """Supported dask array types.""" From 49b2e9e1bd76af057f84b63fdfa992cc81c74157 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Fri, 21 Feb 2025 17:53:57 +0100 Subject: [PATCH 11/22] comment on `get_ds_name` --- src/testing/fast_array_utils/pytest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/testing/fast_array_utils/pytest.py b/src/testing/fast_array_utils/pytest.py index a246a20..5e465dd 100644 --- a/src/testing/fast_array_utils/pytest.py +++ b/src/testing/fast_array_utils/pytest.py @@ -63,6 +63,7 @@ def conversion_context( tmp_path = tmp_path / f"test_{worker_id}.h5ad" def get_ds_name() -> str: + """Get dataset name from test name, so tests running in parallel each get their own.""" return os.environ["PYTEST_CURRENT_TEST"].rsplit(":", 1)[-1].split(" ", 1)[0] with h5py.File(tmp_path, "x") as f: From 359d07df3d292133110797258f7c5ce8686883ef Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 21 Feb 2025 18:08:14 +0100 Subject: [PATCH 12/22] move arry stuff into submodule --- src/testing/fast_array_utils/__init__.py | 237 +--------------- src/testing/fast_array_utils/array_type.py | 310 +++++++++++++++++++++ 2 files changed, 314 insertions(+), 233 deletions(-) create mode 100644 src/testing/fast_array_utils/array_type.py diff --git a/src/testing/fast_array_utils/__init__.py b/src/testing/fast_array_utils/__init__.py index c245e2c..8d2b3e0 100644 --- a/src/testing/fast_array_utils/__init__.py +++ b/src/testing/fast_array_utils/__init__.py @@ -3,41 +3,13 @@ from __future__ import annotations -from dataclasses import KW_ONLY, dataclass, field -from functools import cache, cached_property from typing import TYPE_CHECKING -import numpy as np +from .array_type import ArrayType, ConversionContext, random_mat if TYPE_CHECKING: - from collections.abc import Callable - from typing import Any, Literal, Protocol, SupportsFloat, TypeAlias - - import h5py - from numpy.typing import ArrayLike, DTypeLike, NDArray - - from fast_array_utils import types - from fast_array_utils.types import CSBase - - Array: TypeAlias = ( - NDArray[Any] - | types.CSBase - | types.CupyArray - | 
types.DaskArray - | types.H5Dataset - | types.ZarrArray - ) - - class ToArray(Protocol): - """Convert to a supported array.""" - - def __call__( # noqa: D102 - self, data: ArrayLike, /, *, dtype: DTypeLike | None = None - ) -> Array: ... - - _DTypeLikeFloat32 = np.dtype[np.float32] | type[np.float32] - _DTypeLikeFloat64 = np.dtype[np.float64] | type[np.float64] + from .array_type import Array, ToArray # noqa: TC004 __all__ = [ @@ -47,186 +19,14 @@ def __call__( # noqa: D102 "SUPPORTED_TYPES_MEM", "SUPPORTED_TYPES_MEM_DENSE", "SUPPORTED_TYPES_MEM_SPARSE", + "Array", "ArrayType", "ConversionContext", "ToArray", + "random_mat", ] -@dataclass -class ConversionContext: - """Conversion context required for h5py.""" - - hdf5_file: h5py.File - get_ds_name: Callable[[], str] - - -@dataclass(frozen=True) -class ArrayType: - """Supported array type with methods for conversion and random generation. - - Examples - -------- - >>> at = ArrayType("numpy", "ndarray") - >>> arr = at([1, 2, 3]) - >>> arr - array([1, 2, 3]) - >>> assert isinstance(arr, at.cls) - - """ - - mod: str - """Module name.""" - name: str - """Array class name.""" - inner: ArrayType | None = None - """Inner array type (e.g. 
for dask).""" - - _: KW_ONLY - - conversion_context: ConversionContext | None = field(default=None, compare=False) - """Conversion context required for converting to h5py.""" - - @classmethod - @cache - def from_qualname(cls, qualname: str, inner: str | None = None) -> ArrayType: - """Create from qualnames of type and inner type.""" - mod, name = qualname.rsplit(".", 1) - return cls(mod, name, ArrayType.from_qualname(inner) if inner else None) - - def __repr__(self) -> str: # noqa: D105 - rv = f"{self.mod}.{self.name}" - return f"{rv}[{self.inner}]" if self.inner else rv - - @cached_property - def cls(self) -> type[Array]: # noqa: PLR0911 - """Array class for :func:`isinstance` checks.""" - match self.mod, self.name, self.inner: - case "numpy", "ndarray", None: - return np.ndarray - case "scipy.sparse", ( - "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" - ) as cls_name, None: - import scipy.sparse - - return getattr(scipy.sparse, cls_name) # type: ignore[no-any-return] - case "cupy", "ndarray", None: - import cupy as cp - - return cp.ndarray # type: ignore[no-any-return] - case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: - import cupyx.scipy.sparse as cu_sparse - - return getattr(cu_sparse, cls_name) # type: ignore[no-any-return] - case "dask.array", cls_name, _: - if TYPE_CHECKING: - from dask.array.core import Array as DaskArray - else: - from dask.array import Array as DaskArray - - return DaskArray - case "h5py", "Dataset", _: - import h5py - - return h5py.Dataset # type: ignore[no-any-return] - case "zarr", "Array", _: - import zarr - - return zarr.Array - case _: - msg = f"Unknown array class: {self}" - raise ValueError(msg) - - def random( - self, - shape: tuple[int, int], - *, - dtype: _DTypeLikeFloat32 | _DTypeLikeFloat64 | None, - gen: np.random.Generator | None = None, - ) -> Array: - """Create a random array.""" - gen = np.random.default_rng(gen) - - match self.mod, self.name, self.inner: - case "numpy", "ndarray", 
None: - return gen.random(shape, dtype=dtype or np.float64) - case "scipy.sparse", ( - "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" - ) as cls_name, None: - fmt, container = cls_name.split("_") - return random_mat(shape, format=fmt, container=container, dtype=dtype) # type: ignore[arg-type] - case "cupy", "ndarray", None: - raise NotImplementedError - case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: - raise NotImplementedError - case "dask.array", cls_name, _: - raise NotImplementedError - case "h5py", "Dataset", _: - raise NotImplementedError - case "zarr", "Array", _: - raise NotImplementedError - case _: - msg = f"Unknown array class: {self}" - raise ValueError(msg) - - def __call__(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Array: - """Convert to this array type.""" - from fast_array_utils import types - - fn: ToArray - if self.cls is np.ndarray: - fn = np.asarray # type: ignore[assignment] - elif self.cls is types.DaskArray: - if self.inner is None: - msg = "Cannot convert to dask array without inner array type" - raise AssertionError(msg) - fn = self._to_dask_array - elif self.cls is types.H5Dataset: - fn = self._to_h5py_dataset - elif self.cls is types.ZarrArray: - fn = self._to_zarr_array - elif self.cls is types.CupyArray: - import cupy as cu - - fn = cu.asarray - else: - fn = self.cls # type: ignore[assignment] - - return fn(x, dtype=dtype) - - def _to_dask_array(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.DaskArray: - """Convert to a dask array.""" - if TYPE_CHECKING: - import dask.array.core as da - else: - import dask.array as da - - assert self.inner is not None - - arr = self.inner(x, dtype=dtype) - return da.from_array(arr, _half_chunk_size(arr.shape)) # type: ignore[no-untyped-call,no-any-return] - - def _to_h5py_dataset( - self, x: ArrayLike, /, *, dtype: DTypeLike | None = None - ) -> types.H5Dataset: - """Convert to a h5py dataset.""" - if (ctx := 
self.conversion_context) is None: - msg = "`conversion_context` must be set for h5py" - raise RuntimeError(msg) - arr = np.asarray(x, dtype=dtype) - return ctx.hdf5_file.create_dataset(ctx.get_ds_name(), arr.shape, arr.dtype, data=arr) - - @staticmethod - def _to_zarr_array(x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.ZarrArray: - """Convert to a zarr array.""" - import zarr - - arr = np.asarray(x, dtype=dtype) - za = zarr.create_array({}, shape=arr.shape, dtype=arr.dtype) - za[...] = arr - return za - - _SUPPORTED_TYPE_NAMES_DISK = [ "h5py.Dataset", "zarr.Array", @@ -279,32 +79,3 @@ def _to_zarr_array(x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types. *SUPPORTED_TYPES_DISK, ) """All supported array types.""" - - -def random_mat( - shape: tuple[int, int], - *, - density: SupportsFloat = 0.01, - format: Literal["csr", "csc"] = "csr", # noqa: A002 - dtype: DTypeLike | None = None, - container: Literal["array", "matrix"] = "array", - gen: np.random.Generator | None = None, -) -> CSBase: - """Create a random matrix.""" - from scipy.sparse import random as random_spmat - from scipy.sparse import random_array as random_sparr - - m, n = shape - return ( - random_spmat(m, n, density=density, format=format, dtype=dtype, random_state=gen) - if container == "matrix" - else random_sparr(shape, density=density, format=format, dtype=dtype, random_state=gen) - ) - - -def _half_chunk_size(a: tuple[int, ...]) -> tuple[int, ...]: - def half_rounded_up(x: int) -> int: - div, mod = divmod(x, 2) - return div + (mod > 0) - - return tuple(half_rounded_up(x) for x in a) diff --git a/src/testing/fast_array_utils/array_type.py b/src/testing/fast_array_utils/array_type.py new file mode 100644 index 0000000..04e398b --- /dev/null +++ b/src/testing/fast_array_utils/array_type.py @@ -0,0 +1,310 @@ +# SPDX-License-Identifier: MPL-2.0 +"""ArrayType class and helpers.""" + +from __future__ import annotations + +from dataclasses import KW_ONLY, dataclass, field +from 
functools import cache, cached_property +from typing import TYPE_CHECKING + +import numpy as np + + +if TYPE_CHECKING: + from collections.abc import Callable + from typing import Any, Literal, Protocol, SupportsFloat, TypeAlias + + import h5py + from numpy.typing import ArrayLike, DTypeLike, NDArray + + from fast_array_utils import types + from fast_array_utils.types import CSBase + + Array: TypeAlias = ( + NDArray[Any] + | types.CSBase + | types.CupyArray + | types.DaskArray + | types.H5Dataset + | types.ZarrArray + ) + + class ToArray(Protocol): + """Convert to a supported array.""" + + def __call__( # noqa: D102 + self, data: ArrayLike, /, *, dtype: DTypeLike | None = None + ) -> Array: ... + + _DTypeLikeFloat32 = np.dtype[np.float32] | type[np.float32] + _DTypeLikeFloat64 = np.dtype[np.float64] | type[np.float64] + + +__all__ = [ + "SUPPORTED_TYPES", + "SUPPORTED_TYPES_DASK", + "SUPPORTED_TYPES_DISK", + "SUPPORTED_TYPES_MEM", + "SUPPORTED_TYPES_MEM_DENSE", + "SUPPORTED_TYPES_MEM_SPARSE", + "ArrayType", + "ConversionContext", + "ToArray", +] + + +@dataclass +class ConversionContext: + """Conversion context required for h5py.""" + + hdf5_file: h5py.File + get_ds_name: Callable[[], str] + + +@dataclass(frozen=True) +class ArrayType: + """Supported array type with methods for conversion and random generation. + + Examples + -------- + >>> at = ArrayType("numpy", "ndarray") + >>> arr = at([1, 2, 3]) + >>> arr + array([1, 2, 3]) + >>> assert isinstance(arr, at.cls) + + """ + + mod: str + """Module name.""" + name: str + """Array class name.""" + inner: ArrayType | None = None + """Inner array type (e.g. 
for dask).""" + + _: KW_ONLY + + conversion_context: ConversionContext | None = field(default=None, compare=False) + """Conversion context required for converting to h5py.""" + + @classmethod + @cache + def from_qualname(cls, qualname: str, inner: str | None = None) -> ArrayType: + """Create from qualnames of type and inner type.""" + mod, name = qualname.rsplit(".", 1) + return cls(mod, name, ArrayType.from_qualname(inner) if inner else None) + + def __repr__(self) -> str: # noqa: D105 + rv = f"{self.mod}.{self.name}" + return f"{rv}[{self.inner}]" if self.inner else rv + + @cached_property + def cls(self) -> type[Array]: # noqa: PLR0911 + """Array class for :func:`isinstance` checks.""" + match self.mod, self.name, self.inner: + case "numpy", "ndarray", None: + return np.ndarray + case "scipy.sparse", ( + "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" + ) as cls_name, None: + import scipy.sparse + + return getattr(scipy.sparse, cls_name) # type: ignore[no-any-return] + case "cupy", "ndarray", None: + import cupy as cp + + return cp.ndarray # type: ignore[no-any-return] + case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: + import cupyx.scipy.sparse as cu_sparse + + return getattr(cu_sparse, cls_name) # type: ignore[no-any-return] + case "dask.array", cls_name, _: + if TYPE_CHECKING: + from dask.array.core import Array as DaskArray + else: + from dask.array import Array as DaskArray + + return DaskArray + case "h5py", "Dataset", _: + import h5py + + return h5py.Dataset # type: ignore[no-any-return] + case "zarr", "Array", _: + import zarr + + return zarr.Array + case _: + msg = f"Unknown array class: {self}" + raise ValueError(msg) + + def random( + self, + shape: tuple[int, int], + *, + dtype: _DTypeLikeFloat32 | _DTypeLikeFloat64 | None, + gen: np.random.Generator | None = None, + ) -> Array: + """Create a random array.""" + gen = np.random.default_rng(gen) + + match self.mod, self.name, self.inner: + case "numpy", "ndarray", 
None: + return gen.random(shape, dtype=dtype or np.float64) + case "scipy.sparse", ( + "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" + ) as cls_name, None: + fmt, container = cls_name.split("_") + return random_mat(shape, format=fmt, container=container, dtype=dtype) # type: ignore[arg-type] + case "cupy", "ndarray", None: + raise NotImplementedError + case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: + raise NotImplementedError + case "dask.array", cls_name, _: + raise NotImplementedError + case "h5py", "Dataset", _: + raise NotImplementedError + case "zarr", "Array", _: + raise NotImplementedError + case _: + msg = f"Unknown array class: {self}" + raise ValueError(msg) + + def __call__(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Array: + """Convert to this array type.""" + from fast_array_utils import types + + fn: ToArray + if self.cls is np.ndarray: + fn = np.asarray # type: ignore[assignment] + elif self.cls is types.DaskArray: + if self.inner is None: + msg = "Cannot convert to dask array without inner array type" + raise AssertionError(msg) + fn = self._to_dask_array + elif self.cls is types.H5Dataset: + fn = self._to_h5py_dataset + elif self.cls is types.ZarrArray: + fn = self._to_zarr_array + elif self.cls is types.CupyArray: + import cupy as cu + + fn = cu.asarray + else: + fn = self.cls # type: ignore[assignment] + + return fn(x, dtype=dtype) + + def _to_dask_array(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.DaskArray: + """Convert to a dask array.""" + if TYPE_CHECKING: + import dask.array.core as da + else: + import dask.array as da + + assert self.inner is not None + + arr = self.inner(x, dtype=dtype) + return da.from_array(arr, _half_chunk_size(arr.shape)) # type: ignore[no-untyped-call,no-any-return] + + def _to_h5py_dataset( + self, x: ArrayLike, /, *, dtype: DTypeLike | None = None + ) -> types.H5Dataset: + """Convert to a h5py dataset.""" + if (ctx := 
self.conversion_context) is None: + msg = "`conversion_context` must be set for h5py" + raise RuntimeError(msg) + arr = np.asarray(x, dtype=dtype) + return ctx.hdf5_file.create_dataset(ctx.get_ds_name(), arr.shape, arr.dtype, data=arr) + + @staticmethod + def _to_zarr_array(x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.ZarrArray: + """Convert to a zarr array.""" + import zarr + + arr = np.asarray(x, dtype=dtype) + za = zarr.create_array({}, shape=arr.shape, dtype=arr.dtype) + za[...] = arr + return za + + +_SUPPORTED_TYPE_NAMES_DISK = [ + "h5py.Dataset", + "zarr.Array", +] +_SUPPORTED_TYPE_NAMES_DENSE = [ + "numpy.ndarray", + "cupy.ndarray", +] +_SUPPORTED_TYPE_NAMES_SPARSE = [ + "scipy.sparse.csr_array", + "scipy.sparse.csc_array", + "scipy.sparse.csr_matrix", + "scipy.sparse.csc_matrix", + "cupyx.scipy.sparse.csr_matrix", + "cupyx.scipy.sparse.csc_matrix", +] + +SUPPORTED_TYPES_DISK: tuple[ArrayType, ...] = tuple( + map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_DISK) +) +"""Supported array types that represent on-disk data + +These on-disk array types are not supported inside dask arrays. +""" + +SUPPORTED_TYPES_MEM_DENSE: tuple[ArrayType, ...] = tuple( + map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_DENSE) +) +"""Supported dense in-memory array types.""" + +SUPPORTED_TYPES_MEM_SPARSE: tuple[ArrayType, ...] = tuple( + map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_SPARSE) +) +"""Supported sparse in-memory array types.""" + +SUPPORTED_TYPES_MEM: tuple[ArrayType, ...] = ( + *SUPPORTED_TYPES_MEM_DENSE, + *SUPPORTED_TYPES_MEM_SPARSE, +) +"""Supported array types that are valid inside dask arrays.""" + +SUPPORTED_TYPES_DASK: tuple[ArrayType, ...] = tuple( + ArrayType("dask.array", "Array", t) for t in SUPPORTED_TYPES_MEM +) +"""Supported dask array types.""" + +SUPPORTED_TYPES: tuple[ArrayType, ...] 
= ( + *SUPPORTED_TYPES_MEM, + *SUPPORTED_TYPES_DASK, + *SUPPORTED_TYPES_DISK, +) +"""All supported array types.""" + + +def random_mat( + shape: tuple[int, int], + *, + density: SupportsFloat = 0.01, + format: Literal["csr", "csc"] = "csr", # noqa: A002 + dtype: DTypeLike | None = None, + container: Literal["array", "matrix"] = "array", + gen: np.random.Generator | None = None, +) -> CSBase: + """Create a random matrix.""" + from scipy.sparse import random as random_spmat + from scipy.sparse import random_array as random_sparr + + m, n = shape + return ( + random_spmat(m, n, density=density, format=format, dtype=dtype, random_state=gen) + if container == "matrix" + else random_sparr(shape, density=density, format=format, dtype=dtype, random_state=gen) + ) + + +def _half_chunk_size(a: tuple[int, ...]) -> tuple[int, ...]: + def half_rounded_up(x: int) -> int: + div, mod = divmod(x, 2) + return div + (mod > 0) + + return tuple(half_rounded_up(x) for x in a) From 43d88e322e9e943496b61dde8ff5c3156f8dcdfb Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Mon, 24 Feb 2025 10:23:04 +0100 Subject: [PATCH 13/22] we only have "Array" --- src/testing/fast_array_utils/array_type.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/testing/fast_array_utils/array_type.py b/src/testing/fast_array_utils/array_type.py index 04e398b..fe78e82 100644 --- a/src/testing/fast_array_utils/array_type.py +++ b/src/testing/fast_array_utils/array_type.py @@ -118,7 +118,7 @@ def cls(self) -> type[Array]: # noqa: PLR0911 import cupyx.scipy.sparse as cu_sparse return getattr(cu_sparse, cls_name) # type: ignore[no-any-return] - case "dask.array", cls_name, _: + case "dask.array", "Array", _: if TYPE_CHECKING: from dask.array.core import Array as DaskArray else: @@ -159,7 +159,7 @@ def random( raise NotImplementedError case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: raise NotImplementedError - case "dask.array", cls_name, _: + case "dask.array", "Array", _: raise NotImplementedError case "h5py", "Dataset", _: raise NotImplementedError From 9c9f3ca007d2989b57f7991ebc237119365a6a2d Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Mon, 24 Feb 2025 10:44:02 +0100 Subject: [PATCH 14/22] explicit benchmarks --- src/testing/fast_array_utils/array_type.py | 10 +++++++++- tests/test_stats.py | 14 +++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/testing/fast_array_utils/array_type.py b/src/testing/fast_array_utils/array_type.py index fe78e82..a01d4a1 100644 --- a/src/testing/fast_array_utils/array_type.py +++ b/src/testing/fast_array_utils/array_type.py @@ -160,7 +160,15 @@ def random( case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: raise NotImplementedError case "dask.array", "Array", _: - raise NotImplementedError + if TYPE_CHECKING: + from dask.array.wrap import zeros + else: + from dask.array import zeros + + arr = zeros(shape, dtype=dtype, chunks=_half_chunk_size(shape)) + return arr.map_blocks( + lambda x: self.random(x.shape, dtype=x.dtype, gen=gen), dtype=dtype + ) case "h5py", "Dataset", _: raise NotImplementedError case "zarr", "Array", _: diff --git a/tests/test_stats.py b/tests/test_stats.py index 7f9f688..22e9f2e 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -7,6 +7,8 @@ import pytest from fast_array_utils import stats, types +from testing.fast_array_utils import SUPPORTED_TYPES_MEM +from testing.fast_array_utils.pytest import _skip_if_unimportable if TYPE_CHECKING: @@ -72,17 +74,19 @@ def test_sum( @pytest.mark.benchmark @pytest.mark.parametrize("dtype", [np.float32, np.float64]) # random only supports float +@pytest.mark.parametrize( + "array_type", + # TODO(flying-sheep): remove need for private import # noqa: TD003 + [pytest.param(t, id=str(t), marks=_skip_if_unimportable(t)) for t in SUPPORTED_TYPES_MEM], +) def test_sum_benchmark( benchmark: BenchmarkFixture, array_type: ArrayType, axis: Literal[0, 1, None], dtype: type[np.float32 | np.float64], ) -> None: - try: - shape = (1_000, 1_000) if "sparse" in array_type.mod else (100, 100) - arr = array_type.random(shape, dtype=dtype) - except 
NotImplementedError: - pytest.skip("random_array not implemented for dtype") + shape = (1_000, 1_000) if "sparse" in array_type.mod else (100, 100) + arr = array_type.random(shape, dtype=dtype) stats.sum(arr, axis=axis) # type: ignore[arg-type] # warmup: numba compile benchmark(stats.sum, arr, axis=axis) From b14cb74759133420af194bc90d9ec1f4df970d97 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Mon, 24 Feb 2025 11:19:12 +0100 Subject: [PATCH 15/22] switch to flags --- src/testing/fast_array_utils/__init__.py | 71 ++++------------- src/testing/fast_array_utils/array_type.py | 89 ++++------------------ tests/test_stats.py | 9 ++- 3 files changed, 37 insertions(+), 132 deletions(-) diff --git a/src/testing/fast_array_utils/__init__.py b/src/testing/fast_array_utils/__init__.py index 8d2b3e0..6b76498 100644 --- a/src/testing/fast_array_utils/__init__.py +++ b/src/testing/fast_array_utils/__init__.py @@ -5,7 +5,7 @@ from typing import TYPE_CHECKING -from .array_type import ArrayType, ConversionContext, random_mat +from .array_type import ArrayType, ConversionContext, Flags, random_mat if TYPE_CHECKING: @@ -14,68 +14,29 @@ __all__ = [ "SUPPORTED_TYPES", - "SUPPORTED_TYPES_DASK", - "SUPPORTED_TYPES_DISK", - "SUPPORTED_TYPES_MEM", - "SUPPORTED_TYPES_MEM_DENSE", - "SUPPORTED_TYPES_MEM_SPARSE", "Array", "ArrayType", "ConversionContext", + "Flags", "ToArray", "random_mat", ] -_SUPPORTED_TYPE_NAMES_DISK = [ - "h5py.Dataset", - "zarr.Array", -] -_SUPPORTED_TYPE_NAMES_DENSE = [ - "numpy.ndarray", - "cupy.ndarray", -] -_SUPPORTED_TYPE_NAMES_SPARSE = [ - "scipy.sparse.csr_array", - "scipy.sparse.csc_array", - "scipy.sparse.csr_matrix", - "scipy.sparse.csc_matrix", - "cupyx.scipy.sparse.csr_matrix", - "cupyx.scipy.sparse.csc_matrix", -] - -SUPPORTED_TYPES_DISK: tuple[ArrayType, ...] = tuple( - map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_DISK) -) -"""Supported array types that represent on-disk data - -These on-disk array types are not supported inside dask arrays. 
-""" - -SUPPORTED_TYPES_MEM_DENSE: tuple[ArrayType, ...] = tuple( - map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_DENSE) +_TP_MEM = ( + ArrayType("numpy", "ndarray"), + ArrayType("cupy", "ndarray", Flags.Gpu), + *( + ArrayType("scipy.sparse", n, Flags.Sparse) + for n in ["csr_array", "csc_array", "csr_matrix", "csc_matrix"] + ), + *( + ArrayType("cupyx.scipy.sparse", n, Flags.Gpu | Flags.Sparse) + for n in ["csr_matrix", "csc_matrix"] + ), ) -"""Supported dense in-memory array types.""" +_TP_DASK = tuple(ArrayType("dask.array", "Array", Flags.Dask | t.flags, inner=t) for t in _TP_MEM) +_TP_DISK = tuple(ArrayType(m, n, Flags.Disk) for m, n in [("h5py", "Dataset"), ("zarr", "Array")]) -SUPPORTED_TYPES_MEM_SPARSE: tuple[ArrayType, ...] = tuple( - map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_SPARSE) -) -"""Supported sparse in-memory array types.""" - -SUPPORTED_TYPES_MEM: tuple[ArrayType, ...] = ( - *SUPPORTED_TYPES_MEM_DENSE, - *SUPPORTED_TYPES_MEM_SPARSE, -) -"""Supported array types that are valid inside dask arrays.""" - -SUPPORTED_TYPES_DASK: tuple[ArrayType, ...] = tuple( - ArrayType("dask.array", "Array", t) for t in SUPPORTED_TYPES_MEM -) -"""Supported dask array types.""" - -SUPPORTED_TYPES: tuple[ArrayType, ...] = ( - *SUPPORTED_TYPES_MEM, - *SUPPORTED_TYPES_DASK, - *SUPPORTED_TYPES_DISK, -) +SUPPORTED_TYPES: tuple[ArrayType, ...] 
= (*_TP_MEM, *_TP_DASK, *_TP_DISK) """All supported array types.""" diff --git a/src/testing/fast_array_utils/array_type.py b/src/testing/fast_array_utils/array_type.py index a01d4a1..deb7115 100644 --- a/src/testing/fast_array_utils/array_type.py +++ b/src/testing/fast_array_utils/array_type.py @@ -3,8 +3,9 @@ from __future__ import annotations +import enum from dataclasses import KW_ONLY, dataclass, field -from functools import cache, cached_property +from functools import cached_property from typing import TYPE_CHECKING import numpy as np @@ -40,17 +41,14 @@ def __call__( # noqa: D102 _DTypeLikeFloat64 = np.dtype[np.float64] | type[np.float64] -__all__ = [ - "SUPPORTED_TYPES", - "SUPPORTED_TYPES_DASK", - "SUPPORTED_TYPES_DISK", - "SUPPORTED_TYPES_MEM", - "SUPPORTED_TYPES_MEM_DENSE", - "SUPPORTED_TYPES_MEM_SPARSE", - "ArrayType", - "ConversionContext", - "ToArray", -] +__all__ = ["ArrayType", "ConversionContext", "ToArray"] + + +class Flags(enum.Flag): + Gpu = enum.auto() + Dask = enum.auto() + Sparse = enum.auto() + Disk = enum.auto() @dataclass @@ -79,21 +77,16 @@ class ArrayType: """Module name.""" name: str """Array class name.""" - inner: ArrayType | None = None - """Inner array type (e.g. for dask).""" + flags: Flags = Flags(0) # noqa: RUF009 + """Classification flags.""" _: KW_ONLY + inner: ArrayType | None = None + """Inner array type (e.g. 
for dask).""" conversion_context: ConversionContext | None = field(default=None, compare=False) """Conversion context required for converting to h5py.""" - @classmethod - @cache - def from_qualname(cls, qualname: str, inner: str | None = None) -> ArrayType: - """Create from qualnames of type and inner type.""" - mod, name = qualname.rsplit(".", 1) - return cls(mod, name, ArrayType.from_qualname(inner) if inner else None) - def __repr__(self) -> str: # noqa: D105 rv = f"{self.mod}.{self.name}" return f"{rv}[{self.inner}]" if self.inner else rv @@ -235,60 +228,6 @@ def _to_zarr_array(x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types. return za -_SUPPORTED_TYPE_NAMES_DISK = [ - "h5py.Dataset", - "zarr.Array", -] -_SUPPORTED_TYPE_NAMES_DENSE = [ - "numpy.ndarray", - "cupy.ndarray", -] -_SUPPORTED_TYPE_NAMES_SPARSE = [ - "scipy.sparse.csr_array", - "scipy.sparse.csc_array", - "scipy.sparse.csr_matrix", - "scipy.sparse.csc_matrix", - "cupyx.scipy.sparse.csr_matrix", - "cupyx.scipy.sparse.csc_matrix", -] - -SUPPORTED_TYPES_DISK: tuple[ArrayType, ...] = tuple( - map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_DISK) -) -"""Supported array types that represent on-disk data - -These on-disk array types are not supported inside dask arrays. -""" - -SUPPORTED_TYPES_MEM_DENSE: tuple[ArrayType, ...] = tuple( - map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_DENSE) -) -"""Supported dense in-memory array types.""" - -SUPPORTED_TYPES_MEM_SPARSE: tuple[ArrayType, ...] = tuple( - map(ArrayType.from_qualname, _SUPPORTED_TYPE_NAMES_SPARSE) -) -"""Supported sparse in-memory array types.""" - -SUPPORTED_TYPES_MEM: tuple[ArrayType, ...] = ( - *SUPPORTED_TYPES_MEM_DENSE, - *SUPPORTED_TYPES_MEM_SPARSE, -) -"""Supported array types that are valid inside dask arrays.""" - -SUPPORTED_TYPES_DASK: tuple[ArrayType, ...] = tuple( - ArrayType("dask.array", "Array", t) for t in SUPPORTED_TYPES_MEM -) -"""Supported dask array types.""" - -SUPPORTED_TYPES: tuple[ArrayType, ...] 
= ( - *SUPPORTED_TYPES_MEM, - *SUPPORTED_TYPES_DASK, - *SUPPORTED_TYPES_DISK, -) -"""All supported array types.""" - - def random_mat( shape: tuple[int, int], *, diff --git a/tests/test_stats.py b/tests/test_stats.py index 22e9f2e..368fa50 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -7,7 +7,8 @@ import pytest from fast_array_utils import stats, types -from testing.fast_array_utils import SUPPORTED_TYPES_MEM +from testing.fast_array_utils import SUPPORTED_TYPES +from testing.fast_array_utils.array_type import Flags from testing.fast_array_utils.pytest import _skip_if_unimportable @@ -77,7 +78,11 @@ def test_sum( @pytest.mark.parametrize( "array_type", # TODO(flying-sheep): remove need for private import # noqa: TD003 - [pytest.param(t, id=str(t), marks=_skip_if_unimportable(t)) for t in SUPPORTED_TYPES_MEM], + [ + pytest.param(t, id=str(t), marks=_skip_if_unimportable(t)) + for t in SUPPORTED_TYPES + if t.flags & ~(Flags.Dask | Flags.Disk | Flags.Gpu) + ], ) def test_sum_benchmark( benchmark: BenchmarkFixture, From 8a43ae885c130697591bf3f06821decf298bb584 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Mon, 24 Feb 2025 12:53:15 +0100 Subject: [PATCH 16/22] switch selection to marker --- pyproject.toml | 4 ++ src/testing/fast_array_utils/__init__.py | 12 +++-- src/testing/fast_array_utils/array_type.py | 21 ++++++-- src/testing/fast_array_utils/pytest.py | 61 ++++++++++++++++++---- tests/test_stats.py | 12 +---- tests/test_test_utils.py | 9 ++++ 6 files changed, 87 insertions(+), 32 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 037a4a4..2b3167e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,8 @@ urls.'Documentation' = "https://icb-fast-array-utils.readthedocs-hosted.com/" urls.'Issue Tracker' = "https://github.com/scverse/fast-array-utils/issues" urls.'Source Code' = "https://github.com/scverse/fast-array-utils" +entry_points.pytest11.fast_array_utils = "testing.fast_array_utils.pytest" + [tool.hatch.version] source = "vcs" raw-options = { local_scheme = "no-local-version" } # be able to publish dev version @@ -96,6 +98,8 @@ lint.per-file-ignores."tests/**/test_*.py" = [ "S101", # tests use `assert` ] lint.allowed-confusables = [ "×", "’" ] +lint.flake8-bugbear.extend-immutable-calls = [ "testing.fast_array_utils.Flags" ] + lint.flake8-copyright.notice-rgx = "SPDX-License-Identifier: MPL-2\\.0" lint.flake8-type-checking.exempt-modules = [ ] lint.flake8-type-checking.strict = true diff --git a/src/testing/fast_array_utils/__init__.py b/src/testing/fast_array_utils/__init__.py index 6b76498..ad7928d 100644 --- a/src/testing/fast_array_utils/__init__.py +++ b/src/testing/fast_array_utils/__init__.py @@ -24,19 +24,21 @@ _TP_MEM = ( - ArrayType("numpy", "ndarray"), - ArrayType("cupy", "ndarray", Flags.Gpu), + ArrayType("numpy", "ndarray", Flags.Any), + ArrayType("cupy", "ndarray", Flags.Any | Flags.Gpu), *( - ArrayType("scipy.sparse", n, Flags.Sparse) + ArrayType("scipy.sparse", n, Flags.Any | Flags.Sparse) for n in ["csr_array", "csc_array", "csr_matrix", "csc_matrix"] ), *( - ArrayType("cupyx.scipy.sparse", n, Flags.Gpu | 
Flags.Sparse) + ArrayType("cupyx.scipy.sparse", n, Flags.Any | Flags.Gpu | Flags.Sparse) for n in ["csr_matrix", "csc_matrix"] ), ) _TP_DASK = tuple(ArrayType("dask.array", "Array", Flags.Dask | t.flags, inner=t) for t in _TP_MEM) -_TP_DISK = tuple(ArrayType(m, n, Flags.Disk) for m, n in [("h5py", "Dataset"), ("zarr", "Array")]) +_TP_DISK = tuple( + ArrayType(m, n, Flags.Any | Flags.Disk) for m, n in [("h5py", "Dataset"), ("zarr", "Array")] +) SUPPORTED_TYPES: tuple[ArrayType, ...] = (*_TP_MEM, *_TP_DASK, *_TP_DISK) """All supported array types.""" diff --git a/src/testing/fast_array_utils/array_type.py b/src/testing/fast_array_utils/array_type.py index deb7115..cc0f331 100644 --- a/src/testing/fast_array_utils/array_type.py +++ b/src/testing/fast_array_utils/array_type.py @@ -12,7 +12,6 @@ if TYPE_CHECKING: - from collections.abc import Callable from typing import Any, Literal, Protocol, SupportsFloat, TypeAlias import h5py @@ -45,18 +44,30 @@ def __call__( # noqa: D102 class Flags(enum.Flag): + """Array classification flags. + + Use ``Flags(0)`` and ``~Flags(0)`` for no or all flags set. 
+ """ + + Any = enum.auto() + Sparse = enum.auto() Gpu = enum.auto() Dask = enum.auto() - Sparse = enum.auto() Disk = enum.auto() + def __repr__(self) -> str: + if self is Flags(0): + return "Flags(0)" + if self is ~Flags(0): + return "~Flags(0)" + return super().__repr__() + @dataclass class ConversionContext: """Conversion context required for h5py.""" hdf5_file: h5py.File - get_ds_name: Callable[[], str] @dataclass(frozen=True) @@ -77,7 +88,7 @@ class ArrayType: """Module name.""" name: str """Array class name.""" - flags: Flags = Flags(0) # noqa: RUF009 + flags: Flags = Flags.Any """Classification flags.""" _: KW_ONLY @@ -215,7 +226,7 @@ def _to_h5py_dataset( msg = "`conversion_context` must be set for h5py" raise RuntimeError(msg) arr = np.asarray(x, dtype=dtype) - return ctx.hdf5_file.create_dataset(ctx.get_ds_name(), arr.shape, arr.dtype, data=arr) + return ctx.hdf5_file.create_dataset("data", arr.shape, arr.dtype, data=arr) @staticmethod def _to_zarr_array(x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.ZarrArray: diff --git a/src/testing/fast_array_utils/pytest.py b/src/testing/fast_array_utils/pytest.py index 5e465dd..ddccb80 100644 --- a/src/testing/fast_array_utils/pytest.py +++ b/src/testing/fast_array_utils/pytest.py @@ -7,22 +7,31 @@ from __future__ import annotations import dataclasses -import os from importlib.util import find_spec from typing import TYPE_CHECKING, cast import pytest -from . import SUPPORTED_TYPES, ArrayType, ConversionContext +from . 
import SUPPORTED_TYPES, ArrayType, ConversionContext, Flags if TYPE_CHECKING: from collections.abc import Generator + from _pytest.nodes import Node +else: + Node = object + __all__ = ["array_type", "conversion_context"] +def pytest_configure(config: pytest.Config) -> None: + config.addinivalue_line( + "markers", "array_type: filter tests using `testing.fast_array_utils.Flags`" + ) + + def _skip_if_unimportable(array_type: ArrayType) -> pytest.MarkDecorator: dist = None skip = False @@ -32,24 +41,57 @@ def _skip_if_unimportable(array_type: ArrayType) -> pytest.MarkDecorator: return pytest.mark.skipif(skip, reason=f"{dist} not installed") +def _resolve_sel( + select: Flags = ~Flags(0), skip: Flags = Flags(0), *, reason: str | None = None +) -> tuple[Flags, Flags, str | None]: + return select, skip, reason + + @pytest.fixture( - scope="session", params=[pytest.param(t, id=str(t), marks=_skip_if_unimportable(t)) for t in SUPPORTED_TYPES], ) def array_type(request: pytest.FixtureRequest) -> ArrayType: - """Fixture for a supported :class:`~testing.fast_array_utils.ArrayType`.""" + """Fixture for a supported :class:`~testing.fast_array_utils.ArrayType`. + + Use :class:`testing.fast_array_utils.Flags` to select or skip array types + + #. using ``select=``/``args[0]``: + + .. code:: python + + @pytest.mark.array_type(Flags.Sparse, reason="`something` only supports sparse arrays") + def test_something(array_type: ArrayType) -> None: + ... + + #. and/or using ``skip=``/``args[1]``: + + .. code:: python + + @pytest.mark.array_type(skip=Flags.Dask | Flags.Disk | Flags.Gpu) + def test_something(array_type: ArrayType) -> None: + ... 
+ """ from fast_array_utils.types import H5Dataset at = cast(ArrayType, request.param) + + mark = cast(Node, request.node).get_closest_marker("array_type") + if mark: + select, skip, reason = _resolve_sel(*mark.args, **mark.kwargs) + if not (at.flags & select) or (at.flags & skip): + pytest.skip(reason or f"{at} not included in {select=}, {skip=}") + if at.cls is H5Dataset: ctx = request.getfixturevalue("conversion_context") at = dataclasses.replace(at, conversion_context=ctx) + return at -@pytest.fixture(scope="session") +@pytest.fixture # worker_id for xdist since we don't want to override open files def conversion_context( + request: pytest.FixtureRequest, tmp_path_factory: pytest.TempPathFactory, worker_id: str = "serial", ) -> Generator[ConversionContext, None, None]: @@ -59,12 +101,9 @@ def conversion_context( """ import h5py + node = cast(Node, request.node) tmp_path = tmp_path_factory.mktemp("backed_adata") - tmp_path = tmp_path / f"test_{worker_id}.h5ad" - - def get_ds_name() -> str: - """Get dataset name from test name, so tests running in parallel each get their own.""" - return os.environ["PYTEST_CURRENT_TEST"].rsplit(":", 1)[-1].split(" ", 1)[0] + tmp_path = tmp_path / f"test_{node.name}_{worker_id}.h5ad" with h5py.File(tmp_path, "x") as f: - yield ConversionContext(hdf5_file=f, get_ds_name=get_ds_name) + yield ConversionContext(hdf5_file=f) diff --git a/tests/test_stats.py b/tests/test_stats.py index 368fa50..954a2ce 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -7,9 +7,7 @@ import pytest from fast_array_utils import stats, types -from testing.fast_array_utils import SUPPORTED_TYPES from testing.fast_array_utils.array_type import Flags -from testing.fast_array_utils.pytest import _skip_if_unimportable if TYPE_CHECKING: @@ -74,16 +72,8 @@ def test_sum( @pytest.mark.benchmark +@pytest.mark.array_type(skip=Flags.Dask | Flags.Disk | Flags.Gpu) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) # random only supports float 
-@pytest.mark.parametrize( - "array_type", - # TODO(flying-sheep): remove need for private import # noqa: TD003 - [ - pytest.param(t, id=str(t), marks=_skip_if_unimportable(t)) - for t in SUPPORTED_TYPES - if t.flags & ~(Flags.Dask | Flags.Disk | Flags.Gpu) - ], -) def test_sum_benchmark( benchmark: BenchmarkFixture, array_type: ArrayType, diff --git a/tests/test_test_utils.py b/tests/test_test_utils.py index fa7fe86..b446551 100644 --- a/tests/test_test_utils.py +++ b/tests/test_test_utils.py @@ -7,6 +7,7 @@ import pytest from fast_array_utils import types +from testing.fast_array_utils.array_type import Flags if TYPE_CHECKING: @@ -25,3 +26,11 @@ def test_conv(array_type: ArrayType, dtype: DTypeLike) -> None: arr = arr.get() assert arr.shape == (3, 4) assert arr.dtype == dtype + + +def test_array_types(array_type: ArrayType) -> None: + assert array_type.flags & Flags.Any + assert ("sparse" in str(array_type)) == bool(array_type.flags & Flags.Sparse) + assert ("cupy" in str(array_type)) == bool(array_type.flags & Flags.Gpu) + assert ("dask" in str(array_type)) == bool(array_type.flags & Flags.Dask) + assert (array_type.mod in {"zarr", "h5py"}) == bool(array_type.flags & Flags.Disk) From 96d17fbf493843b54abee7c3bb20cc48686152b7 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Mon, 24 Feb 2025 13:07:29 +0100 Subject: [PATCH 17/22] switch matrix tests to fixture --- src/testing/fast_array_utils/array_type.py | 13 ++++++++++-- tests/test_sparse.py | 23 ++++++++++++---------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/testing/fast_array_utils/array_type.py b/src/testing/fast_array_utils/array_type.py index cc0f331..c46ebff 100644 --- a/src/testing/fast_array_utils/array_type.py +++ b/src/testing/fast_array_utils/array_type.py @@ -147,6 +147,8 @@ def random( *, dtype: _DTypeLikeFloat32 | _DTypeLikeFloat64 | None, gen: np.random.Generator | None = None, + # sparse only + density: SupportsFloat = 0.01, ) -> Array: """Create a random array.""" gen = np.random.default_rng(gen) @@ -158,7 +160,13 @@ def random( "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" ) as cls_name, None: fmt, container = cls_name.split("_") - return random_mat(shape, format=fmt, container=container, dtype=dtype) # type: ignore[arg-type] + return random_mat( + shape, + density=density, + format=fmt, # type: ignore[arg-type] + container=container, # type: ignore[arg-type] + dtype=dtype, + ) case "cupy", "ndarray", None: raise NotImplementedError case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: @@ -171,7 +179,8 @@ def random( arr = zeros(shape, dtype=dtype, chunks=_half_chunk_size(shape)) return arr.map_blocks( - lambda x: self.random(x.shape, dtype=x.dtype, gen=gen), dtype=dtype + lambda x: self.random(x.shape, dtype=x.dtype, gen=gen, density=density), + dtype=dtype, ) case "h5py", "Dataset", _: raise NotImplementedError diff --git a/tests/test_sparse.py b/tests/test_sparse.py index 91c8dab..6680df3 100644 --- a/tests/test_sparse.py +++ b/tests/test_sparse.py @@ -2,21 +2,23 @@ from __future__ import annotations from importlib.util import find_spec -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast import numpy as np import pytest from fast_array_utils.conv.scipy import to_dense -from 
testing.fast_array_utils import random_mat +from fast_array_utils.types import CSBase +from testing.fast_array_utils.array_type import Flags if TYPE_CHECKING: from typing import Literal - from numpy.typing import DTypeLike from pytest_codspeed import BenchmarkFixture + from testing.fast_array_utils.array_type import ArrayType, _DTypeLikeFloat32, _DTypeLikeFloat64 + pytestmark = [pytest.mark.skipif(not find_spec("scipy"), reason="scipy not installed")] @@ -36,14 +38,14 @@ def dtype(request: pytest.FixtureRequest) -> type[np.float32 | np.float64]: return request.param # type: ignore[no-any-return] +@pytest.mark.array_type(select=Flags.Sparse, skip=Flags.Dask) @pytest.mark.parametrize("order", ["C", "F"]) def test_to_dense( + array_type: ArrayType, order: Literal["C", "F"], - sp_fmt: Literal["csr", "csc"], - dtype: DTypeLike, - sp_container: Literal["array", "matrix"], + dtype: _DTypeLikeFloat32 | _DTypeLikeFloat64, ) -> None: - mat = random_mat((10, 10), density=0.1, format=sp_fmt, dtype=dtype, container=sp_container) + mat = cast(CSBase, array_type.random((10, 10), density=0.1, dtype=dtype)) arr = to_dense(mat, order=order) assert arr.flags[order] assert arr.dtype == mat.dtype @@ -51,13 +53,14 @@ def test_to_dense( @pytest.mark.benchmark +@pytest.mark.array_type(select=Flags.Sparse, skip=Flags.Dask) @pytest.mark.parametrize("order", ["C", "F"]) def test_to_dense_benchmark( benchmark: BenchmarkFixture, + array_type: ArrayType, order: Literal["C", "F"], - sp_fmt: Literal["csr", "csc"], - dtype: DTypeLike, + dtype: _DTypeLikeFloat32 | _DTypeLikeFloat64, ) -> None: - mat = random_mat((1_000, 1_000), format=sp_fmt, dtype=dtype, container="array") + mat = cast(CSBase, array_type.random((1_000, 1_000), dtype=dtype)) to_dense(mat, order=order) # warmup: numba compile benchmark(to_dense, mat, order=order) From ed382269ac74785d80401e3677afae1adc09fc33 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Mon, 24 Feb 2025 13:48:54 +0100 Subject: [PATCH 18/22] parametrize --- src/testing/fast_array_utils/__init__.py | 4 +- .../{array_type.py => _array_type.py} | 54 ++++++++++--------- tests/test_sparse.py | 17 +++--- tests/test_stats.py | 2 +- tests/test_test_utils.py | 2 +- 5 files changed, 42 insertions(+), 37 deletions(-) rename src/testing/fast_array_utils/{array_type.py => _array_type.py} (85%) diff --git a/src/testing/fast_array_utils/__init__.py b/src/testing/fast_array_utils/__init__.py index ad7928d..e26852f 100644 --- a/src/testing/fast_array_utils/__init__.py +++ b/src/testing/fast_array_utils/__init__.py @@ -5,11 +5,11 @@ from typing import TYPE_CHECKING -from .array_type import ArrayType, ConversionContext, Flags, random_mat +from ._array_type import ArrayType, ConversionContext, Flags, random_mat if TYPE_CHECKING: - from .array_type import Array, ToArray # noqa: TC004 + from ._array_type import Array, ToArray # noqa: TC004 __all__ = [ diff --git a/src/testing/fast_array_utils/array_type.py b/src/testing/fast_array_utils/_array_type.py similarity index 85% rename from src/testing/fast_array_utils/array_type.py rename to src/testing/fast_array_utils/_array_type.py index c46ebff..c273b95 100644 --- a/src/testing/fast_array_utils/array_type.py +++ b/src/testing/fast_array_utils/_array_type.py @@ -6,7 +6,7 @@ import enum from dataclasses import KW_ONLY, dataclass, field from functools import cached_property -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Generic, TypeVar import numpy as np @@ -29,15 +29,21 @@ | types.ZarrArray ) - class ToArray(Protocol): + Arr = TypeVar("Arr", bound=Array, default=Array) + Arr_co = TypeVar("Arr_co", bound=Array, covariant=True) + + Inner = TypeVar("Inner", bound="ArrayType[Any, None] | None", default=Any) + + class ToArray(Protocol, Generic[Arr_co]): """Convert to a supported array.""" - def __call__( # noqa: D102 - self, data: ArrayLike, /, *, dtype: DTypeLike | None = None - ) -> Array: ... 
+ def __call__(self, data: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Array: ... _DTypeLikeFloat32 = np.dtype[np.float32] | type[np.float32] _DTypeLikeFloat64 = np.dtype[np.float64] | type[np.float64] +else: + Arr = TypeVar("Arr") + Inner = TypeVar("Inner") __all__ = ["ArrayType", "ConversionContext", "ToArray"] @@ -71,7 +77,7 @@ class ConversionContext: @dataclass(frozen=True) -class ArrayType: +class ArrayType(Generic[Arr, Inner]): """Supported array type with methods for conversion and random generation. Examples @@ -93,21 +99,21 @@ class ArrayType: _: KW_ONLY - inner: ArrayType | None = None + inner: Inner = None # type: ignore[assignment] """Inner array type (e.g. for dask).""" conversion_context: ConversionContext | None = field(default=None, compare=False) """Conversion context required for converting to h5py.""" - def __repr__(self) -> str: # noqa: D105 + def __repr__(self) -> str: rv = f"{self.mod}.{self.name}" return f"{rv}[{self.inner}]" if self.inner else rv @cached_property - def cls(self) -> type[Array]: # noqa: PLR0911 + def cls(self) -> type[Arr]: # noqa: PLR0911 """Array class for :func:`isinstance` checks.""" match self.mod, self.name, self.inner: case "numpy", "ndarray", None: - return np.ndarray + return np.ndarray # type: ignore[return-value] case "scipy.sparse", ( "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" ) as cls_name, None: @@ -128,7 +134,7 @@ def cls(self) -> type[Array]: # noqa: PLR0911 else: from dask.array import Array as DaskArray - return DaskArray + return DaskArray # type: ignore[return-value] case "h5py", "Dataset", _: import h5py @@ -136,7 +142,7 @@ def cls(self) -> type[Array]: # noqa: PLR0911 case "zarr", "Array", _: import zarr - return zarr.Array + return zarr.Array # type: ignore[return-value] case _: msg = f"Unknown array class: {self}" raise ValueError(msg) @@ -149,23 +155,21 @@ def random( gen: np.random.Generator | None = None, # sparse only density: SupportsFloat = 0.01, - ) -> Array: + ) -> Arr: 
"""Create a random array.""" gen = np.random.default_rng(gen) match self.mod, self.name, self.inner: case "numpy", "ndarray", None: - return gen.random(shape, dtype=dtype or np.float64) + return gen.random(shape, dtype=dtype or np.float64) # type: ignore[return-value] case "scipy.sparse", ( "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" ) as cls_name, None: - fmt, container = cls_name.split("_") - return random_mat( - shape, - density=density, - format=fmt, # type: ignore[arg-type] - container=container, # type: ignore[arg-type] - dtype=dtype, + fmt: Literal["csr", "csc"] + container: Literal["array", "matrix"] + fmt, container = cls_name.split("_") # type: ignore[assignment] + return random_mat( # type: ignore[no-any-return] + shape, density=density, format=fmt, container=container, dtype=dtype ) case "cupy", "ndarray", None: raise NotImplementedError @@ -178,7 +182,7 @@ def random( from dask.array import zeros arr = zeros(shape, dtype=dtype, chunks=_half_chunk_size(shape)) - return arr.map_blocks( + return arr.map_blocks( # type: ignore[no-any-return] lambda x: self.random(x.shape, dtype=x.dtype, gen=gen, density=density), dtype=dtype, ) @@ -190,11 +194,11 @@ def random( msg = f"Unknown array class: {self}" raise ValueError(msg) - def __call__(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Array: + def __call__(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Arr: """Convert to this array type.""" from fast_array_utils import types - fn: ToArray + fn: ToArray[Arr] if self.cls is np.ndarray: fn = np.asarray # type: ignore[assignment] elif self.cls is types.DaskArray: @@ -213,7 +217,7 @@ def __call__(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Array: else: fn = self.cls # type: ignore[assignment] - return fn(x, dtype=dtype) + return fn(x, dtype=dtype) # type: ignore[return-value] def _to_dask_array(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.DaskArray: """Convert to a dask array.""" diff 
--git a/tests/test_sparse.py b/tests/test_sparse.py index 6680df3..bf928f7 100644 --- a/tests/test_sparse.py +++ b/tests/test_sparse.py @@ -2,14 +2,13 @@ from __future__ import annotations from importlib.util import find_spec -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING import numpy as np import pytest from fast_array_utils.conv.scipy import to_dense -from fast_array_utils.types import CSBase -from testing.fast_array_utils.array_type import Flags +from testing.fast_array_utils import Flags if TYPE_CHECKING: @@ -17,7 +16,9 @@ from pytest_codspeed import BenchmarkFixture - from testing.fast_array_utils.array_type import ArrayType, _DTypeLikeFloat32, _DTypeLikeFloat64 + from fast_array_utils.types import CSBase + from testing.fast_array_utils import ArrayType + from testing.fast_array_utils._array_type import _DTypeLikeFloat32, _DTypeLikeFloat64 pytestmark = [pytest.mark.skipif(not find_spec("scipy"), reason="scipy not installed")] @@ -41,11 +42,11 @@ def dtype(request: pytest.FixtureRequest) -> type[np.float32 | np.float64]: @pytest.mark.array_type(select=Flags.Sparse, skip=Flags.Dask) @pytest.mark.parametrize("order", ["C", "F"]) def test_to_dense( - array_type: ArrayType, + array_type: ArrayType[CSBase, None], order: Literal["C", "F"], dtype: _DTypeLikeFloat32 | _DTypeLikeFloat64, ) -> None: - mat = cast(CSBase, array_type.random((10, 10), density=0.1, dtype=dtype)) + mat = array_type.random((10, 10), density=0.1, dtype=dtype) arr = to_dense(mat, order=order) assert arr.flags[order] assert arr.dtype == mat.dtype @@ -57,10 +58,10 @@ def test_to_dense( @pytest.mark.parametrize("order", ["C", "F"]) def test_to_dense_benchmark( benchmark: BenchmarkFixture, - array_type: ArrayType, + array_type: ArrayType[CSBase, None], order: Literal["C", "F"], dtype: _DTypeLikeFloat32 | _DTypeLikeFloat64, ) -> None: - mat = cast(CSBase, array_type.random((1_000, 1_000), dtype=dtype)) + mat = array_type.random((1_000, 1_000), dtype=dtype) to_dense(mat, 
order=order) # warmup: numba compile benchmark(to_dense, mat, order=order) diff --git a/tests/test_stats.py b/tests/test_stats.py index 954a2ce..e1e08dc 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -7,7 +7,7 @@ import pytest from fast_array_utils import stats, types -from testing.fast_array_utils.array_type import Flags +from testing.fast_array_utils import Flags if TYPE_CHECKING: diff --git a/tests/test_test_utils.py b/tests/test_test_utils.py index b446551..54b8cbb 100644 --- a/tests/test_test_utils.py +++ b/tests/test_test_utils.py @@ -7,7 +7,7 @@ import pytest from fast_array_utils import types -from testing.fast_array_utils.array_type import Flags +from testing.fast_array_utils import Flags if TYPE_CHECKING: From 345bf3db0464d5369c36f5e1e210800d4feeb7dc Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Mon, 24 Feb 2025 13:50:30 +0100 Subject: [PATCH 19/22] fix docs --- docs/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index 6cb116f..03c1fa8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -82,6 +82,9 @@ # If that doesn’t work, ignore them nitpick_ignore = { ("py:class", "fast_array_utils.types.T_co"), + ("py:class", "Arr"), + ("py:class", "testing.fast_array_utils._array_type.Arr"), + ("py:class", "testing.fast_array_utils._array_type.Inner"), ("py:class", "_DTypeLikeFloat32"), ("py:class", "_DTypeLikeFloat64"), # sphinx bugs, should be covered by `autodoc_type_aliases` above From 923287d04fb7605db3757a091d9c49f05b4add2b Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Mon, 24 Feb 2025 15:23:24 +0100 Subject: [PATCH 20/22] docs --- docs/conf.py | 4 +++- src/testing/fast_array_utils/_array_type.py | 20 +++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 03c1fa8..2b4f3cd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,9 @@ autodoc_default_options = { "special-members": True, # everything except __call__ really, to avoid having to write autosummary templates - "exclude-members": "__setattr__,__delattr__,__repr__,__eq__,__hash__,__weakref__,__init__", + "exclude-members": ( + "__setattr__,__delattr__,__repr__,__eq__,__or__,__ror__,__hash__,__weakref__,__init__,__new__" + ), } napoleon_google_docstring = False napoleon_numpy_docstring = True diff --git a/src/testing/fast_array_utils/_array_type.py b/src/testing/fast_array_utils/_array_type.py index c273b95..7ae31e2 100644 --- a/src/testing/fast_array_utils/_array_type.py +++ b/src/testing/fast_array_utils/_array_type.py @@ -50,23 +50,21 @@ def __call__(self, data: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Arr class Flags(enum.Flag): - """Array classification flags. - - Use ``Flags(0)`` and ``~Flags(0)`` for no or all flags set. - """ + """Array classification flags.""" + None_ = 0 + """No array type.""" Any = enum.auto() + """Any array type.""" + Sparse = enum.auto() + """Sparse array.""" Gpu = enum.auto() + """GPU array.""" Dask = enum.auto() + """Dask array.""" Disk = enum.auto() - - def __repr__(self) -> str: - if self is Flags(0): - return "Flags(0)" - if self is ~Flags(0): - return "~Flags(0)" - return super().__repr__() + """On-disk array.""" @dataclass From 38d3da687ed770751d83a177b2298001dff4f275 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Mon, 24 Feb 2025 15:51:07 +0100 Subject: [PATCH 21/22] Fix typing --- src/fast_array_utils/conv/_asarray.py | 11 ++-- src/fast_array_utils/stats/_sum.py | 38 ++++++++----- src/testing/fast_array_utils/_array_type.py | 63 ++++++++++++--------- tests/test_sparse.py | 10 ++-- tests/test_stats.py | 26 +++++---- 5 files changed, 84 insertions(+), 64 deletions(-) diff --git a/src/fast_array_utils/conv/_asarray.py b/src/fast_array_utils/conv/_asarray.py index 1378d50..42f3e06 100644 --- a/src/fast_array_utils/conv/_asarray.py +++ b/src/fast_array_utils/conv/_asarray.py @@ -2,17 +2,16 @@ from __future__ import annotations from functools import singledispatch -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, cast import numpy as np +from numpy.typing import NDArray from .. import types if TYPE_CHECKING: - from typing import Any - - from numpy.typing import ArrayLike, NDArray + from numpy.typing import ArrayLike __all__ = ["asarray"] @@ -64,9 +63,9 @@ def _(x: types.OutOfCoreDataset[types.CSBase | NDArray[Any]]) -> NDArray[Any]: @asarray.register(types.CupyArray) def _(x: types.CupyArray) -> NDArray[Any]: - return x.get() # type: ignore[no-any-return] + return cast(NDArray[Any], x.get()) @asarray.register(types.CupySparseMatrix) def _(x: types.CupySparseMatrix) -> NDArray[Any]: - return x.toarray().get() # type: ignore[no-any-return] + return cast(NDArray[Any], x.toarray().get()) diff --git a/src/fast_array_utils/stats/_sum.py b/src/fast_array_utils/stats/_sum.py index 79660be..0d8ef56 100644 --- a/src/fast_array_utils/stats/_sum.py +++ b/src/fast_array_utils/stats/_sum.py @@ -2,26 +2,27 @@ from __future__ import annotations from functools import partial, singledispatch -from typing import TYPE_CHECKING, overload +from typing import TYPE_CHECKING, Any, cast, overload import numpy as np +from numpy.typing import NDArray from .. 
import types if TYPE_CHECKING: - from typing import Any, Literal + from typing import Literal - from numpy.typing import ArrayLike, DTypeLike, NDArray + from numpy.typing import ArrayLike, DTypeLike @overload def sum( - x: ArrayLike, /, *, axis: None = None, dtype: DTypeLike | None = None + x: ArrayLike | types.ZarrArray, /, *, axis: None = None, dtype: DTypeLike | None = None ) -> np.number[Any]: ... @overload def sum( - x: ArrayLike, /, *, axis: Literal[0, 1], dtype: DTypeLike | None = None + x: ArrayLike | types.ZarrArray, /, *, axis: Literal[0, 1], dtype: DTypeLike | None = None ) -> NDArray[Any]: ... @overload def sum( @@ -30,7 +31,11 @@ def sum( def sum( - x: ArrayLike, /, *, axis: Literal[0, 1, None] = None, dtype: DTypeLike | None = None + x: ArrayLike | types.ZarrArray, + /, + *, + axis: Literal[0, 1, None] = None, + dtype: DTypeLike | None = None, ) -> NDArray[Any] | np.number[Any] | types.DaskArray: """Sum over both or one axis. @@ -56,7 +61,7 @@ def _sum( dtype: DTypeLike | None = None, ) -> NDArray[Any] | np.number[Any] | types.DaskArray: assert not isinstance(x, types.CSBase | types.DaskArray) - return np.sum(x, axis=axis, dtype=dtype) # type: ignore[no-any-return] + return cast(NDArray[Any] | np.number[Any], np.sum(x, axis=axis, dtype=dtype)) @_sum.register(types.CSBase) @@ -67,7 +72,7 @@ def _( if isinstance(x, types.CSMatrix): x = sp.csr_array(x) if x.format == "csr" else sp.csc_array(x) - return np.sum(x, axis=axis, dtype=dtype) # type: ignore[no-any-return] + return cast(NDArray[Any] | np.number[Any], np.sum(x, axis=axis, dtype=dtype)) @_sum.register(types.DaskArray) @@ -108,11 +113,14 @@ def sum_drop_keepdims( # Explicitly use numpy result dtype (e.g. 
`NDArray[bool].sum().dtype == int64`) dtype = np.zeros(1, dtype=x.dtype).sum().dtype - return reduction( # type: ignore[no-any-return,no-untyped-call] - x, - sum_drop_keepdims, - partial(np.sum, dtype=dtype), - axis=axis, - dtype=dtype, - meta=np.array([], dtype=dtype), + return cast( + types.DaskArray, + reduction( # type: ignore[no-untyped-call] + x, + sum_drop_keepdims, + partial(np.sum, dtype=dtype), + axis=axis, + dtype=dtype, + meta=np.array([], dtype=dtype), + ), ) diff --git a/src/testing/fast_array_utils/_array_type.py b/src/testing/fast_array_utils/_array_type.py index 7ae31e2..e13e01f 100644 --- a/src/testing/fast_array_utils/_array_type.py +++ b/src/testing/fast_array_utils/_array_type.py @@ -6,13 +6,13 @@ import enum from dataclasses import KW_ONLY, dataclass, field from functools import cached_property -from typing import TYPE_CHECKING, Generic, TypeVar +from typing import TYPE_CHECKING, Generic, Literal, TypeVar, cast import numpy as np if TYPE_CHECKING: - from typing import Any, Literal, Protocol, SupportsFloat, TypeAlias + from typing import Any, Protocol, SupportsFloat, TypeAlias import h5py from numpy.typing import ArrayLike, DTypeLike, NDArray @@ -37,13 +37,14 @@ class ToArray(Protocol, Generic[Arr_co]): """Convert to a supported array.""" - def __call__(self, data: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Array: ... + def __call__(self, data: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Arr_co: ... 
_DTypeLikeFloat32 = np.dtype[np.float32] | type[np.float32] _DTypeLikeFloat64 = np.dtype[np.float64] | type[np.float64] else: Arr = TypeVar("Arr") Inner = TypeVar("Inner") + ToArray = list # needs to have 1 type parameter __all__ = ["ArrayType", "ConversionContext", "ToArray"] @@ -111,36 +112,36 @@ def cls(self) -> type[Arr]: # noqa: PLR0911 """Array class for :func:`isinstance` checks.""" match self.mod, self.name, self.inner: case "numpy", "ndarray", None: - return np.ndarray # type: ignore[return-value] + return cast(type[Arr], np.ndarray) case "scipy.sparse", ( "csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" ) as cls_name, None: import scipy.sparse - return getattr(scipy.sparse, cls_name) # type: ignore[no-any-return] + return cast(type[Arr], getattr(scipy.sparse, cls_name)) case "cupy", "ndarray", None: import cupy as cp - return cp.ndarray # type: ignore[no-any-return] + return cast(type[Arr], cp.ndarray) case "cupyx.scipy.sparse", ("csr_matrix" | "csc_matrix") as cls_name, None: import cupyx.scipy.sparse as cu_sparse - return getattr(cu_sparse, cls_name) # type: ignore[no-any-return] + return cast(type[Arr], getattr(cu_sparse, cls_name)) case "dask.array", "Array", _: if TYPE_CHECKING: from dask.array.core import Array as DaskArray else: from dask.array import Array as DaskArray - return DaskArray # type: ignore[return-value] + return cast(type[Arr], DaskArray) case "h5py", "Dataset", _: import h5py - return h5py.Dataset # type: ignore[no-any-return] + return cast(type[Arr], h5py.Dataset) case "zarr", "Array", _: import zarr - return zarr.Array # type: ignore[return-value] + return cast(type[Arr], zarr.Array) case _: msg = f"Unknown array class: {self}" raise ValueError(msg) @@ -159,15 +160,18 @@ def random( match self.mod, self.name, self.inner: case "numpy", "ndarray", None: - return gen.random(shape, dtype=dtype or np.float64) # type: ignore[return-value] + return cast(Arr, gen.random(shape, dtype=dtype or np.float64)) case "scipy.sparse", ( 
"csr_array" | "csc_array" | "csr_matrix" | "csc_matrix" ) as cls_name, None: - fmt: Literal["csr", "csc"] - container: Literal["array", "matrix"] - fmt, container = cls_name.split("_") # type: ignore[assignment] - return random_mat( # type: ignore[no-any-return] - shape, density=density, format=fmt, container=container, dtype=dtype + fmt, container = cast( + tuple[Literal["csr", "csc"], Literal["array", "matrix"]], cls_name.split("_") + ) + return cast( + Arr, + random_mat( + shape, density=density, format=fmt, container=container, dtype=dtype + ), ) case "cupy", "ndarray", None: raise NotImplementedError @@ -180,9 +184,12 @@ def random( from dask.array import zeros arr = zeros(shape, dtype=dtype, chunks=_half_chunk_size(shape)) - return arr.map_blocks( # type: ignore[no-any-return] - lambda x: self.random(x.shape, dtype=x.dtype, gen=gen, density=density), - dtype=dtype, + return cast( + Arr, + arr.map_blocks( + lambda x: self.random(x.shape, dtype=x.dtype, gen=gen, density=density), + dtype=dtype, + ), ) case "h5py", "Dataset", _: raise NotImplementedError @@ -198,27 +205,29 @@ def __call__(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> Arr: fn: ToArray[Arr] if self.cls is np.ndarray: - fn = np.asarray # type: ignore[assignment] + fn = cast(ToArray[Arr], np.asarray) elif self.cls is types.DaskArray: if self.inner is None: msg = "Cannot convert to dask array without inner array type" raise AssertionError(msg) - fn = self._to_dask_array + fn = cast(ToArray[Arr], self._to_dask_array) elif self.cls is types.H5Dataset: - fn = self._to_h5py_dataset + fn = cast(ToArray[Arr], self._to_h5py_dataset) elif self.cls is types.ZarrArray: - fn = self._to_zarr_array + fn = cast(ToArray[Arr], self._to_zarr_array) elif self.cls is types.CupyArray: import cupy as cu - fn = cu.asarray + fn = cast(ToArray[Arr], cu.asarray) else: - fn = self.cls # type: ignore[assignment] + fn = cast(ToArray[Arr], self.cls) - return fn(x, dtype=dtype) # type: ignore[return-value] + return 
fn(x, dtype=dtype) def _to_dask_array(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> types.DaskArray: """Convert to a dask array.""" + from fast_array_utils.types import DaskArray + if TYPE_CHECKING: import dask.array.core as da else: @@ -227,7 +236,7 @@ def _to_dask_array(self, x: ArrayLike, /, *, dtype: DTypeLike | None = None) -> assert self.inner is not None arr = self.inner(x, dtype=dtype) - return da.from_array(arr, _half_chunk_size(arr.shape)) # type: ignore[no-untyped-call,no-any-return] + return cast(DaskArray, da.from_array(arr, _half_chunk_size(arr.shape))) # type: ignore[no-untyped-call] def _to_h5py_dataset( self, x: ArrayLike, /, *, dtype: DTypeLike | None = None diff --git a/tests/test_sparse.py b/tests/test_sparse.py index bf928f7..0bf10c6 100644 --- a/tests/test_sparse.py +++ b/tests/test_sparse.py @@ -2,7 +2,7 @@ from __future__ import annotations from importlib.util import find_spec -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal, cast import numpy as np import pytest @@ -12,8 +12,6 @@ if TYPE_CHECKING: - from typing import Literal - from pytest_codspeed import BenchmarkFixture from fast_array_utils.types import CSBase @@ -26,17 +24,17 @@ @pytest.fixture(scope="session", params=["csr", "csc"]) def sp_fmt(request: pytest.FixtureRequest) -> Literal["csr", "csc"]: - return request.param # type: ignore[no-any-return] + return cast(Literal["csr", "csc"], request.param) @pytest.fixture(scope="session", params=["array", "matrix"]) def sp_container(request: pytest.FixtureRequest) -> Literal["array", "matrix"]: - return request.param # type: ignore[no-any-return] + return cast(Literal["array", "matrix"], request.param) @pytest.fixture(scope="session", params=[np.float32, np.float64]) def dtype(request: pytest.FixtureRequest) -> type[np.float32 | np.float64]: - return request.param # type: ignore[no-any-return] + return cast(type[np.float32 | np.float64], request.param) @pytest.mark.array_type(select=Flags.Sparse, 
skip=Flags.Dask) diff --git a/tests/test_stats.py b/tests/test_stats.py index e1e08dc..500b75d 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: MPL-2.0 from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal, cast import numpy as np import pytest @@ -11,29 +11,33 @@ if TYPE_CHECKING: - from typing import Any, Literal + from typing import Any + from numpy.typing import NDArray from pytest_codspeed import BenchmarkFixture - from testing.fast_array_utils import Array, ArrayType + from testing.fast_array_utils import ArrayType DTypeIn = type[np.float32 | np.float64 | np.int32 | np.bool_] DTypeOut = type[np.float32 | np.float64 | np.int64] +else: + DTypeIn = type + DTypeOut = type @pytest.fixture(scope="session", params=[0, 1, None]) def axis(request: pytest.FixtureRequest) -> Literal[0, 1, None]: - return request.param # type: ignore[no-any-return] + return cast(Literal[0, 1, None], request.param) @pytest.fixture(scope="session", params=[np.float32, np.float64, np.int32, np.bool_]) def dtype_in(request: pytest.FixtureRequest) -> DTypeIn: - return request.param # type: ignore[no-any-return] + return cast(DTypeIn, request.param) @pytest.fixture(scope="session", params=[np.float32, np.float64, None]) def dtype_arg(request: pytest.FixtureRequest) -> DTypeOut | None: - return request.param # type: ignore[no-any-return] + return cast(DTypeOut | None, request.param) def test_sum( @@ -46,7 +50,9 @@ def test_sum( arr = array_type(np_arr.copy()) assert arr.dtype == dtype_in - sum_: Array[Any] | np.floating = stats.sum(arr, axis=axis, dtype=dtype_arg) # type: ignore[type-arg,arg-type] + sum_: NDArray[Any] | np.number[Any] | types.DaskArray = stats.sum( + arr, axis=axis, dtype=dtype_arg + ) match axis, arr: case _, types.DaskArray(): @@ -68,7 +74,7 @@ def test_sum( else: assert sum_.dtype == dtype_in - np.testing.assert_array_equal(sum_, np.sum(np_arr, axis=axis, 
dtype=dtype_arg)) # type: ignore[arg-type] + np.testing.assert_array_equal(sum_, np.sum(np_arr, axis=axis, dtype=dtype_arg)) @pytest.mark.benchmark @@ -76,12 +82,12 @@ def test_sum( @pytest.mark.parametrize("dtype", [np.float32, np.float64]) # random only supports float def test_sum_benchmark( benchmark: BenchmarkFixture, - array_type: ArrayType, + array_type: ArrayType[NDArray[Any] | types.CSBase], axis: Literal[0, 1, None], dtype: type[np.float32 | np.float64], ) -> None: shape = (1_000, 1_000) if "sparse" in array_type.mod else (100, 100) arr = array_type.random(shape, dtype=dtype) - stats.sum(arr, axis=axis) # type: ignore[arg-type] # warmup: numba compile + stats.sum(arr, axis=axis) # warmup: numba compile benchmark(stats.sum, arr, axis=axis) From 758decc100a257b77b87549325452e7099597f9d Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Mon, 24 Feb 2025 16:01:20 +0100 Subject: [PATCH 22/22] add more flag tests --- tests/test_test_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_test_utils.py b/tests/test_test_utils.py index 54b8cbb..5ebfd5f 100644 --- a/tests/test_test_utils.py +++ b/tests/test_test_utils.py @@ -30,6 +30,8 @@ def test_conv(array_type: ArrayType, dtype: DTypeLike) -> None: def test_array_types(array_type: ArrayType) -> None: assert array_type.flags & Flags.Any + assert array_type.flags & ~Flags(0) + assert not (array_type.flags & Flags(0)) assert ("sparse" in str(array_type)) == bool(array_type.flags & Flags.Sparse) assert ("cupy" in str(array_type)) == bool(array_type.flags & Flags.Gpu) assert ("dask" in str(array_type)) == bool(array_type.flags & Flags.Dask)