From 80dc715886aef76971019cd5604b3fa703eebfea Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 29 Apr 2025 18:40:04 +0200 Subject: [PATCH 1/7] mostly works --- src/fast_array_utils/conv/__init__.py | 17 +++++++---- src/fast_array_utils/conv/_to_dense.py | 34 +++++++++++++++------ tests/test_to_dense.py | 42 ++++++++++++++++++++++---- 3 files changed, 71 insertions(+), 22 deletions(-) diff --git a/src/fast_array_utils/conv/__init__.py b/src/fast_array_utils/conv/__init__.py index fea40e9..afa818d 100644 --- a/src/fast_array_utils/conv/__init__.py +++ b/src/fast_array_utils/conv/__init__.py @@ -21,25 +21,28 @@ @overload -def to_dense(x: CpuArray | DiskArray | types.sparray | types.spmatrix | types.CSDataset, /, *, to_cpu_memory: bool = False) -> NDArray[Any]: ... +def to_dense( + x: CpuArray | DiskArray | types.sparray | types.spmatrix | types.CSDataset, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False +) -> NDArray[Any]: ... @overload -def to_dense(x: types.DaskArray, /, *, to_cpu_memory: Literal[False] = False) -> types.DaskArray: ... +def to_dense(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[False] = False) -> types.DaskArray: ... @overload -def to_dense(x: types.DaskArray, /, *, to_cpu_memory: Literal[True]) -> NDArray[Any]: ... +def to_dense(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[True]) -> NDArray[Any]: ... @overload -def to_dense(x: GpuArray | types.CupySpMatrix, /, *, to_cpu_memory: Literal[False] = False) -> types.CupyArray: ... +def to_dense(x: GpuArray | types.CupySpMatrix, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[False] = False) -> types.CupyArray: ... @overload -def to_dense(x: GpuArray | types.CupySpMatrix, /, *, to_cpu_memory: Literal[True]) -> NDArray[Any]: ... 
+def to_dense(x: GpuArray | types.CupySpMatrix, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[True]) -> NDArray[Any]: ... def to_dense( x: CpuArray | GpuArray | DiskArray | types.CSDataset | types.DaskArray | types.sparray | types.spmatrix | types.CupySpMatrix, /, *, + order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False, ) -> NDArray[Any] | types.DaskArray | types.CupyArray: r"""Convert x to a dense array. @@ -52,6 +55,8 @@ def to_dense( ---------- x Input object to be converted. + order + The order of the output array: ``C`` (row-major) or ``F`` (column-major). ``K`` and ``A`` derive the order from ``x``. to_cpu_memory Also load data into memory (resulting in a :class:`numpy.ndarray`). @@ -60,4 +65,4 @@ def to_dense( Dense form of ``x`` """ - return to_dense_(x, to_cpu_memory=to_cpu_memory) + return to_dense_(x, order=order, to_cpu_memory=to_cpu_memory) diff --git a/src/fast_array_utils/conv/_to_dense.py b/src/fast_array_utils/conv/_to_dense.py index 099995d..39ffdeb 100644 --- a/src/fast_array_utils/conv/_to_dense.py +++ b/src/fast_array_utils/conv/_to_dense.py @@ -11,7 +11,7 @@ if TYPE_CHECKING: - from typing import Any + from typing import Any, Literal from numpy.typing import NDArray @@ -22,40 +22,54 @@ def to_dense_( x: CpuArray | GpuArray | DiskArray | types.DaskArray | types.sparray | types.spmatrix | types.CupySpMatrix, /, *, + order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False, ) -> NDArray[Any] | types.CupyArray | types.DaskArray: del to_cpu_memory # it already is - return np.asarray(x) + return np.asarray(x, order=order) @to_dense_.register(types.spmatrix | types.sparray) # type: ignore[call-overload,misc] -def _to_dense_cs(x: types.spmatrix | types.sparray, /, *, to_cpu_memory: bool = False) -> NDArray[Any]: +def _to_dense_cs(x: types.spmatrix | types.sparray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any]: from . 
import scipy del to_cpu_memory # it already is - return scipy.to_dense(x) + return scipy.to_dense(x, order=sparse_order(x, order=order)) @to_dense_.register(types.DaskArray) -def _to_dense_dask(x: types.DaskArray, /, *, to_cpu_memory: bool = False) -> NDArray[Any] | types.DaskArray: +def _to_dense_dask(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any] | types.DaskArray: from . import to_dense - x = x.map_blocks(partial(to_dense, to_cpu_memory=to_cpu_memory)) + x = x.map_blocks(partial(to_dense, order=order, to_cpu_memory=to_cpu_memory)) return x.compute() if to_cpu_memory else x # type: ignore[return-value] @to_dense_.register(types.CSDataset) -def _to_dense_ooc(x: types.CSDataset, /, *, to_cpu_memory: bool = False) -> NDArray[Any]: +def _to_dense_ooc(x: types.CSDataset, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any]: from . import to_dense if not to_cpu_memory: msg = "to_cpu_memory must be True if x is an CS{R,C}Dataset" raise ValueError(msg) # TODO(flying-sheep): why is to_memory of type Any? 
# noqa: TD003 - return to_dense(cast("types.CSBase", x.to_memory())) + return to_dense(cast("types.CSBase", x.to_memory()), order=sparse_order(x, order=order)) @to_dense_.register(types.CupyArray | types.CupySpMatrix) # type: ignore[call-overload,misc] -def _to_dense_cupy(x: GpuArray, /, *, to_cpu_memory: bool = False) -> NDArray[Any] | types.CupyArray: - x = x.toarray() if isinstance(x, types.CupySpMatrix) else x +def _to_dense_cupy(x: GpuArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any] | types.CupyArray: + import cupy as cu + + x = x.toarray(sparse_order(x, order=order)) if isinstance(x, types.CupySpMatrix) else cu.asarray(x, order=order) return x.get() if to_cpu_memory else x + + +def sparse_order(x: types.spmatrix | types.sparray | types.CupySpMatrix | types.CSDataset, /, *, order: Literal["K", "A", "C", "F"]) -> Literal["C", "F"]: + if TYPE_CHECKING: + from scipy.sparse._base import _spbase + + assert isinstance(x, _spbase | types.CSDataset) + + if order in {"K", "A"}: + order = "F" if x.format == "csc" else "C" + return cast("Literal['C', 'F']", order) diff --git a/tests/test_to_dense.py b/tests/test_to_dense.py index 119441c..1e97831 100644 --- a/tests/test_to_dense.py +++ b/tests/test_to_dense.py @@ -3,7 +3,7 @@ from contextlib import nullcontext from importlib.util import find_spec -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal import numpy as np import pytest @@ -25,26 +25,33 @@ @pytest.mark.parametrize("to_cpu_memory", [True, False], ids=["to_cpu_memory", "not_to_cpu_memory"]) -def test_to_dense(array_type: ArrayType[Array], *, to_cpu_memory: bool) -> None: +@pytest.mark.parametrize("order", argvalues=["K", "C", "F"]) # “A” behaves like “K” +def test_to_dense(array_type: ArrayType[Array], *, order: Literal["K", "C", "F"], to_cpu_memory: bool) -> None: x = array_type([[1, 2, 3], [4, 5, 6]], dtype=np.float32) if not to_cpu_memory and array_type.cls in {types.CSCDataset, 
types.CSRDataset}: with pytest.raises(ValueError, match="to_cpu_memory must be True if x is an CS{R,C}Dataset"): - to_dense(x, to_cpu_memory=to_cpu_memory) + to_dense(x, order=order, to_cpu_memory=to_cpu_memory) return with WARNS_NUMBA if issubclass(array_type.cls, types.CSBase) and not find_spec("numba") else nullcontext(): - arr = to_dense(x, to_cpu_memory=to_cpu_memory) + arr = to_dense(x, order=order, to_cpu_memory=to_cpu_memory) + assert_expected_cls(x, arr, to_cpu_memory=to_cpu_memory) assert arr.shape == (2, 3) + assert_expected_order(x, arr, order=order) @pytest.mark.parametrize("to_cpu_memory", [True, False], ids=["to_cpu_memory", "not_to_cpu_memory"]) -def test_to_dense_extra(coo_matrix_type: ArrayType[Array], *, to_cpu_memory: bool) -> None: +@pytest.mark.parametrize("order", argvalues=["K", "C", "F"]) # “A” behaves like “K” +def test_to_dense_extra(coo_matrix_type: ArrayType[Array], *, order: Literal["K", "C", "F"], to_cpu_memory: bool) -> None: src_mtx = coo_matrix_type([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + with WARNS_NUMBA if not find_spec("numba") else nullcontext(): - arr = to_dense(src_mtx, to_cpu_memory=to_cpu_memory) + arr = to_dense(src_mtx, order=order, to_cpu_memory=to_cpu_memory) + assert_expected_cls(src_mtx, arr, to_cpu_memory=to_cpu_memory) assert arr.shape == (2, 3) + assert_expected_order(src_mtx, arr, order=order) def assert_expected_cls(orig: Array, converted: Array, *, to_cpu_memory: bool) -> None: @@ -56,3 +63,26 @@ def assert_expected_cls(orig: Array, converted: Array, *, to_cpu_memory: bool) - assert isinstance(converted, types.CupyArray) case _: assert isinstance(converted, np.ndarray) + + +def assert_expected_order(orig: Array, converted: Array, *, order: Literal["K", "C", "F"]) -> None: + order_expected = get_order(orig) if order == "K" else order + if isinstance(converted, types.DaskArray): + pass # TODO + else: + assert converted.flags.c_contiguous == (order_expected == "C") + assert converted.flags.f_contiguous == 
(order_expected == "F") + + +def get_order(orig: Array) -> Literal["C", "F"]: + match orig: + case np.ndarray() | types.CupyArray(): + return "C" if orig.flags["C_CONTIGUOUS"] else "F" + case types.spmatrix | types.CupySpMatrix() | types.CSCDataset() | types.CSRDataset(): + if TYPE_CHECKING: + from scipy.sparse._base import _spbase + + assert isinstance(orig, _spbase | types.CSDataset) + + return "C" if orig.format == "csr" else "F" + raise NotImplementedError From 6dc892525e497f25cb3d783a8969d1c961f37208 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Mon, 5 May 2025 13:09:04 +0200 Subject: [PATCH 2/7] some improvements --- src/fast_array_utils/conv/_to_dense.py | 2 +- tests/test_to_dense.py | 36 ++++++++++++++------------ typings/cupy/_core/core.pyi | 7 ++++- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/fast_array_utils/conv/_to_dense.py b/src/fast_array_utils/conv/_to_dense.py index 39ffdeb..d05d098 100644 --- a/src/fast_array_utils/conv/_to_dense.py +++ b/src/fast_array_utils/conv/_to_dense.py @@ -61,7 +61,7 @@ def _to_dense_cupy(x: GpuArray, /, *, order: Literal["K", "A", "C", "F"] = "K", import cupy as cu x = x.toarray(sparse_order(x, order=order)) if isinstance(x, types.CupySpMatrix) else cu.asarray(x, order=order) - return x.get() if to_cpu_memory else x + return x.get(order="A") if to_cpu_memory else x def sparse_order(x: types.spmatrix | types.sparray | types.CupySpMatrix | types.CSDataset, /, *, order: Literal["K", "A", "C", "F"]) -> Literal["C", "F"]: diff --git a/tests/test_to_dense.py b/tests/test_to_dense.py index 1e97831..666a25e 100644 --- a/tests/test_to_dense.py +++ b/tests/test_to_dense.py @@ -3,7 +3,7 @@ from contextlib import nullcontext from importlib.util import find_spec -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING import numpy as np import pytest @@ -13,7 +13,7 @@ if TYPE_CHECKING: - from typing import TypeAlias + from typing import Literal, TypeAlias from fast_array_utils.typing 
import CpuArray, DiskArray, GpuArray from testing.fast_array_utils import ArrayType @@ -58,7 +58,7 @@ def assert_expected_cls(orig: Array, converted: Array, *, to_cpu_memory: bool) - match (to_cpu_memory, orig): case False, types.DaskArray(): assert isinstance(converted, types.DaskArray) - assert_expected_cls(orig._meta, converted._meta, to_cpu_memory=to_cpu_memory) # noqa: SLF001 + assert_expected_cls(orig.compute(), converted.compute(), to_cpu_memory=to_cpu_memory) case False, types.CupyArray() | types.CupySpMatrix(): assert isinstance(converted, types.CupyArray) case _: @@ -66,23 +66,25 @@ def assert_expected_cls(orig: Array, converted: Array, *, to_cpu_memory: bool) - def assert_expected_order(orig: Array, converted: Array, *, order: Literal["K", "C", "F"]) -> None: - order_expected = get_order(orig) if order == "K" else order - if isinstance(converted, types.DaskArray): - pass # TODO - else: - assert converted.flags.c_contiguous == (order_expected == "C") - assert converted.flags.f_contiguous == (order_expected == "F") + match converted: + case types.CupyArray() | np.ndarray(): + order_expected = get_order(orig) if order == "K" else order + assert converted.flags.c_contiguous == (order_expected == "C") + assert converted.flags.f_contiguous == (order_expected == "F") + case types.DaskArray(): + assert_expected_order(orig, converted.compute(), order=order) + case _: + pytest.fail(f"Unsupported array type: {type(converted)}") def get_order(orig: Array) -> Literal["C", "F"]: match orig: case np.ndarray() | types.CupyArray(): - return "C" if orig.flags["C_CONTIGUOUS"] else "F" - case types.spmatrix | types.CupySpMatrix() | types.CSCDataset() | types.CSRDataset(): - if TYPE_CHECKING: - from scipy.sparse._base import _spbase - - assert isinstance(orig, _spbase | types.CSDataset) - + return "C" if orig.flags.c_contiguous else "F" + case _ if isinstance(orig, types.CSBase | types.CupyCSMatrix | types.CSDataset): return "C" if orig.format == "csr" else "F" - raise 
NotImplementedError + case types.DaskArray(): + return get_order(orig.compute()) + case types.ZarrArray() | types.H5Dataset(): + return "C" + pytest.fail(f"Unsupported array type: {type(orig)}") diff --git a/typings/cupy/_core/core.pyi b/typings/cupy/_core/core.pyi index ccd3874..f8d459e 100644 --- a/typings/cupy/_core/core.pyi +++ b/typings/cupy/_core/core.pyi @@ -3,6 +3,8 @@ from types import EllipsisType from typing import Any, Literal, Self, overload import numpy as np +from cupy.cuda import Stream +from numpy._core.multiarray import flagsobj from numpy.typing import NDArray class ndarray: @@ -10,9 +12,12 @@ class ndarray: shape: tuple[int, ...] size: int ndim: int + flags: flagsobj # cupy-specific - def get(self) -> NDArray[Any]: ... + def get( + self, stream: Stream | None = None, order: Literal["C", "F", "A"] = "C", out: NDArray[Any] | None = None, blocking: bool = True + ) -> NDArray[Any]: ... # operators def __array__(self) -> NDArray[Any]: ... From 396bab0c5c43d51baf2e56552b48fe1b7c888061 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Mon, 5 May 2025 15:22:45 +0200 Subject: [PATCH 3/7] coo typing --- src/fast_array_utils/types.py | 27 ++++++++++++---- src/testing/fast_array_utils/_array_type.py | 8 ++--- tests/conftest.py | 6 ++-- tests/test_test_utils.py | 4 +-- tests/test_to_dense.py | 34 +++++++++++++-------- 5 files changed, 51 insertions(+), 28 deletions(-) diff --git a/src/fast_array_utils/types.py b/src/fast_array_utils/types.py index 99ca6f3..189c433 100644 --- a/src/fast_array_utils/types.py +++ b/src/fast_array_utils/types.py @@ -8,11 +8,13 @@ __all__ = [ + "COOBase", "CSArray", "CSBase", "CSDataset", "CSMatrix", "CupyArray", + "CupyCOOMatrix", "CupyCSCMatrix", "CupyCSMatrix", "CupyCSRMatrix", @@ -22,6 +24,14 @@ "H5Group", "ZarrArray", "ZarrGroup", + "coo_array", + "coo_matrix", + "csc_array", + "csc_matrix", + "csr_array", + "csr_matrix", + "sparray", + "spmatrix", ] T_co = TypeVar("T_co", covariant=True) @@ -29,41 +39,46 @@ # scipy sparse if TYPE_CHECKING: - from scipy.sparse import csc_array, csc_matrix, csr_array, csr_matrix, sparray, spmatrix + from scipy.sparse import coo_array, coo_matrix, csc_array, csc_matrix, csr_array, csr_matrix, sparray, spmatrix else: try: # cs?_array isn’t available in older scipy versions - from scipy.sparse import csc_array, csr_array, sparray + from scipy.sparse import coo_array, csc_array, csr_array, sparray except ImportError: # pragma: no cover + coo_array = type("coo_array", (), {}) csc_array = type("csc_array", (), {}) csr_array = type("csr_array", (), {}) sparray = type("sparray", (), {}) - csc_array.__module__ = csr_array.__module__ = sparray.__module__ = "scipy.sparse" + coo_array.__module__ = csc_array.__module__ = csr_array.__module__ = sparray.__module__ = "scipy.sparse" try: # cs?_matrix is available when scipy is installed - from scipy.sparse import csc_matrix, csr_matrix, spmatrix + from scipy.sparse import coo_matrix, csc_matrix, csr_matrix, spmatrix except ImportError: # pragma: no cover + coo_matrix = type("coo_matrix", (), {}) 
csc_matrix = type("csc_matrix", (), {}) csr_matrix = type("csr_matrix", (), {}) spmatrix = type("spmatrix", (), {}) - csc_matrix.__module__ = csr_matrix.__module__ = spmatrix.__module__ = "scipy.sparse" + coo_matrix.__module__ = csc_matrix.__module__ = csr_matrix.__module__ = spmatrix.__module__ = "scipy.sparse" CSMatrix = csc_matrix | csr_matrix CSArray = csc_array | csr_array CSBase = CSMatrix | CSArray +COOBase = coo_matrix | coo_array """A sparse compressed matrix or array.""" if TYPE_CHECKING or find_spec("cupy"): # cupy always comes with cupyx from cupy import ndarray as CupyArray + from cupyx.scipy.sparse import coo_matrix as CupyCOOMatrix from cupyx.scipy.sparse import csc_matrix as CupyCSCMatrix from cupyx.scipy.sparse import csr_matrix as CupyCSRMatrix from cupyx.scipy.sparse import spmatrix as CupySpMatrix else: # pragma: no cover CupyArray = type("ndarray", (), {}) CupyArray.__module__ = "cupy" + CupyCOOMatrix = type("coo_matrix", (), {}) CupyCSCMatrix = type("csc_matrix", (), {}) CupyCSRMatrix = type("csr_matrix", (), {}) CupySpMatrix = type("spmatrix", (), {}) - CupyCSCMatrix.__module__ = CupyCSRMatrix.__module__ = CupySpMatrix.__module__ = "cupyx.scipy.sparse" + CupyCOOMatrix.__module__ = CupyCSCMatrix.__module__ = CupyCSRMatrix.__module__ = CupySpMatrix.__module__ = "cupyx.scipy.sparse" CupyCSMatrix = CupyCSRMatrix | CupyCSCMatrix diff --git a/src/testing/fast_array_utils/_array_type.py b/src/testing/fast_array_utils/_array_type.py index 0459079..4062a3c 100644 --- a/src/testing/fast_array_utils/_array_type.py +++ b/src/testing/fast_array_utils/_array_type.py @@ -22,14 +22,14 @@ import h5py from numpy.typing import ArrayLike, DTypeLike, NDArray - from fast_array_utils.types import CSBase from fast_array_utils.typing import CpuArray, DiskArray, GpuArray InnerArray = CpuArray | GpuArray | DiskArray Array: TypeAlias = InnerArray | types.DaskArray | types.CSDataset + ExtendedArray = Array | types.COOBase | types.CupyCOOMatrix - Arr = TypeVar("Arr", 
bound=Array, default=Array) - Arr_co = TypeVar("Arr_co", bound=Array, covariant=True) + Arr = TypeVar("Arr", bound=ExtendedArray, default=Array) + Arr_co = TypeVar("Arr_co", bound=ExtendedArray, covariant=True) Inner = TypeVar("Inner", bound="ArrayType[InnerArray, None] | None", default=Any) @@ -305,7 +305,7 @@ def _to_scipy_sparse( /, *, dtype: DTypeLike | None = None, - cls: type[CSBase] | None = None, + cls: type[types.CSBase] | None = None, ) -> types.CSBase: """Convert to a scipy sparse matrix/array.""" if isinstance(x, types.DaskArray): diff --git a/tests/conftest.py b/tests/conftest.py index a348e19..d389f5c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,8 @@ if TYPE_CHECKING: from collections.abc import Callable + from fast_array_utils import types + @pytest.fixture def dask_viz(request: pytest.FixtureRequest, cache: pytest.Cache) -> Callable[[object], None]: @@ -41,5 +43,5 @@ def viz(obj: object) -> None: @pytest.fixture(scope="session", params=COO_PARAMS) -def coo_matrix_type(request: pytest.FixtureRequest) -> ArrayType: - return cast("ArrayType", request.param) +def coo_matrix_type(request: pytest.FixtureRequest) -> ArrayType[types.COOBase | types.CupyCOOMatrix]: + return cast("ArrayType[types.COOBase | types.CupyCOOMatrix]", request.param) diff --git a/tests/test_test_utils.py b/tests/test_test_utils.py index bea9ce5..a78578a 100644 --- a/tests/test_test_utils.py +++ b/tests/test_test_utils.py @@ -15,9 +15,7 @@ if TYPE_CHECKING: from typing import Any - from cupyx.scipy.sparse import coo_matrix as CupyCooMatrix from numpy.typing import DTypeLike, NDArray - from scipy.sparse import coo_array, coo_matrix from testing.fast_array_utils import Array, ArrayType @@ -54,7 +52,7 @@ def test_conv_other(array_type: ArrayType, other_array_type: ArrayType) -> None: @pytest.mark.array_type(skip=Flags.Dask | Flags.Disk | Flags.Gpu) def test_conv_extra( array_type: ArrayType[NDArray[np.number[Any]] | types.CSBase], - coo_matrix_type: 
ArrayType[coo_matrix | coo_array | CupyCooMatrix], + coo_matrix_type: ArrayType[types.COOBase | types.CupyCOOMatrix], ) -> None: src_arr = array_type(np.arange(12).reshape(3, 4), dtype=np.float32) arr = coo_matrix_type(src_arr) diff --git a/tests/test_to_dense.py b/tests/test_to_dense.py index 666a25e..30406ea 100644 --- a/tests/test_to_dense.py +++ b/tests/test_to_dense.py @@ -13,12 +13,14 @@ if TYPE_CHECKING: + from collections.abc import Iterable from typing import Literal, TypeAlias from fast_array_utils.typing import CpuArray, DiskArray, GpuArray from testing.fast_array_utils import ArrayType Array: TypeAlias = CpuArray | GpuArray | DiskArray | types.CSDataset | types.DaskArray + ExtendedArray: TypeAlias = Array | types.COOBase | types.CupyCOOMatrix WARNS_NUMBA = pytest.warns(RuntimeWarning, match="numba is not installed; falling back to slow conversion") @@ -43,7 +45,7 @@ def test_to_dense(array_type: ArrayType[Array], *, order: Literal["K", "C", "F"] @pytest.mark.parametrize("to_cpu_memory", [True, False], ids=["to_cpu_memory", "not_to_cpu_memory"]) @pytest.mark.parametrize("order", argvalues=["K", "C", "F"]) # “A” behaves like “K” -def test_to_dense_extra(coo_matrix_type: ArrayType[Array], *, order: Literal["K", "C", "F"], to_cpu_memory: bool) -> None: +def test_to_dense_extra(coo_matrix_type: ArrayType[types.COOBase | types.CupyCOOMatrix], *, order: Literal["K", "C", "F"], to_cpu_memory: bool) -> None: src_mtx = coo_matrix_type([[1, 2, 3], [4, 5, 6]], dtype=np.float32) with WARNS_NUMBA if not find_spec("numba") else nullcontext(): @@ -54,7 +56,7 @@ def test_to_dense_extra(coo_matrix_type: ArrayType[Array], *, order: Literal["K" assert_expected_order(src_mtx, arr, order=order) -def assert_expected_cls(orig: Array, converted: Array, *, to_cpu_memory: bool) -> None: +def assert_expected_cls(orig: ExtendedArray, converted: Array, *, to_cpu_memory: bool) -> None: match (to_cpu_memory, orig): case False, types.DaskArray(): assert isinstance(converted, 
types.DaskArray) @@ -65,26 +67,32 @@ def assert_expected_cls(orig: Array, converted: Array, *, to_cpu_memory: bool) - assert isinstance(converted, np.ndarray) -def assert_expected_order(orig: Array, converted: Array, *, order: Literal["K", "C", "F"]) -> None: +def assert_expected_order(orig: ExtendedArray, converted: Array, *, order: Literal["K", "C", "F"]) -> None: match converted: case types.CupyArray() | np.ndarray(): - order_expected = get_order(orig) if order == "K" else order - assert converted.flags.c_contiguous == (order_expected == "C") - assert converted.flags.f_contiguous == (order_expected == "F") + orders = {order_exp: converted.flags[f"{order_exp}_CONTIGUOUS"] for order_exp in (get_orders(orig) if order == "K" else {order})} # type: ignore[index] + assert any(orders.values()), orders case types.DaskArray(): assert_expected_order(orig, converted.compute(), order=order) case _: pytest.fail(f"Unsupported array type: {type(converted)}") -def get_order(orig: Array) -> Literal["C", "F"]: +def get_orders(orig: ExtendedArray) -> Iterable[Literal["C", "F"]]: match orig: case np.ndarray() | types.CupyArray(): - return "C" if orig.flags.c_contiguous else "F" - case _ if isinstance(orig, types.CSBase | types.CupyCSMatrix | types.CSDataset): - return "C" if orig.format == "csr" else "F" + if orig.flags.c_contiguous: + yield "C" + if orig.flags.f_contiguous: + yield "F" + case _ if isinstance(orig, types.CSBase | types.COOBase | types.CupyCSMatrix | types.CupyCOOMatrix | types.CSDataset): + if orig.format in {"csr", "coo"}: + yield "C" + if orig.format == {"csc", "coo"}: + yield "F" case types.DaskArray(): - return get_order(orig.compute()) + yield from get_orders(orig.compute()) case types.ZarrArray() | types.H5Dataset(): - return "C" - pytest.fail(f"Unsupported array type: {type(orig)}") + yield "C" + case _: + pytest.fail(f"Unsupported array type: {type(orig)}") From f8de1e96d44d40fdb85ec69f49ec129526ed6859 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Mon, 5 May 2025 15:41:32 +0200 Subject: [PATCH 4/7] fix rest --- src/fast_array_utils/conv/_to_dense.py | 4 ++++ tests/test_to_dense.py | 18 +++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/fast_array_utils/conv/_to_dense.py b/src/fast_array_utils/conv/_to_dense.py index d05d098..5656a3e 100644 --- a/src/fast_array_utils/conv/_to_dense.py +++ b/src/fast_array_utils/conv/_to_dense.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: MPL-2.0 from __future__ import annotations +import warnings from functools import partial, singledispatch from typing import TYPE_CHECKING, cast @@ -41,6 +42,9 @@ def _to_dense_cs(x: types.spmatrix | types.sparray, /, *, order: Literal["K", "A def _to_dense_dask(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any] | types.DaskArray: from . import to_dense + if order == "F": + msg = f"{order=!r} will probably be ignored: Dask can not be made to emit F-contiguous arrays reliably." 
+ warnings.warn(msg, RuntimeWarning, stacklevel=4) x = x.map_blocks(partial(to_dense, order=order, to_cpu_memory=to_cpu_memory)) return x.compute() if to_cpu_memory else x # type: ignore[return-value] diff --git a/tests/test_to_dense.py b/tests/test_to_dense.py index 30406ea..bb61218 100644 --- a/tests/test_to_dense.py +++ b/tests/test_to_dense.py @@ -35,12 +35,19 @@ def test_to_dense(array_type: ArrayType[Array], *, order: Literal["K", "C", "F"] to_dense(x, order=order, to_cpu_memory=to_cpu_memory) return - with WARNS_NUMBA if issubclass(array_type.cls, types.CSBase) and not find_spec("numba") else nullcontext(): + with ( + pytest.warns(RuntimeWarning, match="Dask can not be made to emit F-contiguous arrays") + if (order == "F" and array_type.cls is types.DaskArray) + else nullcontext(), + WARNS_NUMBA if issubclass(array_type.cls, types.CSBase) and not find_spec("numba") else nullcontext(), + ): arr = to_dense(x, order=order, to_cpu_memory=to_cpu_memory) assert_expected_cls(x, arr, to_cpu_memory=to_cpu_memory) assert arr.shape == (2, 3) - assert_expected_order(x, arr, order=order) + # Dask is unreliable: for explicit “F”, we emit a warning (tested above), for “K” we just ignore the result + if not (array_type.cls is types.DaskArray and order in {"F", "K"}): + assert_expected_order(x, arr, order=order) @pytest.mark.parametrize("to_cpu_memory", [True, False], ids=["to_cpu_memory", "not_to_cpu_memory"]) @@ -79,6 +86,11 @@ def assert_expected_order(orig: ExtendedArray, converted: Array, *, order: Liter def get_orders(orig: ExtendedArray) -> Iterable[Literal["C", "F"]]: + """Get the orders of an array. + + Numpy arrays with at most one axis of a length >1 are valid in both orders. + So are COO sparse matrices/arrays. 
+ """ match orig: case np.ndarray() | types.CupyArray(): if orig.flags.c_contiguous: @@ -88,7 +100,7 @@ def get_orders(orig: ExtendedArray) -> Iterable[Literal["C", "F"]]: case _ if isinstance(orig, types.CSBase | types.COOBase | types.CupyCSMatrix | types.CupyCOOMatrix | types.CSDataset): if orig.format in {"csr", "coo"}: yield "C" - if orig.format == {"csc", "coo"}: + if orig.format in {"csc", "coo"}: yield "F" case types.DaskArray(): yield from get_orders(orig.compute()) From 2af572769e1ec0780276a7df3a32f5c6316d99f4 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Mon, 5 May 2025 15:58:11 +0200 Subject: [PATCH 5/7] fix docs --- src/fast_array_utils/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fast_array_utils/types.py b/src/fast_array_utils/types.py index 189c433..6269c69 100644 --- a/src/fast_array_utils/types.py +++ b/src/fast_array_utils/types.py @@ -58,10 +58,10 @@ csr_matrix = type("csr_matrix", (), {}) spmatrix = type("spmatrix", (), {}) coo_matrix.__module__ = csc_matrix.__module__ = csr_matrix.__module__ = spmatrix.__module__ = "scipy.sparse" +COOBase = coo_matrix | coo_array CSMatrix = csc_matrix | csr_matrix CSArray = csc_array | csr_array CSBase = CSMatrix | CSArray -COOBase = coo_matrix | coo_array """A sparse compressed matrix or array.""" From 5c31a9394e20a54da78c0ec55e287eb794a1f0cd Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 6 May 2025 10:17:28 +0200 Subject: [PATCH 6/7] more docs --- src/fast_array_utils/conv/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/fast_array_utils/conv/__init__.py b/src/fast_array_utils/conv/__init__.py index afa818d..485e8fd 100644 --- a/src/fast_array_utils/conv/__init__.py +++ b/src/fast_array_utils/conv/__init__.py @@ -57,6 +57,14 @@ def to_dense( Input object to be converted. order The order of the output array: ``C`` (row-major) or ``F`` (column-major). ``K`` and ``A`` derive the order from ``x``. 
+ + The default matches numpy, and therefore diverges from the ``scipy.sparse`` matrices’ + :meth:`~scipy.sparse.csr_array.toarray`\ ’s default behavior + of always returning a ``C``-contiguous array. + Instead, CSC matrices become F-contiguous arrays when `order="K"` (the default). + + Dask :class:`~dask.array.Array`\ ’s concatenation behavior will result in ``order`` + having no effect on the :func:`dask.compute` / ``to_cpu_memory=True`` result. to_cpu_memory Also load data into memory (resulting in a :class:`numpy.ndarray`). From 3cc4b22ca9d06619d3c1226380ac7642a6803ebb Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 6 May 2025 10:28:37 +0200 Subject: [PATCH 7/7] oops --- src/fast_array_utils/conv/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fast_array_utils/conv/__init__.py b/src/fast_array_utils/conv/__init__.py index 485e8fd..58afd56 100644 --- a/src/fast_array_utils/conv/__init__.py +++ b/src/fast_array_utils/conv/__init__.py @@ -61,7 +61,7 @@ def to_dense( The default matches numpy, and therefore diverges from the ``scipy.sparse`` matrices’ :meth:`~scipy.sparse.csr_array.toarray`\ ’s default behavior of always returning a ``C``-contiguous array. - Instead, CSC matrices become F-contiguous arrays when `order="K"` (the default). + Instead, CSC matrices become F-contiguous arrays when ``order="K"`` (the default). Dask :class:`~dask.array.Array`\ ’s concatenation behavior will result in ``order`` having no effect on the :func:`dask.compute` / ``to_cpu_memory=True`` result.