diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3dd09d2e..83a4fb6c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,14 @@ Changelog ========= +3.0.6 - 2021-11-11 +------------------ + +**Bug fix** + +- We fixed a bug in :meth:`tabmat.SplitMatrix.matvec`, where incorrect matrix-vector products were computed when a ``SplitMatrix`` did not contain any dense components. + + 3.0.5 - 2021-11-05 ------------------ @@ -184,7 +192,7 @@ We are trying to make releases for Windows. - Fix a bug in `matvec` for categorical components when the number of categories exceeds the number of rows. -0.0.6 - 2020-08-03 +0.0.6 - 2020-08-03 ------------------ See git history. diff --git a/src/tabmat/split_matrix.py b/src/tabmat/split_matrix.py index 5fc89273..e3f1fe61 100644 --- a/src/tabmat/split_matrix.py +++ b/src/tabmat/split_matrix.py @@ -343,14 +343,15 @@ def matvec( # as the target for storing the final output. This reduces the number # of output arrays allocated from 2 to 1. is_matrix_dense = [isinstance(m, DenseMatrix) for m in self.matrices] - dense_matrix_idx = np.argmax(is_matrix_dense) if np.any(is_matrix_dense): + dense_matrix_idx = np.argmax(is_matrix_dense) sub_cols = subset_cols[dense_matrix_idx] idx = self.indices[dense_matrix_idx] mat = self.matrices[dense_matrix_idx] in_vec = v[idx, ...] 
out = np.asarray(mat.matvec(in_vec, sub_cols, out), dtype=out_dtype) else: + dense_matrix_idx = -1 out = _prepare_out_array(out, out_shape, out_dtype) for i, (sub_cols, idx, mat) in enumerate( diff --git a/tests/test_split_matrix.py b/tests/test_split_matrix.py index 573ae10e..a67d4a67 100644 --- a/tests/test_split_matrix.py +++ b/tests/test_split_matrix.py @@ -1,10 +1,12 @@ from typing import List, Optional, Union import numpy as np +import pandas as pd import pytest import scipy.sparse as sps import tabmat as tm +from tabmat import from_pandas from tabmat.constructor import _split_sparse_and_dense_parts from tabmat.dense_matrix import DenseMatrix from tabmat.ext.sparse import csr_dense_sandwich @@ -237,3 +239,15 @@ def test_init_from_1d(): res = SplitMatrix([m1, m2]) assert res.shape == (10, 3) + + +@pytest.mark.parametrize("n_rows", [5, 10, 25]) +def test_matvec(n_rows): + np.random.seed(1234) + n_cols = 2 + categories = [f"cat_{val}" for val in range(5)] + X = pd.DataFrame(np.random.choice(categories, size=(n_rows, n_cols))).astype( + "category" + ) + mat = from_pandas(X, cat_threshold=0) + np.testing.assert_allclose(mat.matvec(np.array(mat.shape[1] * [1])), n_cols)