From 4612909d3a711665f24d610fcb8beca250e1d86e Mon Sep 17 00:00:00 2001 From: Marc-Antoine Schmidt Date: Wed, 21 Jul 2021 20:28:43 -0400 Subject: [PATCH 01/10] Improvements to SplitMatrix - Allow SplitMatrix to be constructed from another SplitMatrix. - Allow inputs of SplitMatrix to be 1-d - Implement __getitem__ for column subset - Also had to implement column subsetting for CategoricalMatrix - __repr__ uses the __repr__ function of components instead of str() --- src/quantcore/matrix/categorical_matrix.py | 27 +++++++++++++-- src/quantcore/matrix/split_matrix.py | 40 ++++++++++++++++++---- 2 files changed, 57 insertions(+), 10 deletions(-) diff --git a/src/quantcore/matrix/categorical_matrix.py b/src/quantcore/matrix/categorical_matrix.py index 05564a6f..3c72b123 100644 --- a/src/quantcore/matrix/categorical_matrix.py +++ b/src/quantcore/matrix/categorical_matrix.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Tuple, Union +from typing import Any, List, Optional, Tuple, Union import numpy as np import pandas as pd @@ -7,6 +7,7 @@ from .ext.categorical import matvec, sandwich_categorical, transpose_matvec from .ext.split import sandwich_cat_cat, sandwich_cat_dense from .matrix_base import MatrixBase +from .sparse_matrix import SparseMatrix from .util import ( check_matvec_out_shape, check_transpose_matvec_out_shape, @@ -15,6 +16,21 @@ ) +def _is_indexer_full_length(full_length: int, indexer: Any): + if isinstance(indexer, list): + if (np.asarray(indexer) > full_length - 1).any(): + raise IndexError("Index out-of-range.") + return len(set(indexer)) == full_length + elif isinstance(indexer, np.ndarray): + if (indexer > full_length - 1).any(): + raise IndexError("Index out-of-range.") + return len(np.unique(indexer)) == full_length + elif isinstance(indexer, slice): + return len(range(*indexer.indices(full_length))) == full_length + else: + raise ValueError(f"Indexing with {type(indexer)} is not allowed.") + + def _none_to_slice(arr: Optional[np.ndarray], n: int) -> Union[slice, np.ndarray]: if arr is None or len(arr) == n: return slice(None, None, None) @@ -262,8 +278,13 @@ def get_col_stds(self, weights: np.ndarray, col_means: np.ndarray) -> np.ndarray def __getitem__(self, item): if isinstance(item, tuple): row, col = item - if not (isinstance(col, slice) and col == slice(None, None, None)): - raise IndexError("Only column indexing is supported.") + if _is_indexer_full_length(self.shape[1], col): + if isinstance(row, int): + row = [row] + return CategoricalMatrix(self.cat[row]) + else: + # return a SparseMatrix if we subset columns + return SparseMatrix(self.tocsr()[row, col], dtype=self.dtype) else: row = item if isinstance(row, int): diff --git a/src/quantcore/matrix/split_matrix.py b/src/quantcore/matrix/split_matrix.py index 22554fa4..aa241080 100644 --- a/src/quantcore/matrix/split_matrix.py +++ b/src/quantcore/matrix/split_matrix.py @@ -127,13 +127,16 @@ def __init__( indices: Optional[List[np.ndarray]] = None, ): # First check that all matrices are valid types - for _, mat in enumerate(matrices): + for i, mat in enumerate(matrices): if not isinstance(mat, MatrixBase): raise ValueError( "Expected all elements of matrices to be subclasses of MatrixBase." ) if isinstance(mat, SplitMatrix): - raise ValueError("Elements of matrices cannot be SplitMatrix.") + # Flatten out the SplitMatrix + for j, imat in enumerate(mat.matrices): + matrices.insert(i + j, imat) + del matrices[i + j + 1] # removing the original SplitMatrix # Now that we know these are all MatrixBase, we can check consistent # shapes and dtypes. @@ -151,8 +154,10 @@ def __init__( f"but the first matrix has first dimension {n_row} and matrix {i} has " f"first dimension {mat.shape[0]}." ) - if len(mat.shape) != 2: - raise ValueError("All matrices should be two dimensional.") + if mat.ndim == 1: + matrices[i] = mat[:, np.newaxis] + elif mat.ndim > 2: + raise ValueError("All matrices should be at most two dimensional.") if indices is None: indices = [] @@ -377,14 +382,35 @@ def __getitem__(self, key): return SplitMatrix([mat[row, :] for mat in self.matrices], self.indices) else: - raise NotImplementedError( - f"Only row indexing is supported. Index passed was {key}." + if isinstance(col, int): + return self.getcol(col) + elif isinstance(col, list): + col_array = np.asarray(col) + elif isinstance(col, slice): + col_array = np.arange(*col.indices(self.shape[1])) + else: + raise ValueError(f"Indexing with {type(col)} is not allowed.") + subset_cols_indices, subset_cols, n_cols = self._split_col_subsets( + col_array ) + new_mat = [] + new_ind = [] + for ( + nind, + oind, + mat, + ) in zip(subset_cols_indices, subset_cols, self.matrices): + new_mat.append(mat[row, oind]) + new_ind.append(nind) + return SplitMatrix(matrices=new_mat, indices=new_ind) def __repr__(self): out = "SplitMatrix:" for i, mat in enumerate(self.matrices): - out += f"\n\nComponent {i} with type {mat.__class__.__name__}\n" + str(mat) + out += ( + f"\n\nComponent {i} with type {mat.__class__.__name__}\n" + + mat.__repr__() + ) return out __array_priority__ = 13 From 2c75a4f716227edbb823d5cf400f1b09affe427d Mon Sep 17 00:00:00 2001 From: Marc-Antoine Schmidt Date: Wed, 21 Jul 2021 20:43:31 -0400 Subject: [PATCH 02/10] removed test checking not 1d --- tests/test_split_matrix.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/test_split_matrix.py b/tests/test_split_matrix.py index dadc75df..14ea76e3 100644 --- a/tests/test_split_matrix.py +++ b/tests/test_split_matrix.py @@ -227,10 +227,3 @@ def check(mat): np.testing.assert_almost_equal(res, expected) many_random_tests(check) - - -def test_oned_dense_mat(): - m1 = mx.CategoricalMatrix(np.random.randint(0, 10, 10)) - m2 = mx.DenseMatrix(np.random.rand(10)) - with pytest.raises(ValueError): - mx.SplitMatrix([m1, m2]) From 9f44f5852c6b7f105aee2f5e0807e43878fd86bc Mon Sep 17 00:00:00 2001 From: Marc-Antoine Schmidt Date: Thu, 22 Jul 2021 00:28:01 -0400 Subject: [PATCH 03/10] column mapping and unordered split_col_subsets --- src/quantcore/matrix/split_matrix.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/quantcore/matrix/split_matrix.py b/src/quantcore/matrix/split_matrix.py index aa241080..1118c40c 100644 --- a/src/quantcore/matrix/split_matrix.py +++ b/src/quantcore/matrix/split_matrix.py @@ -206,6 +206,25 @@ def __init__( assert self.shape[1] > 0 + self.column_map = self._create_col_mapping() + + def _create_col_mapping(self): + colmap = np.empty(shape=(self.shape[1], 2), dtype=int) + for i, comp_idx in enumerate(self.indices): + for j, idx in enumerate(comp_idx): + colmap[idx] = [i, j] + return colmap + + def _split_col_subsets_unordered(self, cols): + n_comp = len(self.indices) + new_idx = [[] for _ in range(n_comp)] + current_col_loc = [[] for _ in range(n_comp)] + for counter, i in enumerate(cols): + comp, idx = self.column_map[i] + new_idx[comp].append(counter) + current_col_loc[comp].append(idx) + return new_idx, current_col_loc + def _split_col_subsets( self, cols: Optional[np.ndarray] ) -> Tuple[List[np.ndarray], List[Optional[np.ndarray]], int]: @@ -255,11 +274,8 @@ def getcol(self, i: int) -> Union[np.ndarray, sps.csr_matrix]: """Return matrix column at specified index.""" # wrap-around indexing i %= self.shape[1] - for mat, idx in zip(self.matrices, self.indices): - if i in idx: - loc = np.where(idx == i)[0][0] - return mat.getcol(loc) - raise RuntimeError(f"Column {i} was not found.") + comp, idx = self.column_map[i] + return self.matrices[comp].getcol(idx) def sandwich( self, @@ -390,7 +406,7 @@ def __getitem__(self, key): col_array = np.arange(*col.indices(self.shape[1])) else: raise ValueError(f"Indexing with {type(col)} is not allowed.") - subset_cols_indices, subset_cols, n_cols = self._split_col_subsets( + subset_cols_indices, subset_cols = self._split_col_subsets_unordered( col_array ) new_mat = [] From 831ef6dff2d449cb11d392847eae87d8fa15fbb0 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Schmidt Date: Fri, 23 Jul 2021 20:46:51 -0400 Subject: [PATCH 04/10] testing split matrix creation --- tests/test_matrices.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_matrices.py b/tests/test_matrices.py index be654dda..7ad28707 100644 --- a/tests/test_matrices.py +++ b/tests/test_matrices.py @@ -577,3 +577,10 @@ def test_pandas_to_matrix(): # was being changed in place. assert df["cl_obj"].dtype == object assert df["ds"].dtype == np.float64 + + +@pytest.mark.parametrize("mat", get_all_matrix_base_subclass_mats()) +def test_split_matrix_creation(mat): + sm = mx.SplitMatrix(matrices=[mat, mat]) + assert sm.shape[0] == mat.shape[0] + assert sm.shape[1] == 2 * mat.shape[1] From dc7b2a9d66f9dd89ae2adda4d83e674331963d9b Mon Sep 17 00:00:00 2001 From: Marc-Antoine Schmidt Date: Thu, 29 Jul 2021 01:19:49 -0400 Subject: [PATCH 05/10] don't modify in place + windows CI --- .github/workflows/tests-win.yml | 7 +------ src/quantcore/matrix/split_matrix.py | 26 ++++++++++++++------------ 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/.github/workflows/tests-win.yml b/.github/workflows/tests-win.yml index fe03754a..0e06e5e6 100644 --- a/.github/workflows/tests-win.yml +++ b/.github/workflows/tests-win.yml @@ -1,10 +1,5 @@ name: CI -on: - push: - paths: - - '.github/**' - - '**/*.pyx' - - '**/*.cpp' +on: push jobs: windows-ci: name: "Win - tests - Python ${{ matrix.PYTHON_VERSION }}" diff --git a/src/quantcore/matrix/split_matrix.py b/src/quantcore/matrix/split_matrix.py index 1118c40c..c5a94c05 100644 --- a/src/quantcore/matrix/split_matrix.py +++ b/src/quantcore/matrix/split_matrix.py @@ -126,27 +126,29 @@ def __init__( matrices: List[Union[DenseMatrix, SparseMatrix, CategoricalMatrix]], indices: Optional[List[np.ndarray]] = None, ): + flatten_matrices = [] # First check that all matrices are valid types - for i, mat in enumerate(matrices): + for mat in matrices: if not isinstance(mat, MatrixBase): raise ValueError( "Expected all elements of matrices to be subclasses of MatrixBase." ) if isinstance(mat, SplitMatrix): # Flatten out the SplitMatrix - for j, imat in enumerate(mat.matrices): - matrices.insert(i + j, imat) - del matrices[i + j + 1] # removing the original SplitMatrix + for imat in mat.matrices: + flatten_matrices.append(imat) + else: + flatten_matrices.append(mat) # Now that we know these are all MatrixBase, we can check consistent # shapes and dtypes. - self.dtype = matrices[0].dtype - n_row = matrices[0].shape[0] - for i, mat in enumerate(matrices): + self.dtype = flatten_matrices[0].dtype + n_row = flatten_matrices[0].shape[0] + for i, mat in enumerate(flatten_matrices): if mat.dtype != self.dtype: warnings.warn( "Matrices do not all have the same dtype. Dtypes are " - f"{[elt.dtype for elt in matrices]}." + f"{[elt.dtype for elt in flatten_matrices]}." ) if not mat.shape[0] == n_row: raise ValueError( @@ -155,14 +157,14 @@ def __init__( f"first dimension {mat.shape[0]}." ) if mat.ndim == 1: - matrices[i] = mat[:, np.newaxis] + flatten_matrices[i] = mat[:, np.newaxis] elif mat.ndim > 2: raise ValueError("All matrices should be at most two dimensional.") if indices is None: indices = [] current_idx = 0 - for mat in matrices: + for mat in flatten_matrices: indices.append( np.arange(current_idx, current_idx + mat.shape[1], dtype=np.int64) ) @@ -188,14 +190,14 @@ def __init__( assert isinstance(indices, list) - for i, (mat, idx) in enumerate(zip(matrices, indices)): + for i, (mat, idx) in enumerate(zip(flatten_matrices, indices)): if not mat.shape[1] == len(idx): raise ValueError( f"Element {i} of indices should should have length {mat.shape[1]}, " f"but it has shape {idx.shape}" ) - filtered_mats, filtered_idxs = _filter_out_empty(matrices, indices) + filtered_mats, filtered_idxs = _filter_out_empty(flatten_matrices, indices) combined_matrices, combined_indices = _combine_matrices( filtered_mats, filtered_idxs ) From f45a69c01e096495126d3312425427cda5ad8b10 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Schmidt Date: Thu, 29 Jul 2021 01:37:41 -0400 Subject: [PATCH 06/10] add Luca's test (temporary) --- tests/test_matrices.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_matrices.py b/tests/test_matrices.py index 7ad28707..821fc5f6 100644 --- a/tests/test_matrices.py +++ b/tests/test_matrices.py @@ -584,3 +584,9 @@ def test_split_matrix_creation(mat): sm = mx.SplitMatrix(matrices=[mat, mat]) assert sm.shape[0] == mat.shape[0] assert sm.shape[1] == 2 * mat.shape[1] + + +def test_Luca(): + df = pd.DataFrame({"u": ["a", "b"], "v": ["a", "b"]}) + X = mx.from_pandas(df, cat_threshold=False, object_as_cat=True) + X[:, [0, 1]] From 8ddb627d4bdad9c6684a6f530026c090a922a281 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Schmidt Date: Thu, 29 Jul 2021 13:48:06 -0400 Subject: [PATCH 07/10] filter out empty matrices --- src/quantcore/matrix/split_matrix.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/quantcore/matrix/split_matrix.py b/src/quantcore/matrix/split_matrix.py index c5a94c05..d6ccbb70 100644 --- a/src/quantcore/matrix/split_matrix.py +++ b/src/quantcore/matrix/split_matrix.py @@ -394,7 +394,7 @@ def __getitem__(self, key): row = key col = slice(None, None, None) # all columns - if col == slice(None, None, None): + if np.any(col == slice(None, None, None)): if isinstance(row, int): row = [row] @@ -404,6 +404,8 @@ def __getitem__(self, key): return self.getcol(col) elif isinstance(col, list): col_array = np.asarray(col) + elif isinstance(col, np.ndarray): + col_array = col elif isinstance(col, slice): col_array = np.arange(*col.indices(self.shape[1])) else: @@ -420,7 +422,8 @@ def __getitem__(self, key): ) in zip(subset_cols_indices, subset_cols, self.matrices): new_mat.append(mat[row, oind]) new_ind.append(nind) - return SplitMatrix(matrices=new_mat, indices=new_ind) + filtered_mat, filtered_ind = _filter_out_empty(new_mat, new_ind) + return SplitMatrix(matrices=filtered_mat, indices=filtered_ind) def __repr__(self): out = "SplitMatrix:" From 42b78eb3a7d14bf62e66f4b1c08c67b0d540baff Mon Sep 17 00:00:00 2001 From: Marc-Antoine Schmidt Date: Thu, 29 Jul 2021 13:50:52 -0400 Subject: [PATCH 08/10] Update src/quantcore/matrix/split_matrix.py added docstring Co-authored-by: Ben Thompson --- src/quantcore/matrix/split_matrix.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/quantcore/matrix/split_matrix.py b/src/quantcore/matrix/split_matrix.py index d6ccbb70..d512e4f0 100644 --- a/src/quantcore/matrix/split_matrix.py +++ b/src/quantcore/matrix/split_matrix.py @@ -211,6 +211,7 @@ def __init__( self.column_map = self._create_col_mapping() def _create_col_mapping(self): + """Define an array that maps from the true column index in the implicitly defined matrix to the local index within the indices array.""" colmap = np.empty(shape=(self.shape[1], 2), dtype=int) for i, comp_idx in enumerate(self.indices): for j, idx in enumerate(comp_idx): From cc7afeceafac38e78d20f92f2159191a21368807 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Schmidt Date: Thu, 29 Jul 2021 14:16:33 -0400 Subject: [PATCH 09/10] docstring formatting --- src/quantcore/matrix/split_matrix.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/quantcore/matrix/split_matrix.py b/src/quantcore/matrix/split_matrix.py index d512e4f0..b05094e1 100644 --- a/src/quantcore/matrix/split_matrix.py +++ b/src/quantcore/matrix/split_matrix.py @@ -211,7 +211,12 @@ def __init__( self.column_map = self._create_col_mapping() def _create_col_mapping(self): - """Define an array that maps from the true column index in the implicitly defined matrix to the local index within the indices array.""" + """Map from the global index in a SplitMatrix to the local position. + + For example, if the column in position 4 of a SplitMatrix is located at + position 1 of the second component of the SplitMatrix, the resulting + array will have [2, 1] located at position 4 of the mapping. + """ colmap = np.empty(shape=(self.shape[1], 2), dtype=int) for i, comp_idx in enumerate(self.indices): for j, idx in enumerate(comp_idx): From 0ce8c2d029c2b801e4dbda6f76c6663e7acdbfa6 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Schmidt Date: Tue, 3 Aug 2021 14:54:56 -0400 Subject: [PATCH 10/10] Removed advanced indexing This is a big commit with many changes: - partial support of integer indexing - removed advanced indexing for densematrix and sparsematrix - ensure that indexing of splitmatrix generates basematrix type - partial fix of standardizedmatrix indexing - added indexing tests (currently fails) --- src/quantcore/matrix/categorical_matrix.py | 7 ++- src/quantcore/matrix/dense_matrix.py | 18 +++++++ src/quantcore/matrix/sparse_matrix.py | 11 +++++ src/quantcore/matrix/split_matrix.py | 29 +++++++++-- src/quantcore/matrix/standardized_mat.py | 13 ++++- src/quantcore/matrix/util.py | 24 ++++++++++ tests/test_matrices.py | 56 ++++++++++++++++++++-- 7 files changed, 147 insertions(+), 11 deletions(-) diff --git a/src/quantcore/matrix/categorical_matrix.py b/src/quantcore/matrix/categorical_matrix.py index 3c72b123..9b099e77 100644 --- a/src/quantcore/matrix/categorical_matrix.py +++ b/src/quantcore/matrix/categorical_matrix.py @@ -17,7 +17,9 @@ def _is_indexer_full_length(full_length: int, indexer: Any): - if isinstance(indexer, list): + if isinstance(indexer, int): + return full_length == 1 + elif isinstance(indexer, list): if (np.asarray(indexer) > full_length - 1).any(): raise IndexError("Index out-of-range.") return len(set(indexer)) == full_length @@ -78,7 +80,7 @@ def _matvec_setup( other: Union[List, np.ndarray], cols: np.ndarray = None, ) -> Tuple[np.ndarray, Optional[np.ndarray]]: - other = np.asarray(other) + other = np.squeeze(np.asarray(other)) if other.ndim > 1: raise NotImplementedError( """CategoricalMatrix.matvec is only implemented for 1d arrays.""" @@ -284,6 +286,7 @@ def __getitem__(self, item): return CategoricalMatrix(self.cat[row]) else: # return a SparseMatrix if we subset columns + # TODO: this is inefficient. See issue #101. return SparseMatrix(self.tocsr()[row, col], dtype=self.dtype) else: row = item diff --git a/src/quantcore/matrix/dense_matrix.py b/src/quantcore/matrix/dense_matrix.py index 61e812ce..b753bd8a 100644 --- a/src/quantcore/matrix/dense_matrix.py +++ b/src/quantcore/matrix/dense_matrix.py @@ -1,3 +1,4 @@ +from collections.abc import Sequence from typing import List, Optional, Union import numpy as np @@ -36,6 +37,13 @@ def __new__(cls, input_array): # noqa obj = np.asarray(input_array).view(cls) if not np.issubdtype(obj.dtype, np.floating): raise NotImplementedError("DenseMatrix is only implemented for float data") + if obj.ndim == 1: + # when input is one-dimensional, convert to a N x 1 matrix + obj = obj[:, np.newaxis] + elif obj.ndim > 2: + raise ValueError( + "DenseMatrix only accepts inputs with at most two dimensions" + ) return obj def __array_finalize__(self, obj): @@ -151,3 +159,13 @@ def matvec( """Perform self[:, cols] @ other.""" check_matvec_out_shape(self, out) return self._matvec_helper(vec, None, cols, out, False) + + def __getitem__(self, item): + if isinstance(item, tuple) and len(item) == 2: + row, col = item + if isinstance(row, (Sequence, np.ndarray)) and isinstance( + col, (Sequence, np.ndarray) + ): + return super().__getitem__(np.ix_(row, col)) + + return super().__getitem__(item) diff --git a/src/quantcore/matrix/sparse_matrix.py b/src/quantcore/matrix/sparse_matrix.py index 261ce5bf..885f2a14 100644 --- a/src/quantcore/matrix/sparse_matrix.py +++ b/src/quantcore/matrix/sparse_matrix.py @@ -1,3 +1,4 @@ +from collections.abc import Sequence from typing import List, Optional, Union import numpy as np @@ -182,3 +183,13 @@ def get_col_stds(self, weights: np.ndarray, col_means: np.ndarray) -> np.ndarray def astype(self, dtype, order="K", casting="unsafe", copy=True): """Return SparseMatrix cast to new type.""" return super(SparseMatrix, self).astype(dtype, casting, copy) + + def __getitem__(self, item): + if isinstance(item, tuple) and len(item) == 2: + row, col = item + if isinstance(row, (Sequence, np.ndarray)) and isinstance( + col, (Sequence, np.ndarray) + ): + return super().__getitem__(np.ix_(row, col)) + + return super().__getitem__(item) diff --git a/src/quantcore/matrix/split_matrix.py b/src/quantcore/matrix/split_matrix.py index b05094e1..e5cf9763 100644 --- a/src/quantcore/matrix/split_matrix.py +++ b/src/quantcore/matrix/split_matrix.py @@ -1,5 +1,5 @@ import warnings -from typing import List, Optional, Tuple, Union +from typing import Any, List, Optional, Tuple, Union import numpy as np from scipy import sparse as sps @@ -9,6 +9,7 @@ from .ext.split import is_sorted, split_col_subsets from .matrix_base import MatrixBase from .sparse_matrix import SparseMatrix +from .standardized_mat import StandardizedMatrix from .util import ( check_matvec_out_shape, check_transpose_matvec_out_shape, @@ -16,6 +17,24 @@ ) +def as_mx(a: Any): + """Convert an array to a corresponding MatrixBase type. + + If the input is already a MatrixBase, return untouched. + If the input is sparse, return a SparseMatrix. + If the input is a numpy array, return a DenseMatrix. + Raise an error is input is another type. + """ + if isinstance(a, (MatrixBase, StandardizedMatrix)): + return a + elif sps.issparse(a): + return SparseMatrix(a) + elif isinstance(a, np.ndarray): + return DenseMatrix(a) + else: + raise ValueError(f"Cannot convert type {type(a)} to Matrix.") + + def split_sparse_and_dense_parts( arg1: sps.csc_matrix, threshold: float = 0.1 ) -> Tuple[DenseMatrix, SparseMatrix, np.ndarray, np.ndarray]: @@ -342,7 +361,7 @@ def matvec( assert not isinstance(v, sps.spmatrix) check_matvec_out_shape(self, out) - v = np.asarray(v) + v = np.squeeze(np.asarray(v)) if v.shape[0] != self.shape[1]: raise ValueError(f"shapes {self.shape} and {v.shape} not aligned") @@ -407,7 +426,7 @@ def __getitem__(self, key): return SplitMatrix([mat[row, :] for mat in self.matrices], self.indices) else: if isinstance(col, int): - return self.getcol(col) + return self.getcol(col)[row] elif isinstance(col, list): col_array = np.asarray(col) elif isinstance(col, np.ndarray): @@ -426,7 +445,9 @@ def __getitem__(self, key): oind, mat, ) in zip(subset_cols_indices, subset_cols, self.matrices): - new_mat.append(mat[row, oind]) + if len(oind) == 0: + continue + new_mat.append(as_mx(mat[row, oind].reshape(-1, len(nind)))) new_ind.append(nind) filtered_mat, filtered_ind = _filter_out_empty(new_mat, new_ind) return SplitMatrix(matrices=filtered_mat, indices=filtered_ind) diff --git a/src/quantcore/matrix/standardized_mat.py b/src/quantcore/matrix/standardized_mat.py index cbc6cbf8..7706a7ac 100644 --- a/src/quantcore/matrix/standardized_mat.py +++ b/src/quantcore/matrix/standardized_mat.py @@ -3,8 +3,9 @@ import numpy as np from scipy import sparse as sps -from . import MatrixBase, SparseMatrix -from .util import set_up_rows_or_cols, setup_restrictions +from .matrix_base import MatrixBase +from .sparse_matrix import SparseMatrix +from .util import _get_expected_shape, set_up_rows_or_cols, setup_restrictions class StandardizedMatrix: @@ -248,11 +249,19 @@ def __getitem__(self, item): col = slice(None, None, None) mat_part = self.mat.__getitem__(item) + if hasattr(mat_part, "reshape"): + expected_shape = _get_expected_shape(self.shape, item) + mat_part = mat_part.reshape(expected_shape) shift_part = self.shift[col] mult_part = self.mult if mult_part is not None: mult_part = np.atleast_1d(mult_part[col]) + if not isinstance(mat_part, MatrixBase): + if mult_part is not None: + mat_part = mat_part * mult_part + return mat_part + shift_part + if isinstance(row, int): out = mat_part.A if mult_part is not None: diff --git a/src/quantcore/matrix/util.py b/src/quantcore/matrix/util.py index 62945ba1..39bb1d1b 100644 --- a/src/quantcore/matrix/util.py +++ b/src/quantcore/matrix/util.py @@ -40,3 +40,27 @@ def check_transpose_matvec_out_shape(mat, out: Optional[np.ndarray]) -> None: def check_matvec_out_shape(mat, out: Optional[np.ndarray]) -> None: """Assert that the first dimension of the matvec output is correct.""" _check_out_shape(out, mat.shape[0]) + + +def _get_expected_axis_length(orig_length, indexer): + if isinstance(indexer, int): + return 1 + if isinstance(indexer, list): + return len(indexer) + elif isinstance(indexer, np.ndarray): + assert indexer.ndim < 2 + return len(indexer) + elif isinstance(indexer, slice): + return len(range(*indexer.indices(orig_length))) + else: + raise ValueError(f"Indexing with {type(indexer)} is not allowed.") + + +def _get_expected_shape(orig_shape, indexer): + if isinstance(indexer, tuple): + row, col = indexer + new_row_shape = _get_expected_axis_length(orig_shape[0], row) + new_col_shape = _get_expected_axis_length(orig_shape[1], col) + return (new_row_shape, new_col_shape) + else: + return (_get_expected_axis_length(orig_shape[0], indexer),) + orig_shape[1:] diff --git a/tests/test_matrices.py b/tests/test_matrices.py index 821fc5f6..1ce76652 100644 --- a/tests/test_matrices.py +++ b/tests/test_matrices.py @@ -9,6 +9,13 @@ import quantcore.matrix as mx +def safe_to_array(a): + if hasattr(a, "A"): + return a.A + else: + return a + + def base_array(order="F") -> np.ndarray: return np.array([[0, 0], [0, -1.0], [0, 2.0]], order=order) @@ -100,6 +107,14 @@ def get_matrices(): ) +def get_basic_indices(): + return (1, [0], slice(0, 10, 2), slice(None)) + + +def get_advanced_indices(): + return ([0], [0, 1], np.arange(2)) + + @pytest.mark.parametrize("mat", get_matrices()) @pytest.mark.parametrize("cols", [None, [], [1], np.array([1])]) def test_matvec_out_parameter_wrong_shape(mat, cols): @@ -237,7 +252,7 @@ def is_split_with_cat_part(x): ) ) - if has_categorical_component and len(shape) > 1: + if has_categorical_component and len(np.squeeze(other).shape) > 1: with pytest.raises(NotImplementedError, match="only implemented for 1d"): mat.matvec(other, cols) else: @@ -246,12 +261,12 @@ def is_split_with_cat_part(x): mat_subset, vec_subset = process_mat_vec_subsets(mat, other, None, cols, cols) expected = mat_subset.dot(vec_subset) - np.testing.assert_allclose(res, expected) + np.testing.assert_allclose(np.squeeze(res), np.squeeze(expected)) assert isinstance(res, np.ndarray) if cols is None: res2 = mat @ other - np.testing.assert_allclose(res2, expected) + np.testing.assert_allclose(np.squeeze(res2), np.squeeze(expected)) def process_mat_vec_subsets(mat, vec, mat_rows, mat_cols, vec_idxs): @@ -513,6 +528,41 @@ def test_standardize( np.testing.assert_allclose(unstandardized.A, asarray) +@pytest.mark.parametrize("mat", get_matrices()) +@pytest.mark.parametrize("rows", get_basic_indices()) +@pytest.mark.parametrize("cols", get_basic_indices()) +def test_basic_indexing(mat, rows, cols): + # row only + np.testing.assert_allclose( + np.squeeze(mat.A[rows]), np.squeeze(safe_to_array(mat[rows])) + ) + # col and row + np.testing.assert_allclose( + np.squeeze(mat.A[rows, cols]), np.squeeze(safe_to_array(mat[rows, cols])) + ) + + +@pytest.mark.parametrize("mat", get_matrices()) +@pytest.mark.parametrize("rows", get_advanced_indices()) +@pytest.mark.parametrize("cols", get_advanced_indices()) +def test_advanced_indexing(mat, rows, cols): + """quantcore.matrix does not support advanced indexing. + + Here, we are testing our multi-sequences indexing (e.g. a[[0, 1], [0, 1]]). + This means that we select the rows 0 and 1 and columns 0 and 1, resulting + in a 2 x 2 matrix. For numpy, this would result is a 1d matrix with 2 elements. + """ + # row only + np.testing.assert_allclose( + np.squeeze(mat.A[rows]), np.squeeze(safe_to_array(mat[rows])) + ) + # col and row + np.testing.assert_allclose( + np.squeeze(mat.A[np.ix_(rows, cols)]), + np.squeeze(safe_to_array(mat[rows, cols])), + ) + + @pytest.mark.parametrize("mat", get_matrices()) def test_indexing_int_row(mat: Union[mx.MatrixBase, mx.StandardizedMatrix]): res = mat[0, :]