From f503890330687bbb2207ab92517eec3a35ab7c6d Mon Sep 17 00:00:00 2001
From: Mike Dusenberry
Date: Fri, 30 Oct 2015 14:35:15 -0700
Subject: [PATCH 1/8] Adding remaining methods to PySpark BlockMatrix: cache, persist, validate, transpose.

---
 python/pyspark/mllib/linalg/distributed.py | 41 ++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py
index 43cb0beef1bd3..de34cadb4084e 100644
--- a/python/pyspark/mllib/linalg/distributed.py
+++ b/python/pyspark/mllib/linalg/distributed.py
@@ -29,6 +29,7 @@
 from pyspark import RDD
 from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper
 from pyspark.mllib.linalg import _convert_to_vector, Matrix
+from pyspark.storagelevel import StorageLevel
 
 __all__ = ['DistributedMatrix', 'RowMatrix', 'IndexedRow',
@@ -789,6 +790,30 @@ def numCols(self):
         """
         return self._java_matrix_wrapper.call("numCols")
 
+    def cache(self):
+        """
+        Caches the underlying RDD.
+        """
+        self._java_matrix_wrapper.call("cache")
+        return self
+
+    def persist(self, storageLevel):
+        """
+        Persists the underlying RDD with the specified storage level.
+        """
+        if not isinstance(storageLevel, StorageLevel):
+            raise TypeError("`storageLevel` should be a StorageLevel, got %s" % type(storageLevel))
+        javaStorageLevel = self._java_matrix_wrapper._sc._getJavaStorageLevel(storageLevel)
+        self._java_matrix_wrapper.call("persist", javaStorageLevel)
+        return self
+
+    def validate(self):
+        """
+        Validates the block matrix info against the matrix data (`blocks`)
+        and throws an exception if any error is found.
+        """
+        self._java_matrix_wrapper.call("validate")
+
     def add(self, other):
         """
         Adds two block matrices together. The matrices must have the
@@ -857,6 +882,22 @@ def multiply(self, other):
         java_block_matrix = self._java_matrix_wrapper.call("multiply", other_java_block_matrix)
         return BlockMatrix(java_block_matrix, self.rowsPerBlock, self.colsPerBlock)
 
+    def transpose(self):
+        """
+        Transpose this BlockMatrix. Returns a new BlockMatrix
+        instance sharing the same underlying data. This is a lazy operation.
+
+        >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
+        ...                          ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
+        >>> mat = BlockMatrix(blocks, 3, 2)
+
+        >>> mat_transposed = mat.transpose()
+        >>> mat_transposed.toLocalMatrix()
+        DenseMatrix(2, 6, [1.0, 4.0, 2.0, 5.0, 3.0, 6.0, 7.0, 10.0, 8.0, 11.0, 9.0, 12.0], 0)
+        """
+        java_transposed_matrix = self._java_matrix_wrapper.call("transpose")
+        return BlockMatrix(java_transposed_matrix, self.colsPerBlock, self.rowsPerBlock)
+
     def toLocalMatrix(self):
         """
         Collect the distributed matrix on the driver as a DenseMatrix.
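A minimal usage sketch for the BlockMatrix methods added in the patch above (illustrative only, not part of the patch; it assumes a live SparkContext `sc`, and the matrix contents are arbitrary):

    from pyspark.mllib.linalg import Matrices
    from pyspark.mllib.linalg.distributed import BlockMatrix
    from pyspark.storagelevel import StorageLevel

    blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
                             ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
    mat = BlockMatrix(blocks, 3, 2)

    mat.validate()                             # raises an exception if block info is inconsistent
    mat.persist(StorageLevel.MEMORY_AND_DISK)  # cache()/persist() return self, so calls chain
    transposed = mat.transpose()               # lazy: no job runs until an action is triggered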
From edf3e45ea0492b29a4d1aeef452cc87c00067725 Mon Sep 17 00:00:00 2001
From: Mike Dusenberry
Date: Fri, 30 Oct 2015 14:56:19 -0700
Subject: [PATCH 2/8] Adding remaining method to PySpark CoordinateMatrix: transpose.

---
 python/pyspark/mllib/linalg/distributed.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py
index de34cadb4084e..e2883367fefe0 100644
--- a/python/pyspark/mllib/linalg/distributed.py
+++ b/python/pyspark/mllib/linalg/distributed.py
@@ -515,6 +515,25 @@ def numCols(self):
         """
         return self._java_matrix_wrapper.call("numCols")
 
+    def transpose(self):
+        """
+        Transpose this CoordinateMatrix.
+
+        >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),
+        ...                           MatrixEntry(1, 0, 2),
+        ...                           MatrixEntry(2, 1, 3.7)])
+        >>> mat = CoordinateMatrix(entries)
+        >>> mat_transposed = mat.transpose()
+
+        >>> print(mat_transposed.numRows())
+        2
+
+        >>> print(mat_transposed.numCols())
+        3
+        """
+        java_transposed_matrix = self._java_matrix_wrapper.call("transpose")
+        return CoordinateMatrix(java_transposed_matrix)
+
     def toRowMatrix(self):
         """
         Convert this matrix to a RowMatrix.

From e1532790e80a6b1032c7df7548c6db95167d7f56 Mon Sep 17 00:00:00 2001
From: Mike Dusenberry
Date: Fri, 30 Oct 2015 15:27:37 -0700
Subject: [PATCH 3/8] Adding remaining method to PySpark IndexedRowMatrix: computeGramianMatrix. Note that 'multiply' and 'computeSVD' are part of the SPARK-6227 PR.

---
 python/pyspark/mllib/linalg/distributed.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py
index e2883367fefe0..78f7e739b343f 100644
--- a/python/pyspark/mllib/linalg/distributed.py
+++ b/python/pyspark/mllib/linalg/distributed.py
@@ -312,6 +312,20 @@ def columnSimilarities(self):
         java_coordinate_matrix = self._java_matrix_wrapper.call("columnSimilarities")
         return CoordinateMatrix(java_coordinate_matrix)
 
+    def computeGramianMatrix(self):
+        """
+        Computes the Gramian matrix `A^T A`. Note that this cannot be
+        computed on matrices with more than 65535 columns.
+
+        >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
+        ...                        IndexedRow(1, [4, 5, 6])])
+        >>> mat = IndexedRowMatrix(rows)
+
+        >>> mat.computeGramianMatrix()
+        DenseMatrix(3, 3, [17.0, 22.0, 27.0, 22.0, 29.0, 36.0, 27.0, 36.0, 45.0], 0)
+        """
+        return self._java_matrix_wrapper.call("computeGramianMatrix")
+
     def toRowMatrix(self):
         """
         Convert this matrix to a RowMatrix.
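As a sanity check on the computeGramianMatrix doctest above, the Gramian `A^T A` of the two rows can be reproduced locally with NumPy (a sketch for small matrices only; the distributed method exists precisely so `A` never has to be collected):

    import numpy as np

    A = np.array([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0]])
    print(A.T.dot(A))
    # [[ 17.  22.  27.]
    #  [ 22.  29.  36.]
    #  [ 27.  36.  45.]]

Since the Gramian is symmetric, its column-major flattening matches the values in the DenseMatrix doctest output.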
From 587bea5e5550fa4bcaee61c06dfdad9a935fb092 Mon Sep 17 00:00:00 2001
From: Mike Dusenberry
Date: Mon, 2 Nov 2015 13:53:15 -0800
Subject: [PATCH 4/8] Adding remaining methods to PySpark RowMatrix: computeGramianMatrix, computeCovariance, computeColumnSummaryStatistics, columnSimilarities, tallSkinnyQR.

---
 python/pyspark/mllib/linalg/__init__.py    |  24 ++++
 python/pyspark/mllib/linalg/distributed.py | 145 ++++++++++++++++++++-
 2 files changed, 168 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py
index 4cd7306edb11b..610afc5e052e7 100644
--- a/python/pyspark/mllib/linalg/__init__.py
+++ b/python/pyspark/mllib/linalg/__init__.py
@@ -1235,6 +1235,30 @@ def sparse(numRows, numCols, colPtrs, rowIndices, values):
         return SparseMatrix(numRows, numCols, colPtrs, rowIndices, values)
 
 
+class QRDecomposition(object):
+    """
+    Represents QR factors.
+    """
+    def __init__(self, Q, R):
+        self._Q = Q
+        self._R = R
+
+    @property
+    def Q(self):
+        """
+        An orthogonal matrix Q in a QR decomposition.
+        May be None if not computed.
+        """
+        return self._Q
+
+    @property
+    def R(self):
+        """
+        An upper triangular matrix R in a QR decomposition.
+        """
+        return self._R
+
+
 def _test():
     import doctest
     (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS)
diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py
index 78f7e739b343f..0db2682a6ba28 100644
--- a/python/pyspark/mllib/linalg/distributed.py
+++ b/python/pyspark/mllib/linalg/distributed.py
@@ -28,7 +28,8 @@
 
 from pyspark import RDD
 from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper
-from pyspark.mllib.linalg import _convert_to_vector, Matrix
+from pyspark.mllib.linalg import _convert_to_vector, Matrix, QRDecomposition
+from pyspark.mllib.stat import MultivariateStatisticalSummary
 from pyspark.storagelevel import StorageLevel
 
 
@@ -152,6 +153,148 @@ def numCols(self):
         """
         return self._java_matrix_wrapper.call("numCols")
 
+    def computeColumnSummaryStatistics(self):
+        """
+        Computes column-wise summary statistics.
+
+        :return: :class:`MultivariateStatisticalSummary` object
+                 containing column-wise summary statistics.
+
+        >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]])
+        >>> mat = RowMatrix(rows)
+
+        >>> colStats = mat.computeColumnSummaryStatistics()
+        >>> colStats.mean()
+        array([ 2.5, 3.5, 4.5])
+        """
+        java_col_stats = self._java_matrix_wrapper.call("computeColumnSummaryStatistics")
+        return MultivariateStatisticalSummary(java_col_stats)
+
+    def computeCovariance(self):
+        """
+        Computes the covariance matrix, treating each row as an
+        observation. Note that this cannot be computed on matrices
+        with more than 65535 columns.
+
+        >>> rows = sc.parallelize([[1, 2], [2, 1]])
+        >>> mat = RowMatrix(rows)
+
+        >>> mat.computeCovariance()
+        DenseMatrix(2, 2, [0.5, -0.5, -0.5, 0.5], 0)
+        """
+        return self._java_matrix_wrapper.call("computeCovariance")
+
+    def computeGramianMatrix(self):
+        """
+        Computes the Gramian matrix `A^T A`. Note that this cannot be
+        computed on matrices with more than 65535 columns.
+
+        >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]])
+        >>> mat = RowMatrix(rows)
+
+        >>> mat.computeGramianMatrix()
+        DenseMatrix(3, 3, [17.0, 22.0, 27.0, 22.0, 29.0, 36.0, 27.0, 36.0, 45.0], 0)
+        """
+        return self._java_matrix_wrapper.call("computeGramianMatrix")
+
+    def columnSimilarities(self, threshold=0.0):
+        """
+        Compute similarities between columns of this matrix.
+
+        The threshold parameter is a trade-off knob between estimate
+        quality and computational cost.
+
+        The default threshold setting of 0 guarantees deterministically
+        correct results, but uses the brute-force approach of computing
+        normalized dot products.
+
+        Setting the threshold to positive values uses a sampling
+        approach and incurs strictly less computational cost than the
+        brute-force approach. However, the similarities computed will
+        be estimates.
+
+        The sampling guarantees relative-error correctness for those
+        pairs of columns that have similarity greater than the given
+        similarity threshold.
+
+        To describe the guarantee, we set some notation:
+        * Let A be the smallest in magnitude non-zero element of
+          this matrix.
+        * Let B be the largest in magnitude non-zero element of
+          this matrix.
+        * Let L be the maximum number of non-zeros per row.
+
+        For example, for {0,1} matrices: A=B=1.
+        Another example, for the Netflix matrix: A=1, B=5.
+
+        For those column pairs that are above the threshold, the
+        computed similarity is correct to within 20% relative error
+        with probability at least 1 - (0.981)^(10/B).
+
+        The shuffle size is bounded by the *smaller* of the following
+        two expressions:
+
+        * O(n log(n) L / (threshold * A))
+        * O(m L^2)
+
+        The latter is the cost of the brute-force approach, so for
+        non-zero thresholds, the cost is always cheaper than the
+        brute-force approach.
+
+        :param threshold: Set to 0 for deterministic guaranteed
+                          correctness. Similarities above this
+                          threshold are estimated with the cost vs
+                          estimate quality trade-off described above.
+        :return: An n x n sparse upper-triangular CoordinateMatrix of
+                 cosine similarities between columns of this matrix.
+
+        >>> rows = sc.parallelize([[1, 2], [1, 5]])
+        >>> mat = RowMatrix(rows)
+
+        >>> sims = mat.columnSimilarities()
+        >>> round(sims.entries.first().value, 12)
+        0.919145030018
+        """
+        java_sims_mat = self._java_matrix_wrapper.call("columnSimilarities", float(threshold))
+        return CoordinateMatrix(java_sims_mat)
+
+    def tallSkinnyQR(self, computeQ=False):
+        """
+        Compute the QR decomposition of this RowMatrix.
+
+        The implementation is designed to optimize the QR decomposition
+        (factorization) for the RowMatrix of a tall and skinny shape.
+
+        Reference:
+        Paul G. Constantine, David F. Gleich. "Tall and skinny QR
+        factorizations in MapReduce architectures"
+        (http://dx.doi.org/10.1145/1996092.1996103)
+
+        :param computeQ: whether to compute Q
+        :return: QRDecomposition(Q: RowMatrix, R: Matrix), where
+                 Q = None if computeQ is False.
+
+        >>> rows = sc.parallelize([[3, -6], [4, -8], [0, 1]])
+        >>> mat = RowMatrix(rows)
+        >>> decomp = mat.tallSkinnyQR(True)
+
+        >>> # Test with absolute values
+        >>> decomp.Q.rows.map(lambda row: abs(row.toArray()).tolist()).collect()
+        [[0.6..., 0.0], [0.8..., 0.0], [0.0, 1.0]]
+
+        >>> # Test with absolute values
+        >>> abs(decomp.R.toArray()).tolist()
+        [[5.0, 10.0], [0.0, 1.0]]
+        """
+        decomp = JavaModelWrapper(self._java_matrix_wrapper.call("tallSkinnyQR", computeQ))
+        if computeQ:
+            java_Q = decomp.call("Q")
+            Q = RowMatrix(java_Q)
+        else:
+            Q = None
+        R = decomp.call("R")
+        return QRDecomposition(Q, R)
+
 
 class IndexedRow(object):
     """
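For reference, the similarity value in the columnSimilarities doctest above is just the cosine of the two columns [1, 1] and [2, 5] of the input; a local NumPy sketch:

    import numpy as np

    c1 = np.array([1.0, 1.0])  # first column of [[1, 2], [1, 5]]
    c2 = np.array([2.0, 5.0])  # second column
    print(c1.dot(c2) / (np.linalg.norm(c1) * np.linalg.norm(c2)))
    # 0.9191450300180578, i.e. 7 / sqrt(58)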
""" java_sims_mat = self._java_matrix_wrapper.call("columnSimilarities", float(threshold)) return CoordinateMatrix(java_sims_mat) @@ -277,13 +277,16 @@ def tallSkinnyQR(self, computeQ=False): >>> rows = sc.parallelize([[3, -6], [4, -8], [0, 1]]) >>> mat = RowMatrix(rows) >>> decomp = mat.tallSkinnyQR(True) + >>> Q = decomp.Q + >>> R = decomp.R >>> # Test with absolute values - >>> decomp.Q.rows.map(lambda row: abs(row.toArray()).tolist()).collect() + >>> absQRows = Q.rows.map(lambda row: abs(row.toArray()).tolist()) + >>> absQRows.collect() [[0.6..., 0.0], [0.8..., 0.0], [0.0, 1.0]] >>> # Test with absolute values - >>> abs(decomp.R.toArray()).tolist() + >>> abs(R.toArray()).tolist() [[5.0, 10.0], [0.0, 1.0]] """ decomp = JavaModelWrapper(self._java_matrix_wrapper.call("tallSkinnyQR", computeQ)) From fca41ca4df8f04389dc9ce87de7a2af591476e2e Mon Sep 17 00:00:00 2001 From: Mike Dusenberry Date: Tue, 3 Nov 2015 12:20:28 -0800 Subject: [PATCH 6/8] Adding experimental tag to QRDecomposition. --- python/pyspark/mllib/linalg/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index 610afc5e052e7..b724ed88f3d9e 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -43,7 +43,8 @@ __all__ = ['Vector', 'DenseVector', 'SparseVector', 'Vectors', - 'Matrix', 'DenseMatrix', 'SparseMatrix', 'Matrices'] + 'Matrix', 'DenseMatrix', 'SparseMatrix', 'Matrices', + 'QRDecomposition'] if sys.version_info[:2] == (2, 7): @@ -1237,6 +1238,8 @@ def sparse(numRows, numCols, colPtrs, rowIndices, values): class QRDecomposition(object): """ + .. note:: Experimental + Represents QR factors. """ def __init__(self, Q, R): From f1410bf63f99bbb3b46ec8da03406b3bd1768783 Mon Sep 17 00:00:00 2001 From: Mike Dusenberry Date: Thu, 21 Apr 2016 11:02:46 -0700 Subject: [PATCH 7/8] Adding @since annotations, and adding a comment to Scala. --- .../mllib/linalg/distributed/IndexedRowMatrix.scala | 3 ++- python/pyspark/mllib/linalg/__init__.py | 3 +++ python/pyspark/mllib/linalg/distributed.py | 13 ++++++++++++- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala index 06b9c4ac67bb0..b03b3ecde94f4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala @@ -188,7 +188,8 @@ class IndexedRowMatrix @Since("1.0.0") ( } /** - * Computes the Gramian matrix `A^T A`. + * Computes the Gramian matrix `A^T A`. Note that this cannot be + * computed on matrices with more than 65535 columns. */ @Since("1.0.0") def computeGramianMatrix(): Matrix = { diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index b724ed88f3d9e..70509a6d9bece 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -38,6 +38,7 @@ import numpy as np +from pyspark import since from pyspark.sql.types import UserDefinedType, StructField, StructType, ArrayType, DoubleType, \ IntegerType, ByteType, BooleanType @@ -1247,6 +1248,7 @@ def __init__(self, Q, R): self._R = R @property + @since('2.0.0') def Q(self): """ An orthogonal matrix Q in a QR decomposition. 
From f1410bf63f99bbb3b46ec8da03406b3bd1768783 Mon Sep 17 00:00:00 2001
From: Mike Dusenberry
Date: Thu, 21 Apr 2016 11:02:46 -0700
Subject: [PATCH 7/8] Adding @since annotations, and adding a comment to Scala.

---
 .../mllib/linalg/distributed/IndexedRowMatrix.scala |  3 ++-
 python/pyspark/mllib/linalg/__init__.py             |  3 +++
 python/pyspark/mllib/linalg/distributed.py          | 13 ++++++++++++-
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
index 06b9c4ac67bb0..b03b3ecde94f4 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
@@ -188,7 +188,8 @@ class IndexedRowMatrix @Since("1.0.0") (
   }
 
   /**
-   * Computes the Gramian matrix `A^T A`.
+   * Computes the Gramian matrix `A^T A`. Note that this cannot be
+   * computed on matrices with more than 65535 columns.
    */
   @Since("1.0.0")
   def computeGramianMatrix(): Matrix = {
diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py
index b724ed88f3d9e..70509a6d9bece 100644
--- a/python/pyspark/mllib/linalg/__init__.py
+++ b/python/pyspark/mllib/linalg/__init__.py
@@ -38,6 +38,7 @@
 
 import numpy as np
 
+from pyspark import since
 from pyspark.sql.types import UserDefinedType, StructField, StructType, ArrayType, DoubleType, \
     IntegerType, ByteType, BooleanType
 
@@ -1247,6 +1248,7 @@ def __init__(self, Q, R):
         self._R = R
 
     @property
+    @since('2.0.0')
     def Q(self):
         """
         An orthogonal matrix Q in a QR decomposition.
@@ -1255,6 +1257,7 @@ def Q(self):
         return self._Q
 
     @property
+    @since('2.0.0')
     def R(self):
         """
         An upper triangular matrix R in a QR decomposition.
diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py
index f2eee49c6f928..136dd3319c9cc 100644
--- a/python/pyspark/mllib/linalg/distributed.py
+++ b/python/pyspark/mllib/linalg/distributed.py
@@ -26,7 +26,7 @@
 
 from py4j.java_gateway import JavaObject
 
-from pyspark import RDD
+from pyspark import RDD, since
 from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper
 from pyspark.mllib.linalg import _convert_to_vector, Matrix, QRDecomposition
 from pyspark.mllib.stat import MultivariateStatisticalSummary
@@ -153,6 +153,7 @@ def numCols(self):
         """
         return self._java_matrix_wrapper.call("numCols")
 
+    @since('2.0.0')
     def computeColumnSummaryStatistics(self):
         """
         Computes column-wise summary statistics.
@@ -170,6 +171,7 @@ def computeColumnSummaryStatistics(self):
         java_col_stats = self._java_matrix_wrapper.call("computeColumnSummaryStatistics")
         return MultivariateStatisticalSummary(java_col_stats)
 
+    @since('2.0.0')
     def computeCovariance(self):
         """
         Computes the covariance matrix, treating each row as an
@@ -184,6 +186,7 @@ def computeCovariance(self):
         """
         return self._java_matrix_wrapper.call("computeCovariance")
 
+    @since('2.0.0')
     def computeGramianMatrix(self):
         """
         Computes the Gramian matrix `A^T A`. Note that this cannot be
@@ -197,6 +200,7 @@ def computeGramianMatrix(self):
         """
         return self._java_matrix_wrapper.call("computeGramianMatrix")
 
+    @since('2.0.0')
     def columnSimilarities(self, threshold=0.0):
         """
         Compute similarities between columns of this matrix.
@@ -258,6 +262,7 @@ def columnSimilarities(self, threshold=0.0):
         java_sims_mat = self._java_matrix_wrapper.call("columnSimilarities", float(threshold))
         return CoordinateMatrix(java_sims_mat)
 
+    @since('2.0.0')
     def tallSkinnyQR(self, computeQ=False):
         """
         Compute the QR decomposition of this RowMatrix.
@@ -458,6 +463,7 @@ def columnSimilarities(self):
         java_coordinate_matrix = self._java_matrix_wrapper.call("columnSimilarities")
         return CoordinateMatrix(java_coordinate_matrix)
 
+    @since('2.0.0')
     def computeGramianMatrix(self):
         """
         Computes the Gramian matrix `A^T A`. Note that this cannot be
@@ -675,6 +681,7 @@ def numCols(self):
         """
         return self._java_matrix_wrapper.call("numCols")
 
+    @since('2.0.0')
     def transpose(self):
         """
         Transpose this CoordinateMatrix.
@@ -969,6 +976,7 @@ def numCols(self):
         """
         return self._java_matrix_wrapper.call("numCols")
 
+    @since('2.0.0')
     def cache(self):
         """
         Caches the underlying RDD.
@@ -976,6 +984,7 @@ def cache(self):
         self._java_matrix_wrapper.call("cache")
         return self
 
+    @since('2.0.0')
     def persist(self, storageLevel):
         """
         Persists the underlying RDD with the specified storage level.
@@ -986,6 +995,7 @@ def persist(self, storageLevel):
         javaStorageLevel = self._java_matrix_wrapper._sc._getJavaStorageLevel(storageLevel)
         self._java_matrix_wrapper.call("persist", javaStorageLevel)
         return self
 
+    @since('2.0.0')
     def validate(self):
         """
         Validates the block matrix info against the matrix data (`blocks`)
@@ -1061,6 +1071,7 @@ def multiply(self, other):
         java_block_matrix = self._java_matrix_wrapper.call("multiply", other_java_block_matrix)
         return BlockMatrix(java_block_matrix, self.rowsPerBlock, self.colsPerBlock)
 
+    @since('2.0.0')
     def transpose(self):
         """
         Transpose this BlockMatrix. Returns a new BlockMatrix
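For reference, the `since` decorator applied throughout the patch above appends a Sphinx ``.. versionadded::`` note to the wrapped function's docstring. A minimal sketch of how it is used (the class and method names here are hypothetical):

    from pyspark import since

    class Example(object):
        @since('2.0.0')
        def f(self):
            """Does something."""
            return 42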
From c0c9565706c148cb7dd64250630931ab41838d3b Mon Sep 17 00:00:00 2001
From: Mike Dusenberry
Date: Thu, 21 Apr 2016 14:44:26 -0700
Subject: [PATCH 8/8] Adding the subtract method.

---
 python/pyspark/mllib/linalg/distributed.py | 35 ++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py
index 136dd3319c9cc..af34ce346b0ca 100644
--- a/python/pyspark/mllib/linalg/distributed.py
+++ b/python/pyspark/mllib/linalg/distributed.py
@@ -1036,6 +1036,41 @@ def add(self, other):
         java_block_matrix = self._java_matrix_wrapper.call("add", other_java_block_matrix)
         return BlockMatrix(java_block_matrix, self.rowsPerBlock, self.colsPerBlock)
 
+    @since('2.0.0')
+    def subtract(self, other):
+        """
+        Subtracts the given block matrix `other` from this block matrix:
+        `this - other`. The matrices must have the same size and
+        matching `rowsPerBlock` and `colsPerBlock` values. If one of
+        the sub matrix blocks being subtracted is a
+        SparseMatrix, the resulting sub matrix block will also be a
+        SparseMatrix, even if it is being subtracted from a DenseMatrix.
+        If two dense sub matrix blocks are subtracted, the output block
+        will also be a DenseMatrix.
+
+        >>> dm1 = Matrices.dense(3, 2, [3, 1, 5, 4, 6, 2])
+        >>> dm2 = Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12])
+        >>> sm = Matrices.sparse(3, 2, [0, 1, 3], [0, 1, 2], [1, 2, 3])
+        >>> blocks1 = sc.parallelize([((0, 0), dm1), ((1, 0), dm2)])
+        >>> blocks2 = sc.parallelize([((0, 0), dm2), ((1, 0), dm1)])
+        >>> blocks3 = sc.parallelize([((0, 0), sm), ((1, 0), dm2)])
+        >>> mat1 = BlockMatrix(blocks1, 3, 2)
+        >>> mat2 = BlockMatrix(blocks2, 3, 2)
+        >>> mat3 = BlockMatrix(blocks3, 3, 2)
+
+        >>> mat1.subtract(mat2).toLocalMatrix()
+        DenseMatrix(6, 2, [-4.0, -7.0, -4.0, 4.0, 7.0, 4.0, -6.0, -5.0, -10.0, 6.0, 5.0, 10.0], 0)
+
+        >>> mat2.subtract(mat3).toLocalMatrix()
+        DenseMatrix(6, 2, [6.0, 8.0, 9.0, -4.0, -7.0, -4.0, 10.0, 9.0, 9.0, -6.0, -5.0, -10.0], 0)
+        """
+        if not isinstance(other, BlockMatrix):
+            raise TypeError("`other` should be a BlockMatrix, got %s" % type(other))
+
+        other_java_block_matrix = other._java_matrix_wrapper._java_model
+        java_block_matrix = self._java_matrix_wrapper.call("subtract", other_java_block_matrix)
+        return BlockMatrix(java_block_matrix, self.rowsPerBlock, self.colsPerBlock)
+
     def multiply(self, other):
         """
         Left multiplies this BlockMatrix by `other`, another