From 31a057a3a4dfe0208c04bbd921e5e1792237f128 Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Fri, 21 Oct 2022 16:01:42 +0200
Subject: [PATCH 01/23] Move clear from blas1 to io. Also add missing io file
 in root

---
 include/alp/base/blas3.hpp      |  3 +++
 include/alp/base/io.hpp         | 17 +++++++++++++++++
 include/alp/io.hpp              | 34 +++++++++++++++++++++++++++++++++
 include/alp/reference/blas1.hpp | 32 -------------------------------
 include/alp/reference/io.hpp    | 32 +++++++++++++++++++++++++++++++
 5 files changed, 86 insertions(+), 32 deletions(-)
 create mode 100644 include/alp/io.hpp

diff --git a/include/alp/base/blas3.hpp b/include/alp/base/blas3.hpp
index 99e648b64..534dd61d2 100644
--- a/include/alp/base/blas3.hpp
+++ b/include/alp/base/blas3.hpp
@@ -24,9 +24,12 @@
 
 #include <alp/backends.hpp>
 #include <alp/phase.hpp>
+#include <alp/identities.hpp>
+#include <alp/monoid.hpp>
 
 #include "matrix.hpp"
 #include "vector.hpp"
+#include "io.hpp"
 
 namespace alp {
 
diff --git a/include/alp/base/io.hpp b/include/alp/base/io.hpp
index 52cfc3930..75a67dd0c 100644
--- a/include/alp/base/io.hpp
+++ b/include/alp/base/io.hpp
@@ -54,6 +54,23 @@ namespace alp {
 	 * @{
 	 */
 
+	/**
+	 * Clears all elements from the given vector \a x. Afterwards the number of
+	 * nonzero elements in the vector is zero; its size remains unchanged.
+	 *
+	 * This overload, generic over the backend, ignores \a x and returns PANIC.
+	 */
+	template<
+		typename DataType, typename DataStructure, typename View,
+		typename ImfR, typename ImfC, Backend backend
+	>
+	RC clear(
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x
+	) noexcept {
+		(void) x;
+		return PANIC;
+	}
+
 	/**
 	 * Constructs a dense vector from a container of exactly alp::size(x)
 	 * elements. This function aliases to the buildVector routine that takes
diff --git a/include/alp/io.hpp b/include/alp/io.hpp
new file mode 100644
index 000000000..f99cf5664
--- /dev/null
+++ b/include/alp/io.hpp
@@ -0,0 +1,34 @@
+
+/*
+ *   Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 21st of February, 2017
+ */
+
+#ifndef _H_ALP_IO
+#define _H_ALP_IO
+
+#include "base/io.hpp"
+
+// now include all specialisations contained in the backend directories:
+#ifdef _ALP_WITH_REFERENCE
+ #include <alp/reference/io.hpp>
+#endif
+
+#endif // end ``_H_ALP_IO''
+
diff --git a/include/alp/reference/blas1.hpp b/include/alp/reference/blas1.hpp
index f2ed86f5c..552a78cc2 100644
--- a/include/alp/reference/blas1.hpp
+++ b/include/alp/reference/blas1.hpp
@@ -179,38 +179,6 @@ namespace alp {
 	 * @{
 	 */
 
-	/**
-	 * Clears all elements from the given vector \a x.
-	 *
-	 * At the end of this operation, the number of nonzero elements in this vector
-	 * will be zero. The size of the vector remains unchanged.
-	 *
-	 * @return alp::SUCCESS When the vector is successfully cleared.
-	 *
-	 * \note This function cannot fail.
-	 *
-	//  * \parblock
-	//  * \par Performance semantics
-	//  *      This function
-	//  *        -# contains \f$ \mathcal{O}(n) \f$ work,
-	//  *        -# will not allocate new dynamic memory,
-	//  *        -# will take at most \f$ \Theta(1) \f$ memory beyond the memory
-	//  *           already used by the application before the call to this
-	//  *           function.
-	//  *        -# will move at most \f$ \mathit{sizeof}(\mathit{bool}) +
-	//  *           \mathit{sizeof}(\mathit{size\_t}) \f$ bytes of data.
-	//  * \endparblock
-	 */
-	template<
-		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC
-	>
-	RC clear(
-		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x
-	) noexcept {
-		throw std::runtime_error( "Needs an implementation" );
-		return SUCCESS;
-	}
-
 	/**
 	 * Request the size (dimension) of a given Vector.
 	 *
diff --git a/include/alp/reference/io.hpp b/include/alp/reference/io.hpp
index e785a3b7d..c89c73530 100644
--- a/include/alp/reference/io.hpp
+++ b/include/alp/reference/io.hpp
@@ -28,6 +28,38 @@
 
 namespace alp {
 
+	/**
+	 * Clears all elements from the given vector \a x.
+	 *
+	 * At the end of this operation, the number of nonzero elements in this vector
+	 * will be zero. The size of the vector remains unchanged.
+	 *
+	 * @return alp::PANIC Always, since this backend has no implementation yet.
+	 *
+	 * \todo Provide an implementation and return alp::SUCCESS on completion.
+	 *
+	//  * \parblock
+	//  * \par Performance semantics
+	//  *      This function
+	//  *        -# contains \f$ \mathcal{O}(n) \f$ work,
+	//  *        -# will not allocate new dynamic memory,
+	//  *        -# will take at most \f$ \Theta(1) \f$ memory beyond the memory
+	//  *           already used by the application before the call to this
+	//  *           function.
+	//  *        -# will move at most \f$ \mathit{sizeof}(\mathit{bool}) +
+	//  *           \mathit{sizeof}(\mathit{size\_t}) \f$ bytes of data.
+	//  * \endparblock
+	 */
+	template<
+		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC
+	>
+	RC clear(
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x
+	) noexcept {
+		(void) x; // unimplemented: report failure instead of throwing out of a noexcept function
+		return PANIC;
+	}
+
 	/**
 	 * Assigns elements to a matrix from an iterator.
 	 *

From 58caae0c816b342094b0eb6c2995ea6362114491 Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Fri, 21 Oct 2022 17:06:34 +0200
Subject: [PATCH 02/23] Remove unnecessary functions from blas1.hpp

---
 include/alp/blas1.hpp | 366 ------------------------------------------
 1 file changed, 366 deletions(-)

diff --git a/include/alp/blas1.hpp b/include/alp/blas1.hpp
index 36a6d4959..5def561b9 100644
--- a/include/alp/blas1.hpp
+++ b/include/alp/blas1.hpp
@@ -34,371 +34,5 @@
  #include <alp/reference/blas1.hpp>
 #endif
 
-// the remainder implements several backend-agnostic short-cuts
-
-#define NO_CAST_RING_ASSERT( x, y, z )                                             \
-	static_assert( x,                                                              \
-		"\n\n"                                                                     \
-		"************************************************************************" \
-		"************************************************************************" \
-		"**********************\n"                                                 \
-		"*     ERROR      | " y " " z ".\n"                                        \
-		"************************************************************************" \
-		"************************************************************************" \
-		"**********************\n"                                                 \
-		"* Possible fix 1 | Remove no_casting from the template parameters in "    \
-		"this call to " y ".\n"                                                    \
-		"* Possible fix 2 | For all mismatches in the domains of input "           \
-		"parameters and the semiring domains, as specified in the documentation "  \
-		"of the function " y ", supply an input argument of the expected type "    \
-		"instead.\n"                                                               \
-		"* Possible fix 3 | Provide a compatible semiring where all domains "      \
-		"match those of the input parameters, as specified in the documentation "  \
-		"of the function " y ".\n"                                                 \
-		"************************************************************************" \
-		"************************************************************************" \
-		"**********************\n" );
-
-namespace alp {
-
-	/**
-	 * A standard vector to use for mask parameters. Indicates no mask shall be
-	 * used.
-	 */
-	#ifdef NO_MASK
-		#undef NO_MASK
-	#endif
-	#define NO_MASK internal::Vector< bool >( 0 )
-
-	/**
-	 * Executes an arbitrary element-wise user-defined function \a f using any
-	 * number of vectors of equal length, following the nonzero pattern of the
-	 * given vector \a x.
-	 *
-	 * The user-defined function is passed as a lambda which can capture, at
-	 * the very least, other instances of type alp::Vector. Use of this function
-	 * is preferable whenever multiple element-wise operations are requested that
-	 * use one or more identical input vectors. Performing the computation one
-	 * after the other in blocking mode would require the same vector to be
-	 * streamed multiple times, while with this function the operations can be
-	 * fused explicitly instead.
-	 *
-	 * It shall always be legal to capture non-GraphBLAS objects for read access
-	 * only. It shall \em not be legal to capture instances of type alp::Matrix
-	 * for read and/or write access.
-	 *
-	 * If alp::Properties::writableCaptured evaluates true then captured
-	 * non-GraphBLAS objects can also be written to, not just read from. The
-	 * captured variable is, however, completely local to the calling user process
-	 * only-- it will not be synchronised between user processes.
-	 * As a rule of thumb, data-centric GraphBLAS implementations \em cannot
-	 * support this and will thus have alp::Properties::writableCaptured evaluate
-	 * to false. A portable GraphBLAS algorithm should provide a different code
-	 * path to handle this case.
-	 * When it is legal to write to captured scalar, this function can, e.g., be
-	 * used to perform reduction-like operations on any number of equally sized
-	 * input vectors.  This would be preferable to a chained number of calls to
-	 * alp::dot in case where some vectors are shared between subsequent calls,
-	 * for example; the shared vectors are streamed only once using this lambda-
-	 * enabled function.
-	 *
-	 * \warning The lambda shall only be executed on the data local to the user
-	 *          process calling this function! This is different from the various
-	 *          fold functions, or alp::dot, in that the semantics of those
-	 *          functions always end with a globally synchronised result. To
-	 *          achieve the same effect with user-defined lambdas, the users
-	 *          should manually prescribe how to combine the local results into
-	 *          global ones, for instance, by a subsequent call to
-	 *          alp::collectives<>::allreduce.
-	 *
-	 * \note This is an addition to the GraphBLAS. It is alike user-defined
-	 *       operators, monoids, and semirings, except it allows execution on
-	 *       arbitrarily many inputs and arbitrarily many outputs.
-	 *
-	 * @tparam Func the user-defined lambda function type.
-	 * @tparam DataType the type of the user-supplied vector example.
-	 * @tparam backend  the backend type of the user-supplied vector example.
-	 *
-	 * @param[in] f The user-supplied lambda. This lambda should only capture
-	 *              and reference vectors of the same length as \a x. The lambda
-	 *              function should prescribe the operations required to execute
-	 *              at a given index \a i. Captured GraphBLAS vectors can access
-	 *              that element via the operator[]. It is illegal to access any
-	 *              element not at position \a i. The lambda takes only the single
-	 *              parameter \a i of type <code>const size_t</code>. Captured
-	 *              scalars will not be globally updated-- the user must program
-	 *              this explicitly. Scalars and other non-GraphBLAS containers
-	 *              are always local to their user process.
-	 * @param[in] x The vector the lambda will be executed on. This argument
-	 *              determines which indices \a i will be accessed during the
-	 *              elementwise operation-- elements with indices \a i that
-	 *              do not appear in \a x will be skipped during evaluation of
-	 *              \a f.
-	 * @param[in] args All vectors the lambda is to access elements of. Must be of
-	 *                 the same length as \a x. If this constraint is violated,
-	 *                 alp::MISMATCH shall be returned. <em>This is a variadic
-	 *                 argument and can contain any number of containers of type
-	 *                 alp::Vector, passed as though they were separate
-	 *                 arguments.</em>
-	 *
-	 * \note In future GraphBLAS implementations, \a args, apart from doing
-	 *       dimension checking, should also facilitate any data distribution
-	 *       necessary to successfully execute the element-wise operation. Current
-	 *       implementations do not require this since they use the same static
-	 *       distribution for all containers.
-	 *
-	 * \warning Using a alp::Vector inside a lambda passed to this function while
-	 *          not passing that same vector into \a args, will result in undefined
-	 *          behaviour.
-	 *
-	 * \note It would be natural to have \a x equal to one of the captured
-	 *       GraphBLAS vectors in \a f.
-	 *
-	 * \warning Due to the constraints on \a f described above, it is illegal to
-	 *          capture some vector \a y and have the following line in the body
-	 *          of \a f: <code>x[i] += x[i+1]</code>. Vectors can only be
-	 *          dereferenced at position \a i and \a i alone.
-	 *
-	 * @return alp::SUCCESS  When the lambda is successfully executed.
-	 * @return alp::MISMATCH When two or more vectors passed to \a args are not of
-	 *                       equal length.
-	 *
-	 * \parblock
-	 * \par Example.
-	 *
-	 * An example valid use:
-	 *
-	 * \code
-	 * void f(
-	 *      double &alpha,
-	 *      alp::Vector< double > &y,
-	 *      const double beta,
-	 *      const alp::Vector< double > &x,
-	 *      const alp::Semiring< double > ring
-	 * ) {
-	 *      assert( alp::size(x) == alp::size(y) );
-	 *      assert( alp::nnz(x) == alp::size(x) );
-	 *      assert( alp::nnz(y) == alp::size(y) );
-	 *      alpha = ring.getZero();
-	 *      alp::eWiseLambda(
-	 *          [&alpha,beta,&x,&y,ring]( const size_t i ) {
-	 *              double mul;
-	 *              const auto mul_op = ring.getMultiplicativeOperator();
-	 *              const auto add_op = ring.getAdditiveOperator();
-	 *              alp::apply( y[i], beta, x[i], mul_op );
-	 *              alp::apply( mul, x[i], y[i], mul_op );
-	 *              alp::foldl( alpha, mul, add_op );
-	 *      }, x, y );
-	 *      alp::collectives::allreduce( alpha, add_op );
-	 * }
-	 * \endcode
-	 *
-	 * This code takes a value \a beta, a vector \a x, and a semiring \a ring and
-	 * computes:
-	 *   1) \a y as the element-wise multiplication (under \a ring) of \a beta and
-	 *      \a x; and
-	 *   2) \a alpha as the dot product (under \a ring) of \a x and \a y.
-	 * This function can easily be made agnostic to whatever exact semiring is used
-	 * by templating the type of \a ring. As it is, this code is functionally
-	 * equivalent to:
-	 *
-	 * \code
-	 * alp::eWiseMul( y, beta, x, ring );
-	 * alp::dot( alpha, x, y, ring );
-	 * \endcode
-	 *
-	 * The version using the lambdas, however, is expected to execute
-	 * faster as both \a x and \a y are streamed only once, while the
-	 * latter code may stream both vectors twice.
-	 * \endparblock
-	 *
-	 * \warning The following code is invalid:
-	 *          \code
-	 *              template< class Operator >
-	 *              void f(
-	 *                   alp::Vector< double > &x,
-	 *                   const Operator op
-	 *              ) {
-	 *                   alp::eWiseLambda(
-	 *                       [&x,&op]( const size_t i ) {
-	 *                           alp::apply( x[i], x[i], x[i+1], op );
-	 *                   }, x );
-	 *              }
-	 *          \endcode
-	 *          Only a Vector::lambda_reference to position exactly equal to \a i
-	 *          may be used within this function.
-	 *
-	 * \warning There is no similar concept in the official GraphBLAS specs.
-	 *
-	 * \warning Captured scalars will be local to the user process executing the
-	 *          lambda. To retrieve the global dot product, an allreduce must
-	 *          explicitly be called.
-	 *
-	 * @see Vector::operator[]()
-	 * @see Vector::lambda_reference
-	 */
-	template<
-		typename Func,
-		typename DataType,
-		Backend backend,
-		typename... Args
-	>
-	RC eWiseLambda(
-		const Func f,
-		const internal::Vector< DataType, backend > & x, Args...
-	) {
-		(void)f;
-		(void)x;
-		return PANIC;
-	}
-
-	/**
-	 * Alias for a simple reduce call.
-	 *
-	 * Will use no mask and will set the accumulator to the given Monoid's
-	 * operator.
-	 */
-	template<
-		Descriptor descr = descriptors::no_operation,
-		class Monoid,
-		typename IOType, typename InputType,
-		Backend backend
-	>
-	RC foldl( IOType &x,
-		const internal::Vector< InputType, backend > &y,
-		const Monoid &monoid = Monoid(),
-		const typename std::enable_if< !alp::is_object< IOType >::value &&
-			alp::is_monoid< Monoid >::value,
-		void >::type * const = NULL
-	) {
-		// create empty mask
-		internal::Vector< bool, backend > mask( 0 );
-		// call regular reduce function
-		return foldl< descr >( x, y, mask, monoid );
-	}
-
-	/**
-	 * Alias for a simple reduce call.
-	 *
-	 * Will use no mask and will set the accumulator to the given Monoid's
-	 * operator.
-	 */
-	template<
-		Descriptor descr = descriptors::no_operation,
-		class OP,
-		typename IOType, typename InputType,
-		Backend backend
-	>
-	RC foldl( IOType &x,
-		const internal::Vector< InputType, backend > &y,
-		const OP &op = OP(),
-		const typename std::enable_if< !alp::is_object< IOType >::value &&
-			alp::is_operator< OP >::value,
-		void >::type * const = NULL
-	) {
-		// create empty mask
-		internal::Vector< bool, backend > mask( 0 );
-		// call regular reduce function
-		return foldl< descr >( x, y, mask, op );
-	}
-
-	/**
-	 * Provides a generic implementation of the dot computation on semirings by
-	 * translating it into a dot computation on an additive commutative monoid
-	 * with any multiplicative operator.
-	 *
-	 * For return codes, exception behaviour, performance semantics, template
-	 * and non-template arguments, @see alp::dot.
-	 */
-	template<
-		Descriptor descr = descriptors::no_operation, class Ring,
-		typename IOType, typename InputType1, typename InputType2,
-		Backend backend
-	>
-	RC dot( IOType &x,
-		const internal::Vector< InputType1, backend > &left,
-		const internal::Vector< InputType2, backend > &right,
-		const Ring &ring = Ring(),
-		const typename std::enable_if<
-			!alp::is_object< InputType1 >::value &&
-			!alp::is_object< InputType2 >::value &&
-			!alp::is_object< IOType >::value &&
-			alp::is_semiring< Ring >::value,
-		void >::type * const = NULL
-	) {
-		return alp::dot< descr >( x,
-			left, right,
-			ring.getAdditiveMonoid(),
-			ring.getMultiplicativeOperator()
-		);
-	}
-
-	/**
-	 * Provides a generic implementation of the 2-norm computation.
-	 *
-	 * Proceeds by computing a dot-product on itself and then taking the square
-	 * root of the result.
-	 *
-	 * This function is only available when the output type is floating point.
-	 *
-	 * For return codes, exception behaviour, performance semantics, template
-	 * and non-template arguments, @see alp::dot.
-	 *
-	 * @param[out] x The 2-norm of \a y. The input value of \a x will be ignored.
-	 * @param[in]  y The vector to compute the norm of.
-	 * @param[in] ring The Semiring under which the 2-norm is to be computed.
-	 *
-	 * \warning This function computes \a x out-of-place. This is contrary to
-	 *          standard ALP/GraphBLAS functions that are always in-place.
-	 *
-	 * \warning A \a ring is not sufficient for computing a two-norm. This
-	 *          implementation assumes the standard <tt>sqrt</tt> function
-	 *          must be applied on the result of a dot-product of \a y with
-	 *          itself under the supplied semiring.
-	 */
-	// template<
-	// 	Descriptor descr = descriptors::no_operation, class Ring,
-	// 	typename InputType, typename OutputType, typename OutputStructure,
-	// 	Backend backend
-	// >
-	// RC norm2( Scalar< OutputType, OutputStructure, backend > &x,
-	// 	const internal::Vector< InputType, backend > &y,
-	// 	const Ring &ring = Ring(),
-	// 	const typename std::enable_if<
-	// 		std::is_floating_point< OutputType >::value,
-	// 	void >::type * const = NULL
-	// ) {
-	// 	RC ret = alp::dot< descr >( x, y, y, ring );
-	// 	if( ret == SUCCESS ) {
-	// 		x = sqrt( x );
-	// 	}
-	// 	return ret;
-	// }
-
-	/** Specialization for C++ scalars */
-	template<
-		Descriptor descr = descriptors::no_operation, class Ring,
-		typename InputType, typename OutputType,
-		Backend backend
-	>
-	RC norm2( OutputType &x,
-		const internal::Vector< InputType, backend > &y,
-		const Ring &ring = Ring(),
-		const typename std::enable_if<
-			std::is_floating_point< OutputType >::value,
-		void >::type * const = NULL
-	) {
-		RC ret = alp::dot< descr >( x, y, y, ring );
-		if( ret == SUCCESS ) {
-			x = sqrt( x );
-		}
-		return ret;
-	}
-
-
-} // namespace alp
-
-#undef NO_CAST_RING_ASSERT
-
 #endif // end ``_H_ALP_BLAS1''
 

From ae55714b86377c2cd692988b7f739ba61d97f807 Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Fri, 21 Oct 2022 17:07:08 +0200
Subject: [PATCH 03/23] Fix the way NO_CAST_ASSERT and NO_CAST_OP_ASSERT are
 defined

---
 include/alp/reference/blas0.hpp |  4 ++--
 include/alp/reference/blas1.hpp |  2 --
 include/alp/reference/blas3.hpp | 26 ++++++++++++++++++++++++--
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/include/alp/reference/blas0.hpp b/include/alp/reference/blas0.hpp
index 90ce16107..ac9c4e249 100644
--- a/include/alp/reference/blas0.hpp
+++ b/include/alp/reference/blas0.hpp
@@ -32,7 +32,6 @@
 #include <alp/type_traits.hpp>
 #include <alp/scalar.hpp>
 
-#ifndef NO_CAST_ASSERT
 #define NO_CAST_ASSERT( x, y, z )                                              \
 	static_assert( x,                                                          \
 		"\n\n"                                                                 \
@@ -49,7 +48,6 @@
 		"********************************************************************" \
 		"********************************************************************" \
 		"******************************\n" );
-#endif
 
 namespace alp {
 
@@ -373,5 +371,7 @@ namespace alp {
 	
 } // end namespace ``alp''
 
+#undef NO_CAST_ASSERT
+
 #endif // end ``_H_ALP_REFERENCE_BLAS0''
 
diff --git a/include/alp/reference/blas1.hpp b/include/alp/reference/blas1.hpp
index 552a78cc2..8e6a0709c 100644
--- a/include/alp/reference/blas1.hpp
+++ b/include/alp/reference/blas1.hpp
@@ -35,7 +35,6 @@
 #include "blas2.hpp"
 #include <graphblas/utils/iscomplex.hpp> // use from grb
 
-#ifndef NO_CAST_ASSERT
 #define NO_CAST_ASSERT( x, y, z )                                              \
 	static_assert( x,                                                          \
 		"\n\n"                                                                 \
@@ -52,7 +51,6 @@
 		"********************************************************************" \
 		"********************************************************************" \
 		"******************************\n" );
-#endif
 
 #define NO_CAST_OP_ASSERT( x, y, z )                                           \
 	static_assert( x,                                                          \
diff --git a/include/alp/reference/blas3.hpp b/include/alp/reference/blas3.hpp
index 1fee9ff6c..71c1f9464 100644
--- a/include/alp/reference/blas3.hpp
+++ b/include/alp/reference/blas3.hpp
@@ -36,7 +36,6 @@
 #include "matrix.hpp"
 #include "vector.hpp"
 
-#ifndef NO_CAST_ASSERT
 #define NO_CAST_ASSERT( x, y, z )                                              \
 	static_assert( x,                                                          \
 		"\n\n"                                                                 \
@@ -59,7 +58,29 @@
 		"********************************************************************" \
 		"********************************************************************" \
 		"******************************\n" );
-#endif
+// Compile-time check (static_assert on x) reporting operator-domain mismatches; y names the function, z completes the error text.
+#define NO_CAST_OP_ASSERT( x, y, z )                                           \
+	static_assert( x,                                                          \
+		"\n\n"                                                                 \
+		"********************************************************************" \
+		"********************************************************************" \
+		"******************************\n"                                     \
+		"*     ERROR      | " y " " z ".\n"                                    \
+		"********************************************************************" \
+		"********************************************************************" \
+		"******************************\n"                                     \
+		"* Possible fix 1 | Remove no_casting from the template parameters "   \
+		"in this call to " y ".\n"                                             \
+		"* Possible fix 2 | For all mismatches in the domains of input "       \
+		"parameters and the operator domains, as specified in the "            \
+		"documentation of the function " y ", supply an input argument of "    \
+		"the expected type instead.\n"                                         \
+		"* Possible fix 3 | Provide a compatible operator where all domains "  \
+		"match those of the input parameters, as specified in the "            \
+		"documentation of the function " y ".\n"                               \
+		"********************************************************************" \
+		"********************************************************************" \
+		"******************************\n" );
 
 namespace alp {
 	namespace internal {
@@ -1658,6 +1679,7 @@ namespace alp {
 } // end namespace ``alp''
 
 #undef NO_CAST_ASSERT
+#undef NO_CAST_OP_ASSERT
 
 #endif // end ``_H_ALP_REFERENCE_BLAS3''
 

From 7cf7a466cb07cf76b29b0f3fd492dc0f6a5cf86f Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Fri, 21 Oct 2022 17:10:47 +0200
Subject: [PATCH 04/23] Add missing includes

---
 include/alp/reference/blas2.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/alp/reference/blas2.hpp b/include/alp/reference/blas2.hpp
index 4da796d0d..ac1c4b34c 100644
--- a/include/alp/reference/blas2.hpp
+++ b/include/alp/reference/blas2.hpp
@@ -29,6 +29,7 @@
 #include <alp/config.hpp>
 #include <alp/rc.hpp>
 #include <alp/matrix.hpp>
+#include <alp/blas3.hpp>
 #include <graphblas/utils/iscomplex.hpp>
 
 #define NO_CAST_OP_ASSERT( x, y, z )                                           \

From 7383b0dbb3fbd611189ed68025cd408925d20618 Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Fri, 21 Oct 2022 17:33:11 +0200
Subject: [PATCH 05/23] Move resize to io.hpp

---
 include/alp/base/io.hpp         |  39 ++++++++++
 include/alp/reference/blas0.hpp |  40 -----------
 include/alp/reference/blas1.hpp |  38 ----------
 include/alp/reference/blas2.hpp |  38 ----------
 include/alp/reference/io.hpp    | 124 ++++++++++++++++++++++++++++++++
 5 files changed, 163 insertions(+), 116 deletions(-)

diff --git a/include/alp/base/io.hpp b/include/alp/base/io.hpp
index 75a67dd0c..d82858a87 100644
--- a/include/alp/base/io.hpp
+++ b/include/alp/base/io.hpp
@@ -71,6 +71,45 @@ namespace alp {
 		return PANIC;
 	}
 
+	/**
+	 * Resizes the scalar \a s to hold at least \a new_nz nonzeroes; contents are not
+	 * retained. This backend-generic base version ignores both and returns PANIC.
+	 */
+	template< typename InputType, typename InputStructure, typename length_type, Backend backend >
+	RC resize( Scalar< InputType, InputStructure, backend > &s, const length_type new_nz ) {
+		(void) s;
+		(void) new_nz;
+		return PANIC;
+	}
+
+	/**
+	 * Resizes the vector \a x to hold at least \a new_nz nonzeroes; contents are not
+	 * retained. This backend-generic base version ignores both and returns PANIC.
+	 */
+	template< typename InputType, typename InputStructure, typename View, typename ImfR, typename ImfC, typename length_type, Backend backend >
+	RC resize(
+		Vector< InputType, InputStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
+		const length_type new_nz
+	) noexcept {
+		(void) x;
+		(void) new_nz;
+		return PANIC;
+	}
+
+	/**
+	 * Resizes the matrix \a A to hold at least \a new_nz nonzeroes; contents are not
+	 * retained. This backend-generic base version ignores both and returns PANIC.
+	 */
+	template< typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC, Backend backend >
+	RC resize(
+		Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &A,
+		const size_t new_nz
+	) noexcept {
+		(void) A;
+		(void) new_nz;
+		return PANIC;
+	}
+
 	/**
 	 * Constructs a dense vector from a container of exactly alp::size(x)
 	 * elements. This function aliases to the buildVector routine that takes
diff --git a/include/alp/reference/blas0.hpp b/include/alp/reference/blas0.hpp
index ac9c4e249..bc07e948b 100644
--- a/include/alp/reference/blas0.hpp
+++ b/include/alp/reference/blas0.hpp
@@ -185,46 +185,6 @@ namespace alp {
 	 * @{
 	 */
 
-	/** Resizes the Scalar to have at least the given number of nonzeroes.
-	 * The contents of the scalar are not retained.
-	 *
-	 * Resizing of dense containers is not allowed as the capacity is determined
-	 * by the container dimensions and the storage scheme. Therefore, this
-	 * function will not change the capacity of the container.
-	 * 
-	 * The resize function for Scalars exist to maintain compatibility with
-	 * other containers (i.e., vector and matrix).
-	 *
-	 * Even though the capacity remains unchanged, the contents of the scalar
-	 * are not retained to maintain compatibility with the general specification.
-	 * However, the actual memory will not be reallocated. Rather, the scalar
-	 * will be marked as uninitialized.
-	 * 
-	 * @param[in] x      The Scalar to be resized.
-	 * @param[in] new_nz The number of nonzeroes this vector is to contain.
-	 *
-	 * @return SUCCESS   If \a new_nz is not larger than 1.
-	 *         ILLEGAL   If \a new_nz is larger than 1.
-	 *
-	 * \parblock
-	 * \par Performance semantics.
-	 *        -$ This function consitutes \f$ \Theta(1) \f$ work.
-	 *        -# This function allocates \f$ \Theta(0) \f$
-	 *           bytes of dynamic memory.
-	 *        -# This function does not make system calls.
-	 * \endparblock
-	 * \todo add documentation. In particular, think about the meaning with \a P > 1.
-	 */
-	template< typename InputType, typename InputStructure, typename length_type >
-	RC resize( Scalar< InputType, InputStructure, reference > &s, const length_type new_nz ) {
-		if( new_nz <= 1 ) {
-			setInitialized( s, false );
-			return SUCCESS;
-		} else {
-			return ILLEGAL;
-		}
-	}
-
 	/**
 	 * @brief Reference implementation of \a apply.
 	 */
diff --git a/include/alp/reference/blas1.hpp b/include/alp/reference/blas1.hpp
index 8e6a0709c..c29f6f490 100644
--- a/include/alp/reference/blas1.hpp
+++ b/include/alp/reference/blas1.hpp
@@ -233,44 +233,6 @@ namespace alp {
 		return 0;
 	}
 
-	/** Resizes the vector to have at least the given number of nonzeroes.
-	 * The contents of the vector are not retained.
-	 *
-	 * Resizing of dense containers is not allowed as the capacity is determined
-	 * by the container dimensions and the storage scheme. Therefore, this
-	 * function will not change the capacity of the vector.
-	 *
-	 * Even though the capacity remains unchanged, the contents of the vector
-	 * are not retained to maintain compatibility with the general specification.
-	 * However, the actual memory will not be reallocated. Rather, the vector
-	 * will be marked as uninitialized.
-	 *
-	 * @param[in] x      The Vector to be resized.
-	 * @param[in] new_nz The number of nonzeroes this vector is to contain.
-	 *
-	 * @return SUCCESS   If \a new_nz is not larger than the current capacity
-	 *                   of the vector.
-	 *         ILLEGAL   If \a new_nz is larger than the current capacity of
-	 *                   the vector.
-	 *
-	 * \parblock
-	 * \par Performance semantics.
-	 *        -$ This function consitutes \f$ \Theta(1) \f$ work.
-	 *        -# This function allocates \f$ \Theta(0) \f$
-	 *           bytes of dynamic memory.
-	 *        -# This function does not make system calls.
-	 * \endparblock
-	 * \todo add documentation. In particular, think about the meaning with \a P > 1.
-	 */
-	template< typename InputType, typename InputStructure, typename View, typename ImfR, typename ImfC, typename length_type >
-	RC resize( Vector< InputType, InputStructure, Density::Dense, View, ImfR, ImfC, reference > &x, const length_type new_nz ) {
-		(void)x;
-		(void)new_nz;
-		// TODO implement
-		// setInitialized( x, false );
-		return PANIC;
-	}
-
 	/**
 	 * Sets all elements of a Vector to the given value. Can be masked.
 	 *
diff --git a/include/alp/reference/blas2.hpp b/include/alp/reference/blas2.hpp
index ac1c4b34c..37649ff0a 100644
--- a/include/alp/reference/blas2.hpp
+++ b/include/alp/reference/blas2.hpp
@@ -83,44 +83,6 @@ namespace alp {
 		return A.nz;
 	}
 
-	/**
-	 * Resizes the matrix to have at least the given number of nonzeroes.
-	 * The contents of the matrix are not retained.
-	 *
-	 * Resizing of dense containers is not allowed as the capacity is determined
-	 * by the container dimensions and the storage scheme. Therefore, this
-	 * function will not change the capacity of the matrix.
-	 *
-	 * Even though the capacity remains unchanged, the contents of the matrix
-	 * are not retained to maintain compatibility with the general specification.
-	 * However, the actual memory will not be reallocated. Rather, the matrix
-	 * will be marked as uninitialized.
-	 *
-	 * @param[in] A         The matrix to be resized.
-	 * @param[in] nonzeroes The number of nonzeroes this matrix is to contain.
-	 *
-	 * @return SUCCESS   If \a new_nz is not larger than the current capacity
-	 *                   of the matrix.
-	 *         ILLEGAL   If \a new_nz is larger than the current capacity of
-	 *                   the matrix.
-	 *
-	 * \parblock
-	 * \par Performance semantics.
-	 *        -$ This function consitutes \f$ \Theta(1) \f$ work.
-	 *        -# This function allocates \f$ \Theta(0) \f$
-	 *           bytes of dynamic memory.
-	 *        -# This function does not make system calls.
-	 * \endparblock
-	 */
-	template< typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC >
-	RC resize( Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > &A, const size_t new_nz ) noexcept {
-		(void)A;
-		(void)new_nz;
-		// TODO implement
-		// setInitialized( A, false );
-		return PANIC;
-	}
-
 	/** \internal Delegates to fully masked variant */
 	template< Descriptor descr = descriptors::no_operation,
 		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
diff --git a/include/alp/reference/io.hpp b/include/alp/reference/io.hpp
index c89c73530..e3bb9ad73 100644
--- a/include/alp/reference/io.hpp
+++ b/include/alp/reference/io.hpp
@@ -60,6 +60,130 @@ namespace alp {
 		return SUCCESS;
 	}
 
+	/**
+	 * Resizes the Scalar to have at least the given number of nonzeroes.
+	 * The contents of the scalar are not retained.
+	 *
+	 * Resizing of dense containers is not allowed as the capacity is determined
+	 * by the container dimensions and the storage scheme. Therefore, this
+	 * function will not change the capacity of the container.
+	 *
+	 * The resize function for Scalars exists to maintain compatibility with
+	 * other containers (i.e., vector and matrix).
+	 *
+	 * Even though the capacity remains unchanged, the contents of the scalar
+	 * are not retained to maintain compatibility with the general specification.
+	 * However, the actual memory will not be reallocated. Rather, the scalar
+	 * will be marked as uninitialized.
+	 *
+	 * @param[in,out] s  The Scalar to be resized.
+	 * @param[in] new_nz The number of nonzeroes this scalar is to contain.
+	 *
+	 * @return SUCCESS   If \a new_nz is not larger than 1.
+	 * @return ILLEGAL   If \a new_nz is larger than 1.
+	 *
+	 * \parblock
+	 * \par Performance semantics.
+	 *        -# This function constitutes \f$ \Theta(1) \f$ work.
+	 *        -# This function allocates \f$ \Theta(0) \f$
+	 *           bytes of dynamic memory.
+	 *        -# This function does not make system calls.
+	 * \endparblock
+	 * \todo add documentation. In particular, think about the meaning with \a P > 1.
+	 */
+	template< typename InputType, typename InputStructure, typename length_type >
+	RC resize( Scalar< InputType, InputStructure, reference > &s, const length_type new_nz ) noexcept {
+		if( new_nz <= 1 ) {
+			setInitialized( s, false );
+			return SUCCESS;
+		} else {
+			return ILLEGAL;
+		}
+	}
+
+	/**
+	 * Resizes the vector to have at least the given number of nonzeroes.
+	 * The contents of the vector are not retained.
+	 *
+	 * Resizing of dense containers is not allowed as the capacity is determined
+	 * by the container dimensions and the storage scheme. Therefore, this
+	 * function will not change the capacity of the vector.
+	 *
+	 * Even though the capacity remains unchanged, the contents of the vector
+	 * are not retained to maintain compatibility with the general specification.
+	 * However, the actual memory will not be reallocated. Rather, the vector
+	 * will be marked as uninitialized.
+	 *
+	 * @param[in] x      The Vector to be resized.
+	 * @param[in] new_nz The number of nonzeroes this vector is to contain.
+	 *
+	 * @return PANIC     Always, at present: the body is not yet implemented.
+	 *                   The intended contract is SUCCESS if \a new_nz is not
+	 *                   larger than the current capacity of the vector, and
+	 *                   ILLEGAL otherwise.
+	 *
+	 * \parblock
+	 * \par Performance semantics.
+	 *        -# This function constitutes \f$ \Theta(1) \f$ work.
+	 *        -# This function allocates \f$ \Theta(0) \f$
+	 *           bytes of dynamic memory.
+	 *        -# This function does not make system calls.
+	 * \endparblock
+	 * \todo add documentation. In particular, think about the meaning with \a P > 1.
+	 */
+	template< typename InputType, typename InputStructure, typename View, typename ImfR, typename ImfC, typename length_type >
+	RC resize(
+		Vector< InputType, InputStructure, Density::Dense, View, ImfR, ImfC, reference > &x,
+		const length_type new_nz
+	) noexcept {
+		(void) x;
+		(void) new_nz;
+		// \todo Add implementation.
+		// setInitialized( x, false );
+		return PANIC;
+	}
+
+	/**
+	 * Resizes the matrix to have at least the given number of nonzeroes.
+	 * The contents of the matrix are not retained.
+	 *
+	 * Resizing of dense containers is not allowed as the capacity is determined
+	 * by the container dimensions and the storage scheme. Therefore, this
+	 * function will not change the capacity of the matrix.
+	 *
+	 * Even though the capacity remains unchanged, the contents of the matrix
+	 * are not retained to maintain compatibility with the general specification.
+	 * However, the actual memory will not be reallocated. Rather, the matrix
+	 * will be marked as uninitialized.
+	 *
+	 * @param[in] A         The matrix to be resized.
+	 * @param[in] new_nz    The number of nonzeroes this matrix is to contain.
+	 *
+	 * @return PANIC     Always, at present: the body is not yet implemented.
+	 *                   The intended contract is SUCCESS if \a new_nz is not
+	 *                   larger than the current capacity of the matrix, and
+	 *                   ILLEGAL otherwise.
+	 *
+	 * \parblock
+	 * \par Performance semantics.
+	 *        -# This function constitutes \f$ \Theta(1) \f$ work.
+	 *        -# This function allocates \f$ \Theta(0) \f$
+	 *           bytes of dynamic memory.
+	 *        -# This function does not make system calls.
+	 * \endparblock
+	 */
+	template< typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC >
+	RC resize(
+		Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > &A,
+		const size_t new_nz
+	) noexcept {
+		(void) A;
+		(void) new_nz;
+		// \todo Add implementation.
+		// setInitialized( A, false );
+		return PANIC;
+	}
+
 	/**
 	 * Assigns elements to a matrix from an iterator.
 	 *

From 1f83d0ddaa2d50342d4f51c8fc5e1b0afb39e9dc Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Fri, 21 Oct 2022 17:43:13 +0200
Subject: [PATCH 06/23] Move set operations to io.hpp

---
 include/alp/base/io.hpp         |  85 +++++++
 include/alp/reference/blas1.hpp | 256 ---------------------
 include/alp/reference/blas3.hpp | 116 ----------
 include/alp/reference/io.hpp    | 378 ++++++++++++++++++++++++++++++++
 4 files changed, 463 insertions(+), 372 deletions(-)

diff --git a/include/alp/base/io.hpp b/include/alp/base/io.hpp
index d82858a87..0dec80ef9 100644
--- a/include/alp/base/io.hpp
+++ b/include/alp/base/io.hpp
@@ -110,6 +110,91 @@ namespace alp {
 		return PANIC;
 	}
 
+	/**
+	 * Sets all elements of the Vector \a x to the value held by the Scalar
+	 * \a val, which belongs to the same backend as \a x.
+	 *
+	 * @returns PANIC Always: this backend-generic base definition does no work.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC,
+		typename T, typename ValStructure, Backend backend
+	>
+	RC set(
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
+		const Scalar< T, ValStructure, backend > val,
+		const typename std::enable_if< !alp::is_object< DataType >::value && !alp::is_object< T >::value, void >::type * const = NULL
+	) {
+		(void) x;
+		(void) val;
+		return PANIC;
+	}
+
+	/**
+	 * Sets the element of the Vector \a x at position \a i to the value \a val.
+	 * @returns PANIC Always: this backend-generic base definition does no work.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC, typename ValStructure, typename T, Backend backend
+	>
+	RC setElement(
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
+		const Scalar< T, ValStructure, backend > val,
+		const size_t i,
+		const typename std::enable_if< ! alp::is_object< DataType >::value && ! alp::is_object< T >::value, void >::type * const = NULL
+	) {
+		(void) x; (void) val; (void) i; // arguments are deliberately unused
+		return PANIC;
+	}
+
+	/** C++ scalar variant: \a val is a plain value instead of an alp::Scalar.
+	 *  This backend-generic base definition does no work and returns PANIC. */
+	template< Descriptor descr = descriptors::no_operation,
+		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC, typename T, Backend backend
+	>
+	RC setElement(
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
+		const T val,
+		const size_t i,
+		const typename std::enable_if< ! alp::is_object< DataType >::value && ! alp::is_object< T >::value, void >::type * const = NULL
+	) {
+		(void) x; (void) val; (void) i; // arguments are deliberately unused
+		return PANIC;
+	}
+
+	/**
+	 * Sets all elements of the output matrix to the values of the input matrix,
+	 * C = A. This backend-generic base definition does no work and returns PANIC.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC, Backend backend
+	>
+	RC set(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &A
+	) noexcept {
+		(void) C; (void) A; // arguments are deliberately unused
+		return PANIC;
+	}
+
+	/**
+	 * Sets all elements of the given matrix to the value of the given scalar,
+	 * C = val. This backend-generic base definition does no work and returns PANIC.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType, typename InputStructure, Backend backend
+	>
+	RC set(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Scalar< InputType, InputStructure, backend > &val
+	) noexcept {
+		(void) C; (void) val; // arguments are deliberately unused
+		return PANIC;
+	}
+
 	/**
 	 * Constructs a dense vector from a container of exactly alp::size(x)
 	 * elements. This function aliases to the buildVector routine that takes
diff --git a/include/alp/reference/blas1.hpp b/include/alp/reference/blas1.hpp
index c29f6f490..b4f265d98 100644
--- a/include/alp/reference/blas1.hpp
+++ b/include/alp/reference/blas1.hpp
@@ -233,262 +233,6 @@ namespace alp {
 		return 0;
 	}
 
-	/**
-	 * Sets all elements of a Vector to the given value. Can be masked.
-	 *
-	 * This function is functionally equivalent to
-	 * \code
-	 * alp::operators::right_assign< DataType > op;
-	 * return foldl< descr >( x, val, op );
-	 * \endcode,
-	 * \code
-	 * alp::operators::left_assign< DataType > op;
-	 * return foldr< descr >( val, x, op );
-	 * \endcode, and the following pseudocode
-	 * \code
-	 * for( size_t i = 0; i < size(x); ++i ) {
-	 *     if( mask(i) ) { setElement( x, i, val ); }
-	 * \endcode.
-	 *
-	 * @tparam descr         The descriptor used for this operation.
-	 * @tparam DataType      The type of each element in the vector \a x.
-	 * @tparam DataStructure The structure of the vector \a x.
-	 * @tparam View          The view type applied to the vector \a x.
-	 * @tparam T             The type of the given value.
-	 *
-	 * \parblock
-	 * \par Accepted descriptors
-	 *   -# alp::descriptors::no_operation
-	 *   -# alp::descriptors::no_casting
-	 * \endparblock
-	 *
-	 * @param[in,out] x The Vector of which every element is to be set to equal
-	 *                  \a val.
-	 * @param[in]   val The value to set each element of \a x equal to.
-	 *
-	 * @returns SUCCESS       When the call completes successfully.
-	 *
-	 * When \a descr includes alp::descriptors::no_casting and if \a T does not
-	 * match \a DataType, the code shall not compile.
-	 *
-	//  * \parblock
-	//  * \par Performance semantics
-	//  * A call to this function
-	//  *   -# consists of \f$ \Theta(n) \f$ work;
-	//  *   -# moves \f$ \Theta(n) \f$ bytes of memory;
-	//  *   -# does not allocate nor free any dynamic memory;
-	//  *   -# shall not make any system calls.
-	//  * \endparblock
-	 *
-	 * @see alp::foldl.
-	 * @see alp::foldr.
-	 * @see alp::operators::left_assign.
-	 * @see alp::operators::right_assign.
-	 * @see alp::setElement.
-	 */
-	template<
-		Descriptor descr = descriptors::no_operation,
-		typename DataType, typename DataStructure, typename View,
-		typename ImfR, typename ImfC,
-		typename T, typename ValStructure
-	>
-	RC set(
-		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x,
-		const Scalar< T, ValStructure, reference > val,
-		const typename std::enable_if<
-			!alp::is_object< DataType >::value &&
-			!alp::is_object< T >::value,
-		void >::type * const = NULL
-	) {
-		// static sanity checks
-		NO_CAST_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, unmasked)",
-			"called with a value type that does not match that of the given "
-			"vector" );
-
-		if( ! internal::getInitialized( val ) ) {
-			internal::setInitialized( x, false );
-			return SUCCESS;
-		}
-
-		// foldl requires left-hand side to be initialized prior to the call
-		internal::setInitialized( x, true );
-		return foldl( x, val, alp::operators::right_assign< DataType >() );
-	}
-
-	/**
-	 * Sets the element of a given Vector at a given position to a given value.
-	 *
-	 * If the input Vector \a x already has an element \f$ x_i \f$, that element
-	 * is overwritten to the given value \a val. If no such element existed, it
-	 * is added and set equal to \a val. The number of nonzeroes in \a x may thus
-	 * be increased by one due to a call to this function.
-	 *
-	 * The parameter \a i may not be greater or equal than the size of \a x.
-	 *
-	 * @tparam descr         The descriptor to be used during evaluation of this
-	 *                       function.
-	 * @tparam DataType      The type of the elements of \a x.
-	 * @tparam DataStructure The structure of the vector \a x.
-	 * @tparam View          The view type applied to the vector \a x.
-	 * @tparam T             The type of the value to be set.
-	 *
-	 * @param[in,out] x The vector to be modified.
-	 * @param[in]   val The value \f$ x_i \f$ should read after function exit.
-	 * @param[in]     i The index of the element of \a x to set.
-	 *
-	 * @return alp::SUCCESS   Upon successful execution of this operation.
-	 * @return alp::MISMATCH  If \a i is greater or equal than the dimension of
-	 *                        \a x.
-	 *
-	 * \parblock
-	 * \par Accepted descriptors
-	 *   -# alp::descriptors::no_operation
-	 *   -# alp::descriptors::no_casting
-	 * \endparblock
-	 *
-	 * When \a descr includes alp::descriptors::no_casting and if \a T does not
-	 * match \a DataType, the code shall not compile.
-	 *
-	//  * \parblock
-	//  * \par Performance semantics
-	//  * A call to this function
-	//  *   -# consists of \f$ \Theta(1) \f$ work;
-	//  *   -# moves \f$ \Theta(1) \f$ bytes of memory;
-	//  *   -# does not allocate nor free any dynamic memory;
-	//  *   -# shall not make any system calls.
-	//  * \endparblock
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC, typename ValStructure,
-		typename T
-	>
-	RC setElement( Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x,
-		const Scalar< T, ValStructure, reference > val,
-		const size_t i,
-		const typename std::enable_if< ! alp::is_object< DataType >::value && ! alp::is_object< T >::value, void >::type * const = NULL ) {
-		// static sanity checks
-		NO_CAST_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, at index)",
-			"called with a value type that does not match that of the given "
-			"Vector" );
-
-		throw std::runtime_error( "Needs an implementation." );
-
-		// done
-		return SUCCESS;
-	}
-
-	/** C++ scalar variant */
-	template< Descriptor descr = descriptors::no_operation,
-		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC,
-		typename T
-	>
-	RC setElement( Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x,
-		const T val,
-		const size_t i,
-		const typename std::enable_if< ! alp::is_object< DataType >::value && ! alp::is_object< T >::value, void >::type * const = NULL ) {
-		// static sanity checks
-		NO_CAST_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, at index)",
-			"called with a value type that does not match that of the given "
-			"Vector" );
-
-		// delegate
-		return setElement( x, Scalar< T >( val ), i );
-	}
-
-	/**
-	 * Sets the content of a given vector \a x to be equal to that of
-	 * another given vector \a y. Can be masked.
-	 *
-	 * This operation is functionally equivalent to
-	 * \code
-	 * alp::operators::right_assign< T > op;
-	 * alp::foldl( x, y, op );
-	 * \endcode,
-	 * \code
-	 * alp::operators::left_assign < T > op;
-	 * alp::foldr( y, x, op );
-	 * \endcode, as well as the following pseudocode
-	 * \code
-	 * for( each nonzero in y ) {
-	 *    setElement( x, nonzero.index, nonzero.value );
-	 * }
-	 * \endcode.
-	 *
-	 * The vector \a x may not equal \a y.
-	 *
-	 * \parblock
-	 * \par Accepted descriptors
-	 *   -# alp::descriptors::no_operation
-	 *   -# alp::descriptors::no_casting
-	 * \endparblock
-	 *
-	 * @tparam descr           The descriptor of the operation.
-	 * @tparam OutputType      The type of each element in the output vector.
-	 * @tparam InputType       The type of each element in the input vector.
-	 * @tparam OutputStructure The structure of the ouput vector.
-	 * @tparam InputStructure  The structure of the input vector.
-	 * @tparam OuputView       The view applied to the output vector.
-	 * @tparam InputView       The view applied to the input vector.
-	 *
-	 * @param[in,out] x The vector to be set.
-	 * @param[in]     y The source vector.
-	 *
-	 * When \a descr includes alp::descriptors::no_casting and if \a InputType
-	 * does not match \a OutputType, the code shall not compile.
-	 *
-	//  * \parblock
-	//  * \par Performance semantics
-	//  * A call to this function
-	//  *   -# consists of \f$ \Theta(n) \f$ work;
-	//  *   -# moves \f$ \Theta(n) \f$ bytes of memory;
-	//  *   -# does not allocate nor free any dynamic memory;
-	//  *   -# shall not make any system calls.
-	//  * \endparblock
-	 *
-	 * @see alp::foldl.
-	 * @see alp::foldr.
-	 * @see alp::operators::left_assign.
-	 * @see alp::operators::right_assign.
-	 * @see alp::setElement.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC
-	>
-	RC set(
-		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > & x,
-		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > & y
-	) {
-		// static sanity checks
-		NO_CAST_ASSERT(
-			( ! ( descr & descriptors::no_casting ) || std::is_same< OutputType, InputType >::value ), "alp::copy (Vector)", "called with vector parameters whose element data types do not match" );
-		constexpr bool out_is_void = std::is_void< OutputType >::value;
-		constexpr bool in_is_void = std::is_void< OutputType >::value;
-		static_assert( ! in_is_void || out_is_void,
-			"alp::set (reference, Vector <- Vector, masked): "
-			"if input is void, then the output must be also" );
-		static_assert( ! ( descr & descriptors::use_index ) || ! out_is_void,
-			"alp::set (reference, Vector <- Vector, masked): "
-			"use_index descriptor cannot be set if output vector is void" );
-
-		// check contract
-		if( reinterpret_cast< void * >( &x ) == reinterpret_cast< const void * >( &y ) ) {
-			return ILLEGAL;
-		}
-
-		if( getLength( x ) != getLength( y ) ) {
-			return MISMATCH;
-		}
-
-		if( !internal::getInitialized( y ) ) {
-			setInitialized( x, false );
-			return SUCCESS;
-		}
-
-		internal::setInitialized( x, true );
-		return foldl( x, y, alp::operators::right_assign< OutputType >() );
-	}
-
 	/**
 	 * Folds all elements in a ALP Vector \a x into a single value \a beta.
 	 *
diff --git a/include/alp/reference/blas3.hpp b/include/alp/reference/blas3.hpp
index 71c1f9464..30af8de39 100644
--- a/include/alp/reference/blas3.hpp
+++ b/include/alp/reference/blas3.hpp
@@ -1560,122 +1560,6 @@ namespace alp {
 
 	}
 
-	/**
-	 * Sets all elements of the output matrix to the values of the input matrix.
-	 * C = A
-	 * 
-	 * @tparam descr
-	 * @tparam OutputType      Data type of the output matrix C
-	 * @tparam OutputStructure Structure of the matrix C
-	 * @tparam OutputView      View type applied to the matrix C
-	 * @tparam InputType       Data type of the scalar a
-	 *
-	 * @param C    Matrix whose values are to be set
-	 * @param A    The input matrix
-	 *
-	 * @return RC  SUCCESS on the successful execution of the set
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC
-	>
-	RC set(
-		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > &C,
-		const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > &A
-	) noexcept {
-		static_assert(
-			!std::is_same< OutputType, void >::value,
-			"alp::set (set to value): cannot have a pattern matrix as output"
-		);
-#ifdef _DEBUG
-		std::cout << "Called alp::set (matrix-to-matrix, reference)" << std::endl;
-#endif
-		// static checks
-		NO_CAST_ASSERT(
-			( !( descr & descriptors::no_casting ) || std::is_same< InputType, OutputType >::value ),
-			"alp::set", "called with non-matching value types"
-		);
-
-		static_assert(
-			!internal::is_functor_based<
-				Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference >
-			>::value,
-			"alp::set cannot be called with a functor-based matrix as a destination."
-		);
-
-		// TODO: Improve this check to account for non-zero structrue (i.e., bands)
-		//       and algebraic properties (e.g., symmetry)
-		static_assert(
-			std::is_same< OutputStructure, InputStructure >::value,
-			"alp::set cannot be called for containers with different structures."
-		);
-
-		if( ( nrows( C ) != nrows( A ) ) || ( ncols( C ) != ncols( A ) ) ) {
-			return MISMATCH;
-		}
-
-		if( !internal::getInitialized( A ) ) {
-			internal::setInitialized( C, false );
-			return SUCCESS;
-		}
-
-		internal::setInitialized( C, true );
-		return foldl( C, A, alp::operators::right_assign< OutputType >() );
-	}
-
-	/**
-	 * Sets all elements of the given matrix to the value of the given scalar.
-	 * C = val
-	 * 
-	 * @tparam descr
-	 * @tparam OutputType      Data type of the output matrix C
-	 * @tparam OutputStructure Structure of the matrix C
-	 * @tparam OutputView      View type applied to the matrix C
-	 * @tparam InputType       Data type of the scalar a
-	 *
-	 * @param C    Matrix whose values are to be set
-	 * @param val  The value to set the elements of the matrix C
-	 *
-	 * @return RC  SUCCESS on the successful execution of the set
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename InputType, typename InputStructure
-	>
-	RC set(
-		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > &C,
-		const Scalar< InputType, InputStructure, reference > &val
-	) noexcept {
-
-		static_assert(
-			!std::is_same< OutputType, void >::value,
-			"alp::set (set to matrix): cannot have a pattern matrix as output"
-		);
-#ifdef _DEBUG
-		std::cout << "Called alp::set (matrix-to-value, reference)" << std::endl;
-#endif
-		// static checks
-		NO_CAST_ASSERT(
-			( !( descr & descriptors::no_casting ) || std::is_same< InputType, OutputType >::value ),
-			"alp::set", "called with non-matching value types"
-		);
-
-		static_assert(
-			!internal::is_functor_based<
-				Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference >
-			>::value,
-			"alp::set cannot be called with a functor-based matrix as a destination."
-		);
-
-		if( !internal::getInitialized( val ) ) {
-			internal::setInitialized( C, false );
-			return SUCCESS;
-		}
-
-		internal::setInitialized( C, true );
-		return foldl( C, val, alp::operators::right_assign< OutputType >() );
-	}
-
 } // end namespace ``alp''
 
 #undef NO_CAST_ASSERT
diff --git a/include/alp/reference/io.hpp b/include/alp/reference/io.hpp
index e3bb9ad73..0a915bee3 100644
--- a/include/alp/reference/io.hpp
+++ b/include/alp/reference/io.hpp
@@ -184,6 +184,384 @@ namespace alp {
 		return PANIC;
 	}
 
+	/**
+	 * Sets all elements of a Vector to the given value. Can be masked.
+	 *
+	 * This function is functionally equivalent to
+	 * \code
+	 * alp::operators::right_assign< DataType > op;
+	 * return foldl< descr >( x, val, op );
+	 * \endcode,
+	 * \code
+	 * alp::operators::left_assign< DataType > op;
+	 * return foldr< descr >( val, x, op );
+	 * \endcode, and the following pseudocode
+	 * \code
+	 * for( size_t i = 0; i < size(x); ++i )
+	 *     if( mask(i) ) { setElement( x, i, val ); }
+	 * \endcode.
+	 *
+	 * @tparam descr         The descriptor used for this operation.
+	 * @tparam DataType      The type of each element in the vector \a x.
+	 * @tparam DataStructure The structure of the vector \a x.
+	 * @tparam View          The view type applied to the vector \a x.
+	 * @tparam T             The type of the given value.
+	 *
+	 * \parblock
+	 * \par Accepted descriptors
+	 *   -# alp::descriptors::no_operation
+	 *   -# alp::descriptors::no_casting
+	 * \endparblock
+	 *
+	 * @param[in,out] x The Vector of which every element is to be set to equal
+	 *                  \a val.
+	 * @param[in]   val The value to set each element of \a x equal to.
+	 *
+	 * @returns SUCCESS       When the call completes successfully.
+	 *
+	 * When \a descr includes alp::descriptors::no_casting and if \a T does not
+	 * match \a DataType, the code shall not compile.
+	 *
+	//  * \parblock
+	//  * \par Performance semantics
+	//  * A call to this function
+	//  *   -# consists of \f$ \Theta(n) \f$ work;
+	//  *   -# moves \f$ \Theta(n) \f$ bytes of memory;
+	//  *   -# does not allocate nor free any dynamic memory;
+	//  *   -# shall not make any system calls.
+	//  * \endparblock
+	 *
+	 * @see alp::foldl.
+	 * @see alp::foldr.
+	 * @see alp::operators::left_assign.
+	 * @see alp::operators::right_assign.
+	 * @see alp::setElement.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename DataType, typename DataStructure, typename View,
+		typename ImfR, typename ImfC,
+		typename T, typename ValStructure
+	>
+	RC set(
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x,
+		const Scalar< T, ValStructure, reference > val,
+		const typename std::enable_if< // overload participates only if alp::is_object is false for both DataType and T
+			!alp::is_object< DataType >::value &&
+			!alp::is_object< T >::value,
+		void >::type * const = NULL
+	) {
+		// compile-time check: when no_casting is requested, T must be exactly DataType
+		NO_CAST_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, unmasked)",
+			"called with a value type that does not match that of the given "
+			"vector" );
+
+		// an uninitialized source value only marks x as uninitialized; no element is written
+		if( ! internal::getInitialized( val ) ) {
+			internal::setInitialized( x, false );
+			return SUCCESS;
+		}
+
+		// foldl requires left-hand side to be initialized prior to the call
+		internal::setInitialized( x, true );
+		return foldl( x, val, alp::operators::right_assign< DataType >() ); // the return code of foldl is passed through unchanged
+	}
+
+	/**
+	 * Sets the element of a given Vector at a given position to a given value.
+	 *
+	 * If the input Vector \a x already has an element \f$ x_i \f$, that element
+	 * is overwritten to the given value \a val. If no such element existed, it
+	 * is added and set equal to \a val. The number of nonzeroes in \a x may thus
+	 * be increased by one due to a call to this function.
+	 *
+	 * The parameter \a i must be strictly smaller than the size of \a x.
+	 *
+	 * @tparam descr         The descriptor to be used during evaluation of this
+	 *                       function.
+	 * @tparam DataType      The type of the elements of \a x.
+	 * @tparam DataStructure The structure of the vector \a x.
+	 * @tparam View          The view type applied to the vector \a x.
+	 * @tparam T             The type of the value to be set.
+	 *
+	 * @param[in,out] x The vector to be modified.
+	 * @param[in]   val The value \f$ x_i \f$ should read after function exit.
+	 * @param[in]     i The index of the element of \a x to set.
+	 *
+	 * @return alp::SUCCESS   Upon successful execution of this operation.
+	 * @return alp::MISMATCH  If \a i is greater than or equal to the dimension
+	 *                        of \a x.
+	 *
+	 * \parblock
+	 * \par Accepted descriptors
+	 *   -# alp::descriptors::no_operation
+	 *   -# alp::descriptors::no_casting
+	 * \endparblock
+	 *
+	 * When \a descr includes alp::descriptors::no_casting and if \a T does not
+	 * match \a DataType, the code shall not compile.
+	 *
+	//  * \parblock
+	//  * \par Performance semantics
+	//  * A call to this function
+	//  *   -# consists of \f$ \Theta(1) \f$ work;
+	//  *   -# moves \f$ \Theta(1) \f$ bytes of memory;
+	//  *   -# does not allocate nor free any dynamic memory;
+	//  *   -# shall not make any system calls.
+	//  * \endparblock
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC, typename ValStructure,
+		typename T
+	>
+	RC setElement(
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x,
+		const Scalar< T, ValStructure, reference > val,
+		const size_t i,
+		const typename std::enable_if< ! alp::is_object< DataType >::value && ! alp::is_object< T >::value, void >::type * const = NULL
+	) {
+		// compile-time check: when no_casting is requested, T must be exactly DataType
+		NO_CAST_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, at index)",
+			"called with a value type that does not match that of the given "
+			"Vector" );
+		// NOTE: placeholder -- x, val, and i are not read yet; every call throws std::runtime_error
+		throw std::runtime_error( "Needs an implementation." );
+
+		// not reached while the throw above is in place
+		return SUCCESS;
+	}
+
+	/** C++ scalar variant */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC,
+		typename T
+	>
+	RC setElement(
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x,
+		const T val,
+		const size_t i,
+		const typename std::enable_if< ! alp::is_object< DataType >::value && ! alp::is_object< T >::value, void >::type * const = NULL
+	) {
+		// static sanity checks
+		NO_CAST_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, at index)",
+			"called with a value type that does not match that of the given "
+			"Vector" );
+
+		// delegate
+		return setElement( x, Scalar< T >( val ), i );
+	}
+
+	/**
+	 * Sets the content of a given vector \a x to be equal to that of
+	 * another given vector \a y. Can be masked.
+	 *
+	 * This operation is functionally equivalent to
+	 * \code
+	 * alp::operators::right_assign< T > op;
+	 * alp::foldl( x, y, op );
+	 * \endcode,
+	 * \code
+	 * alp::operators::left_assign < T > op;
+	 * alp::foldr( y, x, op );
+	 * \endcode, as well as the following pseudocode
+	 * \code
+	 * for( each nonzero in y ) {
+	 *    setElement( x, nonzero.index, nonzero.value );
+	 * }
+	 * \endcode.
+	 *
+	 * The vector \a x may not equal \a y.
+	 *
+	 * \parblock
+	 * \par Accepted descriptors
+	 *   -# alp::descriptors::no_operation
+	 *   -# alp::descriptors::no_casting
+	 * \endparblock
+	 *
+	 * @tparam descr           The descriptor of the operation.
+	 * @tparam OutputType      The type of each element in the output vector.
+	 * @tparam InputType       The type of each element in the input vector.
+	 * @tparam OutputStructure The structure of the output vector.
+	 * @tparam InputStructure  The structure of the input vector.
+	 * @tparam OutputView      The view applied to the output vector.
+	 * @tparam InputView       The view applied to the input vector.
+	 *
+	 * @param[in,out] x The vector to be set.
+	 * @param[in]     y The source vector.
+	 *
+	 * When \a descr includes alp::descriptors::no_casting and if \a InputType
+	 * does not match \a OutputType, the code shall not compile.
+	 *
+	//  * \parblock
+	//  * \par Performance semantics
+	//  * A call to this function
+	//  *   -# consists of \f$ \Theta(n) \f$ work;
+	//  *   -# moves \f$ \Theta(n) \f$ bytes of memory;
+	//  *   -# does not allocate nor free any dynamic memory;
+	//  *   -# shall not make any system calls.
+	//  * \endparblock
+	 *
+	 * @see alp::foldl.
+	 * @see alp::foldr.
+	 * @see alp::operators::left_assign.
+	 * @see alp::operators::right_assign.
+	 * @see alp::setElement.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC
+	>
+	RC set(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > &x,
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > &y
+	) {
+		// compile-time check: when no_casting is requested, both element types must be identical
+		NO_CAST_ASSERT(
+			( ! ( descr & descriptors::no_casting ) || std::is_same< OutputType, InputType >::value ), "alp::copy (Vector)", "called with vector parameters whose element data types do not match" );
+		constexpr bool out_is_void = std::is_void< OutputType >::value;
+		constexpr bool in_is_void = std::is_void< InputType >::value; // must inspect the input type, otherwise the check below is vacuous
+		static_assert( ! in_is_void || out_is_void,
+			"alp::set (reference, Vector <- Vector, masked): "
+			"if input is void, then the output must be also" );
+		static_assert( ! ( descr & descriptors::use_index ) || ! out_is_void,
+			"alp::set (reference, Vector <- Vector, masked): "
+			"use_index descriptor cannot be set if output vector is void" );
+
+		// check contract: the source and destination may not be the same object
+		if( reinterpret_cast< void * >( &x ) == reinterpret_cast< const void * >( &y ) ) {
+			return ILLEGAL;
+		}
+		// both vectors must have the same length
+		if( getLength( x ) != getLength( y ) ) {
+			return MISMATCH;
+		}
+		// an uninitialized source only marks x as uninitialized; no element is copied
+		if( !internal::getInitialized( y ) ) {
+			internal::setInitialized( x, false );
+			return SUCCESS;
+		}
+		// foldl requires an initialized left-hand side; right_assign then overwrites x with y
+		internal::setInitialized( x, true );
+		return foldl( x, y, alp::operators::right_assign< OutputType >() );
+	}
+
+	/**
+	 * Sets all elements of the output matrix to the values of the input matrix.
+	 * C = A
+	 *
+	 * @tparam descr
+	 * @tparam OutputType      Data type of the output matrix C
+	 * @tparam OutputStructure Structure of the matrix C
+	 * @tparam OutputView      View type applied to the matrix C
+	 * @tparam InputType       Data type of the input matrix A
+	 *
+	 * @param C    Matrix whose values are to be set
+	 * @param A    The input matrix
+	 *
+	 * @return RC  SUCCESS on the successful execution of the set
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC
+	>
+	RC set(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > &C,
+		const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > &A
+	) noexcept {
+		static_assert(
+			!std::is_same< OutputType, void >::value,
+			"alp::set (set to matrix): cannot have a pattern matrix as output"
+		);
+#ifdef _DEBUG
+		std::cout << "Called alp::set (matrix-to-matrix, reference)" << std::endl;
+#endif
+		// compile-time check: when no_casting is requested, both element types must be identical
+		NO_CAST_ASSERT(
+			( !( descr & descriptors::no_casting ) || std::is_same< InputType, OutputType >::value ),
+			"alp::set", "called with non-matching value types"
+		);
+
+		static_assert(
+			!internal::is_functor_based<
+				Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference >
+			>::value,
+			"alp::set cannot be called with a functor-based matrix as a destination."
+		);
+
+		// TODO: Improve this check to account for non-zero structure (i.e., bands)
+		//       and algebraic properties (e.g., symmetry)
+		static_assert(
+			std::is_same< OutputStructure, InputStructure >::value,
+			"alp::set cannot be called for containers with different structures."
+		);
+
+		// both matrices must have the same dimensions
+		if( ( nrows( C ) != nrows( A ) ) || ( ncols( C ) != ncols( A ) ) ) {
+			return MISMATCH;
+		}
+
+		// an uninitialized source only marks C as uninitialized; no element is copied
+		if( !internal::getInitialized( A ) ) {
+			internal::setInitialized( C, false );
+			return SUCCESS;
+		}
+
+		internal::setInitialized( C, true ); // mark the destination initialized before folding into it
+		return foldl( C, A, alp::operators::right_assign< OutputType >() );
+	}
+
+	/**
+	 * Sets all elements of the given matrix to the value of the given scalar.
+	 * C = val
+	 *
+	 * @tparam descr
+	 * @tparam OutputType      Data type of the output matrix C
+	 * @tparam OutputStructure Structure of the matrix C
+	 * @tparam OutputView      View type applied to the matrix C
+	 * @tparam InputType       Data type of the scalar val
+	 *
+	 * @param C    Matrix whose values are to be set
+	 * @param val  The value to set the elements of the matrix C
+	 *
+	 * @return RC  SUCCESS on the successful execution of the set
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType, typename InputStructure
+	>
+	RC set(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > &C,
+		const Scalar< InputType, InputStructure, reference > &val
+	) noexcept {
+
+		static_assert(
+			!std::is_same< OutputType, void >::value,
+			"alp::set (set to value): cannot have a pattern matrix as output"
+		);
+#ifdef _DEBUG
+		std::cout << "Called alp::set (matrix-to-value, reference)" << std::endl;
+#endif
+		// compile-time check: when no_casting is requested, both value types must be identical
+		NO_CAST_ASSERT(
+			( !( descr & descriptors::no_casting ) || std::is_same< InputType, OutputType >::value ),
+			"alp::set", "called with non-matching value types"
+		);
+
+		static_assert(
+			!internal::is_functor_based<
+				Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference >
+			>::value,
+			"alp::set cannot be called with a functor-based matrix as a destination."
+		);
+		// an uninitialized source value only marks C as uninitialized; no element is written
+		if( !internal::getInitialized( val ) ) {
+			internal::setInitialized( C, false );
+			return SUCCESS;
+		}
+
+		internal::setInitialized( C, true ); // mark the destination initialized before folding into it
+		return foldl( C, val, alp::operators::right_assign< OutputType >() );
+	}
+
 	/**
 	 * Assigns elements to a matrix from an iterator.
 	 *

From e087e56fca4a4874dfed2e3c5f7de7a29372700e Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Fri, 21 Oct 2022 17:54:56 +0200
Subject: [PATCH 07/23] Remove setElement taking a C++ scalar

---
 include/alp/base/io.hpp      | 15 ---------------
 include/alp/reference/io.hpp | 21 ---------------------
 2 files changed, 36 deletions(-)

diff --git a/include/alp/base/io.hpp b/include/alp/base/io.hpp
index 0dec80ef9..9e76d57bd 100644
--- a/include/alp/base/io.hpp
+++ b/include/alp/base/io.hpp
@@ -148,21 +148,6 @@ namespace alp {
 		return PANIC;
 	}
 
-	/** C++ scalar variant */
-	template<
-		Descriptor descr = descriptors::no_operation,
-		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC,
-		typename T
-	>
-	RC setElement(
-		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > &x,
-		const T val,
-		const size_t i,
-		const typename std::enable_if< ! alp::is_object< DataType >::value && ! alp::is_object< T >::value, void >::type * const = NULL
-	) {
-		return PANIC;
-	}
-
 	/**
 	 * Sets all elements of the output matrix to the values of the input matrix.
 	 * C = A
diff --git a/include/alp/reference/io.hpp b/include/alp/reference/io.hpp
index 0a915bee3..3fd9a93ec 100644
--- a/include/alp/reference/io.hpp
+++ b/include/alp/reference/io.hpp
@@ -331,27 +331,6 @@ namespace alp {
 		return SUCCESS;
 	}
 
-	/** C++ scalar variant */
-	template<
-		Descriptor descr = descriptors::no_operation,
-		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC,
-		typename T
-	>
-	RC setElement(
-		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x,
-		const T val,
-		const size_t i,
-		const typename std::enable_if< ! alp::is_object< DataType >::value && ! alp::is_object< T >::value, void >::type * const = NULL
-	) {
-		// static sanity checks
-		NO_CAST_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, at index)",
-			"called with a value type that does not match that of the given "
-			"Vector" );
-
-		// delegate
-		return setElement( x, Scalar< T >( val ), i );
-	}
-
 	/**
 	 * Sets the content of a given vector \a x to be equal to that of
 	 * another given vector \a y. Can be masked.

From 436852421c399692e04ae5882dac94275486d5ce Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Mon, 24 Oct 2022 09:41:51 +0200
Subject: [PATCH 08/23] Add base definitions for blas1.hpp

---
 include/alp/base/blas1.hpp | 625 +++++++++++++++++++++++++++++++++++++
 1 file changed, 625 insertions(+)
 create mode 100644 include/alp/base/blas1.hpp

diff --git a/include/alp/base/blas1.hpp b/include/alp/base/blas1.hpp
new file mode 100644
index 000000000..5d057ac03
--- /dev/null
+++ b/include/alp/base/blas1.hpp
@@ -0,0 +1,625 @@
+
+/*
+ *   Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 5th of December 2016
+ */
+
+#ifndef _H_ALP_BASE_BLAS1
+#define _H_ALP_BASE_BLAS1
+
+#include <alp/rc.hpp>
+#include <alp/ops.hpp>
+#include <alp/phase.hpp>
+#include <alp/monoid.hpp>
+#include <alp/backends.hpp>
+#include <alp/semiring.hpp>
+#include <alp/descriptors.hpp>
+#include <alp/internalops.hpp>
+
+#include <assert.h>
+
+
+namespace alp {
+
+	/**
+	 * \defgroup BLAS1 The Level-1 ALP/GraphBLAS routines
+	 *
+	 * A collection of functions that allow ALP/GraphBLAS operators, monoids, and
+	 * semirings work on a mix of zero-dimensional and one-dimensional containers;
+	 * i.e., allows various linear algebra operations on scalars and objects of
+	 * type #grb::Vector.
+	 *
+	 * All functions return an error code of the enum-type #grb::RC.
+	 *
+	 * Primitives which produce vector output:
+	 *   -# #grb::set (three variants);
+	 *   -# #grb::foldr (in-place reduction to the right, scalar-to-vector and
+	 *      vector-to-vector);
+	 *   -# #grb::foldl (in-place reduction to the left, scalar-to-vector and
+	 *      vector-to-vector);
+	 *   -# #grb::eWiseApply (out-of-place application of a binary function);
+	 *   -# #grb::eWiseAdd (in-place addition of two vectors, a vector and a
+	 *      scalar, into a vector); and
+	 *   -# #grb::eWiseMul (in-place multiplication of two vectors, a vector and a
+	 *      scalar, into a vector).
+	 *
+	 * \note When #grb::eWiseAdd or #grb::eWiseMul using two input scalars is
+	 *       required, consider forming first the resulting scalar using level-0
+	 *       primitives, and then using #grb::set, #grb::foldl, or #grb::foldr, as
+	 *       appropriate.
+	 *
+	 * Primitives that produce scalar output:
+	 *   -# #grb::foldr (reduction to the right, vector-to-scalar);
+	 *   -# #grb::foldl (reduction to the left, vector-to-scalar).
+	 *
+	 * Primitives that do not require an operator, monoid, or semiring:
+	 *   -# #grb::set (three variants).
+	 *
+	 * Primitives that could take an operator (see #grb::operators):
+	 *   -# #grb::foldr, #grb::foldl, and #grb::eWiseApply.
+	 * Such operators typically can only be applied on \em dense vectors, i.e.,
+	 * vectors with #grb::nnz equal to its #grb::size. Operations on sparse
+	 * vectors require an interpretation of missing vector elements, which monoids
+	 * or semirings provide.
+	 *
+	 * Therefore, all aforementioned functions are also defined for monoids instead
+	 * of operators.
+	 *
+	 * The following functions are defined for monoids and semirings, but not for
+	 * operators alone:
+	 *   -# #grb::eWiseAdd (in-place addition).
+	 *
+	 * The following functions require a semiring, and are not defined for
+	 * operators or monoids alone:
+	 *   -# #grb::dot (in-place reduction of two vectors into a scalar); and
+	 *   -# #grb::eWiseMul (in-place multiplication).
+	 *
+	 * Sometimes we would like operations that are defined for semirings to also be
+	 * enabled on \em improper semirings. ALP/GraphBLAS statically checks most
+	 * properties required for composing proper semirings, and as such, attempts to
+	 * compose improper ones will result in a compilation error. In such cases, we
+	 * allow to pass an additive monoid and a multiplicative operator instead of a
+	 * semiring. The following functions allow this:
+	 *   -# #grb::dot, #grb::eWiseAdd, #grb::eWiseMul.
+	 * The given multiplicative operator can be any binary operator, and in
+	 * particular does not need to be associative.
+	 *
+	 * The algebraic structures lost with improper semirings typically correspond to
+	 * distributivity, zero being an annihilator to multiplication, as well as the
+	 * concept of \em one. Due to the latter lost structure, the above functions on
+	 * improper semirings are \em not defined for pattern inputs.
+	 *
+	 * \warning I.e., any attempt to use containers of the form
+	 *          \code
+	 *              grb::Vector<void>
+	 *              grb::Matrix<void>
+	 *          \endcode
+	 *          with an improper semiring will result in a compile-time error.
+	 *
+	 * \note Pattern containers are perfectly fine to use with proper semirings.
+	 *
+	 * \warning If an improper semiring does not have the property that the zero
+	 *          identity acts as an annihilator over the multiplicative operator,
+	 *          then the result of #grb::eWiseMul may be unintuitive. Please take
+	 *          great care in the use of improper semirings.
+	 *
+	 * For fusing multiple BLAS-1 style operations on any number of inputs and
+	 * outputs, users can pass their own operator function to be executed for
+	 * every index \a i.
+	 *   -# grb::eWiseLambda.
+	 * This requires manual application of operators, monoids, and/or semirings
+	 * via level-0 interface -- see #grb::apply, #grb::foldl, and #grb::foldr.
+	 *
+	 * For all of these functions, the element types of input and output types
+	 * do not have to match the domains of the given operator, monoid, or
+	 * semiring unless the #grb::descriptors::no_casting descriptor was passed.
+	 *
+	 * An implementation, whether blocking or non-blocking, should have clear
+	 * performance semantics for every sequence of graphBLAS calls, no matter
+	 * whether those are made from sequential or parallel contexts. Backends
+	 * may define different performance semantics depending on which #grb::Phase
+	 * primitives execute in.
+	 *
+	 * @{
+	 */
+
+	/**
+	 * A standard vector to use for mask parameters.
+	 *
+	 * Indicates that no mask shall be used; expands to a temporary Vector< bool >( 0 ).
+	 *
+	 * \internal Do not use this symbol within backend implementations.
+	 */
+	#define NO_MASK Vector< bool >( 0 )
+
+	/**
+	 * Executes an arbitrary element-wise user-defined function \a f using any
+	 * number of vectors of equal length, following the nonzero pattern of the
+	 * given vector \a x.
+	 *
+	 * The user-defined function is passed as a lambda which can capture, at
+	 * the very least, other instances of type grb::Vector. Use of this function
+	 * is preferable whenever multiple element-wise operations are requested that
+	 * use one or more identical input vectors. Performing the computation one
+	 * after the other in blocking mode would require the same vector to be
+	 * streamed multiple times, while with this function the operations can be
+	 * fused explicitly instead.
+	 *
+	 * It shall always be legal to capture non-GraphBLAS objects for read access
+	 * only. It shall \em not be legal to capture instances of type grb::Matrix
+	 * for read and/or write access.
+	 *
+	 * If grb::Properties::writableCaptured evaluates true then captured
+	 * non-GraphBLAS objects can also be written to, not just read from. The
+	 * captured variable is, however, completely local to the calling user process
+	 * only-- it will not be synchronised between user processes.
+	 * As a rule of thumb, data-centric GraphBLAS implementations \em cannot
+	 * support this and will thus have grb::Properties::writableCaptured evaluate
+	 * to false. A portable GraphBLAS algorithm should provide a different code
+	 * path to handle this case.
+	 * When it is legal to write to captured scalar, this function can, e.g., be
+	 * used to perform reduction-like operations on any number of equally sized
+	 * input vectors.  This would be preferable to a chained number of calls to
+	 * grb::dot in case where some vectors are shared between subsequent calls,
+	 * for example; the shared vectors are streamed only once using this lambda-
+	 * enabled function.
+	 *
+	 * \warning The lambda shall only be executed on the data local to the user
+	 *          process calling this function! This is different from the various
+	 *          fold functions, or grb::dot, in that the semantics of those
+	 *          functions always end with a globally synchronised result. To
+	 *          achieve the same effect with user-defined lambdas, the users
+	 *          should manually prescribe how to combine the local results into
+	 *          global ones, for instance, by a subsequent call to
+	 *          grb::collectives<>::allreduce.
+	 *
+	 * \note This is an addition to the GraphBLAS. It is alike user-defined
+	 *       operators, monoids, and semirings, except it allows execution on
+	 *       arbitrarily many inputs and arbitrarily many outputs.
+	 *
+	 * @tparam Func the user-defined lambda function type.
+	 * @tparam DataType the type of the user-supplied vector example.
+	 * @tparam backend  the backend type of the user-supplied vector example.
+	 *
+	 * @param[in] f The user-supplied lambda. This lambda should only capture
+	 *              and reference vectors of the same length as \a x. The lambda
+	 *              function should prescribe the operations required to execute
+	 *              at a given index \a i. Captured GraphBLAS vectors can access
+	 *              that element via the operator[]. It is illegal to access any
+	 *              element not at position \a i. The lambda takes only the single
+	 *              parameter \a i of type <code>const size_t</code>. Captured
+	 *              scalars will not be globally updated-- the user must program
+	 *              this explicitly. Scalars and other non-GraphBLAS containers
+	 *              are always local to their user process.
+	 * @param[in] x The vector the lambda will be executed on. This argument
+	 *              determines which indices \a i will be accessed during the
+	 *              elementwise operation-- elements with indices \a i that
+	 *              do not appear in \a x will be skipped during evaluation of
+	 *              \a f.
+	 * @param[in] args All vectors the lambda is to access elements of. Must be of
+	 *                 the same length as \a x. If this constraint is violated,
+	 *                 grb::MISMATCH shall be returned. <em>This is a variadic
+	 *                 argument and can contain any number of containers of type
+	 *                 grb::Vector, passed as though they were separate
+	 *                 arguments.</em>
+	 *
+	 * \note In future GraphBLAS implementations, \a args, apart from doing
+	 *       dimension checking, should also facilitate any data distribution
+	 *       necessary to successfully execute the element-wise operation. Current
+	 *       implementations do not require this since they use the same static
+	 *       distribution for all containers.
+	 *
+	 * \warning Using a grb::Vector inside a lambda passed to this function while
+	 *          not passing that same vector into \a args, will result in undefined
+	 *          behaviour.
+	 *
+	 * \note It would be natural to have \a x equal to one of the captured
+	 *       GraphBLAS vectors in \a f.
+	 *
+	 * \warning Due to the constraints on \a f described above, it is illegal to
+	 *          capture some vector \a x and have the following line in the body
+	 *          of \a f: <code>x[i] += x[i+1]</code>. Vectors can only be
+	 *          dereferenced at position \a i and \a i alone.
+	 *
+	 * @return grb::SUCCESS  When the lambda is successfully executed.
+	 * @return grb::MISMATCH When two or more vectors passed to \a args are not of
+	 *                       equal length.
+	 *
+	 * \parblock
+	 * \par Example.
+	 *
+	 * An example valid use:
+	 *
+	 * \code
+	 * void f(
+	 *      double &alpha,
+	 *      grb::Vector< double > &y,
+	 *      const double beta,
+	 *      const grb::Vector< double > &x,
+	 *      const grb::Semiring< double > ring
+	 * ) {
+	 *      assert( grb::size(x) == grb::size(y) );
+	 *      assert( grb::nnz(x) == grb::size(x) );
+	 *      assert( grb::nnz(y) == grb::size(y) );
+	 *      alpha = ring.getZero();
+	 *      grb::eWiseLambda(
+	 *          [&alpha,beta,&x,&y,ring]( const size_t i ) {
+	 *              double mul;
+	 *              const auto mul_op = ring.getMultiplicativeOperator();
+	 *              const auto add_op = ring.getAdditiveOperator();
+	 *              grb::apply( y[i], beta, x[i], mul_op );
+	 *              grb::apply( mul, x[i], y[i], mul_op );
+	 *              grb::foldl( alpha, mul, add_op );
+	 *      }, x, y );
+	 *      grb::collectives<>::allreduce( alpha, ring.getAdditiveOperator() );
+	 * }
+	 * \endcode
+	 *
+	 * This code takes a value \a beta, a vector \a x, and a semiring \a ring and
+	 * computes:
+	 *   1) \a y as the element-wise multiplication (under \a ring) of \a beta and
+	 *      \a x; and
+	 *   2) \a alpha as the dot product (under \a ring) of \a x and \a y.
+	 * This function can easily be made agnostic to whatever exact semiring is used
+	 * by templating the type of \a ring. As it is, this code is functionally
+	 * equivalent to:
+	 *
+	 * \code
+	 * grb::eWiseMul( y, beta, x, ring );
+	 * grb::dot( alpha, x, y, ring );
+	 * \endcode
+	 *
+	 * The version using the lambdas, however, is expected to execute
+	 * faster as both \a x and \a y are streamed only once, while the
+	 * latter code may stream both vectors twice.
+	 * \endparblock
+	 *
+	 * \warning The following code is invalid:
+	 *          \code
+	 *              template< class Operator >
+	 *              void f(
+	 *                   grb::Vector< double > &x,
+	 *                   const Operator op
+	 *              ) {
+	 *                   grb::eWiseLambda(
+	 *                       [&x,&op]( const size_t i ) {
+	 *                           grb::apply( x[i], x[i], x[i+1], op );
+	 *                   }, x );
+	 *              }
+	 *          \endcode
+	 *          Only a Vector::lambda_reference to position exactly equal to \a i
+	 *          may be used within this function.
+	 *
+	 * \warning There is no similar concept in the official GraphBLAS specs.
+	 *
+	 * \warning Captured scalars will be local to the user process executing the
+	 *          lambda. To retrieve the global dot product, an allreduce must
+	 *          explicitly be called.
+	 *
+	 * @see Vector::operator[]()
+	 * @see Vector::lambda_reference
+	 *
+	 * \todo Revise specification regarding recent changes on phases, performance
+	 *       semantics, and capacities.
+	 */
+	template<
+		typename Func,
+		typename DataType,
+		Backend backend,
+		typename Coords,
+		typename... Args
+	>
+	RC eWiseLambda(
+		const Func f,
+		const Vector< DataType, backend, Coords > & x, Args... // NOTE(review): other ALP signatures in this series use a seven-parameter Vector; confirm this form is intended
+	) {
+#ifndef NDEBUG
+		const bool should_not_call_base_vector_ewiselambda = false; // always false, so the assert below always trips in debug builds
+		assert( should_not_call_base_vector_ewiselambda );
+#endif
+		(void)f; // neither argument is used; the casts silence unused-parameter warnings
+		(void)x;
+		return UNSUPPORTED; // reached only when assertions are compiled out (NDEBUG)
+	}
+
+	/**
+	 * Reduces, or \em folds, a vector into a scalar.
+	 *
+	 * Reduction takes place according to a monoid \f$ (\oplus,1) \f$, where
+	 * \f$ \oplus:\ D_1 \times D_2 \to D_3 \f$ with associated identities
+	 * \f$ 1_k \in D_k \f$. Usually, \f$ D_k \subseteq D_3, 1 \leq k < 3 \f$,
+	 * though other more exotic structures may be envisioned (and used).
+	 *
+	 * Let \f$ x_0 = 1 \f$ and let
+	 * \f$ x_{i+1} = \begin{cases}
+	 *   x_i \oplus y_i\text{ if }y_i\text{ is nonzero and }m_i\text{ evaluates true} \\
+	 *   x_i\text{ otherwise}
+	 * \end{cases},\f$
+	 * for all \f$ i \in \{ 0, 1, \ldots, n-1 \} \f$.
+	 *
+	 * \note Per this definition, the folding happens in a left-to-right direction.
+	 *       If another direction is wanted, which may have use in cases where
+	 *       \f$ D_1 \f$ differs from \f$ D_2 \f$, then either a monoid with those
+	 *       operator domains switched may be supplied, or #grb::foldr may be used
+	 *       instead.
+	 *
+	 * After a successful call, \a x will be equal to \f$ x_n \f$.
+	 *
+	 * Note that the operator \f$ \oplus \f$ must be associative since it is part
+	 * of a monoid. This algebraic property is exploited when parallelising the
+	 * requested operation. The identity is required when parallelising over
+	 * multiple user processes.
+	 *
+	 * \warning In so doing, the order of the evaluation of the reduction operation
+	 *          should not be expected to be a serial, left-to-right, evaluation of
+	 *          the computation chain.
+	 *
+	 * @tparam descr     The descriptor to be used (descriptors::no_operation if
+	 *                   left unspecified).
+	 * @tparam Monoid    The monoid to use for reduction.
+	 * @tparam InputType The type of the elements in the supplied ALP/GraphBLAS
+	 *                   vector \a y.
+	 * @tparam MaskType  The type of the elements in the supplied ALP/GraphBLAS
+	 *                   vector \a mask.
+	 * @tparam IOType    The type of the output scalar \a x.
+	 *
+	 * @param[out]   x   The result of the reduction.
+	 * @param[in]    y   Any ALP/GraphBLAS vector. This vector may be sparse.
+	 * @param[in]  mask  Any ALP/GraphBLAS vector. This vector may be sparse.
+	 * @param[in] monoid The monoid under which to perform this reduction.
+	 *
+	 * @return grb::SUCCESS  When the call completed successfully.
+	 * @return grb::MISMATCH If a \a mask was not empty and does not have size
+	 *                       equal to \a y.
+	 * @return grb::ILLEGAL  If the provided input vector \a y was not dense, while
+	 *                       #grb::descriptors::dense was given.
+	 *
+	 * \parblock
+	 * \par Valid descriptors
+	 * grb::descriptors::no_operation, grb::descriptors::no_casting,
+	 * grb::descriptors::dense, grb::descriptors::invert_mask,
+	 * grb::descriptors::structural, grb::descriptors::structural_complement
+	 *
+	 * \note Invalid descriptors will be ignored.
+	 *
+	 * If grb::descriptors::no_casting is given, then 1) the first domain of
+	 * \a monoid must match \a InputType, 2) the second domain of \a monoid must
+	 * match \a IOType, 3) the third domain must match \a IOType, and 4) the
+	 * element type of \a mask must be <tt>bool</tt>. If one of these is not true,
+	 * the code shall not compile.
+	 * \endparblock
+	 *
+	 * \parblock
+	 * \par Performance semantics
+	 * Backends must specify performance semantics in the amount of work, intra-
+	 * process data movement, inter-process data movement, and the number of
+	 * user process synchronisations required. They should also specify whether
+	 * any system calls may be made, in particular those related to dynamic
+	 * memory management. If new memory may be allocated, they must specify how
+	 * much.
+	 * \endparblock
+	 *
+	 * @see grb::foldr provides similar in-place functionality.
+	 * @see grb::eWiseApply provides out-of-place semantics.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		class Monoid,
+		typename InputType, typename IOType, typename MaskType,
+		Backend backend, typename Coords
+	>
+	RC foldl(
+		IOType &x, // scalar that is documented to receive the reduction result
+		const Vector< InputType, backend, Coords > &y, // vector to be reduced
+		const Vector< MaskType, backend, Coords > &mask, // mask over the elements of y
+		const Monoid &monoid = Monoid(),
+		const typename std::enable_if< !grb::is_object< IOType >::value && // overload is enabled only if none of
+			!grb::is_object< InputType >::value &&                         // the three element types satisfies
+			!grb::is_object< MaskType >::value &&                          // is_object and Monoid satisfies
+			grb::is_monoid< Monoid >::value, void                          // is_monoid
+		>::type * const = nullptr
+	) {
+#ifndef NDEBUG // unless NDEBUG is defined, reaching this base fallback fails the assertion below
+		const bool should_not_call_base_scalar_foldl = false;
+		assert( should_not_call_base_scalar_foldl );
+#endif
+		(void) y; // no argument is used; the casts silence unused-parameter warnings
+		(void) x;
+		(void) mask;
+		(void) monoid;
+		return UNSUPPORTED; // reached only when NDEBUG disables the assertion above
+	}
+
+	/**
+	 * Folds a vector into a scalar, left-to-right.
+	 *
+	 * Unmasked monoid variant; see the masked variant for the full documentation.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		class Monoid,
+		typename IOType, typename InputType,
+		Backend backend,
+		typename Coords
+	>
+	RC foldl(
+		IOType &x,
+		const Vector< InputType, backend, Coords > &y,
+		const Monoid &monoid = Monoid(),
+		const typename std::enable_if< // enabled only for non-object IOType and a Monoid satisfying is_monoid
+			!grb::is_object< IOType >::value &&
+			grb::is_monoid< Monoid >::value,
+		void >::type * const = nullptr
+	) {
+#ifndef NDEBUG // unless NDEBUG is defined, reaching this base fallback fails the assertion below
+		const bool should_not_call_base_scalar_foldl_nomask = false;
+		assert( should_not_call_base_scalar_foldl_nomask );
+#endif
+		(void) y; // no argument is used; the casts silence unused-parameter warnings
+		(void) x;
+		(void) monoid;
+		return UNSUPPORTED; // reached only when NDEBUG disables the assertion above
+	}
+
+	/**
+	 * Folds a vector into a scalar, left-to-right.
+	 *
+	 * Masked operator variant.
+	 *
+	 * \deprecated This signature is deprecated. It was implemented for reference
+	 *             (and reference_omp), but could not be implemented for BSP1D and
+	 *             other distributed-memory backends. This signature may be removed
+	 *             with any release beyond 0.6.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		class OP,
+		typename IOType, typename InputType, typename MaskType,
+		Backend backend, typename Coords
+	>
+	RC foldl(
+		IOType &x,
+		const Vector< InputType, backend, Coords > &y,
+		const Vector< MaskType, backend, Coords > &mask,
+		const OP &op = OP(),
+		const typename std::enable_if< // enabled only for non-object IOType/MaskType and an OP satisfying is_operator
+			!grb::is_object< IOType >::value &&
+			!grb::is_object< MaskType >::value &&
+			grb::is_operator< OP >::value,
+		void >::type * const = nullptr
+	) {
+#ifndef NDEBUG // unless NDEBUG is defined, reaching this base fallback fails the assertion below
+		const bool should_not_call_base_scalar_foldl_op = false;
+		assert( should_not_call_base_scalar_foldl_op );
+#endif
+		(void) x; // no argument is used; the casts silence unused-parameter warnings
+		(void) y;
+		(void) mask;
+		(void) op;
+		return UNSUPPORTED; // reached only when NDEBUG disables the assertion above
+	}
+
+	/**
+	 * Folds a vector into a scalar, right-to-left.
+	 *
+	 * Masked variant: \a x is the input vector and \a y the scalar. See the
+	 * masked, left-to-right variant for the full documentation.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		class Monoid,
+		typename InputType, typename IOType, typename MaskType,
+		Backend backend, typename Coords
+	>
+	RC foldr(
+		const Vector< InputType, backend, Coords > &x,
+		const Vector< MaskType, backend, Coords > &mask,
+		IOType &y,
+		const Monoid &monoid = Monoid(),
+		const typename std::enable_if< !grb::is_object< IOType >::value && // overload is enabled only if none of
+			!grb::is_object< InputType >::value &&                         // the three element types satisfies
+			!grb::is_object< MaskType >::value &&                          // is_object and Monoid satisfies
+			grb::is_monoid< Monoid >::value, void                          // is_monoid
+		>::type * const = nullptr
+	) {
+#ifndef NDEBUG // unless NDEBUG is defined, reaching this base fallback fails the assertion below
+		const bool should_not_call_base_scalar_foldr = false;
+		assert( should_not_call_base_scalar_foldr );
+#endif
+		(void) y; // no argument is used; the casts silence unused-parameter warnings
+		(void) x;
+		(void) mask;
+		(void) monoid;
+		return UNSUPPORTED; // reached only when NDEBUG disables the assertion above
+	}
+
+	/**
+	 * Folds a vector into a scalar, right-to-left.
+	 *
+	 * Unmasked variant: here \a y is the input vector and \a x the scalar. See
+	 * the masked, left-to-right variant for the full documentation.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		class Monoid,
+		typename IOType, typename InputType,
+		Backend backend, typename Coords
+	>
+	RC foldr(
+		const Vector< InputType, backend, Coords > &y,
+		IOType &x,
+		const Monoid &monoid = Monoid(),
+		const typename std::enable_if< // enabled only for non-object IOType and a Monoid satisfying is_monoid
+			!grb::is_object< IOType >::value &&
+			grb::is_monoid< Monoid >::value,
+		void >::type * const = nullptr
+	) {
+#ifndef NDEBUG // unless NDEBUG is defined, reaching this base fallback fails the assertion below
+		const bool should_not_call_base_scalar_foldr_nomask = false;
+		assert( should_not_call_base_scalar_foldr_nomask );
+#endif
+		(void) y; // no argument is used; the casts silence unused-parameter warnings
+		(void) x;
+		(void) monoid;
+		return UNSUPPORTED; // reached only when NDEBUG disables the assertion above
+	}
+
+	/**
+	 * Dot product over a given semiring (base fallback; performs no computation).
+	 *
+	 * \todo Write specification.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation, class Ring,
+		typename IOType, typename InputType1, typename InputType2,
+		Backend backend, typename Coords
+	>
+	RC dot( IOType &x,
+		const Vector< InputType1, backend, Coords > &left,
+		const Vector< InputType2, backend, Coords > &right,
+		const Ring &ring = Ring(),
+		const Phase &phase = EXECUTE,
+		const typename std::enable_if< // enabled only for non-object element types and a Ring satisfying is_semiring
+			!grb::is_object< InputType1 >::value &&
+			!grb::is_object< InputType2 >::value &&
+			!grb::is_object< IOType >::value &&
+			grb::is_semiring< Ring >::value,
+		void >::type * const = nullptr
+	) {
+#ifdef _DEBUG // optional trace, printed only when _DEBUG is defined
+		std::cout << "Should not call base grb::dot (semiring version)\n";
+#endif
+#ifndef NDEBUG // unless NDEBUG is defined, reaching this base fallback fails the assertion below
+		const bool should_not_call_base_dot_semiring = false;
+		assert( should_not_call_base_dot_semiring );
+#endif
+		(void) x; // no argument is used; the casts silence unused-parameter warnings
+		(void) left;
+		(void) right;
+		(void) ring;
+		(void) phase;
+		return UNSUPPORTED; // reached only when NDEBUG disables the assertion above
+	}
+
+	/** @} */
+
+} // end namespace grb
+
+#endif // end _H_GRB_BASE_BLAS1
+

From 08b0bc4593b871fda97dae04ffbb3aa98524e360 Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Mon, 24 Oct 2022 10:09:00 +0200
Subject: [PATCH 09/23] Generalize base IO primitives over backend

Also fix minor code style issues
---
 include/alp/base/io.hpp | 94 +++++++++++++++++++++++++++--------------
 1 file changed, 63 insertions(+), 31 deletions(-)

diff --git a/include/alp/base/io.hpp b/include/alp/base/io.hpp
index 9e76d57bd..43147c20d 100644
--- a/include/alp/base/io.hpp
+++ b/include/alp/base/io.hpp
@@ -75,8 +75,11 @@ namespace alp {
 	 * Resizes the Scalar to have at least the given number of nonzeroes.
 	 * The contents of the scalar are not retained.
 	 */
-	template< typename InputType, typename InputStructure, typename length_type >
-	RC resize( Scalar< InputType, InputStructure, reference > &s, const length_type new_nz ) {
+	template<
+		typename InputType, typename InputStructure,
+		typename length_type, Backend backend
+	>
+	RC resize( Scalar< InputType, InputStructure, backend > &s, const length_type new_nz ) {
 		(void) s;
 		(void) new_nz;
 		return PANIC;
@@ -86,9 +89,13 @@ namespace alp {
 	 * Resizes the vector to have at least the given number of nonzeroes.
 	 * The contents of the vector are not retained.
 	 */
-	template< typename InputType, typename InputStructure, typename View, typename ImfR, typename ImfC, typename length_type >
+	template<
+		typename InputType, typename InputStructure, typename View,
+		typename ImfR, typename ImfC,
+		typename length_type, Backend backend
+	>
 	RC resize(
-		Vector< InputType, InputStructure, Density::Dense, View, ImfR, ImfC, reference > &x,
+		Vector< InputType, InputStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
 		const length_type new_nz
 	) noexcept {
 		(void) x;
@@ -100,9 +107,12 @@ namespace alp {
 	 * Resizes the matrix to have at least the given number of nonzeroes.
 	 * The contents of the matrix are not retained.
 	 */
-	template< typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC >
+	template<
+		typename InputType, typename InputStructure, typename InputView,
+		typename InputImfR, typename InputImfC, Backend backend
+	>
 	RC resize(
-		Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > &A,
+		Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &A,
 		const size_t new_nz
 	) noexcept {
 		(void) A;
@@ -122,7 +132,7 @@ namespace alp {
 	>
 	RC set(
 		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
-		const Scalar< T, ValStructure, reference > val,
+		const Scalar< T, ValStructure, backend > val,
 		const typename std::enable_if<
 			!alp::is_object< DataType >::value &&
 			!alp::is_object< T >::value,
@@ -136,12 +146,14 @@ namespace alp {
 	 */
 	template<
 		Descriptor descr = descriptors::no_operation,
-		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC, typename ValStructure,
-		typename T
+		typename DataType, typename DataStructure, typename View,
+		typename ImfR, typename ImfC,
+		typename T, typename ValStructure,
+		Backend backend
 	>
 	RC setElement(
-		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > &x,
-		const Scalar< T, ValStructure, reference > val,
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
+		const Scalar< T, ValStructure, backend > val,
 		const size_t i,
 		const typename std::enable_if< ! alp::is_object< DataType >::value && ! alp::is_object< T >::value, void >::type * const = NULL
 	) {
@@ -152,9 +164,12 @@ namespace alp {
 	 * Sets all elements of the output matrix to the values of the input matrix.
 	 * C = A
 	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView,
+		typename OutputImfR, typename OutputImfC,
+		typename InputType, typename InputStructure, typename InputView,
+		typename InputImfR, typename InputImfC,
 		Backend backend
 	>
 	RC set(
@@ -168,8 +183,10 @@ namespace alp {
 	 * Sets all elements of the given matrix to the value of the given scalar.
 	 * C = val
 	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView,
+		typename OutputImfR, typename OutputImfC,
 		typename InputType, typename InputStructure,
 		Backend backend
 	>
@@ -207,16 +224,18 @@ namespace alp {
 	 * Any existing values in \a x that overlap with newer values will hence
 	 * be overwritten.
 	 */
-	template< Descriptor descr = descriptors::no_operation,
+	template<
+		Descriptor descr = descriptors::no_operation,
 		typename InputType,
 		class Merger = operators::right_assign< InputType >,
 		typename fwd_iterator1, typename fwd_iterator2,
 		Backend backend, typename Coords
 	>
-	RC buildVector( internal::Vector< InputType, backend > & x,
+	RC buildVector(
+		internal::Vector< InputType, backend > &x,
 		fwd_iterator1 ind_start, const fwd_iterator1 ind_end,
 		fwd_iterator2 val_start, const fwd_iterator2 val_end,
-		const IOMode mode, const Merger & merger = Merger()
+		const IOMode mode, const Merger &merger = Merger()
 	) {
 		operators::right_assign< InputType > accum;
 		return buildVector< descr >( x, accum, ind_start, ind_end, val_start, val_end, mode, merger );
@@ -277,13 +296,15 @@ namespace alp {
 	 * @returns alp::ILLEGAL When a nonzero has an index larger than alp::size(x).
 	 * @returns alp::PANIC   If an unmitigable error has occured during ingestion.
 	 */
-	template< Descriptor descr = descriptors::no_operation,
+	template<
+		Descriptor descr = descriptors::no_operation,
 		typename InputType,
 		class Merger = operators::right_assign< InputType >,
 		typename fwd_iterator1, typename fwd_iterator2,
 		Backend backend, typename Coords
 	>
-	RC buildVectorUnique( internal::Vector< InputType, backend > & x,
+	RC buildVectorUnique(
+		internal::Vector< InputType, backend > &x,
 		fwd_iterator1 ind_start, const fwd_iterator1 ind_end,
 		fwd_iterator2 val_start, const fwd_iterator2 val_end,
 		const IOMode mode
@@ -352,7 +373,7 @@ namespace alp {
 	 *           \em once; the three input iterators \a I, \a J, and \a V thus
 	 *           may have exactly one copyeach, meaning that all input may be
 	 *           traversed only once.
-	 *        -# Each of the at most three iterator copies will be incremented
+	 *        -# Each of the at most three iterator copies will be incremented
 	 *           at most \f$ \mathit{nz} \f$ times.
 	 *        -# Each position of the each of the at most three iterator copies
 	 *           will be dereferenced exactly once.
@@ -375,15 +396,17 @@ namespace alp {
 	 *       matrix construction is costly and the user is referred to the
 	 *       costly buildMatrix() function instead.
 	 */
-	template< Descriptor descr = descriptors::no_operation,
+	template<
+		Descriptor descr = descriptors::no_operation,
 		typename InputType,
 		typename fwd_iterator1 = const size_t * __restrict__,
 		typename fwd_iterator2 = const size_t * __restrict__,
 		typename fwd_iterator3 = const InputType * __restrict__,
 		typename length_type = size_t,
-		Backend implementation = config::default_backend >
+		Backend implementation = config::default_backend
+	>
 	RC buildMatrixUnique(
-		internal::Matrix< InputType, implementation > & A,
+		internal::Matrix< InputType, implementation > &A,
 		fwd_iterator1 I, fwd_iterator1 I_end,
 		fwd_iterator2 J, fwd_iterator2 J_end,
 		fwd_iterator3 V, fwd_iterator3 V_end,
@@ -401,13 +424,15 @@ namespace alp {
 	 * Alias that transforms a set of pointers and an array length to the
 	 * buildMatrixUnique variant based on iterators.
 	 */
-	template< Descriptor descr = descriptors::no_operation,
+	template<
+		Descriptor descr = descriptors::no_operation,
 		typename InputType,
 		typename fwd_iterator1 = const size_t * __restrict__,
 		typename fwd_iterator2 = const size_t * __restrict__,
 		typename fwd_iterator3 = const InputType * __restrict__,
 		typename length_type = size_t,
-		Backend implementation = config::default_backend >
+		Backend implementation = config::default_backend
+	>
 	RC buildMatrixUnique( internal::Matrix< InputType, implementation > &A,
 		fwd_iterator1 I, fwd_iterator2 J, fwd_iterator3 V,
 		const size_t nz, const IOMode mode
@@ -421,13 +446,19 @@ namespace alp {
 	}
 
 	/** Version of the above #buildMatrixUnique that handles \a NULL value pointers. */
-	template< Descriptor descr = descriptors::no_operation,
+	template<
+		Descriptor descr = descriptors::no_operation,
 		typename InputType,
 		typename fwd_iterator1 = const size_t * __restrict__,
 		typename fwd_iterator2 = const size_t * __restrict__,
 		typename length_type = size_t,
-		Backend implementation = config::default_backend >
-	RC buildMatrixUnique( internal::Matrix< InputType, implementation > & A, fwd_iterator1 I, fwd_iterator2 J, const length_type nz, const IOMode mode ) {
+		Backend implementation = config::default_backend
+	>
+	RC buildMatrixUnique(
+		internal::Matrix< InputType, implementation > &A,
+		fwd_iterator1 I, fwd_iterator2 J,
+		const length_type nz, const IOMode mode
+	) {
 		// derive synchronized iterator
 		auto start = utils::makeSynchronized( I, J, I + nz, J + nz );
 		const auto end = utils::makeSynchronized( I + nz, J + nz, I + nz, J + nz );
@@ -480,7 +511,8 @@ namespace alp {
 		typename InputType, typename fwd_iterator,
 		Backend implementation = config::default_backend
 	>
-	RC buildMatrixUnique( internal::Matrix< InputType, implementation > & A,
+	RC buildMatrixUnique(
+		internal::Matrix< InputType, implementation > &A,
 		fwd_iterator start, const fwd_iterator end,
 		const IOMode mode
 	) {

From 3ce0851dc7a737af8397112f4bc7510d1c41fc39 Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Mon, 24 Oct 2022 10:09:50 +0200
Subject: [PATCH 10/23] Add missing NO_CAST_ASSERT definition in IO

---
 include/alp/reference/io.hpp | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/include/alp/reference/io.hpp b/include/alp/reference/io.hpp
index 3fd9a93ec..cb049946b 100644
--- a/include/alp/reference/io.hpp
+++ b/include/alp/reference/io.hpp
@@ -26,6 +26,23 @@
 #include <alp/base/io.hpp>
 #include "matrix.hpp"
 
+#define NO_CAST_ASSERT( x, y, z ) /* x: asserted condition; y, z: string literals */ \
+	static_assert( x, /* message below is built by adjacent-literal concatenation */ \
+		"\n\n"                                                                 \
+		"********************************************************************" \
+		"********************************************************************" \
+		"******************************\n"                                     \
+		"*     ERROR      | " y " " z ".\n" /* y is also named in fix 1 below */ \
+		"********************************************************************" \
+		"********************************************************************" \
+		"******************************\n"                                     \
+		"* Possible fix 1 | Remove no_casting from the template parameters "   \
+		"in this call to " y ".\n"                                             \
+		"* Possible fix 2 | Provide a value that matches the expected type.\n" \
+		"********************************************************************" \
+		"********************************************************************" \
+		"******************************\n" );
+
 namespace alp {
 
 	/**
@@ -719,5 +736,7 @@ namespace alp {
 
 } // end namespace ``alp''
 
+#undef NO_CAST_ASSERT
+
 #endif // end ``_H_ALP_REFERENCE_IO''
 

From dbf2337db7a9d332fff7e232069d0543ff620b3b Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Mon, 24 Oct 2022 17:25:09 +0200
Subject: [PATCH 11/23] Place dummy function definitions from blas1 into the
 base blas1 header

---
 include/alp/base/blas1.hpp | 1211 ++++++++++++++++++++++--------------
 1 file changed, 729 insertions(+), 482 deletions(-)

diff --git a/include/alp/base/blas1.hpp b/include/alp/base/blas1.hpp
index 5d057ac03..70dd394a9 100644
--- a/include/alp/base/blas1.hpp
+++ b/include/alp/base/blas1.hpp
@@ -39,587 +39,834 @@ namespace alp {
 
 	/**
 	 * \defgroup BLAS1 The Level-1 ALP/GraphBLAS routines
-	 *
-	 * A collection of functions that allow ALP/GraphBLAS operators, monoids, and
-	 * semirings work on a mix of zero-dimensional and one-dimensional containers;
-	 * i.e., allows various linear algebra operations on scalars and objects of
-	 * type #grb::Vector.
-	 *
-	 * All functions return an error code of the enum-type #grb::RC.
-	 *
-	 * Primitives which produce vector output:
-	 *   -# #grb::set (three variants);
-	 *   -# #grb::foldr (in-place reduction to the right, scalar-to-vector and
-	 *      vector-to-vector);
-	 *   -# #grb::foldl (in-place reduction to the left, scalar-to-vector and
-	 *      vector-to-vector);
-	 *   -# #grb::eWiseApply (out-of-place application of a binary function);
-	 *   -# #grb::eWiseAdd (in-place addition of two vectors, a vector and a
-	 *      scalar, into a vector); and
-	 *   -# #grb::eWiseMul (in-place multiplication of two vectors, a vector and a
-	 *      scalar, into a vector).
-	 *
-	 * \note When #grb::eWiseAdd or #grb::eWiseMul using two input scalars is
-	 *       required, consider forming first the resulting scalar using level-0
-	 *       primitives, and then using #grb::set, #grb::foldl, or #grb::foldr, as
-	 *       appropriate.
-	 *
-	 * Primitives that produce scalar output:
-	 *   -# #grb::foldr (reduction to the right, vector-to-scalar);
-	 *   -# #grb::foldl (reduction to the left, vector-to-scalar).
-	 *
-	 * Primitives that do not require an operator, monoid, or semiring:
-	 *   -# #grb::set (three variants).
-	 *
-	 * Primitives that could take an operator (see #grb::operators):
-	 *   -# #grb::foldr, #grb::foldl, and #grb::eWiseApply.
-	 * Such operators typically can only be applied on \em dense vectors, i.e.,
-	 * vectors with #grb::nnz equal to its #grb::size. Operations on sparse
-	 * vectors require an intepretation of missing vector elements, which monoids
-	 * or semirings provide.
-	 *
-	 * Therefore, all aforementioned functions are also defined for monoids instead
-	 * of operators.
-	 *
-	 * The following functions are defined for monoids and semirings, but not for
-	 * operators alone:
-	 *   -# #grb::eWiseAdd (in-place addition).
-	 *
-	 * The following functions require a semiring, and are not defined for
-	 * operators or monoids alone:
-	 *   -# #grb::dot (in-place reduction of two vectors into a scalar); and
-	 *   -# #grb::eWiseMul (in-place multiplication).
-	 *
-	 * Sometimes, operations that are defined for semirings we would sometimes also
-	 * like enabled on \em improper semirings. ALP/GraphBLAS statically checks most
-	 * properties required for composing proper semirings, and as such, attempts to
-	 * compose improper ones will result in a compilation error. In such cases, we
-	 * allow to pass an additive monoid and a multiplicative operator instead of a
-	 * semiring. The following functions allow this:
-	 *   -# #grb::dot, #grb::eWiseAdd, #grb::eWiseMul.
-	 * The given multiplicative operator can be any binary operator, and in
-	 * particular does not need to be associative.
-	 *
-	 * The algebraic structures lost with improper semirings typically correspond to
-	 * distributivity, zero being an annihilator to multiplication, as well as the
-	 * concept of \em one. Due to the latter lost structure, the above functions on
-	 * impure semirings are \em not defined for pattern inputs.
-	 *
-	 * \warning I.e., any attempt to use containers of the form
-	 *          \code
-	 *              grb::Vector<void>
-	 *              grb::Matrix<void>
-	 *          \endcode
-	 *          with an improper semiring will result in a compile-time error.
-	 *
-	 * \note Pattern containers are perfectly fine to use with proper semirings.
-	 *
-	 * \warning If an improper semiring does not have the property that the zero
-	 *          identity acts as an annihilator over the multiplicative operator,
-	 *          then the result of #grb::eWiseMul may be unintuitive. Please take
-	 *          great care in the use of improper semrings.
-	 *
-	 * For fusing multiple BLAS-1 style operations on any number of inputs and
-	 * outputs, users can pass their own operator function to be executed for
-	 * every index \a i.
-	 *   -# grb::eWiseLambda.
-	 * This requires manual application of operators, monoids, and/or semirings
-	 * via level-0 interface -- see #grb::apply, #grb::foldl, and #grb::foldr.
-	 *
-	 * For all of these functions, the element types of input and output types
-	 * do not have to match the domains of the given operator, monoid, or
-	 * semiring unless the #grb::descriptors::no_casting descriptor was passed.
-	 *
-	 * An implementation, whether blocking or non-blocking, should have clear
-	 * performance semantics for every sequence of graphBLAS calls, no matter
-	 * whether those are made from sequential or parallel contexts. Backends
-	 * may define different performance semantics depending on which #grb::Phase
-	 * primitives execute in.
-	 *
 	 * @{
 	 */
 
 	/**
-	 * A standard vector to use for mask parameters.
-	 *
-	 * Indicates that no mask shall be used.
-	 *
-	 * \internal Do not use this symbol within backend implementations.
+	 * Folds all elements in a ALP Vector \a x into a single value \a beta.
 	 */
-	#define NO_MASK Vector< bool >( 0 )
+	template< Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType, typename IOStructure,
+		class Monoid,
+		Backend backend
+	>
+	RC foldr(
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
+		Scalar< IOType, IOStructure, backend > &beta,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t< // enabled only for non-object InputType/IOType and a Monoid satisfying is_monoid
+			!alp::is_object< InputType >::value && !alp::is_object< IOType >::value && alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) x; // no argument is used; the casts silence unused-parameter warnings
+		(void) beta;
+		(void) monoid;
+		return UNSUPPORTED; // this generic version performs no fold and always returns UNSUPPORTED
+	}
+
+	/** C++ scalar variant: wraps \a beta in a Scalar and forwards to the call below. */
+	template< Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType,
+		class Monoid,
+		Backend backend
+	>
+	RC foldr(
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
+		IOType &beta,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t< // enabled only for non-object InputType/IOType and a Monoid satisfying is_monoid
+			!alp::is_object< InputType >::value && !alp::is_object< IOType >::value && alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) { // NOTE(review): the Scalar overload above takes a non-const lvalue reference; a temporary cannot bind to it -- confirm
+		return foldr( x, Scalar< IOType >( beta ), monoid ); // NOTE(review): nothing is copied back into beta here -- confirm intent
+	}
 
 	/**
-	 * Executes an arbitrary element-wise user-defined function \a f using any
-	 * number of vectors of equal length, following the nonzero pattern of the
-	 * given vector \a x.
-	 *
-	 * The user-defined function is passed as a lambda which can capture, at
-	 * the very least, other instances of type grb::Vector. Use of this function
-	 * is preferable whenever multiple element-wise operations are requested that
-	 * use one or more identical input vectors. Performing the computation one
-	 * after the other in blocking mode would require the same vector to be
-	 * streamed multiple times, while with this function the operations can be
-	 * fused explicitly instead.
-	 *
-	 * It shall always be legal to capture non-GraphBLAS objects for read access
-	 * only. It shall \em not be legal to capture instances of type grb::Matrix
-	 * for read and/or write access.
-	 *
-	 * If grb::Properties::writableCaptured evaluates true then captured
-	 * non-GraphBLAS objects can also be written to, not just read from. The
-	 * captured variable is, however, completely local to the calling user process
-	 * only-- it will not be synchronised between user processes.
-	 * As a rule of thumb, data-centric GraphBLAS implementations \em cannot
-	 * support this and will thus have grb::Properties::writableCaptured evaluate
-	 * to false. A portable GraphBLAS algorithm should provide a different code
-	 * path to handle this case.
-	 * When it is legal to write to captured scalar, this function can, e.g., be
-	 * used to perform reduction-like operations on any number of equally sized
-	 * input vectors.  This would be preferable to a chained number of calls to
-	 * grb::dot in case where some vectors are shared between subsequent calls,
-	 * for example; the shared vectors are streamed only once using this lambda-
-	 * enabled function.
-	 *
-	 * \warning The lambda shall only be executed on the data local to the user
-	 *          process calling this function! This is different from the various
-	 *          fold functions, or grb::dot, in that the semantics of those
-	 *          functions always end with a globally synchronised result. To
-	 *          achieve the same effect with user-defined lambdas, the users
-	 *          should manually prescribe how to combine the local results into
-	 *          global ones, for instance, by a subsequent call to
-	 *          grb::collectives<>::allreduce.
-	 *
-	 * \note This is an addition to the GraphBLAS. It is alike user-defined
-	 *       operators, monoids, and semirings, except it allows execution on
-	 *       arbitrarily many inputs and arbitrarily many outputs.
-	 *
-	 * @tparam Func the user-defined lambda function type.
-	 * @tparam DataType the type of the user-supplied vector example.
-	 * @tparam backend  the backend type of the user-supplied vector example.
-	 *
-	 * @param[in] f The user-supplied lambda. This lambda should only capture
-	 *              and reference vectors of the same length as \a x. The lambda
-	 *              function should prescribe the operations required to execute
-	 *              at a given index \a i. Captured GraphBLAS vectors can access
-	 *              that element via the operator[]. It is illegal to access any
-	 *              element not at position \a i. The lambda takes only the single
-	 *              parameter \a i of type <code>const size_t</code>. Captured
-	 *              scalars will not be globally updated-- the user must program
-	 *              this explicitly. Scalars and other non-GraphBLAS containers
-	 *              are always local to their user process.
-	 * @param[in] x The vector the lambda will be executed on. This argument
-	 *              determines which indices \a i will be accessed during the
-	 *              elementwise operation-- elements with indices \a i that
-	 *              do not appear in \a x will be skipped during evaluation of
-	 *              \a f.
-	 * @param[in] args All vectors the lambda is to access elements of. Must be of
-	 *                 the same length as \a x. If this constraint is violated,
-	 *                 grb::MISMATCH shall be returned. <em>This is a variadic
-	 *                 argument and can contain any number of containers of type
-	 *                 grb::Vector, passed as though they were separate
-	 *                 arguments.</em>
-	 *
-	 * \note In future GraphBLAS implementations, \a args, apart from doing
-	 *       dimension checking, should also facilitate any data distribution
-	 *       necessary to successfully execute the element-wise operation. Current
-	 *       implementations do not require this since they use the same static
-	 *       distribution for all containers.
-	 *
-	 * \warning Using a grb::Vector inside a lambda passed to this function while
-	 *          not passing that same vector into \a args, will result in undefined
-	 *          behaviour.
-	 *
-	 * \note It would be natural to have \a x equal to one of the captured
-	 *       GraphBLAS vectors in \a f.
-	 *
-	 * \warning Due to the constraints on \a f described above, it is illegal to
-	 *          capture some vector \a y and have the following line in the body
-	 *          of \a f: <code>x[i] += x[i+1]</code>. Vectors can only be
-	 *          dereferenced at position \a i and \a i alone.
-	 *
-	 * @return grb::SUCCESS  When the lambda is successfully executed.
-	 * @return grb::MISMATCH When two or more vectors passed to \a args are not of
-	 *                       equal length.
-	 *
-	 * \parblock
-	 * \par Example.
-	 *
-	 * An example valid use:
-	 *
-	 * \code
-	 * void f(
-	 *      double &alpha,
-	 *      grb::Vector< double > &y,
-	 *      const double beta,
-	 *      const grb::Vector< double > &x,
-	 *      const grb::Semiring< double > ring
-	 * ) {
-	 *      assert( grb::size(x) == grb::size(y) );
-	 *      assert( grb::nnz(x) == grb::size(x) );
-	 *      assert( grb::nnz(y) == grb::size(y) );
-	 *      alpha = ring.getZero();
-	 *      grb::eWiseLambda(
-	 *          [&alpha,beta,&x,&y,ring]( const size_t i ) {
-	 *              double mul;
-	 *              const auto mul_op = ring.getMultiplicativeOperator();
-	 *              const auto add_op = ring.getAdditiveOperator();
-	 *              grb::apply( y[i], beta, x[i], mul_op );
-	 *              grb::apply( mul, x[i], y[i], mul_op );
-	 *              grb::foldl( alpha, mul, add_op );
-	 *      }, x, y );
-	 *      grb::collectives::allreduce( alpha, add_op );
-	 * }
-	 * \endcode
-	 *
-	 * This code takes a value \a beta, a vector \a x, and a semiring \a ring and
-	 * computes:
-	 *   1) \a y as the element-wise multiplication (under \a ring) of \a beta and
-	 *      \a x; and
-	 *   2) \a alpha as the dot product (under \a ring) of \a x and \a y.
-	 * This function can easily be made agnostic to whatever exact semiring is used
-	 * by templating the type of \a ring. As it is, this code is functionally
-	 * equivalent to:
-	 *
-	 * \code
-	 * grb::eWiseMul( y, beta, x, ring );
-	 * grb::dot( alpha, x, y, ring );
-	 * \endcode
-	 *
-	 * The version using the lambdas, however, is expected to execute
-	 * faster as both \a x and \a y are streamed only once, while the
-	 * latter code may stream both vectors twice.
-	 * \endparblock
-	 *
-	 * \warning The following code is invalid:
-	 *          \code
-	 *              template< class Operator >
-	 *              void f(
-	 *                   grb::Vector< double > &x,
-	 *                   const Operator op
-	 *              ) {
-	 *                   grb::eWiseLambda(
-	 *                       [&x,&op]( const size_t i ) {
-	 *                           grb::apply( x[i], x[i], x[i+1], op );
-	 *                   }, x );
-	 *              }
-	 *          \endcode
-	 *          Only a Vector::lambda_reference to position exactly equal to \a i
-	 *          may be used within this function.
-	 *
-	 * \warning There is no similar concept in the official GraphBLAS specs.
-	 *
-	 * \warning Captured scalars will be local to the user process executing the
-	 *          lambda. To retrieve the global dot product, an allreduce must
-	 *          explicitly be called.
-	 *
-	 * @see Vector::operator[]()
-	 * @see Vector::lambda_reference
+	 * For all elements in a ALP Vector \a y, fold the value \f$ \alpha \f$
+	 * into each element.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Monoid, Backend backend
+	>
+	RC foldr(
+		const Scalar< InputType, InputStructure, backend > &alpha,
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &y,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			alp::is_monoid< Monoid >::value && !alp::is_object< IOType >::value && !alp::is_object< InputType >::value
+		> * const = nullptr
+	) {
+		// Base stub: the arguments are intentionally unused; nothing is folded.
+		(void) monoid;
+		(void) y;
+		(void) alpha;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Folds the scalar \a alpha into each element of \a y, operator variant.
 	 *
-	 * \todo Revise specification regarding recent changes on phases, performance
-	 *       semantics, and capacities.
+	 * Base stub for a scalar left-hand input: computes nothing, returns UNSUPPORTED.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class OP, Backend backend
+	>
+	RC foldr(
+		const Scalar< InputType, InputStructure, backend > &alpha,
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &y,
+		const OP & op = OP(),
+		const std::enable_if_t<
+			!alp::is_object< InputType >::value && ! alp::is_object< IOType >::value && alp::is_operator< OP >::value
+		> * const = nullptr
+	) {
+		(void) alpha;
+		(void) y;
+		(void) op;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Operator-based right fold of vector \a x into the input/output vector
+	 * \a y, element by element. Base stub: returns UNSUPPORTED.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class OP, Backend backend
+	>
+	RC foldr(
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &y,
+		const OP &op = OP(),
+		const std::enable_if_t<
+			!alp::is_object< IOType >::value && !alp::is_object< InputType >::value && alp::is_operator< OP >::value
+		> * = nullptr
+	) {
+		(void) op;
+		(void) y;
+		(void) x;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Monoid-based right fold of vector \a x into the input/output vector
+	 * \a y, element by element. Base stub: returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Monoid, Backend backend
+	>
+	RC foldr(
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &y,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			!alp::is_object< IOType >::value && !alp::is_object< InputType >::value && alp::is_monoid< Monoid >::value
+		> * = nullptr
+	) {
+		(void) monoid;
+		(void) y;
+		(void) x;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * For all elements in an ALP Vector \a x, fold the scalar \f$ \beta \f$ into
+	 * each element under \a op. Base stub: nothing is computed, returns UNSUPPORTED.
 	 */
 	template<
-		typename Func,
-		typename DataType,
-		Backend backend,
-		typename Coords,
-		typename... Args
+		Descriptor descr = descriptors::no_operation,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		typename InputType, typename InputStructure,
+		class Op,
+		Backend backend
 	>
-	RC eWiseLambda(
-		const Func f,
-		const Vector< DataType, backend, Coords > & x, Args...
+	RC foldl(
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &x,
+		const Scalar< InputType, InputStructure, backend > &beta,
+		const Op &op = Op(),
+		const std::enable_if_t<
+			! alp::is_object< IOType >::value && ! alp::is_object< InputType >::value && alp::is_operator< Op >::value
+		> * = nullptr
 	) {
-#ifndef NDEBUG
-		const bool should_not_call_base_vector_ewiselambda = false;
-		assert( should_not_call_base_vector_ewiselambda );
-#endif
-		(void)f;
-		(void)x;
+		(void) x;
+		(void) beta;
+		(void) op;
 		return UNSUPPORTED;
 	}
 
 	/**
-	 * Reduces, or \em folds, a vector into a scalar.
-	 *
-	 * Reduction takes place according a monoid \f$ (\oplus,1) \f$, where
-	 * \f$ \oplus:\ D_1 \times D_2 \to D_3 \f$ with associated identities
-	 * \f$ 1_k in D_k \f$. Usually, \f$ D_k \subseteq D_3, 1 \leq k < 3 \f$,
-	 * though other more exotic structures may be envisioned (and used).
-	 *
-	 * Let \f$ x_0 = 1 \f$ and let
-	 * \f$ x_{i+1} = \begin{cases}
-	 *   x_i \oplus y_i\text{ if }y_i\text{ is nonzero and }m_i\text{ evaluates true}
-	 *   x_i\text{ otherwise}
-	 * \end{cases},\f$
-	 * for all \f$ i \in \{ 0, 1, \ldots, n-1 \} \f$.
-	 *
-	 * \note Per this definition, the folding happens in a left-to-right direction.
-	 *       If another direction is wanted, which may have use in cases where
-	 *       \f$ D_1 \f$ differs from \f$ D_2 \f$, then either a monoid with those
-	 *       operator domains switched may be supplied, or #grb::foldr may be used
-	 *       instead.
-	 *
-	 * After a successfull call, \a x will be equal to \f$ x_n \f$.
-	 *
-	 * Note that the operator \f$ \oplus \f$ must be associative since it is part
-	 * of a monoid. This algebraic property is exploited when parallelising the
-	 * requested operation. The identity is required when parallelising over
-	 * multiple user processes.
-	 *
-	 * \warning In so doing, the order of the evaluation of the reduction operation
-	 *          should not be expected to be a serial, left-to-right, evaluation of
-	 *          the computation chain.
-	 *
-	 * @tparam descr     The descriptor to be used (descriptors::no_operation if
-	 *                   left unspecified).
-	 * @tparam Monoid    The monoid to use for reduction.
-	 * @tparam InputType The type of the elements in the supplied ALP/GraphBLAS
-	 *                   vector \a y.
-	 * @tparam MaskType  The type of the elements in the supplied ALP/GraphBLAS
-	 *                   vector \a mask.
-	 * @tparam IOType    The type of the output scalar \a x.
-	 *
-	 * @param[out]   x   The result of the reduction.
-	 * @param[in]    y   Any ALP/GraphBLAS vector. This vector may be sparse.
-	 * @param[in]  mask  Any ALP/GraphBLAS vector. This vector may be sparse.
-	 * @param[in] monoid The monoid under which to perform this reduction.
-	 *
-	 * @return grb::SUCCESS  When the call completed successfully.
-	 * @return grb::MISMATCH If a \a mask was not empty and does not have size
-	 *                       equal to \a y.
-	 * @return grb::ILLEGAL  If the provided input vector \a y was not dense, while
-	 *                       #grb::descriptors::dense was given.
-	 *
-	 * \parblock
-	 * \par Valid descriptors
-	 * grb::descriptors::no_operation, grb::descriptors::no_casting,
-	 * grb::descriptors::dense, grb::descriptors::invert_mask,
-	 * grb::descriptors::structural, grb::descriptors::structural_complement
+	 * Folds all elements in a ALP Vector \a y into the corresponding
+	 * elements from an input/output vector \a x.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		class OP, Backend backend
+	>
+	RC foldl(
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &x,
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
+		const OP &op = OP(),
+		const std::enable_if_t<
+			!alp::is_object< IOType >::value &&
+			!alp::is_object< InputType >::value &&
+			alp::is_operator< OP >::value
+		> * = nullptr
+	) {
+		(void) op;
+		(void) y;
+		(void) x;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Monoid-based left fold of vector \a y into the input/output vector
+	 * \a x, element by element. Base stub: returns UNSUPPORTED.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		class Monoid, Backend backend
+	>
+	RC foldl(
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &x,
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			!alp::is_object< IOType >::value &&
+			!alp::is_object< InputType >::value &&
+			alp::is_monoid< Monoid >::value
+		> * = nullptr
+	) {
+		(void) monoid;
+		(void) y;
+		(void) x;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Element-wise application of operator \a op to a vector and a scalar,
+	 * \f$ z = x .* \beta \f$, written out of place into \a z.
+	 * Base stub: every argument is ignored and UNSUPPORTED is returned.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR, typename InputImfC,
+		typename InputType2, typename InputStructure2,
+		class OP, Backend backend
+	>
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR, InputImfC, backend > &x,
+		const Scalar< InputType2, InputStructure2, backend > &beta,
+		const OP &op = OP(),
+		const std::enable_if_t<
+			alp::is_operator< OP >::value &&
+			!alp::is_object< InputType2 >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< OutputType >::value
+		> * const = nullptr
+	) {
+		(void) op;
+		(void) beta;
+		(void) x;
+		(void) z;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes \f$ z = x \odot y \f$, out of place.
 	 *
-	 * \note Invalid descriptors will be ignored.
+	 * Specialisation for \a x and \a y scalar, operator version.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1,
+		typename InputType2, typename InputStructure2,
+		class OP, Backend backend
+	>
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Scalar< InputType1, InputStructure1, backend > &alpha,
+		const Scalar< InputType2, InputStructure2, backend > &beta,
+		const OP &op = OP(),
+		const std::enable_if_t<
+			alp::is_operator< OP >::value &&
+			!alp::is_object< InputType2 >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< OutputType >::value
+		> * const = nullptr
+	) {
+		// Base stub: the arguments are intentionally unused; \a z is not written.
+		(void) op;
+		(void) beta;
+		(void) alpha;
+		(void) z;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes \f$ z = x \odot y \f$, out of place.
 	 *
-	 * If grb::descriptors::no_casting is given, then 1) the first domain of
-	 * \a monoid must match \a InputType, 2) the second domain of \a op must match
-	 * \a IOType, 3) the third domain must match \a IOType, and 4) the element type
-	 * of \a mask must be <tt>bool</tt>. If one of these is not true, the code
-	 * shall not compile.
-	 * \endparblock
+	 * Specialisation for \a x and \a y scalar, monoid version.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1,
+		typename InputType2, typename InputStructure2,
+		class Monoid, Backend backend
+	>
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Scalar< InputType1, InputStructure1, backend> &alpha,
+		const Scalar< InputType2, InputStructure2, backend> &beta,
+		const Monoid &monoid = Monoid(),
+		const typename std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		// Base stub: the arguments are intentionally unused; \a z is not written.
+		(void) z;
+		(void) alpha;
+		(void) beta;
+		(void) monoid;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes \f$ z = x \odot y \f$, out of place.
 	 *
-	 * \parblock
-	 * \par Performance semantics
-	 * Backends must specify performance semantics in the amount of work, intra-
-	 * process data movement, inter-process data movement, and the number of
-	 * user process synchronisations required. They should also specify whether
-	 * any system calls may be made, in particularly those related to dynamic
-	 * memory management. If new memory may be allocated, they must specify how
-	 * much.
-	 * \endparblock
+	 * Monoid version.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class Monoid, Backend backend
+	>
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			alp::is_monoid< Monoid >::value &&
+			!alp::is_object< InputType2 >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< OutputType >::value
+		> * const = nullptr
+	) {
+		// Base stub: the arguments are intentionally unused; \a z is not written.
+		(void) monoid;
+		(void) y;
+		(void) x;
+		(void) z;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes \f$ z = x \odot y \f$, out of place.
 	 *
-	 * @see grb::foldr provides similar in-place functionality.
-	 * @see grb::eWiseApply provides out-of-place semantics.
+	 * Specialisation for scalar \a x (parameter \a alpha), monoid version; base stub.
 	 */
 	template<
 		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
 		class Monoid,
-		typename InputType, typename IOType, typename MaskType,
-		Backend backend, typename Coords
+		Backend backend
 	>
-	RC foldl(
-		IOType &x,
-		const Vector< InputType, backend, Coords > &y,
-		const Vector< MaskType, backend, Coords > &mask,
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Scalar< InputType1, InputStructure1, backend> &alpha,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
 		const Monoid &monoid = Monoid(),
-		const typename std::enable_if< !grb::is_object< IOType >::value &&
-			!grb::is_object< InputType >::value &&
-			!grb::is_object< MaskType >::value &&
-			grb::is_monoid< Monoid >::value, void
-		>::type * const = nullptr
+		const typename std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< Monoid >::value
+		> * const = nullptr
 	) {
-#ifndef NDEBUG
-		const bool should_not_call_base_scalar_foldl = false;
-		assert( should_not_call_base_scalar_foldl );
-#endif
+		(void) z;
+		(void) alpha;
 		(void) y;
-		(void) x;
-		(void) mask;
 		(void) monoid;
 		return UNSUPPORTED;
 	}
 
 	/**
-	 * Folds a vector into a scalar, left-to-right.
+	 * Computes \f$ z = x \odot y \f$, out of place.
 	 *
-	 * Unmasked monoid variant. See masked variant for the full documentation.
+	 * Specialisation for scalar \a y (parameter \a beta), monoid version; base stub.
 	 */
 	template<
 		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2,
 		class Monoid,
-		typename IOType, typename InputType,
-		Backend backend,
-		typename Coords
+		Backend backend
 	>
-	RC foldl(
-		IOType &x,
-		const Vector< InputType, backend, Coords > &y,
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Scalar< InputType2, InputStructure2, backend > &beta,
 		const Monoid &monoid = Monoid(),
 		const typename std::enable_if<
-			!grb::is_object< IOType >::value &&
-			grb::is_monoid< Monoid >::value,
-		void >::type * const = nullptr
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< Monoid >::value
+		>::type * const = nullptr
 	) {
-#ifndef NDEBUG
-		const bool should_not_call_base_scalar_foldl_nomask = false;
-		assert( should_not_call_base_scalar_foldl_nomask );
-#endif
-		(void) y;
+		(void) z;
 		(void) x;
+		(void) beta;
 		(void) monoid;
 		return UNSUPPORTED;
 	}
 
 	/**
-	 * Folds a vector into a scalar, left-to-right.
-	 *
-	 * Unmasked operator variant.
-	 *
-	 * \deprecated This signature is deprecated. It was implemented for reference
-	 *             (and reference_omp), but could not be implemented for BSP1D and
-	 *             other distributed-memory backends. This signature may be removed
-	 *             with any release beyond 0.6.
+	 * Applies operator \a op element-wise to scalar \a alpha and vector \a y,
+	 * \f$ z = \alpha .* y \f$. Base stub: arguments are ignored, returns UNSUPPORTED.
 	 */
 	template<
 		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
 		class OP,
-		typename IOType, typename InputType, typename MaskType,
-		Backend backend, typename Coords
+		Backend backend
 	>
-	RC foldl(
-		IOType &x,
-		const Vector< InputType, backend, Coords > &y,
-		const Vector< MaskType, backend, Coords > &mask,
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Scalar< InputType1, InputStructure1, backend > &alpha,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
 		const OP &op = OP(),
-		const typename std::enable_if<
-			!grb::is_object< IOType >::value &&
-			!grb::is_object< MaskType >::value &&
-			grb::is_operator< OP >::value,
-		void >::type * const = nullptr
+		const typename std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_operator< OP >::value
+		> * const = nullptr
 	) {
-#ifndef NDEBUG
-		const bool should_not_call_base_scalar_foldl_op = false;
-		assert( should_not_call_base_scalar_foldl_op );
-#endif
+		(void) z;
+		(void) alpha;
+		(void) y;
+		(void) op;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Applies operator \a op element-wise to vectors \a x and \a y, \f$ z = x .* y \f$.
+	 * Base stub: all arguments are ignored and UNSUPPORTED is returned.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class OP,
+		Backend backend
+	>
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const OP &op = OP(),
+		const typename std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_operator< OP >::value
+		> * const = nullptr
+	) {
+		(void) z;
 		(void) x;
 		(void) y;
-		(void) mask;
 		(void) op;
 		return UNSUPPORTED;
 	}
 
 	/**
-	 * Folds a vector into a scalar, right-to-left.
+	 * Calculates the element-wise multiplication of two vectors,
+	 *     \f$ z = z + x .* y \f$,
+	 * under a given semiring.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class Ring, Backend backend
+	>
+	RC eWiseMul(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const Ring &ring = Ring(),
+		const std::enable_if_t<
+			alp::is_semiring< Ring >::value &&
+			!alp::is_object< InputType2 >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< OutputType >::value
+		> * const = nullptr
+	) {
+		// Base stub: the arguments are intentionally unused; \a z is not updated.
+		(void) ring;
+		(void) y;
+		(void) x;
+		(void) z;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes \f$ z = z + x * y \f$.
 	 *
-	 * Masked variant. See the masked, left-to-right variant for the full
-	 * documentation.
+	 * Specialisation for scalar \a x (parameter \a alpha); base stub returning UNSUPPORTED.
 	 */
 	template<
 		Descriptor descr = descriptors::no_operation,
-		class Monoid,
-		typename InputType, typename IOType, typename MaskType,
-		Backend backend, typename Coords
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class Ring,
+		Backend backend
 	>
-	RC foldr(
-		const Vector< InputType, backend, Coords > &x,
-		const Vector< MaskType, backend, Coords > &mask,
-		IOType &y,
-		const Monoid &monoid = Monoid(),
-		const typename std::enable_if< !grb::is_object< IOType >::value &&
-			!grb::is_object< InputType >::value &&
-			!grb::is_object< MaskType >::value &&
-			grb::is_monoid< Monoid >::value, void
-		>::type * const = nullptr
+	RC eWiseMul(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Scalar< InputType1, InputStructure1, backend > &alpha,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const Ring &ring = Ring(),
+		const typename std::enable_if_t<
+			! alp::is_object< OutputType >::value &&
+			! alp::is_object< InputType1 >::value &&
+			! alp::is_object< InputType2 >::value &&
+			alp::is_semiring< Ring >::value
+		> * const = nullptr
 	) {
-#ifndef NDEBUG
-		const bool should_not_call_base_scalar_foldr = false;
-		assert( should_not_call_base_scalar_foldr );
-#endif
+		(void) z;
+		(void) alpha;
 		(void) y;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes \f$ z = z + x * y \f$.
+	 *
+	 * Specialisation for scalar \a y (parameter \a beta); base stub returning UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2,
+		class Ring, Backend backend
+	>
+	RC eWiseMul(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Scalar< InputType2, InputStructure2, backend > &beta,
+		const Ring &ring = Ring(),
+		const typename std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_semiring< Ring >::value
+		> * const = nullptr
+	) {
+		(void) z;
 		(void) x;
-		(void) mask;
-		(void) monoid;
+		(void) beta;
+		(void) ring;
 		return UNSUPPORTED;
 	}
 
 	/**
-	 * Folds a vector into a scalar, right-to-left.
+	 * Calculates the dot product, \f$ \alpha = (x,y) \f$, under a given additive
+	 * monoid and multiplicative operator.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class AddMonoid, class AnyOp, Backend backend
+	>
+	RC dot(
+		Scalar< OutputType, OutputStructure, backend > &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const AddMonoid &addMonoid = AddMonoid(),
+		const AnyOp &anyOp = AnyOp(),
+		const typename std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< AddMonoid >::value &&
+			alp::is_operator< AnyOp >::value
+		> * const = nullptr
+	) {
+		(void) z;
+		(void) x;
+		(void) y;
+		(void) addMonoid;
+		(void) anyOp;
+		return UNSUPPORTED;
+	}
+
+	/** Monoid-operator dot product writing into a plain C++ scalar \a z; this base version returns UNSUPPORTED. */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class AddMonoid, class AnyOp,
+		Backend backend
+	>
+	RC dot(
+		OutputType &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const AddMonoid &addMonoid = AddMonoid(),
+		const AnyOp &anyOp = AnyOp(),
+		const typename std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< AddMonoid >::value &&
+			alp::is_operator< AnyOp >::value
+		> * const = nullptr
+	) {
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Provides a generic implementation of the dot computation on semirings by
+	 * translating it into a dot computation on the additive monoid of \a ring
+	 * combined with the multiplicative operator of \a ring.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename IOType, typename IOStructure,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class Ring,
+		Backend backend
+	>
+	RC dot(
+		Scalar< IOType, IOStructure, backend > &x,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &left,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &right,
+		const Ring &ring = Ring(),
+		const typename std::enable_if_t<
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			!alp::is_object< IOType >::value &&
+			alp::is_semiring< Ring >::value
+		> * const = nullptr
+	) {
+		return alp::dot< descr >( x,
+			left, right,
+			ring.getAdditiveMonoid(),
+			ring.getMultiplicativeOperator()
+		);
+	}
+
+	/** Semiring dot product writing into a plain C++ scalar \a x; this base version returns UNSUPPORTED. */
+	template<
+		Descriptor descr = descriptors::no_operation, class Ring,
+		typename IOType,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		Backend backend
+	>
+	RC dot(
+		IOType &x,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &left,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &right,
+		const Ring &ring = Ring(),
+		const typename std::enable_if_t<
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			!alp::is_object< IOType >::value &&
+			alp::is_semiring< Ring >::value
+		> * const = nullptr
+	) {
+		(void) x;
+		(void) left;
+		(void) right;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * This is the eWiseLambda that performs length checking by recursion.
+	 *
+	 * in the backend implementation all vectors are distributed equally, so no
+	 * need to synchronise any data structures. We do need to do error checking
+	 * though, to see when to return alp::MISMATCH. That's this function.
+	 *
+	 * @see Vector::operator[]()
+	 * @see Vector::lambda_backend
+	 */
+	template<
+		typename Func,
+		typename DataType1, typename DataStructure1, typename DataView1, typename InputImfR1, typename InputImfC1,
+		typename DataType2, typename DataStructure2, typename DataView2, typename InputImfR2, typename InputImfC2,
+		Backend backend,
+		typename... Args
+	>
+	RC eWiseLambda(
+		const Func f,
+		Vector< DataType1, DataStructure1, Density::Dense, DataView1, InputImfR1, InputImfC1, backend > &x,
+		const Vector< DataType2, DataStructure2, Density::Dense, DataView2, InputImfR2, InputImfC2, backend > &y,
+		Args const &... args
+	) {
+		(void) f;
+		(void) x;
+		(void) y;
+		(void) sizeof...( args ); // a pack cannot be named without being expanded
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * No implementation notes. This is the `real' implementation on backend
+	 * vectors.
 	 *
-	 * Unmasked variant. See the masked, left-to-right variant for the full
-	 * documentation.
+	 * @see Vector::operator[]()
+	 * @see Vector::lambda_backend
+	 */
+	template<
+		typename Func,
+		typename DataType, typename DataStructure, typename DataView, typename DataImfR, typename DataImfC,
+		Backend backend
+	>
+	RC eWiseLambda(
+		const Func f,
+		Vector< DataType, DataStructure, Density::Dense, DataView, DataImfR, DataImfC, backend > &x
+	) {
+		(void) f;
+		(void) x;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Reduces a vector into a scalar. Reduction takes place according to a monoid
+	 * \f$ (\oplus,1) \f$, where \f$ \oplus:\ D_1 \times D_2 \to D_3 \f$ with an
+	 * associated identity \f$ 1 \in \{D_1,D_2,D_3\} \f$. Elements from the given
+	 * vector \f$ y \in \{D_1,D_2\} \f$ will be applied at the left-hand or right-
+	 * hand side of \f$ \oplus \f$; which, exactly, is implementation-dependent
+	 * but should not matter since \f$ \oplus \f$ should be associative.
 	 */
 	template<
 		Descriptor descr = descriptors::no_operation,
+		typename IOType, typename IOStructure,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
 		class Monoid,
-		typename IOType, typename InputType,
-		Backend backend, typename Coords
+		Backend backend
 	>
-	RC foldr(
-		const Vector< InputType, backend, Coords > &y,
-		IOType &x,
+	RC foldl(
+		Scalar< IOType, IOStructure, backend > &alpha,
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
 		const Monoid &monoid = Monoid(),
-		const typename std::enable_if<
-			!grb::is_object< IOType >::value &&
-			grb::is_monoid< Monoid >::value,
-		void >::type * const = nullptr
+		const typename std::enable_if_t<
+			! alp::is_object< IOType >::value && ! alp::is_object< InputType >::value && alp::is_monoid< Monoid >::value
+		> * const = nullptr
 	) {
-#ifndef NDEBUG
-		const bool should_not_call_base_scalar_foldr_nomask = false;
-		assert( should_not_call_base_scalar_foldr_nomask );
-#endif
+		(void) alpha;
 		(void) y;
-		(void) x;
 		(void) monoid;
 		return UNSUPPORTED;
 	}
 
 	/**
-	 * Dot product over a given semiring.
-	 *
-	 * \todo Write specification.
+	 * Sort vectors, function available to user, e.g. to sort eigenvectors
 	 */
 	template<
-		Descriptor descr = descriptors::no_operation, class Ring,
-		typename IOType, typename InputType1, typename InputType2,
-		Backend backend, typename Coords
+		typename IndexType, typename IndexStructure, typename IndexView, typename IndexImfR, typename IndexImfC,
+		typename ValueType, typename ValueStructure, typename ValueView, typename ValueImfR, typename ValueImfC,
+		typename Compare,
+		Backend backend
 	>
-	RC dot( IOType &x,
-		const Vector< InputType1, backend, Coords > &left,
-		const Vector< InputType2, backend, Coords > &right,
+	RC sort(
+		Vector< IndexType, IndexStructure, Density::Dense, IndexView, IndexImfR, IndexImfC, backend > &permutation,
+		const Vector< ValueType, ValueStructure, Density::Dense, ValueView, ValueImfR, ValueImfC, backend > &toSort,
+		Compare cmp
+	) noexcept {
+		(void) permutation;
+		(void) toSort;
+		(void) cmp;
+		return UNSUPPORTED;
+	}
+
+    /**
+	 * Provides a generic implementation of the 2-norm computation.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		class Ring,
+		Backend backend
+	>
+	RC norm2(
+		Scalar< OutputType, OutputStructure, backend > &x,
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
 		const Ring &ring = Ring(),
-		const Phase &phase = EXECUTE,
-		const typename std::enable_if<
-			!grb::is_object< InputType1 >::value &&
-			!grb::is_object< InputType2 >::value &&
-			!grb::is_object< IOType >::value &&
-			grb::is_semiring< Ring >::value,
-		void >::type * const = nullptr
+		const typename std::enable_if_t<
+			std::is_floating_point< OutputType >::value || grb::utils::is_complex< OutputType >::value
+		> * const = nullptr
 	) {
-#ifdef _DEBUG
-		std::cout << "Should not call base grb::dot (semiring version)\n";
-#endif
-#ifndef NDEBUG
-		const bool should_not_call_base_dot_semiring = false;
-		assert( should_not_call_base_dot_semiring );
-#endif
 		(void) x;
-		(void) left;
-		(void) right;
+		(void) y;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	/** C++ scalar version */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		class Ring,
+		Backend backend
+	>
+	RC norm2(
+		OutputType &x,
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
+		const Ring &ring = Ring(),
+		const typename std::enable_if_t<
+			std::is_floating_point< OutputType >::value || grb::utils::is_complex< OutputType >::value
+		> * const = nullptr
+	) {
+		(void) x;
+		(void) y;
 		(void) ring;
-		(void) phase;
 		return UNSUPPORTED;
 	}
 
 	/** @} */
 
-} // end namespace grb
+} // end namespace alp
 
-#endif // end _H_GRB_BASE_BLAS1
+#endif // end _H_ALP_BASE_BLAS1
 

From 6dbca5bb155bfdb65478c3d6c603c2b074fde1c1 Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Mon, 24 Oct 2022 17:29:42 +0200
Subject: [PATCH 12/23] Remove masked operation variants from blas1

---
 include/alp/reference/blas1.hpp | 246 --------------------------------
 1 file changed, 246 deletions(-)

diff --git a/include/alp/reference/blas1.hpp b/include/alp/reference/blas1.hpp
index b4f265d98..51b7b8d04 100644
--- a/include/alp/reference/blas1.hpp
+++ b/include/alp/reference/blas1.hpp
@@ -878,46 +878,6 @@ namespace alp {
 		return SUCCESS;
 	}
 
-	/**
-	 * For all elements in a ALP Vector \a x, fold the value \f$ \beta \f$
-	 * into each element.
-	 *
-	 * Masked operator variant.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType, typename InputStructure,
-		class Op
-	>
-	RC foldl(
-		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, reference > & x,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & m,
-		const Scalar< InputType, InputStructure, reference > &beta,
-		const Op & op = Op(),
-		const std::enable_if_t<
-			!alp::is_object< IOType >::value && ! alp::is_object< InputType >::value && alp::is_operator< Op >::value
-		> * = nullptr
-	) {
-		// static sanity checks
-		NO_CAST_OP_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< typename Op::D1, IOType >::value ), "alp::foldl",
-			"called with a vector x of a type that does not match the first domain "
-			"of the given operator" );
-		NO_CAST_OP_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< typename Op::D2, InputType >::value ), "alp::foldl",
-			"called on a vector y of a type that does not match the second domain "
-			"of the given operator" );
-		NO_CAST_OP_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< typename Op::D3, IOType >::value ), "alp::foldl",
-			"called on a vector x of a type that does not match the third domain "
-			"of the given operator" );
-		NO_CAST_OP_ASSERT(
-			( ! ( descr & descriptors::no_casting ) || std::is_same< bool, MaskType >::value ), "alp::foldl (reference, vector <- scalar, masked)", "provided mask does not have boolean entries" );
-		if( size( m ) == 0 ) {
-			return foldl< descr >( x, beta, op );
-		}
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
 	/**
 	 * For all elements in a ALP Vector \a x, fold the value \f$ \beta \f$
 	 * into each element.
@@ -1018,46 +978,6 @@ namespace alp {
 		return SUCCESS;
 	}
 
-	/**
-	 * For all elements in a ALP Vector \a x, fold the value \f$ \beta \f$
-	 * into each element.
-	 *
-	 * Masked monoid variant.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType,
-		class Monoid
-	>
-	RC foldl( Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, reference > & x,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & m,
-		const InputType & beta,
-		const Monoid & monoid = Monoid(),
-		const std::enable_if_t<
-			!alp::is_object< IOType >::value && ! alp::is_object< MaskType >::value && ! alp::is_object< InputType >::value && alp::is_monoid< Monoid >::value
-		> * = nullptr
-		) {
-		// static sanity checks
-		NO_CAST_OP_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< typename Monoid::D1, IOType >::value ), "alp::foldl",
-			"called with a vector x of a type that does not match the first domain "
-			"of the given monoid" );
-		NO_CAST_OP_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< typename Monoid::D2, InputType >::value ), "alp::foldl",
-			"called on a vector y of a type that does not match the second domain "
-			"of the given monoid" );
-		NO_CAST_OP_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< typename Monoid::D3, IOType >::value ), "alp::foldl",
-			"called on a vector x of a type that does not match the third domain "
-			"of the given monoid" );
-		NO_CAST_OP_ASSERT(
-			( ! ( descr & descriptors::no_casting ) || std::is_same< bool, MaskType >::value ), "alp::foldl (reference, vector <- scalar, masked, monoid)", "provided mask does not have boolean entries" );
-		if( size( m ) == 0 ) {
-			return foldl< descr >( x, beta, monoid );
-		}
-
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
 	/**
 	 * Folds all elements in a ALP Vector \a y into the corresponding
 	 * elements from an input/output vector \a x. The vectors must be of equal
@@ -1427,37 +1347,6 @@ namespace alp {
 		return eWiseApply< descr >( z, alpha, beta, monoid.getOperator() );
 	}
 
-	/**
-	 * Computes \f$ z = x \odot y \f$, out of place.
-	 *
-	 * Specialisation for scalar \a y, masked operator version.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
-		typename InputType2, typename InputStructure2,
-		class OP
-	>
-	RC eWiseApply( Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > & z,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & mask,
-		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, reference > & x,
-		const Scalar< InputType2, InputStructure2, reference > &beta,
-		const OP & op = OP(),
-		const typename std::enable_if< ! alp::is_object< OutputType >::value && ! alp::is_object< MaskType >::value && ! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value &&
-				alp::is_operator< OP >::value,
-			void >::type * const = NULL ) {
-	#ifdef _DEBUG
-		std::cout << "In masked eWiseApply ([T1]<-[T2]<-T3, using operator)\n";
-	#endif
-		// check for empty mask
-		if( size( mask ) == 0 ) {
-			return eWiseApply< descr >( z, x, beta, op );
-		}
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
 	/**
 	 * Computes \f$ z = x \odot y \f$, out of place.
 	 *
@@ -1530,87 +1419,6 @@ namespace alp {
 		return SUCCESS;
 	}
 
-	/**
-	 * Computes \f$ z = x \odot y \f$, out of place.
-	 *
-	 * Masked monoid version.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
-		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
-		class Monoid
-	>
-	RC eWiseApply( Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > & z,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & mask,
-		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, reference > & x,
-		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, reference > & y,
-		const Monoid & monoid = Monoid(),
-		const typename std::enable_if< ! alp::is_object< OutputType >::value && ! alp::is_object< MaskType >::value && ! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value &&
-				alp::is_monoid< Monoid >::value,
-			void >::type * const = NULL ) {
-	#ifdef _DEBUG
-		std::cout << "In masked eWiseApply ([T1]<-[T2]<-[T3], using monoid)\n";
-	#endif
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
-	/**
-	 * Computes \f$ z = x \odot y \f$, out of place.
-	 *
-	 * Specialisation for scalar \a x. Masked monoid version.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType1, typename InputStructure1,
-		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
-		class Monoid
-	>
-	RC eWiseApply( Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > & z,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & mask,
-		const Scalar< InputType1, InputStructure1, reference> &alpha,
-		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, reference > & y,
-		const Monoid & monoid = Monoid(),
-		const typename std::enable_if< ! alp::is_object< OutputType >::value && ! alp::is_object< MaskType >::value && ! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value &&
-				alp::is_monoid< Monoid >::value,
-			void >::type * const = NULL ) {
-	#ifdef _DEBUG
-		std::cout << "In masked eWiseApply ([T1]<-T2<-[T3], using monoid)\n";
-	#endif
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
-	/**
-	 * Computes \f$ z = x \odot y \f$, out of place.
-	 *
-	 * Specialisation for scalar \a y. Masked monoid version.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
-		typename InputType2, typename InputStructure2,
-		class Monoid
-	>
-	RC eWiseApply( Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > & z,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & mask,
-		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, reference > & x,
-		const Scalar< InputType2, InputStructure2, reference > &beta,
-		const Monoid & monoid = Monoid(),
-		const typename std::enable_if< ! alp::is_object< OutputType >::value && ! alp::is_object< MaskType >::value && ! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value &&
-				alp::is_monoid< Monoid >::value,
-			void >::type * const = NULL ) {
-	#ifdef _DEBUG
-		std::cout << "In masked eWiseApply ([T1]<-[T2]<-T3, using monoid)\n";
-	#endif
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
 	/**
 	 * Calculates the element-wise operation on one scalar to elements of one
 	 * vector, \f$ z = \alpha .* y \f$, using the given operator. The input and
@@ -1698,33 +1506,6 @@ namespace alp {
 		return SUCCESS;
 	}
 
-	/**
-	 * Computes \f$ z = x \odot y \f$, out of place.
-	 *
-	 * Specialisation for scalar \a x. Masked operator version.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType1, typename InputStructure1,
-		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
-		class OP
-	>
-	RC eWiseApply( Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > & z,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & mask,
-		const Scalar< InputType1, InputStructure1, reference> &alpha,
-		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, reference > & y,
-		const OP & op = OP(),
-		const typename std::enable_if< ! alp::is_object< OutputType >::value && ! alp::is_object< MaskType >::value && ! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value &&
-				alp::is_operator< OP >::value,
-			void >::type * const = NULL ) {
-	#ifdef _DEBUG
-		std::cout << "In masked eWiseApply ([T1]<-T2<-[T3], operator variant)\n";
-	#endif
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
 	/**
 	 * Calculates the element-wise operation on elements of two vectors,
 	 * \f$ z = x .* y \f$, using the given operator. The vectors must be
@@ -1818,33 +1599,6 @@ namespace alp {
 		return SUCCESS;
 	}
 
-	/**
-	 * Computes \f$ z = x \odot y \f$, out of place.
-	 *
-	 * Masked operator version.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
-		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
-		class OP
-	>
-	RC eWiseApply( Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > & z,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & mask,
-		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, reference > & x,
-		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, reference > & y,
-		const OP & op = OP(),
-		const typename std::enable_if< ! alp::is_object< OutputType >::value && ! alp::is_object< MaskType >::value && ! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value &&
-				alp::is_operator< OP >::value,
-			void >::type * const = NULL ) {
-	#ifdef _DEBUG
-		std::cout << "In masked eWiseApply ([T1]<-[T2]<-[T3], using operator)\n";
-	#endif
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
 	/**
 	 * Calculates the element-wise multiplication of two vectors,
 	 *     \f$ z = z + x .* y \f$,

From 6f13b2f7795cbc2bab5d52a49bf360208041fb54 Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Mon, 24 Oct 2022 17:33:10 +0200
Subject: [PATCH 13/23] Fix minor code style issues

---
 include/alp/base/io.hpp         | 38 ++++++++++++++++++++++-----------
 include/alp/reference/blas2.hpp |  1 -
 include/alp/reference/io.hpp    | 30 +++++++++++++-------------
 3 files changed, 40 insertions(+), 29 deletions(-)

diff --git a/include/alp/base/io.hpp b/include/alp/base/io.hpp
index 43147c20d..4a5d1ff3c 100644
--- a/include/alp/base/io.hpp
+++ b/include/alp/base/io.hpp
@@ -68,7 +68,7 @@ namespace alp {
 		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x
 	) noexcept {
 		(void) x;
-		return PANIC;
+		return UNSUPPORTED;
 	}
 
 	/**
@@ -82,7 +82,7 @@ namespace alp {
 	RC resize( Scalar< InputType, InputStructure, backend > &s, const length_type new_nz ) {
 		(void) s;
 		(void) new_nz;
-		return PANIC;
+		return UNSUPPORTED;
 	}
 
 	/**
@@ -100,7 +100,7 @@ namespace alp {
 	) noexcept {
 		(void) x;
 		(void) new_nz;
-		return PANIC;
+		return UNSUPPORTED;
 	}
 
 	/**
@@ -117,7 +117,7 @@ namespace alp {
 	) noexcept {
 		(void) A;
 		(void) new_nz;
-		return PANIC;
+		return UNSUPPORTED;
 	}
 
 	/**
@@ -133,12 +133,14 @@ namespace alp {
 	RC set(
 		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
 		const Scalar< T, ValStructure, backend > val,
-		const typename std::enable_if<
+		const typename std::enable_if_t<
 			!alp::is_object< DataType >::value &&
-			!alp::is_object< T >::value,
-		void >::type * const = NULL
+			!alp::is_object< T >::value
+		> * const = nullptr
 	) {
-		return PANIC;
+		(void) x;
+		(void) val;
+		return UNSUPPORTED;
 	}
 
 	/**
@@ -155,9 +157,15 @@ namespace alp {
 		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
 		const Scalar< T, ValStructure, backend > val,
 		const size_t i,
-		const typename std::enable_if< ! alp::is_object< DataType >::value && ! alp::is_object< T >::value, void >::type * const = NULL
+		const typename std::enable_if_t<
+			!alp::is_object< DataType >::value &&
+			!alp::is_object< T >::value
+		> * const = nullptr
 	) {
-		return PANIC;
+		(void) x;
+		(void) val;
+		(void) i;
+		return UNSUPPORTED;
 	}
 
 	/**
@@ -176,7 +184,9 @@ namespace alp {
 		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
 		const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &A
 	) noexcept {
-		return PANIC;
+		(void) C;
+		(void) A;
+		return UNSUPPORTED;
 	}
 
 	/**
@@ -194,7 +204,9 @@ namespace alp {
 		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
 		const Scalar< InputType, InputStructure, backend > &val
 	) noexcept {
-		return PANIC;
+		(void) C;
+		(void) val;
+		return UNSUPPORTED;
 	}
 
 	/**
@@ -520,7 +532,7 @@ namespace alp {
 		(void)start;
 		(void)end;
 		(void)mode;
-		return PANIC;
+		return UNSUPPORTED;
 	}
 
 	/** @} */
diff --git a/include/alp/reference/blas2.hpp b/include/alp/reference/blas2.hpp
index 37649ff0a..558cf49b4 100644
--- a/include/alp/reference/blas2.hpp
+++ b/include/alp/reference/blas2.hpp
@@ -29,7 +29,6 @@
 #include <alp/config.hpp>
 #include <alp/rc.hpp>
 #include <alp/matrix.hpp>
-#include <alp/blas3.hpp>
 #include <graphblas/utils/iscomplex.hpp>
 
 #define NO_CAST_OP_ASSERT( x, y, z )                                           \
diff --git a/include/alp/reference/io.hpp b/include/alp/reference/io.hpp
index cb049946b..dd734ad93 100644
--- a/include/alp/reference/io.hpp
+++ b/include/alp/reference/io.hpp
@@ -71,7 +71,7 @@ namespace alp {
 		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC
 	>
 	RC clear(
-		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > &x
 	) noexcept {
 		throw std::runtime_error( "Needs an implementation" );
 		return SUCCESS;
@@ -261,7 +261,7 @@ namespace alp {
 		typename T, typename ValStructure
 	>
 	RC set(
-		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x,
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > &x,
 		const Scalar< T, ValStructure, reference > val,
 		const typename std::enable_if<
 			!alp::is_object< DataType >::value &&
@@ -269,11 +269,11 @@ namespace alp {
 		void >::type * const = NULL
 	) {
 		// static sanity checks
-		NO_CAST_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, unmasked)",
+		NO_CAST_ASSERT( ( !( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, unmasked)",
 			"called with a value type that does not match that of the given "
 			"vector" );
 
-		if( ! internal::getInitialized( val ) ) {
+		if( !internal::getInitialized( val ) ) {
 			internal::setInitialized( x, false );
 			return SUCCESS;
 		}
@@ -332,13 +332,13 @@ namespace alp {
 		typename T
 	>
 	RC setElement(
-		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x,
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > &x,
 		const Scalar< T, ValStructure, reference > val,
 		const size_t i,
-		const typename std::enable_if< ! alp::is_object< DataType >::value && ! alp::is_object< T >::value, void >::type * const = NULL
+		const typename std::enable_if< !alp::is_object< DataType >::value && !alp::is_object< T >::value, void >::type * const = NULL
 	) {
 		// static sanity checks
-		NO_CAST_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, at index)",
+		NO_CAST_ASSERT( ( !( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, at index)",
 			"called with a value type that does not match that of the given "
 			"Vector" );
 
@@ -414,13 +414,13 @@ namespace alp {
 	) {
 		// static sanity checks
 		NO_CAST_ASSERT(
-			( ! ( descr & descriptors::no_casting ) || std::is_same< OutputType, InputType >::value ), "alp::copy (Vector)", "called with vector parameters whose element data types do not match" );
+			( !( descr & descriptors::no_casting ) || std::is_same< OutputType, InputType >::value ), "alp::copy (Vector)", "called with vector parameters whose element data types do not match" );
 		constexpr bool out_is_void = std::is_void< OutputType >::value;
 		constexpr bool in_is_void = std::is_void< OutputType >::value;
-		static_assert( ! in_is_void || out_is_void,
+		static_assert( !in_is_void || out_is_void,
 			"alp::set (reference, Vector <- Vector, masked): "
 			"if input is void, then the output must be also" );
-		static_assert( ! ( descr & descriptors::use_index ) || ! out_is_void,
+		static_assert( !( descr & descriptors::use_index ) || !out_is_void,
 			"alp::set (reference, Vector <- Vector, masked): "
 			"use_index descriptor cannot be set if output vector is void" );
 
@@ -606,7 +606,7 @@ namespace alp {
 	 *
 	 */
 	template< typename InputType, typename fwd_iterator >
-	RC buildMatrixUnique( internal::Matrix< InputType, reference > & A, fwd_iterator start, const fwd_iterator end ) {
+	RC buildMatrixUnique( internal::Matrix< InputType, reference > &A, fwd_iterator start, const fwd_iterator end ) {
 		return A.template buildMatrixUnique( start, end );
 	}
 
@@ -617,7 +617,7 @@ namespace alp {
 	 * @see alp::buildMatrix
 	 */
 	template< typename InputType, typename fwd_iterator >
-	RC buildMatrix( internal::Matrix< InputType, reference > & A, fwd_iterator start, const fwd_iterator end ) {
+	RC buildMatrix( internal::Matrix< InputType, reference > &A, fwd_iterator start, const fwd_iterator end ) {
 		return A.template buildMatrixUnique( start, end );
 	}
 
@@ -670,7 +670,7 @@ namespace alp {
 	 *
 	 */
 	template< typename MatrixT, typename fwd_iterator >
-	RC buildMatrixUnique( MatrixT & A, const fwd_iterator & start, const fwd_iterator & end ) noexcept {
+	RC buildMatrixUnique( MatrixT &A, const fwd_iterator &start, const fwd_iterator &end ) noexcept {
 		(void)A;
 		(void)start;
 		(void)end;
@@ -687,8 +687,8 @@ namespace alp {
 	template< typename InputType, typename Structure, typename View, typename ImfR, typename ImfC, typename fwd_iterator >
 	RC buildMatrix(
 		Matrix< InputType, Structure, Density::Dense, View, ImfR, ImfC, reference > &A,
-		const fwd_iterator & start,
-		const fwd_iterator & end
+		const fwd_iterator &start,
+		const fwd_iterator &end
 	) noexcept {
 		(void)A;
 		(void)start;

From 3d82d4e355c05604b72fe66eae8ada9fecd7a6f8 Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Mon, 24 Oct 2022 17:46:18 +0200
Subject: [PATCH 14/23] Move size-related operations to IO

---
 include/alp/base/io.hpp         | 27 ++++++++++++++
 include/alp/reference/blas1.hpp | 56 -----------------------------
 include/alp/reference/io.hpp    | 64 +++++++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+), 56 deletions(-)

diff --git a/include/alp/base/io.hpp b/include/alp/base/io.hpp
index 4a5d1ff3c..1b373b196 100644
--- a/include/alp/base/io.hpp
+++ b/include/alp/base/io.hpp
@@ -54,6 +54,33 @@ namespace alp {
 	 * @{
 	 */
 
+	/**
+	 * Request the size (dimension) of a given Vector. This version, generic over \a backend, ignores \a x and returns SIZE_MAX.
+	 */
+	template<
+		typename DataType, typename DataStructure, typename View,
+		typename ImfR, typename ImfC, Backend backend
+	>
+	size_t size(
+		const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x
+	) noexcept {
+		(void) x;
+		return SIZE_MAX;
+	}
+
+	/**
+	 * Request the number of nonzeroes in a given Vector. This version, generic over \a backend, ignores \a x and returns SIZE_MAX.
+	 */
+	template<
+		typename DataType, typename DataStructure, typename View,
+		typename ImfR, typename ImfC, Backend backend
+	>
+	size_t nnz(
+		const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x
+	) noexcept {
+		(void) x;
+		return SIZE_MAX;
+	}
 	/**
 	 * Clears all elements from the given vector \a x.
 	 *
diff --git a/include/alp/reference/blas1.hpp b/include/alp/reference/blas1.hpp
index 51b7b8d04..af7b7abc5 100644
--- a/include/alp/reference/blas1.hpp
+++ b/include/alp/reference/blas1.hpp
@@ -177,62 +177,6 @@ namespace alp {
 	 * @{
 	 */
 
-	/**
-	 * Request the size (dimension) of a given Vector.
-	 *
-	 * The dimension is set at construction of the given Vector and cannot
-	 * be changed. A call to this function shall always succeed.
-	 *
-	 * @tparam DataType      The type of elements contained in the vector \a x.
-	 * @tparam DataStructure The structure of the vector \a x.
-	 * @tparam View          The view type applied to the vector \a x.
-	 *
-	 * @param[in] x The Vector of which to retrieve the size.
-	 *
-	 * @return The size of the Vector \a x.
-	 *
-	//  * \parblock
-	//  * \par Performance semantics
-	//  * A call to this function
-	//  *  -# consists of \f$ \Theta(1) \f$ work;
-	//  *  -# moves \f$ \Theta(1) \f$ bytes of memory;
-	//  *  -# does not allocate any dynamic memory;
-	//  *  -# shall not make any system calls.
-	//  * \endparblock
-	 */
-	template< typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC >
-	size_t size( const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x ) noexcept {
-		return getLength( x );
-	}
-
-	/**
-	 * Request the number of nonzeroes in a given Vector.
-	 *
-	 * A call to this function always succeeds.
-	 *
-	 * @tparam DataType      The type of elements contained in the vector \a x.
-	 * @tparam DataStructure The structure of the vector \a x.
-	 * @tparam View          The view type applied to the vector \a x.
-	 *
-	 * @param[in] x The Vector of which to retrieve the number of nonzeroes.
-	 *
-	 * @return The number of nonzeroes in \a x.
-	 *
-	//  * \parblock
-	//  * \par Performance semantics
-	//  * A call to this function
-	//  *   -# consists of \f$ \Theta(1) \f$ work;
-	//  *   -# moves \f$ \Theta(1) \f$ bytes of memory;
-	//  *   -# does not allocate nor free any dynamic memory;
-	//  *   -# shall not make any system calls.
-	//  * \endparblock
-	 */
-	template< typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC >
-	size_t nnz( const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x ) noexcept {
-		throw std::runtime_error( "Needs an implementation." );
-		return 0;
-	}
-
 	/**
 	 * Folds all elements in a ALP Vector \a x into a single value \a beta.
 	 *
diff --git a/include/alp/reference/io.hpp b/include/alp/reference/io.hpp
index dd734ad93..c7311d2e4 100644
--- a/include/alp/reference/io.hpp
+++ b/include/alp/reference/io.hpp
@@ -45,6 +45,70 @@
 
 namespace alp {
 
+	/**
+	 * Request the size (dimension) of a given Vector.
+	 *
+	 * The dimension is set at construction of the given Vector and cannot
+	 * be changed. A call to this function shall always succeed.
+	 *
+	 * @tparam DataType      The type of elements contained in the vector \a x.
+	 * @tparam DataStructure The structure of the vector \a x.
+	 * @tparam View          The view type applied to the vector \a x.
+	 *
+	 * @param[in] x The Vector of which to retrieve the size.
+	 *
+	 * @return The size of the Vector \a x.
+	 *
+	//  * \parblock
+	//  * \par Performance semantics
+	//  * A call to this function
+	//  *  -# consists of \f$ \Theta(1) \f$ work;
+	//  *  -# moves \f$ \Theta(1) \f$ bytes of memory;
+	//  *  -# does not allocate any dynamic memory;
+	//  *  -# shall not make any system calls.
+	//  * \endparblock
+	 */
+	template<
+		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC
+	>
+	size_t size(
+		const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > &x
+	) noexcept {
+		return getLength( x );
+	}
+
+	/**
+	 * Request the number of nonzeroes in a given Vector.
+	 *
+	 * A call to this function always succeeds.
+	 *
+	 * @tparam DataType      The type of elements contained in the vector \a x.
+	 * @tparam DataStructure The structure of the vector \a x.
+	 * @tparam View          The view type applied to the vector \a x.
+	 *
+	 * @param[in] x The Vector of which to retrieve the number of nonzeroes.
+	 *
+	 * @return The number of nonzeroes in \a x.
+	 *
+	//  * \parblock
+	//  * \par Performance semantics
+	//  * A call to this function
+	//  *   -# consists of \f$ \Theta(1) \f$ work;
+	//  *   -# moves \f$ \Theta(1) \f$ bytes of memory;
+	//  *   -# does not allocate nor free any dynamic memory;
+	//  *   -# shall not make any system calls.
+	//  * \endparblock
+	 */
+	template<
+		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC
+	>
+	size_t nnz(
+		const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > &x
+	) noexcept {
+		throw std::runtime_error( "Needs an implementation." ); // NOTE(review): function is noexcept, so this throw ends in std::terminate instead of reaching the caller
+		return 0; // never reached: the statement above always throws
+	}
+
 	/**
 	 * Clears all elements from the given vector \a x.
 	 *

From 3a1ded13501b66781ce7bbdbd97287286fb3bfac Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Mon, 24 Oct 2022 17:54:39 +0200
Subject: [PATCH 15/23] Remove unnecessary include

---
 include/alp/base/blas1.hpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/include/alp/base/blas1.hpp b/include/alp/base/blas1.hpp
index 70dd394a9..e5cd6f90d 100644
--- a/include/alp/base/blas1.hpp
+++ b/include/alp/base/blas1.hpp
@@ -32,9 +32,6 @@
 #include <alp/descriptors.hpp>
 #include <alp/internalops.hpp>
 
-#include <assert.h>
-
-
 namespace alp {
 
 	/**

From eb776b8a01c25475881cbc1b673dbd05f5278f8a Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Tue, 25 Oct 2022 09:42:10 +0200
Subject: [PATCH 16/23] Remove obsolete blas2 base definitions

---
 include/alp/base/blas2.hpp | 587 -------------------------------------
 1 file changed, 587 deletions(-)

diff --git a/include/alp/base/blas2.hpp b/include/alp/base/blas2.hpp
index c1b87765e..55abf5b77 100644
--- a/include/alp/base/blas2.hpp
+++ b/include/alp/base/blas2.hpp
@@ -56,593 +56,6 @@ namespace alp {
 	 * @{
 	 */
 
-	/**
-	 * Right-handed sparse matrix times vector multiplication, \f$ u = Av \f$.
-	 *
-	 * Let \f$ u \f$ and \f$ \mathit{mask} \f$ each be a #alp::Vector of #alp::size
-	 * \f$ m \f$, \f$ v \f$ be a #alp::Vector of #alp::size \f$ n \f$, and let
-	 * \f$ A \f$ be a #Matrix with #alp::nrows \f$ m \f$ and #alp::ncols \f$ n \f$.
-	 * Let furthermore \f$ z \f$ be an interal vector of size \f$ m \f$.
-	 * A call to this function first computes \f$ z = Av \f$ over the provided
-	 * \a ring. It then left-folds \f$ z \f$ into \f$ u \f$ using the provided
-	 * \a accumulator.
-	 *
-	 * @see Vector for an in-depth description of a GraphBLAS vector.
-	 * @see size   for retrieving the length of a given GraphBLAS vector.
-	 * @see Matrix for an in-depth description of a GraphBLAS matrix.
-	 * @see nrows  for retrieving the number of rows of a given GraphBLAS matrix.
-	 * @see ncols  for retrieving the number of columns of a given GraphBLAS
-	 *             vector.
-	 *
-	 * Formally, the exact operation executed is
-	 *  \f$ u_i^\mathit{out} = u_i^\mathit{in} \bigodot z_i, \f$
-	 * for all \f$ i \in \{ 0, 1, \ldots, m-1 \} \f$ for which
-	 * \f$ \mathit{mask}_i \f$ evaluates <tt>true</tt>. If there is a nonzero at
-	 * \f$ z_i \f$ but no nonzero at \f$ u_i^\mathit{in} \f$ then the latter is interpreted as the additive
-	 * identity \f$ \mathbf{0} \f$ of the given \a ring.
-	 * For \f$ z \f$, we formally have:
-	 *  \f$ z_i = \bigoplus{i=0}^{m-1} \left( A_{ij} \bigotimes v_j \right), \f$
-	 * where \f$ \bigodot \f$ represents the \a accumulator, \f$ \bigoplus \f$
-	 * represents the additive operator of the provided \a ring, and
-	 * \f$ \bigotimes \f$ represents the multiplicative operator of \a ring. If here
-	 * \f$ v_j \f$ does not exist, it is considered to be equal to the additive
-	 * identity of the given \a ring.
-	 *
-	 * \note The additive identity of a given \a ring is an annihilator of
-	 *       nonzeroes from \f$ A \f$ under the multiplicative operator of \a ring;
-	 *       that is, \f$ z_i \f$ will be \f$ \mathbf{0} \f$ always. This can, of
-	 *       course, be exploited during sparse matrix--sparse vector (SpMSpV)
-	 *       multiplication.
-	 *
-	 * \note A good implementation is very careful about forming \f$ z \f$
-	 *       explicitly and, even if it is formed already, is very careful about
-	 *       making use of \f$ z \f$. Making use of an explicit buffer will result
-	 *       in \f$ \Theta(m) \f$ data movement and may only be warrented when
-	 *       \f$ A \f$ has many nonzeroes per row and \f$ v \f$ is dense.
-	 *
-	 * @tparam descr    Any combination of one or more #alp::descriptors. When
-	 *                  ommitted, the default #alp::descriptors:no_operation will
-	 *                  be assumed.
-	 * @tparam Ring     The generalised semi-ring the matrix--vector multiplication
-	 *                  is to be executed under.
-	 * @tparam IOType   The type of the elements of the output vector \a u.
-	 * @tparam InputType1 The type of the elements of the input vector \a v.
-	 * @tparam InputType2 The type of the elements of the input matrix \a A.
-	 * @tparam Operator The type of the \a accumulator. Must be a GraphBLAS
-	 *                  operator; see also #alp::operators.
-	 * @tparam InputType3 The type of the elements of the mask vector \a mask.
-	 * @tparam implementation Which back-end the given vectors and matrices belong
-	 *                        to. These must all belong to the same back-end.
-	 *
-	 * @param[in,out] u The output vector. Depending on the provided
-	 *                  \a accumulator, old vector values may affect new values.
-	 * @param[in]  mask The mask vector. The vector #alp::size must be equal to
-	 *                  that of \a u, \em or it must be equal to zero. A \a mask
-	 *                  of alp::size zero will be ignored (assumed <tt>true</tt>
-	 *                  always.
-	 * @param[in] accumulator The operator \f$ \bigodot \f$ in the above
-	 *                        description.
-	 * @param[in] A     The input matrix. Its #alp::nrows must equal the
-	 *                  #alp::size of \a u.
-	 * @param[in] v     The input vector. Its #alp::size must equal the
-	 *                  #alp::ncols of \a A.
-	 * @param[in] ring  The semiring to perform the matrix--vector multiplication
-	 *                  under. Unless #alp::descriptors::no_casting is defined,
-	 *                  elements from \a u, \a A, and \a v will be cast to the
-	 *                  domains of the additive and multiplicative operators of
-	 *                  \a ring as they are applied during the multiplication.
-	 *
-	 * \warning Even if #alp::operators::right_assign is provided as accumulator,
-	 *          old values of \a u may \em not be overwritten if the computation
-	 *          ends up not writing any new values to those values. To throw away
-	 *          old vector values use alp::descriptors::explicit_zero (for dense
-	 *          vectors only if you wish to retain sparsity of the output vector),
-	 *          or first simply use alp::clear on \a u.
-	 *
-	 * The above semantics may be changed by the following descriptors:
-	 *   * #descriptors::invert_mask: \f$ u_i^\mathit{out} \f$ will be written to
-	 *     if and only if \f$ \mathit{mask}_i \f$ evaluates <tt>false</tt>.
-	 *   * #descriptors::transpose_matrix: \f$ A \f$ is interpreted as \f$ A^T \f$
-	 *     instead.
-	 *   * #descriptors::structural: when evaluating \f$ \mathit{mask}_i \f$, only
-	 *     the structure of \f$ \mathit{mask} \f$ is considered (as opposed to its
-	 *     elements); if \f$ \mathit{mask} \f$ has a nonzero at its \f$ i \f$th
-	 *     index, it is considered to evaluate <tt>true</tt> no matter what the
-	 *     actual value of \f$ \mathit{mask}_i \f$ was.
-	 *   * #descriptors::structural_complement: a combination of two descriptors:
-	 *     #descriptors::structural and #descriptors::invert_mask (and thus
-	 *     equivalent to <tt>structural | invert_mask</tt>). Its net effect is if
-	 *     \f$ \mathit{mask} \f$ does \em not have a nonzero at the \f$ i \f$th
-	 *     index, the mask is considered to evaluate <tt>true</tt>.
-	 *   * #descriptors::add_identity: the matrix \f$ A \f$ is instead interpreted
-	 *     as \f$ A + \mathbf{1} \f$, where \f$ \mathbf{1} \f$ is the
-	 *     multiplicative identity of the given ring.
-	 *   * #descriptors::use_index: when referencing \f$ v_i \f$, if assigned, then
-	 *     instead of using the value itself, its index \f$ i \f$ is used instead.
-	 *   * #descriptors::in_place: the \a accumulator is ignored; the additive
-	 *     operator of the given \a ring is used in its place. Under certain
-	 *     conditions, an implementation can exploit this semantic to active
-	 *     faster computations.
-	 *   * #descriptors::explicit_zero: if \f$ \mathbf{0} \f$ would be assigned to
-	 *     a previously unassigned index, assign \f$ \mathbf{0} \f$ explicitly to
-	 *     that index. Here, \f$ \mathbf{0} \f$ is the additive identity of the
-	 *     provided \a ring.
-	 *
-	 * \parblock
-	 * \par Performance semantics
-	 * Performance semantics vary depending on whether a mask was provided, and on
-	 * whether the input vector is sparse or dense. If the input vector \f$ v \f$
-	 * is sparse, let \f$ J \f$ be its set of assigned indices. If a non-trivial
-	 * mask \f$ \mathit{mask} \f$ is given, let \f$ I \f$ be the set of indices for
-	 * which the corresponding \f$ \mathit{mask}_i \f$ evaluate <tt>true</tt>. Then:
-	 *   -# For the performance guarantee on the amount of work this function
-	 *      entails the following table applies:<br>
-	 *      \f$ \begin{tabular}{cccc}
-	 *           Masked & Dense input  & Sparse input \\
-	 *           \noalign{\smallskip}
-	 *           no  & $\Theta(2\mathit{nnz}(A))$      & $\Theta(2\mathit{nnz}(A_{:,J}))$ \\
-	 *           yes & $\Theta(2\mathit{nnz}(A_{I,:})$ & $\Theta(\min\{2\mathit{nnz}(A_{I,:}),2\mathit{nnz}(A_{:,J})\})$
-	 *          \end{tabular}. \f$
-	 *   -# For the amount of data movements, the following table applies:<br>
-	 *      \f$ \begin{tabular}{cccc}
-	 *           Masked & Dense input  & Sparse input \\
-	 *           \noalign{\smallskip}
-	 *           no  & $\Theta(\mathit{nnz}(A)+\min\{m,n\}+m+n)$                         & $\Theta(\mathit{nnz}(A_{:,J}+\min\{m,2|J|\}+|J|)+\mathcal{O}(2m)$ \\
-	 *           yes & $\Theta(\mathit{nnz}(A_{I,:})+\min\{|I|,n\}+2|I|)+\mathcal{O}(n)$ &
-	 * $\Theta(\min\{\Theta(\mathit{nnz}(A_{I,:})+\min\{|I|,n\}+2|I|)+\mathcal{O}(n),\mathit{nnz}(A_{:,J}+\min\{m,|J|\}+2|J|)+\mathcal{O}(2m))$ \end{tabular}. \f$
-	 *   -# A call to this function under no circumstance will allocate nor free
-	 *      dynamic memory.
-	 *   -# A call to this function under no circumstance will make system calls.
-	 * The above performance bounds may be changed by the following desciptors:
-	 *   * #descriptors::invert_mask: replaces \f$ \Theta(|I|) \f$ data movement
-	 *     costs with a \f$ \mathcal{O}(2m) \f$ cost instead, or a
-	 *     \f$ \mathcal{O}(m) \f$ cost if #descriptors::structural was defined as
-	 *     well (see below). In other words, implementations are not required to
-	 *     implement inverted operations efficiently (\f$ 2\Theta(m-|I|) \f$ data
-	 *     movements would be optimal but costs another \f$ \Theta(m) \f$ memory
-	 *     to maintain).
-	 *   * #descriptors::structural: removes \f$ \Theta(|I|) \f$ data movement
-	 *     costs as the mask values need no longer be touched.
-	 *   * #descriptors::add_identity: adds, at most, the costs of alp::foldl
-	 *     (on vectors) to all performance metrics.
-	 *   * #descriptors::use_index: removes \f$ \Theta(n) \f$ or
-	 *     \f$ \Theta(|J|) \f$ data movement costs as the input vector values need
-	 *     no longer be touched.
-	 *   * #descriptors::in_place (see also above): turns \f$ \mathcal{O}(2m) \f$
-	 *     data movements into \f$ \mathcal{O}(m) \f$ instead; i.e., it halves the
-	 *     amount of data movements for writing the output.
-	 *   * #descriptors::dense: the input, output, and mask vectors are assumed to
-	 *     be dense. This allows the implementation to skip checks or other code
-	 *     blocks related to handling of sparse vectors. This may result in use of
-	 *     unitialised memory if any of the provided vectors were, in fact,
-	 *     sparse.
-	 * Implementations that support multiple user processes must characterise data
-	 * movement between then.
-	 * \endparblock
-	 *
-	 * @returns alp::SUCCESS  If the computation completed successfully.
-	 * @returns alp::MISMATCH If there is at least one mismatch between vector
-	 *                        dimensions or between vectors and the given matrix.
-	 * @returns alp::OVERLAP  If two or more provided vectors refer to the same
-	 *                        vector.
-	 *
-	 * When a non-SUCCESS error code is returned, it shall be as though the call
-	 * was never made. Note that all GraphBLAS functions may additionally return
-	 * #alp::PANIC, which indicates the library has entered an undefined state; if
-	 * this error code is returned, the only sensible thing a user can do is exit,
-	 * or at least refrain from using any GraphBLAS functions for the remainder of
-	 * the application.
-	 */
-	template< 
-		class Ring,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		typename InputType3,
-		Descriptor descr = descriptors::no_operation,
-		enum Backend implementation = config::default_backend >
-	RC mxv( internal::Vector< IOType, implementation > & u,
-		const internal::Vector< InputType3, implementation > & mask,
-		const internal::Matrix< InputType2, implementation > & A,
-		const internal::Vector< InputType1, implementation > & v,
-		const Ring & ring,
-		typename std::enable_if< alp::is_semiring< Ring >::value, void >::type * = NULL ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::mxv (output-masked)\n";
- #endif
-#endif
-		(void)u;
-		(void)mask;
-		(void)A;
-		(void)v;
-		(void)ring;
-		return UNSUPPORTED;
-	}
-
-	/**
-	 * A short-hand for an unmasked #alp::mxv.
-	 *
-	 * @see alp::mxv for the full documentation.
-	 */
-	template< Descriptor descr = descriptors::no_operation, class Ring, typename IOType, typename InputType1, typename InputType2, Backend implementation = config::default_backend >
-	RC mxv( internal::Vector< IOType, implementation > & u,
-		const internal::Matrix< InputType2, implementation > & A,
-		const internal::Vector< InputType1, implementation > & v,
-		const Ring & ring,
-		typename std::enable_if< alp::is_semiring< Ring >::value, void >::type * = NULL ) {
-#ifdef _DEBUG
-#ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::mxv\n";
-#else
-		printf( "Selected backend does not implement alp::mxv\n" );
-#endif
-#endif
-		(void)u;
-		(void)A;
-		(void)v;
-		(void)ring;
-		return UNSUPPORTED;
-	}
-
-	/**
-	 * Left-handed sparse matrix times vector multiplication, \f$ u = vA \f$.
-	 *
-	 * If \a descr does not have #alp::descriptors::transpose_matrix defined, the
-	 * semantics and performance semantics of this function are exactly that of
-	 * alp::mxv with the #alp::descriptors::transpose_matrix set.
-	 * In the other case, the functional and performance semantics of this function
-	 * are exactly that of alp::mxv without the #alp::descriptors::transpose_matrix
-	 * set.
-	 *
-	 * @see alp::mxv for the full documentation.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		class Ring,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		typename InputType3,
-		enum Backend implementation = config::default_backend >
-	RC vxm( internal::Vector< IOType, implementation > & u,
-		const internal::Vector< InputType3, implementation > & mask,
-		const internal::Vector< InputType1, implementation > & v,
-		const internal::Matrix< InputType2, implementation > & A,
-		const Ring & ring,
-		typename std::enable_if< alp::is_semiring< Ring >::value, void >::type * = NULL ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::vxm (output-masked)\n";
- #endif
-#endif
-		(void)u;
-		(void)mask;
-		(void)v;
-		(void)A;
-		(void)ring;
-		return UNSUPPORTED;
-	}
-
-	/**
-	 * A short-hand for an unmasked alp::vxm.
-	 *
-	 * @see alp::vxm for the full documentation.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		class Ring,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		enum Backend implementation = config::default_backend >
-	RC vxm( internal::Vector< IOType, implementation > & u,
-		const internal::Vector< InputType1, implementation > & v,
-		const internal::Matrix< InputType2, implementation > & A,
-		const Ring & ring,
-		typename std::enable_if< alp::is_semiring< Ring >::value, void >::type * = NULL ) {
-#ifdef _DEBUG
-  #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::vxm\n";
- #endif
-#endif
-		(void)u;
-		(void)v;
-		(void)A;
-		(void)ring;
-		return UNSUPPORTED;
-	}
-
-	/** TODO documentation */
-	template< Descriptor descr = descriptors::no_operation,
-		class AdditiveMonoid,
-		class MultiplicativeOperator,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		typename InputType3,
-		typename InputType4,
-		Backend backend >
-	RC vxm( internal::Vector< IOType, backend > & u,
-		const internal::Vector< InputType3, backend > & mask,
-		const internal::Vector< InputType1, backend > & v,
-		const internal::Vector< InputType4, backend > & v_mask,
-		const internal::Matrix< InputType2, backend > & A,
-		const AdditiveMonoid & add = AdditiveMonoid(),
-		const MultiplicativeOperator & mul = MultiplicativeOperator(),
-		const typename std::enable_if< alp::is_monoid< AdditiveMonoid >::value && alp::is_operator< MultiplicativeOperator >::value && ! alp::is_object< IOType >::value &&
-				! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value && ! alp::is_object< InputType3 >::value && ! alp::is_object< InputType4 >::value &&
-				! std::is_same< InputType2, void >::value,
-			void >::type * const = NULL ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement vxm (doubly-masked)\n";
- #endif
-#endif
-		(void)u;
-		(void)mask;
-		(void)v;
-		(void)v_mask;
-		(void)A;
-		(void)add;
-		(void)mul;
-		return UNSUPPORTED;
-	}
-
-	/** TODO documentation */
-	template< Descriptor descr = descriptors::no_operation,
-		class AdditiveMonoid,
-		class MultiplicativeOperator,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		typename InputType3,
-		typename InputType4,
-		Backend backend >
-	RC mxv( internal::Vector< IOType, backend > & u,
-		const internal::Vector< InputType3, backend > & mask,
-		const internal::Matrix< InputType2, backend > & A,
-		const internal::Vector< InputType1, backend > & v,
-		const internal::Vector< InputType4, backend > & v_mask,
-		const AdditiveMonoid & add = AdditiveMonoid(),
-		const MultiplicativeOperator & mul = MultiplicativeOperator(),
-		const typename std::enable_if< alp::is_monoid< AdditiveMonoid >::value && alp::is_operator< MultiplicativeOperator >::value && ! alp::is_object< IOType >::value &&
-				! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value && ! alp::is_object< InputType3 >::value && ! alp::is_object< InputType4 >::value &&
-				! std::is_same< InputType2, void >::value,
-			void >::type * const = NULL ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement mxv (doubly-masked)\n";
- #endif
-#endif
-		(void)u;
-		(void)mask;
-		(void)A;
-		(void)v;
-		(void)v_mask;
-		(void)add;
-		(void)mul;
-		return UNSUPPORTED;
-	}
-
-	/** TODO documentation */
-	template< Descriptor descr = descriptors::no_operation,
-		class AdditiveMonoid,
-		class MultiplicativeOperator,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		typename InputType3,
-		Backend backend >
-	RC mxv( internal::Vector< IOType, backend > & u,
-		const internal::Vector< InputType3, backend > & mask,
-		const internal::Matrix< InputType2, backend > & A,
-		const internal::Vector< InputType1, backend > & v,
-		const AdditiveMonoid & add = AdditiveMonoid(),
-		const MultiplicativeOperator & mul = MultiplicativeOperator(),
-		const typename std::enable_if< alp::is_monoid< AdditiveMonoid >::value && alp::is_operator< MultiplicativeOperator >::value && ! alp::is_object< IOType >::value &&
-				! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value && ! alp::is_object< InputType3 >::value && ! std::is_same< InputType2, void >::value,
-			void >::type * const = NULL ) {
-		(void)u;
-		(void)mask;
-		(void)A;
-		(void)v;
-		(void)add;
-		(void)mul;
-		return UNSUPPORTED;
-	}
-
-	/** TODO documentation */
-	template< Descriptor descr = descriptors::no_operation,
-		class AdditiveMonoid,
-		class MultiplicativeOperator,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		Backend backend >
-	RC vxm( internal::Vector< IOType, backend > & u,
-		const internal::Vector< InputType1, backend > & v,
-		const internal::Matrix< InputType2, backend > & A,
-		const AdditiveMonoid & add = AdditiveMonoid(),
-		const MultiplicativeOperator & mul = MultiplicativeOperator(),
-		const typename std::enable_if< alp::is_monoid< AdditiveMonoid >::value && alp::is_operator< MultiplicativeOperator >::value && ! alp::is_object< IOType >::value &&
-				! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value && ! std::is_same< InputType2, void >::value,
-			void >::type * const = NULL ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement vxm (unmasked)\n";
- #endif
-#endif
-		(void)u;
-		(void)v;
-		(void)A;
-		(void)add;
-		(void)mul;
-		return UNSUPPORTED;
-	}
-
-	/** TODO documentation */
-	template< Descriptor descr = descriptors::no_operation,
-		class AdditiveMonoid,
-		class MultiplicativeOperator,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		typename InputType3,
-		Backend implementation >
-	RC vxm( internal::Vector< IOType, implementation > & u,
-		const internal::Vector< InputType3, implementation > & mask,
-		const internal::Vector< InputType1, implementation > & v,
-		const internal::Matrix< InputType2, implementation > & A,
-		const AdditiveMonoid & add = AdditiveMonoid(),
-		const MultiplicativeOperator & mul = MultiplicativeOperator(),
-		typename std::enable_if< alp::is_monoid< AdditiveMonoid >::value && alp::is_operator< MultiplicativeOperator >::value && ! alp::is_object< IOType >::value &&
-				! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value && ! std::is_same< InputType2, void >::value,
-			void >::type * = NULL ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::vxm (output-masked)\n";
- #endif
-#endif
-		(void)u;
-		(void)mask;
-		(void)v;
-		(void)A;
-		(void)add;
-		(void)mul;
-		return UNSUPPORTED;
-	}
-
-	/** TODO documentation */
-	template< Descriptor descr = descriptors::no_operation,
-		class AdditiveMonoid,
-		class MultiplicativeOperator,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		Backend backend >
-	RC mxv( internal::Vector< IOType, backend > & u,
-		const internal::Matrix< InputType2, backend > & A,
-		const internal::Vector< InputType1, backend > & v,
-		const AdditiveMonoid & add = AdditiveMonoid(),
-		const MultiplicativeOperator & mul = MultiplicativeOperator(),
-		const typename std::enable_if< alp::is_monoid< AdditiveMonoid >::value && alp::is_operator< MultiplicativeOperator >::value && ! alp::is_object< IOType >::value &&
-				! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value && ! std::is_same< InputType2, void >::value,
-			void >::type * const = NULL ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::mxv (unmasked)\n";
- #endif
-#endif
-		(void)u;
-		(void)A;
-		(void)v;
-		(void)add;
-		(void)mul;
-		return UNSUPPORTED;
-	}
-
-	/**
-	 * Executes an arbitrary element-wise user-defined function \a f on all
-	 * nonzero elements of a given matrix \a A.
-	 *
-	 * The user-defined function is passed as a lambda which can capture whatever
-	 * the user would like, including one or multiple alp::Vector instances, or
-	 * multiple scalars. When capturing vectors, these should also be passed as a
-	 * additional arguments to this functions so to make sure those vectors are
-	 * synchronised for access on all row- and column- indices corresponding to
-	 * locally stored nonzeroes of \a A.
-	 *
-	 * Only the elements of a single matrix may be iterated upon.
-	 *
-	 * \note Rationale: while it is reasonable to expect an implementation be able
-	 *       to synchronise vector elements, it may be unreasonable to expect two
-	 *       different matrices can be jointly accessed via arbitrary lambda
-	 *       functions.
-	 *
-	 * \warning The lambda shall only be executed on the data local to the user
-	 *          process calling this function! This is different from the various
-	 *          fold functions, or alp::dot, in that the semantics of those
-	 *          functions always result in globally synchronised result. To
-	 *          achieve the same effect with user-defined lambdas, the users
-	 *          should manually prescribe how to combine the local results into
-	 *          global ones, for instance, by subsequent calls to
-	 *          alp::collectives.
-	 *
-	 * \note This is an addition to the GraphBLAS. It is alike user-defined
-	 *       operators, monoids, and semirings, except it allows execution on
-	 *       arbitrarily many inputs and arbitrarily many outputs.
-	 *
-	 * @tparam Func     the user-defined lambda function type.
-	 * @tparam DataType the type of the user-supplied matrix.
-	 * @tparam backend  the backend type of the user-supplied vector example.
-	 *
-	 * @param[in] f The user-supplied lambda. This lambda should only capture
-	 *              and reference vectors of the same length as either the row or
-	 *              column dimension length of \a A. The lambda function should
-	 *              prescribe the operations required to execute on a given
-	 *              reference to a matrix nonzero of \a A (of type \a DataType) at
-	 *              a given index \f$ (i,j) \f$. Captured GraphBLAS vectors can
-	 *              access corresponding elements via Vector::operator[] or
-	 *              Vector::operator(). It is illegal to access any element not at
-	 *              position \a i if the vector length is equal to the row
-	 *              dimension. It is illegal to access any element not at position
-	 *              \a j if the vector length is equal to the column dimension.
-	 *              Vectors of length neither equal to the column or row dimension
-	 *              may \em not be referenced or undefined behaviour will occur. The
-	 *              reference to the matrix nonzero is non \a const and may thus be
-	 *              modified. New nonzeroes may \em not be added through this lambda
-	 *              functionality. The function \a f must have the following
-	 *              signature:
-	 *              <code>(DataType &nz, const size_t i, const size_t j)</code>.
-	 *              The GraphBLAS implementation decides which nonzeroes of \a A are
-	 *              dereferenced, and thus also decides the values \a i and \a j the
-	 *              user function is evaluated on.
-	 * @param[in] A The matrix the lambda is to access the elements of.
-	 * @param[in] args All vectors the lambda is to access elements of. Must be of
-	 *                 the same length as \a nrows(A) or \a ncols(A). If this
-	 *                 constraint is violated, alp::MISMATCH shall be returned. If
-	 *                 the vector length equals \a nrows(A), the vector shall be
-	 *                 synchronized for access on \a i. If the vector length equals
-	 *                 \a ncols(A), the vector shall be synchronized for access on
-	 *                 \a j. If \a A is square, the vectors will be synchronised for
-	 *                 access on both \a x and \a y. <em>This is a variadic argument
-	 *                 and can contain any number of containers of type alp::Vector,
-	 *                 passed as though they were separate arguments.</em>
-	 *
-	 * \warning Using a alp::Vector inside a lambda passed to this function while
-	 *          not passing that same vector into \a args, will result in undefined
-	 *          behaviour.
-	 *
-	 * \warning Due to the constraints on \a f described above, it is illegal to
-	 *          capture some vector \a y and have the following line in the body
-	 *          of \a f: <code>x[i] += x[i+1]</code>. Vectors can only be
-	 *          dereferenced at position \a i and \a i alone, and similarly for
-	 *          access using \a j. For square matrices, however, the following
-	 *          code in the body is accepted, however: <code>x[i] += x[j]</code>.
-	 *
-	 * @return alp::SUCCESS  When the lambda is successfully executed.
-	 * @return alp::MISMATCH When two or more vectors passed to \a args are not of
-	 *                       appropriate length.
-	 *
-	 * \warning Captured scalars will be local to the user process executing the
-	 *          lambda. To retrieve the global dot product, an allreduce must
-	 *          explicitly be called.
-	 *
-	 * @see Vector::operator[]()
-	 * @see Vector::operator()()
-	 * @see Vector::lambda_reference
-	 */
-	template< typename Func, typename DataType, Backend implementation = config::default_backend, typename... Args >
-	RC eWiseLambda( const Func f, const internal::Matrix< DataType, implementation > & A, Args... /*args*/ ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::eWiseLambda (matrices)\n";
- #endif
-#endif
-		(void)f;
-		(void)A;
-		return UNSUPPORTED;
-	}
-
 	/** @} */
 
 } // namespace alp

From 28c2e8a3f40432b19332fa60586c296e4ba15265 Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Tue, 25 Oct 2022 09:50:03 +0200
Subject: [PATCH 17/23] Move nnz definitions from blas2 to io

---
 include/alp/base/io.hpp         | 28 ++++++++++++++++++++++++++++
 include/alp/reference/blas2.hpp | 21 ---------------------
 include/alp/reference/io.hpp    | 25 +++++++++++++++++++++++++
 3 files changed, 53 insertions(+), 21 deletions(-)

diff --git a/include/alp/base/io.hpp b/include/alp/base/io.hpp
index 1b373b196..5ae91018c 100644
--- a/include/alp/base/io.hpp
+++ b/include/alp/base/io.hpp
@@ -81,6 +81,34 @@ namespace alp {
 		(void) x;
 		return SIZE_MAX;
 	}
+
+	/**
+	 * Retrieve the number of nonzeroes contained in the matrix \a A.
+	 *
+	 * @param[in] A The matrix to query.
+	 * @returns The number of nonzeroes \a A contains. This base definition
+	 *          ignores \a A and always returns SIZE_MAX.
+	 *
+	 * \parblock
+	 * \par Performance semantics.
+	 *        -# This function constitutes \f$ \Theta(1) \f$ work.
+	 *        -# This function allocates no additional dynamic memory.
+	 *        -# This function uses \f$ \mathcal{O}(1) \f$ memory beyond that
+	 *           which was already used at function entry.
+	 *        -# This function will move \f$ \mathit{sizeof}( size\_t ) \f$ bytes.
+	 * \endparblock
+	 */
+	template<
+		typename DataType, typename Structure, typename View,
+		typename ImfR, typename ImfC, Backend backend
+	>
+	size_t nnz(
+		const Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, backend > &A
+	) noexcept {
+		(void) A;
+		return SIZE_MAX;
+	}
+
 	/**
 	 * Clears all elements from the given vector \a x.
 	 *
diff --git a/include/alp/reference/blas2.hpp b/include/alp/reference/blas2.hpp
index 558cf49b4..dd3c6aa00 100644
--- a/include/alp/reference/blas2.hpp
+++ b/include/alp/reference/blas2.hpp
@@ -61,27 +61,6 @@ namespace alp {
 	 * @{
 	 */
 
-	/**
-	 * Retrieve the number of nonzeroes contained in this matrix.
-	 *
-	 * @returns The number of nonzeroes the current matrix contains.
-	 *
-	 * \parblock
-	 * \par Performance semantics.
-	 *        -# This function consitutes \f$ \Theta(1) \f$ work.
-	 *        -# This function allocates no additional dynamic memory.
-	 *        -# This function uses \f$ \mathcal{O}(1) \f$ memory
-	 *           beyond that which was already used at function entry.
-	 *        -# This function will move
-	 *             \f$ \mathit{sizeof}( size\_t ) \f$
-	 *           bytes of memory.
-	 * \endparblock
-	 */
-	template< typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC >
-	size_t nnz( const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > & A ) noexcept {
-		return A.nz;
-	}
-
 	/** \internal Delegates to fully masked variant */
 	template< Descriptor descr = descriptors::no_operation,
 		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
diff --git a/include/alp/reference/io.hpp b/include/alp/reference/io.hpp
index c7311d2e4..3b5a673b0 100644
--- a/include/alp/reference/io.hpp
+++ b/include/alp/reference/io.hpp
@@ -109,6 +109,31 @@ namespace alp {
 		return 0;
 	}
 
+	/**
+	 * Retrieve the number of nonzeroes contained in the matrix \a A.
+	 *
+	 * Reference-backend definition: the count is read from \a A.nz.
+	 *
+	 * @param[in] A The matrix to query.
+	 *
+	 * @returns The number of nonzeroes \a A contains.
+	 *
+	 * \parblock
+	 * \par Performance semantics.
+	 *        -# This function constitutes \f$ \Theta(1) \f$ work.
+	 *        -# This function allocates no additional dynamic memory.
+	 *        -# This function uses \f$ \mathcal{O}(1) \f$ memory beyond that
+	 *           which was already used at function entry.
+	 *        -# This function will move \f$ \mathit{sizeof}( size\_t ) \f$ bytes.
+	 * \endparblock
+	 */
+	template< typename DataType, typename Structure, typename View, typename ImfR, typename ImfC >
+	size_t nnz(
+		const Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, reference > &A
+	) noexcept {
+		return A.nz;
+	}
+
 	/**
 	 * Clears all elements from the given vector \a x.
 	 *

From 7cc47c7796c8c3d7075a65ed2ae2be8eeef5c36f Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Tue, 25 Oct 2022 10:19:26 +0200
Subject: [PATCH 18/23] Add base definitions of corresponding ALP primitives
 into blas2 base

---
 include/alp/base/blas2.hpp | 409 +++++++++++++++++++++++++++++++++++++
 1 file changed, 409 insertions(+)

diff --git a/include/alp/base/blas2.hpp b/include/alp/base/blas2.hpp
index 55abf5b77..c0f467aa8 100644
--- a/include/alp/base/blas2.hpp
+++ b/include/alp/base/blas2.hpp
@@ -56,6 +56,415 @@ namespace alp {
 	 * @{
 	 */
 
+	/**
+	 * Base definition of alp::vxm (semiring variant): a stub returning UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation, class Ring,
+		typename IOType = typename Ring::D4, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		typename InputType1 = typename Ring::D1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2 = typename Ring::D2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		Backend backend
+	>
+	RC vxm(
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &u,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &v,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &A,
+		const Ring &ring = Ring(),
+		const typename std::enable_if_t< alp::is_semiring< Ring >::value > * const = nullptr
+	) {
+		(void) u;
+		(void) v;
+		(void) A;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Base definition of alp::vxm, variant taking an additive monoid and a
+	 * multiplicative operator.
+	 *
+	 * This stub ignores all of its arguments and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class AdditiveMonoid, class MultiplicativeOperator,
+		Backend backend
+	>
+	RC vxm(
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &u,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &v,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &A,
+		const AdditiveMonoid &add = AdditiveMonoid(),
+		const MultiplicativeOperator &mul = MultiplicativeOperator(),
+		const std::enable_if_t<
+			alp::is_monoid< AdditiveMonoid >::value && alp::is_operator< MultiplicativeOperator >::value &&
+			!alp::is_object< IOType >::value && !alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value && !std::is_same< InputType2, void >::value
+		> * const = nullptr
+	) {
+		(void) u;
+		(void) v;
+		(void) A;
+		(void) add;
+		(void) mul;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Base definition of alp::mxv (semiring variant): a stub returning UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		class Ring,
+		typename IOType = typename Ring::D4, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		typename InputType2 = typename Ring::D2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		typename InputType1 = typename Ring::D1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		Backend backend
+	>
+	RC mxv(
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &u,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &A,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &v,
+		const Ring &ring,
+		const std::enable_if_t< alp::is_semiring< Ring >::value > * const = nullptr
+	) {
+		(void) u;
+		(void) A;
+		(void) v;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Base definition of alp::mxv, variant taking an additive monoid and a
+	 * multiplicative operator.
+	 *
+	 * This stub ignores all of its arguments and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		class AdditiveMonoid, class MultiplicativeOperator,
+		Backend backend
+	>
+	RC mxv(
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &u,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &A,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &v,
+		const AdditiveMonoid &add = AdditiveMonoid(),
+		const MultiplicativeOperator &mul = MultiplicativeOperator(),
+		const std::enable_if_t<
+			alp::is_monoid< AdditiveMonoid >::value && alp::is_operator< MultiplicativeOperator >::value &&
+			!alp::is_object< IOType >::value && !alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value && !std::is_same< InputType2, void >::value
+		> * const = nullptr
+	) {
+		(void) u;
+		(void) A;
+		(void) v;
+		(void) add;
+		(void) mul;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Base definition of the matrix-only alp::eWiseLambda: a stub that ignores
+	 * \a f and \a A and returns UNSUPPORTED.
+	 * @see alp::eWiseLambda for the user-level specification.
+	 */
+	template<
+		typename Func, typename DataType, typename Structure, typename View,
+		typename ImfR, typename ImfC, Backend backend
+	>
+	RC eWiseLambda(
+		const Func f, Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, backend > &A
+	) {
+		(void) f;
+		(void) A;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Checks the length of the vector \a x against the dimensions of \a A, then
+	 * calls eWiseLambda again with the remaining vectors in \a args; once none
+	 * remain, that call is expected to resolve to the matrix-only overload.
+	 * @returns MISMATCH if the length of \a x equals neither nrows( A ) nor
+	 *          ncols( A ); otherwise, whatever the subsequent call returns.
+	 * @see alp::eWiseLambda for the user-level specification.
+	 */
+	template<
+		typename Func,
+		typename DataType1, typename Structure1, typename View1, typename ImfR1, typename ImfC1,
+		typename DataType2, typename Structure2, typename View2, typename ImfR2, typename ImfC2,
+		Backend backend, typename... Args
+	>
+	RC eWiseLambda(
+		const Func f,
+		Matrix< DataType1, Structure1, Density::Dense, View1, ImfR1, ImfC1, backend > &A,
+		const Vector< DataType2, Structure2, Density::Dense, View2, ImfR2, ImfC2, backend > &x,
+		Args const &... args
+	) {
+		// do size checking; the message reports the same length that is tested
+		if( !( getLength( x ) == nrows( A ) || getLength( x ) == ncols( A ) ) ) {
+			std::cerr << "Mismatching dimensions: given vector of size " << getLength( x )
+				<< " has nothing to do with either matrix dimension (" << nrows( A ) << " nor " << ncols( A ) << ").\n";
+			return MISMATCH;
+		}
+		return eWiseLambda( f, A, args... );
+	}
+
+	/**
+	 * For all elements in an ALP Matrix \a B, fold the value \f$ \alpha \f$
+	 * into each element, monoid variant.
+	 *
+	 * This base definition is a stub: it ignores all of its arguments and
+	 * returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Monoid, Backend backend
+	>
+	RC foldr(
+		const Scalar< InputType, InputStructure, backend > &alpha,
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &B,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			!alp::is_object< InputType >::value && !alp::is_object< IOType >::value && alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) alpha;
+		(void) B;
+		(void) monoid;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Folds element-wise alpha into B, operator variant. This base definition
+	 * ignores all of its arguments and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Operator, Backend backend
+	>
+	RC foldr(
+		const Scalar< InputType, InputStructure, backend > &alpha,
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &B,
+		const Operator &op = Operator(),
+		const std::enable_if_t<
+			!alp::is_object< InputType >::value && !alp::is_object< IOType >::value && alp::is_operator< Operator >::value
+		> * const = nullptr
+	) {
+		(void) alpha;
+		(void) B;
+		(void) op;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Folds element-wise A into B, monoid variant. This base definition
+	 * ignores all of its arguments and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Monoid, Backend backend
+	>
+	RC foldr(
+		const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &A,
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &B,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			!alp::is_object< InputType >::value && !alp::is_object< IOType >::value && alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) A;
+		(void) B;
+		(void) monoid;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Folds element-wise A into B, operator variant. This base definition
+	 * ignores all of its arguments and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Operator, Backend backend
+	>
+	RC foldr(
+		const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &A,
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &B,
+		const Operator &op = Operator(),
+		const std::enable_if_t<
+			!alp::is_object< InputType >::value && !alp::is_object< IOType >::value && alp::is_operator< Operator >::value
+		> * const = nullptr
+	) {
+		(void) A;
+		(void) B;
+		(void) op;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Folds element-wise B into A, monoid variant. This base definition
+	 * ignores all of its arguments and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Monoid, Backend backend
+	>
+	RC foldl(
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &A,
+		const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &B,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			!alp::is_object< IOType >::value && !alp::is_object< InputType >::value && alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) A;
+		(void) B;
+		(void) monoid;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Folds element-wise B into A, operator variant. This base definition
+	 * ignores all of its arguments and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Operator, Backend backend
+	>
+	RC foldl(
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &A,
+		const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &B,
+		const Operator &op = Operator(),
+		const std::enable_if_t<
+			!alp::is_object< IOType >::value && !alp::is_object< InputType >::value && alp::is_operator< Operator >::value
+		> * const = nullptr
+	) {
+		(void) A;
+		(void) B;
+		(void) op;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Folds element-wise beta into A, monoid variant. This base definition
+	 * ignores all of its arguments and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Monoid, Backend backend
+	>
+	RC foldl(
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &A,
+		const Scalar< InputType, InputStructure, backend > &beta,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			!alp::is_object< IOType >::value && !alp::is_object< InputType >::value && alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) A;
+		(void) beta;
+		(void) monoid;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Folds element-wise beta into A, operator variant. This base definition
+	 * ignores all of its arguments and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Operator, Backend backend
+	>
+	RC foldl(
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &A,
+		const Scalar< InputType, InputStructure, backend > &beta,
+		const Operator &op = Operator(),
+		const std::enable_if_t<
+			!alp::is_object< IOType >::value && !alp::is_object< InputType >::value && alp::is_operator< Operator >::value
+		> * const = nullptr
+	) {
+		(void) A;
+		(void) beta;
+		(void) op;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Returns a view over the input matrix returning conjugate of the accessed
+	 * element. This overload is enabled only when
+	 * structures::is_a< Structure, structures::Square >::value does not hold.
+	 *
+	 * NOTE(review): the body returns the value UNSUPPORTED although the declared
+	 * return type is a Matrix -- confirm this is intended to be instantiable.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename DataType, typename Structure, typename View, typename ImfR, typename ImfC,
+		Backend backend,
+		std::enable_if_t< !structures::is_a< Structure, structures::Square >::value > * = nullptr
+	>
+	Matrix<
+		DataType, Structure, Density::Dense,
+		view::Functor< std::function< void( DataType &, const size_t, const size_t ) > >,
+		imf::Id, imf::Id, backend
+	>
+	conjugate(
+		const Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, backend > &A,
+		const std::enable_if_t< !alp::is_object< DataType >::value > * const = nullptr
+	) {
+		(void) A;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Overload of conjugate for square matrices: enabled only when
+	 * structures::is_a< Structure, structures::Square >::value holds.
+	 * NOTE(review): the body returns the value UNSUPPORTED although the declared
+	 * return type is a Matrix -- confirm this is intended to be instantiable.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename DataType, typename Structure, typename View, typename ImfR, typename ImfC,
+		Backend backend,
+		std::enable_if_t< structures::is_a< Structure, structures::Square >::value > * = nullptr
+	>
+	Matrix<
+		DataType, Structure, Density::Dense,
+		view::Functor< std::function< void( DataType &, const size_t, const size_t ) > >,
+		imf::Id, imf::Id, backend
+	>
+	conjugate(
+		const Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, backend > &A,
+		const std::enable_if_t< !alp::is_object< DataType >::value > * const = nullptr
+	) {
+		(void) A;
+		return UNSUPPORTED;
+	}
 	/** @} */
 
 } // namespace alp

From 3490b9816307bc97e35cabc925efda57bb6eb2f7 Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Tue, 25 Oct 2022 10:26:29 +0200
Subject: [PATCH 19/23] Remove obsolete operation definitions from blas3 base

---
 include/alp/base/blas3.hpp | 119 -------------------------------------
 1 file changed, 119 deletions(-)

diff --git a/include/alp/base/blas3.hpp b/include/alp/base/blas3.hpp
index 534dd61d2..d86b04917 100644
--- a/include/alp/base/blas3.hpp
+++ b/include/alp/base/blas3.hpp
@@ -42,125 +42,6 @@ namespace alp {
 	 * @{
 	 */
 
-	/**
-	 * Unmaked sparse matrix--sparse matrix multiplication (SpMSpM).
-	 *
-	 * @tparam descr      The descriptors under which to perform the computation.
-	 * @tparam OutputType The type of elements in the output matrix.
-	 * @tparam InputType1 The type of elements in the left-hand side input
-	 *                    matrix.
-	 * @tparam InputType2 The type of elements in the right-hand side input
-	 *                    matrix.
-	 * @tparam Semiring   The semiring under which to perform the
-	 *                    multiplication.
-	 * @tparam Backend    The backend that should perform the computation.
-	 *
-	 * @returns SUCCESS If the computation completed as intended.
-	 * @returns FAILED  If the call was not not preceded by one to
-	 *                  #alp::resize( C, A, B ); \em and the current capacity of
-	 *                  \a C was insufficient to store the multiplication of \a A
-	 *                  and \a B. The contents of \a C shall be undefined (which
-	 *                  is why #FAILED is returned instead of #ILLEGAL-- this
-	 *                  error has side effects).
-	 *
-	 * @param[out] C The output matrix \f$ C = AB \f$ when the function returns
-	 *               #SUCCESS.
-	 * @param[in]  A The left-hand side input matrix \f$ A \f$.
-	 * @param[in]  B The left-hand side input matrix \f$ B \f$.
-	 *
-	 * @param[in] ring (Optional.) The semiring under which the computation should
-	 *                             proceed.
-	 */
-	template<
-		Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename InputType1, typename InputType2,
-		class Semiring,
-		Backend backend
-	>
-	RC mxm( internal::Matrix< OutputType, backend > &C,
-		const internal::Matrix< InputType1, backend > &A, const internal::Matrix< InputType2, backend > &B,
-		const Semiring &ring = Semiring(),
-		const PHASE &phase = NUMERICAL
-	) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::mxm (semiring version)\n";
- #endif
-#endif
-		(void)C;
-		(void)A;
-		(void)B;
-		(void)ring;
-		(void)phase;
-		// this is the generic stub implementation
-		return UNSUPPORTED;
-	}
-
-	/**
-	 * Interprets three vectors x, y, and z as a series of row coordinates,
-	 * column coordinates, and nonzeroes, respectively, and stores the thus
-	 * defined nonzeroes in a given output matrix A.
-	 *
-	 * If this function does not return SUCCESS, A will have been cleared.
-	 *
-	 * A must have been pre-allocated to store the nonzero pattern the three
-	 * given vectors x, y, and z encode, or ILLEGAL shall be returned.
-	 *
-	 * \note A call to this function hence must be preceded by a successful
-	 *       call to alp::resize( matrix, nnz );
-	 *
-	 * @param[out] A The output matrix
-	 * @param[in]  x A vector of row indices.
-	 * @param[in]  y A vector of column indices.
-	 * @param[in]  z A vector of nonzero values.
-	 *
-	 * If x, y, and z are sparse, they must have the exact same sparsity
-	 * structure.
-	 *
-	 * \par Descriptors
-	 *
-	 * None allowed.
-	 *
-	 * @returns SUCCESS  If A was constructed successfully.
-	 * @returns MISMATCH If y or z does not match the size of x.
-	 * @returns ILLEGAL  If y or z do not have the same number of nonzeroes
-	 *                   as x.
-	 * @returns ILLEGAL  If y or z has a different sparsity pattern from x.
-	 * @returns ILLEGAL  If the capacity of A was insufficient to store the
-	 *                   given sparsity pattern.
-	 *
-	 * @see alp::resize
-	 */
-	template< Descriptor descr = descriptors::no_operation, typename OutputType, typename InputType1, typename InputType2, typename InputType3, Backend backend >
-	RC zip( internal::Matrix< OutputType, backend > & A, const internal::Vector< InputType1, backend > & x, const internal::Vector< InputType2, backend > & y, const internal::Vector< InputType3, backend > & z ) {
-		(void)x;
-		(void)y;
-		(void)z;
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::zip (vectors into matrices, non-void)\n";
- #endif
-#endif
-		const RC ret = alp::clear( A );
-		return ret == SUCCESS ? UNSUPPORTED : ret;
-	}
-
-	/**
-	 * Specialisation of alp::zip for void output matrices.
-	 */
-	template< Descriptor descr = descriptors::no_operation, typename InputType1, typename InputType2, typename InputType3, Backend backend >
-	RC zip( internal::Matrix< void, backend > & A, const internal::Vector< InputType1, backend > & x, const internal::Vector< InputType2, backend > & y ) {
-		(void)x;
-		(void)y;
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::zip (vectors into matrices, void)\n";
- #endif
-#endif
-		const RC ret = alp::clear( A );
-		return ret == SUCCESS ? UNSUPPORTED : ret;
-	}
-
 	/**
 	 * @}
 	 */

From 424130e94216974cfba0411caa782b172814b258 Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Tue, 25 Oct 2022 10:43:34 +0200
Subject: [PATCH 20/23] Add base definitions of operations defined in ALP blas3
 files

---
 include/alp/base/blas3.hpp | 282 +++++++++++++++++++++++++++++++++++++
 1 file changed, 282 insertions(+)

diff --git a/include/alp/base/blas3.hpp b/include/alp/base/blas3.hpp
index d86b04917..52d797dbe 100644
--- a/include/alp/base/blas3.hpp
+++ b/include/alp/base/blas3.hpp
@@ -42,6 +42,288 @@ namespace alp {
 	 * @{
 	 */
 
+	/**
+	 * @brief Computes \f$ C = A . B \f$ for a given monoid. Base definition: discards all of its arguments and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class MulMonoid,
+		Backend backend
+	>
+	RC eWiseApply(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Matrix< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &A,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &B,
+		const MulMonoid &mulmono,
+		const typename std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< MulMonoid >::value
+		> * const = nullptr
+	) {
+		(void) C;
+		(void) A;
+		(void) B;
+		(void) mulmono;
+		return UNSUPPORTED;
+	}
+
+
+	/**
+	 * Computes \f$ C = alpha . B \f$ for a given monoid.
+	 *
+	 * Case where the left-hand input is the scalar \a alpha. Base definition: discards all of its arguments and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class MulMonoid,
+		Backend backend
+	>
+	RC eWiseApply(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Scalar< InputType1, InputStructure1, backend > &alpha,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &B,
+		const MulMonoid &mulmono,
+		const typename std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< MulMonoid >::value
+		> * const = nullptr
+	) {
+		(void) C;
+		(void) alpha;
+		(void) B;
+		(void) mulmono;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes \f$ C = A . beta \f$ for a given monoid.
+	 *
+	 * Case where the right-hand input is the scalar \a beta. Base definition: discards all of its arguments and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2,
+		class MulMonoid,
+		Backend backend
+	>
+	RC eWiseApply(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Matrix< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &A,
+		const Scalar< InputType2, InputStructure2, backend > &beta,
+		const MulMonoid &mulmono,
+		const typename std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< MulMonoid >::value
+		> * const = nullptr
+	) {
+		(void) C;
+		(void) A;
+		(void) beta;
+		(void) mulmono;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Calculates the element-wise multiplication of two matrices,
+	 *     \f$ C = C + A .* B \f$,
+	 * under a given semiring. Base definition: discards all of its arguments and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation, class Ring,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		Backend backend
+	>
+	RC eWiseMul(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Matrix< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &A,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &B,
+		const Ring &ring = Ring(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_semiring< Ring >::value
+		> * const = nullptr
+	) {
+		(void) C;
+		(void) A;
+		(void) B;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * eWiseMul, version where the left-hand input is the scalar \a alpha. Base definition: discards all of its arguments and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation, class Ring,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		Backend backend
+	>
+	RC eWiseMul(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Scalar< InputType1, InputStructure1, backend > &alpha,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &B,
+		const Ring &ring = Ring(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_semiring< Ring >::value
+		> * const = nullptr
+	) {
+		(void) C;
+		(void) alpha;
+		(void) B;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * eWiseMul, version where the right-hand input is the scalar \a beta. Base definition: discards all of its arguments and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation, class Ring,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2,
+		Backend backend
+	>
+	RC eWiseMul(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Matrix< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &A,
+		const Scalar< InputType2, InputStructure2, backend > &beta,
+		const Ring &ring = Ring(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_semiring< Ring >::value
+		> * const = nullptr
+	) {
+		(void) C;
+		(void) A;
+		(void) beta;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * @brief  Outer product of two vectors; the result matrix \a A is meant to receive \f$ uv^T \f$. Base definition: discards all of its arguments (leaving \a A untouched) and returns UNSUPPORTED.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class Operator,
+		Backend backend
+	>
+	RC outer(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &A,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &u,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &v,
+		const Operator &mul = Operator(),
+		const typename std::enable_if_t<
+			alp::is_operator< Operator >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			!alp::is_object< OutputType >::value
+		> * const = nullptr
+	) {
+		(void) A;
+		(void) u;
+		(void) v;
+		(void) mul;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Returns a view over the general rank-1 matrix computed with the outer product of \a x and \a y under \a mul.
+	 * This avoids creating the resulting container; the elements are meant to be calculated lazily on access. Only participates in overload resolution when \a Operator is an ALP operator and neither input type is an ALP object.
+	 * NOTE(review): this base body returns the RC value UNSUPPORTED although the declared return type is a Matrix, and the functor signature uses InputType1 where the single-vector overload uses Operator::D3 -- confirm both are intended.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class Operator,
+		Backend backend
+	>
+	Matrix<
+		typename Operator::D3, structures::General, Density::Dense,
+		view::Functor< std::function< void( InputType1 &, const size_t, const size_t ) > >,
+		imf::Id, imf::Id,
+		backend
+	>
+	outer(
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const Operator &mul = Operator(),
+		const std::enable_if_t<
+			alp::is_operator< Operator >::value &&
+			! alp::is_object< InputType1 >::value &&
+			! alp::is_object< InputType2 >::value
+		> * const = nullptr
+	) {
+		(void) x;
+		(void) y;
+		(void) mul;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Returns a view over the rank-1 matrix computed with the outer product of \a x with itself under \a mul.
+	 * Version for the case when input vectors are the same vector,
+	 * which results in a symmetric matrix (the declared structure is Hermitian when Operator::D3 is complex, Symmetric otherwise). NOTE(review): this base body returns the RC value UNSUPPORTED although the declared return type is a Matrix -- confirm this is intended.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		class Operator,
+		Backend backend
+	>
+	Matrix<
+		typename Operator::D3,
+		typename std::conditional<
+			grb::utils::is_complex< typename Operator::D3 >::value,
+			alp::structures::Hermitian,
+			alp::structures::Symmetric
+		>::type,
+		Density::Dense,
+		view::Functor< std::function< void( typename Operator::D3 &, const size_t, const size_t ) > >,
+		imf::Id, imf::Id,
+		backend
+	>
+	outer(
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
+		const Operator &mul = Operator(),
+		const typename std::enable_if_t<
+			alp::is_operator< Operator >::value &&
+			!alp::is_object< InputType >::value
+		> * const = nullptr
+	) {
+		(void) x;
+		(void) mul;
+		return UNSUPPORTED;
+	}
 	/**
 	 * @}
 	 */

From 06b91224693429324b3ebf80bc82604f55ece432 Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Tue, 25 Oct 2022 10:48:01 +0200
Subject: [PATCH 21/23] Minor code style fixes in base/blas0

---
 include/alp/base/blas0.hpp | 52 ++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 19 deletions(-)

diff --git a/include/alp/base/blas0.hpp b/include/alp/base/blas0.hpp
index c0a214619..f093ee0fd 100644
--- a/include/alp/base/blas0.hpp
+++ b/include/alp/base/blas0.hpp
@@ -152,12 +152,12 @@ namespace alp {
 		const Scalar< InputType1, InputStructure1, implementation > &x,
 		const Scalar< InputType2, InputStructure2, implementation > &y,
 		const OP &op = OP(),
-		const typename std::enable_if<
+		const typename std::enable_if_t<
 			alp::is_operator< OP >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
-			!alp::is_object< OutputType >::value,
-		void >::type * = NULL
+			!alp::is_object< OutputType >::value
+		> * = nullptr
 	) {
 #ifdef _DEBUG
 		std::cerr << "Selected backend does not implement alp::apply (scalar)\n";
@@ -167,10 +167,10 @@ namespace alp {
 		assert( backend_does_not_support_scalar_apply );
 #endif
 
-		(void)out;
-		(void)x;
-		(void)y;
-		(void)op;
+		(void) out;
+		(void) x;
+		(void) y;
+		(void) op;
 
 		return UNSUPPORTED;
 	}
@@ -248,11 +248,18 @@ namespace alp {
 		class OP, 
 		typename InputType, typename InputStructure, 
 		typename IOType, typename IOStructure,
-		enum Backend implementation = config::default_backend >
-	RC foldr( const Scalar< InputType, InputStructure, implementation > &x,
+		enum Backend implementation = config::default_backend
+	>
+	RC foldr(
+		const Scalar< InputType, InputStructure, implementation > &x,
 		Scalar< IOType, IOStructure, implementation > &y,
 		const OP & op = OP(),
-		const typename std::enable_if< alp::is_operator< OP >::value && ! alp::is_object< InputType >::value && ! alp::is_object< IOType >::value, void >::type * = NULL ) {
+		const typename std::enable_if_t<
+			alp::is_operator< OP >::value &&
+			! alp::is_object< InputType >::value &&
+			! alp::is_object< IOType >::value
+		> * = nullptr
+	) {
 
 #ifdef _DEBUG
 		std::cerr << "Selected backend does not implement alp::foldr (scalar)\n";
@@ -262,9 +269,9 @@ namespace alp {
 		assert( backend_does_not_support_scalar_foldr );
 #endif
 		
-		(void)x;
-		(void)y;
-		(void)op;
+		(void) x;
+		(void) y;
+		(void) op;
 
 		return UNSUPPORTED;
 	}
@@ -342,11 +349,18 @@ namespace alp {
 		class OP, 
 		typename InputType, typename InputStructure, 
 		typename IOType, typename IOStructure,
-		enum Backend implementation = config::default_backend >
-	RC foldl( Scalar< IOType, IOStructure, implementation > &x,
+		enum Backend implementation = config::default_backend
+	>
+	RC foldl(
+		Scalar< IOType, IOStructure, implementation > &x,
 		const Scalar< InputType, InputStructure, implementation > &y,
 		const OP & op = OP(),
-		const typename std::enable_if< alp::is_operator< OP >::value && ! alp::is_object< InputType >::value && ! alp::is_object< IOType >::value, void >::type * = NULL ) {
+		const typename std::enable_if_t<
+			alp::is_operator< OP >::value &&
+			! alp::is_object< InputType >::value &&
+			! alp::is_object< IOType >::value
+		> * = nullptr
+	) {
 
 #ifdef _DEBUG
 		std::cerr << "Selected backend does not implement alp::foldl (scalar)\n";
@@ -356,9 +370,9 @@ namespace alp {
 		assert( backend_does_not_support_scalar_foldl );
 #endif
 
-		(void)x;
-		(void)y;
-		(void)op;
+		(void) x;
+		(void) y;
+		(void) op;
 
 		return UNSUPPORTED;
 	}

From 8e4fb4d1b80c505f3f6ba0362bea32933fe3a24b Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Tue, 25 Oct 2022 11:23:22 +0200
Subject: [PATCH 22/23] Minor code style fixes

---
 include/alp/base/blas0.hpp      |  6 ++---
 include/alp/base/blas1.hpp      | 46 ++++++++++++++++-----------------
 include/alp/base/blas2.hpp      |  8 +++---
 include/alp/base/blas3.hpp      | 10 +++----
 include/alp/base/io.hpp         |  4 +--
 include/alp/reference/blas1.hpp |  4 +--
 include/alp/reference/blas2.hpp | 10 +++----
 include/alp/reference/blas3.hpp |  8 +++---
 8 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/include/alp/base/blas0.hpp b/include/alp/base/blas0.hpp
index f093ee0fd..e2d83ecbb 100644
--- a/include/alp/base/blas0.hpp
+++ b/include/alp/base/blas0.hpp
@@ -152,7 +152,7 @@ namespace alp {
 		const Scalar< InputType1, InputStructure1, implementation > &x,
 		const Scalar< InputType2, InputStructure2, implementation > &y,
 		const OP &op = OP(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			alp::is_operator< OP >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
@@ -254,7 +254,7 @@ namespace alp {
 		const Scalar< InputType, InputStructure, implementation > &x,
 		Scalar< IOType, IOStructure, implementation > &y,
 		const OP & op = OP(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			alp::is_operator< OP >::value &&
 			! alp::is_object< InputType >::value &&
 			! alp::is_object< IOType >::value
@@ -355,7 +355,7 @@ namespace alp {
 		Scalar< IOType, IOStructure, implementation > &x,
 		const Scalar< InputType, InputStructure, implementation > &y,
 		const OP & op = OP(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			alp::is_operator< OP >::value &&
 			! alp::is_object< InputType >::value &&
 			! alp::is_object< IOType >::value
diff --git a/include/alp/base/blas1.hpp b/include/alp/base/blas1.hpp
index e5cd6f90d..e0d76ee84 100644
--- a/include/alp/base/blas1.hpp
+++ b/include/alp/base/blas1.hpp
@@ -267,7 +267,7 @@ namespace alp {
 		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR, InputImfC, backend > &x,
 		const Scalar< InputType2, InputStructure2, backend > &beta,
 		const OP &op = OP(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< OutputType >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
@@ -299,7 +299,7 @@ namespace alp {
 		const Scalar< InputType1, InputStructure1, backend> &alpha,
 		const Scalar< InputType2, InputStructure2, backend> &beta,
 		const OP &op = OP(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< OutputType >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
@@ -331,7 +331,7 @@ namespace alp {
 		const Scalar< InputType1, InputStructure1, backend> &alpha,
 		const Scalar< InputType2, InputStructure2, backend> &beta,
 		const Monoid &monoid = Monoid(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< OutputType >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
@@ -363,7 +363,7 @@ namespace alp {
 		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
 		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
 		const Monoid &monoid = Monoid(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			! alp::is_object< OutputType >::value &&
 			! alp::is_object< InputType1 >::value &&
 			! alp::is_object< InputType2 >::value &&
@@ -395,7 +395,7 @@ namespace alp {
 		const Scalar< InputType1, InputStructure1, backend> &alpha,
 		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
 		const Monoid &monoid = Monoid(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< OutputType >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
@@ -458,7 +458,7 @@ namespace alp {
 		const Scalar< InputType1, InputStructure1, backend > &alpha,
 		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
 		const OP &op = OP(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< OutputType >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
@@ -489,7 +489,7 @@ namespace alp {
 		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
 		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
 		const OP &op = OP(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< OutputType >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
@@ -521,10 +521,10 @@ namespace alp {
 		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
 		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
 		const Ring &ring = Ring(),
-		const typename std::enable_if_t<
-			! alp::is_object< OutputType >::value &&
-			! alp::is_object< InputType1 >::value &&
-			! alp::is_object< InputType2 >::value &&
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
 			alp::is_semiring< Ring >::value
 		> * const = nullptr
 	) {
@@ -553,10 +553,10 @@ namespace alp {
 		const Scalar< InputType1, InputStructure1, backend > &alpha,
 		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
 		const Ring &ring = Ring(),
-		const typename std::enable_if_t<
-			! alp::is_object< OutputType >::value &&
-			! alp::is_object< InputType1 >::value &&
-			! alp::is_object< InputType2 >::value &&
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
 			alp::is_semiring< Ring >::value
 		> * const = nullptr
 	) {
@@ -584,7 +584,7 @@ namespace alp {
 		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
 		const Scalar< InputType2, InputStructure2, backend > &beta,
 		const Ring &ring = Ring(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< OutputType >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
@@ -615,7 +615,7 @@ namespace alp {
 		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
 		const AddMonoid &addMonoid = AddMonoid(),
 		const AnyOp &anyOp = AnyOp(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< OutputType >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
@@ -645,7 +645,7 @@ namespace alp {
 		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
 		const AddMonoid &addMonoid = AddMonoid(),
 		const AnyOp &anyOp = AnyOp(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< OutputType >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
@@ -674,7 +674,7 @@ namespace alp {
 		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &left,
 		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &right,
 		const Ring &ring = Ring(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
 			!alp::is_object< IOType >::value &&
@@ -701,7 +701,7 @@ namespace alp {
 		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &left,
 		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &right,
 		const Ring &ring = Ring(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
 			!alp::is_object< IOType >::value &&
@@ -785,7 +785,7 @@ namespace alp {
 		Scalar< IOType, IOStructure, backend > &alpha,
 		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
 		const Monoid &monoid = Monoid(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			! alp::is_object< IOType >::value && ! alp::is_object< InputType >::value && alp::is_monoid< Monoid >::value
 		> * const = nullptr
 	) {
@@ -829,7 +829,7 @@ namespace alp {
 		Scalar< OutputType, OutputStructure, backend > &x,
 		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
 		const Ring &ring = Ring(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			std::is_floating_point< OutputType >::value || grb::utils::is_complex< OutputType >::value
 		> * const = nullptr
 	) {
@@ -851,7 +851,7 @@ namespace alp {
 		OutputType &x,
 		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
 		const Ring &ring = Ring(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			std::is_floating_point< OutputType >::value || grb::utils::is_complex< OutputType >::value
 		> * const = nullptr
 	) {
diff --git a/include/alp/base/blas2.hpp b/include/alp/base/blas2.hpp
index c0f467aa8..2b90ca671 100644
--- a/include/alp/base/blas2.hpp
+++ b/include/alp/base/blas2.hpp
@@ -72,7 +72,7 @@ namespace alp {
 		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &v,
 		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &A,
 		const Ring &ring = Ring(),
-		const typename std::enable_if_t< alp::is_semiring< Ring >::value > * const = nullptr
+		const std::enable_if_t< alp::is_semiring< Ring >::value > * const = nullptr
 	) {
 		(void) u;
 		(void) v;
@@ -97,7 +97,7 @@ namespace alp {
 		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &A,
 		const AdditiveMonoid &add = AdditiveMonoid(),
 		const MultiplicativeOperator &mul = MultiplicativeOperator(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			alp::is_monoid< AdditiveMonoid >::value &&
 			alp::is_operator< MultiplicativeOperator >::value &&
 			!alp::is_object< IOType >::value &&
@@ -130,7 +130,7 @@ namespace alp {
 		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &A,
 		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &v,
 		const Ring &ring,
-		const typename std::enable_if_t< alp::is_semiring< Ring >::value > * const = nullptr
+		const std::enable_if_t< alp::is_semiring< Ring >::value > * const = nullptr
 	) {
 		(void) u;
 		(void) A;
@@ -156,7 +156,7 @@ namespace alp {
 		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &v,
 		const AdditiveMonoid &add = AdditiveMonoid(),
 		const MultiplicativeOperator &mul = MultiplicativeOperator(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			alp::is_monoid< AdditiveMonoid >::value &&
 			alp::is_operator< MultiplicativeOperator >::value &&
 			!alp::is_object< IOType >::value &&
diff --git a/include/alp/base/blas3.hpp b/include/alp/base/blas3.hpp
index 52d797dbe..b6501d5c2 100644
--- a/include/alp/base/blas3.hpp
+++ b/include/alp/base/blas3.hpp
@@ -58,7 +58,7 @@ namespace alp {
 		const Matrix< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &A,
 		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &B,
 		const MulMonoid &mulmono,
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< OutputType >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
@@ -91,7 +91,7 @@ namespace alp {
 		const Scalar< InputType1, InputStructure1, backend > &alpha,
 		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &B,
 		const MulMonoid &mulmono,
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< OutputType >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
@@ -123,7 +123,7 @@ namespace alp {
 		const Matrix< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &A,
 		const Scalar< InputType2, InputStructure2, backend > &beta,
 		const MulMonoid &mulmono,
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< OutputType >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
@@ -242,7 +242,7 @@ namespace alp {
 		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &u,
 		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &v,
 		const Operator &mul = Operator(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			alp::is_operator< Operator >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
@@ -315,7 +315,7 @@ namespace alp {
 	outer(
 		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
 		const Operator &mul = Operator(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			alp::is_operator< Operator >::value &&
 			!alp::is_object< InputType >::value
 		> * const = nullptr
diff --git a/include/alp/base/io.hpp b/include/alp/base/io.hpp
index 5ae91018c..7ac808be3 100644
--- a/include/alp/base/io.hpp
+++ b/include/alp/base/io.hpp
@@ -188,7 +188,7 @@ namespace alp {
 	RC set(
 		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
 		const Scalar< T, ValStructure, backend > val,
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< DataType >::value &&
 			!alp::is_object< T >::value
 		> * const = nullptr
@@ -212,7 +212,7 @@ namespace alp {
 		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
 		const Scalar< T, ValStructure, backend > val,
 		const size_t i,
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			!alp::is_object< DataType >::value &&
 			!alp::is_object< T >::value
 		> * const = nullptr
diff --git a/include/alp/reference/blas1.hpp b/include/alp/reference/blas1.hpp
index af7b7abc5..cd8a9f099 100644
--- a/include/alp/reference/blas1.hpp
+++ b/include/alp/reference/blas1.hpp
@@ -1838,7 +1838,7 @@ namespace alp {
 		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, reference > &y,
 		const AddMonoid &addMonoid = AddMonoid(),
 		const AnyOp &anyOp = AnyOp(),
-		const typename std::enable_if_t< !alp::is_object< OutputType >::value &&
+		const std::enable_if_t< !alp::is_object< OutputType >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
 			alp::is_monoid< AddMonoid >::value &&
@@ -2175,7 +2175,7 @@ namespace alp {
 		Scalar< IOType, IOStructure, reference > &alpha,
 		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > &y,
 		const Monoid &monoid = Monoid(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			! alp::is_object< IOType >::value && ! alp::is_object< InputType >::value && alp::is_monoid< Monoid >::value
 		> * const = nullptr
 	) {
diff --git a/include/alp/reference/blas2.hpp b/include/alp/reference/blas2.hpp
index dd3c6aa00..536821212 100644
--- a/include/alp/reference/blas2.hpp
+++ b/include/alp/reference/blas2.hpp
@@ -306,7 +306,7 @@ namespace alp {
 		template<
 			size_t BandIndex, typename Func,
 			typename DataType, typename Structure, typename View, typename ImfR, typename ImfC,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				BandIndex >= std::tuple_size< typename Structure::band_intervals >::value
 			> * = nullptr
 		>
@@ -319,7 +319,7 @@ namespace alp {
 		template<
 			size_t BandIndex, typename Func,
 			typename DataType, typename Structure, typename View, typename ImfR, typename ImfC,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				BandIndex >= std::tuple_size< typename Structure::band_intervals >::value
 			> * = nullptr
 		>
@@ -341,7 +341,7 @@ namespace alp {
 		template<
 			size_t band_index, typename Func,
 			typename DataType, typename Structure, typename View, typename ImfR, typename ImfC,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				band_index < std::tuple_size< typename Structure::band_intervals >::value
 			> * = nullptr
 		>
@@ -435,7 +435,7 @@ namespace alp {
 			typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
 			typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
 			typename InputTypeScalar, typename InputStructureScalar,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				band_index >= std::tuple_size< typename IOStructure::band_intervals >::value
 			> * = nullptr
 		>
@@ -467,7 +467,7 @@ namespace alp {
 			typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
 			typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
 			typename InputTypeScalar, typename InputStructureScalar,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				band_index < std::tuple_size< typename IOStructure::band_intervals >::value
 			> * = nullptr
 		>
diff --git a/include/alp/reference/blas3.hpp b/include/alp/reference/blas3.hpp
index 30af8de39..cbbce6b38 100644
--- a/include/alp/reference/blas3.hpp
+++ b/include/alp/reference/blas3.hpp
@@ -541,7 +541,7 @@ namespace alp {
 			typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
 			typename InputTypeScalar2, typename InputStructureScalar2,
 			class Operator,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				band_index >= std::tuple_size< typename OutputStructure::band_intervals >::value
 			> * = nullptr
 		>
@@ -582,7 +582,7 @@ namespace alp {
 			typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
 			typename InputTypeScalar2, typename InputStructureScalar2,
 			class Operator,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				band_index < std::tuple_size< typename OutputStructure::band_intervals >::value
 			> * = nullptr
 		>
@@ -931,7 +931,7 @@ namespace alp {
 			typename InputTypeScalar1, typename InputStructureScalar1,
 			typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
 			typename InputTypeScalar2, typename InputStructureScalar2,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				band_index >= std::tuple_size< typename OutputStructure::band_intervals >::value
 			> * = nullptr
 		>
@@ -969,7 +969,7 @@ namespace alp {
 			typename InputTypeScalar1, typename InputStructureScalar1,
 			typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
 			typename InputTypeScalar2, typename InputStructureScalar2,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				band_index < std::tuple_size< typename OutputStructure::band_intervals >::value
 			> * = nullptr
 		>

From d4b777d8f00e4f56b9447ff04e22fb1b13ca0dee Mon Sep 17 00:00:00 2001
From: Vladimir Dimic <vladimir.dimic@huawei.com>
Date: Tue, 25 Oct 2022 13:37:06 +0200
Subject: [PATCH 23/23] Add asserts in base functions without an RC return
 value

---
 include/alp/base/io.hpp | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/include/alp/base/io.hpp b/include/alp/base/io.hpp
index 7ac808be3..2c1cf14c2 100644
--- a/include/alp/base/io.hpp
+++ b/include/alp/base/io.hpp
@@ -64,6 +64,11 @@ namespace alp {
 	size_t size(
 		const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x
 	) noexcept {
+		// Base definition: the assert below always fails in debug builds; with NDEBUG defined the function just returns SIZE_MAX.
+#ifndef NDEBUG
+		const bool selected_backend_does_not_support_size_for_vector = false;
+		assert( selected_backend_does_not_support_size_for_vector );
+#endif
 		(void) x;
 		return SIZE_MAX;
 	}
@@ -78,6 +83,11 @@ namespace alp {
 	size_t nnz(
 		const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > &x
 	) noexcept {
+		// Base definition: assert always fails in debug builds, else returns SIZE_MAX. NOTE(review): signature names `reference`, not a generic backend -- confirm.
+#ifndef NDEBUG
+		const bool selected_backend_does_not_support_nnz_for_vector = false;
+		assert( selected_backend_does_not_support_nnz_for_vector );
+#endif
 		(void) x;
 		return SIZE_MAX;
 	}
@@ -105,6 +115,11 @@ namespace alp {
 	size_t nnz(
 		const Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, reference > &A
 	) noexcept {
+		// Base definition: assert always fails in debug builds, else returns SIZE_MAX. NOTE(review): signature names `reference`, not a generic backend -- confirm.
+#ifndef NDEBUG
+		const bool selected_backend_does_not_support_nnz_for_matrix = false;
+		assert( selected_backend_does_not_support_nnz_for_matrix );
+#endif
 		(void) A;
 		return SIZE_MAX;
 	}