diff --git a/include/alp/base/blas0.hpp b/include/alp/base/blas0.hpp
index c0a214619..e2d83ecbb 100644
--- a/include/alp/base/blas0.hpp
+++ b/include/alp/base/blas0.hpp
@@ -152,12 +152,12 @@ namespace alp {
 		const Scalar< InputType1, InputStructure1, implementation > &x,
 		const Scalar< InputType2, InputStructure2, implementation > &y,
 		const OP &op = OP(),
-		const typename std::enable_if<
+		const std::enable_if_t<
 			alp::is_operator< OP >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
-			!alp::is_object< OutputType >::value,
-		void >::type * = NULL
+			!alp::is_object< OutputType >::value
+		> * = nullptr
 	) {
 #ifdef _DEBUG
 		std::cerr << "Selected backend does not implement alp::apply (scalar)\n";
@@ -167,10 +167,10 @@ namespace alp {
 		assert( backend_does_not_support_scalar_apply );
 #endif
 
-		(void)out;
-		(void)x;
-		(void)y;
-		(void)op;
+		(void) out;
+		(void) x;
+		(void) y;
+		(void) op;
 
 		return UNSUPPORTED;
 	}
@@ -248,11 +248,18 @@ namespace alp {
 		class OP, 
 		typename InputType, typename InputStructure, 
 		typename IOType, typename IOStructure,
-		enum Backend implementation = config::default_backend >
-	RC foldr( const Scalar< InputType, InputStructure, implementation > &x,
+		enum Backend implementation = config::default_backend
+	>
+	RC foldr(
+		const Scalar< InputType, InputStructure, implementation > &x,
 		Scalar< IOType, IOStructure, implementation > &y,
 		const OP & op = OP(),
-		const typename std::enable_if< alp::is_operator< OP >::value && ! alp::is_object< InputType >::value && ! alp::is_object< IOType >::value, void >::type * = NULL ) {
+		const std::enable_if_t<
+			alp::is_operator< OP >::value &&
+			! alp::is_object< InputType >::value &&
+			! alp::is_object< IOType >::value
+		> * = nullptr
+	) {
 
 #ifdef _DEBUG
 		std::cerr << "Selected backend does not implement alp::foldr (scalar)\n";
@@ -262,9 +269,9 @@ namespace alp {
 		assert( backend_does_not_support_scalar_foldr );
 #endif
 		
-		(void)x;
-		(void)y;
-		(void)op;
+		(void) x;
+		(void) y;
+		(void) op;
 
 		return UNSUPPORTED;
 	}
@@ -342,11 +349,18 @@ namespace alp {
 		class OP, 
 		typename InputType, typename InputStructure, 
 		typename IOType, typename IOStructure,
-		enum Backend implementation = config::default_backend >
-	RC foldl( Scalar< IOType, IOStructure, implementation > &x,
+		enum Backend implementation = config::default_backend
+	>
+	RC foldl(
+		Scalar< IOType, IOStructure, implementation > &x,
 		const Scalar< InputType, InputStructure, implementation > &y,
 		const OP & op = OP(),
-		const typename std::enable_if< alp::is_operator< OP >::value && ! alp::is_object< InputType >::value && ! alp::is_object< IOType >::value, void >::type * = NULL ) {
+		const std::enable_if_t<
+			alp::is_operator< OP >::value &&
+			! alp::is_object< InputType >::value &&
+			! alp::is_object< IOType >::value
+		> * = nullptr
+	) {
 
 #ifdef _DEBUG
 		std::cerr << "Selected backend does not implement alp::foldl (scalar)\n";
@@ -356,9 +370,9 @@ namespace alp {
 		assert( backend_does_not_support_scalar_foldl );
 #endif
 
-		(void)x;
-		(void)y;
-		(void)op;
+		(void) x;
+		(void) y;
+		(void) op;
 
 		return UNSUPPORTED;
 	}
diff --git a/include/alp/base/blas1.hpp b/include/alp/base/blas1.hpp
new file mode 100644
index 000000000..e0d76ee84
--- /dev/null
+++ b/include/alp/base/blas1.hpp
@@ -0,0 +1,869 @@
+
+/*
+ *   Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 5th of December 2016
+ */
+
+#ifndef _H_ALP_BASE_BLAS1
+#define _H_ALP_BASE_BLAS1
+
+#include <alp/rc.hpp>
+#include <alp/ops.hpp>
+#include <alp/phase.hpp>
+#include <alp/monoid.hpp>
+#include <alp/backends.hpp>
+#include <alp/semiring.hpp>
+#include <alp/descriptors.hpp>
+#include <alp/internalops.hpp>
+
+namespace alp {
+
+	/**
+	 * \defgroup BLAS1 The Level-1 ALP/GraphBLAS routines
+	 * @{
+	 */
+
+	/**
+	 * Folds all elements in a ALP Vector \a x into a single value \a beta.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType, typename IOStructure,
+		class Monoid,
+		Backend backend
+	>
+	RC foldr(
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
+		Scalar< IOType, IOStructure, backend > &beta,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			!alp::is_object< InputType >::value && !alp::is_object< IOType >::value && alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) x;
+		(void) beta;
+		(void) monoid;
+		return UNSUPPORTED;
+	}
+
+	/** C++ scalar variant */
+	template< Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType,
+		class Monoid,
+		Backend backend
+	>
+	RC foldr(
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
+		IOType &beta,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			!alp::is_object< InputType >::value && !alp::is_object< IOType >::value && alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		return foldr( x, Scalar< IOType >( beta ), monoid );
+	}
+
+	/**
+	 * For all elements in a ALP Vector \a y, fold the value \f$ \alpha \f$
+	 * into each element.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Monoid,
+		Backend backend
+	>
+	RC foldr(
+		const Scalar< InputType, InputStructure, backend > &alpha,
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &y,
+		const Monoid & monoid = Monoid(),
+		const std::enable_if_t<
+			!alp::is_object< InputType >::value && !alp::is_object< IOType >::value && alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) alpha;
+		(void) y;
+		(void) monoid;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes y = x + y, operator variant.
+	 *
+	 * Specialisation for scalar \a x.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class OP, Backend backend
+	>
+	RC foldr(
+		const Scalar< InputType, InputStructure, backend > &alpha,
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &y,
+		const OP & op = OP(),
+		const std::enable_if_t<
+			!alp::is_object< InputType >::value && ! alp::is_object< IOType >::value && alp::is_operator< OP >::value
+		> * const = nullptr
+	) {
+		(void) alhpa;
+		(void) y;
+		(void) op;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Folds all elements in a ALP Vector \a x into the corresponding
+	 * elements from an input/output vector \a y.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class OP, Backend backend
+	>
+	RC foldr(
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &y,
+		const OP & op = OP(),
+		const std::enable_if_t<
+			alp::is_operator< OP >::value && ! alp::is_object< InputType >::value && ! alp::is_object< IOType >::value
+		> * = nullptr
+	) {
+		(void) x;
+		(void) y;
+		(void) op;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Folds all elements in a ALP Vector \a x into the corresponding
+	 * elements from an input/output vector \a y.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Monoid,
+		Backend backend
+	>
+	RC foldr(
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &y,
+		const Monoid & monoid = Monoid(),
+		const std::enable_if_t<
+			alp::is_monoid< Monoid >::value && ! alp::is_object< InputType >::value && ! alp::is_object< IOType >::value
+		> * = nullptr
+	) {
+		(void) x;
+		(void) y;
+		(void) monoid;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * For all elements in a ALP Vector \a x, fold the value \f$ \beta \f$
+	 * into each element.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		typename InputType, typename InputStructure,
+		class Op,
+		Backend backend
+	>
+	RC foldl(
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &x,
+		const Scalar< InputType, InputStructure, backend > beta,
+		const Op &op = Op(),
+		const std::enable_if_t<
+			! alp::is_object< IOType >::value && ! alp::is_object< InputType >::value && alp::is_operator< Op >::value
+		> * = nullptr
+	) {
+		(void) x;
+		(void) beta;
+		(void) op;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Folds all elements in a ALP Vector \a y into the corresponding
+	 * elements from an input/output vector \a x.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		class OP,
+		Backend backend
+	>
+	RC foldl(
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &x,
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
+		const OP &op = OP(),
+		const std::enable_if_t<
+			alp::is_operator< OP >::value && !alp::is_object< IOType >::value && !alp::is_object< InputType >::value
+		> * = nullptr
+	) {
+		(void) x;
+		(void) y;
+		(void) op;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Folds all elements in a ALP Vector \a y into the corresponding
+	 * elements from an input/output vector \a x.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		class Monoid,
+		Backend backend
+	>
+	RC foldl(
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &x,
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			alp::is_monoid< Monoid >::value && ! alp::is_object< IOType >::value && ! alp::is_object< InputType >::value
+		  > * = nullptr
+		) {
+		(void) x;
+		(void) y;
+		(void) monoid;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Calculates the element-wise operation on one scalar to elements of one
+	 * vector, \f$ z = x .* \beta \f$, using the given operator.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR, typename InputImfC,
+		typename InputType2, typename InputStructure2,
+		class OP,
+		Backend backend
+	>
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR, InputImfC, backend > &x,
+		const Scalar< InputType2, InputStructure2, backend > &beta,
+		const OP &op = OP(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_operator< OP >::value
+		> * const = nullptr
+	) {
+		(void) z;
+		(void) x;
+		(void) beta;
+		(void) op;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes \f$ z = x \odot y \f$, out of place.
+	 *
+	 * Specialisation for \a x and \a y scalar, operator version.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1,
+		typename InputType2, typename InputStructure2,
+		class OP,
+		Backend backend
+	>
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Scalar< InputType1, InputStructure1, backend> &alpha,
+		const Scalar< InputType2, InputStructure2, backend> &beta,
+		const OP &op = OP(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_operator< OP >::value
+		> * const = nullptr
+	) {
+		(void) z;
+		(void) alpha;
+		(void) beta;
+		(void) op;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes \f$ z = x \odot y \f$, out of place.
+	 *
+	 * Specialisation for \a x and \a y scalar, monoid version.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1,
+		typename InputType2, typename InputStructure2,
+		class Monoid,
+		Backend backend
+	>
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Scalar< InputType1, InputStructure1, backend> &alpha,
+		const Scalar< InputType2, InputStructure2, backend> &beta,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) z;
+		(void) alhpa;
+		(void) beta;
+		(void) monoid;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes \f$ z = x \odot y \f$, out of place.
+	 *
+	 * Monoid version.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class Monoid,
+		Backend backend
+	>
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			! alp::is_object< OutputType >::value &&
+			! alp::is_object< InputType1 >::value &&
+			! alp::is_object< InputType2 >::value &&
+			alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) z;
+		(void) x;
+		(void) y;
+		(void) monoid;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes \f$ z = x \odot y \f$, out of place.
+	 *
+	 * Specialisation for scalar \a x. Monoid version.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class Monoid,
+		Backend backend
+	>
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Scalar< InputType1, InputStructure1, backend> &alpha,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) z;
+		(void) alhpa;
+		(void) y;
+		(void) monoid;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes \f$ z = x \odot y \f$, out of place.
+	 *
+	 * Specialisation for scalar \a y. Monoid version.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2,
+		class Monoid,
+		Backend backend
+	>
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Scalar< InputType2, InputStructure2, backend > &beta,
+		const Monoid &monoid = Monoid(),
+		const typename std::enable_if<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) z;
+		(void) x;
+		(void) beta;
+		(void) monoid;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Calculates the element-wise operation on one scalar to elements of one
+	 * vector, \f$ z = \alpha .* y \f$, using the given operator.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class OP,
+		Backend backend
+	>
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Scalar< InputType1, InputStructure1, backend > &alpha,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const OP &op = OP(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_operator< OP >::value
+		> * const = nullptr
+	) {
+		(void) z;
+		(void) alpha;
+		(void) y;
+		(void) op;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Calculates the element-wise operation on elements of two vectors,
+	 * \f$ z = x .* y \f$, using the given operator.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class OP,
+		Backend backend
+	>
+	RC eWiseApply(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const OP &op = OP(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_operator< OP >::value
+		> * const = nullptr
+	) {
+		(void) z;
+		(void) x;
+		(void) y;
+		(void) op;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Calculates the element-wise multiplication of two vectors,
+	 *     \f$ z = z + x .* y \f$,
+	 * under a given semiring.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class Ring,
+		Backend backend
+	>
+	RC eWiseMul(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const Ring &ring = Ring(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_semiring< Ring >::value
+		> * const = nullptr
+	) {
+		(void) z;
+		(void) x;
+		(void) y;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes \f$ z = z + x * y \f$.
+	 *
+	 * Specialisation for scalar \a x.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class Ring,
+		Backend backend
+	>
+	RC eWiseMul(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Scalar< InputType1, InputStructure1, backend > &alpha,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const Ring &ring = Ring(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_semiring< Ring >::value
+		> * const = nullptr
+	) {
+		(void) z;
+		(void) alpha;
+		(void) y;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes \f$ z = z + x * y \f$.
+	 *
+	 * Specialisation for scalar \a y.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2,
+		class Ring
+	>
+	RC eWiseMul(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Scalar< InputType2, InputStructure2, backend > &beta,
+		const Ring &ring = Ring(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_semiring< Ring >::value
+		> * const = nullptr
+	) {
+		(void) z;
+		(void) x;
+		(void) beta;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Calculates the dot product, \f$ \alpha = (x,y) \f$, under a given additive
+	 * monoid and multiplicative operator.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class AddMonoid, class AnyOp
+	>
+	RC dot(
+		Scalar< OutputType, OutputStructure, backend > &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const AddMonoid &addMonoid = AddMonoid(),
+		const AnyOp &anyOp = AnyOp(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< AddMonoid >::value &&
+			alp::is_operator< AnyOp >::value
+		> * const = nullptr
+	) {
+		(void) x;
+		(void) y;
+		(void) addMonoid;
+		(void) anyOp;
+		return UNSUPPORTED;
+	}
+
+	/** C++ scalar specialization */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class AddMonoid, class AnyOp,
+		Backend backend
+	>
+	RC dot(
+		OutputType &z,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const AddMonoid &addMonoid = AddMonoid(),
+		const AnyOp &anyOp = AnyOp(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< AddMonoid >::value &&
+			alp::is_operator< AnyOp >::value
+		>::type * const = nullptr
+	) {
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Provides a generic implementation of the dot computation on semirings by
+	 * translating it into a dot computation on an additive commutative monoid
+	 * with any multiplicative operator.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename IOType, typename IOStructure,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class Ring,
+		Backend backend
+	>
+	RC dot(
+		Scalar< IOType, IOStructure, backend > &x,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &left,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &right,
+		const Ring &ring = Ring(),
+		const std::enable_if_t<
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			!alp::is_object< IOType >::value &&
+			alp::is_semiring< Ring >::value,
+		> * const = nullptr
+	) {
+		return alp::dot< descr >( x,
+			left, right,
+			ring.getAdditiveMonoid(),
+			ring.getMultiplicativeOperator()
+		);
+	}
+
+	/** C++ scalar specialization. */
+	template<
+		Descriptor descr = descriptors::no_operation, class Ring,
+		typename IOType,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		Backend backend
+	>
+	RC dot(
+		IOType &x,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &left,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &right,
+		const Ring &ring = Ring(),
+		const std::enable_if_t<
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			!alp::is_object< IOType >::value &&
+			alp::is_semiring< Ring >::value,
+		> * const = nullptr
+	) {
+		(void) x;
+		(void) left;
+		(void) right;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * This is the eWiseLambda that performs length checking by recursion.
+	 *
+	 * in the backend implementation all vectors are distributed equally, so no
+	 * need to synchronise any data structures. We do need to do error checking
+	 * though, to see when to return alp::MISMATCH. That's this function.
+	 *
+	 * @see Vector::operator[]()
+	 * @see Vector::lambda_backend
+	 */
+	template<
+		typename Func,
+		typename DataType1, typename DataStructure1, typename DataView1, typename InputImfR1, typename InputImfC1,
+		typename DataType2, typename DataStructure2, typename DataView2, typename InputImfR2, typename InputImfC2,
+		Backend backend,
+		typename... Args
+	>
+	RC eWiseLambda(
+		const Func f,
+		Vector< DataType1, DataStructure1, Density::Dense, DataView1, InputImfR1, InputImfC1, backend > &x,
+		const Vector< DataType2, DataStructure2, Density::Dense, DataView2, InputImfR2, InputImfC2, backend > &y,
+		Args const &... args
+	) {
+		(void) f;
+		(void) x;
+		(void) y;
+		(void) args;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * No implementation notes. This is the `real' implementation on backend
+	 * vectors.
+	 *
+	 * @see Vector::operator[]()
+	 * @see Vector::lambda_backend
+	 */
+	template<
+		typename Func,
+		typename DataType, typename DataStructure, typename DataView, typename DataImfR, typename DataImfC,
+		Backend backend
+	>
+	RC eWiseLambda(
+		const Func f,
+		Vector< DataType, DataStructure, Density::Dense, DataView, DataImfR, DataImfC, backend > &x
+	) {
+		(void) f;
+		(void) x;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Reduces a vector into a scalar. Reduction takes place according a monoid
+	 * \f$ (\oplus,1) \f$, where \f$ \oplus:\ D_1 \times D_2 \to D_3 \f$ with an
+	 * associated identity \f$ 1 \in \{D_1,D_2,D_3\} \f$. Elements from the given
+	 * vector \f$ y \in \{D_1,D_2\} \f$ will be applied at the left-hand or right-
+	 * hand side of \f$ \oplus \f$; which, exactly, is implementation-dependent
+	 * but should not matter since \f$ \oplus \f$ should be associative.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename IOType, typename IOStructure,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		class Monoid,
+		Backend backend
+	>
+	RC foldl(
+		Scalar< IOType, IOStructure, backend > &alpha,
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			! alp::is_object< IOType >::value && ! alp::is_object< InputType >::value && alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) alpha;
+		(void) y;
+		(void) monoid;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Sort vectors, function available to user, e.g. to sort eigenvectors
+	 */
+	template<
+		typename IndexType, typename IndexStructure, typename IndexView, typename IndexImfR, typename IndexImfC,
+		typename ValueType, typename ValueStructure, typename ValueView, typename ValueImfR, typename ValueImfC,
+		typename Compare,
+		Backend backend
+	>
+	RC sort(
+		Vector< IndexType, IndexStructure, Density::Dense, IndexView, IndexImfR, IndexImfC, backend > &permutation,
+		const Vector< ValueType, ValueStructure, Density::Dense, ValueView, ValueImfR, ValueImfC, backend > &toSort,
+		Compare cmp
+	) noexcept {
+		(void) permutation;
+		(void) toSort;
+		(void) cmp
+		return UNSUPPORTED;
+	}
+
+    /**
+	 * Provides a generic implementation of the 2-norm computation.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		class Ring,
+		Backend backend
+	>
+	RC norm2(
+		Scalar< OutputType, OutputStructure, backend > &x,
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
+		const Ring &ring = Ring(),
+		const std::enable_if_t<
+			std::is_floating_point< OutputType >::value || grb::utils::is_complex< OutputType >::value
+		> * const = nullptr
+	) {
+		(void) x;
+		(void) y;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	/** C++ scalar version */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		class Ring,
+		Backend backend
+	>
+	RC norm2(
+		OutputType &x,
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
+		const Ring &ring = Ring(),
+		const std::enable_if_t<
+			std::is_floating_point< OutputType >::value || grb::utils::is_complex< OutputType >::value
+		> * const = nullptr
+	) {
+		(void) x;
+		(void) y;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	/** @} */
+
+} // end namespace alp
+
+#endif // end _H_ALP_BASE_BLAS1
+
diff --git a/include/alp/base/blas2.hpp b/include/alp/base/blas2.hpp
index c1b87765e..2b90ca671 100644
--- a/include/alp/base/blas2.hpp
+++ b/include/alp/base/blas2.hpp
@@ -56,593 +56,415 @@ namespace alp {
 	 * @{
 	 */
 
-	/**
-	 * Right-handed sparse matrix times vector multiplication, \f$ u = Av \f$.
-	 *
-	 * Let \f$ u \f$ and \f$ \mathit{mask} \f$ each be a #alp::Vector of #alp::size
-	 * \f$ m \f$, \f$ v \f$ be a #alp::Vector of #alp::size \f$ n \f$, and let
-	 * \f$ A \f$ be a #Matrix with #alp::nrows \f$ m \f$ and #alp::ncols \f$ n \f$.
-	 * Let furthermore \f$ z \f$ be an interal vector of size \f$ m \f$.
-	 * A call to this function first computes \f$ z = Av \f$ over the provided
-	 * \a ring. It then left-folds \f$ z \f$ into \f$ u \f$ using the provided
-	 * \a accumulator.
-	 *
-	 * @see Vector for an in-depth description of a GraphBLAS vector.
-	 * @see size   for retrieving the length of a given GraphBLAS vector.
-	 * @see Matrix for an in-depth description of a GraphBLAS matrix.
-	 * @see nrows  for retrieving the number of rows of a given GraphBLAS matrix.
-	 * @see ncols  for retrieving the number of columns of a given GraphBLAS
-	 *             vector.
-	 *
-	 * Formally, the exact operation executed is
-	 *  \f$ u_i^\mathit{out} = u_i^\mathit{in} \bigodot z_i, \f$
-	 * for all \f$ i \in \{ 0, 1, \ldots, m-1 \} \f$ for which
-	 * \f$ \mathit{mask}_i \f$ evaluates <tt>true</tt>. If there is a nonzero at
-	 * \f$ z_i \f$ but no nonzero at \f$ u_i^\mathit{in} \f$ then the latter is interpreted as the additive
-	 * identity \f$ \mathbf{0} \f$ of the given \a ring.
-	 * For \f$ z \f$, we formally have:
-	 *  \f$ z_i = \bigoplus{i=0}^{m-1} \left( A_{ij} \bigotimes v_j \right), \f$
-	 * where \f$ \bigodot \f$ represents the \a accumulator, \f$ \bigoplus \f$
-	 * represents the additive operator of the provided \a ring, and
-	 * \f$ \bigotimes \f$ represents the multiplicative operator of \a ring. If here
-	 * \f$ v_j \f$ does not exist, it is considered to be equal to the additive
-	 * identity of the given \a ring.
-	 *
-	 * \note The additive identity of a given \a ring is an annihilator of
-	 *       nonzeroes from \f$ A \f$ under the multiplicative operator of \a ring;
-	 *       that is, \f$ z_i \f$ will be \f$ \mathbf{0} \f$ always. This can, of
-	 *       course, be exploited during sparse matrix--sparse vector (SpMSpV)
-	 *       multiplication.
-	 *
-	 * \note A good implementation is very careful about forming \f$ z \f$
-	 *       explicitly and, even if it is formed already, is very careful about
-	 *       making use of \f$ z \f$. Making use of an explicit buffer will result
-	 *       in \f$ \Theta(m) \f$ data movement and may only be warrented when
-	 *       \f$ A \f$ has many nonzeroes per row and \f$ v \f$ is dense.
-	 *
-	 * @tparam descr    Any combination of one or more #alp::descriptors. When
-	 *                  ommitted, the default #alp::descriptors:no_operation will
-	 *                  be assumed.
-	 * @tparam Ring     The generalised semi-ring the matrix--vector multiplication
-	 *                  is to be executed under.
-	 * @tparam IOType   The type of the elements of the output vector \a u.
-	 * @tparam InputType1 The type of the elements of the input vector \a v.
-	 * @tparam InputType2 The type of the elements of the input matrix \a A.
-	 * @tparam Operator The type of the \a accumulator. Must be a GraphBLAS
-	 *                  operator; see also #alp::operators.
-	 * @tparam InputType3 The type of the elements of the mask vector \a mask.
-	 * @tparam implementation Which back-end the given vectors and matrices belong
-	 *                        to. These must all belong to the same back-end.
-	 *
-	 * @param[in,out] u The output vector. Depending on the provided
-	 *                  \a accumulator, old vector values may affect new values.
-	 * @param[in]  mask The mask vector. The vector #alp::size must be equal to
-	 *                  that of \a u, \em or it must be equal to zero. A \a mask
-	 *                  of alp::size zero will be ignored (assumed <tt>true</tt>
-	 *                  always.
-	 * @param[in] accumulator The operator \f$ \bigodot \f$ in the above
-	 *                        description.
-	 * @param[in] A     The input matrix. Its #alp::nrows must equal the
-	 *                  #alp::size of \a u.
-	 * @param[in] v     The input vector. Its #alp::size must equal the
-	 *                  #alp::ncols of \a A.
-	 * @param[in] ring  The semiring to perform the matrix--vector multiplication
-	 *                  under. Unless #alp::descriptors::no_casting is defined,
-	 *                  elements from \a u, \a A, and \a v will be cast to the
-	 *                  domains of the additive and multiplicative operators of
-	 *                  \a ring as they are applied during the multiplication.
-	 *
-	 * \warning Even if #alp::operators::right_assign is provided as accumulator,
-	 *          old values of \a u may \em not be overwritten if the computation
-	 *          ends up not writing any new values to those values. To throw away
-	 *          old vector values use alp::descriptors::explicit_zero (for dense
-	 *          vectors only if you wish to retain sparsity of the output vector),
-	 *          or first simply use alp::clear on \a u.
-	 *
-	 * The above semantics may be changed by the following descriptors:
-	 *   * #descriptors::invert_mask: \f$ u_i^\mathit{out} \f$ will be written to
-	 *     if and only if \f$ \mathit{mask}_i \f$ evaluates <tt>false</tt>.
-	 *   * #descriptors::transpose_matrix: \f$ A \f$ is interpreted as \f$ A^T \f$
-	 *     instead.
-	 *   * #descriptors::structural: when evaluating \f$ \mathit{mask}_i \f$, only
-	 *     the structure of \f$ \mathit{mask} \f$ is considered (as opposed to its
-	 *     elements); if \f$ \mathit{mask} \f$ has a nonzero at its \f$ i \f$th
-	 *     index, it is considered to evaluate <tt>true</tt> no matter what the
-	 *     actual value of \f$ \mathit{mask}_i \f$ was.
-	 *   * #descriptors::structural_complement: a combination of two descriptors:
-	 *     #descriptors::structural and #descriptors::invert_mask (and thus
-	 *     equivalent to <tt>structural | invert_mask</tt>). Its net effect is if
-	 *     \f$ \mathit{mask} \f$ does \em not have a nonzero at the \f$ i \f$th
-	 *     index, the mask is considered to evaluate <tt>true</tt>.
-	 *   * #descriptors::add_identity: the matrix \f$ A \f$ is instead interpreted
-	 *     as \f$ A + \mathbf{1} \f$, where \f$ \mathbf{1} \f$ is the
-	 *     multiplicative identity of the given ring.
-	 *   * #descriptors::use_index: when referencing \f$ v_i \f$, if assigned, then
-	 *     instead of using the value itself, its index \f$ i \f$ is used instead.
-	 *   * #descriptors::in_place: the \a accumulator is ignored; the additive
-	 *     operator of the given \a ring is used in its place. Under certain
-	 *     conditions, an implementation can exploit this semantic to active
-	 *     faster computations.
-	 *   * #descriptors::explicit_zero: if \f$ \mathbf{0} \f$ would be assigned to
-	 *     a previously unassigned index, assign \f$ \mathbf{0} \f$ explicitly to
-	 *     that index. Here, \f$ \mathbf{0} \f$ is the additive identity of the
-	 *     provided \a ring.
-	 *
-	 * \parblock
-	 * \par Performance semantics
-	 * Performance semantics vary depending on whether a mask was provided, and on
-	 * whether the input vector is sparse or dense. If the input vector \f$ v \f$
-	 * is sparse, let \f$ J \f$ be its set of assigned indices. If a non-trivial
-	 * mask \f$ \mathit{mask} \f$ is given, let \f$ I \f$ be the set of indices for
-	 * which the corresponding \f$ \mathit{mask}_i \f$ evaluate <tt>true</tt>. Then:
-	 *   -# For the performance guarantee on the amount of work this function
-	 *      entails the following table applies:<br>
-	 *      \f$ \begin{tabular}{cccc}
-	 *           Masked & Dense input  & Sparse input \\
-	 *           \noalign{\smallskip}
-	 *           no  & $\Theta(2\mathit{nnz}(A))$      & $\Theta(2\mathit{nnz}(A_{:,J}))$ \\
-	 *           yes & $\Theta(2\mathit{nnz}(A_{I,:})$ & $\Theta(\min\{2\mathit{nnz}(A_{I,:}),2\mathit{nnz}(A_{:,J})\})$
-	 *          \end{tabular}. \f$
-	 *   -# For the amount of data movements, the following table applies:<br>
-	 *      \f$ \begin{tabular}{cccc}
-	 *           Masked & Dense input  & Sparse input \\
-	 *           \noalign{\smallskip}
-	 *           no  & $\Theta(\mathit{nnz}(A)+\min\{m,n\}+m+n)$                         & $\Theta(\mathit{nnz}(A_{:,J}+\min\{m,2|J|\}+|J|)+\mathcal{O}(2m)$ \\
-	 *           yes & $\Theta(\mathit{nnz}(A_{I,:})+\min\{|I|,n\}+2|I|)+\mathcal{O}(n)$ &
-	 * $\Theta(\min\{\Theta(\mathit{nnz}(A_{I,:})+\min\{|I|,n\}+2|I|)+\mathcal{O}(n),\mathit{nnz}(A_{:,J}+\min\{m,|J|\}+2|J|)+\mathcal{O}(2m))$ \end{tabular}. \f$
-	 *   -# A call to this function under no circumstance will allocate nor free
-	 *      dynamic memory.
-	 *   -# A call to this function under no circumstance will make system calls.
-	 * The above performance bounds may be changed by the following desciptors:
-	 *   * #descriptors::invert_mask: replaces \f$ \Theta(|I|) \f$ data movement
-	 *     costs with a \f$ \mathcal{O}(2m) \f$ cost instead, or a
-	 *     \f$ \mathcal{O}(m) \f$ cost if #descriptors::structural was defined as
-	 *     well (see below). In other words, implementations are not required to
-	 *     implement inverted operations efficiently (\f$ 2\Theta(m-|I|) \f$ data
-	 *     movements would be optimal but costs another \f$ \Theta(m) \f$ memory
-	 *     to maintain).
-	 *   * #descriptors::structural: removes \f$ \Theta(|I|) \f$ data movement
-	 *     costs as the mask values need no longer be touched.
-	 *   * #descriptors::add_identity: adds, at most, the costs of alp::foldl
-	 *     (on vectors) to all performance metrics.
-	 *   * #descriptors::use_index: removes \f$ \Theta(n) \f$ or
-	 *     \f$ \Theta(|J|) \f$ data movement costs as the input vector values need
-	 *     no longer be touched.
-	 *   * #descriptors::in_place (see also above): turns \f$ \mathcal{O}(2m) \f$
-	 *     data movements into \f$ \mathcal{O}(m) \f$ instead; i.e., it halves the
-	 *     amount of data movements for writing the output.
-	 *   * #descriptors::dense: the input, output, and mask vectors are assumed to
-	 *     be dense. This allows the implementation to skip checks or other code
-	 *     blocks related to handling of sparse vectors. This may result in use of
-	 *     unitialised memory if any of the provided vectors were, in fact,
-	 *     sparse.
-	 * Implementations that support multiple user processes must characterise data
-	 * movement between then.
-	 * \endparblock
-	 *
-	 * @returns alp::SUCCESS  If the computation completed successfully.
-	 * @returns alp::MISMATCH If there is at least one mismatch between vector
-	 *                        dimensions or between vectors and the given matrix.
-	 * @returns alp::OVERLAP  If two or more provided vectors refer to the same
-	 *                        vector.
-	 *
-	 * When a non-SUCCESS error code is returned, it shall be as though the call
-	 * was never made. Note that all GraphBLAS functions may additionally return
-	 * #alp::PANIC, which indicates the library has entered an undefined state; if
-	 * this error code is returned, the only sensible thing a user can do is exit,
-	 * or at least refrain from using any GraphBLAS functions for the remainder of
-	 * the application.
-	 */
-	template< 
+	template<
+		Descriptor descr = descriptors::no_operation,
 		class Ring,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		typename InputType3,
+		typename IOType = typename Ring::D4, typename IOStructure,
+		typename IOView, typename IOImfR, typename IOImfC,
+		typename InputType1 = typename Ring::D1, typename InputStructure1,
+		typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2 = typename Ring::D2, typename InputStructure2,
+		typename InputView2, typename InputImfR2, typename InputImfC2,
+		Backend backend
+	>
+	RC vxm(
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &u,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &v,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &A,
+		const Ring &ring = Ring(),
+		const std::enable_if_t< alp::is_semiring< Ring >::value > * const = nullptr
+	) {
+		(void) u;
+		(void) v;
+		(void) A;
+		return UNSUPPORTED;
+	}
+
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename IOType, typename IOStructure, typename IOView,
+		typename IOImfR, typename IOImfC,
+		typename InputType1, typename InputStructure1, typename InputView1,
+		typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2,
+		typename InputImfR2, typename InputImfC2,
+		class AdditiveMonoid, class MultiplicativeOperator,
+		Backend backend
+	>
+	RC vxm(
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &u,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &v,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &A,
+		const AdditiveMonoid &add = AdditiveMonoid(),
+		const MultiplicativeOperator &mul = MultiplicativeOperator(),
+		const std::enable_if_t<
+			alp::is_monoid< AdditiveMonoid >::value &&
+			alp::is_operator< MultiplicativeOperator >::value &&
+			!alp::is_object< IOType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			!std::is_same< InputType2, void >::value
+		> * const = nullptr
+	) {
+		(void) u;
+		(void) v;
+		(void) A;
+		(void) add;
+		(void) mul;
+		return UNSUPPORTED;
+	}
+
+	template<
 		Descriptor descr = descriptors::no_operation,
-		enum Backend implementation = config::default_backend >
-	RC mxv( internal::Vector< IOType, implementation > & u,
-		const internal::Vector< InputType3, implementation > & mask,
-		const internal::Matrix< InputType2, implementation > & A,
-		const internal::Vector< InputType1, implementation > & v,
-		const Ring & ring,
-		typename std::enable_if< alp::is_semiring< Ring >::value, void >::type * = NULL ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::mxv (output-masked)\n";
- #endif
-#endif
-		(void)u;
-		(void)mask;
-		(void)A;
-		(void)v;
-		(void)ring;
+		class Ring,
+		typename IOType = typename Ring::D4, typename IOStructure,
+		typename IOView, typename IOImfR, typename IOImfC,
+		typename InputType2 = typename Ring::D2, typename InputStructure2,
+		typename InputView2, typename InputImfR2, typename InputImfC2,
+		typename InputType1 = typename Ring::D1, typename InputStructure1,
+		typename InputView1, typename InputImfR1, typename InputImfC1,
+		Backend backend
+	>
+	RC mxv(
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &u,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &A,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &v,
+		const Ring &ring,
+		const std::enable_if_t< alp::is_semiring< Ring >::value > * const = nullptr
+	) {
+		(void) u;
+		(void) A;
+		(void) v;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename IOType, typename IOStructure, typename IOView,
+		typename IOImfR, typename IOImfC,
+		typename InputType2, typename InputStructure2, typename InputView2,
+		typename InputImfR2, typename InputImfC2,
+		typename InputType1, typename InputStructure1, typename InputView1,
+		typename InputImfR1, typename InputImfC1,
+		class AdditiveMonoid, class MultiplicativeOperator,
+		Backend backend
+	>
+	RC mxv(
+		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &u,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &A,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &v,
+		const AdditiveMonoid &add = AdditiveMonoid(),
+		const MultiplicativeOperator &mul = MultiplicativeOperator(),
+		const std::enable_if_t<
+			alp::is_monoid< AdditiveMonoid >::value &&
+			alp::is_operator< MultiplicativeOperator >::value &&
+			!alp::is_object< IOType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			!std::is_same< InputType2, void >::value
+		> * const = nullptr
+	) {
+		(void) u;
+		(void) A;
+		(void) v;
+		(void) add;
+		(void) mul;
 		return UNSUPPORTED;
 	}
 
 	/**
-	 * A short-hand for an unmasked #alp::mxv.
-	 *
-	 * @see alp::mxv for the full documentation.
+	 * @see alp::eWiseLambda for the user-level specification.
 	 */
-	template< Descriptor descr = descriptors::no_operation, class Ring, typename IOType, typename InputType1, typename InputType2, Backend implementation = config::default_backend >
-	RC mxv( internal::Vector< IOType, implementation > & u,
-		const internal::Matrix< InputType2, implementation > & A,
-		const internal::Vector< InputType1, implementation > & v,
-		const Ring & ring,
-		typename std::enable_if< alp::is_semiring< Ring >::value, void >::type * = NULL ) {
-#ifdef _DEBUG
-#ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::mxv\n";
-#else
-		printf( "Selected backend does not implement alp::mxv\n" );
-#endif
-#endif
-		(void)u;
-		(void)A;
-		(void)v;
-		(void)ring;
+	template<
+		typename Func,
+		typename DataType, typename Structure, typename View, typename ImfR, typename ImfC,
+		Backend backend
+	>
+	RC eWiseLambda(
+		const Func f,
+		Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, backend > &A
+	) {
+		(void) f;
+		(void) A;
 		return UNSUPPORTED;
 	}
 
 	/**
-	 * Left-handed sparse matrix times vector multiplication, \f$ u = vA \f$.
+	 * This function provides dimension checking and will defer to the below
+	 * function for the actual implementation.
 	 *
-	 * If \a descr does not have #alp::descriptors::transpose_matrix defined, the
-	 * semantics and performance semantics of this function are exactly that of
-	 * alp::mxv with the #alp::descriptors::transpose_matrix set.
-	 * In the other case, the functional and performance semantics of this function
-	 * are exactly that of alp::mxv without the #alp::descriptors::transpose_matrix
-	 * set.
-	 *
-	 * @see alp::mxv for the full documentation.
+	 * @see alp::eWiseLambda for the user-level specification.
 	 */
-	template< Descriptor descr = descriptors::no_operation,
-		class Ring,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		typename InputType3,
-		enum Backend implementation = config::default_backend >
-	RC vxm( internal::Vector< IOType, implementation > & u,
-		const internal::Vector< InputType3, implementation > & mask,
-		const internal::Vector< InputType1, implementation > & v,
-		const internal::Matrix< InputType2, implementation > & A,
-		const Ring & ring,
-		typename std::enable_if< alp::is_semiring< Ring >::value, void >::type * = NULL ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::vxm (output-masked)\n";
- #endif
-#endif
-		(void)u;
-		(void)mask;
-		(void)v;
-		(void)A;
-		(void)ring;
-		return UNSUPPORTED;
+	template<
+		typename Func,
+		typename DataType1, typename Structure1, typename View1, typename ImfR1, typename ImfC1,
+		typename DataType2, typename Structure2, typename View2, typename ImfR2, typename ImfC2,
+		Backend backend,
+		typename... Args
+	>
+	RC eWiseLambda(
+		const Func f,
+		Matrix< DataType1, Structure1, Density::Dense, View1, ImfR1, ImfC1, backend > &A,
+		const Vector< DataType2, Structure2, Density::Dense, View2, ImfR2, ImfC2, backend > &x,
+		Args const &... args
+	) {
+		// do size checking
+		if( !( getLength( x ) == nrows( A ) || getLength( x ) == ncols( A ) ) ) {
+			std::cerr << "Mismatching dimensions: given vector of size " << size( x )
+				<< " has nothing to do with either matrix dimension (" << nrows( A ) << " nor " << ncols( A ) << ").\n";
+			return MISMATCH;
+		}
+
+		return eWiseLambda( f, A, args... );
 	}
 
 	/**
-	 * A short-hand for an unmasked alp::vxm.
-	 *
-	 * @see alp::vxm for the full documentation.
+	 * For all elements in a ALP Matrix \a B, fold the value \f$ \alpha \f$
+	 * into each element.
 	 */
-	template< Descriptor descr = descriptors::no_operation,
-		class Ring,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		enum Backend implementation = config::default_backend >
-	RC vxm( internal::Vector< IOType, implementation > & u,
-		const internal::Vector< InputType1, implementation > & v,
-		const internal::Matrix< InputType2, implementation > & A,
-		const Ring & ring,
-		typename std::enable_if< alp::is_semiring< Ring >::value, void >::type * = NULL ) {
-#ifdef _DEBUG
-  #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::vxm\n";
- #endif
-#endif
-		(void)u;
-		(void)v;
-		(void)A;
-		(void)ring;
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Monoid,
+		Backend backend
+	>
+	RC foldr(
+		const Scalar< InputType, InputStructure, backend > &alpha,
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &B,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			!alp::is_object< InputType >::value &&
+			!alp::is_object< IOType >::value &&
+			alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) alpha;
+		(void) B;
+		(void) monoid;
 		return UNSUPPORTED;
 	}
 
-	/** TODO documentation */
-	template< Descriptor descr = descriptors::no_operation,
-		class AdditiveMonoid,
-		class MultiplicativeOperator,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		typename InputType3,
-		typename InputType4,
-		Backend backend >
-	RC vxm( internal::Vector< IOType, backend > & u,
-		const internal::Vector< InputType3, backend > & mask,
-		const internal::Vector< InputType1, backend > & v,
-		const internal::Vector< InputType4, backend > & v_mask,
-		const internal::Matrix< InputType2, backend > & A,
-		const AdditiveMonoid & add = AdditiveMonoid(),
-		const MultiplicativeOperator & mul = MultiplicativeOperator(),
-		const typename std::enable_if< alp::is_monoid< AdditiveMonoid >::value && alp::is_operator< MultiplicativeOperator >::value && ! alp::is_object< IOType >::value &&
-				! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value && ! alp::is_object< InputType3 >::value && ! alp::is_object< InputType4 >::value &&
-				! std::is_same< InputType2, void >::value,
-			void >::type * const = NULL ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement vxm (doubly-masked)\n";
- #endif
-#endif
-		(void)u;
-		(void)mask;
-		(void)v;
-		(void)v_mask;
-		(void)A;
-		(void)add;
-		(void)mul;
+	/** Folds element-wise alpha into B, operator variant */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Operator,
+		Backend backend
+	>
+	RC foldr(
+		const Scalar< InputType, InputStructure, backend > &alpha,
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &B,
+		const Operator &op = Operator(),
+		const std::enable_if_t<
+			!alp::is_object< InputType >::value &&
+			!alp::is_object< IOType >::value &&
+			alp::is_operator< Operator >::value
+		> * const = nullptr
+	) {
+		(void) alpha;
+		(void) B;
+		(void) op;
 		return UNSUPPORTED;
 	}
 
-	/** TODO documentation */
-	template< Descriptor descr = descriptors::no_operation,
-		class AdditiveMonoid,
-		class MultiplicativeOperator,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		typename InputType3,
-		typename InputType4,
-		Backend backend >
-	RC mxv( internal::Vector< IOType, backend > & u,
-		const internal::Vector< InputType3, backend > & mask,
-		const internal::Matrix< InputType2, backend > & A,
-		const internal::Vector< InputType1, backend > & v,
-		const internal::Vector< InputType4, backend > & v_mask,
-		const AdditiveMonoid & add = AdditiveMonoid(),
-		const MultiplicativeOperator & mul = MultiplicativeOperator(),
-		const typename std::enable_if< alp::is_monoid< AdditiveMonoid >::value && alp::is_operator< MultiplicativeOperator >::value && ! alp::is_object< IOType >::value &&
-				! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value && ! alp::is_object< InputType3 >::value && ! alp::is_object< InputType4 >::value &&
-				! std::is_same< InputType2, void >::value,
-			void >::type * const = NULL ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement mxv (doubly-masked)\n";
- #endif
-#endif
-		(void)u;
-		(void)mask;
-		(void)A;
-		(void)v;
-		(void)v_mask;
-		(void)add;
-		(void)mul;
+	/** Folds element-wise A into B, monoid variant */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Monoid,
+		Backend backend
+	>
+	RC foldr(
+		const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &A,
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &B,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			!alp::is_object< InputType >::value &&
+			!alp::is_object< IOType >::value &&
+			alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) A;
+		(void) B;
+		(void) monoid;
 		return UNSUPPORTED;
 	}
 
-	/** TODO documentation */
-	template< Descriptor descr = descriptors::no_operation,
-		class AdditiveMonoid,
-		class MultiplicativeOperator,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		typename InputType3,
-		Backend backend >
-	RC mxv( internal::Vector< IOType, backend > & u,
-		const internal::Vector< InputType3, backend > & mask,
-		const internal::Matrix< InputType2, backend > & A,
-		const internal::Vector< InputType1, backend > & v,
-		const AdditiveMonoid & add = AdditiveMonoid(),
-		const MultiplicativeOperator & mul = MultiplicativeOperator(),
-		const typename std::enable_if< alp::is_monoid< AdditiveMonoid >::value && alp::is_operator< MultiplicativeOperator >::value && ! alp::is_object< IOType >::value &&
-				! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value && ! alp::is_object< InputType3 >::value && ! std::is_same< InputType2, void >::value,
-			void >::type * const = NULL ) {
-		(void)u;
-		(void)mask;
-		(void)A;
-		(void)v;
-		(void)add;
-		(void)mul;
+	/** Folds element-wise A into B, operator variant */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Operator,
+		Backend backend
+	>
+	RC foldr(
+		const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &A,
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &B,
+		const Operator &op = Operator(),
+		const std::enable_if_t<
+			!alp::is_object< InputType >::value &&
+			!alp::is_object< IOType >::value &&
+			alp::is_operator< Operator >::value
+		> * const = nullptr
+	) {
+		(void) A;
+		(void) B;
+		(void) op;
 		return UNSUPPORTED;
 	}
 
-	/** TODO documentation */
-	template< Descriptor descr = descriptors::no_operation,
-		class AdditiveMonoid,
-		class MultiplicativeOperator,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		Backend backend >
-	RC vxm( internal::Vector< IOType, backend > & u,
-		const internal::Vector< InputType1, backend > & v,
-		const internal::Matrix< InputType2, backend > & A,
-		const AdditiveMonoid & add = AdditiveMonoid(),
-		const MultiplicativeOperator & mul = MultiplicativeOperator(),
-		const typename std::enable_if< alp::is_monoid< AdditiveMonoid >::value && alp::is_operator< MultiplicativeOperator >::value && ! alp::is_object< IOType >::value &&
-				! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value && ! std::is_same< InputType2, void >::value,
-			void >::type * const = NULL ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement vxm (unmasked)\n";
- #endif
-#endif
-		(void)u;
-		(void)v;
-		(void)A;
-		(void)add;
-		(void)mul;
+	/** Folds element-wise B into A, monoid variant */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Monoid,
+		Backend backend
+	>
+	RC foldl(
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &A,
+		const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &B,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			!alp::is_object< IOType >::value &&
+			!alp::is_object< InputType >::value &&
+			alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) A;
+		(void) B;
+		(void) monoid;
 		return UNSUPPORTED;
 	}
 
-	/** TODO documentation */
-	template< Descriptor descr = descriptors::no_operation,
-		class AdditiveMonoid,
-		class MultiplicativeOperator,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		typename InputType3,
-		Backend implementation >
-	RC vxm( internal::Vector< IOType, implementation > & u,
-		const internal::Vector< InputType3, implementation > & mask,
-		const internal::Vector< InputType1, implementation > & v,
-		const internal::Matrix< InputType2, implementation > & A,
-		const AdditiveMonoid & add = AdditiveMonoid(),
-		const MultiplicativeOperator & mul = MultiplicativeOperator(),
-		typename std::enable_if< alp::is_monoid< AdditiveMonoid >::value && alp::is_operator< MultiplicativeOperator >::value && ! alp::is_object< IOType >::value &&
-				! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value && ! std::is_same< InputType2, void >::value,
-			void >::type * = NULL ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::vxm (output-masked)\n";
- #endif
-#endif
-		(void)u;
-		(void)mask;
-		(void)v;
-		(void)A;
-		(void)add;
-		(void)mul;
+	/** Folds element-wise B into A, operator variant */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Operator,
+		Backend backend
+	>
+	RC foldl(
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &A,
+		const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &B,
+		const Operator &op = Operator(),
+		const std::enable_if_t<
+			!alp::is_object< IOType >::value &&
+			!alp::is_object< InputType >::value &&
+			alp::is_operator< Operator >::value
+		> * const = nullptr
+	) {
+		(void) A;
+		(void) B;
+		(void) op;
 		return UNSUPPORTED;
 	}
 
-	/** TODO documentation */
-	template< Descriptor descr = descriptors::no_operation,
-		class AdditiveMonoid,
-		class MultiplicativeOperator,
-		typename IOType,
-		typename InputType1,
-		typename InputType2,
-		Backend backend >
-	RC mxv( internal::Vector< IOType, backend > & u,
-		const internal::Matrix< InputType2, backend > & A,
-		const internal::Vector< InputType1, backend > & v,
-		const AdditiveMonoid & add = AdditiveMonoid(),
-		const MultiplicativeOperator & mul = MultiplicativeOperator(),
-		const typename std::enable_if< alp::is_monoid< AdditiveMonoid >::value && alp::is_operator< MultiplicativeOperator >::value && ! alp::is_object< IOType >::value &&
-				! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value && ! std::is_same< InputType2, void >::value,
-			void >::type * const = NULL ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::mxv (unmasked)\n";
- #endif
-#endif
-		(void)u;
-		(void)A;
-		(void)v;
-		(void)add;
-		(void)mul;
+	/** Folds element-wise beta into A, monoid variant */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Monoid,
+		Backend backend
+	>
+	RC foldl(
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &A,
+		const Scalar< InputType, InputStructure, backend > &beta,
+		const Monoid &monoid = Monoid(),
+		const std::enable_if_t<
+			!alp::is_object< IOType >::value &&
+			!alp::is_object< InputType >::value &&
+			alp::is_monoid< Monoid >::value
+		> * const = nullptr
+	) {
+		(void) A;
+		(void) beta;
+		(void) monoid;
+		return UNSUPPORTED;
+	}
+
+	/** Folds element-wise beta into A, operator variant */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure,
+		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+		class Operator,
+		Backend backend
+	>
+	RC foldl(
+		Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &A,
+		const Scalar< InputType, InputStructure, backend > &beta,
+		const Operator &op = Operator(),
+		const std::enable_if_t<
+			!alp::is_object< IOType >::value &&
+			!alp::is_object< InputType >::value &&
+			alp::is_operator< Operator >::value
+		> * const = nullptr
+	) {
+		(void) A;
+		(void) beta;
+		(void) op;
 		return UNSUPPORTED;
 	}
 
 	/**
-	 * Executes an arbitrary element-wise user-defined function \a f on all
-	 * nonzero elements of a given matrix \a A.
-	 *
-	 * The user-defined function is passed as a lambda which can capture whatever
-	 * the user would like, including one or multiple alp::Vector instances, or
-	 * multiple scalars. When capturing vectors, these should also be passed as a
-	 * additional arguments to this functions so to make sure those vectors are
-	 * synchronised for access on all row- and column- indices corresponding to
-	 * locally stored nonzeroes of \a A.
-	 *
-	 * Only the elements of a single matrix may be iterated upon.
-	 *
-	 * \note Rationale: while it is reasonable to expect an implementation be able
-	 *       to synchronise vector elements, it may be unreasonable to expect two
-	 *       different matrices can be jointly accessed via arbitrary lambda
-	 *       functions.
-	 *
-	 * \warning The lambda shall only be executed on the data local to the user
-	 *          process calling this function! This is different from the various
-	 *          fold functions, or alp::dot, in that the semantics of those
-	 *          functions always result in globally synchronised result. To
-	 *          achieve the same effect with user-defined lambdas, the users
-	 *          should manually prescribe how to combine the local results into
-	 *          global ones, for instance, by subsequent calls to
-	 *          alp::collectives.
-	 *
-	 * \note This is an addition to the GraphBLAS. It is alike user-defined
-	 *       operators, monoids, and semirings, except it allows execution on
-	 *       arbitrarily many inputs and arbitrarily many outputs.
-	 *
-	 * @tparam Func     the user-defined lambda function type.
-	 * @tparam DataType the type of the user-supplied matrix.
-	 * @tparam backend  the backend type of the user-supplied vector example.
-	 *
-	 * @param[in] f The user-supplied lambda. This lambda should only capture
-	 *              and reference vectors of the same length as either the row or
-	 *              column dimension length of \a A. The lambda function should
-	 *              prescribe the operations required to execute on a given
-	 *              reference to a matrix nonzero of \a A (of type \a DataType) at
-	 *              a given index \f$ (i,j) \f$. Captured GraphBLAS vectors can
-	 *              access corresponding elements via Vector::operator[] or
-	 *              Vector::operator(). It is illegal to access any element not at
-	 *              position \a i if the vector length is equal to the row
-	 *              dimension. It is illegal to access any element not at position
-	 *              \a j if the vector length is equal to the column dimension.
-	 *              Vectors of length neither equal to the column or row dimension
-	 *              may \em not be referenced or undefined behaviour will occur. The
-	 *              reference to the matrix nonzero is non \a const and may thus be
-	 *              modified. New nonzeroes may \em not be added through this lambda
-	 *              functionality. The function \a f must have the following
-	 *              signature:
-	 *              <code>(DataType &nz, const size_t i, const size_t j)</code>.
-	 *              The GraphBLAS implementation decides which nonzeroes of \a A are
-	 *              dereferenced, and thus also decides the values \a i and \a j the
-	 *              user function is evaluated on.
-	 * @param[in] A The matrix the lambda is to access the elements of.
-	 * @param[in] args All vectors the lambda is to access elements of. Must be of
-	 *                 the same length as \a nrows(A) or \a ncols(A). If this
-	 *                 constraint is violated, alp::MISMATCH shall be returned. If
-	 *                 the vector length equals \a nrows(A), the vector shall be
-	 *                 synchronized for access on \a i. If the vector length equals
-	 *                 \a ncols(A), the vector shall be synchronized for access on
-	 *                 \a j. If \a A is square, the vectors will be synchronised for
-	 *                 access on both \a x and \a y. <em>This is a variadic argument
-	 *                 and can contain any number of containers of type alp::Vector,
-	 *                 passed as though they were separate arguments.</em>
-	 *
-	 * \warning Using a alp::Vector inside a lambda passed to this function while
-	 *          not passing that same vector into \a args, will result in undefined
-	 *          behaviour.
-	 *
-	 * \warning Due to the constraints on \a f described above, it is illegal to
-	 *          capture some vector \a y and have the following line in the body
-	 *          of \a f: <code>x[i] += x[i+1]</code>. Vectors can only be
-	 *          dereferenced at position \a i and \a i alone, and similarly for
-	 *          access using \a j. For square matrices, however, the following
-	 *          code in the body is accepted, however: <code>x[i] += x[j]</code>.
-	 *
-	 * @return alp::SUCCESS  When the lambda is successfully executed.
-	 * @return alp::MISMATCH When two or more vectors passed to \a args are not of
-	 *                       appropriate length.
-	 *
-	 * \warning Captured scalars will be local to the user process executing the
-	 *          lambda. To retrieve the global dot product, an allreduce must
-	 *          explicitly be called.
-	 *
-	 * @see Vector::operator[]()
-	 * @see Vector::operator()()
-	 * @see Vector::lambda_reference
+	 * Returns a view over the input matrix returning conjugate of the accessed element.
 	 */
-	template< typename Func, typename DataType, Backend implementation = config::default_backend, typename... Args >
-	RC eWiseLambda( const Func f, const internal::Matrix< DataType, implementation > & A, Args... /*args*/ ) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::eWiseLambda (matrices)\n";
- #endif
-#endif
-		(void)f;
-		(void)A;
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename DataType, typename Structure, typename View, typename ImfR, typename ImfC,
+		Backend backend,
+		std::enable_if_t<
+			!structures::is_a< Structure, structures::Square >::value
+		> * = nullptr
+	>
+	Matrix<
+		DataType, Structure, Density::Dense,
+		view::Functor< std::function< void( DataType &, const size_t, const size_t ) > >,
+		imf::Id, imf::Id,
+		backend
+	>
+	conjugate(
+		const Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, backend > &A,
+		const std::enable_if_t<
+			!alp::is_object< DataType >::value
+		> * const = nullptr
+	) {
+		(void) A;
 		return UNSUPPORTED;
 	}
 
+	/** Specialization for square matrices */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename DataType, typename Structure, typename View, typename ImfR, typename ImfC,
+		Backend backend,
+		std::enable_if_t<
+			structures::is_a< Structure, structures::Square >::value
+		> * = nullptr
+	>
+	Matrix<
+		DataType, Structure, Density::Dense,
+		view::Functor< std::function< void( DataType &, const size_t, const size_t ) > >,
+		imf::Id, imf::Id,
+		backend
+	>
+	conjugate(
+		const Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, backend > &A,
+		const std::enable_if_t<
+			!alp::is_object< DataType >::value
+		> * const = nullptr
+	) {
+		(void) A;
+		return UNSUPPORTED;
+	}
 	/** @} */
 
 } // namespace alp
diff --git a/include/alp/base/blas3.hpp b/include/alp/base/blas3.hpp
index 99e648b64..b6501d5c2 100644
--- a/include/alp/base/blas3.hpp
+++ b/include/alp/base/blas3.hpp
@@ -24,9 +24,12 @@
 
 #include <alp/backends.hpp>
 #include <alp/phase.hpp>
+#include <alp/identities.hpp>
+#include <alp/monoid.hpp>
 
 #include "matrix.hpp"
 #include "vector.hpp"
+#include "io.hpp"
 
 namespace alp {
 
@@ -40,124 +43,287 @@ namespace alp {
 	 */
 
 	/**
-	 * Unmaked sparse matrix--sparse matrix multiplication (SpMSpM).
-	 *
-	 * @tparam descr      The descriptors under which to perform the computation.
-	 * @tparam OutputType The type of elements in the output matrix.
-	 * @tparam InputType1 The type of elements in the left-hand side input
-	 *                    matrix.
-	 * @tparam InputType2 The type of elements in the right-hand side input
-	 *                    matrix.
-	 * @tparam Semiring   The semiring under which to perform the
-	 *                    multiplication.
-	 * @tparam Backend    The backend that should perform the computation.
-	 *
-	 * @returns SUCCESS If the computation completed as intended.
-	 * @returns FAILED  If the call was not not preceded by one to
-	 *                  #alp::resize( C, A, B ); \em and the current capacity of
-	 *                  \a C was insufficient to store the multiplication of \a A
-	 *                  and \a B. The contents of \a C shall be undefined (which
-	 *                  is why #FAILED is returned instead of #ILLEGAL-- this
-	 *                  error has side effects).
-	 *
-	 * @param[out] C The output matrix \f$ C = AB \f$ when the function returns
-	 *               #SUCCESS.
-	 * @param[in]  A The left-hand side input matrix \f$ A \f$.
-	 * @param[in]  B The left-hand side input matrix \f$ B \f$.
-	 *
-	 * @param[in] ring (Optional.) The semiring under which the computation should
-	 *                             proceed.
+	 * @brief Computes \f$ C = A . B \f$ for a given monoid.
 	 */
 	template<
 		Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename InputType1, typename InputType2,
-		class Semiring,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class MulMonoid,
 		Backend backend
 	>
-	RC mxm( internal::Matrix< OutputType, backend > &C,
-		const internal::Matrix< InputType1, backend > &A, const internal::Matrix< InputType2, backend > &B,
-		const Semiring &ring = Semiring(),
-		const PHASE &phase = NUMERICAL
+	RC eWiseApply(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Matrix< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &A,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &B,
+		const MulMonoid &mulmono,
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< MulMonoid >::value
+		> * const = nullptr
 	) {
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::mxm (semiring version)\n";
- #endif
-#endif
-		(void)C;
-		(void)A;
-		(void)B;
-		(void)ring;
-		(void)phase;
-		// this is the generic stub implementation
+		(void) C;
+		(void) A;
+		(void) B;
+		(void) mulmono;
 		return UNSUPPORTED;
 	}
 
+
 	/**
-	 * Interprets three vectors x, y, and z as a series of row coordinates,
-	 * column coordinates, and nonzeroes, respectively, and stores the thus
-	 * defined nonzeroes in a given output matrix A.
-	 *
-	 * If this function does not return SUCCESS, A will have been cleared.
-	 *
-	 * A must have been pre-allocated to store the nonzero pattern the three
-	 * given vectors x, y, and z encode, or ILLEGAL shall be returned.
-	 *
-	 * \note A call to this function hence must be preceded by a successful
-	 *       call to alp::resize( matrix, nnz );
-	 *
-	 * @param[out] A The output matrix
-	 * @param[in]  x A vector of row indices.
-	 * @param[in]  y A vector of column indices.
-	 * @param[in]  z A vector of nonzero values.
-	 *
-	 * If x, y, and z are sparse, they must have the exact same sparsity
-	 * structure.
-	 *
-	 * \par Descriptors
+	 * Computes \f$ C = alpha . B \f$ for a given monoid.
 	 *
-	 * None allowed.
-	 *
-	 * @returns SUCCESS  If A was constructed successfully.
-	 * @returns MISMATCH If y or z does not match the size of x.
-	 * @returns ILLEGAL  If y or z do not have the same number of nonzeroes
-	 *                   as x.
-	 * @returns ILLEGAL  If y or z has a different sparsity pattern from x.
-	 * @returns ILLEGAL  If the capacity of A was insufficient to store the
-	 *                   given sparsity pattern.
+	 * Case where \a A is a scalar.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class MulMonoid,
+		Backend backend
+	>
+	RC eWiseApply(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Scalar< InputType1, InputStructure1, backend > &alpha,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &B,
+		const MulMonoid &mulmono,
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< MulMonoid >::value
+		> * const = nullptr
+	) {
+		(void) C;
+		(void) alpha;
+		(void) B;
+		(void) mulmono;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Computes \f$ C = A . beta \f$ for a given monoid.
 	 *
-	 * @see alp::resize
+	 * Case where \a B is a scalar.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2,
+		class MulMonoid,
+		Backend backend
+	>
+	RC eWiseApply(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Matrix< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &A,
+		const Scalar< InputType2, InputStructure2, backend > &beta,
+		const MulMonoid &mulmono,
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_monoid< MulMonoid >::value
+		> * const = nullptr
+	) {
+		(void) C;
+		(void) A;
+		(void) beta;
+		(void) mulmono;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Calculates the element-wise multiplication of two matrices,
+	 *     \f$ C = C + A .* B \f$,
+	 * under a given semiring.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation, class Ring,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		Backend backend
+	>
+	RC eWiseMul(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Matrix< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &A,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &B,
+		const Ring &ring = Ring(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_semiring< Ring >::value
+		> * const = nullptr
+	) {
+		(void) C;
+		(void) A;
+		(void) B;
+		(void) ring;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * eWiseMul, version where A is a scalar.
 	 */
-	template< Descriptor descr = descriptors::no_operation, typename OutputType, typename InputType1, typename InputType2, typename InputType3, Backend backend >
-	RC zip( internal::Matrix< OutputType, backend > & A, const internal::Vector< InputType1, backend > & x, const internal::Vector< InputType2, backend > & y, const internal::Vector< InputType3, backend > & z ) {
-		(void)x;
-		(void)y;
-		(void)z;
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::zip (vectors into matrices, non-void)\n";
- #endif
-#endif
-		const RC ret = alp::clear( A );
-		return ret == SUCCESS ? UNSUPPORTED : ret;
+	template<
+		Descriptor descr = descriptors::no_operation, class Ring,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		Backend backend
+	>
+	RC eWiseMul(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Scalar< InputType1, InputStructure1, backend > &alpha,
+		const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &B,
+		const Ring &ring = Ring(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_semiring< Ring >::value
+		> * const = nullptr
+	) {
+		(void) C;
+		(void) alpha;
+		(void) B;
+		(void) ring;
+		return UNSUPPORTED;
 	}
 
 	/**
-	 * Specialisation of alp::zip for void output matrices.
+	 * eWiseMul, version where B is a scalar.
 	 */
-	template< Descriptor descr = descriptors::no_operation, typename InputType1, typename InputType2, typename InputType3, Backend backend >
-	RC zip( internal::Matrix< void, backend > & A, const internal::Vector< InputType1, backend > & x, const internal::Vector< InputType2, backend > & y ) {
-		(void)x;
-		(void)y;
-#ifdef _DEBUG
- #ifndef _ALP_NO_STDIO
-		std::cerr << "Selected backend does not implement alp::zip (vectors into matrices, void)\n";
- #endif
-#endif
-		const RC ret = alp::clear( A );
-		return ret == SUCCESS ? UNSUPPORTED : ret;
+	template<
+		Descriptor descr = descriptors::no_operation, class Ring,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2,
+		Backend backend
+	>
+	RC eWiseMul(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Matrix< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &A,
+		const Scalar< InputType2, InputStructure2, backend > &beta,
+		const Ring &ring = Ring(),
+		const std::enable_if_t<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_semiring< Ring >::value
+		> * const = nullptr
+	) {
+		(void) C;
+		(void) A;
+		(void) beta;
+		(void) ring;
+		return UNSUPPORTED;
 	}
 
+	/**
+	 * @brief  Outer product of two vectors. The result matrix \a A will contain \f$ uv^T \f$.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class Operator,
+		Backend backend
+	>
+	RC outer(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &A,
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &u,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &v,
+		const Operator &mul = Operator(),
+		const std::enable_if_t<
+			alp::is_operator< Operator >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			!alp::is_object< OutputType >::value
+		> * const = nullptr
+	) {
+		(void) A;
+		(void) u;
+		(void) v;
+		(void) mul;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Returns a view over the general rank-1 matrix computed with the outer product.
+	 * This avoids creating the resulting container. The elements are calculated lazily on access.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+		class Operator,
+		Backend backend
+	>
+	Matrix<
+		typename Operator::D3, structures::General, Density::Dense,
+		view::Functor< std::function< void( InputType1 &, const size_t, const size_t ) > >,
+		imf::Id, imf::Id,
+		backend
+	>
+	outer(
+		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+		const Operator &mul = Operator(),
+		const typename std::enable_if<
+			alp::is_operator< Operator >::value &&
+			! alp::is_object< InputType1 >::value &&
+			! alp::is_object< InputType2 >::value
+		> * const = nullptr
+	) {
+		(void) x;
+		(void) y;
+		(void) mul;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Returns a view over the general rank-1 matrix computed with the outer product.
+	 * Version for the case when input vectors are the same vector,
+	 * which results in a symmetric matrix.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+		class Operator,
+		Backend backend
+	>
+	Matrix<
+		typename Operator::D3,
+		typename std::conditional<
+			grb::utils::is_complex< typename Operator::D3 >::value,
+			alp::structures::Hermitian,
+			alp::structures::Symmetric
+		>::type,
+		Density::Dense,
+		view::Functor< std::function< void( typename Operator::D3 &, const size_t, const size_t ) > >,
+		imf::Id, imf::Id,
+		backend
+	>
+	outer(
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
+		const Operator &mul = Operator(),
+		const std::enable_if_t<
+			alp::is_operator< Operator >::value &&
+			!alp::is_object< InputType >::value
+		> * const = nullptr
+	) {
+		(void) x;
+		(void) mul;
+		return UNSUPPORTED;
+	}
 	/**
 	 * @}
 	 */
diff --git a/include/alp/base/io.hpp b/include/alp/base/io.hpp
index 52cfc3930..2c1cf14c2 100644
--- a/include/alp/base/io.hpp
+++ b/include/alp/base/io.hpp
@@ -54,6 +54,231 @@ namespace alp {
 	 * @{
 	 */
 
+	/**
+	 * Request the size (dimension) of a given Vector.
+	 */
+	template<
+		typename DataType, typename DataStructure, typename View,
+		typename ImfR, typename ImfC, Backend backend
+	>
+	size_t size(
+		const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x
+	) noexcept {
+
+#ifndef NDEBUG
+		const bool selected_backend_does_not_support_size_for_vector = false;
+		assert( selected_backend_does_not_support_size_for_vector );
+#endif
+		(void) x;
+		return SIZE_MAX;
+	}
+
+	/**
+	 * Request the number of nonzeroes in a given Vector.
+	 */
+	template<
+		typename DataType, typename DataStructure, typename View,
+		typename ImfR, typename ImfC, Backend backend
+	>
+	size_t nnz(
+		const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > &x
+	) noexcept {
+
+#ifndef NDEBUG
+		const bool selected_backend_does_not_support_nnz_for_vector = false;
+		assert( selected_backend_does_not_support_nnz_for_vector );
+#endif
+		(void) x;
+		return SIZE_MAX;
+	}
+
+	/**
+	 * Retrieve the number of nonzeroes contained in this matrix.
+	 *
+	 * @returns The number of nonzeroes the current matrix contains.
+	 *
+	 * \parblock
+	 * \par Performance semantics.
+	 *        -# This function consitutes \f$ \Theta(1) \f$ work.
+	 *        -# This function allocates no additional dynamic memory.
+	 *        -# This function uses \f$ \mathcal{O}(1) \f$ memory
+	 *           beyond that which was already used at function entry.
+	 *        -# This function will move
+	 *             \f$ \mathit{sizeof}( size\_t ) \f$
+	 *           bytes of memory.
+	 * \endparblock
+	 */
+	template<
+		typename DataType, typename Structure, typename View,
+		typename ImfR, typename ImfC, Backend backend
+	>
+	size_t nnz(
+		const Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, reference > &A
+	) noexcept {
+
+#ifndef NDEBUG
+		const bool selected_backend_does_not_support_nnz_for_matrix = false;
+		assert( selected_backend_does_not_support_nnz_for_matrix );
+#endif
+		(void) A;
+		return SIZE_MAX;
+	}
+
+	/**
+	 * Clears all elements from the given vector \a x.
+	 *
+	 * At the end of this operation, the number of nonzero elements in this vector
+	 * will be zero. The size of the vector remains unchanged.
+	 */
+	template<
+		typename DataType, typename DataStructure, typename View,
+		typename ImfR, typename ImfC, Backend backend
+	>
+	RC clear(
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x
+	) noexcept {
+		(void) x;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Resizes the Scalar to have at least the given number of nonzeroes.
+	 * The contents of the scalar are not retained.
+	 */
+	template<
+		typename InputType, typename InputStructure,
+		typename length_type, Backend backend
+	>
+	RC resize( Scalar< InputType, InputStructure, backend > &s, const length_type new_nz ) {
+		(void) s;
+		(void) new_nz;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Resizes the vector to have at least the given number of nonzeroes.
+	 * The contents of the vector are not retained.
+	 */
+	template<
+		typename InputType, typename InputStructure, typename View,
+		typename ImfR, typename ImfC,
+		typename length_type, Backend backend
+	>
+	RC resize(
+		Vector< InputType, InputStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
+		const length_type new_nz
+	) noexcept {
+		(void) x;
+		(void) new_nz;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Resizes the matrix to have at least the given number of nonzeroes.
+	 * The contents of the matrix are not retained.
+	 */
+	template<
+		typename InputType, typename InputStructure, typename InputView,
+		typename InputImfR, typename InputImfC, Backend backend
+	>
+	RC resize(
+		Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &A,
+		const size_t new_nz
+	) noexcept {
+		(void) A;
+		(void) new_nz;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Sets all elements of a Vector to the given value.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename DataType, typename DataStructure, typename View,
+		typename ImfR, typename ImfC,
+		typename T, typename ValStructure,
+		Backend backend
+	>
+	RC set(
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
+		const Scalar< T, ValStructure, backend > val,
+		const std::enable_if_t<
+			!alp::is_object< DataType >::value &&
+			!alp::is_object< T >::value
+		> * const = nullptr
+	) {
+		(void) x;
+		(void) val;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Sets the element of a given Vector at a given position to a given value.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename DataType, typename DataStructure, typename View,
+		typename ImfR, typename ImfC,
+		typename T, typename ValStructure,
+		Backend backend
+	>
+	RC setElement(
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
+		const Scalar< T, ValStructure, backend > val,
+		const size_t i,
+		const std::enable_if_t<
+			!alp::is_object< DataType >::value &&
+			!alp::is_object< T >::value
+		> * const = nullptr
+	) {
+		(void) x;
+		(void) val;
+		(void) i;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Sets all elements of the output matrix to the values of the input matrix.
+	 * C = A
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView,
+		typename OutputImfR, typename OutputImfC,
+		typename InputType, typename InputStructure, typename InputView,
+		typename InputImfR, typename InputImfC,
+		Backend backend
+	>
+	RC set(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &A
+	) noexcept {
+		(void) C;
+		(void) A;
+		return UNSUPPORTED;
+	}
+
+	/**
+	 * Sets all elements of the given matrix to the value of the given scalar.
+	 * C = val
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView,
+		typename OutputImfR, typename OutputImfC,
+		typename InputType, typename InputStructure,
+		Backend backend
+	>
+	RC set(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+		const Scalar< InputType, InputStructure, backend > &val
+	) noexcept {
+		(void) C;
+		(void) val;
+		return UNSUPPORTED;
+	}
+
 	/**
 	 * Constructs a dense vector from a container of exactly alp::size(x)
 	 * elements. This function aliases to the buildVector routine that takes
@@ -81,16 +306,18 @@ namespace alp {
 	 * Any existing values in \a x that overlap with newer values will hence
 	 * be overwritten.
 	 */
-	template< Descriptor descr = descriptors::no_operation,
+	template<
+		Descriptor descr = descriptors::no_operation,
 		typename InputType,
 		class Merger = operators::right_assign< InputType >,
 		typename fwd_iterator1, typename fwd_iterator2,
 		Backend backend, typename Coords
 	>
-	RC buildVector( internal::Vector< InputType, backend > & x,
+	RC buildVector(
+		internal::Vector< InputType, backend > &x,
 		fwd_iterator1 ind_start, const fwd_iterator1 ind_end,
 		fwd_iterator2 val_start, const fwd_iterator2 val_end,
-		const IOMode mode, const Merger & merger = Merger()
+		const IOMode mode, const Merger &merger = Merger()
 	) {
 		operators::right_assign< InputType > accum;
 		return buildVector< descr >( x, accum, ind_start, ind_end, val_start, val_end, mode, merger );
@@ -151,13 +378,15 @@ namespace alp {
 	 * @returns alp::ILLEGAL When a nonzero has an index larger than alp::size(x).
 	 * @returns alp::PANIC   If an unmitigable error has occured during ingestion.
 	 */
-	template< Descriptor descr = descriptors::no_operation,
+	template<
+		Descriptor descr = descriptors::no_operation,
 		typename InputType,
 		class Merger = operators::right_assign< InputType >,
 		typename fwd_iterator1, typename fwd_iterator2,
 		Backend backend, typename Coords
 	>
-	RC buildVectorUnique( internal::Vector< InputType, backend > & x,
+	RC buildVectorUnique(
+		internal::Vector< InputType, backend > &x,
 		fwd_iterator1 ind_start, const fwd_iterator1 ind_end,
 		fwd_iterator2 val_start, const fwd_iterator2 val_end,
 		const IOMode mode
@@ -226,7 +455,7 @@ namespace alp {
 	 *           \em once; the three input iterators \a I, \a J, and \a V thus
 	 *           may have exactly one copyeach, meaning that all input may be
 	 *           traversed only once.
-	 *        -# Each of the at most three iterator copies will be incremented
+	 *  base/blas1.hpp      -# Each of the at most three iterator copies will be incremented
 	 *           at most \f$ \mathit{nz} \f$ times.
 	 *        -# Each position of the each of the at most three iterator copies
 	 *           will be dereferenced exactly once.
@@ -249,15 +478,17 @@ namespace alp {
 	 *       matrix construction is costly and the user is referred to the
 	 *       costly buildMatrix() function instead.
 	 */
-	template< Descriptor descr = descriptors::no_operation,
+	template<
+		Descriptor descr = descriptors::no_operation,
 		typename InputType,
 		typename fwd_iterator1 = const size_t * __restrict__,
 		typename fwd_iterator2 = const size_t * __restrict__,
 		typename fwd_iterator3 = const InputType * __restrict__,
 		typename length_type = size_t,
-		Backend implementation = config::default_backend >
+		Backend implementation = config::default_backend
+	>
 	RC buildMatrixUnique(
-		internal::Matrix< InputType, implementation > & A,
+		internal::Matrix< InputType, implementation > &A,
 		fwd_iterator1 I, fwd_iterator1 I_end,
 		fwd_iterator2 J, fwd_iterator2 J_end,
 		fwd_iterator3 V, fwd_iterator3 V_end,
@@ -275,13 +506,15 @@ namespace alp {
 	 * Alias that transforms a set of pointers and an array length to the
 	 * buildMatrixUnique variant based on iterators.
 	 */
-	template< Descriptor descr = descriptors::no_operation,
+	template<
+		Descriptor descr = descriptors::no_operation,
 		typename InputType,
 		typename fwd_iterator1 = const size_t * __restrict__,
 		typename fwd_iterator2 = const size_t * __restrict__,
 		typename fwd_iterator3 = const InputType * __restrict__,
 		typename length_type = size_t,
-		Backend implementation = config::default_backend >
+		Backend implementation = config::default_backend
+	>
 	RC buildMatrixUnique( internal::Matrix< InputType, implementation > &A,
 		fwd_iterator1 I, fwd_iterator2 J, fwd_iterator3 V,
 		const size_t nz, const IOMode mode
@@ -295,13 +528,19 @@ namespace alp {
 	}
 
 	/** Version of the above #buildMatrixUnique that handles \a NULL value pointers. */
-	template< Descriptor descr = descriptors::no_operation,
+	template<
+		Descriptor descr = descriptors::no_operation,
 		typename InputType,
 		typename fwd_iterator1 = const size_t * __restrict__,
 		typename fwd_iterator2 = const size_t * __restrict__,
 		typename length_type = size_t,
-		Backend implementation = config::default_backend >
-	RC buildMatrixUnique( internal::Matrix< InputType, implementation > & A, fwd_iterator1 I, fwd_iterator2 J, const length_type nz, const IOMode mode ) {
+		Backend implementation = config::default_backend
+	>
+	RC buildMatrixUnique(
+		internal::Matrix< InputType, implementation > &A,
+		fwd_iterator1 I, fwd_iterator2 J,
+		const length_type nz, const IOMode mode
+	) {
 		// derive synchronized iterator
 		auto start = utils::makeSynchronized( I, J, I + nz, J + nz );
 		const auto end = utils::makeSynchronized( I + nz, J + nz, I + nz, J + nz );
@@ -354,7 +593,8 @@ namespace alp {
 		typename InputType, typename fwd_iterator,
 		Backend implementation = config::default_backend
 	>
-	RC buildMatrixUnique( internal::Matrix< InputType, implementation > & A,
+	RC buildMatrixUnique(
+		internal::Matrix< InputType, implementation > &A,
 		fwd_iterator start, const fwd_iterator end,
 		const IOMode mode
 	) {
@@ -362,7 +602,7 @@ namespace alp {
 		(void)start;
 		(void)end;
 		(void)mode;
-		return PANIC;
+		return UNSUPPORTED;
 	}
 
 	/** @} */
diff --git a/include/alp/blas1.hpp b/include/alp/blas1.hpp
index 36a6d4959..5def561b9 100644
--- a/include/alp/blas1.hpp
+++ b/include/alp/blas1.hpp
@@ -34,371 +34,5 @@
  #include <alp/reference/blas1.hpp>
 #endif
 
-// the remainder implements several backend-agnostic short-cuts
-
-#define NO_CAST_RING_ASSERT( x, y, z )                                             \
-	static_assert( x,                                                              \
-		"\n\n"                                                                     \
-		"************************************************************************" \
-		"************************************************************************" \
-		"**********************\n"                                                 \
-		"*     ERROR      | " y " " z ".\n"                                        \
-		"************************************************************************" \
-		"************************************************************************" \
-		"**********************\n"                                                 \
-		"* Possible fix 1 | Remove no_casting from the template parameters in "    \
-		"this call to " y ".\n"                                                    \
-		"* Possible fix 2 | For all mismatches in the domains of input "           \
-		"parameters and the semiring domains, as specified in the documentation "  \
-		"of the function " y ", supply an input argument of the expected type "    \
-		"instead.\n"                                                               \
-		"* Possible fix 3 | Provide a compatible semiring where all domains "      \
-		"match those of the input parameters, as specified in the documentation "  \
-		"of the function " y ".\n"                                                 \
-		"************************************************************************" \
-		"************************************************************************" \
-		"**********************\n" );
-
-namespace alp {
-
-	/**
-	 * A standard vector to use for mask parameters. Indicates no mask shall be
-	 * used.
-	 */
-	#ifdef NO_MASK
-		#undef NO_MASK
-	#endif
-	#define NO_MASK internal::Vector< bool >( 0 )
-
-	/**
-	 * Executes an arbitrary element-wise user-defined function \a f using any
-	 * number of vectors of equal length, following the nonzero pattern of the
-	 * given vector \a x.
-	 *
-	 * The user-defined function is passed as a lambda which can capture, at
-	 * the very least, other instances of type alp::Vector. Use of this function
-	 * is preferable whenever multiple element-wise operations are requested that
-	 * use one or more identical input vectors. Performing the computation one
-	 * after the other in blocking mode would require the same vector to be
-	 * streamed multiple times, while with this function the operations can be
-	 * fused explicitly instead.
-	 *
-	 * It shall always be legal to capture non-GraphBLAS objects for read access
-	 * only. It shall \em not be legal to capture instances of type alp::Matrix
-	 * for read and/or write access.
-	 *
-	 * If alp::Properties::writableCaptured evaluates true then captured
-	 * non-GraphBLAS objects can also be written to, not just read from. The
-	 * captured variable is, however, completely local to the calling user process
-	 * only-- it will not be synchronised between user processes.
-	 * As a rule of thumb, data-centric GraphBLAS implementations \em cannot
-	 * support this and will thus have alp::Properties::writableCaptured evaluate
-	 * to false. A portable GraphBLAS algorithm should provide a different code
-	 * path to handle this case.
-	 * When it is legal to write to captured scalar, this function can, e.g., be
-	 * used to perform reduction-like operations on any number of equally sized
-	 * input vectors.  This would be preferable to a chained number of calls to
-	 * alp::dot in case where some vectors are shared between subsequent calls,
-	 * for example; the shared vectors are streamed only once using this lambda-
-	 * enabled function.
-	 *
-	 * \warning The lambda shall only be executed on the data local to the user
-	 *          process calling this function! This is different from the various
-	 *          fold functions, or alp::dot, in that the semantics of those
-	 *          functions always end with a globally synchronised result. To
-	 *          achieve the same effect with user-defined lambdas, the users
-	 *          should manually prescribe how to combine the local results into
-	 *          global ones, for instance, by a subsequent call to
-	 *          alp::collectives<>::allreduce.
-	 *
-	 * \note This is an addition to the GraphBLAS. It is alike user-defined
-	 *       operators, monoids, and semirings, except it allows execution on
-	 *       arbitrarily many inputs and arbitrarily many outputs.
-	 *
-	 * @tparam Func the user-defined lambda function type.
-	 * @tparam DataType the type of the user-supplied vector example.
-	 * @tparam backend  the backend type of the user-supplied vector example.
-	 *
-	 * @param[in] f The user-supplied lambda. This lambda should only capture
-	 *              and reference vectors of the same length as \a x. The lambda
-	 *              function should prescribe the operations required to execute
-	 *              at a given index \a i. Captured GraphBLAS vectors can access
-	 *              that element via the operator[]. It is illegal to access any
-	 *              element not at position \a i. The lambda takes only the single
-	 *              parameter \a i of type <code>const size_t</code>. Captured
-	 *              scalars will not be globally updated-- the user must program
-	 *              this explicitly. Scalars and other non-GraphBLAS containers
-	 *              are always local to their user process.
-	 * @param[in] x The vector the lambda will be executed on. This argument
-	 *              determines which indices \a i will be accessed during the
-	 *              elementwise operation-- elements with indices \a i that
-	 *              do not appear in \a x will be skipped during evaluation of
-	 *              \a f.
-	 * @param[in] args All vectors the lambda is to access elements of. Must be of
-	 *                 the same length as \a x. If this constraint is violated,
-	 *                 alp::MISMATCH shall be returned. <em>This is a variadic
-	 *                 argument and can contain any number of containers of type
-	 *                 alp::Vector, passed as though they were separate
-	 *                 arguments.</em>
-	 *
-	 * \note In future GraphBLAS implementations, \a args, apart from doing
-	 *       dimension checking, should also facilitate any data distribution
-	 *       necessary to successfully execute the element-wise operation. Current
-	 *       implementations do not require this since they use the same static
-	 *       distribution for all containers.
-	 *
-	 * \warning Using a alp::Vector inside a lambda passed to this function while
-	 *          not passing that same vector into \a args, will result in undefined
-	 *          behaviour.
-	 *
-	 * \note It would be natural to have \a x equal to one of the captured
-	 *       GraphBLAS vectors in \a f.
-	 *
-	 * \warning Due to the constraints on \a f described above, it is illegal to
-	 *          capture some vector \a y and have the following line in the body
-	 *          of \a f: <code>x[i] += x[i+1]</code>. Vectors can only be
-	 *          dereferenced at position \a i and \a i alone.
-	 *
-	 * @return alp::SUCCESS  When the lambda is successfully executed.
-	 * @return alp::MISMATCH When two or more vectors passed to \a args are not of
-	 *                       equal length.
-	 *
-	 * \parblock
-	 * \par Example.
-	 *
-	 * An example valid use:
-	 *
-	 * \code
-	 * void f(
-	 *      double &alpha,
-	 *      alp::Vector< double > &y,
-	 *      const double beta,
-	 *      const alp::Vector< double > &x,
-	 *      const alp::Semiring< double > ring
-	 * ) {
-	 *      assert( alp::size(x) == alp::size(y) );
-	 *      assert( alp::nnz(x) == alp::size(x) );
-	 *      assert( alp::nnz(y) == alp::size(y) );
-	 *      alpha = ring.getZero();
-	 *      alp::eWiseLambda(
-	 *          [&alpha,beta,&x,&y,ring]( const size_t i ) {
-	 *              double mul;
-	 *              const auto mul_op = ring.getMultiplicativeOperator();
-	 *              const auto add_op = ring.getAdditiveOperator();
-	 *              alp::apply( y[i], beta, x[i], mul_op );
-	 *              alp::apply( mul, x[i], y[i], mul_op );
-	 *              alp::foldl( alpha, mul, add_op );
-	 *      }, x, y );
-	 *      alp::collectives::allreduce( alpha, add_op );
-	 * }
-	 * \endcode
-	 *
-	 * This code takes a value \a beta, a vector \a x, and a semiring \a ring and
-	 * computes:
-	 *   1) \a y as the element-wise multiplication (under \a ring) of \a beta and
-	 *      \a x; and
-	 *   2) \a alpha as the dot product (under \a ring) of \a x and \a y.
-	 * This function can easily be made agnostic to whatever exact semiring is used
-	 * by templating the type of \a ring. As it is, this code is functionally
-	 * equivalent to:
-	 *
-	 * \code
-	 * alp::eWiseMul( y, beta, x, ring );
-	 * alp::dot( alpha, x, y, ring );
-	 * \endcode
-	 *
-	 * The version using the lambdas, however, is expected to execute
-	 * faster as both \a x and \a y are streamed only once, while the
-	 * latter code may stream both vectors twice.
-	 * \endparblock
-	 *
-	 * \warning The following code is invalid:
-	 *          \code
-	 *              template< class Operator >
-	 *              void f(
-	 *                   alp::Vector< double > &x,
-	 *                   const Operator op
-	 *              ) {
-	 *                   alp::eWiseLambda(
-	 *                       [&x,&op]( const size_t i ) {
-	 *                           alp::apply( x[i], x[i], x[i+1], op );
-	 *                   }, x );
-	 *              }
-	 *          \endcode
-	 *          Only a Vector::lambda_reference to position exactly equal to \a i
-	 *          may be used within this function.
-	 *
-	 * \warning There is no similar concept in the official GraphBLAS specs.
-	 *
-	 * \warning Captured scalars will be local to the user process executing the
-	 *          lambda. To retrieve the global dot product, an allreduce must
-	 *          explicitly be called.
-	 *
-	 * @see Vector::operator[]()
-	 * @see Vector::lambda_reference
-	 */
-	template<
-		typename Func,
-		typename DataType,
-		Backend backend,
-		typename... Args
-	>
-	RC eWiseLambda(
-		const Func f,
-		const internal::Vector< DataType, backend > & x, Args...
-	) {
-		(void)f;
-		(void)x;
-		return PANIC;
-	}
-
-	/**
-	 * Alias for a simple reduce call.
-	 *
-	 * Will use no mask and will set the accumulator to the given Monoid's
-	 * operator.
-	 */
-	template<
-		Descriptor descr = descriptors::no_operation,
-		class Monoid,
-		typename IOType, typename InputType,
-		Backend backend
-	>
-	RC foldl( IOType &x,
-		const internal::Vector< InputType, backend > &y,
-		const Monoid &monoid = Monoid(),
-		const typename std::enable_if< !alp::is_object< IOType >::value &&
-			alp::is_monoid< Monoid >::value,
-		void >::type * const = NULL
-	) {
-		// create empty mask
-		internal::Vector< bool, backend > mask( 0 );
-		// call regular reduce function
-		return foldl< descr >( x, y, mask, monoid );
-	}
-
-	/**
-	 * Alias for a simple reduce call.
-	 *
-	 * Will use no mask and will set the accumulator to the given Monoid's
-	 * operator.
-	 */
-	template<
-		Descriptor descr = descriptors::no_operation,
-		class OP,
-		typename IOType, typename InputType,
-		Backend backend
-	>
-	RC foldl( IOType &x,
-		const internal::Vector< InputType, backend > &y,
-		const OP &op = OP(),
-		const typename std::enable_if< !alp::is_object< IOType >::value &&
-			alp::is_operator< OP >::value,
-		void >::type * const = NULL
-	) {
-		// create empty mask
-		internal::Vector< bool, backend > mask( 0 );
-		// call regular reduce function
-		return foldl< descr >( x, y, mask, op );
-	}
-
-	/**
-	 * Provides a generic implementation of the dot computation on semirings by
-	 * translating it into a dot computation on an additive commutative monoid
-	 * with any multiplicative operator.
-	 *
-	 * For return codes, exception behaviour, performance semantics, template
-	 * and non-template arguments, @see alp::dot.
-	 */
-	template<
-		Descriptor descr = descriptors::no_operation, class Ring,
-		typename IOType, typename InputType1, typename InputType2,
-		Backend backend
-	>
-	RC dot( IOType &x,
-		const internal::Vector< InputType1, backend > &left,
-		const internal::Vector< InputType2, backend > &right,
-		const Ring &ring = Ring(),
-		const typename std::enable_if<
-			!alp::is_object< InputType1 >::value &&
-			!alp::is_object< InputType2 >::value &&
-			!alp::is_object< IOType >::value &&
-			alp::is_semiring< Ring >::value,
-		void >::type * const = NULL
-	) {
-		return alp::dot< descr >( x,
-			left, right,
-			ring.getAdditiveMonoid(),
-			ring.getMultiplicativeOperator()
-		);
-	}
-
-	/**
-	 * Provides a generic implementation of the 2-norm computation.
-	 *
-	 * Proceeds by computing a dot-product on itself and then taking the square
-	 * root of the result.
-	 *
-	 * This function is only available when the output type is floating point.
-	 *
-	 * For return codes, exception behaviour, performance semantics, template
-	 * and non-template arguments, @see alp::dot.
-	 *
-	 * @param[out] x The 2-norm of \a y. The input value of \a x will be ignored.
-	 * @param[in]  y The vector to compute the norm of.
-	 * @param[in] ring The Semiring under which the 2-norm is to be computed.
-	 *
-	 * \warning This function computes \a x out-of-place. This is contrary to
-	 *          standard ALP/GraphBLAS functions that are always in-place.
-	 *
-	 * \warning A \a ring is not sufficient for computing a two-norm. This
-	 *          implementation assumes the standard <tt>sqrt</tt> function
-	 *          must be applied on the result of a dot-product of \a y with
-	 *          itself under the supplied semiring.
-	 */
-	// template<
-	// 	Descriptor descr = descriptors::no_operation, class Ring,
-	// 	typename InputType, typename OutputType, typename OutputStructure,
-	// 	Backend backend
-	// >
-	// RC norm2( Scalar< OutputType, OutputStructure, backend > &x,
-	// 	const internal::Vector< InputType, backend > &y,
-	// 	const Ring &ring = Ring(),
-	// 	const typename std::enable_if<
-	// 		std::is_floating_point< OutputType >::value,
-	// 	void >::type * const = NULL
-	// ) {
-	// 	RC ret = alp::dot< descr >( x, y, y, ring );
-	// 	if( ret == SUCCESS ) {
-	// 		x = sqrt( x );
-	// 	}
-	// 	return ret;
-	// }
-
-	/** Specialization for C++ scalars */
-	template<
-		Descriptor descr = descriptors::no_operation, class Ring,
-		typename InputType, typename OutputType,
-		Backend backend
-	>
-	RC norm2( OutputType &x,
-		const internal::Vector< InputType, backend > &y,
-		const Ring &ring = Ring(),
-		const typename std::enable_if<
-			std::is_floating_point< OutputType >::value,
-		void >::type * const = NULL
-	) {
-		RC ret = alp::dot< descr >( x, y, y, ring );
-		if( ret == SUCCESS ) {
-			x = sqrt( x );
-		}
-		return ret;
-	}
-
-
-} // namespace alp
-
-#undef NO_CAST_RING_ASSERT
-
 #endif // end ``_H_ALP_BLAS1''
 
diff --git a/include/alp/io.hpp b/include/alp/io.hpp
new file mode 100644
index 000000000..f99cf5664
--- /dev/null
+++ b/include/alp/io.hpp
@@ -0,0 +1,34 @@
+
+/*
+ *   Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 21st of February, 2017
+ */
+
+#ifndef _H_ALP_IO
+#define _H_ALP_IO
+
+#include "base/io.hpp"
+
+// now include all specialisations contained in the backend directories:
+#ifdef _ALP_WITH_REFERENCE
+ #include <alp/reference/io.hpp>
+#endif
+
+#endif // end ``_H_ALP_IO''
+
diff --git a/include/alp/reference/blas0.hpp b/include/alp/reference/blas0.hpp
index 90ce16107..bc07e948b 100644
--- a/include/alp/reference/blas0.hpp
+++ b/include/alp/reference/blas0.hpp
@@ -32,7 +32,6 @@
 #include <alp/type_traits.hpp>
 #include <alp/scalar.hpp>
 
-#ifndef NO_CAST_ASSERT
 #define NO_CAST_ASSERT( x, y, z )                                              \
 	static_assert( x,                                                          \
 		"\n\n"                                                                 \
@@ -49,7 +48,6 @@
 		"********************************************************************" \
 		"********************************************************************" \
 		"******************************\n" );
-#endif
 
 namespace alp {
 
@@ -187,46 +185,6 @@ namespace alp {
 	 * @{
 	 */
 
-	/** Resizes the Scalar to have at least the given number of nonzeroes.
-	 * The contents of the scalar are not retained.
-	 *
-	 * Resizing of dense containers is not allowed as the capacity is determined
-	 * by the container dimensions and the storage scheme. Therefore, this
-	 * function will not change the capacity of the container.
-	 * 
-	 * The resize function for Scalars exist to maintain compatibility with
-	 * other containers (i.e., vector and matrix).
-	 *
-	 * Even though the capacity remains unchanged, the contents of the scalar
-	 * are not retained to maintain compatibility with the general specification.
-	 * However, the actual memory will not be reallocated. Rather, the scalar
-	 * will be marked as uninitialized.
-	 * 
-	 * @param[in] x      The Scalar to be resized.
-	 * @param[in] new_nz The number of nonzeroes this vector is to contain.
-	 *
-	 * @return SUCCESS   If \a new_nz is not larger than 1.
-	 *         ILLEGAL   If \a new_nz is larger than 1.
-	 *
-	 * \parblock
-	 * \par Performance semantics.
-	 *        -$ This function consitutes \f$ \Theta(1) \f$ work.
-	 *        -# This function allocates \f$ \Theta(0) \f$
-	 *           bytes of dynamic memory.
-	 *        -# This function does not make system calls.
-	 * \endparblock
-	 * \todo add documentation. In particular, think about the meaning with \a P > 1.
-	 */
-	template< typename InputType, typename InputStructure, typename length_type >
-	RC resize( Scalar< InputType, InputStructure, reference > &s, const length_type new_nz ) {
-		if( new_nz <= 1 ) {
-			setInitialized( s, false );
-			return SUCCESS;
-		} else {
-			return ILLEGAL;
-		}
-	}
-
 	/**
 	 * @brief Reference implementation of \a apply.
 	 */
@@ -373,5 +331,7 @@ namespace alp {
 	
 } // end namespace ``alp''
 
+#undef NO_CAST_ASSERT
+
 #endif // end ``_H_ALP_REFERENCE_BLAS0''
 
diff --git a/include/alp/reference/blas1.hpp b/include/alp/reference/blas1.hpp
index f2ed86f5c..cd8a9f099 100644
--- a/include/alp/reference/blas1.hpp
+++ b/include/alp/reference/blas1.hpp
@@ -35,7 +35,6 @@
 #include "blas2.hpp"
 #include <graphblas/utils/iscomplex.hpp> // use from grb
 
-#ifndef NO_CAST_ASSERT
 #define NO_CAST_ASSERT( x, y, z )                                              \
 	static_assert( x,                                                          \
 		"\n\n"                                                                 \
@@ -52,7 +51,6 @@
 		"********************************************************************" \
 		"********************************************************************" \
 		"******************************\n" );
-#endif
 
 #define NO_CAST_OP_ASSERT( x, y, z )                                           \
 	static_assert( x,                                                          \
@@ -179,388 +177,6 @@ namespace alp {
 	 * @{
 	 */
 
-	/**
-	 * Clears all elements from the given vector \a x.
-	 *
-	 * At the end of this operation, the number of nonzero elements in this vector
-	 * will be zero. The size of the vector remains unchanged.
-	 *
-	 * @return alp::SUCCESS When the vector is successfully cleared.
-	 *
-	 * \note This function cannot fail.
-	 *
-	//  * \parblock
-	//  * \par Performance semantics
-	//  *      This function
-	//  *        -# contains \f$ \mathcal{O}(n) \f$ work,
-	//  *        -# will not allocate new dynamic memory,
-	//  *        -# will take at most \f$ \Theta(1) \f$ memory beyond the memory
-	//  *           already used by the application before the call to this
-	//  *           function.
-	//  *        -# will move at most \f$ \mathit{sizeof}(\mathit{bool}) +
-	//  *           \mathit{sizeof}(\mathit{size\_t}) \f$ bytes of data.
-	//  * \endparblock
-	 */
-	template<
-		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC
-	>
-	RC clear(
-		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x
-	) noexcept {
-		throw std::runtime_error( "Needs an implementation" );
-		return SUCCESS;
-	}
-
-	/**
-	 * Request the size (dimension) of a given Vector.
-	 *
-	 * The dimension is set at construction of the given Vector and cannot
-	 * be changed. A call to this function shall always succeed.
-	 *
-	 * @tparam DataType      The type of elements contained in the vector \a x.
-	 * @tparam DataStructure The structure of the vector \a x.
-	 * @tparam View          The view type applied to the vector \a x.
-	 *
-	 * @param[in] x The Vector of which to retrieve the size.
-	 *
-	 * @return The size of the Vector \a x.
-	 *
-	//  * \parblock
-	//  * \par Performance semantics
-	//  * A call to this function
-	//  *  -# consists of \f$ \Theta(1) \f$ work;
-	//  *  -# moves \f$ \Theta(1) \f$ bytes of memory;
-	//  *  -# does not allocate any dynamic memory;
-	//  *  -# shall not make any system calls.
-	//  * \endparblock
-	 */
-	template< typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC >
-	size_t size( const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x ) noexcept {
-		return getLength( x );
-	}
-
-	/**
-	 * Request the number of nonzeroes in a given Vector.
-	 *
-	 * A call to this function always succeeds.
-	 *
-	 * @tparam DataType      The type of elements contained in the vector \a x.
-	 * @tparam DataStructure The structure of the vector \a x.
-	 * @tparam View          The view type applied to the vector \a x.
-	 *
-	 * @param[in] x The Vector of which to retrieve the number of nonzeroes.
-	 *
-	 * @return The number of nonzeroes in \a x.
-	 *
-	//  * \parblock
-	//  * \par Performance semantics
-	//  * A call to this function
-	//  *   -# consists of \f$ \Theta(1) \f$ work;
-	//  *   -# moves \f$ \Theta(1) \f$ bytes of memory;
-	//  *   -# does not allocate nor free any dynamic memory;
-	//  *   -# shall not make any system calls.
-	//  * \endparblock
-	 */
-	template< typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC >
-	size_t nnz( const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x ) noexcept {
-		throw std::runtime_error( "Needs an implementation." );
-		return 0;
-	}
-
-	/** Resizes the vector to have at least the given number of nonzeroes.
-	 * The contents of the vector are not retained.
-	 *
-	 * Resizing of dense containers is not allowed as the capacity is determined
-	 * by the container dimensions and the storage scheme. Therefore, this
-	 * function will not change the capacity of the vector.
-	 *
-	 * Even though the capacity remains unchanged, the contents of the vector
-	 * are not retained to maintain compatibility with the general specification.
-	 * However, the actual memory will not be reallocated. Rather, the vector
-	 * will be marked as uninitialized.
-	 *
-	 * @param[in] x      The Vector to be resized.
-	 * @param[in] new_nz The number of nonzeroes this vector is to contain.
-	 *
-	 * @return SUCCESS   If \a new_nz is not larger than the current capacity
-	 *                   of the vector.
-	 *         ILLEGAL   If \a new_nz is larger than the current capacity of
-	 *                   the vector.
-	 *
-	 * \parblock
-	 * \par Performance semantics.
-	 *        -$ This function consitutes \f$ \Theta(1) \f$ work.
-	 *        -# This function allocates \f$ \Theta(0) \f$
-	 *           bytes of dynamic memory.
-	 *        -# This function does not make system calls.
-	 * \endparblock
-	 * \todo add documentation. In particular, think about the meaning with \a P > 1.
-	 */
-	template< typename InputType, typename InputStructure, typename View, typename ImfR, typename ImfC, typename length_type >
-	RC resize( Vector< InputType, InputStructure, Density::Dense, View, ImfR, ImfC, reference > &x, const length_type new_nz ) {
-		(void)x;
-		(void)new_nz;
-		// TODO implement
-		// setInitialized( x, false );
-		return PANIC;
-	}
-
-	/**
-	 * Sets all elements of a Vector to the given value. Can be masked.
-	 *
-	 * This function is functionally equivalent to
-	 * \code
-	 * alp::operators::right_assign< DataType > op;
-	 * return foldl< descr >( x, val, op );
-	 * \endcode,
-	 * \code
-	 * alp::operators::left_assign< DataType > op;
-	 * return foldr< descr >( val, x, op );
-	 * \endcode, and the following pseudocode
-	 * \code
-	 * for( size_t i = 0; i < size(x); ++i ) {
-	 *     if( mask(i) ) { setElement( x, i, val ); }
-	 * \endcode.
-	 *
-	 * @tparam descr         The descriptor used for this operation.
-	 * @tparam DataType      The type of each element in the vector \a x.
-	 * @tparam DataStructure The structure of the vector \a x.
-	 * @tparam View          The view type applied to the vector \a x.
-	 * @tparam T             The type of the given value.
-	 *
-	 * \parblock
-	 * \par Accepted descriptors
-	 *   -# alp::descriptors::no_operation
-	 *   -# alp::descriptors::no_casting
-	 * \endparblock
-	 *
-	 * @param[in,out] x The Vector of which every element is to be set to equal
-	 *                  \a val.
-	 * @param[in]   val The value to set each element of \a x equal to.
-	 *
-	 * @returns SUCCESS       When the call completes successfully.
-	 *
-	 * When \a descr includes alp::descriptors::no_casting and if \a T does not
-	 * match \a DataType, the code shall not compile.
-	 *
-	//  * \parblock
-	//  * \par Performance semantics
-	//  * A call to this function
-	//  *   -# consists of \f$ \Theta(n) \f$ work;
-	//  *   -# moves \f$ \Theta(n) \f$ bytes of memory;
-	//  *   -# does not allocate nor free any dynamic memory;
-	//  *   -# shall not make any system calls.
-	//  * \endparblock
-	 *
-	 * @see alp::foldl.
-	 * @see alp::foldr.
-	 * @see alp::operators::left_assign.
-	 * @see alp::operators::right_assign.
-	 * @see alp::setElement.
-	 */
-	template<
-		Descriptor descr = descriptors::no_operation,
-		typename DataType, typename DataStructure, typename View,
-		typename ImfR, typename ImfC,
-		typename T, typename ValStructure
-	>
-	RC set(
-		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x,
-		const Scalar< T, ValStructure, reference > val,
-		const typename std::enable_if<
-			!alp::is_object< DataType >::value &&
-			!alp::is_object< T >::value,
-		void >::type * const = NULL
-	) {
-		// static sanity checks
-		NO_CAST_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, unmasked)",
-			"called with a value type that does not match that of the given "
-			"vector" );
-
-		if( ! internal::getInitialized( val ) ) {
-			internal::setInitialized( x, false );
-			return SUCCESS;
-		}
-
-		// foldl requires left-hand side to be initialized prior to the call
-		internal::setInitialized( x, true );
-		return foldl( x, val, alp::operators::right_assign< DataType >() );
-	}
-
-	/**
-	 * Sets the element of a given Vector at a given position to a given value.
-	 *
-	 * If the input Vector \a x already has an element \f$ x_i \f$, that element
-	 * is overwritten to the given value \a val. If no such element existed, it
-	 * is added and set equal to \a val. The number of nonzeroes in \a x may thus
-	 * be increased by one due to a call to this function.
-	 *
-	 * The parameter \a i may not be greater or equal than the size of \a x.
-	 *
-	 * @tparam descr         The descriptor to be used during evaluation of this
-	 *                       function.
-	 * @tparam DataType      The type of the elements of \a x.
-	 * @tparam DataStructure The structure of the vector \a x.
-	 * @tparam View          The view type applied to the vector \a x.
-	 * @tparam T             The type of the value to be set.
-	 *
-	 * @param[in,out] x The vector to be modified.
-	 * @param[in]   val The value \f$ x_i \f$ should read after function exit.
-	 * @param[in]     i The index of the element of \a x to set.
-	 *
-	 * @return alp::SUCCESS   Upon successful execution of this operation.
-	 * @return alp::MISMATCH  If \a i is greater or equal than the dimension of
-	 *                        \a x.
-	 *
-	 * \parblock
-	 * \par Accepted descriptors
-	 *   -# alp::descriptors::no_operation
-	 *   -# alp::descriptors::no_casting
-	 * \endparblock
-	 *
-	 * When \a descr includes alp::descriptors::no_casting and if \a T does not
-	 * match \a DataType, the code shall not compile.
-	 *
-	//  * \parblock
-	//  * \par Performance semantics
-	//  * A call to this function
-	//  *   -# consists of \f$ \Theta(1) \f$ work;
-	//  *   -# moves \f$ \Theta(1) \f$ bytes of memory;
-	//  *   -# does not allocate nor free any dynamic memory;
-	//  *   -# shall not make any system calls.
-	//  * \endparblock
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC, typename ValStructure,
-		typename T
-	>
-	RC setElement( Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x,
-		const Scalar< T, ValStructure, reference > val,
-		const size_t i,
-		const typename std::enable_if< ! alp::is_object< DataType >::value && ! alp::is_object< T >::value, void >::type * const = NULL ) {
-		// static sanity checks
-		NO_CAST_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, at index)",
-			"called with a value type that does not match that of the given "
-			"Vector" );
-
-		throw std::runtime_error( "Needs an implementation." );
-
-		// done
-		return SUCCESS;
-	}
-
-	/** C++ scalar variant */
-	template< Descriptor descr = descriptors::no_operation,
-		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC,
-		typename T
-	>
-	RC setElement( Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > & x,
-		const T val,
-		const size_t i,
-		const typename std::enable_if< ! alp::is_object< DataType >::value && ! alp::is_object< T >::value, void >::type * const = NULL ) {
-		// static sanity checks
-		NO_CAST_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, at index)",
-			"called with a value type that does not match that of the given "
-			"Vector" );
-
-		// delegate
-		return setElement( x, Scalar< T >( val ), i );
-	}
-
-	/**
-	 * Sets the content of a given vector \a x to be equal to that of
-	 * another given vector \a y. Can be masked.
-	 *
-	 * This operation is functionally equivalent to
-	 * \code
-	 * alp::operators::right_assign< T > op;
-	 * alp::foldl( x, y, op );
-	 * \endcode,
-	 * \code
-	 * alp::operators::left_assign < T > op;
-	 * alp::foldr( y, x, op );
-	 * \endcode, as well as the following pseudocode
-	 * \code
-	 * for( each nonzero in y ) {
-	 *    setElement( x, nonzero.index, nonzero.value );
-	 * }
-	 * \endcode.
-	 *
-	 * The vector \a x may not equal \a y.
-	 *
-	 * \parblock
-	 * \par Accepted descriptors
-	 *   -# alp::descriptors::no_operation
-	 *   -# alp::descriptors::no_casting
-	 * \endparblock
-	 *
-	 * @tparam descr           The descriptor of the operation.
-	 * @tparam OutputType      The type of each element in the output vector.
-	 * @tparam InputType       The type of each element in the input vector.
-	 * @tparam OutputStructure The structure of the ouput vector.
-	 * @tparam InputStructure  The structure of the input vector.
-	 * @tparam OuputView       The view applied to the output vector.
-	 * @tparam InputView       The view applied to the input vector.
-	 *
-	 * @param[in,out] x The vector to be set.
-	 * @param[in]     y The source vector.
-	 *
-	 * When \a descr includes alp::descriptors::no_casting and if \a InputType
-	 * does not match \a OutputType, the code shall not compile.
-	 *
-	//  * \parblock
-	//  * \par Performance semantics
-	//  * A call to this function
-	//  *   -# consists of \f$ \Theta(n) \f$ work;
-	//  *   -# moves \f$ \Theta(n) \f$ bytes of memory;
-	//  *   -# does not allocate nor free any dynamic memory;
-	//  *   -# shall not make any system calls.
-	//  * \endparblock
-	 *
-	 * @see alp::foldl.
-	 * @see alp::foldr.
-	 * @see alp::operators::left_assign.
-	 * @see alp::operators::right_assign.
-	 * @see alp::setElement.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC
-	>
-	RC set(
-		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > & x,
-		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > & y
-	) {
-		// static sanity checks
-		NO_CAST_ASSERT(
-			( ! ( descr & descriptors::no_casting ) || std::is_same< OutputType, InputType >::value ), "alp::copy (Vector)", "called with vector parameters whose element data types do not match" );
-		constexpr bool out_is_void = std::is_void< OutputType >::value;
-		constexpr bool in_is_void = std::is_void< OutputType >::value;
-		static_assert( ! in_is_void || out_is_void,
-			"alp::set (reference, Vector <- Vector, masked): "
-			"if input is void, then the output must be also" );
-		static_assert( ! ( descr & descriptors::use_index ) || ! out_is_void,
-			"alp::set (reference, Vector <- Vector, masked): "
-			"use_index descriptor cannot be set if output vector is void" );
-
-		// check contract
-		if( reinterpret_cast< void * >( &x ) == reinterpret_cast< const void * >( &y ) ) {
-			return ILLEGAL;
-		}
-
-		if( getLength( x ) != getLength( y ) ) {
-			return MISMATCH;
-		}
-
-		if( !internal::getInitialized( y ) ) {
-			setInitialized( x, false );
-			return SUCCESS;
-		}
-
-		internal::setInitialized( x, true );
-		return foldl( x, y, alp::operators::right_assign< OutputType >() );
-	}
-
 	/**
 	 * Folds all elements in a ALP Vector \a x into a single value \a beta.
 	 *
@@ -1206,46 +822,6 @@ namespace alp {
 		return SUCCESS;
 	}
 
-	/**
-	 * For all elements in a ALP Vector \a x, fold the value \f$ \beta \f$
-	 * into each element.
-	 *
-	 * Masked operator variant.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType, typename InputStructure,
-		class Op
-	>
-	RC foldl(
-		Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, reference > & x,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & m,
-		const Scalar< InputType, InputStructure, reference > &beta,
-		const Op & op = Op(),
-		const std::enable_if_t<
-			!alp::is_object< IOType >::value && ! alp::is_object< InputType >::value && alp::is_operator< Op >::value
-		> * = nullptr
-	) {
-		// static sanity checks
-		NO_CAST_OP_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< typename Op::D1, IOType >::value ), "alp::foldl",
-			"called with a vector x of a type that does not match the first domain "
-			"of the given operator" );
-		NO_CAST_OP_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< typename Op::D2, InputType >::value ), "alp::foldl",
-			"called on a vector y of a type that does not match the second domain "
-			"of the given operator" );
-		NO_CAST_OP_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< typename Op::D3, IOType >::value ), "alp::foldl",
-			"called on a vector x of a type that does not match the third domain "
-			"of the given operator" );
-		NO_CAST_OP_ASSERT(
-			( ! ( descr & descriptors::no_casting ) || std::is_same< bool, MaskType >::value ), "alp::foldl (reference, vector <- scalar, masked)", "provided mask does not have boolean entries" );
-		if( size( m ) == 0 ) {
-			return foldl< descr >( x, beta, op );
-		}
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
 	/**
 	 * For all elements in a ALP Vector \a x, fold the value \f$ \beta \f$
 	 * into each element.
@@ -1346,46 +922,6 @@ namespace alp {
 		return SUCCESS;
 	}
 
-	/**
-	 * For all elements in a ALP Vector \a x, fold the value \f$ \beta \f$
-	 * into each element.
-	 *
-	 * Masked monoid variant.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType,
-		class Monoid
-	>
-	RC foldl( Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, reference > & x,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & m,
-		const InputType & beta,
-		const Monoid & monoid = Monoid(),
-		const std::enable_if_t<
-			!alp::is_object< IOType >::value && ! alp::is_object< MaskType >::value && ! alp::is_object< InputType >::value && alp::is_monoid< Monoid >::value
-		> * = nullptr
-		) {
-		// static sanity checks
-		NO_CAST_OP_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< typename Monoid::D1, IOType >::value ), "alp::foldl",
-			"called with a vector x of a type that does not match the first domain "
-			"of the given monoid" );
-		NO_CAST_OP_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< typename Monoid::D2, InputType >::value ), "alp::foldl",
-			"called on a vector y of a type that does not match the second domain "
-			"of the given monoid" );
-		NO_CAST_OP_ASSERT( ( ! ( descr & descriptors::no_casting ) || std::is_same< typename Monoid::D3, IOType >::value ), "alp::foldl",
-			"called on a vector x of a type that does not match the third domain "
-			"of the given monoid" );
-		NO_CAST_OP_ASSERT(
-			( ! ( descr & descriptors::no_casting ) || std::is_same< bool, MaskType >::value ), "alp::foldl (reference, vector <- scalar, masked, monoid)", "provided mask does not have boolean entries" );
-		if( size( m ) == 0 ) {
-			return foldl< descr >( x, beta, monoid );
-		}
-
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
 	/**
 	 * Folds all elements in a ALP Vector \a y into the corresponding
 	 * elements from an input/output vector \a x. The vectors must be of equal
@@ -1755,37 +1291,6 @@ namespace alp {
 		return eWiseApply< descr >( z, alpha, beta, monoid.getOperator() );
 	}
 
-	/**
-	 * Computes \f$ z = x \odot y \f$, out of place.
-	 *
-	 * Specialisation for scalar \a y, masked operator version.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
-		typename InputType2, typename InputStructure2,
-		class OP
-	>
-	RC eWiseApply( Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > & z,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & mask,
-		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, reference > & x,
-		const Scalar< InputType2, InputStructure2, reference > &beta,
-		const OP & op = OP(),
-		const typename std::enable_if< ! alp::is_object< OutputType >::value && ! alp::is_object< MaskType >::value && ! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value &&
-				alp::is_operator< OP >::value,
-			void >::type * const = NULL ) {
-	#ifdef _DEBUG
-		std::cout << "In masked eWiseApply ([T1]<-[T2]<-T3, using operator)\n";
-	#endif
-		// check for empty mask
-		if( size( mask ) == 0 ) {
-			return eWiseApply< descr >( z, x, beta, op );
-		}
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
 	/**
 	 * Computes \f$ z = x \odot y \f$, out of place.
 	 *
@@ -1858,87 +1363,6 @@ namespace alp {
 		return SUCCESS;
 	}
 
-	/**
-	 * Computes \f$ z = x \odot y \f$, out of place.
-	 *
-	 * Masked monoid version.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
-		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
-		class Monoid
-	>
-	RC eWiseApply( Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > & z,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & mask,
-		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, reference > & x,
-		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, reference > & y,
-		const Monoid & monoid = Monoid(),
-		const typename std::enable_if< ! alp::is_object< OutputType >::value && ! alp::is_object< MaskType >::value && ! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value &&
-				alp::is_monoid< Monoid >::value,
-			void >::type * const = NULL ) {
-	#ifdef _DEBUG
-		std::cout << "In masked eWiseApply ([T1]<-[T2]<-[T3], using monoid)\n";
-	#endif
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
-	/**
-	 * Computes \f$ z = x \odot y \f$, out of place.
-	 *
-	 * Specialisation for scalar \a x. Masked monoid version.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType1, typename InputStructure1,
-		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
-		class Monoid
-	>
-	RC eWiseApply( Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > & z,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & mask,
-		const Scalar< InputType1, InputStructure1, reference> &alpha,
-		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, reference > & y,
-		const Monoid & monoid = Monoid(),
-		const typename std::enable_if< ! alp::is_object< OutputType >::value && ! alp::is_object< MaskType >::value && ! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value &&
-				alp::is_monoid< Monoid >::value,
-			void >::type * const = NULL ) {
-	#ifdef _DEBUG
-		std::cout << "In masked eWiseApply ([T1]<-T2<-[T3], using monoid)\n";
-	#endif
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
-	/**
-	 * Computes \f$ z = x \odot y \f$, out of place.
-	 *
-	 * Specialisation for scalar \a y. Masked monoid version.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
-		typename InputType2, typename InputStructure2,
-		class Monoid
-	>
-	RC eWiseApply( Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > & z,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & mask,
-		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, reference > & x,
-		const Scalar< InputType2, InputStructure2, reference > &beta,
-		const Monoid & monoid = Monoid(),
-		const typename std::enable_if< ! alp::is_object< OutputType >::value && ! alp::is_object< MaskType >::value && ! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value &&
-				alp::is_monoid< Monoid >::value,
-			void >::type * const = NULL ) {
-	#ifdef _DEBUG
-		std::cout << "In masked eWiseApply ([T1]<-[T2]<-T3, using monoid)\n";
-	#endif
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
 	/**
 	 * Calculates the element-wise operation on one scalar to elements of one
 	 * vector, \f$ z = \alpha .* y \f$, using the given operator. The input and
@@ -2026,33 +1450,6 @@ namespace alp {
 		return SUCCESS;
 	}
 
-	/**
-	 * Computes \f$ z = x \odot y \f$, out of place.
-	 *
-	 * Specialisation for scalar \a x. Masked operator version.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType1, typename InputStructure1,
-		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
-		class OP
-	>
-	RC eWiseApply( Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > & z,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & mask,
-		const Scalar< InputType1, InputStructure1, reference> &alpha,
-		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, reference > & y,
-		const OP & op = OP(),
-		const typename std::enable_if< ! alp::is_object< OutputType >::value && ! alp::is_object< MaskType >::value && ! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value &&
-				alp::is_operator< OP >::value,
-			void >::type * const = NULL ) {
-	#ifdef _DEBUG
-		std::cout << "In masked eWiseApply ([T1]<-T2<-[T3], operator variant)\n";
-	#endif
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
 	/**
 	 * Calculates the element-wise operation on elements of two vectors,
 	 * \f$ z = x .* y \f$, using the given operator. The vectors must be
@@ -2146,33 +1543,6 @@ namespace alp {
 		return SUCCESS;
 	}
 
-	/**
-	 * Computes \f$ z = x \odot y \f$, out of place.
-	 *
-	 * Masked operator version.
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename MaskType, typename MaskStructure, typename MaskView, typename MaskImfR, typename MaskImfC,
-		typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
-		typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
-		class OP
-	>
-	RC eWiseApply( Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > & z,
-		const Vector< MaskType, MaskStructure, Density::Dense, MaskView, MaskImfR, MaskImfC, reference > & mask,
-		const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, reference > & x,
-		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, reference > & y,
-		const OP & op = OP(),
-		const typename std::enable_if< ! alp::is_object< OutputType >::value && ! alp::is_object< MaskType >::value && ! alp::is_object< InputType1 >::value && ! alp::is_object< InputType2 >::value &&
-				alp::is_operator< OP >::value,
-			void >::type * const = NULL ) {
-	#ifdef _DEBUG
-		std::cout << "In masked eWiseApply ([T1]<-[T2]<-[T3], using operator)\n";
-	#endif
-		throw std::runtime_error( "Needs an implementation." );
-		return SUCCESS;
-	}
-
 	/**
 	 * Calculates the element-wise multiplication of two vectors,
 	 *     \f$ z = z + x .* y \f$,
@@ -2468,7 +1838,7 @@ namespace alp {
 		const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, reference > &y,
 		const AddMonoid &addMonoid = AddMonoid(),
 		const AnyOp &anyOp = AnyOp(),
-		const typename std::enable_if_t< !alp::is_object< OutputType >::value &&
+		const std::enable_if_t< !alp::is_object< OutputType >::value &&
 			!alp::is_object< InputType1 >::value &&
 			!alp::is_object< InputType2 >::value &&
 			alp::is_monoid< AddMonoid >::value &&
@@ -2805,7 +2175,7 @@ namespace alp {
 		Scalar< IOType, IOStructure, reference > &alpha,
 		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > &y,
 		const Monoid &monoid = Monoid(),
-		const typename std::enable_if_t<
+		const std::enable_if_t<
 			! alp::is_object< IOType >::value && ! alp::is_object< InputType >::value && alp::is_monoid< Monoid >::value
 		> * const = nullptr
 	) {
diff --git a/include/alp/reference/blas2.hpp b/include/alp/reference/blas2.hpp
index 4da796d0d..536821212 100644
--- a/include/alp/reference/blas2.hpp
+++ b/include/alp/reference/blas2.hpp
@@ -61,65 +61,6 @@ namespace alp {
 	 * @{
 	 */
 
-	/**
-	 * Retrieve the number of nonzeroes contained in this matrix.
-	 *
-	 * @returns The number of nonzeroes the current matrix contains.
-	 *
-	 * \parblock
-	 * \par Performance semantics.
-	 *        -# This function consitutes \f$ \Theta(1) \f$ work.
-	 *        -# This function allocates no additional dynamic memory.
-	 *        -# This function uses \f$ \mathcal{O}(1) \f$ memory
-	 *           beyond that which was already used at function entry.
-	 *        -# This function will move
-	 *             \f$ \mathit{sizeof}( size\_t ) \f$
-	 *           bytes of memory.
-	 * \endparblock
-	 */
-	template< typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC >
-	size_t nnz( const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > & A ) noexcept {
-		return A.nz;
-	}
-
-	/**
-	 * Resizes the matrix to have at least the given number of nonzeroes.
-	 * The contents of the matrix are not retained.
-	 *
-	 * Resizing of dense containers is not allowed as the capacity is determined
-	 * by the container dimensions and the storage scheme. Therefore, this
-	 * function will not change the capacity of the matrix.
-	 *
-	 * Even though the capacity remains unchanged, the contents of the matrix
-	 * are not retained to maintain compatibility with the general specification.
-	 * However, the actual memory will not be reallocated. Rather, the matrix
-	 * will be marked as uninitialized.
-	 *
-	 * @param[in] A         The matrix to be resized.
-	 * @param[in] nonzeroes The number of nonzeroes this matrix is to contain.
-	 *
-	 * @return SUCCESS   If \a new_nz is not larger than the current capacity
-	 *                   of the matrix.
-	 *         ILLEGAL   If \a new_nz is larger than the current capacity of
-	 *                   the matrix.
-	 *
-	 * \parblock
-	 * \par Performance semantics.
-	 *        -$ This function consitutes \f$ \Theta(1) \f$ work.
-	 *        -# This function allocates \f$ \Theta(0) \f$
-	 *           bytes of dynamic memory.
-	 *        -# This function does not make system calls.
-	 * \endparblock
-	 */
-	template< typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC >
-	RC resize( Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > &A, const size_t new_nz ) noexcept {
-		(void)A;
-		(void)new_nz;
-		// TODO implement
-		// setInitialized( A, false );
-		return PANIC;
-	}
-
 	/** \internal Delegates to fully masked variant */
 	template< Descriptor descr = descriptors::no_operation,
 		typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
@@ -365,7 +306,7 @@ namespace alp {
 		template<
 			size_t BandIndex, typename Func,
 			typename DataType, typename Structure, typename View, typename ImfR, typename ImfC,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				BandIndex >= std::tuple_size< typename Structure::band_intervals >::value
 			> * = nullptr
 		>
@@ -378,7 +319,7 @@ namespace alp {
 		template<
 			size_t BandIndex, typename Func,
 			typename DataType, typename Structure, typename View, typename ImfR, typename ImfC,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				BandIndex >= std::tuple_size< typename Structure::band_intervals >::value
 			> * = nullptr
 		>
@@ -400,7 +341,7 @@ namespace alp {
 		template<
 			size_t band_index, typename Func,
 			typename DataType, typename Structure, typename View, typename ImfR, typename ImfC,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				band_index < std::tuple_size< typename Structure::band_intervals >::value
 			> * = nullptr
 		>
@@ -494,7 +435,7 @@ namespace alp {
 			typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
 			typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
 			typename InputTypeScalar, typename InputStructureScalar,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				band_index >= std::tuple_size< typename IOStructure::band_intervals >::value
 			> * = nullptr
 		>
@@ -526,7 +467,7 @@ namespace alp {
 			typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
 			typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
 			typename InputTypeScalar, typename InputStructureScalar,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				band_index < std::tuple_size< typename IOStructure::band_intervals >::value
 			> * = nullptr
 		>
diff --git a/include/alp/reference/blas3.hpp b/include/alp/reference/blas3.hpp
index 1fee9ff6c..cbbce6b38 100644
--- a/include/alp/reference/blas3.hpp
+++ b/include/alp/reference/blas3.hpp
@@ -36,7 +36,6 @@
 #include "matrix.hpp"
 #include "vector.hpp"
 
-#ifndef NO_CAST_ASSERT
 #define NO_CAST_ASSERT( x, y, z )                                              \
 	static_assert( x,                                                          \
 		"\n\n"                                                                 \
@@ -59,7 +58,29 @@
 		"********************************************************************" \
 		"********************************************************************" \
 		"******************************\n" );
-#endif
+
+#define NO_CAST_OP_ASSERT( x, y, z )                                           \
+	static_assert( x,                                                          \
+		"\n\n"                                                                 \
+		"********************************************************************" \
+		"********************************************************************" \
+		"******************************\n"                                     \
+		"*     ERROR      | " y " " z ".\n"                                    \
+		"********************************************************************" \
+		"********************************************************************" \
+		"******************************\n"                                     \
+		"* Possible fix 1 | Remove no_casting from the template parameters "   \
+		"in this call to " y ".\n"                                             \
+		"* Possible fix 2 | For all mismatches in the domains of input "       \
+		"parameters and the operator domains, as specified in the "            \
+		"documentation of the function " y ", supply an input argument of "    \
+		"the expected type instead.\n"                                         \
+		"* Possible fix 3 | Provide a compatible operator where all domains "  \
+		"match those of the input parameters, as specified in the "            \
+		"documentation of the function " y ".\n"                               \
+		"********************************************************************" \
+		"********************************************************************" \
+		"******************************\n" );
 
 namespace alp {
 	namespace internal {
@@ -520,7 +541,7 @@ namespace alp {
 			typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
 			typename InputTypeScalar2, typename InputStructureScalar2,
 			class Operator,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				band_index >= std::tuple_size< typename OutputStructure::band_intervals >::value
 			> * = nullptr
 		>
@@ -561,7 +582,7 @@ namespace alp {
 			typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
 			typename InputTypeScalar2, typename InputStructureScalar2,
 			class Operator,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				band_index < std::tuple_size< typename OutputStructure::band_intervals >::value
 			> * = nullptr
 		>
@@ -910,7 +931,7 @@ namespace alp {
 			typename InputTypeScalar1, typename InputStructureScalar1,
 			typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
 			typename InputTypeScalar2, typename InputStructureScalar2,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				band_index >= std::tuple_size< typename OutputStructure::band_intervals >::value
 			> * = nullptr
 		>
@@ -948,7 +969,7 @@ namespace alp {
 			typename InputTypeScalar1, typename InputStructureScalar1,
 			typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
 			typename InputTypeScalar2, typename InputStructureScalar2,
-			typename std::enable_if_t<
+			std::enable_if_t<
 				band_index < std::tuple_size< typename OutputStructure::band_intervals >::value
 			> * = nullptr
 		>
@@ -1539,125 +1560,10 @@ namespace alp {
 
 	}
 
-	/**
-	 * Sets all elements of the output matrix to the values of the input matrix.
-	 * C = A
-	 * 
-	 * @tparam descr
-	 * @tparam OutputType      Data type of the output matrix C
-	 * @tparam OutputStructure Structure of the matrix C
-	 * @tparam OutputView      View type applied to the matrix C
-	 * @tparam InputType       Data type of the scalar a
-	 *
-	 * @param C    Matrix whose values are to be set
-	 * @param A    The input matrix
-	 *
-	 * @return RC  SUCCESS on the successful execution of the set
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC
-	>
-	RC set(
-		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > &C,
-		const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > &A
-	) noexcept {
-		static_assert(
-			!std::is_same< OutputType, void >::value,
-			"alp::set (set to value): cannot have a pattern matrix as output"
-		);
-#ifdef _DEBUG
-		std::cout << "Called alp::set (matrix-to-matrix, reference)" << std::endl;
-#endif
-		// static checks
-		NO_CAST_ASSERT(
-			( !( descr & descriptors::no_casting ) || std::is_same< InputType, OutputType >::value ),
-			"alp::set", "called with non-matching value types"
-		);
-
-		static_assert(
-			!internal::is_functor_based<
-				Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference >
-			>::value,
-			"alp::set cannot be called with a functor-based matrix as a destination."
-		);
-
-		// TODO: Improve this check to account for non-zero structrue (i.e., bands)
-		//       and algebraic properties (e.g., symmetry)
-		static_assert(
-			std::is_same< OutputStructure, InputStructure >::value,
-			"alp::set cannot be called for containers with different structures."
-		);
-
-		if( ( nrows( C ) != nrows( A ) ) || ( ncols( C ) != ncols( A ) ) ) {
-			return MISMATCH;
-		}
-
-		if( !internal::getInitialized( A ) ) {
-			internal::setInitialized( C, false );
-			return SUCCESS;
-		}
-
-		internal::setInitialized( C, true );
-		return foldl( C, A, alp::operators::right_assign< OutputType >() );
-	}
-
-	/**
-	 * Sets all elements of the given matrix to the value of the given scalar.
-	 * C = val
-	 * 
-	 * @tparam descr
-	 * @tparam OutputType      Data type of the output matrix C
-	 * @tparam OutputStructure Structure of the matrix C
-	 * @tparam OutputView      View type applied to the matrix C
-	 * @tparam InputType       Data type of the scalar a
-	 *
-	 * @param C    Matrix whose values are to be set
-	 * @param val  The value to set the elements of the matrix C
-	 *
-	 * @return RC  SUCCESS on the successful execution of the set
-	 */
-	template< Descriptor descr = descriptors::no_operation,
-		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
-		typename InputType, typename InputStructure
-	>
-	RC set(
-		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > &C,
-		const Scalar< InputType, InputStructure, reference > &val
-	) noexcept {
-
-		static_assert(
-			!std::is_same< OutputType, void >::value,
-			"alp::set (set to matrix): cannot have a pattern matrix as output"
-		);
-#ifdef _DEBUG
-		std::cout << "Called alp::set (matrix-to-value, reference)" << std::endl;
-#endif
-		// static checks
-		NO_CAST_ASSERT(
-			( !( descr & descriptors::no_casting ) || std::is_same< InputType, OutputType >::value ),
-			"alp::set", "called with non-matching value types"
-		);
-
-		static_assert(
-			!internal::is_functor_based<
-				Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference >
-			>::value,
-			"alp::set cannot be called with a functor-based matrix as a destination."
-		);
-
-		if( !internal::getInitialized( val ) ) {
-			internal::setInitialized( C, false );
-			return SUCCESS;
-		}
-
-		internal::setInitialized( C, true );
-		return foldl( C, val, alp::operators::right_assign< OutputType >() );
-	}
-
 } // end namespace ``alp''
 
 #undef NO_CAST_ASSERT
+#undef NO_CAST_OP_ASSERT
 
 #endif // end ``_H_ALP_REFERENCE_BLAS3''
 
diff --git a/include/alp/reference/io.hpp b/include/alp/reference/io.hpp
index e785a3b7d..3b5a673b0 100644
--- a/include/alp/reference/io.hpp
+++ b/include/alp/reference/io.hpp
@@ -26,8 +26,627 @@
 #include <alp/base/io.hpp>
 #include "matrix.hpp"
 
+#define NO_CAST_ASSERT( x, y, z )                                              \
+	static_assert( x,                                                          \
+		"\n\n"                                                                 \
+		"********************************************************************" \
+		"********************************************************************" \
+		"******************************\n"                                     \
+		"*     ERROR      | " y " " z ".\n"                                    \
+		"********************************************************************" \
+		"********************************************************************" \
+		"******************************\n"                                     \
+		"* Possible fix 1 | Remove no_casting from the template parameters "   \
+		"in this call to " y ".\n"                                             \
+		"* Possible fix 2 | Provide a value that matches the expected type.\n" \
+		"********************************************************************" \
+		"********************************************************************" \
+		"******************************\n" );
+
 namespace alp {
 
+	/**
+	 * Request the size (dimension) of a given Vector.
+	 *
+	 * The dimension is set at construction of the given Vector and cannot
+	 * be changed. A call to this function shall always succeed.
+	 *
+	 * @tparam DataType      The type of elements contained in the vector \a x.
+	 * @tparam DataStructure The structure of the vector \a x.
+	 * @tparam View          The view type applied to the vector \a x.
+	 *
+	 * @param[in] x The Vector of which to retrieve the size.
+	 *
+	 * @return The size of the Vector \a x.
+	 *
+	//  * \parblock
+	//  * \par Performance semantics
+	//  * A call to this function
+	//  *  -# consists of \f$ \Theta(1) \f$ work;
+	//  *  -# moves \f$ \Theta(1) \f$ bytes of memory;
+	//  *  -# does not allocate any dynamic memory;
+	//  *  -# shall not make any system calls.
+	//  * \endparblock
+	 */
+	template<
+		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC
+	>
+	size_t size(
+		const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > &x
+	) noexcept {
+		return getLength( x );
+	}
+
+	/**
+	 * Request the number of nonzeroes in a given Vector.
+	 *
+	 * A call to this function always succeeds.
+	 *
+	 * @tparam DataType      The type of elements contained in the vector \a x.
+	 * @tparam DataStructure The structure of the vector \a x.
+	 * @tparam View          The view type applied to the vector \a x.
+	 *
+	 * @param[in] x The Vector of which to retrieve the number of nonzeroes.
+	 *
+	 * @return The number of nonzeroes in \a x.
+	 *
+	//  * \parblock
+	//  * \par Performance semantics
+	//  * A call to this function
+	//  *   -# consists of \f$ \Theta(1) \f$ work;
+	//  *   -# moves \f$ \Theta(1) \f$ bytes of memory;
+	//  *   -# does not allocate nor free any dynamic memory;
+	//  *   -# shall not make any system calls.
+	//  * \endparblock
+	 */
+	template<
+		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC
+	>
+	size_t nnz(
+		const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > &x
+	) noexcept {
+		throw std::runtime_error( "Needs an implementation." );
+		return 0;
+	}
+
+	/**
+	 * Retrieve the number of nonzeroes contained in this matrix.
+	 *
+	 * @returns The number of nonzeroes the current matrix contains.
+	 *
+	 * \parblock
+	 * \par Performance semantics.
+	 *        -# This function consitutes \f$ \Theta(1) \f$ work.
+	 *        -# This function allocates no additional dynamic memory.
+	 *        -# This function uses \f$ \mathcal{O}(1) \f$ memory
+	 *           beyond that which was already used at function entry.
+	 *        -# This function will move
+	 *             \f$ \mathit{sizeof}( size\_t ) \f$
+	 *           bytes of memory.
+	 * \endparblock
+	 */
+	template<
+		typename DataType, typename Structure, typename View, typename ImfR, typename ImfC
+	>
+	size_t nnz(
+		const Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, reference > &A
+	) noexcept {
+		return A.nz;
+	}
+
+	/**
+	 * Clears all elements from the given vector \a x.
+	 *
+	 * At the end of this operation, the number of nonzero elements in this vector
+	 * will be zero. The size of the vector remains unchanged.
+	 *
+	 * @return alp::SUCCESS When the vector is successfully cleared.
+	 *
+	 * \note This function cannot fail.
+	 *
+	//  * \parblock
+	//  * \par Performance semantics
+	//  *      This function
+	//  *        -# contains \f$ \mathcal{O}(n) \f$ work,
+	//  *        -# will not allocate new dynamic memory,
+	//  *        -# will take at most \f$ \Theta(1) \f$ memory beyond the memory
+	//  *           already used by the application before the call to this
+	//  *           function.
+	//  *        -# will move at most \f$ \mathit{sizeof}(\mathit{bool}) +
+	//  *           \mathit{sizeof}(\mathit{size\_t}) \f$ bytes of data.
+	//  * \endparblock
+	 */
+	template<
+		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC
+	>
+	RC clear(
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > &x
+	) noexcept {
+		throw std::runtime_error( "Needs an implementation" );
+		return SUCCESS;
+	}
+
+	/**
+	 * Resizes the Scalar to have at least the given number of nonzeroes.
+	 * The contents of the scalar are not retained.
+	 *
+	 * Resizing of dense containers is not allowed as the capacity is determined
+	 * by the container dimensions and the storage scheme. Therefore, this
+	 * function will not change the capacity of the container.
+	 *
+	 * The resize function for Scalars exist to maintain compatibility with
+	 * other containers (i.e., vector and matrix).
+	 *
+	 * Even though the capacity remains unchanged, the contents of the scalar
+	 * are not retained to maintain compatibility with the general specification.
+	 * However, the actual memory will not be reallocated. Rather, the scalar
+	 * will be marked as uninitialized.
+	 *
+	 * @param[in] x      The Scalar to be resized.
+	 * @param[in] new_nz The number of nonzeroes this vector is to contain.
+	 *
+	 * @return SUCCESS   If \a new_nz is not larger than 1.
+	 *         ILLEGAL   If \a new_nz is larger than 1.
+	 *
+	 * \parblock
+	 * \par Performance semantics.
+	 *        -$ This function consitutes \f$ \Theta(1) \f$ work.
+	 *        -# This function allocates \f$ \Theta(0) \f$
+	 *           bytes of dynamic memory.
+	 *        -# This function does not make system calls.
+	 * \endparblock
+	 * \todo add documentation. In particular, think about the meaning with \a P > 1.
+	 */
+	template< typename InputType, typename InputStructure, typename length_type >
+	RC resize( Scalar< InputType, InputStructure, reference > &s, const length_type new_nz ) noexcept {
+		if( new_nz <= 1 ) {
+			setInitialized( s, false );
+			return SUCCESS;
+		} else {
+			return ILLEGAL;
+		}
+	}
+
+	/**
+	 * Resizes the vector to have at least the given number of nonzeroes.
+	 * The contents of the vector are not retained.
+	 *
+	 * Resizing of dense containers is not allowed as the capacity is determined
+	 * by the container dimensions and the storage scheme. Therefore, this
+	 * function will not change the capacity of the vector.
+	 *
+	 * Even though the capacity remains unchanged, the contents of the vector
+	 * are not retained to maintain compatibility with the general specification.
+	 * However, the actual memory will not be reallocated. Rather, the vector
+	 * will be marked as uninitialized.
+	 *
+	 * @param[in] x      The Vector to be resized.
+	 * @param[in] new_nz The number of nonzeroes this vector is to contain.
+	 *
+	 * @return SUCCESS   If \a new_nz is not larger than the current capacity
+	 *                   of the vector.
+	 *         ILLEGAL   If \a new_nz is larger than the current capacity of
+	 *                   the vector.
+	 *
+	 * \parblock
+	 * \par Performance semantics.
+	 *        -$ This function consitutes \f$ \Theta(1) \f$ work.
+	 *        -# This function allocates \f$ \Theta(0) \f$
+	 *           bytes of dynamic memory.
+	 *        -# This function does not make system calls.
+	 * \endparblock
+	 * \todo add documentation. In particular, think about the meaning with \a P > 1.
+	 */
+	template< typename InputType, typename InputStructure, typename View, typename ImfR, typename ImfC, typename length_type >
+	RC resize(
+		Vector< InputType, InputStructure, Density::Dense, View, ImfR, ImfC, reference > &x,
+		const length_type new_nz
+	) noexcept {
+		(void) x;
+		(void) new_nz;
+		// \todo Add implementation.
+		// setInitialized( x, false );
+		return PANIC;
+	}
+
+	/**
+	 * Resizes the matrix to have at least the given number of nonzeroes.
+	 * The contents of the matrix are not retained.
+	 *
+	 * Resizing of dense containers is not allowed as the capacity is determined
+	 * by the container dimensions and the storage scheme. Therefore, this
+	 * function will not change the capacity of the matrix.
+	 *
+	 * Even though the capacity remains unchanged, the contents of the matrix
+	 * are not retained to maintain compatibility with the general specification.
+	 * However, the actual memory will not be reallocated. Rather, the matrix
+	 * will be marked as uninitialized.
+	 *
+	 * @param[in] A         The matrix to be resized.
+	 * @param[in] nonzeroes The number of nonzeroes this matrix is to contain.
+	 *
+	 * @return SUCCESS   If \a new_nz is not larger than the current capacity
+	 *                   of the matrix.
+	 *         ILLEGAL   If \a new_nz is larger than the current capacity of
+	 *                   the matrix.
+	 *
+	 * \parblock
+	 * \par Performance semantics.
+	 *        -$ This function consitutes \f$ \Theta(1) \f$ work.
+	 *        -# This function allocates \f$ \Theta(0) \f$
+	 *           bytes of dynamic memory.
+	 *        -# This function does not make system calls.
+	 * \endparblock
+	 */
+	template< typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC >
+	RC resize(
+		Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > &A,
+		const size_t new_nz
+	) noexcept {
+		(void) A;
+		(void) new_nz;
+		// \todo Add implementation.
+		// setInitialized( A, false );
+		return PANIC;
+	}
+
+	/**
+	 * Sets all elements of a Vector to the given value. Can be masked.
+	 *
+	 * This function is functionally equivalent to
+	 * \code
+	 * alp::operators::right_assign< DataType > op;
+	 * return foldl< descr >( x, val, op );
+	 * \endcode,
+	 * \code
+	 * alp::operators::left_assign< DataType > op;
+	 * return foldr< descr >( val, x, op );
+	 * \endcode, and the following pseudocode
+	 * \code
+	 * for( size_t i = 0; i < size(x); ++i ) {
+	 *     if( mask(i) ) { setElement( x, i, val ); }
+	 * \endcode.
+	 *
+	 * @tparam descr         The descriptor used for this operation.
+	 * @tparam DataType      The type of each element in the vector \a x.
+	 * @tparam DataStructure The structure of the vector \a x.
+	 * @tparam View          The view type applied to the vector \a x.
+	 * @tparam T             The type of the given value.
+	 *
+	 * \parblock
+	 * \par Accepted descriptors
+	 *   -# alp::descriptors::no_operation
+	 *   -# alp::descriptors::no_casting
+	 * \endparblock
+	 *
+	 * @param[in,out] x The Vector of which every element is to be set to equal
+	 *                  \a val.
+	 * @param[in]   val The value to set each element of \a x equal to.
+	 *
+	 * @returns SUCCESS       When the call completes successfully.
+	 *
+	 * When \a descr includes alp::descriptors::no_casting and if \a T does not
+	 * match \a DataType, the code shall not compile.
+	 *
+	//  * \parblock
+	//  * \par Performance semantics
+	//  * A call to this function
+	//  *   -# consists of \f$ \Theta(n) \f$ work;
+	//  *   -# moves \f$ \Theta(n) \f$ bytes of memory;
+	//  *   -# does not allocate nor free any dynamic memory;
+	//  *   -# shall not make any system calls.
+	//  * \endparblock
+	 *
+	 * @see alp::foldl.
+	 * @see alp::foldr.
+	 * @see alp::operators::left_assign.
+	 * @see alp::operators::right_assign.
+	 * @see alp::setElement.
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename DataType, typename DataStructure, typename View,
+		typename ImfR, typename ImfC,
+		typename T, typename ValStructure
+	>
+	RC set(
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > &x,
+		const Scalar< T, ValStructure, reference > val,
+		const typename std::enable_if<
+			!alp::is_object< DataType >::value &&
+			!alp::is_object< T >::value,
+		void >::type * const = NULL
+	) {
+		// static sanity checks
+		NO_CAST_ASSERT( ( !( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, unmasked)",
+			"called with a value type that does not match that of the given "
+			"vector" );
+
+		if( !internal::getInitialized( val ) ) {
+			internal::setInitialized( x, false );
+			return SUCCESS;
+		}
+
+		// foldl requires left-hand side to be initialized prior to the call
+		internal::setInitialized( x, true );
+		return foldl( x, val, alp::operators::right_assign< DataType >() );
+	}
+
+	/**
+	 * Sets the element of a given Vector at a given position to a given value.
+	 *
+	 * If the input Vector \a x already has an element \f$ x_i \f$, that element
+	 * is overwritten to the given value \a val. If no such element existed, it
+	 * is added and set equal to \a val. The number of nonzeroes in \a x may thus
+	 * be increased by one due to a call to this function.
+	 *
+	 * The parameter \a i may not be greater or equal than the size of \a x.
+	 *
+	 * @tparam descr         The descriptor to be used during evaluation of this
+	 *                       function.
+	 * @tparam DataType      The type of the elements of \a x.
+	 * @tparam DataStructure The structure of the vector \a x.
+	 * @tparam View          The view type applied to the vector \a x.
+	 * @tparam T             The type of the value to be set.
+	 *
+	 * @param[in,out] x The vector to be modified.
+	 * @param[in]   val The value \f$ x_i \f$ should read after function exit.
+	 * @param[in]     i The index of the element of \a x to set.
+	 *
+	 * @return alp::SUCCESS   Upon successful execution of this operation.
+	 * @return alp::MISMATCH  If \a i is greater or equal than the dimension of
+	 *                        \a x.
+	 *
+	 * \parblock
+	 * \par Accepted descriptors
+	 *   -# alp::descriptors::no_operation
+	 *   -# alp::descriptors::no_casting
+	 * \endparblock
+	 *
+	 * When \a descr includes alp::descriptors::no_casting and if \a T does not
+	 * match \a DataType, the code shall not compile.
+	 *
+	//  * \parblock
+	//  * \par Performance semantics
+	//  * A call to this function
+	//  *   -# consists of \f$ \Theta(1) \f$ work;
+	//  *   -# moves \f$ \Theta(1) \f$ bytes of memory;
+	//  *   -# does not allocate nor free any dynamic memory;
+	//  *   -# shall not make any system calls.
+	//  * \endparblock
+	 */
+	template<
+		Descriptor descr = descriptors::no_operation,
+		typename DataType, typename DataStructure, typename View, typename ImfR, typename ImfC, typename ValStructure,
+		typename T
+	>
+	RC setElement(
+		Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, reference > &x,
+		const Scalar< T, ValStructure, reference > val,
+		const size_t i,
+		const typename std::enable_if< !alp::is_object< DataType >::value && !alp::is_object< T >::value, void >::type * const = NULL
+	) {
+		// static sanity checks
+		NO_CAST_ASSERT( ( !( descr & descriptors::no_casting ) || std::is_same< DataType, T >::value ), "alp::set (Vector, at index)",
+			"called with a value type that does not match that of the given "
+			"Vector" );
+
+		throw std::runtime_error( "Needs an implementation." );
+
+		// done
+		return SUCCESS;
+	}
+
+	/**
+	 * Sets the content of a given vector \a x to be equal to that of
+	 * another given vector \a y. Can be masked.
+	 *
+	 * This operation is functionally equivalent to
+	 * \code
+	 * alp::operators::right_assign< T > op;
+	 * alp::foldl( x, y, op );
+	 * \endcode,
+	 * \code
+	 * alp::operators::left_assign < T > op;
+	 * alp::foldr( y, x, op );
+	 * \endcode, as well as the following pseudocode
+	 * \code
+	 * for( each nonzero in y ) {
+	 *    setElement( x, nonzero.index, nonzero.value );
+	 * }
+	 * \endcode.
+	 *
+	 * The vector \a x may not equal \a y.
+	 *
+	 * \parblock
+	 * \par Accepted descriptors
+	 *   -# alp::descriptors::no_operation
+	 *   -# alp::descriptors::no_casting
+	 * \endparblock
+	 *
+	 * @tparam descr           The descriptor of the operation.
+	 * @tparam OutputType      The type of each element in the output vector.
+	 * @tparam InputType       The type of each element in the input vector.
+	 * @tparam OutputStructure The structure of the ouput vector.
+	 * @tparam InputStructure  The structure of the input vector.
+	 * @tparam OuputView       The view applied to the output vector.
+	 * @tparam InputView       The view applied to the input vector.
+	 *
+	 * @param[in,out] x The vector to be set.
+	 * @param[in]     y The source vector.
+	 *
+	 * When \a descr includes alp::descriptors::no_casting and if \a InputType
+	 * does not match \a OutputType, the code shall not compile.
+	 *
+	//  * \parblock
+	//  * \par Performance semantics
+	//  * A call to this function
+	//  *   -# consists of \f$ \Theta(n) \f$ work;
+	//  *   -# moves \f$ \Theta(n) \f$ bytes of memory;
+	//  *   -# does not allocate nor free any dynamic memory;
+	//  *   -# shall not make any system calls.
+	//  * \endparblock
+	 *
+	 * @see alp::foldl.
+	 * @see alp::foldr.
+	 * @see alp::operators::left_assign.
+	 * @see alp::operators::right_assign.
+	 * @see alp::setElement.
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC
+	>
+	RC set(
+		Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > &x,
+		const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > &y
+	) {
+		// static sanity checks
+		NO_CAST_ASSERT(
+			( !( descr & descriptors::no_casting ) || std::is_same< OutputType, InputType >::value ), "alp::copy (Vector)", "called with vector parameters whose element data types do not match" );
+		constexpr bool out_is_void = std::is_void< OutputType >::value;
+		constexpr bool in_is_void = std::is_void< OutputType >::value;
+		static_assert( !in_is_void || out_is_void,
+			"alp::set (reference, Vector <- Vector, masked): "
+			"if input is void, then the output must be also" );
+		static_assert( !( descr & descriptors::use_index ) || !out_is_void,
+			"alp::set (reference, Vector <- Vector, masked): "
+			"use_index descriptor cannot be set if output vector is void" );
+
+		// check contract
+		if( reinterpret_cast< void * >( &x ) == reinterpret_cast< const void * >( &y ) ) {
+			return ILLEGAL;
+		}
+
+		if( getLength( x ) != getLength( y ) ) {
+			return MISMATCH;
+		}
+
+		if( !internal::getInitialized( y ) ) {
+			setInitialized( x, false );
+			return SUCCESS;
+		}
+
+		internal::setInitialized( x, true );
+		return foldl( x, y, alp::operators::right_assign< OutputType >() );
+	}
+
+	/**
+	 * Sets all elements of the output matrix to the values of the input matrix.
+	 * C = A
+	 *
+	 * @tparam descr
+	 * @tparam OutputType      Data type of the output matrix C
+	 * @tparam OutputStructure Structure of the matrix C
+	 * @tparam OutputView      View type applied to the matrix C
+	 * @tparam InputType       Data type of the scalar a
+	 *
+	 * @param C    Matrix whose values are to be set
+	 * @param A    The input matrix
+	 *
+	 * @return RC  SUCCESS on the successful execution of the set
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC
+	>
+	RC set(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > &C,
+		const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > &A
+	) noexcept {
+		static_assert(
+			!std::is_same< OutputType, void >::value,
+			"alp::set (set to value): cannot have a pattern matrix as output"
+		);
+#ifdef _DEBUG
+		std::cout << "Called alp::set (matrix-to-matrix, reference)" << std::endl;
+#endif
+		// static checks
+		NO_CAST_ASSERT(
+			( !( descr & descriptors::no_casting ) || std::is_same< InputType, OutputType >::value ),
+			"alp::set", "called with non-matching value types"
+		);
+
+		static_assert(
+			!internal::is_functor_based<
+				Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference >
+			>::value,
+			"alp::set cannot be called with a functor-based matrix as a destination."
+		);
+
+		// TODO: Improve this check to account for non-zero structrue (i.e., bands)
+		//       and algebraic properties (e.g., symmetry)
+		static_assert(
+			std::is_same< OutputStructure, InputStructure >::value,
+			"alp::set cannot be called for containers with different structures."
+		);
+
+		if( ( nrows( C ) != nrows( A ) ) || ( ncols( C ) != ncols( A ) ) ) {
+			return MISMATCH;
+		}
+
+		if( !internal::getInitialized( A ) ) {
+			internal::setInitialized( C, false );
+			return SUCCESS;
+		}
+
+		internal::setInitialized( C, true );
+		return foldl( C, A, alp::operators::right_assign< OutputType >() );
+	}
+
+	/**
+	 * Sets all elements of the given matrix to the value of the given scalar.
+	 * C = val
+	 *
+	 * @tparam descr
+	 * @tparam OutputType      Data type of the output matrix C
+	 * @tparam OutputStructure Structure of the matrix C
+	 * @tparam OutputView      View type applied to the matrix C
+	 * @tparam InputType       Data type of the scalar a
+	 *
+	 * @param C    Matrix whose values are to be set
+	 * @param val  The value to set the elements of the matrix C
+	 *
+	 * @return RC  SUCCESS on the successful execution of the set
+	 */
+	template< Descriptor descr = descriptors::no_operation,
+		typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+		typename InputType, typename InputStructure
+	>
+	RC set(
+		Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > &C,
+		const Scalar< InputType, InputStructure, reference > &val
+	) noexcept {
+
+		static_assert(
+			!std::is_same< OutputType, void >::value,
+			"alp::set (set to matrix): cannot have a pattern matrix as output"
+		);
+#ifdef _DEBUG
+		std::cout << "Called alp::set (matrix-to-value, reference)" << std::endl;
+#endif
+		// static checks
+		NO_CAST_ASSERT(
+			( !( descr & descriptors::no_casting ) || std::is_same< InputType, OutputType >::value ),
+			"alp::set", "called with non-matching value types"
+		);
+
+		static_assert(
+			!internal::is_functor_based<
+				Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference >
+			>::value,
+			"alp::set cannot be called with a functor-based matrix as a destination."
+		);
+
+		if( !internal::getInitialized( val ) ) {
+			internal::setInitialized( C, false );
+			return SUCCESS;
+		}
+
+		internal::setInitialized( C, true );
+		return foldl( C, val, alp::operators::right_assign< OutputType >() );
+	}
+
 	/**
 	 * Assigns elements to a matrix from an iterator.
 	 *
@@ -76,7 +695,7 @@ namespace alp {
 	 *
 	 */
 	template< typename InputType, typename fwd_iterator >
-	RC buildMatrixUnique( internal::Matrix< InputType, reference > & A, fwd_iterator start, const fwd_iterator end ) {
+	RC buildMatrixUnique( internal::Matrix< InputType, reference > &A, fwd_iterator start, const fwd_iterator end ) {
 		return A.template buildMatrixUnique( start, end );
 	}
 
@@ -87,7 +706,7 @@ namespace alp {
 	 * @see alp::buildMatrix
 	 */
 	template< typename InputType, typename fwd_iterator >
-	RC buildMatrix( internal::Matrix< InputType, reference > & A, fwd_iterator start, const fwd_iterator end ) {
+	RC buildMatrix( internal::Matrix< InputType, reference > &A, fwd_iterator start, const fwd_iterator end ) {
 		return A.template buildMatrixUnique( start, end );
 	}
 
@@ -140,7 +759,7 @@ namespace alp {
 	 *
 	 */
 	template< typename MatrixT, typename fwd_iterator >
-	RC buildMatrixUnique( MatrixT & A, const fwd_iterator & start, const fwd_iterator & end ) noexcept {
+	RC buildMatrixUnique( MatrixT &A, const fwd_iterator &start, const fwd_iterator &end ) noexcept {
 		(void)A;
 		(void)start;
 		(void)end;
@@ -157,8 +776,8 @@ namespace alp {
 	template< typename InputType, typename Structure, typename View, typename ImfR, typename ImfC, typename fwd_iterator >
 	RC buildMatrix(
 		Matrix< InputType, Structure, Density::Dense, View, ImfR, ImfC, reference > &A,
-		const fwd_iterator & start,
-		const fwd_iterator & end
+		const fwd_iterator &start,
+		const fwd_iterator &end
 	) noexcept {
 		(void)A;
 		(void)start;
@@ -206,5 +825,7 @@ namespace alp {
 
 } // end namespace ``alp''
 
+#undef NO_CAST_ASSERT
+
 #endif // end ``_H_ALP_REFERENCE_IO''