Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 7 additions & 26 deletions include/alp/reference/blas2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -374,47 +374,28 @@ namespace alp {
* Upon completion, calls itself for the next band.
*/
template<
size_t BandIndex, typename Func,
size_t band_index, typename Func,
typename DataType, typename Structure, typename View, typename ImfR, typename ImfC,
typename std::enable_if_t<
BandIndex < std::tuple_size< typename Structure::band_intervals >::value
band_index < std::tuple_size< typename Structure::band_intervals >::value
> * = nullptr
>
RC eWiseLambda(
const Func f,
alp::Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, reference > &A
) {
const size_t M = nrows( A );
const size_t N = ncols( A );

const std::ptrdiff_t l = structures::get_lower_bandwidth< BandIndex >( A );
const std::ptrdiff_t u = structures::get_upper_bandwidth< BandIndex >( A );

// In case of symmetry the iteration domain intersects the the upper
// (or lower) domain of A
constexpr bool is_sym_a = structures::is_a< Structure, structures::Symmetric >::value;

// Temporary until adding multiple symmetry directions
constexpr bool sym_up_a = is_sym_a;
const auto i_limits = structures::calculate_row_coordinate_limits< band_index >( A );

/** i-coordinate lower and upper limits considering matrix size and band limits */
const std::ptrdiff_t i_l_lim = std::max( static_cast< std::ptrdiff_t >( 0 ), -u );
const std::ptrdiff_t i_u_lim = std::min( M, -l + N );
for( size_t i = i_limits.first; i < i_limits.second; ++i ) {

for( size_t i = static_cast< size_t >( i_l_lim ); i < static_cast< size_t >( i_u_lim ); ++i ) {
/** j-coordinate lower and upper limits considering matrix size and symmetry */
const std::ptrdiff_t j_sym_l_lim = is_sym_a && sym_up_a ? i : 0;
const std::ptrdiff_t j_sym_u_lim = is_sym_a && !sym_up_a ? i + 1 : N;
/** j-coordinate lower and upper limits, also considering the band limits in addition to the factors above */
const std::ptrdiff_t j_l_lim = std::max( j_sym_l_lim, l );
const std::ptrdiff_t j_u_lim = std::min( j_sym_u_lim, u );
const auto j_limits = structures::calculate_column_coordinate_limits< band_index >( A, i );

for( size_t j = static_cast< size_t >( j_l_lim ); j < static_cast< size_t >( j_u_lim ); ++j ) {
for( size_t j = j_limits.first; j < j_limits.second; ++j ) {
auto &a_val = internal::access( A, internal::getStorageIndex( A, i, j ) );
f( i, j, a_val );
}
}
return eWiseLambda< BandIndex + 1 >( f, A );
return eWiseLambda< band_index + 1 >( f, A );
}

} // namespace internal
Expand Down
83 changes: 25 additions & 58 deletions include/alp/reference/blas3.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,11 @@ namespace alp {
const std::ptrdiff_t N { static_cast< std::ptrdiff_t >( ncols( C ) ) };
const std::ptrdiff_t K { static_cast< std::ptrdiff_t >( ncols( A ) ) };

const std::ptrdiff_t l_a { structures::get_lower_bandwidth< BandPos1 >( A ) };
const std::ptrdiff_t u_a { structures::get_upper_bandwidth< BandPos1 >( A ) };
const std::ptrdiff_t l_a { structures::get_lower_limit< BandPos1 >( A ) };
const std::ptrdiff_t u_a { structures::get_upper_limit< BandPos1 >( A ) };

const std::ptrdiff_t l_b { structures::get_lower_bandwidth< BandPos2 >( B ) };
const std::ptrdiff_t u_b { structures::get_upper_bandwidth< BandPos2 >( B ) };
const std::ptrdiff_t l_b { structures::get_lower_limit< BandPos2 >( B ) };
const std::ptrdiff_t u_b { structures::get_upper_limit< BandPos2 >( B ) };

// In case of symmetry the iteration domain intersects the the upper
// (or lower) domain of C
Expand Down Expand Up @@ -1270,46 +1270,32 @@ namespace alp {

/** Specialization for a valid band position. Assuming matrices have matching dimensions. */
template<
size_t BandPos,
size_t band_index,
typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
typename std::enable_if_t<
BandPos < std::tuple_size< typename OutputStructure::band_intervals >::value
band_index < std::tuple_size< typename OutputStructure::band_intervals >::value
> * = nullptr
>
RC set_band(
alp::Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > &C,
const alp::Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, reference > &A
) noexcept {

const size_t M = nrows( C );
const size_t N = ncols( C );

const std::ptrdiff_t l = structures::get_lower_bandwidth< BandPos >( C );
const std::ptrdiff_t u = structures::get_upper_bandwidth< BandPos >( C );

// In case of symmetry the iteration domain intersects the the upper
// (or lower) domain of C
constexpr bool is_sym_c { structures::is_a< OutputStructure, structures::Symmetric >::value };
constexpr bool is_sym_a { structures::is_a< InputStructure, structures::Symmetric >::value };

// Temporary until adding multiple symmetry directions
constexpr bool sym_up_c { is_sym_c };
constexpr bool sym_up_a { is_sym_a };

/** i-coordinate lower and upper limits considering matrix size and band limits */
const std::ptrdiff_t i_l_lim = std::max( static_cast< std::ptrdiff_t >( 0 ), -u );
const std::ptrdiff_t i_u_lim = std::min( M, -l + N );
const auto i_limits = structures::calculate_row_coordinate_limits< band_index >( A );

for( size_t i = i_limits.first; i < i_limits.second; ++i ) {

for( size_t i = static_cast< size_t >( i_l_lim ); i < static_cast< size_t >( i_u_lim ); ++i ) {
/** j-coordinate lower and upper limits considering matrix size and symmetry */
const std::ptrdiff_t j_sym_l_lim = is_sym_c && sym_up_c ? i : 0;
const std::ptrdiff_t j_sym_u_lim = is_sym_c && !sym_up_c ? i + 1 : N;
/** j-coordinate lower and upper limits, also considering the band limits in addition to the factors above */
const std::ptrdiff_t j_l_lim = std::max( j_sym_l_lim, l );
const std::ptrdiff_t j_u_lim = std::min( j_sym_u_lim, u );
const auto j_limits = structures::calculate_column_coordinate_limits< band_index >( A, i );

for( size_t j = static_cast< size_t >( j_l_lim ); j < static_cast< size_t >( j_u_lim ); ++j ) {
for( size_t j = j_limits.first; j < j_limits.second; ++j ) {
auto &c_val = internal::access( C, internal::getStorageIndex( C, i, j ) );
if( sym_up_c == sym_up_a ) {
c_val = internal::access( A, internal::getStorageIndex( A, i, j ) );
Expand All @@ -1319,13 +1305,12 @@ namespace alp {
}
}

return set_band< BandPos + 1 >( C, A );
return set_band< band_index + 1 >( C, A );
}

} // namespace internal

/**
* Sets all elements of the output matrix to the values of the input matrix. Unmasked version.
* Sets all elements of the output matrix to the values of the input matrix.
* C = A
*
* @tparam descr
Expand Down Expand Up @@ -1393,11 +1378,11 @@ namespace alp {
* Specializations implement bound checking.
*/
template<
size_t BandPos,
size_t band_index,
typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
typename std::enable_if_t<
BandPos >= std::tuple_size< typename OutputStructure::band_intervals >::value
band_index >= std::tuple_size< typename OutputStructure::band_intervals >::value
> * = nullptr
>
RC set_band(
Expand All @@ -1407,11 +1392,11 @@ namespace alp {

/** Specialization for out-of-range band position - nothing to do */
template<
size_t BandPos,
size_t band_index,
typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
typename InputType, typename InputStructure,
typename std::enable_if_t<
BandPos >= std::tuple_size< typename OutputStructure::band_intervals >::value
band_index >= std::tuple_size< typename OutputStructure::band_intervals >::value
> * = nullptr
>
RC set_band(
Expand All @@ -1425,50 +1410,32 @@ namespace alp {

/** Specialization for a valid band position */
template<
size_t BandPos,
size_t band_index,
typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
typename InputType, typename InputStructure,
typename std::enable_if_t<
BandPos < std::tuple_size< typename OutputStructure::band_intervals >::value
band_index < std::tuple_size< typename OutputStructure::band_intervals >::value
> * = nullptr
>
RC set_band(
alp::Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, reference > &C,
const Scalar< InputType, InputStructure, reference > &val
) noexcept {

const size_t M = nrows( C );
const size_t N = ncols( C );

const std::ptrdiff_t l = structures::get_lower_bandwidth< BandPos >( C );
const std::ptrdiff_t u = structures::get_upper_bandwidth< BandPos >( C );

// In case of symmetry the iteration domain intersects the the upper
// (or lower) domain of C
constexpr bool is_sym_c { structures::is_a< OutputStructure, structures::Symmetric >::value };

// Temporary until adding multiple symmetry directions
constexpr bool sym_up_c { is_sym_c };
// i-coordinate lower and upper limits considering matrix size and band limits
const auto i_limits = structures::calculate_row_coordinate_limits< band_index >( C );

/** i-coordinate lower and upper limits considering matrix size and band limits */
const std::ptrdiff_t i_l_lim = std::max( static_cast< std::ptrdiff_t >( 0 ), -u );
const std::ptrdiff_t i_u_lim = std::min( M, -l + N );
for( size_t i = i_limits.first; i < i_limits.second; ++i ) {

for( size_t i = static_cast< size_t >( i_l_lim ); i < static_cast< size_t >( i_u_lim ); ++i ) {
/** j-coordinate lower and upper limits considering matrix size and symmetry */
const std::ptrdiff_t j_sym_l_lim = is_sym_c && sym_up_c ? i : 0;
const std::ptrdiff_t j_sym_u_lim = is_sym_c && !sym_up_c ? i + 1 : N;
/** j-coordinate lower and upper limits, also considering the band limits in addition to the factors above */
const std::ptrdiff_t j_l_lim = std::max( j_sym_l_lim, l );
const std::ptrdiff_t j_u_lim = std::min( j_sym_u_lim, u );
const auto j_limits = structures::calculate_column_coordinate_limits< band_index >( C, i );

for( size_t j = static_cast< size_t >( j_l_lim ); j < static_cast< size_t >( j_u_lim ); ++j ) {
for( size_t j = j_limits.first; j < j_limits.second; ++j ) {
auto &c_val = internal::access( C, internal::getStorageIndex( C, i, j ) );
c_val = *val;
}
}

return set_band< BandPos + 1 >( C, val );
return set_band< band_index + 1 >( C, val );
}

} // namespace internal
Expand Down
108 changes: 106 additions & 2 deletions include/alp/reference/matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2421,7 +2421,7 @@ namespace alp {
};

template<size_t band, typename T, typename Structure, enum Density density, typename View, typename ImfL, typename ImfR >
std::ptrdiff_t get_lower_bandwidth(const alp::Matrix< T, Structure, density, View, ImfL, ImfR, reference > &A) {
std::ptrdiff_t get_lower_limit(const alp::Matrix< T, Structure, density, View, ImfL, ImfR, reference > &A) {

const std::ptrdiff_t m = nrows( A );
constexpr std::ptrdiff_t cl_a = std::tuple_element< band, typename Structure::band_intervals >::type::left;
Expand All @@ -2433,7 +2433,7 @@ namespace alp {
}

template<size_t band, typename T, typename Structure, enum Density density, typename View, typename ImfL, typename ImfR >
std::ptrdiff_t get_upper_bandwidth(const alp::Matrix< T, Structure, density, View, ImfL, ImfR, reference > &A) {
std::ptrdiff_t get_upper_limit(const alp::Matrix< T, Structure, density, View, ImfL, ImfR, reference > &A) {

const std::ptrdiff_t n = ncols( A );
constexpr std::ptrdiff_t cu_a = std::tuple_element< band, typename Structure::band_intervals >::type::right;
Expand All @@ -2444,6 +2444,110 @@ namespace alp {

}

/**
* Calculates the iteration space for row-dimension for the given matrix and band index.
*
* @tparam MatrixType The type of ALP matrix
* @tparam band_index The index of the desired matrix band
*
* @param[in] A ALP matrix
*
* @returns a pair of size_t values,
* the first representing lower and the second upper limit.
*/
template<
size_t band_index, typename MatrixType,
std::enable_if_t<
is_matrix< MatrixType >::value
> * = nullptr
>
std::pair< size_t, size_t > calculate_row_coordinate_limits( const MatrixType &A ) {

using Structure = typename MatrixType::structure;

static_assert(
band_index < std::tuple_size< typename Structure::band_intervals >::value,
"Provided band index is out of bounds."
);

// cast matrix dimensions to signed integer to allow for comparison with negative numbers
const std::ptrdiff_t M = static_cast< std::ptrdiff_t >( nrows( A ) );
const std::ptrdiff_t N = static_cast< std::ptrdiff_t >( ncols( A ) );

// band limits are negated and inverted due to different orientation
// of coordinate system of band and matrix dimensions.
const std::ptrdiff_t l = -structures::get_upper_limit< band_index >( A );
const std::ptrdiff_t u = N - structures::get_lower_limit< band_index >( A );

// fit the limits within the matrix dimensions
const size_t lower_limit = static_cast< size_t >( std::max( std::min( l, M ), static_cast< std::ptrdiff_t >( 0 ) ) );
const size_t upper_limit = static_cast< size_t >( std::max( std::min( u, M ), static_cast< std::ptrdiff_t >( 0 ) ) );

assert( lower_limit <= upper_limit );

return std::make_pair( lower_limit, upper_limit );
}

/**
* Calculates the iteration space for column-dimension for the given matrix, band index and row index.
*
* @tparam MatrixType The type of ALP matrix
* @tparam band_index The index of the desired matrix band
*
* @param[in] A ALP matrix
* @param[in] row Row index
*
* @returns a pair of size_t values,
* the first representing lower and the second upper limit.
*/
template<
size_t band_index, typename MatrixType,
std::enable_if_t<
is_matrix< MatrixType >::value
> * = nullptr
>
std::pair< size_t, size_t > calculate_column_coordinate_limits( const MatrixType &A, const size_t row ) {

using Structure = typename MatrixType::structure;

// Declaring this to avoid static casts to std::ptrdiff_t in std::min and std::max calls
const std::ptrdiff_t signed_zero = 0;

static_assert(
band_index < std::tuple_size< typename Structure::band_intervals >::value,
"Provided band index is out of bounds."
);

assert( row < nrows( A ) );

// cast matrix dimensions to signed integer to allow for comparison with negative numbers
const std::ptrdiff_t N = static_cast< std::ptrdiff_t >( ncols( A ) );

constexpr bool is_sym = structures::is_a< Structure, structures::Symmetric >::value;
// Temporary until adding multiple symmetry directions
constexpr bool sym_up = is_sym;

// Band limits
const std::ptrdiff_t l = structures::get_lower_limit< band_index >( A );
const std::ptrdiff_t u = structures::get_upper_limit< band_index >( A );

// Band limits taking into account symmetry
const std::ptrdiff_t sym_l = is_sym && sym_up ? std::max( signed_zero, l ) : l;
const std::ptrdiff_t sym_u = is_sym && !sym_up ? std::min( signed_zero, u ) : u;

// column coordinate lower and upper limits considering the provided row coordinate
const std::ptrdiff_t sym_l_row = static_cast< std::ptrdiff_t >( row ) + sym_l;
const std::ptrdiff_t sym_u_row = sym_l_row + ( sym_u - sym_l );

// fit the limits within the matrix dimensions
const size_t lower_limit = static_cast< size_t >( std::max( std::min( sym_l_row, N ), signed_zero ) );
const size_t upper_limit = static_cast< size_t >( std::max( std::min( sym_u_row, N ), signed_zero ) );

assert( lower_limit <= upper_limit );

return std::make_pair( lower_limit, upper_limit );
}

} // namespace structures

/**
Expand Down