Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions include/alp/omp/io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,23 +99,22 @@ namespace alp {
return SUCCESS;
}

const Distribution &d = internal::getAmf( C ).getDistribution();
const Distribution_2_5D &d = internal::getAmf( C ).getDistribution();

RC rc = SUCCESS;

#pragma omp parallel for
for( size_t thread = 0; thread < config::OMP::current_threads(); ++thread ) {
const size_t tr = d.getThreadCoords( thread ).first;
const size_t tc = d.getThreadCoords( thread ).second;
const auto block_grid_dims = d.getLocalBlockGridDims( tr, tc );
const auto t_coords = d.getThreadCoords( thread );
const auto block_grid_dims = d.getLocalBlockGridDims( t_coords );

RC local_rc = SUCCESS;

for( size_t br = 0; br < block_grid_dims.first; ++br ) {
for( size_t bc = 0; bc < block_grid_dims.second; ++bc ) {

// Get a sequential matrix view over the block
auto refC = internal::get_view( C, tr, tc, 1 /* rt */, br, bc );
auto refC = internal::get_view( C, t_coords, br, bc );

// Construct a sequential Scalar container from the input Scalar
Scalar< InputType, InputStructure, config::default_sequential_backend > ref_val( *val );
Expand Down
9 changes: 4 additions & 5 deletions include/alp/omp/matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,20 @@ namespace alp {
template<
enum view::Views target_view = view::original,
typename SourceMatrix,
typename ThreadCoords,
std::enable_if_t<
is_matrix< SourceMatrix >::value
> * = nullptr
>
typename internal::new_container_type_from<
typename SourceMatrix::template view_type< view::gather >::type
>::template change_backend< config::default_sequential_backend >::type
get_view( SourceMatrix &source, const size_t tr, const size_t tc, const size_t rt, const size_t br, const size_t bc ) {

(void) rt;
get_view( SourceMatrix &source, const ThreadCoords t, const size_t br, const size_t bc ) {

// get the container
const auto &distribution = getAmf( source ).getDistribution();
const size_t thread_id = tr * distribution.getThreadGridDims().second + tc;
const size_t block_id = br * distribution.getLocalBlockGridDims( tr, tc ).second + bc;
const size_t thread_id = distribution.getThreadId( t );
const size_t block_id = distribution.getLocalBlockId( t, br, bc );
auto &container = internal::getLocalContainer( internal::getContainer( source ), thread_id, block_id );

// make an AMF
Expand Down
95 changes: 58 additions & 37 deletions include/alp/omp/storage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,19 @@ namespace alp {
* among threads.
*
*/
class Distribution {
class Distribution_2_5D {

public:

/**
 * Type encapsulating thread coordinates within the thread grid.
 *
 * All three coordinates are const members, so an instance cannot be
 * modified (or assigned to) after construction.
 */
struct ThreadCoords {
	/** Row coordinate of the thread. */
	const size_t tr;
	/** Column coordinate of the thread. */
	const size_t tc;
	/** Coordinate of the thread along the replication dimension. */
	const size_t rt;

	/** Builds the coordinate triple from its three components, in the order (tr, tc, rt). */
	ThreadCoords( const size_t tr, const size_t tc, const size_t rt ) :
		tr{ tr },
		tc{ tc },
		rt{ rt }
	{}
};

/** Type encapsulating the global element coordinate. */
struct GlobalCoord {

Expand All @@ -87,9 +96,7 @@ namespace alp {
/** Type encapsulating the local element coordinate. */
struct LocalCoord {

const size_t tr;
const size_t tc;
const size_t rt;
const ThreadCoords t;
const size_t br;
const size_t bc;
const size_t i;
Expand All @@ -101,10 +108,13 @@ namespace alp {
const size_t br, const size_t bc,
const size_t i, const size_t j
) :
tr( tr ), tc( tc ),
rt( rt ),
t( tr, tc, rt ),
br( br ), bc( bc ),
i( i ), j( j ) {}

/** Returns a const reference to the thread coordinates stored in this local coordinate. */
const ThreadCoords &getThreadCoords() const {
	return this->t;
}

};

Expand Down Expand Up @@ -150,26 +160,25 @@ namespace alp {
const size_t Tr;
const size_t Tc;
/** Replication factor in thread-coordinate space */
const size_t Rt;
static constexpr size_t Rt = config::REPLICATION_FACTOR_THREADS;
/** The row and column dimensions of the global block grid */
const size_t Br;
const size_t Bc;

public:

Distribution(
Distribution_2_5D(
const size_t m, const size_t n,
const size_t num_threads
) :
m( m ), n( n ),
Tr( static_cast< size_t >( sqrt( num_threads ) ) ),
Tc( num_threads / Tr ),
Rt( config::REPLICATION_FACTOR_THREADS ),
Tr( static_cast< size_t >( sqrt( num_threads/ Rt ) ) ),
Tc( num_threads / Rt / Tr ),
Br( static_cast< size_t >( std::ceil( static_cast< double >( m ) / config::BLOCK_ROW_DIM ) ) ),
Bc( static_cast< size_t >( std::ceil( static_cast< double >( n ) / config::BLOCK_COL_DIM ) ) ) {

if( num_threads != Tr * Tc ) {
std::cerr << "Error\n";
if( num_threads != Tr * Tc * Rt ) {
std::cerr << "Warning: Provided number of threads cannot be factorized in a 3D grid.\n";
}
}

Expand Down Expand Up @@ -197,25 +206,29 @@ namespace alp {

return LocalCoord(
tr, tc,
0, // Rt
0, // Rt always maps to the front layer
local_br, local_bc,
local_i, local_j
);
}

/**
 * Maps coordinates from local to global space.
 *
 * Currently a placeholder: the argument is ignored and the result is
 * always constructed as GlobalCoord( 0, 0 ).
 *
 * \todo Add implementation
 */
GlobalCoord mapLocalToGlobal( const LocalCoord &l ) const {
	// The parameter is intentionally unused until the mapping is implemented
	static_cast< void >( l );
	return GlobalCoord( 0, 0 );
}

/** Returns the dimensions of the thread grid */
std::pair< size_t, size_t > getThreadGridDims() const {
return { Tr, Tc };
/**
 * Returns the thread ID corresponding to the given thread coordinates.
 *
 * The ID is the linearisation of (rt, tr, tc) with tc varying fastest:
 * rt * Tr * Tc + tr * Tc + tc.
 */
size_t getThreadId( const ThreadCoords t ) const {
	// Nested form of the same polynomial: ( rt * Tr + tr ) * Tc + tc
	const size_t row_in_grid = t.rt * Tr + t.tr;
	return row_in_grid * Tc + t.tc;
}

/** Returns the thread ID corresponding to the given thread coordinates. */
size_t getThreadId( const size_t tr, const size_t tc ) const {
return tr * Tc + tc;
/**
 * Returns the total number of threads in the thread grid, computed as
 * the product of the grid dimensions Tr and Tc and the replication
 * factor Rt.
 */
size_t getNumberOfThreads() const {
	const size_t threads_per_layer = Tr * Tc;
	return threads_per_layer * Rt;
}

/** Returns the total global amount of blocks */
Expand All @@ -224,11 +237,11 @@ namespace alp {
}

/** Returns the dimensions of the block grid associated to the given thread */
std::pair< size_t, size_t > getLocalBlockGridDims( const size_t tr, const size_t tc ) const {
// The LHS of the + operand covers the case
std::pair< size_t, size_t > getLocalBlockGridDims( const ThreadCoords t ) const {
// The RHS of the + operand covers the case
// when the last block of threads is not full
const size_t blocks_r = Br / Tr + ( tr < Br % Tr ? 1 : 0 );
const size_t blocks_c = Bc / Tc + ( tc < Bc % Tc ? 1 : 0 );
const size_t blocks_r = Br / Tr + ( t.tr < Br % Tr ? 1 : 0 );
const size_t blocks_c = Bc / Tc + ( t.tc < Bc % Tc ? 1 : 0 );
return { blocks_r, blocks_c };
}

Expand All @@ -244,6 +257,14 @@ namespace alp {
return global_coords.first * Bc + global_coords.second;
}

/**
 * Returns the ID of the block addressed by the given local coordinate,
 * i.e., the row-major index of ( br, bc ) within the local block grid
 * of the thread the coordinate belongs to.
 */
size_t getLocalBlockId( const LocalCoord &local ) const {
	const auto local_grid_dims = getLocalBlockGridDims( local.getThreadCoords() );
	return local.br * local_grid_dims.second + local.bc;
}

/**
 * Returns the ID of block ( br, bc ) local to thread \a t, i.e., its
 * row-major index within the local block grid of that thread.
 */
size_t getLocalBlockId( const ThreadCoords &t, const size_t br, const size_t bc ) const {
	const auto local_grid_dims = getLocalBlockGridDims( t );
	return br * local_grid_dims.second + bc;
}

/**
* Returns the dimensions of the block given by the block id
*/
Expand All @@ -270,16 +291,17 @@ namespace alp {
}

/** For a given block, returns its offset from the beginning of the buffer in which it is stored */
size_t getBlocksOffset( const size_t tr, const size_t tc, const size_t br, const size_t bc ) const {
size_t getBlocksOffset( const ThreadCoords t, const size_t br, const size_t bc ) const {
// The offset is calculated as the sum of sizes of all previous blocks
const size_t block_coord_1D = br * getLocalBlockGridDims( tr, tc ).second + bc;
const size_t block_coord_1D = br * getLocalBlockGridDims( t ).second + bc;
return block_coord_1D * getBlockSize();
}

std::pair< size_t, size_t > getThreadCoords( const size_t thread_id ) const {
const size_t tr = thread_id / Tc;
const size_t tc = thread_id % Tc;
return { tr, tc };
/**
 * Returns the thread coordinates corresponding to the given thread ID.
 *
 * The ID is decomposed with tc varying fastest: rt is the quotient by
 * Tr * Tc, and the remainder is split into tr (quotient by Tc) and
 * tc (remainder by Tc).
 */
ThreadCoords getThreadCoords( const size_t thread_id ) const {
	const size_t layer_size = Tr * Tc;
	// Position of the thread within its layer of Tr * Tc threads
	const size_t id_in_layer = thread_id % layer_size;
	return ThreadCoords(
		id_in_layer / Tc,      // tr
		id_in_layer % Tc,      // tc
		thread_id / layer_size // rt
	);
}
};

Expand Down Expand Up @@ -334,7 +356,7 @@ namespace alp {
*/
const size_t num_threads;

const Distribution distribution;
const Distribution_2_5D distribution;

AMF(
ImfR imf_r,
Expand All @@ -360,7 +382,7 @@ namespace alp {
std::cout << "Entering OMP AMF move constructor\n";
}

const Distribution &getDistribution() const {
/** Returns a const reference to the distribution held by this AMF. */
const Distribution_2_5D &getDistribution() const {
	return this->distribution;
}

Expand Down Expand Up @@ -400,12 +422,11 @@ namespace alp {
storage_index_type getStorageIndex( const size_t i, const size_t j, const size_t s, const size_t P ) const {
(void) s;
(void) P;
const typename Distribution::GlobalCoord global( imf_r.map( i ), imf_c.map( j ) );
const typename Distribution::LocalCoord local = distribution.mapGlobalToLocal( global );

const size_t thread = local.tr * distribution.getThreadGridDims().second + local.tc;
const typename Distribution_2_5D::GlobalCoord global( imf_r.map( i ), imf_c.map( j ) );
const typename Distribution_2_5D::LocalCoord local = distribution.mapGlobalToLocal( global );

const size_t local_block = local.br * distribution.getLocalBlockGridDims( local.tr, local.tc ).second + local.bc;
const size_t thread = distribution.getThreadId( local.getThreadCoords() );
const size_t local_block = distribution.getLocalBlockId( local );
const size_t local_element = local.i * config::BLOCK_ROW_DIM + local.j;

return storage_index_type( thread, local_block, local_element );
Expand Down
13 changes: 6 additions & 7 deletions include/alp/omp/vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,9 @@ namespace alp {
* code sections.
*/
Vector(
const Distribution &d,
const Distribution_2_5D &d,
const size_t cap = 0
) : num_buffers( d.getThreadGridDims().first * d.getThreadGridDims().second ),
) : num_buffers( d.getNumberOfThreads() ),
containers( num_buffers ),
initialized( false ) {

Expand All @@ -169,9 +169,8 @@ namespace alp {

#pragma omp parallel for
for( size_t thread = 0; thread < config::OMP::current_threads(); ++thread ) {
const size_t tr = d.getThreadCoords( thread ).first;
const size_t tc = d.getThreadCoords( thread ).second;
const auto block_grid_dims = d.getLocalBlockGridDims( tr, tc );
const auto t_coords = d.getThreadCoords( thread );
const auto block_grid_dims = d.getLocalBlockGridDims( t_coords );

// Assuming that all blocks are of the same size
const size_t alloc_size = block_grid_dims.first * block_grid_dims.second * d.getBlockSize();
Expand All @@ -182,7 +181,7 @@ namespace alp {
if( thread != config::OMP::current_thread_ID() ) {
std::cout << "Warning: thread != OMP::current_thread_id()\n";
}
std::cout << "Thread with global coordinates tr = " << tr << " tc = " << tc
std::cout << "Thread with global coordinates tr = " << t_coords.tr << " tc = " << t_coords.tc
<< " on OpenMP thread " << config::OMP::current_thread_ID()
<< " allocating buffer of " << alloc_size << " elements "
<< " holding " << block_grid_dims.first << " x " << block_grid_dims.second << " blocks.\n";
Expand All @@ -202,7 +201,7 @@ namespace alp {
// Populate the array of internal container wrappers
for( size_t br = 0; br < block_grid_dims.first; ++br ) {
for( size_t bc = 0; bc < block_grid_dims.second; ++bc ) {
const size_t offset = d.getBlocksOffset( tr, tc, br, bc );
const size_t offset = d.getBlocksOffset( t_coords, br, bc );
containers[ thread ].emplace_back( &( buffers[ thread ][ offset ] ), d.getBlockSize() );
}
}
Expand Down