From c6bb684513f4b57a241434b0fc9ecfa29dd5481a Mon Sep 17 00:00:00 2001 From: Vladimir Dimic Date: Fri, 25 Nov 2022 10:18:36 +0100 Subject: [PATCH 01/12] Replication factor in Distribution becomes static constexpr --- include/alp/omp/storage.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/alp/omp/storage.hpp b/include/alp/omp/storage.hpp index 7c6e060dd..ea3ea32d4 100644 --- a/include/alp/omp/storage.hpp +++ b/include/alp/omp/storage.hpp @@ -150,7 +150,7 @@ namespace alp { const size_t Tr; const size_t Tc; /** Replication factor in thread-coordinate space */ - const size_t Rt; + static constexpr size_t Rt = config::REPLICATION_FACTOR_THREADS; /** The row and column dimensions of the global block grid */ const size_t Br; const size_t Bc; @@ -164,7 +164,6 @@ namespace alp { m( m ), n( n ), Tr( static_cast< size_t >( sqrt( num_threads ) ) ), Tc( num_threads / Tr ), - Rt( config::REPLICATION_FACTOR_THREADS ), Br( static_cast< size_t >( std::ceil( static_cast< double >( m ) / config::BLOCK_ROW_DIM ) ) ), Bc( static_cast< size_t >( std::ceil( static_cast< double >( n ) / config::BLOCK_COL_DIM ) ) ) { From f85db3f8ae458e14414e9330c48f4a737a45dd02 Mon Sep 17 00:00:00 2001 From: Vladimir Dimic Date: Fri, 25 Nov 2022 11:19:29 +0100 Subject: [PATCH 02/12] Take into account replication factor in thread grid layout --- include/alp/omp/matrix.hpp | 2 +- include/alp/omp/storage.hpp | 26 +++++++++++++++++--------- include/alp/omp/vector.hpp | 2 +- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/include/alp/omp/matrix.hpp b/include/alp/omp/matrix.hpp index 3ecd193f8..6ef3ab3b2 100644 --- a/include/alp/omp/matrix.hpp +++ b/include/alp/omp/matrix.hpp @@ -63,7 +63,7 @@ namespace alp { // get the container const auto &distribution = getAmf( source ).getDistribution(); - const size_t thread_id = tr * distribution.getThreadGridDims().second + tc; + const size_t thread_id = tr * distribution.getThreadGridDims().Tc + tc; const size_t block_id = 
br * distribution.getLocalBlockGridDims( tr, tc ).second + bc; auto &container = internal::getLocalContainer( internal::getContainer( source ), thread_id, block_id ); diff --git a/include/alp/omp/storage.hpp b/include/alp/omp/storage.hpp index ea3ea32d4..8f8f43cee 100644 --- a/include/alp/omp/storage.hpp +++ b/include/alp/omp/storage.hpp @@ -141,6 +141,14 @@ namespace alp { }; + struct ThreadGrid { + const size_t Tr; + const size_t Tc; + static constexpr size_t Rt = config::REPLICATION_FACTOR_THREADS; + + ThreadGrid( const size_t Tr, const size_t Tc ) : Tr( Tr ), Tc( Tc ) {} + }; + private: /** Row and column dimensions of the associated container */ @@ -162,13 +170,13 @@ namespace alp { const size_t num_threads ) : m( m ), n( n ), - Tr( static_cast< size_t >( sqrt( num_threads ) ) ), - Tc( num_threads / Tr ), + Tr( static_cast< size_t >( sqrt( num_threads/ Rt ) ) ), + Tc( num_threads / Rt / Tr ), Br( static_cast< size_t >( std::ceil( static_cast< double >( m ) / config::BLOCK_ROW_DIM ) ) ), Bc( static_cast< size_t >( std::ceil( static_cast< double >( n ) / config::BLOCK_COL_DIM ) ) ) { - if( num_threads != Tr * Tc ) { - std::cerr << "Error\n"; + if( num_threads != Tr * Tc * Rt ) { + std::cerr << "Warning: Provided number of threads cannot be factorized in a 3D grid.\n"; } } @@ -208,13 +216,13 @@ namespace alp { } /** Returns the dimensions of the thread grid */ - std::pair< size_t, size_t > getThreadGridDims() const { - return { Tr, Tc }; + const ThreadGrid getThreadGridDims() const { + return ThreadGrid( Tr, Tc ); } /** Returns the thread ID corresponding to the given thread coordinates. 
*/ - size_t getThreadId( const size_t tr, const size_t tc ) const { - return tr * Tc + tc; + size_t getThreadId( const size_t tr, const size_t tc, const size_t rt ) const { + return rt * Tr * Tc + tr * Tc + tc; } /** Returns the total global amount of blocks */ @@ -402,7 +410,7 @@ namespace alp { const typename Distribution::GlobalCoord global( imf_r.map( i ), imf_c.map( j ) ); const typename Distribution::LocalCoord local = distribution.mapGlobalToLocal( global ); - const size_t thread = local.tr * distribution.getThreadGridDims().second + local.tc; + const size_t thread = local.tr * distribution.getThreadGridDims().Tc + local.tc; const size_t local_block = local.br * distribution.getLocalBlockGridDims( local.tr, local.tc ).second + local.bc; const size_t local_element = local.i * config::BLOCK_ROW_DIM + local.j; diff --git a/include/alp/omp/vector.hpp b/include/alp/omp/vector.hpp index 096e79c45..e774903ee 100644 --- a/include/alp/omp/vector.hpp +++ b/include/alp/omp/vector.hpp @@ -151,7 +151,7 @@ namespace alp { Vector( const Distribution &d, const size_t cap = 0 - ) : num_buffers( d.getThreadGridDims().first * d.getThreadGridDims().second ), + ) : num_buffers( d.getThreadGridDims().Tr * d.getThreadGridDims().Tc ), containers( num_buffers ), initialized( false ) { From d71cad17d648b9ae2682af66394247974eb3056e Mon Sep 17 00:00:00 2001 From: Vladimir Dimic Date: Fri, 25 Nov 2022 11:36:53 +0100 Subject: [PATCH 03/12] Consider full thread grid coordinates (including rt) in containers and operations --- include/alp/omp/io.hpp | 7 ++++--- include/alp/omp/matrix.hpp | 4 +--- include/alp/omp/storage.hpp | 17 +++++++++++++---- include/alp/omp/vector.hpp | 6 +++--- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/include/alp/omp/io.hpp b/include/alp/omp/io.hpp index 3c550f5a9..f5636f678 100644 --- a/include/alp/omp/io.hpp +++ b/include/alp/omp/io.hpp @@ -105,8 +105,9 @@ namespace alp { #pragma omp parallel for for( size_t thread = 0; thread < 
config::OMP::current_threads(); ++thread ) { - const size_t tr = d.getThreadCoords( thread ).first; - const size_t tc = d.getThreadCoords( thread ).second; + const size_t tr = d.getThreadCoords( thread ).tr; + const size_t tc = d.getThreadCoords( thread ).tc; + const size_t rt = d.getThreadCoords( thread ).rt; const auto block_grid_dims = d.getLocalBlockGridDims( tr, tc ); RC local_rc = SUCCESS; @@ -115,7 +116,7 @@ namespace alp { for( size_t bc = 0; bc < block_grid_dims.second; ++bc ) { // Get a sequential matrix view over the block - auto refC = internal::get_view( C, tr, tc, 1 /* rt */, br, bc ); + auto refC = internal::get_view( C, tr, tc, rt, br, bc ); // Construct a sequential Scalar container from the input Scalar Scalar< InputType, InputStructure, config::default_sequential_backend > ref_val( *val ); diff --git a/include/alp/omp/matrix.hpp b/include/alp/omp/matrix.hpp index 6ef3ab3b2..f995c4a33 100644 --- a/include/alp/omp/matrix.hpp +++ b/include/alp/omp/matrix.hpp @@ -59,11 +59,9 @@ namespace alp { >::template change_backend< config::default_sequential_backend >::type get_view( SourceMatrix &source, const size_t tr, const size_t tc, const size_t rt, const size_t br, const size_t bc ) { - (void) rt; - // get the container const auto &distribution = getAmf( source ).getDistribution(); - const size_t thread_id = tr * distribution.getThreadGridDims().Tc + tc; + const size_t thread_id = distribution.getThreadId( tr, tc, rt ); const size_t block_id = br * distribution.getLocalBlockGridDims( tr, tc ).second + bc; auto &container = internal::getLocalContainer( internal::getContainer( source ), thread_id, block_id ); diff --git a/include/alp/omp/storage.hpp b/include/alp/omp/storage.hpp index 8f8f43cee..3c58d3623 100644 --- a/include/alp/omp/storage.hpp +++ b/include/alp/omp/storage.hpp @@ -149,6 +149,14 @@ namespace alp { ThreadGrid( const size_t Tr, const size_t Tc ) : Tr( Tr ), Tc( Tc ) {} }; + struct ThreadCoords { + const size_t tr; + const size_t tc; + const 
size_t rt; + + ThreadCoords( const size_t tr, const size_t tc, const size_t rt ) : tr( tr ), tc( tc ), rt( rt ) {} + }; + private: /** Row and column dimensions of the associated container */ @@ -283,10 +291,11 @@ namespace alp { return block_coord_1D * getBlockSize(); } - std::pair< size_t, size_t > getThreadCoords( const size_t thread_id ) const { - const size_t tr = thread_id / Tc; - const size_t tc = thread_id % Tc; - return { tr, tc }; + ThreadCoords getThreadCoords( const size_t thread_id ) const { + const size_t rt = thread_id / ( Tr * Tc ); + const size_t tr = ( thread_id % ( Tr * Tc ) ) / Tc; + const size_t tc = ( thread_id % ( Tr * Tc ) ) % Tc; + return { tr, tc, rt }; } }; diff --git a/include/alp/omp/vector.hpp b/include/alp/omp/vector.hpp index e774903ee..00e7e2104 100644 --- a/include/alp/omp/vector.hpp +++ b/include/alp/omp/vector.hpp @@ -151,7 +151,7 @@ namespace alp { Vector( const Distribution &d, const size_t cap = 0 - ) : num_buffers( d.getThreadGridDims().Tr * d.getThreadGridDims().Tc ), + ) : num_buffers( d.getThreadGridDims().Tr * d.getThreadGridDims().Tc * d.getThreadGridDims().Rt ), containers( num_buffers ), initialized( false ) { @@ -169,8 +169,8 @@ namespace alp { #pragma omp parallel for for( size_t thread = 0; thread < config::OMP::current_threads(); ++thread ) { - const size_t tr = d.getThreadCoords( thread ).first; - const size_t tc = d.getThreadCoords( thread ).second; + const size_t tr = d.getThreadCoords( thread ).tr; + const size_t tc = d.getThreadCoords( thread ).tc; const auto block_grid_dims = d.getLocalBlockGridDims( tr, tc ); // Assuming that all blocks are of the same size From ac0f9dbc4fb7d87b837d667454c7e81719e32783 Mon Sep 17 00:00:00 2001 From: Vladimir Dimic Date: Fri, 25 Nov 2022 11:37:47 +0100 Subject: [PATCH 04/12] Add missing comments and fix a mistake in another comment --- include/alp/omp/storage.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/alp/omp/storage.hpp 
b/include/alp/omp/storage.hpp index 3c58d3623..0f8893c92 100644 --- a/include/alp/omp/storage.hpp +++ b/include/alp/omp/storage.hpp @@ -218,6 +218,11 @@ namespace alp { ); } + /** + * Maps coordinates from local to global space. + * + * \todo Add implementation + */ GlobalCoord mapLocalToGlobal( const LocalCoord &l ) const { (void) l; return GlobalCoord( 0, 0 ); @@ -240,7 +245,7 @@ namespace alp { /** Returns the dimensions of the block grid associated to the given thread */ std::pair< size_t, size_t > getLocalBlockGridDims( const size_t tr, const size_t tc ) const { - // The LHS of the + operand covers the case + // The RHS of the + operand covers the case // when the last block of threads is not full const size_t blocks_r = Br / Tr + ( tr < Br % Tr ? 1 : 0 ); const size_t blocks_c = Bc / Tc + ( tc < Bc % Tc ? 1 : 0 ); From 3fdaf51f4d7c802f01099c2bb07ca999146fe3cd Mon Sep 17 00:00:00 2001 From: Vladimir Dimic Date: Fri, 25 Nov 2022 11:43:22 +0100 Subject: [PATCH 05/12] Avoid calling distribution functions multiple times --- include/alp/omp/io.hpp | 8 +++----- include/alp/omp/vector.hpp | 9 ++++----- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/include/alp/omp/io.hpp b/include/alp/omp/io.hpp index f5636f678..f8eba9952 100644 --- a/include/alp/omp/io.hpp +++ b/include/alp/omp/io.hpp @@ -105,10 +105,8 @@ namespace alp { #pragma omp parallel for for( size_t thread = 0; thread < config::OMP::current_threads(); ++thread ) { - const size_t tr = d.getThreadCoords( thread ).tr; - const size_t tc = d.getThreadCoords( thread ).tc; - const size_t rt = d.getThreadCoords( thread ).rt; - const auto block_grid_dims = d.getLocalBlockGridDims( tr, tc ); + const auto t_coords = d.getThreadCoords( thread ); + const auto block_grid_dims = d.getLocalBlockGridDims( t_coords.tr, t_coords.tc ); RC local_rc = SUCCESS; @@ -116,7 +114,7 @@ namespace alp { for( size_t bc = 0; bc < block_grid_dims.second; ++bc ) { // Get a sequential matrix view over the block - auto refC = 
internal::get_view( C, tr, tc, rt, br, bc ); + auto refC = internal::get_view( C, t_coords.tr, t_coords.tc, t_coords.rt, br, bc ); // Construct a sequential Scalar container from the input Scalar Scalar< InputType, InputStructure, config::default_sequential_backend > ref_val( *val ); diff --git a/include/alp/omp/vector.hpp b/include/alp/omp/vector.hpp index 00e7e2104..ca23229a5 100644 --- a/include/alp/omp/vector.hpp +++ b/include/alp/omp/vector.hpp @@ -169,9 +169,8 @@ namespace alp { #pragma omp parallel for for( size_t thread = 0; thread < config::OMP::current_threads(); ++thread ) { - const size_t tr = d.getThreadCoords( thread ).tr; - const size_t tc = d.getThreadCoords( thread ).tc; - const auto block_grid_dims = d.getLocalBlockGridDims( tr, tc ); + const auto t_coords = d.getThreadCoords( thread ); + const auto block_grid_dims = d.getLocalBlockGridDims( t_coords.tr, t_coords.tc ); // Assuming that all blocks are of the same size const size_t alloc_size = block_grid_dims.first * block_grid_dims.second * d.getBlockSize(); @@ -182,7 +181,7 @@ namespace alp { if( thread != config::OMP::current_thread_ID() ) { std::cout << "Warning: thread != OMP::current_thread_id()\n"; } - std::cout << "Thread with global coordinates tr = " << tr << " tc = " << tc + std::cout << "Thread with global coordinates tr = " << t_coords.tr << " tc = " << t_coords.tc << " on OpenMP thread " << config::OMP::current_thread_ID() << " allocating buffer of " << alloc_size << " elements " << " holding " << block_grid_dims.first << " x " << block_grid_dims.second << " blocks.\n"; @@ -202,7 +201,7 @@ namespace alp { // Populate the array of internal container wrappers for( size_t br = 0; br < block_grid_dims.first; ++br ) { for( size_t bc = 0; bc < block_grid_dims.second; ++bc ) { - const size_t offset = d.getBlocksOffset( tr, tc, br, bc ); + const size_t offset = d.getBlocksOffset( t_coords.tr, t_coords.tc, br, bc ); containers[ thread ].emplace_back( &( buffers[ thread ][ offset ] ), 
d.getBlockSize() ); } } From b1be7f9c672afc48f2f0ad051ce76a9b5ba99183 Mon Sep 17 00:00:00 2001 From: Vladimir Dimic Date: Fri, 25 Nov 2022 15:25:46 +0100 Subject: [PATCH 06/12] Pass thread coordinates using ThreadCoords object --- include/alp/omp/io.hpp | 4 ++-- include/alp/omp/matrix.hpp | 7 ++++--- include/alp/omp/storage.hpp | 16 ++++++++-------- include/alp/omp/vector.hpp | 4 ++-- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/include/alp/omp/io.hpp b/include/alp/omp/io.hpp index f8eba9952..688ea9995 100644 --- a/include/alp/omp/io.hpp +++ b/include/alp/omp/io.hpp @@ -106,7 +106,7 @@ namespace alp { #pragma omp parallel for for( size_t thread = 0; thread < config::OMP::current_threads(); ++thread ) { const auto t_coords = d.getThreadCoords( thread ); - const auto block_grid_dims = d.getLocalBlockGridDims( t_coords.tr, t_coords.tc ); + const auto block_grid_dims = d.getLocalBlockGridDims( t_coords ); RC local_rc = SUCCESS; @@ -114,7 +114,7 @@ namespace alp { for( size_t bc = 0; bc < block_grid_dims.second; ++bc ) { // Get a sequential matrix view over the block - auto refC = internal::get_view( C, t_coords.tr, t_coords.tc, t_coords.rt, br, bc ); + auto refC = internal::get_view( C, t_coords, br, bc ); // Construct a sequential Scalar container from the input Scalar Scalar< InputType, InputStructure, config::default_sequential_backend > ref_val( *val ); diff --git a/include/alp/omp/matrix.hpp b/include/alp/omp/matrix.hpp index f995c4a33..21fb0413f 100644 --- a/include/alp/omp/matrix.hpp +++ b/include/alp/omp/matrix.hpp @@ -50,6 +50,7 @@ namespace alp { template< enum view::Views target_view = view::original, typename SourceMatrix, + typename ThreadCoords, std::enable_if_t< is_matrix< SourceMatrix >::value > * = nullptr @@ -57,12 +58,12 @@ namespace alp { typename internal::new_container_type_from< typename SourceMatrix::template view_type< view::gather >::type >::template change_backend< config::default_sequential_backend >::type - get_view( 
SourceMatrix &source, const size_t tr, const size_t tc, const size_t rt, const size_t br, const size_t bc ) { + get_view( SourceMatrix &source, const ThreadCoords t, const size_t br, const size_t bc ) { // get the container const auto &distribution = getAmf( source ).getDistribution(); - const size_t thread_id = distribution.getThreadId( tr, tc, rt ); - const size_t block_id = br * distribution.getLocalBlockGridDims( tr, tc ).second + bc; + const size_t thread_id = distribution.getThreadId( t ); + const size_t block_id = br * distribution.getLocalBlockGridDims( t ).second + bc; auto &container = internal::getLocalContainer( internal::getContainer( source ), thread_id, block_id ); // make an AMF diff --git a/include/alp/omp/storage.hpp b/include/alp/omp/storage.hpp index 0f8893c92..eada60f5c 100644 --- a/include/alp/omp/storage.hpp +++ b/include/alp/omp/storage.hpp @@ -234,8 +234,8 @@ namespace alp { } /** Returns the thread ID corresponding to the given thread coordinates. */ - size_t getThreadId( const size_t tr, const size_t tc, const size_t rt ) const { - return rt * Tr * Tc + tr * Tc + tc; + size_t getThreadId( const ThreadCoords t ) const { + return t.rt * Tr * Tc + t.tr * Tc + t.tc; } /** Returns the total global amount of blocks */ @@ -244,11 +244,11 @@ namespace alp { } /** Returns the dimensions of the block grid associated to the given thread */ - std::pair< size_t, size_t > getLocalBlockGridDims( const size_t tr, const size_t tc ) const { + std::pair< size_t, size_t > getLocalBlockGridDims( const ThreadCoords t ) const { // The RHS of the + operand covers the case // when the last block of threads is not full - const size_t blocks_r = Br / Tr + ( tr < Br % Tr ? 1 : 0 ); - const size_t blocks_c = Bc / Tc + ( tc < Bc % Tc ? 1 : 0 ); + const size_t blocks_r = Br / Tr + ( t.tr < Br % Tr ? 1 : 0 ); + const size_t blocks_c = Bc / Tc + ( t.tc < Bc % Tc ? 
1 : 0 ); return { blocks_r, blocks_c }; } @@ -290,9 +290,9 @@ namespace alp { } /** For a given block, returns its offset from the beginning of the buffer in which it is stored */ - size_t getBlocksOffset( const size_t tr, const size_t tc, const size_t br, const size_t bc ) const { + size_t getBlocksOffset( const ThreadCoords t, const size_t br, const size_t bc ) const { // The offset is calculated as the sum of sizes of all previous blocks - const size_t block_coord_1D = br * getLocalBlockGridDims( tr, tc ).second + bc; + const size_t block_coord_1D = br * getLocalBlockGridDims( t ).second + bc; return block_coord_1D * getBlockSize(); } @@ -426,7 +426,7 @@ namespace alp { const size_t thread = local.tr * distribution.getThreadGridDims().Tc + local.tc; - const size_t local_block = local.br * distribution.getLocalBlockGridDims( local.tr, local.tc ).second + local.bc; + const size_t local_block = local.br * distribution.getLocalBlockGridDims( { local.tr, local.tc, local.rt } ).second + local.bc; const size_t local_element = local.i * config::BLOCK_ROW_DIM + local.j; return storage_index_type( thread, local_block, local_element ); diff --git a/include/alp/omp/vector.hpp b/include/alp/omp/vector.hpp index ca23229a5..69c60afda 100644 --- a/include/alp/omp/vector.hpp +++ b/include/alp/omp/vector.hpp @@ -170,7 +170,7 @@ namespace alp { #pragma omp parallel for for( size_t thread = 0; thread < config::OMP::current_threads(); ++thread ) { const auto t_coords = d.getThreadCoords( thread ); - const auto block_grid_dims = d.getLocalBlockGridDims( t_coords.tr, t_coords.tc ); + const auto block_grid_dims = d.getLocalBlockGridDims( t_coords ); // Assuming that all blocks are of the same size const size_t alloc_size = block_grid_dims.first * block_grid_dims.second * d.getBlockSize(); @@ -201,7 +201,7 @@ namespace alp { // Populate the array of internal container wrappers for( size_t br = 0; br < block_grid_dims.first; ++br ) { for( size_t bc = 0; bc < block_grid_dims.second; ++bc 
) { - const size_t offset = d.getBlocksOffset( t_coords.tr, t_coords.tc, br, bc ); + const size_t offset = d.getBlocksOffset( t_coords, br, bc ); containers[ thread ].emplace_back( &( buffers[ thread ][ offset ] ), d.getBlockSize() ); } } From fee7041fb52f0d3956009bea574634af749f888f Mon Sep 17 00:00:00 2001 From: Vladimir Dimic Date: Fri, 25 Nov 2022 15:29:50 +0100 Subject: [PATCH 07/12] Compute number of threads within the distribution --- include/alp/omp/storage.hpp | 4 ++++ include/alp/omp/vector.hpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/alp/omp/storage.hpp b/include/alp/omp/storage.hpp index eada60f5c..791096fdf 100644 --- a/include/alp/omp/storage.hpp +++ b/include/alp/omp/storage.hpp @@ -238,6 +238,10 @@ namespace alp { return t.rt * Tr * Tc + t.tr * Tc + t.tc; } + size_t getNumberOfThreads() const { + return Tr * Tc * Rt; + } + /** Returns the total global amount of blocks */ std::pair< size_t, size_t > getGlobalBlockGridDims() const { return { Br, Bc }; diff --git a/include/alp/omp/vector.hpp b/include/alp/omp/vector.hpp index 69c60afda..56d0829d2 100644 --- a/include/alp/omp/vector.hpp +++ b/include/alp/omp/vector.hpp @@ -151,7 +151,7 @@ namespace alp { Vector( const Distribution &d, const size_t cap = 0 - ) : num_buffers( d.getThreadGridDims().Tr * d.getThreadGridDims().Tc * d.getThreadGridDims().Rt ), + ) : num_buffers( d.getNumberOfThreads() ), containers( num_buffers ), initialized( false ) { From 292f234d00dbfd5b405c423a5822afe60e8edc44 Mon Sep 17 00:00:00 2001 From: Vladimir Dimic Date: Fri, 25 Nov 2022 15:40:42 +0100 Subject: [PATCH 08/12] Encapsulate thread coordinates within the local coordinates --- include/alp/omp/storage.hpp | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/include/alp/omp/storage.hpp b/include/alp/omp/storage.hpp index 791096fdf..fec0aa07e 100644 --- a/include/alp/omp/storage.hpp +++ b/include/alp/omp/storage.hpp @@ -74,6 +74,15 @@ 
namespace alp { public: + /** Type encapsulating thread coordinates within the thread grid. */ + struct ThreadCoords { + const size_t tr; + const size_t tc; + const size_t rt; + + ThreadCoords( const size_t tr, const size_t tc, const size_t rt ) : tr( tr ), tc( tc ), rt( rt ) {} + }; + /** Type encapsulating the global element coordinate. */ struct GlobalCoord { @@ -87,9 +96,7 @@ namespace alp { /** Type encapsulating the local element coordinate. */ struct LocalCoord { - const size_t tr; - const size_t tc; - const size_t rt; + const ThreadCoords t; const size_t br; const size_t bc; const size_t i; @@ -101,10 +108,13 @@ namespace alp { const size_t br, const size_t bc, const size_t i, const size_t j ) : - tr( tr ), tc( tc ), - rt( rt ), + t( tr, tc, rt ), br( br ), bc( bc ), i( i ), j( j ) {} + + const ThreadCoords &getThreadCoords() const { + return t; + } }; @@ -149,14 +159,6 @@ namespace alp { ThreadGrid( const size_t Tr, const size_t Tc ) : Tr( Tr ), Tc( Tc ) {} }; - struct ThreadCoords { - const size_t tr; - const size_t tc; - const size_t rt; - - ThreadCoords( const size_t tr, const size_t tc, const size_t rt ) : tr( tr ), tc( tc ), rt( rt ) {} - }; - private: /** Row and column dimensions of the associated container */ @@ -428,9 +430,8 @@ namespace alp { const typename Distribution::GlobalCoord global( imf_r.map( i ), imf_c.map( j ) ); const typename Distribution::LocalCoord local = distribution.mapGlobalToLocal( global ); - const size_t thread = local.tr * distribution.getThreadGridDims().Tc + local.tc; - - const size_t local_block = local.br * distribution.getLocalBlockGridDims( { local.tr, local.tc, local.rt } ).second + local.bc; + const size_t thread = distribution.getThreadId( local.getThreadCoords() ); + const size_t local_block = local.br * distribution.getLocalBlockGridDims( local.getThreadCoords() ).second + local.bc; const size_t local_element = local.i * config::BLOCK_ROW_DIM + local.j; return storage_index_type( thread, local_block, 
local_element ); From e5a09ef8b7d3cb5c248b470f146db25256c18c4e Mon Sep 17 00:00:00 2001 From: Vladimir Dimic Date: Fri, 25 Nov 2022 15:41:53 +0100 Subject: [PATCH 09/12] Remove thread-grid related structures and getters since they are no longer needed --- include/alp/omp/storage.hpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/include/alp/omp/storage.hpp b/include/alp/omp/storage.hpp index fec0aa07e..e0a0deb5c 100644 --- a/include/alp/omp/storage.hpp +++ b/include/alp/omp/storage.hpp @@ -151,14 +151,6 @@ namespace alp { }; - struct ThreadGrid { - const size_t Tr; - const size_t Tc; - static constexpr size_t Rt = config::REPLICATION_FACTOR_THREADS; - - ThreadGrid( const size_t Tr, const size_t Tc ) : Tr( Tr ), Tc( Tc ) {} - }; - private: /** Row and column dimensions of the associated container */ @@ -230,11 +222,6 @@ namespace alp { return GlobalCoord( 0, 0 ); } - /** Returns the dimensions of the thread grid */ - const ThreadGrid getThreadGridDims() const { - return ThreadGrid( Tr, Tc ); - } - /** Returns the thread ID corresponding to the given thread coordinates. 
*/ size_t getThreadId( const ThreadCoords t ) const { return t.rt * Tr * Tc + t.tr * Tc + t.tc; From d2381348aef64987fbe3b9008fdaa74415c631fd Mon Sep 17 00:00:00 2001 From: Vladimir Dimic Date: Fri, 25 Nov 2022 15:47:35 +0100 Subject: [PATCH 10/12] Explain the reason behind hard-coded rt value --- include/alp/omp/storage.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/alp/omp/storage.hpp b/include/alp/omp/storage.hpp index e0a0deb5c..ed80f6f4d 100644 --- a/include/alp/omp/storage.hpp +++ b/include/alp/omp/storage.hpp @@ -206,7 +206,7 @@ namespace alp { return LocalCoord( tr, tc, - 0, // Rt + 0, // Rt always maps to the front layer local_br, local_bc, local_i, local_j ); From f8fcfed8a0e7d8379dbaab45e759f542962ad1f4 Mon Sep 17 00:00:00 2001 From: Vladimir Dimic Date: Fri, 25 Nov 2022 16:33:49 +0100 Subject: [PATCH 11/12] Calculate block id inside distribution --- include/alp/omp/matrix.hpp | 2 +- include/alp/omp/storage.hpp | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/alp/omp/matrix.hpp b/include/alp/omp/matrix.hpp index 21fb0413f..9b57f5d8b 100644 --- a/include/alp/omp/matrix.hpp +++ b/include/alp/omp/matrix.hpp @@ -63,7 +63,7 @@ namespace alp { // get the container const auto &distribution = getAmf( source ).getDistribution(); const size_t thread_id = distribution.getThreadId( t ); - const size_t block_id = br * distribution.getLocalBlockGridDims( t ).second + bc; + const size_t block_id = distribution.getLocalBlockId( t, br, bc ); auto &container = internal::getLocalContainer( internal::getContainer( source ), thread_id, block_id ); // make an AMF diff --git a/include/alp/omp/storage.hpp b/include/alp/omp/storage.hpp index ed80f6f4d..2aef7e54f 100644 --- a/include/alp/omp/storage.hpp +++ b/include/alp/omp/storage.hpp @@ -257,6 +257,14 @@ namespace alp { return global_coords.first * Bc + global_coords.second; } + size_t getLocalBlockId( const LocalCoord &local ) const { + return local.br * 
getLocalBlockGridDims( local.getThreadCoords() ).second + local.bc; + } + + size_t getLocalBlockId( const ThreadCoords &t, const size_t br, const size_t bc ) const { + return br * getLocalBlockGridDims( t ).second + bc; + } + /** * Returns the dimensions of the block given by the block id */ @@ -418,7 +426,7 @@ namespace alp { const typename Distribution::LocalCoord local = distribution.mapGlobalToLocal( global ); const size_t thread = distribution.getThreadId( local.getThreadCoords() ); - const size_t local_block = local.br * distribution.getLocalBlockGridDims( local.getThreadCoords() ).second + local.bc; + const size_t local_block = distribution.getLocalBlockId( local ); const size_t local_element = local.i * config::BLOCK_ROW_DIM + local.j; return storage_index_type( thread, local_block, local_element ); From 700a6011c6a3c0e9b9d91965b0414a4084f70ec8 Mon Sep 17 00:00:00 2001 From: Vladimir Dimic Date: Fri, 25 Nov 2022 16:35:53 +0100 Subject: [PATCH 12/12] Rename distribution to a more descriptive name --- include/alp/omp/io.hpp | 2 +- include/alp/omp/storage.hpp | 12 ++++++------ include/alp/omp/vector.hpp | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/alp/omp/io.hpp b/include/alp/omp/io.hpp index 688ea9995..01fcdebc8 100644 --- a/include/alp/omp/io.hpp +++ b/include/alp/omp/io.hpp @@ -99,7 +99,7 @@ namespace alp { return SUCCESS; } - const Distribution &d = internal::getAmf( C ).getDistribution(); + const Distribution_2_5D &d = internal::getAmf( C ).getDistribution(); RC rc = SUCCESS; diff --git a/include/alp/omp/storage.hpp b/include/alp/omp/storage.hpp index 2aef7e54f..fb24da0bf 100644 --- a/include/alp/omp/storage.hpp +++ b/include/alp/omp/storage.hpp @@ -70,7 +70,7 @@ namespace alp { * among threads. 
* */ - class Distribution { + class Distribution_2_5D { public: @@ -167,7 +167,7 @@ namespace alp { public: - Distribution( + Distribution_2_5D( const size_t m, const size_t n, const size_t num_threads ) : @@ -356,7 +356,7 @@ namespace alp { */ const size_t num_threads; - const Distribution distribution; + const Distribution_2_5D distribution; AMF( ImfR imf_r, @@ -382,7 +382,7 @@ namespace alp { std::cout << "Entering OMP AMF move constructor\n"; } - const Distribution &getDistribution() const { + const Distribution_2_5D &getDistribution() const { return distribution; } @@ -422,8 +422,8 @@ namespace alp { storage_index_type getStorageIndex( const size_t i, const size_t j, const size_t s, const size_t P ) const { (void) s; (void) P; - const typename Distribution::GlobalCoord global( imf_r.map( i ), imf_c.map( j ) ); - const typename Distribution::LocalCoord local = distribution.mapGlobalToLocal( global ); + const typename Distribution_2_5D::GlobalCoord global( imf_r.map( i ), imf_c.map( j ) ); + const typename Distribution_2_5D::LocalCoord local = distribution.mapGlobalToLocal( global ); const size_t thread = distribution.getThreadId( local.getThreadCoords() ); const size_t local_block = distribution.getLocalBlockId( local ); diff --git a/include/alp/omp/vector.hpp b/include/alp/omp/vector.hpp index 56d0829d2..8fc712b7a 100644 --- a/include/alp/omp/vector.hpp +++ b/include/alp/omp/vector.hpp @@ -149,7 +149,7 @@ namespace alp { * code sections. */ Vector( - const Distribution &d, + const Distribution_2_5D &d, const size_t cap = 0 ) : num_buffers( d.getNumberOfThreads() ), containers( num_buffers ),