From c1f5ab67174a2b075f8f335647b600fb3a578216 Mon Sep 17 00:00:00 2001 From: hayk Date: Sat, 30 Nov 2024 19:21:08 -0500 Subject: [PATCH 01/52] benchmark flow --- CMakeLists.txt | 3 +++ benchmark/benchmark.cpp | 35 +++++++++++++++++++++++++++++++++++ cmake/benchmark.cmake | 24 ++++++++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 benchmark/benchmark.cpp create mode 100644 cmake/benchmark.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ee00d1b4..2618a0cb2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -134,6 +134,9 @@ link_libraries(${DEPENDENCIES}) if(TESTS) # ---------------------------------- Tests --------------------------------- # include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/tests.cmake) +elseif(BENCHMARK) + # ------------------------------ Benchmark --------------------------------- # + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/benchmark.cmake) else() # ----------------------------------- GUI ---------------------------------- # if(${gui}) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp new file mode 100644 index 000000000..b5a7631c4 --- /dev/null +++ b/benchmark/benchmark.cpp @@ -0,0 +1,35 @@ +#include "enums.h" +#include "global.h" + +#include "framework/containers/particles.h" + +auto main(int argc, char* argv[]) -> int { + ntt::GlobalInitialize(argc, argv); + // auto species = ntt::ParticleSpecies(1u, + // "test_e", + // 1.0f, + // 1.0f, + // 10000000, + // ntt::PrtlPusher::BORIS, + // false, + // ntt::Cooling::NONE); + ntt::GlobalFinalize(); + // * @param global_ndomains total number of domains + // * @param global_decomposition decomposition of the global domain + // * @param global_ncells number of cells in each dimension + // * @param global_extent physical extent of the global domain + // * @param global_flds_bc boundary conditions for fields + // * @param global_prtl_bc boundary conditions for particles + // * @param metric_params parameters for the metric + // * @param species_params parameters for 
the particle species + // Metadomain(unsigned int, + // const std::vector&, + // const std::vector&, + // const boundaries_t&, + // const boundaries_t&, + // const boundaries_t&, + // const std::map&, + // const std::vector&); + + return 0; +} diff --git a/cmake/benchmark.cmake b/cmake/benchmark.cmake new file mode 100644 index 000000000..d2e8ca47c --- /dev/null +++ b/cmake/benchmark.cmake @@ -0,0 +1,24 @@ +set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) + +add_subdirectory(${SRC_DIR}/global ${CMAKE_CURRENT_BINARY_DIR}/global) +add_subdirectory(${SRC_DIR}/metrics ${CMAKE_CURRENT_BINARY_DIR}/metrics) +add_subdirectory(${SRC_DIR}/kernels ${CMAKE_CURRENT_BINARY_DIR}/kernels) +add_subdirectory(${SRC_DIR}/archetypes ${CMAKE_CURRENT_BINARY_DIR}/archetypes) +add_subdirectory(${SRC_DIR}/framework ${CMAKE_CURRENT_BINARY_DIR}/framework) + +if(${output}) + add_subdirectory(${SRC_DIR}/output ${CMAKE_CURRENT_BINARY_DIR}/output) + add_subdirectory(${SRC_DIR}/checkpoint ${CMAKE_CURRENT_BINARY_DIR}/checkpoint) +endif() + +set(exec benchmark.xc) +set(src ${CMAKE_CURRENT_SOURCE_DIR}/benchmark/benchmark.cpp) + +add_executable(${exec} ${src}) + +set(libs ntt_global ntt_metrics ntt_kernels ntt_archetypes ntt_framework) +if(${output}) + list(APPEND libs ntt_output ntt_checkpoint) +endif() +add_dependencies(${exec} ${libs}) +target_link_libraries(${exec} PRIVATE ${libs} stdc++fs) From 553bcae7d6fff7980815b1f510a5fef7271dd3db Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Mon, 2 Dec 2024 16:37:34 -0500 Subject: [PATCH 02/52] added new particle array --- src/framework/containers/particles.cpp | 9 +++++++++ src/framework/containers/particles.h | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index f0c64c4ee..c7f8f3b7c 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -47,6 +47,9 @@ namespace ntt { tag = array_t { label + "_tag", maxnpart }; tag_h = 
Kokkos::create_mirror_view(tag); + tag_offset = array_t { label + "_tag_offset", ntags() }; + tag_offset_h = Kokkos::create_mirror_view(tag_offset); + for (unsigned short n { 0 }; n < npld; ++n) { pld.push_back(array_t("pld", maxnpart)); pld_h.push_back(Kokkos::create_mirror_view(pld[n])); @@ -98,7 +101,13 @@ namespace ntt { std::vector npart_tag_vec; for (std::size_t t { 0 }; t < ntags(); ++t) { npart_tag_vec.push_back(npart_tag_host(t)); + tag_offset_h(t) = (t > 0) ? npart_tag_vec[t - 1] : 0; + } + for (std::size_t t { 0 }; t < ntags(); ++t) { + tag_offset_h(t) += (t > 0) ? tag_offset_h(t - 1) : 0; } + // Copy to device + Kokkos::deep_copy(tag_offset, tag_offset_h); return npart_tag_vec; } diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index b4831b64a..7496db78c 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -60,6 +60,8 @@ namespace ntt { array_t dx1_prev, dx2_prev, dx3_prev; // Array to tag the particles array_t tag; + // Array to store the cumulative number of particles per tag + array_t tag_offset; // Array to store the particle load std::vector> pld; // phi coordinate (for axisymmetry) @@ -72,6 +74,7 @@ namespace ntt { array_mirror_t weight_h; array_mirror_t phi_h; array_mirror_t tag_h; + array_mirror_t tag_offset_h; std::vector> pld_h; // for empty allocation @@ -178,6 +181,7 @@ namespace ntt { footprint += sizeof(prtldx_t) * dx2_prev.extent(0); footprint += sizeof(prtldx_t) * dx3_prev.extent(0); footprint += sizeof(short) * tag.extent(0); + footprint += sizeof(int) * tag_offset.extent(0); for (auto& p : pld) { footprint += sizeof(real_t) * p.extent(0); } From cffc563f664aeaceebe2e611945fb6852f9052e8 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Mon, 2 Dec 2024 16:37:56 -0500 Subject: [PATCH 03/52] added new sendbuffer function --- src/framework/domain/comm_mpi.hpp | 144 +++++++++++++++++ src/framework/domain/communications.cpp | 195 ++++++++++++++++++++++++ 
src/framework/domain/metadomain.h | 1 + 3 files changed, 340 insertions(+) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 63dd8271a..2067ab9a4 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -316,6 +316,7 @@ namespace comm { } } + void ParticleSendRecvCount(int send_rank, int recv_rank, const std::size_t& send_count, @@ -441,6 +442,149 @@ namespace comm { return recv_count; } + + template + void CommunicateParticleQuantityBuffer( array_t& arr, + int send_rank, + int recv_rank, + const range_tuple_t& send_slice, + const range_tuple_t& recv_slice, + Kokkos::View indices_to_send, + Kokkos::View indices_to_allocate) { + + array_t buffer( "buffer", indices_to_send.size() + + indices_to_allocate.size()); + // Populate the buffer for particle array + Kokkos::parallel_for( + "PopulateBuffer", + Kokkos::RangePolicy(0, indices_to_send.size()), + KOKKOS_LAMBDA(const size_t i) { + const auto idx = indices_to_send(i); + buffer(i) = arr(idx); + }); + CommunicateParticleQuantity(buffer, send_rank, recv_rank, send_slice, recv_slice); + // Populate from buffer to the particle array + Kokkos::parallel_for( + "PopulateFromBuffer", + Kokkos::RangePolicy(0, indices_to_allocate.size()), + KOKKOS_LAMBDA(const size_t i) { + const auto idx = indices_to_allocate(i); + arr(idx) = buffer(indices_to_send.size() + i); + }); + return; + } + + + template + void CommunicateParticlesBuffer(Particles& species, + Kokkos::View indices_to_send, + Kokkos::View indices_to_allocate, + int send_rank, + int recv_rank, + std::vector shifts_in_x){ + if ((send_rank < 0) && (recv_rank < 0)) { + raise::Error("No send or recv in SendRecvParticlesBuffered", HERE); + } + // Construct send and receive slice for the buffer + auto send_slice = range_tuple_t({ 0, indices_to_send.size() }); + auto recv_slice = range_tuple_t({ indices_to_send.size(), indices_to_send.size() + + indices_to_allocate.size() }); + // Send and receive the 
particles + CommunicateParticleQuantityBuffer(species.i1, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx1, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i1_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx1_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + if constexpr (D == Dim::_2D || D == Dim::_3D) { + CommunicateParticleQuantityBuffer(species.i2, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx2, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i2_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx2_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + } + if constexpr (D == Dim::_3D) { + CommunicateParticleQuantityBuffer(species.i3, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx3, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i3_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx3_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + } + CommunicateParticleQuantityBuffer(species.ux1, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.ux2, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, 
indices_to_allocate); + CommunicateParticleQuantityBuffer(species.ux3, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.weight, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + if constexpr (D == Dim::_2D and C != Coord::Cart) { + CommunicateParticleQuantityBuffer(species.phi, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + } + for (auto p { 0 }; p < species.npld(); ++p) { + CommunicateParticleQuantityBuffer(species.pld[p], send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + } + // Set the tag for the received particles to be alive and perform the necessary displacements + auto& this_tag = species.tag; + + if constexpr (D == Dim::_1D) + { + const auto shift_in_x1 = shifts_in_x[0]; + auto& this_i1 = species.i1; + auto& this_i1_prev = species.i1_prev; + Kokkos::parallel_for( + "SetTagAlive", + Kokkos::RangePolicy(0, indices_to_allocate.size()), + KOKKOS_LAMBDA(const size_t i) { + const auto idx = indices_to_allocate(i); + this_tag(idx) = static_cast(ParticleTag::alive); + this_i1(idx) += shift_in_x1; + this_i1_prev(idx) += shift_in_x1; + }); + } + + else if constexpr (D == Dim::_2D) + { + const auto shift_in_x1 = shifts_in_x[0]; + const auto shift_in_x2 = shifts_in_x[1]; + auto& this_i1 = species.i1; + auto& this_i2 = species.i2; + auto& this_i1_prev = species.i1_prev; + auto& this_i2_prev = species.i2_prev; + Kokkos::parallel_for( + "SetTagAlive", + Kokkos::RangePolicy(0, indices_to_allocate.size()), + KOKKOS_LAMBDA(const size_t i) { + const auto idx = indices_to_allocate(i); + this_tag(idx) = static_cast(ParticleTag::alive); + this_i1(idx) += shift_in_x1; + this_i2(idx) += shift_in_x2; + this_i1_prev(idx) += shift_in_x1; + this_i2_prev(idx) += shift_in_x2; + }); + } + + else if constexpr (D == Dim::_3D) + { + const auto shift_in_x1 = shifts_in_x[0]; + const auto shift_in_x2 = 
shifts_in_x[1]; + const auto shift_in_x3 = shifts_in_x[2]; + auto& this_i1 = species.i1; + auto& this_i2 = species.i2; + auto& this_i3 = species.i3; + auto& this_i1_prev = species.i1_prev; + auto& this_i2_prev = species.i2_prev; + auto& this_i3_prev = species.i3_prev; + Kokkos::parallel_for( + "SetTagAlive", + Kokkos::RangePolicy(0, indices_to_allocate.size()), + KOKKOS_LAMBDA(const size_t i) { + const auto idx = indices_to_allocate(i); + this_tag(idx) = static_cast(ParticleTag::alive); + this_i1(idx) += shift_in_x1; + this_i2(idx) += shift_in_x2; + this_i3(idx) += shift_in_x3; + this_i1_prev(idx) += shift_in_x1; + this_i2_prev(idx) += shift_in_x2; + this_i3_prev(idx) += shift_in_x3; + }); + } + return; + } + + } // namespace comm #endif // FRAMEWORK_DOMAIN_COMM_MPI_HPP diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 60524eedd..f484f664d 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -646,6 +646,201 @@ namespace ntt { } } + +/* + New function to communicate particles using a buffer +*/ +template + void Metadomain::CommunicateParticlesBuffer(Domain& domain, + timer::Timers* timers) { + raise::ErrorIf(timers == nullptr, + "Timers not passed when Comm::Prtl called", + HERE); + logger::Checkpoint("Communicating particles\n", HERE); + for (auto& species : domain.species) { + const auto npart_per_tag_arr = species.npart_per_tag(); + const auto tag_offset = species.tag_offset_h; + auto index_last = tag_offset[tag_offset.size() - 1] + + npart_per_tag_arr[npart_per_tag_arr.size() - 1]; + std::vector send_ranks, send_inds; + std::vector recv_ranks, recv_inds; + // at this point particles should already by tagged in the pusher +#if defined(MPI_ENABLED) + timers->start("Communications_sendrecv"); + // array that holds the number of particles to be received per tag + std::vector npart_per_tag_arr_recv(npart_per_tag_arr.size(), 0); + std::size_t total_recv_count = 0; + 
const std::size_t total_send_count = species.npart() - npart_per_tag_arr[ParticleTag::alive]; + for (auto& direction : dir::Directions::all) { + const auto [send_params, + recv_params] = GetSendRecvParams(this, domain, direction, true); + const auto [send_indrank, send_slice] = send_params; + const auto [recv_indrank, recv_slice] = recv_params; + const auto [send_ind, send_rank] = send_indrank; + const auto [recv_ind, recv_rank] = recv_indrank; + if (send_rank < 0 and recv_rank < 0) { + continue; + } + const auto send_dir_tag = mpi::PrtlSendTag::dir2tag(direction); + const auto nsend = npart_per_tag_arr[send_dir_tag]; + std::size_t nrecv = 0; + // Get the receive count + send_ranks.push_back(send_rank); + recv_ranks.push_back(recv_rank); + send_inds.push_back(send_ind); + recv_inds.push_back(recv_ind); + comm::ParticleSendRecvCount(send_rank, + recv_rank, + nsend, + nrecv); + total_recv_count += nrecv; + npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)] = nrecv; + } + timers->stop("Communications_sendrecv"); + raise::FatalIf((index_last + total_recv_count) >= species.maxnpart(), + "Too many particles to receive (cannot fit into maxptl)", + HERE); + // Now we know the number of particles to be sent and received per direction + /* permute vector contains the indices of the tags to send and receive + in the order of the directions + E.g., consider the following tag array + [ 0, 0, 3, 0, 1,...] + Then, permute vector will look something like + [0, 1, 3, ..., 2, ..., 4, ... ] + |<--------- >| |<----->| |<----->| .... + tag=0 ct tag=1 ct tag=3 ct + (dead) (alive) (tag1) ... 
+ */ + timers->start("PermuteVector"); + auto& this_tag = species.tag; + auto& this_tag_offset = species.tag_offset; + Kokkos::View permute_vector("permute_vector", species.npart()); + Kokkos::View current_offset("current_offset", species.ntags()); + Kokkos::parallel_for( + "PermuteVector", + species.npart(), + Lambda(const std::size_t p) { + auto current_tag = this_tag(p); + auto idx_permute_vec = this_tag_offset(current_tag) + current_offset(current_tag); + Kokkos::atomic_fetch_add(¤t_offset(current_tag), 1); + permute_vector(idx_permute_vec) = static_cast(p); + }); + timers->stop("PermuteVector"); + + // allocation_vector(p) assigns the pth received particle + // to the pth hole in the array, or after npart() if p > sent+dead count. + Kokkos::View allocation_vector("allocation_vector", total_recv_count); + auto allocation_vector_h = Kokkos::create_mirror_view(allocation_vector); + std::size_t n_alive = npart_per_tag_arr[ParticleTag::alive]; + std::size_t n_dead = npart_per_tag_arr[ParticleTag::dead]; + std::size_t n_holes = species.npart() - n_alive; + + timers->start("AllocationVector"); + Kokkos::parallel_for( + "AllocationVector", + total_recv_count, + Lambda(const std::size_t p) { + // Case: recevied particle count less than dead particle count -> replace dead particles + if (p < n_dead){ + allocation_vector(p) = permute_vector(p); + } + // Case: received particle count > dead particle count but < sent particle count -> replace + // sent particles + else if (p <= n_holes){ + allocation_vector(p) = permute_vector(n_alive + p); + } + // Case: received particle count exceeds sent + dead particles -> append at the end + else { + allocation_vector(p) = static_cast(index_last + (p - n_holes)); + } + }); + Kokkos::deep_copy(allocation_vector_h, allocation_vector); + timers->stop("AllocationVector"); + + std::size_t count_recv = 0; + std::size_t iteration = 0; + for (auto& direction : dir::Directions::all) { + // Get the coordinate shifts in xi + std::vector 
shifts_in_x; + auto recv_ind = recv_inds[iteration]; + if constexpr (D == Dim::_1D) { + int shift_in_x1 { 0 }; + if ((-direction)[0] == -1) { + shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); + } else if ((-direction)[0] == 1) { + shift_in_x1 = domain.mesh.n_active(in::x1); + } + shifts_in_x.push_back(shift_in_x1); + } + else if constexpr (D == Dim::_2D) { + int shift_in_x1 { 0 }, shift_in_x2 { 0 }; + if ((-direction)[0] == -1) { + shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); + } else if ((-direction)[0] == 1) { + shift_in_x1 = domain.mesh.n_active()[0]; + } + if ((-direction)[1] == -1) { + shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); + } else if ((-direction)[1] == 1) { + shift_in_x2 = domain.mesh.n_active(in::x2); + } + shifts_in_x.push_back(shift_in_x1); + shifts_in_x.push_back(shift_in_x2); + } + else if constexpr (D == Dim::_3D) { + int shift_in_x1 { 0 }, shift_in_x2 { 0 }, shift_in_x3 { 0 }; + if ((-direction)[0] == -1) { + shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); + } else if ((-direction)[0] == 1) { + shift_in_x1 = domain.mesh.n_active(in::x1); + } + if ((-direction)[1] == -1) { + shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); + } else if ((-direction)[1] == 1) { + shift_in_x2 = domain.mesh.n_active(in::x2); + } + if ((-direction)[2] == -1) { + shift_in_x3 = -subdomain(recv_ind).mesh.n_active(in::x3); + } else if ((-direction)[2] == 1) { + shift_in_x3 = domain.mesh.n_active(in::x3); + } + shifts_in_x.push_back(shift_in_x1); + shifts_in_x.push_back(shift_in_x2); + shifts_in_x.push_back(shift_in_x3); + } + + auto range_permute = std::make_pair(static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)]), + static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)] + + npart_per_tag_arr[mpi::PrtlSendTag::dir2tag(direction)])); + + auto range_allocate = std::make_pair(static_cast(allocation_vector_h(count_recv)), + static_cast(allocation_vector_h(count_recv) + + 
npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)])); + + // contains the indices of the holes where the received particles will be placed + auto indices_to_allocate = Kokkos::subview(allocation_vector, range_allocate); + // contains the indices of all particles of a given tag = mpi::PrtlSendTag::dir2tag(direction) + auto indices_to_send = Kokkos::subview(permute_vector, range_permute); + + // Main function that sends the particles and receives the arrays + auto send_rank = send_ranks[iteration]; + auto recv_rank = recv_ranks[iteration]; + comm::CommunicateParticlesBuffer( species, + indices_to_send, + indices_to_allocate, + send_rank, + recv_rank, + shifts_in_x); + count_recv += npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)]; + iteration++; + } + species.set_npart(index_last + std::max(total_recv_count, total_send_count) - total_send_count); +#endif + } + } + + + template struct Metadomain>; template struct Metadomain>; template struct Metadomain>; diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 7b3042b5b..e30bc8e97 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -89,6 +89,7 @@ namespace ntt { void CommunicateFields(Domain&, CommTags); void SynchronizeFields(Domain&, CommTags, const range_tuple_t& = { 0, 0 }); void CommunicateParticles(Domain&, timer::Timers*); + void CommunicateParticlesBuffer(Domain&, timer::Timers*); /** * @param global_ndomains total number of domains From 3c55ee7e4fc6255a2dcc772ab75b067df11281b1 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 3 Dec 2024 15:57:16 -0500 Subject: [PATCH 04/52] fixed bug in CommunicateParticlesBuffer and created metadomain object in benchmark.cpp --- benchmark/benchmark.cpp | 183 ++++++++++++++++++++---- src/framework/domain/comm_mpi.hpp | 15 +- src/framework/domain/communications.cpp | 44 +++--- 3 files changed, 193 insertions(+), 49 deletions(-) diff --git a/benchmark/benchmark.cpp 
b/benchmark/benchmark.cpp index b5a7631c4..5ab9124f0 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -1,35 +1,170 @@ #include "enums.h" #include "global.h" +#include "utils/timer.h" +#include "utils/error.h" +#include "framework/domain/domain.h" +#include "framework/domain/metadomain.h" #include "framework/containers/particles.h" +#include "metrics/metric_base.h" +#include "metrics/minkowski.h" +#include "arch/mpi_tags.h" + +#include + +/* + Test to check the performance of the new particle allocation scheme + - Create a metadomain object + - Create particle array + - Initialize the position and velocities of the particles + - Set a large timestep (see where that is set) + - Make a loop of N iterations, where the positions of particles is sorted + and pushed + - Check if the particle tags are correct after each iteration + - Compute the time taken for best of N iterations for the communication + */ + + +/* + Structure of the 2D domain + ---------------------------------- (3,3) + | | | | + | | | | + | | | | + | | | | + ---------------------------------- (3,2) + | | | | + | | | | + | | | | + | | | | + ---------------------------------- (3,1) + | | | | + | | | | + | | | | + | | | | + ---------------------------------- + (0,0) (1,0) (2,0) (3,0) +*/ + +/* + Function to check the tags of a domain object to make sure that + all the tags are alive. If the tags are not alive then the function + prints the tag count for each of the particles along with the rank + of the domain. 
+*/ +template +void CheckDomainTags(Domain& domain, + timer::Timers* timers) +{ + bool all_alive = true; + bool no_dead_particles = true; + bool tag_check = true; + for (auto& species : domain.species) { + std::cout << "Checking domain tags for species: " << species.label << std::endl; + const auto npart_per_tag_arr = species.npart_per_tag(); + const auto npart = species.npart(); + if (npart != npart_per_tag_arr[ParticleTag::alive]){ + all_alive = false; + } + for (std::size_t i = 0; i < npart_per_tag_arr.size(); ++i) { + if (i == ParticleTag::alive) { + continue; + } + if (npart_per_tag_arr[i] != 0) { + no_dead_particles = false; + } + } + auto this_tag = species.tag; + Kokkos::parallel_for("CheckTags", + npart, Lambda(const std::size_t i) { + if (this_tag(i) != ParticleTag::alive) { + tag_check = false; + } + }); + raise::ErrorIf(all_alive == false, + "Array contains particles with tags other than alive", + HERE); + raise::ErrorIf(no_dead_particles == false, + "Array contains dead particles", + HERE); + raise::ErrorIf(tag_check == false, + "Tag check failed", + HERE); + } + return; +} + + auto main(int argc, char* argv[]) -> int { + std::cout << "Constructing the domain" << std::endl; ntt::GlobalInitialize(argc, argv); - // auto species = ntt::ParticleSpecies(1u, - // "test_e", - // 1.0f, - // 1.0f, - // 10000000, - // ntt::PrtlPusher::BORIS, - // false, - // ntt::Cooling::NONE); + // Create a Metadomain object + const unsigned int ndomains = 9; + const std::vector global_decomposition = {-1, -1}; + const std::vector global_ncells = {32, 32}; + const boundaries_t global_extent = {{0.0, 0.0}, {3.0, 3.0}}; + const boundaries_t global_flds_bc = {{FldsBC::PERIODIC, FldsBC::PERIODIC}, {FldsBC::PERIODIC, FldsBC::PERIODIC}}; + const boundaries_t global_prtl_bc = {{PrtlBC::PERIODIC, PrtlBC::PERIODIC}, {PrtlBC::PERIODIC, PrtlBC::PERIODIC}}; + const std::map metric_params = {}; + const int npart = 10000; + auto species = ntt::Particles(1u, + "test_e", + 1.0f, + 1.0f, + 
npart, + ntt::PrtlPusher::BORIS, + false, + ntt::Cooling::NONE); + auto metadomain = Metadomain> + ( + ndomains, + global_decomposition, + global_ncells, + global_extent, + global_flds_bc, + global_prtl_bc, + metric_params, + {species} + ); + // Get the pointers to all the subdomains + const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; + auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); + + // Set the positions of the particles in each domain + for (auto& species : local_domain->species) + { + auto tag = ParticleTag::alive; + auto &this_i1 = species.i1; + auto &this_i2 = species.i2; + auto &this_i3 = species.i3; + auto &this_dx1 = species.dx1; + auto &this_dx2 = species.dx2; + auto &this_dx3 = species.dx3; + auto &this_ux1 = species.ux1; + auto &this_ux2 = species.ux2; + auto &this_ux3 = species.ux3; + auto &this_tag = species.tag; + Kokkos::parallel_for("SetPositions", + species.npart(), Lambda(const std::size_t i) { + this_i1(i) = 1; + this_i2(i) = 1; + this_dx1(i) = 0.01; + this_dx2(i) = 0.01; + this_ux1(i) = 0.5; + this_ux2(i) = 0.5; + this_tag(i) = tag; + }); + } + + + // Print the number of particles per domain + std::cout << "Number of particles in domain " << local_subdomain_idx << ": " << local_domain->species[0].npart() << std::endl; + // Print the position of the 5 particles in the domain + ntt::GlobalFinalize(); - // * @param global_ndomains total number of domains - // * @param global_decomposition decomposition of the global domain - // * @param global_ncells number of cells in each dimension - // * @param global_extent physical extent of the global domain - // * @param global_flds_bc boundary conditions for fields - // * @param global_prtl_bc boundary conditions for particles - // * @param metric_params parameters for the metric - // * @param species_params parameters for the particle species - // Metadomain(unsigned int, - // const std::vector&, - // const std::vector&, - // const boundaries_t&, - // const 
boundaries_t&, - // const boundaries_t&, - // const std::map&, - // const std::vector&); + + std::cout << "Terminating" << std::endl; return 0; } diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 2067ab9a4..ed73302b2 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -452,13 +452,13 @@ namespace comm { Kokkos::View indices_to_send, Kokkos::View indices_to_allocate) { - array_t buffer( "buffer", indices_to_send.size() + - indices_to_allocate.size()); + array_t buffer( "buffer", indices_to_send.extent(0) + + indices_to_allocate.extent(0)); // Populate the buffer for particle array Kokkos::parallel_for( "PopulateBuffer", - Kokkos::RangePolicy(0, indices_to_send.size()), - KOKKOS_LAMBDA(const size_t i) { + indices_to_send.extent(0), + Lambda(const size_t i) { const auto idx = indices_to_send(i); buffer(i) = arr(idx); }); @@ -466,15 +466,14 @@ namespace comm { // Populate from buffer to the particle array Kokkos::parallel_for( "PopulateFromBuffer", - Kokkos::RangePolicy(0, indices_to_allocate.size()), - KOKKOS_LAMBDA(const size_t i) { + indices_to_allocate.extent(0), + Lambda(const size_t i) { const auto idx = indices_to_allocate(i); - arr(idx) = buffer(indices_to_send.size() + i); + arr(idx) = buffer(indices_to_send.extent(0) + i); }); return; } - template void CommunicateParticlesBuffer(Particles& species, Kokkos::View indices_to_send, diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index f484f664d..4ad29a327 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -660,7 +660,7 @@ template for (auto& species : domain.species) { const auto npart_per_tag_arr = species.npart_per_tag(); const auto tag_offset = species.tag_offset_h; - auto index_last = tag_offset[tag_offset.size() - 1] + + auto index_last = tag_offset[tag_offset.extent(0) - 1] + npart_per_tag_arr[npart_per_tag_arr.size() - 1]; std::vector 
send_ranks, send_inds; std::vector recv_ranks, recv_inds; @@ -668,9 +668,11 @@ template #if defined(MPI_ENABLED) timers->start("Communications_sendrecv"); // array that holds the number of particles to be received per tag - std::vector npart_per_tag_arr_recv(npart_per_tag_arr.size(), 0); + std::vector npart_per_tag_arr_recv(npart_per_tag_arr.size(), 0); std::size_t total_recv_count = 0; - const std::size_t total_send_count = species.npart() - npart_per_tag_arr[ParticleTag::alive]; + const std::size_t total_send_count = species.npart() - + npart_per_tag_arr[ParticleTag::alive] - + npart_per_tag_arr[ParticleTag::dead]; for (auto& direction : dir::Directions::all) { const auto [send_params, recv_params] = GetSendRecvParams(this, domain, direction, true); @@ -714,8 +716,10 @@ template timers->start("PermuteVector"); auto& this_tag = species.tag; auto& this_tag_offset = species.tag_offset; - Kokkos::View permute_vector("permute_vector", species.npart()); - Kokkos::View current_offset("current_offset", species.ntags()); + Kokkos::View permute_vector("permute_vector", species.npart()); + // Current offset is a helper array used to create permute vector + // It stores the number of particles of a given tag type stored during the loop + Kokkos::View current_offset("current_offset", species.ntags()); Kokkos::parallel_for( "PermuteVector", species.npart(), @@ -729,7 +733,7 @@ template // allocation_vector(p) assigns the pth received particle // to the pth hole in the array, or after npart() if p > sent+dead count. 
- Kokkos::View allocation_vector("allocation_vector", total_recv_count); + Kokkos::View allocation_vector("allocation_vector", total_recv_count); auto allocation_vector_h = Kokkos::create_mirror_view(allocation_vector); std::size_t n_alive = npart_per_tag_arr[ParticleTag::alive]; std::size_t n_dead = npart_per_tag_arr[ParticleTag::dead]; @@ -759,7 +763,15 @@ template std::size_t count_recv = 0; std::size_t iteration = 0; + // Main loop over all direction where we send the data for (auto& direction : dir::Directions::all) { + // When nowhere to send and receive + auto send_rank = send_ranks[iteration]; + auto recv_rank = recv_ranks[iteration]; + + if (send_rank < 0 and recv_rank < 0) { + continue; + } // Get the coordinate shifts in xi std::vector shifts_in_x; auto recv_ind = recv_inds[iteration]; @@ -809,22 +821,20 @@ template shifts_in_x.push_back(shift_in_x3); } - auto range_permute = std::make_pair(static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)]), - static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)] + + // Tuple that contains the start and end indices of permtute_vec pointing to a given tag type = dir2tag(dir) + auto range_permute = std::make_pair(static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)]), + static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)] + npart_per_tag_arr[mpi::PrtlSendTag::dir2tag(direction)])); - - auto range_allocate = std::make_pair(static_cast(allocation_vector_h(count_recv)), - static_cast(allocation_vector_h(count_recv) + - npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)])); - - // contains the indices of the holes where the received particles will be placed - auto indices_to_allocate = Kokkos::subview(allocation_vector, range_allocate); + // Tuple that contains the start and end indices for allocation_vector pointing to a given tag type = dir2tag(dir) + auto range_allocate = std::make_pair(static_cast(count_recv), + static_cast(count_recv + + 
npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)])); // contains the indices of all particles of a given tag = mpi::PrtlSendTag::dir2tag(direction) auto indices_to_send = Kokkos::subview(permute_vector, range_permute); + // contains the indices of the holes where the received particles will be placed + auto indices_to_allocate = Kokkos::subview(allocation_vector, range_allocate); // Main function that sends the particles and receives the arrays - auto send_rank = send_ranks[iteration]; - auto recv_rank = recv_ranks[iteration]; comm::CommunicateParticlesBuffer( species, indices_to_send, indices_to_allocate, From 5f1fe4a46881b4b28262e7d439a0da15600b9324 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 3 Dec 2024 16:35:07 -0500 Subject: [PATCH 05/52] Printing nparticles per domain --- benchmark/benchmark.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 5ab9124f0..4a8923027 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -128,7 +128,11 @@ auto main(int argc, char* argv[]) -> int { {species} ); // Get the pointers to all the subdomains - const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; + //int rank; + //MPI_Comm_rank(MPI_COMM_WORLD, &rank); + for (int i=0; i int { species.npart(), Lambda(const std::size_t i) { this_i1(i) = 1; this_i2(i) = 1; + this_i3(i) = 0; this_dx1(i) = 0.01; this_dx2(i) = 0.01; this_ux1(i) = 0.5; @@ -157,11 +162,17 @@ auto main(int argc, char* argv[]) -> int { }); } - + // Get and print the extent of each domain + std::cout << fmt::format("x1 extent {%.2f; %.2f} \n", + local_domain->mesh.extent(in::x1).first, + local_domain->mesh.extent(in::x1).second); + std::cout << fmt::format("x2 extent {%.2f; %.2f} \n", + local_domain->mesh.extent(in::x2).first, + local_domain->mesh.extent(in::x2).second); // Print the number of particles per domain std::cout << "Number of particles in domain " << 
local_subdomain_idx << ": " << local_domain->species[0].npart() << std::endl; // Print the position of the 5 particles in the domain - + } ntt::GlobalFinalize(); std::cout << "Terminating" << std::endl; From 5b14e3474ec4f9494db572cdaf22add4d2e4768f Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 3 Dec 2024 18:45:56 -0500 Subject: [PATCH 06/52] Printing particle count in domain --- benchmark/benchmark.cpp | 150 ++++++++++++++++++++++++++-------------- 1 file changed, 97 insertions(+), 53 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 4a8923027..5eebb4d2d 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -1,7 +1,6 @@ #include "enums.h" #include "global.h" -#include "utils/timer.h" #include "utils/error.h" #include "framework/domain/domain.h" #include "framework/domain/metadomain.h" @@ -11,6 +10,15 @@ #include "arch/mpi_tags.h" #include +#define TIMER_START(label) \ + Kokkos::fence(); \ + auto start_##label = std::chrono::high_resolution_clock::now(); + +#define TIMER_STOP(label) \ + Kokkos::fence(); \ + auto stop_##label = std::chrono::high_resolution_clock::now(); \ + auto duration_##label = std::chrono::duration_cast(stop_##label - start_##label).count(); \ + std::cout << "Timer [" #label "]: " << duration_##label << " microseconds" << std::endl; /* Test to check the performance of the new particle allocation scheme @@ -53,14 +61,12 @@ of the domain. 
*/ template -void CheckDomainTags(Domain& domain, - timer::Timers* timers) +void CheckDomainTags(Domain& domain) { bool all_alive = true; bool no_dead_particles = true; - bool tag_check = true; for (auto& species : domain.species) { - std::cout << "Checking domain tags for species: " << species.label << std::endl; + std::cout << "Checking domain tags for species: " << species.label() << std::endl; const auto npart_per_tag_arr = species.npart_per_tag(); const auto npart = species.npart(); if (npart != npart_per_tag_arr[ParticleTag::alive]){ @@ -74,26 +80,33 @@ void CheckDomainTags(Domain& domain, no_dead_particles = false; } } - auto this_tag = species.tag; - Kokkos::parallel_for("CheckTags", - npart, Lambda(const std::size_t i) { - if (this_tag(i) != ParticleTag::alive) { - tag_check = false; - } - }); + raise::ErrorIf(all_alive == false, "Array contains particles with tags other than alive", HERE); raise::ErrorIf(no_dead_particles == false, "Array contains dead particles", HERE); - raise::ErrorIf(tag_check == false, - "Tag check failed", - HERE); + //raise::ErrorIf(tag_check_h(0) == false, + // "Tag check failed", + // HERE); } return; } +void InitializePositionsDomain(Domain>& domain) +{ + for (auto& species : domain.species) { + TIMER_START(Sorting_timer); + species.SortByTags(); + TIMER_STOP(Sorting_timer); + species.SyncHostDevice(); + std::cout << "Number of particles in domain: " << species.npart() << std::endl; + //std::cout << "Extent of i1" << species.i1.extent(0) << std::endl; + } + CheckDomainTags(domain); +} + auto main(int argc, char* argv[]) -> int { @@ -101,44 +114,23 @@ auto main(int argc, char* argv[]) -> int { ntt::GlobalInitialize(argc, argv); // Create a Metadomain object const unsigned int ndomains = 9; - const std::vector global_decomposition = {-1, -1}; + const std::vector global_decomposition = {{}}; const std::vector global_ncells = {32, 32}; const boundaries_t global_extent = {{0.0, 0.0}, {3.0, 3.0}}; const boundaries_t global_flds_bc = 
{{FldsBC::PERIODIC, FldsBC::PERIODIC}, {FldsBC::PERIODIC, FldsBC::PERIODIC}}; const boundaries_t global_prtl_bc = {{PrtlBC::PERIODIC, PrtlBC::PERIODIC}, {PrtlBC::PERIODIC, PrtlBC::PERIODIC}}; const std::map metric_params = {}; - const int npart = 10000; + const int maxnpart = 1000; auto species = ntt::Particles(1u, "test_e", 1.0f, 1.0f, - npart, + maxnpart, ntt::PrtlPusher::BORIS, false, ntt::Cooling::NONE); - auto metadomain = Metadomain> - ( - ndomains, - global_decomposition, - global_ncells, - global_extent, - global_flds_bc, - global_prtl_bc, - metric_params, - {species} - ); - // Get the pointers to all the subdomains - //int rank; - //MPI_Comm_rank(MPI_COMM_WORLD, &rank); - for (int i=0; ispecies) - { - auto tag = ParticleTag::alive; + species.set_npart(maxnpart); auto &this_i1 = species.i1; auto &this_i2 = species.i2; auto &this_i3 = species.i3; @@ -149,30 +141,82 @@ auto main(int argc, char* argv[]) -> int { auto &this_ux2 = species.ux2; auto &this_ux3 = species.ux3; auto &this_tag = species.tag; + + std::cout << "Species particle count is " << species.npart() << std::endl; Kokkos::parallel_for("SetPositions", species.npart(), Lambda(const std::size_t i) { this_i1(i) = 1; this_i2(i) = 1; - this_i3(i) = 0; + this_i3(i) = 1; this_dx1(i) = 0.01; this_dx2(i) = 0.01; - this_ux1(i) = 0.5; - this_ux2(i) = 0.5; - this_tag(i) = tag; + this_ux1(i) = 0.; + this_ux2(i) = 0.; + this_ux3(i) = 0.; + this_tag(i) = 1; }); - } + Kokkos::fence(); + std::cout << "Species set " << species.npart() << std::endl; + auto metadomain = Metadomain> + ( ndomains, + global_decomposition, + global_ncells, + global_extent, + global_flds_bc, + global_prtl_bc, + metric_params, + {species} + ); + + //metadomain.runOnLocalDomains([&](auto& loc_dom) { + // InitializePositionsDomain(loc_dom); + //}); + + // Get the pointer to the subdomain + //const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; + //auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); + + // Set 
the positions of the particles in each domain + //for (auto& species : local_domain->species) + //{ + // auto tag = ParticleTag::alive; + // auto &this_i1 = species.i1; + // auto &this_i2 = species.i2; + // auto &this_i3 = species.i3; + // auto &this_dx1 = species.dx1; + // auto &this_dx2 = species.dx2; + // auto &this_dx3 = species.dx3; + // auto &this_ux1 = species.ux1; + // auto &this_ux2 = species.ux2; + // auto &this_ux3 = species.ux3; + // auto &this_tag = species.tag; + // Kokkos::parallel_for("SetPositions", + // species.npart(), Lambda(const std::size_t i) { + // this_i1(i) = 1; + // this_i2(i) = 1; + // this_i3(i) = 0; + // this_dx1(i) = 0.01; + // this_dx2(i) = 0.01; + // this_ux1(i) = 0.5; + // this_ux2(i) = 0.5; + // this_tag(i) = tag; + // }); +// + //species.SortByTags(); + //species.SyncHostDevice(); + //} // Get and print the extent of each domain - std::cout << fmt::format("x1 extent {%.2f; %.2f} \n", - local_domain->mesh.extent(in::x1).first, - local_domain->mesh.extent(in::x1).second); - std::cout << fmt::format("x2 extent {%.2f; %.2f} \n", - local_domain->mesh.extent(in::x2).first, - local_domain->mesh.extent(in::x2).second); + //std::cout << fmt::format("x1 extent {%.2f; %.2f} \n", + // local_domain->mesh.extent(in::x1).first, + // local_domain->mesh.extent(in::x1).second); + //std::cout << fmt::format("x2 extent {%.2f; %.2f} \n", + // local_domain->mesh.extent(in::x2).first, + // local_domain->mesh.extent(in::x2).second); // Print the number of particles per domain - std::cout << "Number of particles in domain " << local_subdomain_idx << ": " << local_domain->species[0].npart() << std::endl; + //std::cout << "Number of particles in domain " << local_subdomain_idx << ": " << local_domain->species[0].npart() << std::endl; // Print the position of the 5 particles in the domain - } + ntt::GlobalFinalize(); std::cout << "Terminating" << std::endl; From 1b0993d7b475a0be005686cbd1516f13a7b762e5 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: 
Fri, 6 Dec 2024 15:13:14 -0500 Subject: [PATCH 07/52] benchmark/benchmark.cpp fixed benchmark.cpp --- src/framework/containers/particles.cpp | 3 +++ src/framework/domain/comm_mpi.hpp | 2 -- src/framework/domain/communications.cpp | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index c7f8f3b7c..c97f8da2d 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -85,6 +85,9 @@ namespace ntt { auto this_tag = tag; array_t npart_tag("npart_tags", ntags()); + // Print tag_h array + auto tag_host = Kokkos::create_mirror_view(tag); + Kokkos::deep_copy(tag_host, tag); auto npart_tag_scatter = Kokkos::Experimental::create_scatter_view(npart_tag); Kokkos::parallel_for( "NpartPerTag", diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index ed73302b2..d29a5758b 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -363,12 +363,10 @@ namespace comm { recv_rank, send_slice.second - send_slice.first, recv_count); - raise::FatalIf((index_last + recv_count) >= species.maxnpart(), "Too many particles to receive (cannot fit into maxptl)", HERE); const auto recv_slice = range_tuple_t({ index_last, index_last + recv_count }); - CommunicateParticleQuantity(species.i1, send_rank, recv_rank, send_slice, recv_slice); CommunicateParticleQuantity(species.dx1, send_rank, recv_rank, send_slice, recv_slice); CommunicateParticleQuantity(species.i1_prev, diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 4ad29a327..cdd32aed6 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -631,7 +631,6 @@ namespace ntt { index_last += recv_count; species.set_npart(index_last); } - Kokkos::deep_copy( Kokkos::subview(species.tag, std::make_pair(send_pmin, send_pmax)), ParticleTag::dead); @@ -844,6 +843,7 
@@ template count_recv += npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)]; iteration++; } + // If receive count is less than send count then make the tags of sent dead ? Ask Hayk species.set_npart(index_last + std::max(total_recv_count, total_send_count) - total_send_count); #endif } From 320292467f50dbd31ceb4036ab7a9489765b7420 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Thu, 12 Dec 2024 16:48:27 -0500 Subject: [PATCH 08/52] create mirror views for MPISendRecv in comm_mpi --- src/framework/domain/comm_mpi.hpp | 41 ++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index d29a5758b..33431cfe7 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -47,15 +47,19 @@ namespace comm { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); + raise::ErrorIf( (send_rank == rank && send_idx != idx) || (recv_rank == rank && recv_idx != idx), "Multiple-domain single-rank communication not yet implemented", HERE); + if ((send_idx == idx) and (recv_idx == idx)) { // trivial copy if sending to self and receiving from self + if (not additive) { + // simply filling the ghost cells if constexpr (D == Dim::_1D) { Kokkos::deep_copy(Kokkos::subview(fld, recv_slice[0], comps), @@ -65,6 +69,7 @@ namespace comm { Kokkos::subview(fld, recv_slice[0], recv_slice[1], comps), Kokkos::subview(fld, send_slice[0], send_slice[1], comps)); } else if constexpr (D == Dim::_3D) { + Kokkos::deep_copy( Kokkos::subview(fld, recv_slice[0], recv_slice[1], recv_slice[2], comps), Kokkos::subview(fld, send_slice[0], send_slice[1], send_slice[2], comps)); @@ -177,13 +182,19 @@ namespace comm { comps.second - comps.first); } } + + auto send_fld_h = Kokkos::create_mirror_view(send_fld); + auto recv_fld_h = Kokkos::create_mirror_view(recv_fld); + Kokkos::deep_copy(send_fld_h, send_fld); if (send_rank >= 0 && recv_rank >= 0) { - 
MPI_Sendrecv(send_fld.data(), + // Segfault here: print mpi params + // Create host views + MPI_Sendrecv(send_fld_h.data(), nsend, mpi::get_type(), send_rank, 0, - recv_fld.data(), + recv_fld_h.data(), nrecv, mpi::get_type(), recv_rank, @@ -191,14 +202,16 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if (send_rank >= 0) { - MPI_Send(send_fld.data(), + MPI_Send(send_fld_h.data(), nsend, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); + } else if (recv_rank >= 0) { - MPI_Recv(recv_fld.data(), + auto recv_fld_h = Kokkos::create_mirror_view(recv_fld); + MPI_Recv(recv_fld_h.data(), nrecv, mpi::get_type(), recv_rank, @@ -208,7 +221,10 @@ namespace comm { } else { raise::Error("CommunicateField called with negative ranks", HERE); } + Kokkos::deep_copy(recv_fld, recv_fld_h); + if (recv_rank >= 0) { + // !TODO: perhaps directly recv to the fld? if (not additive) { if constexpr (D == Dim::_1D) { @@ -282,16 +298,18 @@ namespace comm { int recv_rank, const range_tuple_t& send_slice, const range_tuple_t& recv_slice) { + auto array_h = Kokkos::create_mirror_view(arr); + Kokkos::deep_copy(array_h, arr); const std::size_t send_count = send_slice.second - send_slice.first; const std::size_t recv_count = recv_slice.second - recv_slice.first; if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and (recv_count > 0)) { - MPI_Sendrecv(arr.data() + send_slice.first, + MPI_Sendrecv(array_h.data() + send_slice.first, send_count, mpi::get_type(), send_rank, 0, - arr.data() + recv_slice.first, + array_h.data() + recv_slice.first, recv_count, mpi::get_type(), recv_rank, @@ -299,14 +317,14 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if ((send_rank >= 0) and (send_count > 0)) { - MPI_Send(arr.data() + send_slice.first, + MPI_Send(array_h.data() + send_slice.first, send_count, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); } else if ((recv_rank >= 0) and (recv_count > 0)) { - MPI_Recv(arr.data() + recv_slice.first, + MPI_Recv(array_h.data() + 
recv_slice.first, recv_count, mpi::get_type(), recv_rank, @@ -314,6 +332,7 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } + Kokkos::deep_copy(arr, array_h); } @@ -457,8 +476,7 @@ namespace comm { "PopulateBuffer", indices_to_send.extent(0), Lambda(const size_t i) { - const auto idx = indices_to_send(i); - buffer(i) = arr(idx); + buffer(i) = arr(indices_to_send(i)); }); CommunicateParticleQuantity(buffer, send_rank, recv_rank, send_slice, recv_slice); // Populate from buffer to the particle array @@ -466,8 +484,7 @@ namespace comm { "PopulateFromBuffer", indices_to_allocate.extent(0), Lambda(const size_t i) { - const auto idx = indices_to_allocate(i); - arr(idx) = buffer(indices_to_send.extent(0) + i); + arr(indices_to_allocate(i)) = buffer(indices_to_send.extent(0) + i); }); return; } From 831e7d9cc12e3f484e14c09e3362dfbf00049a39 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Thu, 12 Dec 2024 16:49:43 -0500 Subject: [PATCH 09/52] fixed function to time old and new communication routines --- benchmark/benchmark.cpp | 282 +++++++++++++++++----------------------- 1 file changed, 116 insertions(+), 166 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 5eebb4d2d..6bfe5c7c7 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -5,11 +5,11 @@ #include "framework/domain/domain.h" #include "framework/domain/metadomain.h" #include "framework/containers/particles.h" +#include "framework/domain/communications.cpp" #include "metrics/metric_base.h" #include "metrics/minkowski.h" -#include "arch/mpi_tags.h" - #include + #define TIMER_START(label) \ Kokkos::fence(); \ auto start_##label = std::chrono::high_resolution_clock::now(); @@ -22,105 +22,88 @@ /* Test to check the performance of the new particle allocation scheme - - Create a metadomain object - - Create particle array - - Initialize the position and velocities of the particles - - Set a large timestep (see where that is set) - - Make a loop of N iterations, 
where the positions of particles is sorted - and pushed - - Check if the particle tags are correct after each iteration + - Create a metadomain object main() + - Set npart + initialize tags InitializeParticleArrays() + - 'Push' the particles by randomly updating the tags PushParticles() + - Communicate particles to neighbors and time the communication - Compute the time taken for best of N iterations for the communication */ - -/* - Structure of the 2D domain - ---------------------------------- (3,3) - | | | | - | | | | - | | | | - | | | | - ---------------------------------- (3,2) - | | | | - | | | | - | | | | - | | | | - ---------------------------------- (3,1) - | | | | - | | | | - | | | | - | | | | - ---------------------------------- - (0,0) (1,0) (2,0) (3,0) -*/ - -/* - Function to check the tags of a domain object to make sure that - all the tags are alive. If the tags are not alive then the function - prints the tag count for each of the particles along with the rank - of the domain. 
-*/ +// Set npart and set the particle tags to alive template -void CheckDomainTags(Domain& domain) -{ - bool all_alive = true; - bool no_dead_particles = true; - for (auto& species : domain.species) { - std::cout << "Checking domain tags for species: " << species.label() << std::endl; - const auto npart_per_tag_arr = species.npart_per_tag(); - const auto npart = species.npart(); - if (npart != npart_per_tag_arr[ParticleTag::alive]){ - all_alive = false; - } - for (std::size_t i = 0; i < npart_per_tag_arr.size(); ++i) { - if (i == ParticleTag::alive) { - continue; - } - if (npart_per_tag_arr[i] != 0) { - no_dead_particles = false; +void InitializeParticleArrays(Domain &domain, const int npart){ + raise::ErrorIf(npart > domain.species[0].maxnpart(), + "Npart cannot be greater than maxnpart", HERE); + const auto nspecies = domain.species.size(); + for (int i_spec = 0; i_spec < nspecies; i_spec++) { + domain.species[i_spec].set_npart(npart); + domain.species[i_spec].SyncHostDevice(); + auto &this_tag = domain.species[i_spec].tag; + Kokkos::parallel_for( + "Initialize particles", + npart, + Lambda(const std::size_t i) + { + this_tag(i) = ParticleTag::alive; } - } - - raise::ErrorIf(all_alive == false, - "Array contains particles with tags other than alive", - HERE); - raise::ErrorIf(no_dead_particles == false, - "Array contains dead particles", - HERE); - //raise::ErrorIf(tag_check_h(0) == false, - // "Tag check failed", - // HERE); + ); } return; } -void InitializePositionsDomain(Domain>& domain) -{ - for (auto& species : domain.species) { - TIMER_START(Sorting_timer); - species.SortByTags(); - TIMER_STOP(Sorting_timer); - species.SyncHostDevice(); - std::cout << "Number of particles in domain: " << species.npart() << std::endl; - //std::cout << "Extent of i1" << species.i1.extent(0) << std::endl; +// Randomly reassign tags to particles for a fraction of particles +template +void PushParticles(Domain &domain, const double send_frac, + const int seed_ind, const int 
seed_tag){ + raise::ErrorIf(send_frac > 1.0, "send_frac cannot be greater than 1.0", HERE); + const auto nspecies = domain.species.size(); + for (int i_spec = 0; i_spec < nspecies; i_spec++) { + domain.species[i_spec].set_unsorted(); + const auto nparticles = domain.species[i_spec].npart(); + const auto nparticles_to_send = static_cast(send_frac * nparticles); + // Generate random indices to send + Kokkos::Random_XorShift64_Pool<> random_pool(seed_ind); + Kokkos::View indices_to_send("indices_to_send", nparticles_to_send); + Kokkos::fill_random(indices_to_send, random_pool, 0, nparticles); + // Generate random tags to send + Kokkos::Random_XorShift64_Pool<> random_pool_tag(seed_tag); + Kokkos::View tags_to_send("tags_to_send", nparticles_to_send); + Kokkos::fill_random(tags_to_send, random_pool_tag, 0, domain.species[i_spec].ntags()); + auto &this_tag = domain.species[i_spec].tag; + Kokkos::parallel_for( + "Push particles", + nparticles_to_send, + Lambda(const std::size_t i) + { + auto prtl_to_send = indices_to_send(i); + auto tag_to_send = tags_to_send(i); + this_tag(prtl_to_send) = tag_to_send; + } + ); + domain.species[i_spec].npart_per_tag(); + domain.species[i_spec].SyncHostDevice(); } - CheckDomainTags(domain); + return; } - - auto main(int argc, char* argv[]) -> int { std::cout << "Constructing the domain" << std::endl; ntt::GlobalInitialize(argc, argv); // Create a Metadomain object - const unsigned int ndomains = 9; - const std::vector global_decomposition = {{}}; - const std::vector global_ncells = {32, 32}; - const boundaries_t global_extent = {{0.0, 0.0}, {3.0, 3.0}}; - const boundaries_t global_flds_bc = {{FldsBC::PERIODIC, FldsBC::PERIODIC}, {FldsBC::PERIODIC, FldsBC::PERIODIC}}; - const boundaries_t global_prtl_bc = {{PrtlBC::PERIODIC, PrtlBC::PERIODIC}, {PrtlBC::PERIODIC, PrtlBC::PERIODIC}}; + const unsigned int ndomains = 1; + const std::vector global_decomposition = {{-1,-1, -1}}; + const std::vector global_ncells = {32, 32, 32}; + const 
boundaries_t global_extent = {{0.0, 3.0}, {0.0, 3.0}, {0.0, 3.0}}; + const boundaries_t global_flds_bc = { {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC}}; + const boundaries_t global_prtl_bc = { {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}}; const std::map metric_params = {}; - const int maxnpart = 1000; + const int maxnpart = argc > 1 ? std::stoi(argv[1]) : 1000; + const double npart_to_send_frac = 0.01; + const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); auto species = ntt::Particles(1u, "test_e", 1.0f, @@ -129,35 +112,7 @@ auto main(int argc, char* argv[]) -> int { ntt::PrtlPusher::BORIS, false, ntt::Cooling::NONE); - - species.set_npart(maxnpart); - auto &this_i1 = species.i1; - auto &this_i2 = species.i2; - auto &this_i3 = species.i3; - auto &this_dx1 = species.dx1; - auto &this_dx2 = species.dx2; - auto &this_dx3 = species.dx3; - auto &this_ux1 = species.ux1; - auto &this_ux2 = species.ux2; - auto &this_ux3 = species.ux3; - auto &this_tag = species.tag; - - std::cout << "Species particle count is " << species.npart() << std::endl; - Kokkos::parallel_for("SetPositions", - species.npart(), Lambda(const std::size_t i) { - this_i1(i) = 1; - this_i2(i) = 1; - this_i3(i) = 1; - this_dx1(i) = 0.01; - this_dx2(i) = 0.01; - this_ux1(i) = 0.; - this_ux2(i) = 0.; - this_ux3(i) = 0.; - this_tag(i) = 1; - }); - Kokkos::fence(); - std::cout << "Species set " << species.npart() << std::endl; - auto metadomain = Metadomain> + auto metadomain = Metadomain> ( ndomains, global_decomposition, global_ncells, @@ -168,58 +123,53 @@ auto main(int argc, char* argv[]) -> int { {species} ); - //metadomain.runOnLocalDomains([&](auto& loc_dom) { - // InitializePositionsDomain(loc_dom); - //}); - - // Get the pointer to the subdomain - //const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; - //auto 
local_domain = metadomain.subdomain_ptr(local_subdomain_idx); - - // Set the positions of the particles in each domain - //for (auto& species : local_domain->species) - //{ - // auto tag = ParticleTag::alive; - // auto &this_i1 = species.i1; - // auto &this_i2 = species.i2; - // auto &this_i3 = species.i3; - // auto &this_dx1 = species.dx1; - // auto &this_dx2 = species.dx2; - // auto &this_dx3 = species.dx3; - // auto &this_ux1 = species.ux1; - // auto &this_ux2 = species.ux2; - // auto &this_ux3 = species.ux3; - // auto &this_tag = species.tag; - // Kokkos::parallel_for("SetPositions", - // species.npart(), Lambda(const std::size_t i) { - // this_i1(i) = 1; - // this_i2(i) = 1; - // this_i3(i) = 0; - // this_dx1(i) = 0.01; - // this_dx2(i) = 0.01; - // this_ux1(i) = 0.5; - // this_ux2(i) = 0.5; - // this_tag(i) = tag; - // }); -// - //species.SortByTags(); - //species.SyncHostDevice(); - //} - - // Get and print the extent of each domain - //std::cout << fmt::format("x1 extent {%.2f; %.2f} \n", - // local_domain->mesh.extent(in::x1).first, - // local_domain->mesh.extent(in::x1).second); - //std::cout << fmt::format("x2 extent {%.2f; %.2f} \n", - // local_domain->mesh.extent(in::x2).first, - // local_domain->mesh.extent(in::x2).second); - // Print the number of particles per domain - //std::cout << "Number of particles in domain " << local_subdomain_idx << ": " << local_domain->species[0].npart() << std::endl; - // Print the position of the 5 particles in the domain - - ntt::GlobalFinalize(); - - std::cout << "Terminating" << std::endl; - + const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; + auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); + auto timers = timer::Timers {{"Communication"}, nullptr, false}; + InitializeParticleArrays(*local_domain, npart); + // Timers for both the communication routines + auto total_time_elapsed_old = 0; + auto total_time_elapsed_new = 0; + + int seed_ind = 0; + int seed_tag = 1; + for (int i = 
0; i < 10; ++i) { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort new + Kokkos::fence(); + auto start_new = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticlesBuffer(*local_domain, &timers); + auto stop_new = std::chrono::high_resolution_clock::now(); + auto duration_new = std::chrono::duration_cast(stop_new - start_new).count(); + total_time_elapsed_new += duration_new; + Kokkos::fence(); + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort old + Kokkos::fence(); + auto start_old = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticles(*local_domain, &timers); + auto stop_old = std::chrono::high_resolution_clock::now(); + auto duration_old = std::chrono::duration_cast(stop_old - start_old).count(); + total_time_elapsed_old += duration_old; + Kokkos::fence(); + } + std::cout << "Total time elapsed for old: " << total_time_elapsed_old << " microseconds" << std::endl; + std::cout << "Total time elapsed for new: " << total_time_elapsed_new << " microseconds" << std::endl; return 0; } + +/* + Buggy behavior: + Consider a single domain with a single mpi rank + Particle tag arrays is set to [0, 0, 1, 1, 2, 3, ...] 
for a single domain + CommunicateParticles() discounts all the dead particles and reassigns the + other tags to alive + CommunicateParticlesBuffer() only keeps the ParticleTag::Alive particles + and discounts the rest +*/ \ No newline at end of file From 896a9570fa0347abd2e158167ec73189f2ff8541 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Mon, 16 Dec 2024 16:13:27 -0500 Subject: [PATCH 10/52] bug fix in comm --- src/framework/domain/comm_mpi.hpp | 13 +- src/framework/domain/communications.cpp | 193 +++++++++++++++++++----- 2 files changed, 165 insertions(+), 41 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 33431cfe7..2251968c4 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -499,6 +499,16 @@ namespace comm { if ((send_rank < 0) && (recv_rank < 0)) { raise::Error("No send or recv in SendRecvParticlesBuffered", HERE); } + // First set the tags of the sent particles to be dead + auto& this_tag = species.tag; + //Kokkos::parallel_for( + //"SetTagDead", + //Kokkos::RangePolicy(0, indices_to_allocate.size()), + //KOKKOS_LAMBDA(const size_t i) { + // const auto idx = indices_to_send(i); + // this_tag(idx) = static_cast(ParticleTag::dead); + //}); + // Construct send and receive slice for the buffer auto send_slice = range_tuple_t({ 0, indices_to_send.size() }); auto recv_slice = range_tuple_t({ indices_to_send.size(), indices_to_send.size() + @@ -531,8 +541,6 @@ namespace comm { CommunicateParticleQuantityBuffer(species.pld[p], send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); } // Set the tag for the received particles to be alive and perform the necessary displacements - auto& this_tag = species.tag; - if constexpr (D == Dim::_1D) { const auto shift_in_x1 = shifts_in_x[0]; @@ -595,6 +603,7 @@ namespace comm { this_i3_prev(idx) += shift_in_x3; }); } + Kokkos::fence(); return; } diff --git a/src/framework/domain/communications.cpp 
b/src/framework/domain/communications.cpp index cdd32aed6..a43b635b7 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -657,22 +657,26 @@ template HERE); logger::Checkpoint("Communicating particles\n", HERE); for (auto& species : domain.species) { - const auto npart_per_tag_arr = species.npart_per_tag(); - const auto tag_offset = species.tag_offset_h; - auto index_last = tag_offset[tag_offset.extent(0) - 1] + - npart_per_tag_arr[npart_per_tag_arr.size() - 1]; + auto npart_per_tag_arr = species.npart_per_tag(); + auto npart = static_cast(species.npart()); + auto total_alive = static_cast(npart_per_tag_arr[ParticleTag::alive]); + auto total_dead = static_cast(npart_per_tag_arr[ParticleTag::dead]); + auto total_holes = static_cast(npart - total_alive); + auto total_send = static_cast(npart - total_alive - total_dead); + auto total_recv = static_cast(0); + auto tag_count = static_cast(npart_per_tag_arr.size()); + std::vector send_ranks, send_inds; std::vector recv_ranks, recv_inds; // at this point particles should already by tagged in the pusher #if defined(MPI_ENABLED) - timers->start("Communications_sendrecv"); + // Defined for debugging + int mpi_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + // array that holds the number of particles to be received per tag - std::vector npart_per_tag_arr_recv(npart_per_tag_arr.size(), 0); - std::size_t total_recv_count = 0; - const std::size_t total_send_count = species.npart() - - npart_per_tag_arr[ParticleTag::alive] - - npart_per_tag_arr[ParticleTag::dead]; - for (auto& direction : dir::Directions::all) { + std::vector npart_per_tag_arr_recv(tag_count, 0); + for (auto& direction : dir::Directions::all) { const auto [send_params, recv_params] = GetSendRecvParams(this, domain, direction, true); const auto [send_indrank, send_slice] = send_params; @@ -694,11 +698,11 @@ template recv_rank, nsend, nrecv); - total_recv_count += nrecv; + total_recv += nrecv; 
npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)] = nrecv; } - timers->stop("Communications_sendrecv"); - raise::FatalIf((index_last + total_recv_count) >= species.maxnpart(), + + raise::FatalIf((npart + total_recv) >= species.maxnpart(), "Too many particles to receive (cannot fit into maxptl)", HERE); // Now we know the number of particles to be sent and received per direction @@ -712,54 +716,89 @@ template tag=0 ct tag=1 ct tag=3 ct (dead) (alive) (tag1) ... */ - timers->start("PermuteVector"); auto& this_tag = species.tag; auto& this_tag_offset = species.tag_offset; Kokkos::View permute_vector("permute_vector", species.npart()); - // Current offset is a helper array used to create permute vector - // It stores the number of particles of a given tag type stored during the loop Kokkos::View current_offset("current_offset", species.ntags()); + Kokkos::parallel_for( "PermuteVector", species.npart(), Lambda(const std::size_t p) { - auto current_tag = this_tag(p); - auto idx_permute_vec = this_tag_offset(current_tag) + current_offset(current_tag); - Kokkos::atomic_fetch_add(¤t_offset(current_tag), 1); + auto current_tag = this_tag(p); + auto i_current_tag_offset = Kokkos::atomic_fetch_add(¤t_offset(current_tag), 1); + auto idx_permute_vec = this_tag_offset(current_tag) + i_current_tag_offset; permute_vector(idx_permute_vec) = static_cast(p); }); - timers->stop("PermuteVector"); + + // Check: add the end of the loop, current_offset should be equal to npart_per_tag + auto current_offset_h = Kokkos::create_mirror_view(current_offset); + Kokkos::deep_copy(current_offset_h, current_offset); + for (std::size_t i { 0 }; i < current_offset_h.size(); ++i) { + raise::FatalIf(current_offset_h(i) != npart_per_tag_arr[i], + "Error in permute vector construction", + HERE); + } // allocation_vector(p) assigns the pth received particle // to the pth hole in the array, or after npart() if p > sent+dead count. 
- Kokkos::View allocation_vector("allocation_vector", total_recv_count); - auto allocation_vector_h = Kokkos::create_mirror_view(allocation_vector); - std::size_t n_alive = npart_per_tag_arr[ParticleTag::alive]; - std::size_t n_dead = npart_per_tag_arr[ParticleTag::dead]; - std::size_t n_holes = species.npart() - n_alive; + Kokkos::View allocation_vector("allocation_vector", total_recv); + + // TWO BUGS: when nsend = nrecv, an extra dead particle is created out of nowhere + // when nrecv > nsend but < nrecv < nsend + ndead, tags of alive particles are not changed - timers->start("AllocationVector"); Kokkos::parallel_for( "AllocationVector", - total_recv_count, + total_recv, Lambda(const std::size_t p) { // Case: recevied particle count less than dead particle count -> replace dead particles - if (p < n_dead){ + if (p < total_dead){ allocation_vector(p) = permute_vector(p); } // Case: received particle count > dead particle count but < sent particle count -> replace // sent particles - else if (p <= n_holes){ - allocation_vector(p) = permute_vector(n_alive + p); + else if (p < total_holes && p >= total_dead){ + allocation_vector(p) = permute_vector(total_alive + p); } // Case: received particle count exceeds sent + dead particles -> append at the end else { - allocation_vector(p) = static_cast(index_last + (p - n_holes)); + allocation_vector(p) = static_cast(npart + (p - total_holes)); } }); - Kokkos::deep_copy(allocation_vector_h, allocation_vector); - timers->stop("AllocationVector"); + Kokkos::fence(); + + // Compute where the received particles are allocated + if (mpi_rank == 0){ + Kokkos::View particles_allocated_per_tag("particles allocated per tag", tag_count); + Kokkos::parallel_for( + "ParticlesAllocatedPerTag", + total_recv, + Lambda(const std::size_t i) { + auto index = allocation_vector(i); + auto tag = this_tag(index); + Kokkos::atomic_fetch_add(&particles_allocated_per_tag(tag), 1); + }); + Kokkos::fence(); + auto particles_allocated_per_tag_h = 
Kokkos::create_mirror_view(particles_allocated_per_tag); + Kokkos::deep_copy(particles_allocated_per_tag_h, particles_allocated_per_tag); + std::cout << "Particles allocated per tag (pre recv): "; + for (std::size_t i = 0; i < tag_count; i++){ + std::cout << "[" << particles_allocated_per_tag_h[i] << "] "; + } + std::cout << std::endl; + } + + + // Check if the particle tags are only dead or alive + //if (mpi_rank == 0){ + // std::cout << "Before COMM: " << std::endl; + // std::cout << "Tag counts: "; + // for (std::size_t i = 0; i < tag_count; i++){ + // std::cout << "[" << npart_per_tag_arr[i] << "] "; + // } + // std::cout << std::endl; + //} std::size_t count_recv = 0; std::size_t iteration = 0; // Main loop over all direction where we send the data @@ -821,8 +860,8 @@ template } // Tuple that contains the start and end indices of permtute_vec pointing to a given tag type = dir2tag(dir) - auto range_permute = std::make_pair(static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)]), - static_cast(tag_offset[mpi::PrtlSendTag::dir2tag(direction)] + + auto range_permute = std::make_pair(static_cast(species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)]), + static_cast(species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)] + npart_per_tag_arr[mpi::PrtlSendTag::dir2tag(direction)])); // Tuple that contains the start and end indices for allocation_vector pointing to a given tag type = dir2tag(dir) auto range_allocate = std::make_pair(static_cast(count_recv), @@ -843,9 +882,85 @@ template count_recv += npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)]; iteration++; } - // If receive count is less than send count then make the tags of sent dead ? 
Ask Hayk - species.set_npart(index_last + std::max(total_recv_count, total_send_count) - total_send_count); -#endif + // Compute where the received particles are allocated + //if (mpi_rank == 0){ + //Kokkos::View particles_allocated_per_tag("particles allocated per tag", tag_count); + //Kokkos::parallel_for( + // "ParticlesAllocatedPerTag", + // total_recv, + // Lambda(const std::size_t i) { + // auto index = allocation_vector(i); + // auto tag = this_tag(index); + // Kokkos::atomic_fetch_add(&particles_allocated_per_tag(tag), 1); + // }); + //Kokkos::fence(); + //auto particles_allocated_per_tag_h = Kokkos::create_mirror_view(particles_allocated_per_tag); + //Kokkos::deep_copy(particles_allocated_per_tag_h, particles_allocated_per_tag); + + //std::cout << "Particles allocated per tag (post recv): "; + //for (std::size_t i = 0; i < tag_count; i++){ + // std::cout << "[" << particles_allocated_per_tag_h[i] << "] "; + //} + //std::cout << std::endl; + // } + // If receive count is less than send count then make the tags of sent dead + if (total_recv <= total_holes){ + if (total_recv <= total_dead){ + // Case: all sent particles' tags are set to dead + /* (received) + [ | <------------------> | <-------->] + (dead) (alive) (sent) + || + (to be made dead) + ^ + (offset) + */ + + auto offset = total_alive + total_dead; + Kokkos::parallel_for( + "CommunicateParticles", + total_send, + Lambda(index_t p) { + this_tag(permute_vector(offset + p)) = ParticleTag::dead; + }); + } + else{ + // Case: tags of sent particles that are not replaced by recevied particles are made dead + /* (received) (received) + [ | <------------------> |] + (dead) (alive) (sent) + || + (to be made dead) + ^ + (offset) + */ + auto offset = total_alive + total_recv; + Kokkos::parallel_for( + "CommunicateParticles", + total_send - (total_recv - total_dead), + Lambda(index_t p) { + this_tag(permute_vector(offset + p)) = ParticleTag::dead; + }); + } + } + + + // Check if the particle tags are only dead 
or alive + species.set_npart(npart + std::max(total_send, total_recv) - total_send); + npart_per_tag_arr = species.npart_per_tag(); + //if (mpi_rank == 0) + //{ + // std::cout << "After COMM: " << std::endl; + // std::cout << "Tag counts: "; + // for (std::size_t i = 0; i < tag_count; i++){ + // std::cout << "[" << npart_per_tag_arr[i] << "] "; + // } + // std::cout << std::endl; + // std::cout << "Holes filled: " << total_holes << " Total recv: " << total_recv << + // "Total send: " << total_send << std::endl; + // std::cout << std::endl << "*************"<< std::endl; + //} + #endif } } From 9391c196f5e8c65ef4d43a9e924b948b9ab77adf Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 16 Dec 2024 17:19:36 -0500 Subject: [PATCH 11/52] fmt --- TASKLIST.md | 4 + benchmark/benchmark.cpp | 230 +++++++++------- extern/Kokkos | 2 +- extern/adios2 | 2 +- extern/plog | 2 +- src/framework/domain/comm_mpi.hpp | 333 +++++++++++++++--------- src/framework/domain/communications.cpp | 322 +++++++++++------------ 7 files changed, 499 insertions(+), 396 deletions(-) diff --git a/TASKLIST.md b/TASKLIST.md index 069a7deb2..c12f60f4c 100644 --- a/TASKLIST.md +++ b/TASKLIST.md @@ -3,3 +3,7 @@ - [ ] removing temporary variables in interpolation - [ ] passing by value vs const ref in metric - [ ] return physical coords one-by-one instead of by passing full vector + +### Things to look into + +1. 
_h fields in mpi communication diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 6bfe5c7c7..797c8ed87 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -2,59 +2,65 @@ #include "global.h" #include "utils/error.h" -#include "framework/domain/domain.h" -#include "framework/domain/metadomain.h" -#include "framework/containers/particles.h" -#include "framework/domain/communications.cpp" + #include "metrics/metric_base.h" #include "metrics/minkowski.h" + +#include "framework/containers/species.h" +#include "framework/domain/domain.h" +#include "framework/domain/metadomain.h" + #include -#define TIMER_START(label) \ - Kokkos::fence(); \ - auto start_##label = std::chrono::high_resolution_clock::now(); +#include "framework/domain/communications.cpp" + +#define TIMER_START(label) \ + Kokkos::fence(); \ + auto start_##label = std::chrono::high_resolution_clock::now(); -#define TIMER_STOP(label) \ - Kokkos::fence(); \ - auto stop_##label = std::chrono::high_resolution_clock::now(); \ - auto duration_##label = std::chrono::duration_cast(stop_##label - start_##label).count(); \ - std::cout << "Timer [" #label "]: " << duration_##label << " microseconds" << std::endl; +#define TIMER_STOP(label) \ + Kokkos::fence(); \ + auto stop_##label = std::chrono::high_resolution_clock::now(); \ + auto duration_##label = std::chrono::duration_cast( \ + stop_##label - start_##label) \ + .count(); \ + std::cout << "Timer [" #label "]: " << duration_##label << " microseconds" \ + << std::endl; /* Test to check the performance of the new particle allocation scheme - - Create a metadomain object main() - - Set npart + initialize tags InitializeParticleArrays() - - 'Push' the particles by randomly updating the tags PushParticles() + - Create a metadomain object main() + - Set npart + initialize tags InitializeParticleArrays() + - 'Push' the particles by randomly updating the tags PushParticles() - Communicate particles to neighbors and time the communication 
- Compute the time taken for best of N iterations for the communication */ // Set npart and set the particle tags to alive template -void InitializeParticleArrays(Domain &domain, const int npart){ - raise::ErrorIf(npart > domain.species[0].maxnpart(), - "Npart cannot be greater than maxnpart", HERE); +void InitializeParticleArrays(Domain& domain, const int npart) { + raise::ErrorIf(npart > domain.species[0].maxnpart(), + "Npart cannot be greater than maxnpart", + HERE); const auto nspecies = domain.species.size(); for (int i_spec = 0; i_spec < nspecies; i_spec++) { domain.species[i_spec].set_npart(npart); domain.species[i_spec].SyncHostDevice(); - auto &this_tag = domain.species[i_spec].tag; + auto& this_tag = domain.species[i_spec].tag; Kokkos::parallel_for( "Initialize particles", npart, - Lambda(const std::size_t i) - { - this_tag(i) = ParticleTag::alive; - } - ); + Lambda(const std::size_t i) { this_tag(i) = ParticleTag::alive; }); } return; } // Randomly reassign tags to particles for a fraction of particles template -void PushParticles(Domain &domain, const double send_frac, - const int seed_ind, const int seed_tag){ +void PushParticles(Domain& domain, + const double send_frac, + const int seed_ind, + const int seed_tag) { raise::ErrorIf(send_frac > 1.0, "send_frac cannot be greater than 1.0", HERE); const auto nspecies = domain.species.size(); for (int i_spec = 0; i_spec < nspecies; i_spec++) { @@ -62,26 +68,27 @@ void PushParticles(Domain &domain, const double send_frac, const auto nparticles = domain.species[i_spec].npart(); const auto nparticles_to_send = static_cast(send_frac * nparticles); // Generate random indices to send - Kokkos::Random_XorShift64_Pool<> random_pool(seed_ind); + // Kokkos::Random_XorShift64_Pool<> random_pool(seed_ind); Kokkos::View indices_to_send("indices_to_send", nparticles_to_send); - Kokkos::fill_random(indices_to_send, random_pool, 0, nparticles); + Kokkos::fill_random(indices_to_send, domain.random_pool, 0, nparticles); // 
Generate random tags to send - Kokkos::Random_XorShift64_Pool<> random_pool_tag(seed_tag); + // Kokkos::Random_XorShift64_Pool<> random_pool_tag(seed_tag); Kokkos::View tags_to_send("tags_to_send", nparticles_to_send); - Kokkos::fill_random(tags_to_send, random_pool_tag, 0, domain.species[i_spec].ntags()); - auto &this_tag = domain.species[i_spec].tag; + Kokkos::fill_random(tags_to_send, + domain.random_pool, + 0, + domain.species[i_spec].ntags()); + auto& this_tag = domain.species[i_spec].tag; Kokkos::parallel_for( - "Push particles", - nparticles_to_send, - Lambda(const std::size_t i) - { - auto prtl_to_send = indices_to_send(i); - auto tag_to_send = tags_to_send(i); - this_tag(prtl_to_send) = tag_to_send; - } - ); - domain.species[i_spec].npart_per_tag(); - domain.species[i_spec].SyncHostDevice(); + "Push particles", + nparticles_to_send, + Lambda(const std::size_t i) { + auto prtl_to_send = indices_to_send(i); + auto tag_to_send = tags_to_send(i); + this_tag(prtl_to_send) = tag_to_send; + }); + domain.species[i_spec].npart_per_tag(); + domain.species[i_spec].SyncHostDevice(); } return; } @@ -90,42 +97,51 @@ auto main(int argc, char* argv[]) -> int { std::cout << "Constructing the domain" << std::endl; ntt::GlobalInitialize(argc, argv); // Create a Metadomain object - const unsigned int ndomains = 1; - const std::vector global_decomposition = {{-1,-1, -1}}; - const std::vector global_ncells = {32, 32, 32}; - const boundaries_t global_extent = {{0.0, 3.0}, {0.0, 3.0}, {0.0, 3.0}}; - const boundaries_t global_flds_bc = { {FldsBC::PERIODIC, FldsBC::PERIODIC}, - {FldsBC::PERIODIC, FldsBC::PERIODIC}, - {FldsBC::PERIODIC, FldsBC::PERIODIC}}; - const boundaries_t global_prtl_bc = { {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}}; + const unsigned int ndomains = 1; + const std::vector global_decomposition = { + { -1, -1, -1 } + }; + const std::vector global_ncells = { 32, 32, 32 }; + const 
boundaries_t global_extent = { + { 0.0, 3.0 }, + { 0.0, 3.0 }, + { 0.0, 3.0 } + }; + const boundaries_t global_flds_bc = { + { FldsBC::PERIODIC, FldsBC::PERIODIC }, + { FldsBC::PERIODIC, FldsBC::PERIODIC }, + { FldsBC::PERIODIC, FldsBC::PERIODIC } + }; + const boundaries_t global_prtl_bc = { + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC } + }; const std::map metric_params = {}; - const int maxnpart = argc > 1 ? std::stoi(argv[1]) : 1000; + const int maxnpart = argc > 1 ? std::stoi(argv[1]) : 1000; const double npart_to_send_frac = 0.01; - const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); - auto species = ntt::Particles(1u, - "test_e", - 1.0f, - 1.0f, - maxnpart, - ntt::PrtlPusher::BORIS, - false, - ntt::Cooling::NONE); - auto metadomain = Metadomain> - ( ndomains, - global_decomposition, - global_ncells, - global_extent, - global_flds_bc, - global_prtl_bc, - metric_params, - {species} - ); + const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); + auto species = ntt::ParticlesSpecies(1u, + "test_e", + 1.0f, + 1.0f, + maxnpart, + ntt::PrtlPusher::BORIS, + false, + ntt::Cooling::NONE); + auto metadomain = Metadomain>( + ndomains, + global_decomposition, + global_ncells, + global_extent, + global_flds_bc, + global_prtl_bc, + metric_params, + { species }); const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; - auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); - auto timers = timer::Timers {{"Communication"}, nullptr, false}; + auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); + auto timers = timer::Timers { { "Communication" }, nullptr, false }; InitializeParticleArrays(*local_domain, npart); // Timers for both the communication routines auto total_time_elapsed_old = 0; @@ -133,34 +149,46 @@ auto main(int argc, char* argv[]) -> int { int seed_ind = 0; int seed_tag = 1; + Kokkos::fence(); + for (int i = 0; 
i < 10; ++i) { - // Push - seed_ind += 2; - seed_tag += 3; - PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); - // Sort new - Kokkos::fence(); - auto start_new = std::chrono::high_resolution_clock::now(); - metadomain.CommunicateParticlesBuffer(*local_domain, &timers); - auto stop_new = std::chrono::high_resolution_clock::now(); - auto duration_new = std::chrono::duration_cast(stop_new - start_new).count(); - total_time_elapsed_new += duration_new; - Kokkos::fence(); - // Push - seed_ind += 2; - seed_tag += 3; - PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); - // Sort old - Kokkos::fence(); - auto start_old = std::chrono::high_resolution_clock::now(); - metadomain.CommunicateParticles(*local_domain, &timers); - auto stop_old = std::chrono::high_resolution_clock::now(); - auto duration_old = std::chrono::duration_cast(stop_old - start_old).count(); - total_time_elapsed_old += duration_old; - Kokkos::fence(); + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort new + Kokkos::fence(); + auto start_new = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticlesBuffer(*local_domain, &timers); + auto stop_new = std::chrono::high_resolution_clock::now(); + auto duration_new = std::chrono::duration_cast( + stop_new - start_new) + .count(); + total_time_elapsed_new += duration_new; + Kokkos::fence(); + } + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort old + Kokkos::fence(); + auto start_old = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticles(*local_domain, &timers); + auto stop_old = std::chrono::high_resolution_clock::now(); + auto duration_old = std::chrono::duration_cast( + stop_old - start_old) + .count(); + total_time_elapsed_old += duration_old; + Kokkos::fence(); + } } - std::cout << "Total time elapsed for old: " << 
total_time_elapsed_old << " microseconds" << std::endl; - std::cout << "Total time elapsed for new: " << total_time_elapsed_new << " microseconds" << std::endl; + std::cout << "Total time elapsed for old: " << total_time_elapsed_old + << " microseconds" << std::endl; + std::cout << "Total time elapsed for new: " << total_time_elapsed_new + << " microseconds" << std::endl; return 0; } @@ -172,4 +200,4 @@ auto main(int argc, char* argv[]) -> int { other tags to alive CommunicateParticlesBuffer() only keeps the ParticleTag::Alive particles and discounts the rest -*/ \ No newline at end of file +*/ diff --git a/extern/Kokkos b/extern/Kokkos index 5fc08a9a7..b6a16bc9d 160000 --- a/extern/Kokkos +++ b/extern/Kokkos @@ -1 +1 @@ -Subproject commit 5fc08a9a7da14d8530f8c7035d008ef63ddb4e5c +Subproject commit b6a16bc9d88a9252d76e64fd2be20c58eb5d7f2e diff --git a/extern/adios2 b/extern/adios2 index a6e8314cc..25ccd6aaa 160000 --- a/extern/adios2 +++ b/extern/adios2 @@ -1 +1 @@ -Subproject commit a6e8314cc3c0b28d496b44dcd4f15685013b887b +Subproject commit 25ccd6aaa810bbc217b43421f9c43140082c65b9 diff --git a/extern/plog b/extern/plog index 85a871b13..96637a6e5 160000 --- a/extern/plog +++ b/extern/plog @@ -1 +1 @@ -Subproject commit 85a871b13be0bd1a9e0110744fa60cc9bd1e8380 +Subproject commit 96637a6e5e53f54e4e56d667d312c564d979ec0e diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 2251968c4..9b2ad0a33 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -47,14 +47,12 @@ namespace comm { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - raise::ErrorIf( (send_rank == rank && send_idx != idx) || (recv_rank == rank && recv_idx != idx), "Multiple-domain single-rank communication not yet implemented", HERE); - if ((send_idx == idx) and (recv_idx == idx)) { // trivial copy if sending to self and receiving from self @@ -332,10 +330,11 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } - 
Kokkos::deep_copy(arr, array_h); + if ((recv_rank >= 0) and (recv_count > 0)) { + Kokkos::deep_copy(arr, array_h); + } } - void ParticleSendRecvCount(int send_rank, int recv_rank, const std::size_t& send_count, @@ -459,154 +458,256 @@ namespace comm { return recv_count; } - template - void CommunicateParticleQuantityBuffer( array_t& arr, - int send_rank, - int recv_rank, - const range_tuple_t& send_slice, - const range_tuple_t& recv_slice, - Kokkos::View indices_to_send, - Kokkos::View indices_to_allocate) { - - array_t buffer( "buffer", indices_to_send.extent(0) + - indices_to_allocate.extent(0)); + void CommunicateParticleQuantityBuffer(array_t& arr, + int send_rank, + int recv_rank, + const range_tuple_t& send_slice, + const range_tuple_t& recv_slice, + Kokkos::View indices_to_send, + Kokkos::View indices_to_allocate) { + + array_t buffer("buffer", + indices_to_send.extent(0) + indices_to_allocate.extent(0)); // Populate the buffer for particle array Kokkos::parallel_for( - "PopulateBuffer", - indices_to_send.extent(0), - Lambda(const size_t i) { - buffer(i) = arr(indices_to_send(i)); - }); + "PopulateBuffer", + indices_to_send.extent(0), + Lambda(const size_t i) { buffer(i) = arr(indices_to_send(i)); }); CommunicateParticleQuantity(buffer, send_rank, recv_rank, send_slice, recv_slice); // Populate from buffer to the particle array Kokkos::parallel_for( - "PopulateFromBuffer", - indices_to_allocate.extent(0), - Lambda(const size_t i) { - arr(indices_to_allocate(i)) = buffer(indices_to_send.extent(0) + i); - }); - return; + "PopulateFromBuffer", + indices_to_allocate.extent(0), + Lambda(const size_t i) { + arr(indices_to_allocate(i)) = buffer(indices_to_send.extent(0) + i); + }); + return; } template - void CommunicateParticlesBuffer(Particles& species, - Kokkos::View indices_to_send, - Kokkos::View indices_to_allocate, - int send_rank, - int recv_rank, - std::vector shifts_in_x){ + void CommunicateParticlesBuffer(Particles& species, + Kokkos::View 
indices_to_send, + Kokkos::View indices_to_allocate, + int send_rank, + int recv_rank, + std::vector shifts_in_x) { if ((send_rank < 0) && (recv_rank < 0)) { raise::Error("No send or recv in SendRecvParticlesBuffered", HERE); } // First set the tags of the sent particles to be dead - auto& this_tag = species.tag; - //Kokkos::parallel_for( + auto& this_tag = species.tag; + // Kokkos::parallel_for( //"SetTagDead", - //Kokkos::RangePolicy(0, indices_to_allocate.size()), - //KOKKOS_LAMBDA(const size_t i) { - // const auto idx = indices_to_send(i); - // this_tag(idx) = static_cast(ParticleTag::dead); - //}); - + // Kokkos::RangePolicy(0, indices_to_allocate.size()), + // KOKKOS_LAMBDA(const size_t i) { + // const auto idx = indices_to_send(i); + // this_tag(idx) = static_cast(ParticleTag::dead); + // }); + // Construct send and receive slice for the buffer auto send_slice = range_tuple_t({ 0, indices_to_send.size() }); - auto recv_slice = range_tuple_t({ indices_to_send.size(), indices_to_send.size() + - indices_to_allocate.size() }); + auto recv_slice = range_tuple_t( + { indices_to_send.size(), + indices_to_send.size() + indices_to_allocate.size() }); // Send and receive the particles - CommunicateParticleQuantityBuffer(species.i1, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx1, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.i1_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx1_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i1, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx1, + send_rank, + recv_rank, + send_slice, + recv_slice, + 
indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i1_prev, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx1_prev, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); if constexpr (D == Dim::_2D || D == Dim::_3D) { - CommunicateParticleQuantityBuffer(species.i2, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx2, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.i2_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx2_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i2, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx2, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i2_prev, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx2_prev, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); } if constexpr (D == Dim::_3D) { - CommunicateParticleQuantityBuffer(species.i3, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx3, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.i3_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - 
CommunicateParticleQuantityBuffer(species.dx3_prev, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i3, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx3, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.i3_prev, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.dx3_prev, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); } - CommunicateParticleQuantityBuffer(species.ux1, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.ux2, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.ux3, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); - CommunicateParticleQuantityBuffer(species.weight, send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.ux1, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.ux2, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.ux3, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); + CommunicateParticleQuantityBuffer(species.weight, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); if constexpr (D == Dim::_2D and C != Coord::Cart) { - CommunicateParticleQuantityBuffer(species.phi, send_rank, recv_rank, send_slice, 
recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.phi, + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); } for (auto p { 0 }; p < species.npld(); ++p) { - CommunicateParticleQuantityBuffer(species.pld[p], send_rank, recv_rank, send_slice, recv_slice, indices_to_send, indices_to_allocate); + CommunicateParticleQuantityBuffer(species.pld[p], + send_rank, + recv_rank, + send_slice, + recv_slice, + indices_to_send, + indices_to_allocate); } // Set the tag for the received particles to be alive and perform the necessary displacements - if constexpr (D == Dim::_1D) - { - const auto shift_in_x1 = shifts_in_x[0]; - auto& this_i1 = species.i1; - auto& this_i1_prev = species.i1_prev; + if constexpr (D == Dim::_1D) { + const auto shift_in_x1 = shifts_in_x[0]; + auto& this_i1 = species.i1; + auto& this_i1_prev = species.i1_prev; Kokkos::parallel_for( - "SetTagAlive", - Kokkos::RangePolicy(0, indices_to_allocate.size()), - KOKKOS_LAMBDA(const size_t i) { - const auto idx = indices_to_allocate(i); - this_tag(idx) = static_cast(ParticleTag::alive); - this_i1(idx) += shift_in_x1; - this_i1_prev(idx) += shift_in_x1; - }); + "SetTagAlive", + Kokkos::RangePolicy(0, indices_to_allocate.size()), + KOKKOS_LAMBDA(const size_t i) { + const auto idx = indices_to_allocate(i); + this_tag(idx) = static_cast(ParticleTag::alive); + this_i1(idx) += shift_in_x1; + this_i1_prev(idx) += shift_in_x1; + }); } - else if constexpr (D == Dim::_2D) - { - const auto shift_in_x1 = shifts_in_x[0]; - const auto shift_in_x2 = shifts_in_x[1]; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; + else if constexpr (D == Dim::_2D) { + const auto shift_in_x1 = shifts_in_x[0]; + const auto shift_in_x2 = shifts_in_x[1]; + auto& this_i1 = species.i1; + auto& this_i2 = species.i2; + auto& this_i1_prev = species.i1_prev; + auto& this_i2_prev 
= species.i2_prev; Kokkos::parallel_for( - "SetTagAlive", - Kokkos::RangePolicy(0, indices_to_allocate.size()), - KOKKOS_LAMBDA(const size_t i) { - const auto idx = indices_to_allocate(i); - this_tag(idx) = static_cast(ParticleTag::alive); - this_i1(idx) += shift_in_x1; - this_i2(idx) += shift_in_x2; - this_i1_prev(idx) += shift_in_x1; - this_i2_prev(idx) += shift_in_x2; - }); + "SetTagAlive", + Kokkos::RangePolicy(0, indices_to_allocate.size()), + KOKKOS_LAMBDA(const size_t i) { + const auto idx = indices_to_allocate(i); + this_tag(idx) = static_cast(ParticleTag::alive); + this_i1(idx) += shift_in_x1; + this_i2(idx) += shift_in_x2; + this_i1_prev(idx) += shift_in_x1; + this_i2_prev(idx) += shift_in_x2; + }); } - else if constexpr (D == Dim::_3D) - { - const auto shift_in_x1 = shifts_in_x[0]; - const auto shift_in_x2 = shifts_in_x[1]; - const auto shift_in_x3 = shifts_in_x[2]; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i3 = species.i3; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; - auto& this_i3_prev = species.i3_prev; + else if constexpr (D == Dim::_3D) { + const auto shift_in_x1 = shifts_in_x[0]; + const auto shift_in_x2 = shifts_in_x[1]; + const auto shift_in_x3 = shifts_in_x[2]; + auto& this_i1 = species.i1; + auto& this_i2 = species.i2; + auto& this_i3 = species.i3; + auto& this_i1_prev = species.i1_prev; + auto& this_i2_prev = species.i2_prev; + auto& this_i3_prev = species.i3_prev; Kokkos::parallel_for( - "SetTagAlive", - Kokkos::RangePolicy(0, indices_to_allocate.size()), - KOKKOS_LAMBDA(const size_t i) { - const auto idx = indices_to_allocate(i); - this_tag(idx) = static_cast(ParticleTag::alive); - this_i1(idx) += shift_in_x1; - this_i2(idx) += shift_in_x2; - this_i3(idx) += shift_in_x3; - this_i1_prev(idx) += shift_in_x1; - this_i2_prev(idx) += shift_in_x2; - this_i3_prev(idx) += shift_in_x3; - }); + "SetTagAlive", + Kokkos::RangePolicy(0, indices_to_allocate.size()), + KOKKOS_LAMBDA(const 
size_t i) { + const auto idx = indices_to_allocate(i); + this_tag(idx) = static_cast(ParticleTag::alive); + this_i1(idx) += shift_in_x1; + this_i2(idx) += shift_in_x2; + this_i3(idx) += shift_in_x3; + this_i1_prev(idx) += shift_in_x1; + this_i2_prev(idx) += shift_in_x2; + this_i3_prev(idx) += shift_in_x3; + }); } Kokkos::fence(); return; - } - + } } // namespace comm diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index a43b635b7..5e5da4a0c 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -86,8 +86,8 @@ namespace ntt { } else { // no communication necessary return { - {0, -1}, - {0, -1} + { 0, -1 }, + { 0, -1 } }; } #if defined(MPI_ENABLED) @@ -110,8 +110,8 @@ namespace ntt { (void)send_rank; (void)recv_rank; return { - {send_ind, send_rank}, - {recv_ind, recv_rank} + { send_ind, send_rank }, + { recv_ind, recv_rank } }; } @@ -129,8 +129,8 @@ namespace ntt { const auto is_receiving = (recv_rank >= 0); if (not(is_sending or is_receiving)) { return { - {{ 0, -1 }, {}}, - {{ 0, -1 }, {}} + { { 0, -1 }, {} }, + { { 0, -1 }, {} } }; } auto send_slice = std::vector {}; @@ -196,8 +196,8 @@ namespace ntt { } return { - {{ send_ind, send_rank }, send_slice}, - {{ recv_ind, recv_rank }, recv_slice}, + { { send_ind, send_rank }, send_slice }, + { { recv_ind, recv_rank }, recv_slice }, }; } @@ -645,26 +645,27 @@ namespace ntt { } } - -/* - New function to communicate particles using a buffer -*/ -template + /* + New function to communicate particles using a buffer + */ + template void Metadomain::CommunicateParticlesBuffer(Domain& domain, - timer::Timers* timers) { + timer::Timers* timers) { raise::ErrorIf(timers == nullptr, "Timers not passed when Comm::Prtl called", HERE); logger::Checkpoint("Communicating particles\n", HERE); for (auto& species : domain.species) { - auto npart_per_tag_arr = species.npart_per_tag(); - auto npart = static_cast(species.npart()); - auto 
total_alive = static_cast(npart_per_tag_arr[ParticleTag::alive]); - auto total_dead = static_cast(npart_per_tag_arr[ParticleTag::dead]); - auto total_holes = static_cast(npart - total_alive); - auto total_send = static_cast(npart - total_alive - total_dead); - auto total_recv = static_cast(0); - auto tag_count = static_cast(npart_per_tag_arr.size()); + auto npart_per_tag_arr = species.npart_per_tag(); + auto npart = static_cast(species.npart()); + auto total_alive = static_cast( + npart_per_tag_arr[ParticleTag::alive]); + auto total_dead = static_cast( + npart_per_tag_arr[ParticleTag::dead]); + auto total_holes = static_cast(npart - total_alive); + auto total_send = static_cast(npart - total_alive - total_dead); + auto total_recv = static_cast(0); + auto tag_count = static_cast(npart_per_tag_arr.size()); std::vector send_ranks, send_inds; std::vector recv_ranks, recv_inds; @@ -676,9 +677,9 @@ template // array that holds the number of particles to be received per tag std::vector npart_per_tag_arr_recv(tag_count, 0); - for (auto& direction : dir::Directions::all) { + for (auto& direction : dir::Directions::all) { const auto [send_params, - recv_params] = GetSendRecvParams(this, domain, direction, true); + recv_params] = GetSendRecvParams(this, domain, direction, true); const auto [send_indrank, send_slice] = send_params; const auto [recv_indrank, recv_slice] = recv_params; const auto [send_ind, send_rank] = send_indrank; @@ -686,78 +687,76 @@ template if (send_rank < 0 and recv_rank < 0) { continue; } - const auto send_dir_tag = mpi::PrtlSendTag::dir2tag(direction); - const auto nsend = npart_per_tag_arr[send_dir_tag]; - std::size_t nrecv = 0; + const auto send_dir_tag = mpi::PrtlSendTag::dir2tag(direction); + const auto nsend = npart_per_tag_arr[send_dir_tag]; + std::size_t nrecv = 0; // Get the receive count send_ranks.push_back(send_rank); recv_ranks.push_back(recv_rank); send_inds.push_back(send_ind); recv_inds.push_back(recv_ind); - 
comm::ParticleSendRecvCount(send_rank, - recv_rank, - nsend, - nrecv); - total_recv += nrecv; + comm::ParticleSendRecvCount(send_rank, recv_rank, nsend, nrecv); + total_recv += nrecv; npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)] = nrecv; } raise::FatalIf((npart + total_recv) >= species.maxnpart(), - "Too many particles to receive (cannot fit into maxptl)", - HERE); + "Too many particles to receive (cannot fit into maxptl)", + HERE); // Now we know the number of particles to be sent and received per direction /* permute vector contains the indices of the tags to send and receive in the order of the directions E.g., consider the following tag array [ 0, 0, 3, 0, 1,...] Then, permute vector will look something like - [0, 1, 3, ..., 2, ..., 4, ... ] - |<--------- >| |<----->| |<----->| .... - tag=0 ct tag=1 ct tag=3 ct - (dead) (alive) (tag1) ... + [0, 1, 3, ..., 4, ..., ... 2, ... ] + |<--------- >| |<----->| |<----->| .... + tag=0 ct tag=1 ct tag=3 ct + (dead) (alive) (tag1) ... 
*/ - auto& this_tag = species.tag; - auto& this_tag_offset = species.tag_offset; + auto& this_tag = species.tag; + auto& this_tag_offset = species.tag_offset; Kokkos::View permute_vector("permute_vector", species.npart()); Kokkos::View current_offset("current_offset", species.ntags()); + // @TODO: do not save tag = 1 particles into permute_vector + // instead of species.npart(), size will be species.npart() - npart_per_tag[ParticleTag::alive]; Kokkos::parallel_for( "PermuteVector", species.npart(), - Lambda(const std::size_t p) { - auto current_tag = this_tag(p); - auto i_current_tag_offset = Kokkos::atomic_fetch_add(¤t_offset(current_tag), 1); - auto idx_permute_vec = this_tag_offset(current_tag) + i_current_tag_offset; - permute_vector(idx_permute_vec) = static_cast(p); + Lambda(index_t p) { + const auto current_tag = this_tag(p); + const auto idx_permute_vec = this_tag_offset(current_tag) + + Kokkos::atomic_fetch_add( + ¤t_offset(current_tag), + 1); + permute_vector(idx_permute_vec) = p; }); - // Check: add the end of the loop, current_offset should be equal to npart_per_tag - auto current_offset_h = Kokkos::create_mirror_view(current_offset); - Kokkos::deep_copy(current_offset_h, current_offset); - for (std::size_t i { 0 }; i < current_offset_h.size(); ++i) { - raise::FatalIf(current_offset_h(i) != npart_per_tag_arr[i], - "Error in permute vector construction", - HERE); - } + // Check: add the end of the loop, current_offset should be equal to npart_per_tag + auto current_offset_h = Kokkos::create_mirror_view(current_offset); + Kokkos::deep_copy(current_offset_h, current_offset); + for (std::size_t i { 0 }; i < current_offset_h.size(); ++i) { + raise::FatalIf(current_offset_h(i) != npart_per_tag_arr[i], + "Error in permute vector construction", + HERE); + } // allocation_vector(p) assigns the pth received particle // to the pth hole in the array, or after npart() if p > sent+dead count. 
Kokkos::View allocation_vector("allocation_vector", total_recv); - // TWO BUGS: when nsend = nrecv, an extra dead particle is created out of nowhere - // when nrecv > nsend but < nrecv < nsend + ndead, tags of alive particles are not changed - Kokkos::parallel_for( "AllocationVector", total_recv, - Lambda(const std::size_t p) { - // Case: recevied particle count less than dead particle count -> replace dead particles - if (p < total_dead){ + Lambda(index_t p) { + // Case: received particle count less than dead particle count -> replace dead particles + if (p < total_dead) { allocation_vector(p) = permute_vector(p); } // Case: received particle count > dead particle count but < sent particle count -> replace // sent particles - else if (p < total_holes && p >= total_dead){ + else if (p < total_holes && p >= total_dead) { allocation_vector(p) = permute_vector(total_alive + p); } // Case: received particle count exceeds sent + dead particles -> append at the end @@ -767,52 +766,20 @@ template }); Kokkos::fence(); - // Compute where the received particles are allocated - if (mpi_rank == 0){ - Kokkos::View particles_allocated_per_tag("particles allocated per tag", tag_count); - Kokkos::parallel_for( - "ParticlesAllocatedPerTag", - total_recv, - Lambda(const std::size_t i) { - auto index = allocation_vector(i); - auto tag = this_tag(index); - Kokkos::atomic_fetch_add(&particles_allocated_per_tag(tag), 1); - }); - Kokkos::fence(); - auto particles_allocated_per_tag_h = Kokkos::create_mirror_view(particles_allocated_per_tag); - Kokkos::deep_copy(particles_allocated_per_tag_h, particles_allocated_per_tag); - - std::cout << "Particles allocated per tag (pre recv): "; - for (std::size_t i = 0; i < tag_count; i++){ - std::cout << "[" << particles_allocated_per_tag_h[i] << "] "; - } - std::cout << std::endl; - } - - - // Check if the particle tags are only dead or alive - //if (mpi_rank == 0){ - // std::cout << "Before COMM: " << std::endl; - // std::cout << "Tag counts: "; - 
// for (std::size_t i = 0; i < tag_count; i++){ - // std::cout << "[" << npart_per_tag_arr[i] << "] "; - // } - // std::cout << std::endl; - //} std::size_t count_recv = 0; std::size_t iteration = 0; // Main loop over all direction where we send the data for (auto& direction : dir::Directions::all) { // When nowhere to send and receive - auto send_rank = send_ranks[iteration]; - auto recv_rank = recv_ranks[iteration]; + auto send_rank = send_ranks[iteration]; + auto recv_rank = recv_ranks[iteration]; if (send_rank < 0 and recv_rank < 0) { continue; } // Get the coordinate shifts in xi std::vector shifts_in_x; - auto recv_ind = recv_inds[iteration]; + auto recv_ind = recv_inds[iteration]; if constexpr (D == Dim::_1D) { int shift_in_x1 { 0 }; if ((-direction)[0] == -1) { @@ -821,8 +788,7 @@ template shift_in_x1 = domain.mesh.n_active(in::x1); } shifts_in_x.push_back(shift_in_x1); - } - else if constexpr (D == Dim::_2D) { + } else if constexpr (D == Dim::_2D) { int shift_in_x1 { 0 }, shift_in_x2 { 0 }; if ((-direction)[0] == -1) { shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); @@ -836,8 +802,7 @@ template } shifts_in_x.push_back(shift_in_x1); shifts_in_x.push_back(shift_in_x2); - } - else if constexpr (D == Dim::_3D) { + } else if constexpr (D == Dim::_3D) { int shift_in_x1 { 0 }, shift_in_x2 { 0 }, shift_in_x3 { 0 }; if ((-direction)[0] == -1) { shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); @@ -860,32 +825,39 @@ template } // Tuple that contains the start and end indices of permtute_vec pointing to a given tag type = dir2tag(dir) - auto range_permute = std::make_pair(static_cast(species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)]), - static_cast(species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)] + - npart_per_tag_arr[mpi::PrtlSendTag::dir2tag(direction)])); + auto range_permute = std::make_pair( + static_cast( + species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)]), + static_cast( + 
species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)] + + npart_per_tag_arr[mpi::PrtlSendTag::dir2tag(direction)])); // Tuple that contains the start and end indices for allocation_vector pointing to a given tag type = dir2tag(dir) - auto range_allocate = std::make_pair(static_cast(count_recv), - static_cast(count_recv + - npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)])); + auto range_allocate = std::make_pair( + static_cast(count_recv), + static_cast( + count_recv + + npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)])); + // @TODO: check subview index // contains the indices of all particles of a given tag = mpi::PrtlSendTag::dir2tag(direction) - auto indices_to_send = Kokkos::subview(permute_vector, range_permute); + auto indices_to_send = Kokkos::subview(permute_vector, range_permute); // contains the indices of the holes where the received particles will be placed - auto indices_to_allocate = Kokkos::subview(allocation_vector, range_allocate); + auto indices_to_allocate = Kokkos::subview(allocation_vector, + range_allocate); // Main function that sends the particles and receives the arrays - comm::CommunicateParticlesBuffer( species, - indices_to_send, - indices_to_allocate, - send_rank, - recv_rank, - shifts_in_x); + comm::CommunicateParticlesBuffer(species, + indices_to_send, + indices_to_allocate, + send_rank, + recv_rank, + shifts_in_x); count_recv += npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)]; iteration++; } // Compute where the received particles are allocated - //if (mpi_rank == 0){ - //Kokkos::View particles_allocated_per_tag("particles allocated per tag", tag_count); - //Kokkos::parallel_for( + // if (mpi_rank == 0){ + // Kokkos::View particles_allocated_per_tag("particles + // allocated per tag", tag_count); Kokkos::parallel_for( // "ParticlesAllocatedPerTag", // total_recv, // Lambda(const std::size_t i) { @@ -893,79 +865,77 @@ template // auto tag = this_tag(index); // 
Kokkos::atomic_fetch_add(&particles_allocated_per_tag(tag), 1); // }); - //Kokkos::fence(); - //auto particles_allocated_per_tag_h = Kokkos::create_mirror_view(particles_allocated_per_tag); - //Kokkos::deep_copy(particles_allocated_per_tag_h, particles_allocated_per_tag); - - //std::cout << "Particles allocated per tag (post recv): "; - //for (std::size_t i = 0; i < tag_count; i++){ - // std::cout << "[" << particles_allocated_per_tag_h[i] << "] "; - //} - //std::cout << std::endl; + // Kokkos::fence(); + // auto particles_allocated_per_tag_h = + // Kokkos::create_mirror_view(particles_allocated_per_tag); + // Kokkos::deep_copy(particles_allocated_per_tag_h, + // particles_allocated_per_tag); + + // std::cout << "Particles allocated per tag (post recv): "; + // for (std::size_t i = 0; i < tag_count; i++){ + // std::cout << "[" << particles_allocated_per_tag_h[i] << "] "; // } - // If receive count is less than send count then make the tags of sent dead - if (total_recv <= total_holes){ - if (total_recv <= total_dead){ - // Case: all sent particles' tags are set to dead - /* (received) - [ | <------------------> | <-------->] - (dead) (alive) (sent) - || - (to be made dead) - ^ - (offset) - */ - - auto offset = total_alive + total_dead; + // std::cout << std::endl; + // } + // If receive count is less than send count then make the tags of sent dead + if (total_recv <= total_holes) { + if (total_recv <= total_dead) { + // Case: all sent particles' tags are set to dead + /* (received) + [ | <------------------> | <-------->] + (dead) (alive) (sent) + || + (to be made dead) + ^ + (offset) + */ + + auto offset = total_alive + total_dead; Kokkos::parallel_for( - "CommunicateParticles", - total_send, - Lambda(index_t p) { - this_tag(permute_vector(offset + p)) = ParticleTag::dead; - }); - } - else{ - // Case: tags of sent particles that are not replaced by recevied particles are made dead - /* (received) (received) - [ | <------------------> |] - (dead) (alive) (sent) - 
|| - (to be made dead) - ^ - (offset) - */ - auto offset = total_alive + total_recv; + "CommunicateParticles", + total_send, + Lambda(index_t p) { + this_tag(permute_vector(offset + p)) = ParticleTag::dead; + }); + } else { + // Case: tags of sent particles that are not replaced by recevied particles are made dead + /* (received) (received) + [ | <------------------> |] + (dead) (alive) (sent) + || + (to be made dead) + ^ + (offset) + */ + auto offset = total_alive + total_recv; Kokkos::parallel_for( - "CommunicateParticles", - total_send - (total_recv - total_dead), - Lambda(index_t p) { - this_tag(permute_vector(offset + p)) = ParticleTag::dead; - }); + "CommunicateParticles", + total_send - (total_recv - total_dead), + Lambda(index_t p) { + this_tag(permute_vector(offset + p)) = ParticleTag::dead; + }); } } - // Check if the particle tags are only dead or alive species.set_npart(npart + std::max(total_send, total_recv) - total_send); npart_per_tag_arr = species.npart_per_tag(); - //if (mpi_rank == 0) + // if (mpi_rank == 0) //{ - // std::cout << "After COMM: " << std::endl; - // std::cout << "Tag counts: "; - // for (std::size_t i = 0; i < tag_count; i++){ - // std::cout << "[" << npart_per_tag_arr[i] << "] "; - // } - // std::cout << std::endl; - // std::cout << "Holes filled: " << total_holes << " Total recv: " << total_recv << - // "Total send: " << total_send << std::endl; - // std::cout << std::endl << "*************"<< std::endl; - //} - #endif + // std::cout << "After COMM: " << std::endl; + // std::cout << "Tag counts: "; + // for (std::size_t i = 0; i < tag_count; i++){ + // std::cout << "[" << npart_per_tag_arr[i] << "] "; + // } + // std::cout << std::endl; + // std::cout << "Holes filled: " << total_holes << " Total recv: " << total_recv << + // "Total send: " << total_send << std::endl; + // std::cout << std::endl << "*************"<< std::endl; + // } +#endif } } - - template struct Metadomain>; template struct Metadomain>; template struct 
Metadomain>; From c0d465205889775c30843d7368bd1f5c28a31f25 Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 16 Dec 2024 18:29:21 -0500 Subject: [PATCH 12/52] tested prtlsort (WIP) --- benchmark/benchmark.cpp | 186 ++++++++++++------------ extern/Kokkos | 2 +- extern/adios2 | 2 +- extern/plog | 2 +- src/framework/containers/particles.h | 1 - src/framework/domain/comm_mpi.hpp | 18 +-- src/framework/domain/communications.cpp | 17 ++- src/framework/domain/metadomain.cpp | 3 + 8 files changed, 120 insertions(+), 111 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 797c8ed87..593b7f190 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -35,6 +35,7 @@ - Communicate particles to neighbors and time the communication - Compute the time taken for best of N iterations for the communication */ +using namespace ntt; // Set npart and set the particle tags to alive template @@ -94,101 +95,106 @@ void PushParticles(Domain& domain, } auto main(int argc, char* argv[]) -> int { - std::cout << "Constructing the domain" << std::endl; - ntt::GlobalInitialize(argc, argv); - // Create a Metadomain object - const unsigned int ndomains = 1; - const std::vector global_decomposition = { - { -1, -1, -1 } - }; - const std::vector global_ncells = { 32, 32, 32 }; - const boundaries_t global_extent = { - { 0.0, 3.0 }, - { 0.0, 3.0 }, - { 0.0, 3.0 } - }; - const boundaries_t global_flds_bc = { - { FldsBC::PERIODIC, FldsBC::PERIODIC }, - { FldsBC::PERIODIC, FldsBC::PERIODIC }, - { FldsBC::PERIODIC, FldsBC::PERIODIC } - }; - const boundaries_t global_prtl_bc = { - { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, - { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, - { PrtlBC::PERIODIC, PrtlBC::PERIODIC } - }; - const std::map metric_params = {}; - const int maxnpart = argc > 1 ? 
std::stoi(argv[1]) : 1000; - const double npart_to_send_frac = 0.01; - const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); - auto species = ntt::ParticlesSpecies(1u, - "test_e", - 1.0f, - 1.0f, - maxnpart, - ntt::PrtlPusher::BORIS, - false, - ntt::Cooling::NONE); - auto metadomain = Metadomain>( - ndomains, - global_decomposition, - global_ncells, - global_extent, - global_flds_bc, - global_prtl_bc, - metric_params, - { species }); + GlobalInitialize(argc, argv); + { + std::cout << "Constructing the domain" << std::endl; + // Create a Metadomain object + const unsigned int ndomains = 2; + const std::vector global_decomposition = { + {-1, -1, -1} + }; + const std::vector global_ncells = { 32, 32, 32 }; + const boundaries_t global_extent = { + {0.0, 3.0}, + {0.0, 3.0}, + {0.0, 3.0} + }; + const boundaries_t global_flds_bc = { + {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC} + }; + const boundaries_t global_prtl_bc = { + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC} + }; + const std::map metric_params = {}; + const int maxnpart = argc > 1 ? 
std::stoi(argv[1]) : 1000; + const double npart_to_send_frac = 0.01; + const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); + auto species = ntt::ParticleSpecies(1u, + "test_e", + 1.0f, + 1.0f, + maxnpart, + ntt::PrtlPusher::BORIS, + false, + ntt::Cooling::NONE); + auto metadomain = Metadomain>( + ndomains, + global_decomposition, + global_ncells, + global_extent, + global_flds_bc, + global_prtl_bc, + metric_params, + { species }); - const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; - auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); - auto timers = timer::Timers { { "Communication" }, nullptr, false }; - InitializeParticleArrays(*local_domain, npart); - // Timers for both the communication routines - auto total_time_elapsed_old = 0; - auto total_time_elapsed_new = 0; + const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; + auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); + auto timers = timer::Timers { { "Communication" }, nullptr, false }; + InitializeParticleArrays(*local_domain, npart); + // Timers for both the communication routines + auto total_time_elapsed_old = 0; + auto total_time_elapsed_new = 0; - int seed_ind = 0; - int seed_tag = 1; - Kokkos::fence(); + int seed_ind = 0; + int seed_tag = 1; + Kokkos::fence(); - for (int i = 0; i < 10; ++i) { - { - // Push - seed_ind += 2; - seed_tag += 3; - PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); - // Sort new - Kokkos::fence(); - auto start_new = std::chrono::high_resolution_clock::now(); - metadomain.CommunicateParticlesBuffer(*local_domain, &timers); - auto stop_new = std::chrono::high_resolution_clock::now(); - auto duration_new = std::chrono::duration_cast( - stop_new - start_new) - .count(); - total_time_elapsed_new += duration_new; - Kokkos::fence(); - } - { - // Push - seed_ind += 2; - seed_tag += 3; - PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); - // Sort old - 
Kokkos::fence(); - auto start_old = std::chrono::high_resolution_clock::now(); - metadomain.CommunicateParticles(*local_domain, &timers); - auto stop_old = std::chrono::high_resolution_clock::now(); - auto duration_old = std::chrono::duration_cast( - stop_old - start_old) - .count(); - total_time_elapsed_old += duration_old; - Kokkos::fence(); + for (int i = 0; i < 10; ++i) { + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort new + Kokkos::fence(); + auto start_new = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticlesBuffer(*local_domain, &timers); + auto stop_new = std::chrono::high_resolution_clock::now(); + auto duration_new = std::chrono::duration_cast( + stop_new - start_new) + .count(); + total_time_elapsed_new += duration_new; + Kokkos::fence(); + } + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort old + Kokkos::fence(); + auto start_old = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticles(*local_domain, &timers); + auto stop_old = std::chrono::high_resolution_clock::now(); + auto duration_old = std::chrono::duration_cast( + stop_old - start_old) + .count(); + total_time_elapsed_old += duration_old; + Kokkos::fence(); + } } + printf("Total time elapsed for old: %f us : %f us/prtl\n", + total_time_elapsed_old / 10.0, + total_time_elapsed_old / 10.0 * 1000 / npart); + printf("Total time elapsed for new: %f us : %f us/prtl\n", + total_time_elapsed_new / 10.0, + total_time_elapsed_new / 10.0 * 1000 / npart); } - std::cout << "Total time elapsed for old: " << total_time_elapsed_old - << " microseconds" << std::endl; - std::cout << "Total time elapsed for new: " << total_time_elapsed_new - << " microseconds" << std::endl; + GlobalFinalize(); return 0; } diff --git a/extern/Kokkos b/extern/Kokkos index b6a16bc9d..eb11070f6 160000 --- a/extern/Kokkos +++ 
b/extern/Kokkos @@ -1 +1 @@ -Subproject commit b6a16bc9d88a9252d76e64fd2be20c58eb5d7f2e +Subproject commit eb11070f67565b2e660659f5207f0363bdf3b882 diff --git a/extern/adios2 b/extern/adios2 index 25ccd6aaa..b8761e2af 160000 --- a/extern/adios2 +++ b/extern/adios2 @@ -1 +1 @@ -Subproject commit 25ccd6aaa810bbc217b43421f9c43140082c65b9 +Subproject commit b8761e2afab2cd05b89d09b2ee4da1cd7a834225 diff --git a/extern/plog b/extern/plog index 96637a6e5..85a871b13 160000 --- a/extern/plog +++ b/extern/plog @@ -1 +1 @@ -Subproject commit 96637a6e5e53f54e4e56d667d312c564d979ec0e +Subproject commit 85a871b13be0bd1a9e0110744fa60cc9bd1e8380 diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index 7496db78c..86443c98f 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -193,7 +193,6 @@ namespace ntt { * @brief Count the number of particles with a specific tag. * @return The vector of counts for each tag. */ - [[nodiscard]] auto npart_per_tag() const -> std::vector; /* setters -------------------------------------------------------------- */ diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 9b2ad0a33..d7d19c983 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -296,18 +296,18 @@ namespace comm { int recv_rank, const range_tuple_t& send_slice, const range_tuple_t& recv_slice) { - auto array_h = Kokkos::create_mirror_view(arr); - Kokkos::deep_copy(array_h, arr); + // auto array_h = Kokkos::create_mirror_view(arr); + // Kokkos::deep_copy(array, arr); const std::size_t send_count = send_slice.second - send_slice.first; const std::size_t recv_count = recv_slice.second - recv_slice.first; if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and (recv_count > 0)) { - MPI_Sendrecv(array_h.data() + send_slice.first, + MPI_Sendrecv(arr.data() + send_slice.first, send_count, mpi::get_type(), send_rank, 0, - 
array_h.data() + recv_slice.first, + arr.data() + recv_slice.first, recv_count, mpi::get_type(), recv_rank, @@ -315,14 +315,14 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if ((send_rank >= 0) and (send_count > 0)) { - MPI_Send(array_h.data() + send_slice.first, + MPI_Send(arr.data() + send_slice.first, send_count, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); } else if ((recv_rank >= 0) and (recv_count > 0)) { - MPI_Recv(array_h.data() + recv_slice.first, + MPI_Recv(arr.data() + recv_slice.first, recv_count, mpi::get_type(), recv_rank, @@ -330,9 +330,9 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } - if ((recv_rank >= 0) and (recv_count > 0)) { - Kokkos::deep_copy(arr, array_h); - } + // if ((recv_rank >= 0) and (recv_count > 0)) { + // Kokkos::deep_copy(arr, array_h); + // } } void ParticleSendRecvCount(int send_rank, diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 5e5da4a0c..ff7edfec6 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -86,8 +86,8 @@ namespace ntt { } else { // no communication necessary return { - { 0, -1 }, - { 0, -1 } + {0, -1}, + {0, -1} }; } #if defined(MPI_ENABLED) @@ -110,8 +110,8 @@ namespace ntt { (void)send_rank; (void)recv_rank; return { - { send_ind, send_rank }, - { recv_ind, recv_rank } + {send_ind, send_rank}, + {recv_ind, recv_rank} }; } @@ -129,8 +129,8 @@ namespace ntt { const auto is_receiving = (recv_rank >= 0); if (not(is_sending or is_receiving)) { return { - { { 0, -1 }, {} }, - { { 0, -1 }, {} } + {{ 0, -1 }, {}}, + {{ 0, -1 }, {}} }; } auto send_slice = std::vector {}; @@ -196,8 +196,8 @@ namespace ntt { } return { - { { send_ind, send_rank }, send_slice }, - { { recv_ind, recv_rank }, recv_slice }, + {{ send_ind, send_rank }, send_slice}, + {{ recv_ind, recv_rank }, recv_slice}, }; } @@ -746,6 +746,7 @@ namespace ntt { // to the pth hole in the array, or after npart() if p > sent+dead 
count. Kokkos::View allocation_vector("allocation_vector", total_recv); + // @CRITICAL: this may overwrite unsent data Kokkos::parallel_for( "AllocationVector", total_recv, diff --git a/src/framework/domain/metadomain.cpp b/src/framework/domain/metadomain.cpp index 5e66bc366..ec8561a9a 100644 --- a/src/framework/domain/metadomain.cpp +++ b/src/framework/domain/metadomain.cpp @@ -46,6 +46,9 @@ namespace ntt { #if defined(MPI_ENABLED) MPI_Comm_size(MPI_COMM_WORLD, &g_mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &g_mpi_rank); + raise::ErrorIf(global_ndomains != g_mpi_size, + "Exactly 1 domain per MPI rank is allowed", + HERE); #endif initialValidityCheck(); From 646a208a5c122ef168a12ac169fab2acd2e5e454 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 17 Dec 2024 20:04:54 -0500 Subject: [PATCH 13/52] removed tag_offset array from the particle class. The npart_per_tag() method now returns a pair of npart_per_tag and tag_offset arrays --- src/framework/containers/particles.cpp | 16 ++++++---------- src/framework/containers/particles.h | 4 ---- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index c97f8da2d..52efdaf6d 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -47,9 +47,6 @@ namespace ntt { tag = array_t { label + "_tag", maxnpart }; tag_h = Kokkos::create_mirror_view(tag); - tag_offset = array_t { label + "_tag_offset", ntags() }; - tag_offset_h = Kokkos::create_mirror_view(tag_offset); - for (unsigned short n { 0 }; n < npld; ++n) { pld.push_back(array_t("pld", maxnpart)); pld_h.push_back(Kokkos::create_mirror_view(pld[n])); @@ -101,17 +98,16 @@ namespace ntt { auto npart_tag_host = Kokkos::create_mirror_view(npart_tag); Kokkos::deep_copy(npart_tag_host, npart_tag); - std::vector npart_tag_vec; + std::vector npart_tag_vec(ntags()); + std::vector tag_offset(ntags()); for (std::size_t t { 0 }; t < ntags(); ++t) { - 
npart_tag_vec.push_back(npart_tag_host(t)); - tag_offset_h(t) = (t > 0) ? npart_tag_vec[t - 1] : 0; + npart_tag_vec[t] = npart_tag_host(t); + tag_offset[t] = (t > 0) ? npart_tag_vec[t - 1] : 0; } for (std::size_t t { 0 }; t < ntags(); ++t) { - tag_offset_h(t) += (t > 0) ? tag_offset_h(t - 1) : 0; + tag_offset[t] += (t > 0) ? tag_offset[t - 1] : 0; } - // Copy to device - Kokkos::deep_copy(tag_offset, tag_offset_h); - return npart_tag_vec; + return std::make_pair(npart_tag_vec, tag_offset); } template diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index 86443c98f..e4d78cd0d 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -60,8 +60,6 @@ namespace ntt { array_t dx1_prev, dx2_prev, dx3_prev; // Array to tag the particles array_t tag; - // Array to store the cumulative number of particles per tag - array_t tag_offset; // Array to store the particle load std::vector> pld; // phi coordinate (for axisymmetry) @@ -74,7 +72,6 @@ namespace ntt { array_mirror_t weight_h; array_mirror_t phi_h; array_mirror_t tag_h; - array_mirror_t tag_offset_h; std::vector> pld_h; // for empty allocation @@ -181,7 +178,6 @@ namespace ntt { footprint += sizeof(prtldx_t) * dx2_prev.extent(0); footprint += sizeof(prtldx_t) * dx3_prev.extent(0); footprint += sizeof(short) * tag.extent(0); - footprint += sizeof(int) * tag_offset.extent(0); for (auto& p : pld) { footprint += sizeof(real_t) * p.extent(0); } From 708115c4408d276e1e67f8afd291f793f7a6a831 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 17 Dec 2024 20:47:52 -0500 Subject: [PATCH 14/52] changed functions that called npart_per_tag() --- src/framework/containers/particles.cpp | 18 ++++++++------ src/framework/containers/particles.h | 2 +- src/framework/domain/communications.cpp | 31 ++++++++++++++----------- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/src/framework/containers/particles.cpp 
b/src/framework/containers/particles.cpp index 52efdaf6d..fe2346132 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -78,7 +78,8 @@ namespace ntt { } template - auto Particles::npart_per_tag() const -> std::vector { + auto Particles::npart_per_tag() const -> std::pair, + array_t>{ auto this_tag = tag; array_t npart_tag("npart_tags", ntags()); @@ -97,23 +98,25 @@ namespace ntt { auto npart_tag_host = Kokkos::create_mirror_view(npart_tag); Kokkos::deep_copy(npart_tag_host, npart_tag); + array_t tag_offset("tag_offset", ntags()); + auto tag_offset_host = Kokkos::create_mirror_view(tag_offset); std::vector npart_tag_vec(ntags()); - std::vector tag_offset(ntags()); for (std::size_t t { 0 }; t < ntags(); ++t) { - npart_tag_vec[t] = npart_tag_host(t); - tag_offset[t] = (t > 0) ? npart_tag_vec[t - 1] : 0; + npart_tag_vec[t] = npart_tag_host(t); + tag_offset_host(t) = (t > 0) ? npart_tag_vec[t - 1] : 0; } for (std::size_t t { 0 }; t < ntags(); ++t) { - tag_offset[t] += (t > 0) ? tag_offset[t - 1] : 0; + tag_offset_host(t) += (t > 0) ? 
tag_offset_host(t - 1) : 0; } + Kokkos::deep_copy(tag_offset, tag_offset_host); return std::make_pair(npart_tag_vec, tag_offset); } template auto Particles::SortByTags() -> std::vector { if (npart() == 0 || is_sorted()) { - return npart_per_tag(); + return npart_per_tag().first; } using KeyType = array_t; using BinOp = sort::BinTag; @@ -156,7 +159,8 @@ namespace ntt { Sorter.sort(Kokkos::subview(phi, slice)); } - const auto np_per_tag = npart_per_tag(); + auto np_per_tag_tag_offset = npart_per_tag(); + const auto np_per_tag = np_per_tag_tag_offset.first; set_npart(np_per_tag[(short)(ParticleTag::alive)]); m_is_sorted = true; diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index e4d78cd0d..ea692bdd9 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -189,7 +189,7 @@ namespace ntt { * @brief Count the number of particles with a specific tag. * @return The vector of counts for each tag. */ - auto npart_per_tag() const -> std::vector; + auto npart_per_tag() const -> std::pair, array_t>; /* setters -------------------------------------------------------------- */ /** diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index ff7edfec6..36f7a1858 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -656,16 +656,18 @@ namespace ntt { HERE); logger::Checkpoint("Communicating particles\n", HERE); for (auto& species : domain.species) { - auto npart_per_tag_arr = species.npart_per_tag(); - auto npart = static_cast(species.npart()); - auto total_alive = static_cast( - npart_per_tag_arr[ParticleTag::alive]); - auto total_dead = static_cast( - npart_per_tag_arr[ParticleTag::dead]); - auto total_holes = static_cast(npart - total_alive); - auto total_send = static_cast(npart - total_alive - total_dead); - auto total_recv = static_cast(0); - auto tag_count = static_cast(npart_per_tag_arr.size()); + // TO DO: 
npart per tag must return npart_per_tag_arr and the cumsum array + auto [npart_per_tag_arr, + tag_offset] = species.npart_per_tag(); + auto npart = static_cast(species.npart()); + auto total_alive = static_cast( + npart_per_tag_arr[ParticleTag::alive]); + auto total_dead = static_cast( + npart_per_tag_arr[ParticleTag::dead]); + auto total_holes = static_cast(npart - total_alive); + auto total_send = static_cast(npart - total_alive - total_dead); + auto total_recv = static_cast(0); + auto tag_count = static_cast(npart_per_tag_arr.size()); std::vector send_ranks, send_inds; std::vector recv_ranks, recv_inds; @@ -715,7 +717,6 @@ namespace ntt { (dead) (alive) (tag1) ... */ auto& this_tag = species.tag; - auto& this_tag_offset = species.tag_offset; Kokkos::View permute_vector("permute_vector", species.npart()); Kokkos::View current_offset("current_offset", species.ntags()); // @TODO: do not save tag = 1 particles into permute_vector @@ -726,7 +727,7 @@ namespace ntt { species.npart(), Lambda(index_t p) { const auto current_tag = this_tag(p); - const auto idx_permute_vec = this_tag_offset(current_tag) + + const auto idx_permute_vec = tag_offset(current_tag) + Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); @@ -826,11 +827,13 @@ namespace ntt { } // Tuple that contains the start and end indices of permtute_vec pointing to a given tag type = dir2tag(dir) + auto tag_offset_h = Kokkos::create_mirror_view(tag_offset); + Kokkos::deep_copy(tag_offset_h, tag_offset); auto range_permute = std::make_pair( static_cast( - species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)]), + tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)]), static_cast( - species.tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)] + + tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)] + npart_per_tag_arr[mpi::PrtlSendTag::dir2tag(direction)])); // Tuple that contains the start and end indices for allocation_vector pointing to a given tag type = dir2tag(dir) auto range_allocate = std::make_pair( 
From d6a325b3251e414331f89d85eba0ad061fb07bfe Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Thu, 19 Dec 2024 15:14:43 -0500 Subject: [PATCH 15/52] changed comms to dispatch arrays of same type in one buffer --- src/framework/domain/comm_mpi.hpp | 619 +++++++++++++++--------- src/framework/domain/communications.cpp | 433 ++++++++--------- 2 files changed, 593 insertions(+), 459 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index d7d19c983..66ea17d23 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -14,6 +14,7 @@ #include "enums.h" #include "global.h" +#include "arch/directions.h" #include "arch/kokkos_aliases.h" #include "arch/mpi_aliases.h" #include "utils/error.h" @@ -296,8 +297,8 @@ namespace comm { int recv_rank, const range_tuple_t& send_slice, const range_tuple_t& recv_slice) { - // auto array_h = Kokkos::create_mirror_view(arr); - // Kokkos::deep_copy(array, arr); + //auto arr_h = Kokkos::create_mirror_view(arr); + //Kokkos::deep_copy(arr_h, arr); const std::size_t send_count = send_slice.second - send_slice.first; const std::size_t recv_count = recv_slice.second - recv_slice.first; if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and @@ -331,7 +332,7 @@ namespace comm { MPI_STATUS_IGNORE); } // if ((recv_rank >= 0) and (recv_count > 0)) { - // Kokkos::deep_copy(arr, array_h); + // Kokkos::deep_copy(arr, arr_h); // } } @@ -458,256 +459,398 @@ namespace comm { return recv_count; } - template - void CommunicateParticleQuantityBuffer(array_t& arr, - int send_rank, - int recv_rank, - const range_tuple_t& send_slice, - const range_tuple_t& recv_slice, - Kokkos::View indices_to_send, - Kokkos::View indices_to_allocate) { - - array_t buffer("buffer", - indices_to_send.extent(0) + indices_to_allocate.extent(0)); - // Populate the buffer for particle array - Kokkos::parallel_for( - "PopulateBuffer", - indices_to_send.extent(0), - Lambda(const size_t i) { 
buffer(i) = arr(indices_to_send(i)); }); - CommunicateParticleQuantity(buffer, send_rank, recv_rank, send_slice, recv_slice); - // Populate from buffer to the particle array - Kokkos::parallel_for( - "PopulateFromBuffer", - indices_to_allocate.extent(0), - Lambda(const size_t i) { - arr(indices_to_allocate(i)) = buffer(indices_to_send.extent(0) + i); - }); - return; - } template - void CommunicateParticlesBuffer(Particles& species, - Kokkos::View indices_to_send, - Kokkos::View indices_to_allocate, - int send_rank, - int recv_rank, - std::vector shifts_in_x) { - if ((send_rank < 0) && (recv_rank < 0)) { - raise::Error("No send or recv in SendRecvParticlesBuffered", HERE); - } - // First set the tags of the sent particles to be dead - auto& this_tag = species.tag; - // Kokkos::parallel_for( - //"SetTagDead", - // Kokkos::RangePolicy(0, indices_to_allocate.size()), - // KOKKOS_LAMBDA(const size_t i) { - // const auto idx = indices_to_send(i); - // this_tag(idx) = static_cast(ParticleTag::dead); - // }); - - // Construct send and receive slice for the buffer - auto send_slice = range_tuple_t({ 0, indices_to_send.size() }); - auto recv_slice = range_tuple_t( - { indices_to_send.size(), - indices_to_send.size() + indices_to_allocate.size() }); - // Send and receive the particles - CommunicateParticleQuantityBuffer(species.i1, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx1, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.i1_prev, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx1_prev, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - if constexpr (D == Dim::_2D || D == Dim::_3D) { - CommunicateParticleQuantityBuffer(species.i2, - send_rank, - 
recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx2, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.i2_prev, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx2_prev, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); + void CommunicateParticlesBuffer(Particles& species, + Kokkos::View permute_vector, + Kokkos::View allocation_vector, + Kokkos::View tag_offset, + std::vector npart_per_tag_arr, + std::vector npart_per_tag_arr_recv, + std::vector send_ranks, + std::vector recv_ranks) { + // Pointers to the particle data arrays + auto &this_ux1 = species.ux1; + auto &this_ux2 = species.ux2; + auto &this_ux3 = species.ux3; + auto &this_weight = species.weight; + auto &this_phi = species.phi; + auto &this_i1 = species.i1; + auto &this_i1_prev = species.i1_prev; + auto &this_i2 = species.i2; + auto &this_i3 = species.i3; + auto &this_i2_prev = species.i2_prev; + auto &this_i3_prev = species.i3_prev; + auto &this_dx1 = species.dx1; + auto &this_dx1_prev = species.dx1_prev; + auto &this_dx2 = species.dx2; + auto &this_dx3 = species.dx3; + auto &this_dx2_prev = species.dx2_prev; + auto &this_dx3_prev = species.dx3_prev; + auto &this_tag = species.tag; + + // Number of arrays of each type to send/recv + auto NREALS = 4; + auto NINTS = 2; + auto NFLOATS = 2; + if constexpr (D == Dim::_2D) { + this_i2 = species.i2; + this_i2_prev = species.i2_prev; + this_dx2 = species.dx2; + this_dx2_prev = species.dx2_prev; + if (C != Coord::Cart) { + NREALS = 5; + NINTS = 4; + NFLOATS = 4; + this_phi = species.phi; + } else { + NREALS = 4; + NINTS = 4; + NFLOATS = 4; + } } if constexpr (D == Dim::_3D) { - CommunicateParticleQuantityBuffer(species.i3, - send_rank, - recv_rank, - 
send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx3, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.i3_prev, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.dx3_prev, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); + this_i2 = species.i2; + this_i2_prev = species.i2_prev; + this_dx2 = species.dx2; + this_dx2_prev = species.dx2_prev; + this_i3 = species.i3; + this_i3_prev = species.i3_prev; + this_dx3 = species.dx3; + this_dx3_prev = species.dx3_prev; + NREALS = 4; + NINTS = 6; + NFLOATS = 6; } - CommunicateParticleQuantityBuffer(species.ux1, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.ux2, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.ux3, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - CommunicateParticleQuantityBuffer(species.weight, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - if constexpr (D == Dim::_2D and C != Coord::Cart) { - CommunicateParticleQuantityBuffer(species.phi, - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); - } - for (auto p { 0 }; p < species.npld(); ++p) { - CommunicateParticleQuantityBuffer(species.pld[p], - send_rank, - recv_rank, - send_slice, - recv_slice, - indices_to_send, - indices_to_allocate); + + // Now make buffers to store recevied data (don't need global send buffers) + const auto total_send = permute_vector.extent(0) - npart_per_tag_arr[ParticleTag::dead]; + const auto total_recv = 
allocation_vector.extent(0); + const auto n_alive = npart_per_tag_arr[ParticleTag::alive]; + const auto n_dead = npart_per_tag_arr[ParticleTag::dead]; + + /* + Brief on recv buffers: Each recv buffer contains all the received arrays of + a given type. The different physical quantities are stored next to each other + to avoid cache misses. The array is structured as follows: + E.g., + recv_buffer_int: | qty1 | qty2 | ... | qtyNINTS | qty1 | qty2 | ... | qtyNINTS | ... + <-------particle to recv1------> <-------particle to recv2--------> + <----------------------------------total_recv----------------------------> + */ + Kokkos::View recv_buffer_int("recv_buffer_int", total_recv * NINTS); + Kokkos::View recv_buffer_real("recv_buffer_real", total_recv * NREALS); + Kokkos::View recv_buffer_prtldx("recv_buffer_prtldx",total_recv * NFLOATS); + auto recv_buffer_int_h = Kokkos::create_mirror_view(recv_buffer_int); + auto recv_buffer_real_h = Kokkos::create_mirror_view(recv_buffer_real); + auto recv_buffer_prtldx_h = Kokkos::create_mirror_view(recv_buffer_prtldx); + + + auto iteration = 0; + auto current_received = 0; + for (auto& direction : dir::Directions::all) { + const auto send_rank = send_ranks[iteration]; + const auto recv_rank = recv_ranks[iteration]; + const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); + const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); + const auto send_count = npart_per_tag_arr[tag_send]; + const auto recv_count = npart_per_tag_arr_recv[tag_recv]; + if (send_rank < 0 and recv_rank < 0) { + continue; + } + Kokkos::View send_buffer_int("send_buffer_int", send_count * NINTS); + Kokkos::View send_buffer_real("send_buffer_real", send_count * NREALS); + Kokkos::View send_buffer_prtldx("send_buffer_prtldx",send_count * NFLOATS); + auto send_buffer_int_h = Kokkos::create_mirror_view(send_buffer_int); + auto send_buffer_real_h = Kokkos::create_mirror_view(send_buffer_real); + auto send_buffer_prtldx_h = 
Kokkos::create_mirror_view(send_buffer_prtldx); + + // Need different constexpr parallel fors for different dims + if constexpr(D == Dim::_1D) { + Kokkos::parallel_for( + "PopulateSendBuffer", + send_count, + Lambda(const std::size_t p){ + const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); + send_buffer_int(NINTS * p + 0) = this_i1(idx); + send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); + send_buffer_real(NREALS * p + 0) = this_ux1(idx); + send_buffer_real(NREALS * p + 1) = this_ux2(idx); + send_buffer_real(NREALS * p + 2) = this_ux3(idx); + send_buffer_real(NREALS * p + 3) = this_weight(idx); + send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); + send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); + }); + } + if constexpr(D == Dim::_2D && C == Coord::Cart) { + Kokkos::parallel_for( + "PopulateSendBuffer", + send_count, + Lambda(const std::size_t p){ + const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); + send_buffer_int(NINTS * p + 0) = this_i1(idx); + send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); + send_buffer_int(NINTS * p + 2) = this_i2(idx); + send_buffer_int(NINTS * p + 3) = this_i2_prev(idx); + send_buffer_real(NREALS * p + 0) = this_ux1(idx); + send_buffer_real(NREALS * p + 1) = this_ux2(idx); + send_buffer_real(NREALS * p + 2) = this_ux3(idx); + send_buffer_real(NREALS * p + 3) = this_weight(idx); + send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); + send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); + send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); + send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); + }); + } + if constexpr(D == Dim::_2D && C != Coord::Cart) { + Kokkos::parallel_for( + "PopulateSendBuffer", + send_count, + Lambda(const std::size_t p){ + const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); + send_buffer_int(NINTS * p + 0) = this_i1(idx); + send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); + send_buffer_int(NINTS * p + 2) = this_i2(idx); + 
send_buffer_int(NINTS * p + 3) = this_i2_prev(idx); + send_buffer_real(NREALS * p + 0) = this_ux1(idx); + send_buffer_real(NREALS * p + 1) = this_ux2(idx); + send_buffer_real(NREALS * p + 2) = this_ux3(idx); + send_buffer_real(NREALS * p + 3) = this_weight(idx); + send_buffer_real(NREALS * p + 4) = this_phi(idx); + send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); + send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); + send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); + send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); + }); + } + if constexpr(D == Dim::_3D) { + Kokkos::parallel_for( + "PopulateSendBuffer", + send_count, + Lambda(const std::size_t p){ + const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); + send_buffer_int(NINTS * p + 0) = this_i1(idx); + send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); + send_buffer_int(NINTS * p + 2) = this_i2(idx); + send_buffer_int(NINTS * p + 3) = this_i2_prev(idx); + send_buffer_int(NINTS * p + 4) = this_i3(idx); + send_buffer_int(NINTS * p + 5) = this_i3_prev(idx); + send_buffer_real(NREALS * p + 0) = this_ux1(idx); + send_buffer_real(NREALS * p + 1) = this_ux2(idx); + send_buffer_real(NREALS * p + 2) = this_ux3(idx); + send_buffer_real(NREALS * p + 3) = this_weight(idx); + send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); + send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); + send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); + send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); + send_buffer_prtldx(NFLOATS * p + 4) = this_dx3(idx); + send_buffer_prtldx(NFLOATS * p + 5) = this_dx3_prev(idx); + }); + } + + auto tag_offset_h = Kokkos::create_mirror_view(tag_offset); + Kokkos::deep_copy(tag_offset_h, tag_offset); + /* + Brief on receive offset: + The receive buffer looks like this + <-----------------------------------> + |NINT|NINT|NINT|NINT|NINT|NINT|NINT|NINT|...xnrecv + <--------><--------><--------><--------> + recv1 recv2 recv3 recv4 + |________| + ^ ^ + offset 
offset + nrecv + */ + const auto receive_offset_int = current_received * NINTS; + const auto receive_offset_real = current_received * NREALS; + const auto receive_offset_prtldx = current_received * NFLOATS; + // Comms + // Make host arrays for send and recv buffers + Kokkos::deep_copy(send_buffer_int_h, send_buffer_int); + Kokkos::deep_copy(send_buffer_real_h, send_buffer_real); + Kokkos::deep_copy(send_buffer_prtldx_h, send_buffer_prtldx); + + if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and + (recv_count > 0)) { + MPI_Sendrecv(send_buffer_int_h.data(), + send_count * NINTS, + mpi::get_type(), + send_rank, + 0, + recv_buffer_int_h.data() + receive_offset_int, + recv_count*NINTS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Sendrecv(send_buffer_real_h.data(), + send_count * NREALS, + mpi::get_type(), + send_rank, + 0, + recv_buffer_real_h.data() + receive_offset_real, + recv_count*NREALS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Sendrecv(send_buffer_prtldx_h.data(), + send_count * NFLOATS, + mpi::get_type(), + send_rank, + 0, + recv_buffer_prtldx_h.data() + receive_offset_prtldx, + recv_count*NFLOATS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } else if ((send_rank >= 0) and (send_count > 0)) { + MPI_Send(send_buffer_int_h.data(), + send_count * NINTS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + MPI_Send(send_buffer_real_h.data(), + send_count * NREALS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + MPI_Send(send_buffer_prtldx_h.data(), + send_count * NFLOATS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + } else if ((recv_rank >= 0) and (recv_count > 0)) { + MPI_Recv(recv_buffer_int_h.data() + receive_offset_int, + recv_count * NINTS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Recv(recv_buffer_real_h.data() + receive_offset_real, + recv_count * NREALS, + 
mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Recv(recv_buffer_prtldx_h.data() + receive_offset_prtldx, + recv_count * NFLOATS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); } - // Set the tag for the received particles to be alive and perform the necessary displacements - if constexpr (D == Dim::_1D) { - const auto shift_in_x1 = shifts_in_x[0]; - auto& this_i1 = species.i1; - auto& this_i1_prev = species.i1_prev; + current_received += recv_count; + iteration++; + } // end over direction loop + Kokkos::deep_copy(recv_buffer_int, recv_buffer_int_h); + Kokkos::deep_copy(recv_buffer_real, recv_buffer_real_h); + Kokkos::deep_copy(recv_buffer_prtldx, recv_buffer_prtldx_h); + if constexpr (D == Dim::_1D) + { Kokkos::parallel_for( - "SetTagAlive", - Kokkos::RangePolicy(0, indices_to_allocate.size()), - KOKKOS_LAMBDA(const size_t i) { - const auto idx = indices_to_allocate(i); - this_tag(idx) = static_cast(ParticleTag::alive); - this_i1(idx) += shift_in_x1; - this_i1_prev(idx) += shift_in_x1; - }); + "PopulateFromRecvBuffer", + total_recv, + Lambda(const std::size_t p){ + auto idx = allocation_vector(p); + this_tag(idx) = ParticleTag::alive; + this_i1(idx) = recv_buffer_int(NINTS * p + 0); + this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); + this_ux1(idx) = recv_buffer_real(NREALS * p + 0); + this_ux2(idx) = recv_buffer_real(NREALS * p + 1); + this_ux3(idx) = recv_buffer_real(NREALS * p + 2); + this_weight(idx) = recv_buffer_real(NREALS * p + 3); + this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); + this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); + }); } - else if constexpr (D == Dim::_2D) { - const auto shift_in_x1 = shifts_in_x[0]; - const auto shift_in_x2 = shifts_in_x[1]; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; + if constexpr (D == Dim::_2D && C == Coord::Cart) + { + Kokkos::parallel_for( + 
"PopulateFromRecvBuffer", + total_recv, + Lambda(const std::size_t p){ + auto idx = allocation_vector(p); + this_tag(idx) = ParticleTag::alive; + this_i1(idx) = recv_buffer_int(NINTS * p + 0); + this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); + this_i2(idx) = recv_buffer_int(NINTS * p + 2); + this_i2_prev(idx) = recv_buffer_int(NINTS * p + 3); + this_ux1(idx) = recv_buffer_real(NREALS * p + 0); + this_ux2(idx) = recv_buffer_real(NREALS * p + 1); + this_ux3(idx) = recv_buffer_real(NREALS * p + 2); + this_weight(idx) = recv_buffer_real(NREALS * p + 3); + this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); + this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); + this_dx2(idx) = recv_buffer_prtldx(NFLOATS * p + 2); + this_dx2_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 3); + }); + } + + if constexpr (D == Dim::_2D && C == Coord::Cart) + { Kokkos::parallel_for( - "SetTagAlive", - Kokkos::RangePolicy(0, indices_to_allocate.size()), - KOKKOS_LAMBDA(const size_t i) { - const auto idx = indices_to_allocate(i); - this_tag(idx) = static_cast(ParticleTag::alive); - this_i1(idx) += shift_in_x1; - this_i2(idx) += shift_in_x2; - this_i1_prev(idx) += shift_in_x1; - this_i2_prev(idx) += shift_in_x2; - }); + "PopulateFromRecvBuffer", + total_recv, + Lambda(const std::size_t p){ + auto idx = allocation_vector(p); + this_tag(idx) = ParticleTag::alive; + this_i1(idx) = recv_buffer_int(NINTS * p + 0); + this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); + this_i2(idx) = recv_buffer_int(NINTS * p + 2); + this_i2_prev(idx) = recv_buffer_int(NINTS * p + 3); + this_ux1(idx) = recv_buffer_real(NREALS * p + 0); + this_ux2(idx) = recv_buffer_real(NREALS * p + 1); + this_ux3(idx) = recv_buffer_real(NREALS * p + 2); + this_weight(idx) = recv_buffer_real(NREALS * p + 3); + this_phi(idx) = recv_buffer_real(NREALS * p + 4); + this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); + this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); + this_dx2(idx) = recv_buffer_prtldx(NFLOATS * p + 
2); + this_dx2_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 3); + }); } - else if constexpr (D == Dim::_3D) { - const auto shift_in_x1 = shifts_in_x[0]; - const auto shift_in_x2 = shifts_in_x[1]; - const auto shift_in_x3 = shifts_in_x[2]; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i3 = species.i3; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; - auto& this_i3_prev = species.i3_prev; + if constexpr (D == Dim::_3D) + { Kokkos::parallel_for( - "SetTagAlive", - Kokkos::RangePolicy(0, indices_to_allocate.size()), - KOKKOS_LAMBDA(const size_t i) { - const auto idx = indices_to_allocate(i); - this_tag(idx) = static_cast(ParticleTag::alive); - this_i1(idx) += shift_in_x1; - this_i2(idx) += shift_in_x2; - this_i3(idx) += shift_in_x3; - this_i1_prev(idx) += shift_in_x1; - this_i2_prev(idx) += shift_in_x2; - this_i3_prev(idx) += shift_in_x3; - }); + "PopulateFromRecvBuffer", + total_recv, + Lambda(const std::size_t p){ + auto idx = allocation_vector(p); + this_tag(idx) = ParticleTag::alive; + this_i1(idx) = recv_buffer_int(NINTS * p + 0); + this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); + this_i2(idx) = recv_buffer_int(NINTS * p + 2); + this_i2_prev(idx) = recv_buffer_int(NINTS * p + 3); + this_i3(idx) = recv_buffer_int(NINTS * p + 4); + this_i3_prev(idx) = recv_buffer_int(NINTS * p + 5); + this_ux1(idx) = recv_buffer_real(NREALS * p + 0); + this_ux2(idx) = recv_buffer_real(NREALS * p + 1); + this_ux3(idx) = recv_buffer_real(NREALS * p + 2); + this_weight(idx) = recv_buffer_real(NREALS * p + 3); + this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); + this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); + this_dx2(idx) = recv_buffer_prtldx(NFLOATS * p + 2); + this_dx2_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 3); + this_dx3(idx) = recv_buffer_prtldx(NFLOATS * p + 4); + this_dx3_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 5); + }); } - Kokkos::fence(); + species.set_npart(species.npart() + 
std::max(total_send, total_recv) - total_send); return; - } +} } // namespace comm diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 36f7a1858..cdc9e5b5a 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -656,7 +656,15 @@ namespace ntt { HERE); logger::Checkpoint("Communicating particles\n", HERE); for (auto& species : domain.species) { - // TO DO: npart per tag must return npart_per_tag_arr and the cumsum array + /* + Brief on arrays + npart_per_tag_arr (vector): | dead count| alive count | tag=1 count | tag=2 count | ... + <--------------------------size = ntags()--------------------------> + tag_offset (Kokkos::View): | 0 | dead count | dead + alive count | dead + alive + tag=1 count | ... + <--------------------------size = ntags()--------------------------> + npart_per_tag_arr_recv (vector): | 0 | 0 | nrecv1 | nrecv2 | ... + <--------------------------size = ntags()--------------------------> + */ auto [npart_per_tag_arr, tag_offset] = species.npart_per_tag(); auto npart = static_cast(species.npart()); @@ -665,21 +673,24 @@ namespace ntt { auto total_dead = static_cast( npart_per_tag_arr[ParticleTag::dead]); auto total_holes = static_cast(npart - total_alive); - auto total_send = static_cast(npart - total_alive - total_dead); auto total_recv = static_cast(0); - auto tag_count = static_cast(npart_per_tag_arr.size()); std::vector send_ranks, send_inds; std::vector recv_ranks, recv_inds; // at this point particles should already by tagged in the pusher #if defined(MPI_ENABLED) - // Defined for debugging - int mpi_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); - - // array that holds the number of particles to be received per tag - std::vector npart_per_tag_arr_recv(tag_count, 0); + std::vector npart_per_tag_arr_recv(species.ntags(), 0); + Kokkos::View shifts_in_x1("shifts_in_x1", species.ntags()); + Kokkos::View shifts_in_x2("shifts_in_x2", species.ntags()); 
+ Kokkos::View shifts_in_x3("shifts_in_x3", species.ntags()); + auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); + auto shifts_in_x2_h = Kokkos::create_mirror_view(shifts_in_x2); + auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); + + // Get receive counts + displacements for (auto& direction : dir::Directions::all) { + const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); + const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); const auto [send_params, recv_params] = GetSendRecvParams(this, domain, direction, true); const auto [send_indrank, send_slice] = send_params; @@ -689,253 +700,233 @@ namespace ntt { if (send_rank < 0 and recv_rank < 0) { continue; } - const auto send_dir_tag = mpi::PrtlSendTag::dir2tag(direction); - const auto nsend = npart_per_tag_arr[send_dir_tag]; + const auto nsend = npart_per_tag_arr[tag_send]; std::size_t nrecv = 0; - // Get the receive count + send_ranks.push_back(send_rank); recv_ranks.push_back(recv_rank); send_inds.push_back(send_ind); recv_inds.push_back(recv_ind); comm::ParticleSendRecvCount(send_rank, recv_rank, nsend, nrecv); total_recv += nrecv; - npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)] = nrecv; - } - - raise::FatalIf((npart + total_recv) >= species.maxnpart(), - "Too many particles to receive (cannot fit into maxptl)", - HERE); - // Now we know the number of particles to be sent and received per direction - /* permute vector contains the indices of the tags to send and receive - in the order of the directions - E.g., consider the following tag array - [ 0, 0, 3, 0, 1,...] - Then, permute vector will look something like - [0, 1, 3, ..., 4, ..., ... 2, ... ] - |<--------- >| |<----->| |<----->| .... - tag=0 ct tag=1 ct tag=3 ct - (dead) (alive) (tag1) ... 
- */ - auto& this_tag = species.tag; - Kokkos::View permute_vector("permute_vector", species.npart()); - Kokkos::View current_offset("current_offset", species.ntags()); - // @TODO: do not save tag = 1 particles into permute_vector - // instead of species.npart(), size will be species.npart() - npart_per_tag[ParticleTag::alive]; - - Kokkos::parallel_for( - "PermuteVector", - species.npart(), - Lambda(index_t p) { - const auto current_tag = this_tag(p); - const auto idx_permute_vec = tag_offset(current_tag) + - Kokkos::atomic_fetch_add( - ¤t_offset(current_tag), - 1); - permute_vector(idx_permute_vec) = p; - }); - - // Check: add the end of the loop, current_offset should be equal to npart_per_tag - auto current_offset_h = Kokkos::create_mirror_view(current_offset); - Kokkos::deep_copy(current_offset_h, current_offset); - for (std::size_t i { 0 }; i < current_offset_h.size(); ++i) { - raise::FatalIf(current_offset_h(i) != npart_per_tag_arr[i], - "Error in permute vector construction", - HERE); - } - - // allocation_vector(p) assigns the pth received particle - // to the pth hole in the array, or after npart() if p > sent+dead count. 
- Kokkos::View allocation_vector("allocation_vector", total_recv); - - // @CRITICAL: this may overwrite unsent data - Kokkos::parallel_for( - "AllocationVector", - total_recv, - Lambda(index_t p) { - // Case: received particle count less than dead particle count -> replace dead particles - if (p < total_dead) { - allocation_vector(p) = permute_vector(p); - } - // Case: received particle count > dead particle count but < sent particle count -> replace - // sent particles - else if (p < total_holes && p >= total_dead) { - allocation_vector(p) = permute_vector(total_alive + p); - } - // Case: received particle count exceeds sent + dead particles -> append at the end - else { - allocation_vector(p) = static_cast(npart + (p - total_holes)); - } - }); - Kokkos::fence(); + npart_per_tag_arr_recv[tag_recv] = nrecv; - std::size_t count_recv = 0; - std::size_t iteration = 0; - // Main loop over all direction where we send the data - for (auto& direction : dir::Directions::all) { - // When nowhere to send and receive - auto send_rank = send_ranks[iteration]; - auto recv_rank = recv_ranks[iteration]; - - if (send_rank < 0 and recv_rank < 0) { - continue; - } - // Get the coordinate shifts in xi - std::vector shifts_in_x; - auto recv_ind = recv_inds[iteration]; + // @CRITICAL: Ask Hayk if the displacements are correctly set before sending if constexpr (D == Dim::_1D) { - int shift_in_x1 { 0 }; if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); + shifts_in_x1_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x1); } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active(in::x1); + shifts_in_x1_h(tag_send) = domain.mesh.n_active(in::x1); } - shifts_in_x.push_back(shift_in_x1); } else if constexpr (D == Dim::_2D) { - int shift_in_x1 { 0 }, shift_in_x2 { 0 }; if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); + shifts_in_x1_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x1); } else if 
((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active()[0]; + shifts_in_x1_h(tag_send) = domain.mesh.n_active()[0]; } if ((-direction)[1] == -1) { - shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); + shifts_in_x2_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x2); } else if ((-direction)[1] == 1) { - shift_in_x2 = domain.mesh.n_active(in::x2); + shifts_in_x2_h(tag_send) = domain.mesh.n_active(in::x2); } - shifts_in_x.push_back(shift_in_x1); - shifts_in_x.push_back(shift_in_x2); } else if constexpr (D == Dim::_3D) { - int shift_in_x1 { 0 }, shift_in_x2 { 0 }, shift_in_x3 { 0 }; if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); + shifts_in_x1_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x1); } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active(in::x1); + shifts_in_x1_h(tag_send) = domain.mesh.n_active(in::x1); } if ((-direction)[1] == -1) { - shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); + shifts_in_x2_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x2); } else if ((-direction)[1] == 1) { - shift_in_x2 = domain.mesh.n_active(in::x2); + shifts_in_x2_h(tag_send) = domain.mesh.n_active(in::x2); } if ((-direction)[2] == -1) { - shift_in_x3 = -subdomain(recv_ind).mesh.n_active(in::x3); + shifts_in_x3_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x3); } else if ((-direction)[2] == 1) { - shift_in_x3 = domain.mesh.n_active(in::x3); + shifts_in_x3_h(tag_send) = domain.mesh.n_active(in::x3); } - shifts_in_x.push_back(shift_in_x1); - shifts_in_x.push_back(shift_in_x2); - shifts_in_x.push_back(shift_in_x3); } + } // end directions loop + Kokkos::deep_copy(shifts_in_x1, shifts_in_x1_h); + Kokkos::deep_copy(shifts_in_x2, shifts_in_x2_h); + Kokkos::deep_copy(shifts_in_x3, shifts_in_x3_h); - // Tuple that contains the start and end indices of permtute_vec pointing to a given tag type = dir2tag(dir) - auto tag_offset_h = Kokkos::create_mirror_view(tag_offset); - 
Kokkos::deep_copy(tag_offset_h, tag_offset); - auto range_permute = std::make_pair( - static_cast( - tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)]), - static_cast( - tag_offset_h[mpi::PrtlSendTag::dir2tag(direction)] + - npart_per_tag_arr[mpi::PrtlSendTag::dir2tag(direction)])); - // Tuple that contains the start and end indices for allocation_vector pointing to a given tag type = dir2tag(dir) - auto range_allocate = std::make_pair( - static_cast(count_recv), - static_cast( - count_recv + - npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)])); - // @TODO: check subview index - // contains the indices of all particles of a given tag = mpi::PrtlSendTag::dir2tag(direction) - auto indices_to_send = Kokkos::subview(permute_vector, range_permute); - // contains the indices of the holes where the received particles will be placed - auto indices_to_allocate = Kokkos::subview(allocation_vector, - range_allocate); - - // Main function that sends the particles and receives the arrays - comm::CommunicateParticlesBuffer(species, - indices_to_send, - indices_to_allocate, - send_rank, - recv_rank, - shifts_in_x); - count_recv += npart_per_tag_arr_recv[mpi::PrtlSendTag::dir2tag(-direction)]; - iteration++; + raise::FatalIf((npart + total_recv) >= species.maxnpart(), + "Too many particles to receive (cannot fit into maxptl)", + HERE); + + auto& this_tag = species.tag; + auto& this_i1 = species.i1; + auto& this_i1_prev = species.i1_prev; + auto& this_i2 = species.i2; + auto& this_i2_prev = species.i2_prev; + auto& this_i3 = species.i3; + auto& this_i3_prev = species.i3_prev; + + /* + Brief on permute vector: It contains the sorted indices of tag != alive particles + E.g., consider the following tag array + species.tag = [ 0, 0, 3, 0, 1, 2,...] + Then, permute vector will look something like + permute_vector = [0, 1, 3, ..., 4, ..., ... 5, ... ] + |<--------- >| |<----->| |<----->| .... 
+ tag=0 ct tag=1 ct tag=2 ct + */ + Kokkos::View permute_vector("permute_vector", total_holes); + Kokkos::View current_offset("current_offset", species.ntags()); + auto &this_tag_offset = tag_offset; + + auto n_alive = npart_per_tag_arr[ParticleTag::alive]; + + if constexpr (D == Dim::_1D){ + Kokkos::parallel_for( + "PermuteVector and Displace", + species.npart(), + Lambda(index_t p) { + const auto current_tag = this_tag(p); + if (current_tag != ParticleTag::alive){ + // dead tags only + if (current_tag == ParticleTag::dead) { + const auto idx_permute_vec = Kokkos::atomic_fetch_add( + &current_offset(current_tag), + 1); + } + // tag = 1->N (excluding dead and alive) + else{ + const auto idx_permute_vec = this_tag_offset(current_tag) - + n_alive + + Kokkos::atomic_fetch_add( + &current_offset(current_tag), + 1); + permute_vector(idx_permute_vec) = p; + this_i1(p) += shifts_in_x1(current_tag); + this_i1_prev(p) += shifts_in_x1(current_tag); + } + } + }); } - // Compute where the received particles are allocated - // if (mpi_rank == 0){ - // Kokkos::View particles_allocated_per_tag("particles - // allocated per tag", tag_count); Kokkos::parallel_for( - // "ParticlesAllocatedPerTag", - // total_recv, - // Lambda(const std::size_t i) { - // auto index = allocation_vector(i); - // auto tag = this_tag(index); - // Kokkos::atomic_fetch_add(&particles_allocated_per_tag(tag), 1); - // }); - // Kokkos::fence(); - // auto particles_allocated_per_tag_h = - // Kokkos::create_mirror_view(particles_allocated_per_tag); - // Kokkos::deep_copy(particles_allocated_per_tag_h, - // particles_allocated_per_tag); - - // std::cout << "Particles allocated per tag (post recv): "; - // for (std::size_t i = 0; i < tag_count; i++){ - // std::cout << "[" << particles_allocated_per_tag_h[i] << "] "; - // } - // std::cout << std::endl; - // } - // If receive count is less than send count then make the tags of sent dead - if (total_recv <= total_holes) { - if (total_recv <= total_dead) { - // Case: all sent
particles' tags are set to dead - /* (received) - [ | <------------------> | <-------->] - (dead) (alive) (sent) - || - (to be made dead) - ^ - (offset) - */ - - auto offset = total_alive + total_dead; - Kokkos::parallel_for( - "CommunicateParticles", - total_send, - Lambda(index_t p) { - this_tag(permute_vector(offset + p)) = ParticleTag::dead; - }); - } else { - // Case: tags of sent particles that are not replaced by recevied particles are made dead - /* (received) (received) - [ | <------------------> |] - (dead) (alive) (sent) - || - (to be made dead) - ^ - (offset) - */ - auto offset = total_alive + total_recv; - Kokkos::parallel_for( - "CommunicateParticles", - total_send - (total_recv - total_dead), - Lambda(index_t p) { - this_tag(permute_vector(offset + p)) = ParticleTag::dead; - }); + + if constexpr (D == Dim::_2D){ + Kokkos::parallel_for( + "PermuteVector and Displace", + species.npart(), + Lambda(index_t p) { + const auto current_tag = this_tag(p); + if (current_tag != ParticleTag::alive){ + // dead tags only + if (current_tag == ParticleTag::dead) { + const auto idx_permute_vec = Kokkos::atomic_fetch_add( + &current_offset(current_tag), + 1); + } + // tag = 1->N (excluding dead and alive) + else{ + const auto idx_permute_vec = this_tag_offset(current_tag) - + n_alive + + Kokkos::atomic_fetch_add( + &current_offset(current_tag), + 1); + permute_vector(idx_permute_vec) = p; + this_i1(p) += shifts_in_x1(current_tag); + this_i1_prev(p) += shifts_in_x1(current_tag); + this_i2(p) += shifts_in_x2(current_tag); + this_i2_prev(p) += shifts_in_x2(current_tag); + } + } + }); + } + + if constexpr (D == Dim::_3D){ + Kokkos::parallel_for( + "PermuteVector and Displace", + species.npart(), + Lambda(index_t p) { + const auto current_tag = this_tag(p); + if (current_tag != ParticleTag::alive){ + // dead tags only + if (current_tag == ParticleTag::dead) { + const auto idx_permute_vec = Kokkos::atomic_fetch_add( + &current_offset(current_tag), + 1); + } + // tag = 1->N (excluding dead
and alive) + else{ + const auto idx_permute_vec = this_tag_offset(current_tag) - + n_alive + + Kokkos::atomic_fetch_add( + &current_offset(current_tag), + 1); + permute_vector(idx_permute_vec) = p; + this_i1(p) += shifts_in_x1(current_tag); + this_i1_prev(p) += shifts_in_x1(current_tag); + this_i2(p) += shifts_in_x2(current_tag); + this_i2_prev(p) += shifts_in_x2(current_tag); + this_i3(p) += shifts_in_x3(current_tag); + this_i3_prev(p) += shifts_in_x3(current_tag); + } + } + }); + } + + // Sanity check: npart_per_tag must be equal to the current offset except tag=alive + auto current_offset_h = Kokkos::create_mirror_view(current_offset); + Kokkos::deep_copy(current_offset_h, current_offset); + for (std::size_t i { 0 }; i < species.ntags(); ++i) { + if (i != ParticleTag::alive){ + raise::FatalIf(current_offset_h(i) != npart_per_tag_arr[i], + "Error in permute vector construction", + HERE); + } + else{ + raise::FatalIf(current_offset_h(i) != 0, + "Error in permute vector construction", + HERE); } } - // Check if the particle tags are only dead or alive - species.set_npart(npart + std::max(total_send, total_recv) - total_send); - npart_per_tag_arr = species.npart_per_tag(); - // if (mpi_rank == 0) - //{ - // std::cout << "After COMM: " << std::endl; - // std::cout << "Tag counts: "; - // for (std::size_t i = 0; i < tag_count; i++){ - // std::cout << "[" << npart_per_tag_arr[i] << "] "; - // } - // std::cout << std::endl; - // std::cout << "Holes filled: " << total_holes << " Total recv: " << total_recv << - // "Total send: " << total_send << std::endl; - // std::cout << std::endl << "*************"<< std::endl; - // } + /* + Brief on allocation vector: It contains the indices of holes that are filled + by the particles received from other domains + case 1: total_recv > nholes + allocation_vector = | i1 | i2 | i3 | .... | npart | npart + 1 | ...
+ <-------total_holes------> <---total_recv - nholes--> + (same as permuute vector) (extra particles appended at end) + case 2: total_recv <= nholes + allocation_vector = | i1 | i2 | i3 | .... + <----total_recv-----> + (same as permuute vector) + */ + Kokkos::View allocation_vector("allocation_vector", total_recv); + if (total_recv > total_holes) + { + // Fill the first bit with the permute vector; these are the holes to be filled + Kokkos::parallel_for( + "AllocationVector", + total_holes, + Lambda(index_t p) { + allocation_vector(p) = permute_vector(p); + }); + + // Now allocate the rest to the end of the array + Kokkos::parallel_for( + "AllocationVector", + total_recv - total_holes, + Lambda(index_t p) { + allocation_vector(total_holes + p) = static_cast(npart + p); + }); + } + else + { Kokkos::parallel_for( + "AllocationVector", + total_recv, + Lambda(index_t p) { + allocation_vector(p) = permute_vector(p); + }); + } + // Communicate the arrays + comm::CommunicateParticlesBuffer(species, permute_vector, allocation_vector, + this_tag_offset, npart_per_tag_arr, npart_per_tag_arr_recv, + send_ranks, recv_ranks); #endif } } From 89a109dcfe61fcb30c478857d40a55791dee6476 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Sun, 29 Dec 2024 11:24:23 -0500 Subject: [PATCH 16/52] test for cuda mpi --- benchmark/benchmark.cpp | 64 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 593b7f190..54fc17cf9 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -13,6 +13,8 @@ #include #include "framework/domain/communications.cpp" +#include "mpi.h" +#include "mpi-ext.h" #define TIMER_START(label) \ Kokkos::fence(); \ @@ -97,6 +99,68 @@ void PushParticles(Domain& domain, auto main(int argc, char* argv[]) -> int { GlobalInitialize(argc, argv); { + /* + MPI checks + */ + printf("Compile time check:\n"); +#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT + 
printf("This MPI library has CUDA-aware support.\n", MPIX_CUDA_AWARE_SUPPORT); +#elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT + printf("This MPI library does not have CUDA-aware support.\n"); +#else + printf("This MPI library cannot determine if there is CUDA-aware support.\n"); +#endif /* MPIX_CUDA_AWARE_SUPPORT */ +printf("Run time check:\n"); +#if defined(MPIX_CUDA_AWARE_SUPPORT) + if (1 == MPIX_Query_cuda_support()) { + printf("This MPI library has CUDA-aware support.\n"); + } else { + printf("This MPI library does not have CUDA-aware support.\n"); + } +#else /* !defined(MPIX_CUDA_AWARE_SUPPORT) */ + printf("This MPI library cannot determine if there is CUDA-aware support.\n"); +#endif /* MPIX_CUDA_AWARE_SUPPORT */ + + /* + Test to send and receive Kokkos arrays + */ + int sender_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &sender_rank); + + int neighbor_rank = 0; + if (sender_rank == 0) { + neighbor_rank = 1; + } + else if (sender_rank == 1) { + neighbor_rank = 0; + } + else { + raise::Error("This test is only for 2 ranks", HERE); + } + Kokkos::View send_array("send_array", 10); + Kokkos::View recv_array("recv_array", 10); + if (sender_rank == 0) { + Kokkos::deep_copy(send_array, 10); + } + else { + Kokkos::deep_copy(send_array, 20); + } + + auto send_array_host = Kokkos::create_mirror_view(send_array); + Kokkos::deep_copy(send_array_host, send_array); + auto host_recv_array = Kokkos::create_mirror_view(recv_array); + + MPI_Sendrecv(send_array.data(), send_array.extent(0), MPI_INT, neighbor_rank, 0, + recv_array.data(), recv_array.extent(0), MPI_INT, neighbor_rank, 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + // Print the received array + Kokkos::deep_copy(host_recv_array, recv_array); + for (int i = 0; i < 10; ++i) { + printf("Rank %d: Received %d\n", sender_rank, host_recv_array(i)); + } + + std::cout << "Constructing the domain" << std::endl; // Create a Metadomain object const unsigned int ndomains = 2; From 
43924f5fbbcbc14092e7966ff0c4245ba04e71aa Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Mon, 30 Dec 2024 22:33:47 -0500 Subject: [PATCH 17/52] fixed displacements --- src/engines/srpic.hpp | 1 + src/framework/domain/comm_mpi.hpp | 67 +++++++++++++++++-------- src/framework/domain/communications.cpp | 36 +++++-------- 3 files changed, 60 insertions(+), 44 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 78c8f371e..4772b975a 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -1008,6 +1008,7 @@ namespace ntt { Kokkos::Experimental::contribute(domain.fields.bckp, scatter_bckp); m_metadomain.SynchronizeFields(domain, Comm::Bckp, { 0, 1 }); } + logger::Checkpoint("Atmosphere particles injected\n", HERE); if (dim == in::x1) { if (sign > 0) { diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 66ea17d23..0e4817571 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -297,43 +297,70 @@ namespace comm { int recv_rank, const range_tuple_t& send_slice, const range_tuple_t& recv_slice) { - //auto arr_h = Kokkos::create_mirror_view(arr); - //Kokkos::deep_copy(arr_h, arr); const std::size_t send_count = send_slice.second - send_slice.first; const std::size_t recv_count = recv_slice.second - recv_slice.first; + // Make arrays on the host + auto send_arr_h = Kokkos::create_mirror_view(Kokkos::subview(arr, send_slice)); + Kokkos::deep_copy(send_arr_h, Kokkos::subview(arr, send_slice)); + auto recv_arr_h = Kokkos::create_mirror_view(Kokkos::subview(arr, recv_slice)); if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and (recv_count > 0)) { - MPI_Sendrecv(arr.data() + send_slice.first, + MPI_Sendrecv(send_arr_h.data(), send_count, mpi::get_type(), send_rank, 0, - arr.data() + recv_slice.first, + recv_arr_h.data(), recv_count, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + //MPI_Sendrecv(arr.data() + send_slice.first, + // 
send_count, + // mpi::get_type(), + // send_rank, + // 0, + // arr.data() + recv_slice.first, + // recv_count, + // mpi::get_type(), + // recv_rank, + // 0, + // MPI_COMM_WORLD, + // MPI_STATUS_IGNORE); } else if ((send_rank >= 0) and (send_count > 0)) { - MPI_Send(arr.data() + send_slice.first, - send_count, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); + MPI_Send( send_arr_h.data(), + send_count, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + //MPI_Send(arr.data() + send_slice.first, + // send_count, + // mpi::get_type(), + // send_rank, + // 0, + // MPI_COMM_WORLD); } else if ((recv_rank >= 0) and (recv_count > 0)) { - MPI_Recv(arr.data() + recv_slice.first, - recv_count, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); + MPI_Recv( recv_arr_h.data(), + recv_count, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + //MPI_Recv(arr.data() + recv_slice.first, + // recv_count, + // mpi::get_type(), + // recv_rank, + // 0, + // MPI_COMM_WORLD, + // MPI_STATUS_IGNORE); + } + if ((recv_rank >= 0) and (recv_count > 0)) { + Kokkos::deep_copy(Kokkos::subview(arr, recv_slice), recv_arr_h); } - // if ((recv_rank >= 0) and (recv_count > 0)) { - // Kokkos::deep_copy(arr, arr_h); - // } } void ParticleSendRecvCount(int send_rank, diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index cdc9e5b5a..428db1ea6 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -712,38 +712,26 @@ namespace ntt { npart_per_tag_arr_recv[tag_recv] = nrecv; // @CRITICAL: Ask Hayk if the displacements are correctly set before sending - if constexpr (D == Dim::_1D) { + // direction must be defined + if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { if ((-direction)[0] == -1) { - shifts_in_x1_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x1); + shifts_in_x1_h(tag_send) = subdomain(recv_ind).mesh.n_active(in::x1); } 
else if ((-direction)[0] == 1) { - shifts_in_x1_h(tag_send) = domain.mesh.n_active(in::x1); - } - } else if constexpr (D == Dim::_2D) { - if ((-direction)[0] == -1) { - shifts_in_x1_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shifts_in_x1_h(tag_send) = domain.mesh.n_active()[0]; - } - if ((-direction)[1] == -1) { - shifts_in_x2_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x2); - } else if ((-direction)[1] == 1) { - shifts_in_x2_h(tag_send) = domain.mesh.n_active(in::x2); - } - } else if constexpr (D == Dim::_3D) { - if ((-direction)[0] == -1) { - shifts_in_x1_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shifts_in_x1_h(tag_send) = domain.mesh.n_active(in::x1); + shifts_in_x1_h(tag_send) = -domain.mesh.n_active(in::x1); } + } + if constexpr (D == Dim::_2D || D == Dim::_3D) { if ((-direction)[1] == -1) { - shifts_in_x2_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x2); + shifts_in_x2_h(tag_send) = subdomain(recv_ind).mesh.n_active(in::x2); } else if ((-direction)[1] == 1) { - shifts_in_x2_h(tag_send) = domain.mesh.n_active(in::x2); + shifts_in_x2_h(tag_send) = -domain.mesh.n_active(in::x2); } + } + if constexpr (D == Dim::_3D) { if ((-direction)[2] == -1) { - shifts_in_x3_h(tag_send) = -subdomain(recv_ind).mesh.n_active(in::x3); + shifts_in_x3_h(tag_send) = subdomain(recv_ind).mesh.n_active(in::x3); } else if ((-direction)[2] == 1) { - shifts_in_x3_h(tag_send) = domain.mesh.n_active(in::x3); + shifts_in_x3_h(tag_send) = -domain.mesh.n_active(in::x3); } } } // end directions loop From 8571797268f689e572ea290df480fe6c49a73707 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Mon, 30 Dec 2024 22:34:09 -0500 Subject: [PATCH 18/52] changed mpi init call --- src/global/global.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/global/global.cpp b/src/global/global.cpp index ec22fd2f3..434740446 100644 --- 
a/src/global/global.cpp +++ b/src/global/global.cpp @@ -9,7 +9,17 @@ void ntt::GlobalInitialize(int argc, char* argv[]) { Kokkos::initialize(argc, argv); #if defined(MPI_ENABLED) - MPI_Init(&argc, &argv); + int required = MPI_THREAD_MULTIPLE; + int provided; + MPI_Init_thread(&argc, + &argv, + required, + &provided); + if (provided != required) { + std::cerr << "MPI_Init_thread() did not provide the requested threading support." << std::endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + //MPI_Init(&argc, &argv); #endif // MPI_ENABLED } From ea99f3bc86cdfeebc871f14eab00d30dca4fa096 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Wed, 1 Jan 2025 13:54:18 -0500 Subject: [PATCH 19/52] fixed displacements --- src/engines/srpic.hpp | 4 ++-- src/entity.cpp | 2 +- src/framework/domain/comm_mpi.hpp | 7 ++++++- src/framework/domain/communications.cpp | 26 +++++++++++++------------ src/framework/domain/output.cpp | 2 +- src/global/utils/progressbar.cpp | 4 ++-- src/kernels/particle_moments.hpp | 1 - 7 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 4772b975a..6e0d9634e 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -102,6 +102,7 @@ namespace ntt { timers.start("FieldBoundaries"); FieldBoundaries(dom, BC::B); timers.stop("FieldBoundaries"); + Kokkos::fence(); } { @@ -127,7 +128,7 @@ namespace ntt { timers.start("Communications"); if ((sort_interval > 0) and (step % sort_interval == 0)) { - m_metadomain.CommunicateParticles(dom, &timers); + m_metadomain.CommunicateParticlesBuffer(dom, &timers); } timers.stop("Communications"); } @@ -1008,7 +1009,6 @@ namespace ntt { Kokkos::Experimental::contribute(domain.fields.bckp, scatter_bckp); m_metadomain.SynchronizeFields(domain, Comm::Bckp, { 0, 1 }); } - logger::Checkpoint("Atmosphere particles injected\n", HERE); if (dim == in::x1) { if (sign > 0) { diff --git a/src/entity.cpp b/src/entity.cpp index 272635d68..79b2f1335 100644 --- a/src/entity.cpp +++ 
b/src/entity.cpp @@ -114,4 +114,4 @@ auto main(int argc, char* argv[]) -> int { } return 0; -} +} \ No newline at end of file diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 0e4817571..c8e7de3a7 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -607,6 +607,7 @@ namespace comm { send_buffer_real(NREALS * p + 3) = this_weight(idx); send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); + this_tag(idx) = ParticleTag::dead; }); } if constexpr(D == Dim::_2D && C == Coord::Cart) { @@ -627,6 +628,7 @@ namespace comm { send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); + this_tag(idx) = ParticleTag::dead; }); } if constexpr(D == Dim::_2D && C != Coord::Cart) { @@ -648,6 +650,7 @@ namespace comm { send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); + this_tag(idx) = ParticleTag::dead; }); } if constexpr(D == Dim::_3D) { @@ -672,6 +675,7 @@ namespace comm { send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); send_buffer_prtldx(NFLOATS * p + 4) = this_dx3(idx); send_buffer_prtldx(NFLOATS * p + 5) = this_dx3_prev(idx); + this_tag(idx) = ParticleTag::dead; }); } @@ -825,7 +829,7 @@ namespace comm { }); } - if constexpr (D == Dim::_2D && C == Coord::Cart) + if constexpr (D == Dim::_2D && C != Coord::Cart) { Kokkos::parallel_for( "PopulateFromRecvBuffer", @@ -875,6 +879,7 @@ namespace comm { this_dx3_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 5); }); } + species.set_npart(species.npart() + std::max(total_send, total_recv) - total_send); return; } diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 428db1ea6..80414bd93 100644 --- 
a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -787,8 +787,8 @@ namespace ntt { &current_offset(current_tag), 1); permute_vector(idx_permute_vec) = p; - this_i1(p) += shifts_in_x1(current_tag); - this_i1_prev(p) += shifts_in_x1(current_tag); + this_i1(p) -= shifts_in_x1(current_tag); + this_i1_prev(p) -= shifts_in_x1(current_tag); } } }); @@ -815,10 +815,10 @@ namespace ntt { &current_offset(current_tag), 1); permute_vector(idx_permute_vec) = p; - this_i1(p) += shifts_in_x1(current_tag); - this_i1_prev(p) += shifts_in_x1(current_tag); - this_i2(p) += shifts_in_x2(current_tag); - this_i2_prev(p) += shifts_in_x2(current_tag); + this_i1(p) -= shifts_in_x1(current_tag); + this_i1_prev(p) -= shifts_in_x1(current_tag); + this_i2(p) -= shifts_in_x2(current_tag); + this_i2_prev(p) -= shifts_in_x2(current_tag); } } }); @@ -845,17 +845,19 @@ namespace ntt { &current_offset(current_tag), 1); permute_vector(idx_permute_vec) = p; - this_i1(p) += shifts_in_x1(current_tag); - this_i1_prev(p) += shifts_in_x1(current_tag); - this_i2(p) += shifts_in_x2(current_tag); - this_i2_prev(p) += shifts_in_x2(current_tag); - this_i3(p) += shifts_in_x3(current_tag); - this_i3_prev(p) += shifts_in_x3(current_tag); + this_i1(p) -= shifts_in_x1(current_tag); + this_i1_prev(p) -= shifts_in_x1(current_tag); + this_i2(p) -= shifts_in_x2(current_tag); + this_i2_prev(p) -= shifts_in_x2(current_tag); + this_i3(p) -= shifts_in_x3(current_tag); + this_i3_prev(p) -= shifts_in_x3(current_tag); } } }); } + + // Sanity check: npart_per_tag must be equal to the current offset except tag=alive auto current_offset_h = Kokkos::create_mirror_view(current_offset); Kokkos::deep_copy(current_offset_h, current_offset); diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index c7cb6bb65..4a6b2c908 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -107,6 +107,7 @@ namespace ntt { } } + template void ComputeMoments(const
SimulationParams& params, const Mesh& mesh, @@ -271,7 +272,6 @@ namespace ntt { }); g_writer.writeMesh(dim, xc, xe); } - const auto output_asis = params.template get("output.debug.as_is"); // !TODO: this can probably be optimized to dump things at once for (auto& fld : g_writer.fieldWriters()) { diff --git a/src/global/utils/progressbar.cpp b/src/global/utils/progressbar.cpp index 74f952382..38f65a790 100644 --- a/src/global/utils/progressbar.cpp +++ b/src/global/utils/progressbar.cpp @@ -52,10 +52,10 @@ namespace pbar { } auto to_human_readable(long double t, const std::string& u) -> std::string { - const auto [tt, tu] = normalize_duration_fmt(t, u); + const auto [tt, tu] = std::pair{t, u};//normalize_duration_fmt(t, u); const auto t1 = static_cast(tt); const auto t2 = tt - static_cast(t1); - const auto [tt2, tu2] = normalize_duration_fmt(t2, tu); + const auto [tt2, tu2] = std::pair{t2, tu};//normalize_duration_fmt(t2, tu); return fmt::format("%d%s %d%s", t1, tu.c_str(), static_cast(tt2), tu2.c_str()); } diff --git a/src/kernels/particle_moments.hpp b/src/kernels/particle_moments.hpp index 8b668a036..0621646ad 100644 --- a/src/kernels/particle_moments.hpp +++ b/src/kernels/particle_moments.hpp @@ -223,7 +223,6 @@ namespace kernel { } coeff *= weight(p) * smooth; } - auto buff_access = Buff.access(); if constexpr (D == Dim::_1D) { for (auto di1 { -window }; di1 <= window; ++di1) { From 73377c2cba083a25d3e449b7632402d24d2b8a32 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Sat, 4 Jan 2025 10:33:22 -0500 Subject: [PATCH 20/52] fixed permute vector construction --- src/engines/srpic.hpp | 5 + src/framework/containers/particles.cpp | 3 + src/framework/containers/particles.h | 4 + src/framework/domain/comm_mpi.hpp | 178 +++++++++++++++++++++--- src/framework/domain/communications.cpp | 84 ++++++++--- src/framework/domain/metadomain.cpp | 27 ++++ src/framework/domain/metadomain.h | 1 + 7 files changed, 263 insertions(+), 39 deletions(-) diff --git 
a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 6e0d9634e..d751c712a 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -126,6 +126,11 @@ namespace ntt { timers.stop("CurrentFiltering"); } + // Tags are assigned by now + if (step == 0){ + m_metadomain.SetParticleIDs(dom); + } + timers.start("Communications"); if ((sort_interval > 0) and (step % sort_interval == 0)) { m_metadomain.CommunicateParticlesBuffer(dom, &timers); diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index fe2346132..1cb63bf43 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -47,6 +47,9 @@ namespace ntt { tag = array_t { label + "_tag", maxnpart }; tag_h = Kokkos::create_mirror_view(tag); + particleID = array_t {label + "_particleID", maxnpart}; + particleID_h = Kokkos::create_mirror_view(particleID); + for (unsigned short n { 0 }; n < npld; ++n) { pld.push_back(array_t("pld", maxnpart)); pld_h.push_back(Kokkos::create_mirror_view(pld[n])); diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index ea692bdd9..131ff45c0 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -64,6 +64,8 @@ namespace ntt { std::vector> pld; // phi coordinate (for axisymmetry) array_t phi; + // Array to store the particle ids + array_t particleID; // host mirrors array_mirror_t i1_h, i2_h, i3_h; @@ -73,6 +75,7 @@ namespace ntt { array_mirror_t phi_h; array_mirror_t tag_h; std::vector> pld_h; + array_mirror_t particleID_h; // for empty allocation Particles() {} @@ -178,6 +181,7 @@ namespace ntt { footprint += sizeof(prtldx_t) * dx2_prev.extent(0); footprint += sizeof(prtldx_t) * dx3_prev.extent(0); footprint += sizeof(short) * tag.extent(0); + footprint += sizeof(long) * particleID.extent(0); for (auto& p : pld) { footprint += sizeof(real_t) * p.extent(0); } diff --git a/src/framework/domain/comm_mpi.hpp 
b/src/framework/domain/comm_mpi.hpp index c8e7de3a7..82308e107 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -186,8 +186,6 @@ namespace comm { auto recv_fld_h = Kokkos::create_mirror_view(recv_fld); Kokkos::deep_copy(send_fld_h, send_fld); if (send_rank >= 0 && recv_rank >= 0) { - // Segfault here: print mpi params - // Create host views MPI_Sendrecv(send_fld_h.data(), nsend, mpi::get_type(), @@ -515,16 +513,14 @@ namespace comm { auto &this_dx2_prev = species.dx2_prev; auto &this_dx3_prev = species.dx3_prev; auto &this_tag = species.tag; + auto &this_particleID = species.particleID; // Number of arrays of each type to send/recv auto NREALS = 4; auto NINTS = 2; auto NFLOATS = 2; + auto NLONGS = 2; if constexpr (D == Dim::_2D) { - this_i2 = species.i2; - this_i2_prev = species.i2_prev; - this_dx2 = species.dx2; - this_dx2_prev = species.dx2_prev; if (C != Coord::Cart) { NREALS = 5; NINTS = 4; @@ -537,14 +533,6 @@ namespace comm { } } if constexpr (D == Dim::_3D) { - this_i2 = species.i2; - this_i2_prev = species.i2_prev; - this_dx2 = species.dx2; - this_dx2_prev = species.dx2_prev; - this_i3 = species.i3; - this_i3_prev = species.i3_prev; - this_dx3 = species.dx3; - this_dx3_prev = species.dx3_prev; NREALS = 4; NINTS = 6; NFLOATS = 6; @@ -556,6 +544,12 @@ namespace comm { const auto n_alive = npart_per_tag_arr[ParticleTag::alive]; const auto n_dead = npart_per_tag_arr[ParticleTag::dead]; + // Debug test: print send and recv count + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Total send: %d, Total recv: %d \n", rank, total_send, total_recv); + } /* Brief on recv buffers: Each recv buffer contains all the received arrays of a given type. 
The different physical quantities are stored next to each other @@ -568,10 +562,11 @@ namespace comm { Kokkos::View recv_buffer_int("recv_buffer_int", total_recv * NINTS); Kokkos::View recv_buffer_real("recv_buffer_real", total_recv * NREALS); Kokkos::View recv_buffer_prtldx("recv_buffer_prtldx",total_recv * NFLOATS); + Kokkos::View recv_buffer_long("recv_buffer_long", total_recv * NLONGS); auto recv_buffer_int_h = Kokkos::create_mirror_view(recv_buffer_int); auto recv_buffer_real_h = Kokkos::create_mirror_view(recv_buffer_real); auto recv_buffer_prtldx_h = Kokkos::create_mirror_view(recv_buffer_prtldx); - + auto recv_buffer_long_h = Kokkos::create_mirror_view(recv_buffer_long); auto iteration = 0; auto current_received = 0; @@ -588,9 +583,11 @@ namespace comm { Kokkos::View send_buffer_int("send_buffer_int", send_count * NINTS); Kokkos::View send_buffer_real("send_buffer_real", send_count * NREALS); Kokkos::View send_buffer_prtldx("send_buffer_prtldx",send_count * NFLOATS); + Kokkos::View send_buffer_long("send_buffer_long", send_count * NLONGS); auto send_buffer_int_h = Kokkos::create_mirror_view(send_buffer_int); auto send_buffer_real_h = Kokkos::create_mirror_view(send_buffer_real); auto send_buffer_prtldx_h = Kokkos::create_mirror_view(send_buffer_prtldx); + auto send_buffer_long_h = Kokkos::create_mirror_view(send_buffer_long); // Need different constexpr parallel fors for different dims if constexpr(D == Dim::_1D) { @@ -607,6 +604,8 @@ namespace comm { send_buffer_real(NREALS * p + 3) = this_weight(idx); send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); + send_buffer_long(NLONGS * p + 0) = this_particleID(idx); + send_buffer_long(NLONGS * p + 1) = this_tag(idx); this_tag(idx) = ParticleTag::dead; }); } @@ -628,6 +627,8 @@ namespace comm { send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); send_buffer_prtldx(NFLOATS * p + 3) = 
this_dx2_prev(idx); + send_buffer_long(NLONGS * p + 0) = this_particleID(idx); + send_buffer_long(NLONGS * p + 1) = this_tag(idx); this_tag(idx) = ParticleTag::dead; }); } @@ -650,6 +651,8 @@ namespace comm { send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); + send_buffer_long(NLONGS * p + 0) = this_particleID(idx); + send_buffer_long(NLONGS * p + 1) = this_tag(idx); this_tag(idx) = ParticleTag::dead; }); } @@ -675,6 +678,8 @@ namespace comm { send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); send_buffer_prtldx(NFLOATS * p + 4) = this_dx3(idx); send_buffer_prtldx(NFLOATS * p + 5) = this_dx3_prev(idx); + send_buffer_long(NLONGS * p + 0) = this_particleID(idx); + send_buffer_long(NLONGS * p + 1) = this_tag(idx); this_tag(idx) = ParticleTag::dead; }); } @@ -695,14 +700,22 @@ namespace comm { const auto receive_offset_int = current_received * NINTS; const auto receive_offset_real = current_received * NREALS; const auto receive_offset_prtldx = current_received * NFLOATS; + const auto receive_offset_long = current_received * NLONGS; // Comms // Make host arrays for send and recv buffers Kokkos::deep_copy(send_buffer_int_h, send_buffer_int); Kokkos::deep_copy(send_buffer_real_h, send_buffer_real); Kokkos::deep_copy(send_buffer_prtldx_h, send_buffer_prtldx); + Kokkos::deep_copy(send_buffer_long_h, send_buffer_long); if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and (recv_count > 0)) { + // Debug: Print the rank and type of mpi operation performed + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Performing sendrecv operation \n", rank); + } MPI_Sendrecv(send_buffer_int_h.data(), send_count * NINTS, mpi::get_type(), @@ -739,7 +752,25 @@ namespace comm { 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Sendrecv(send_buffer_long_h.data(), + send_count * NLONGS, + mpi::get_type(), + send_rank, + 0, + 
recv_buffer_long_h.data() + receive_offset_long, + recv_count*NLONGS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); } else if ((send_rank >= 0) and (send_count > 0)) { + // Debug: Print the rank and type of mpi operation performed + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Performing send operation \n", rank); + } MPI_Send(send_buffer_int_h.data(), send_count * NINTS, mpi::get_type(), @@ -758,7 +789,19 @@ namespace comm { send_rank, 0, MPI_COMM_WORLD); + MPI_Send(send_buffer_long_h.data(), + send_count * NLONGS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); } else if ((recv_rank >= 0) and (recv_count > 0)) { + // Debug: Print the rank and type of mpi operation performed + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Performing recv operation \n", rank); + } MPI_Recv(recv_buffer_int_h.data() + receive_offset_int, recv_count * NINTS, mpi::get_type(), @@ -780,9 +823,69 @@ namespace comm { 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv(recv_buffer_long_h.data() + receive_offset_long, + recv_count * NLONGS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); } current_received += recv_count; iteration++; + + // Debug test: Print recv buffer before and after + /* + { + int total_ranks; + MPI_Comm_size(MPI_COMM_WORLD, &total_ranks); + for (int allranks=0; allranks| |<----->| |<----->| .... 
- tag=0 ct tag=1 ct tag=2 ct + tag=dead ct tag=2 ct tag=3 ct */ Kokkos::View permute_vector("permute_vector", total_holes); Kokkos::View current_offset("current_offset", species.ntags()); @@ -778,6 +776,7 @@ namespace ntt { const auto idx_permute_vec = Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); + permute_vector(idx_permute_vec) = p; } // tag = 1->N (excluding dead and alive) else{ @@ -787,8 +786,8 @@ namespace ntt { ¤t_offset(current_tag), 1); permute_vector(idx_permute_vec) = p; - this_i1(p) -= shifts_in_x1(current_tag); - this_i1_prev(p) -= shifts_in_x1(current_tag); + this_i1(p) += shifts_in_x1(current_tag); + this_i1_prev(p) += shifts_in_x1(current_tag); } } }); @@ -806,6 +805,7 @@ namespace ntt { const auto idx_permute_vec = Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); + permute_vector(idx_permute_vec) = p; } // tag = 1->N (excluding dead and alive) else{ @@ -815,10 +815,10 @@ namespace ntt { ¤t_offset(current_tag), 1); permute_vector(idx_permute_vec) = p; - this_i1(p) -= shifts_in_x1(current_tag); - this_i1_prev(p) -= shifts_in_x1(current_tag); - this_i2(p) -= shifts_in_x2(current_tag); - this_i2_prev(p) -= shifts_in_x2(current_tag); + this_i1(p) += shifts_in_x1(current_tag); + this_i1_prev(p) += shifts_in_x1(current_tag); + this_i2(p) += shifts_in_x2(current_tag); + this_i2_prev(p) += shifts_in_x2(current_tag); } } }); @@ -836,6 +836,7 @@ namespace ntt { const auto idx_permute_vec = Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); + permute_vector(idx_permute_vec) = p; } // tag = 1->N (excluding dead and alive) else{ @@ -845,12 +846,12 @@ namespace ntt { ¤t_offset(current_tag), 1); permute_vector(idx_permute_vec) = p; - this_i1(p) -= shifts_in_x1(current_tag); - this_i1_prev(p) -= shifts_in_x1(current_tag); - this_i2(p) -= shifts_in_x2(current_tag); - this_i2_prev(p) -= shifts_in_x2(current_tag); - this_i3(p) -= shifts_in_x3(current_tag); - this_i3_prev(p) -= shifts_in_x3(current_tag); + this_i1(p) += shifts_in_x1(current_tag); + 
this_i1_prev(p) += shifts_in_x1(current_tag); + this_i2(p) += shifts_in_x2(current_tag); + this_i2_prev(p) += shifts_in_x2(current_tag); + this_i3(p) += shifts_in_x3(current_tag); + this_i3_prev(p) += shifts_in_x3(current_tag); } } }); @@ -913,6 +914,43 @@ namespace ntt { allocation_vector(p) = permute_vector(p); }); } + + /* + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank == 1 && species.label() == "e+_b") + { + // Copy the tag array to host + auto tag_h = Kokkos::create_mirror_view(species.tag); + Kokkos::deep_copy(tag_h, species.tag); + std::cout << "Tag locs before send" << std::endl; + for (std::size_t i { 0 }; i < species.npart(); i++) { + if (tag_h(i) != ParticleTag::alive) + std::cout <<" Tag: " << tag_h(i) << " loc: "<< i << std::endl; + } + + // Print allocation vector after copying to host + auto allocation_vector_h = Kokkos::create_mirror_view(allocation_vector); + std::cout << "Total holes: " << total_holes << " Total recv: " << total_recv << std::endl; + Kokkos::deep_copy(allocation_vector_h, allocation_vector); + for (std::size_t i { 0 }; i < total_recv; ++i) { + std::cout << "Rank: " << rank << " Allocation vector: " << allocation_vector_h(i) << std::endl; + } + // Print the permute vector as well + auto permute_vector_h = Kokkos::create_mirror_view(permute_vector); + Kokkos::deep_copy(permute_vector_h, permute_vector); + for (std::size_t i { 0 }; i < total_holes; ++i) { + std::cout << "Rank: " << rank << " Permuted vector: " << permute_vector_h(i) << + " tag: " << tag_h(permute_vector_h(i)) << std::endl; + } + } + */ + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + std::cout << "Rank: " << rank << " Total sent: " << total_holes - total_dead << " Total recv: " << total_recv << std::endl; + } + // Communicate the arrays comm::CommunicateParticlesBuffer(species, permute_vector, allocation_vector, this_tag_offset, npart_per_tag_arr, npart_per_tag_arr_recv, diff --git a/src/framework/domain/metadomain.cpp 
b/src/framework/domain/metadomain.cpp index ec8561a9a..a01296823 100644 --- a/src/framework/domain/metadomain.cpp +++ b/src/framework/domain/metadomain.cpp @@ -399,6 +399,33 @@ namespace ntt { #endif } + // Function to assign a unique ID to each particle + template + void Metadomain::SetParticleIDs(Domain& domain){ + for (auto& species : domain.species) { + auto &this_particleID = species.particleID; + auto &this_tag = species.tag; + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + const auto offset_per_rank = static_cast(1e9 * rank); + std::size_t current_particleID = 0; + Kokkos::View counter_view("current_particleID", 1); + Kokkos::deep_copy(counter_view, current_particleID); + + Kokkos::parallel_for( + "Set Particle IDs", + species.npart(), + Lambda(const std::size_t p){ + if (this_tag(p) == ParticleTag::alive) + { + Kokkos::atomic_increment(&counter_view(0)); + this_particleID(p) = offset_per_rank + static_cast(counter_view(0)); + } + }); + } + return; + } + template struct Metadomain>; template struct Metadomain>; template struct Metadomain>; diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index e30bc8e97..9e94bf89f 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -90,6 +90,7 @@ namespace ntt { void SynchronizeFields(Domain&, CommTags, const range_tuple_t& = { 0, 0 }); void CommunicateParticles(Domain&, timer::Timers*); void CommunicateParticlesBuffer(Domain&, timer::Timers*); + void SetParticleIDs(Domain&); /** * @param global_ndomains total number of domains From 0d18e5c07cdd6b9cb336b9ec976d032003eae687 Mon Sep 17 00:00:00 2001 From: Sasha Chernoglazov Date: Sat, 4 Jan 2025 18:18:47 -0500 Subject: [PATCH 21/52] test of the number of particles --- extern/adios2 | 2 +- setups/srpic/blob/blob.py | 62 +++++++++++++++++++++ setups/srpic/blob/blob.toml | 66 +++++++++++++++++++++++ setups/srpic/blob/nparts.py | 38 +++++++++++++ setups/srpic/blob/pgen.hpp | 104 
++++++++++++++++++++++++++++++++++++ 5 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 setups/srpic/blob/blob.py create mode 100644 setups/srpic/blob/blob.toml create mode 100644 setups/srpic/blob/nparts.py create mode 100644 setups/srpic/blob/pgen.hpp diff --git a/extern/adios2 b/extern/adios2 index b8761e2af..f80ad829d 160000 --- a/extern/adios2 +++ b/extern/adios2 @@ -1 +1 @@ -Subproject commit b8761e2afab2cd05b89d09b2ee4da1cd7a834225 +Subproject commit f80ad829d751241140c40923503e1888e27e22e1 diff --git a/setups/srpic/blob/blob.py b/setups/srpic/blob/blob.py new file mode 100644 index 000000000..77337d3b2 --- /dev/null +++ b/setups/srpic/blob/blob.py @@ -0,0 +1,62 @@ +import h5py +import numpy as np +import matplotlib.pyplot as plt + +f = open("report", "r") +Lines = f.readlines() +f.close() + +em_new = [] +ep_new = [] +time_new = [] +for i in range (len(Lines)): + line = Lines[i] + line = line.strip() + arr = line.split() + + if (len(arr)>0 and arr[0]=='species'): + nparts = arr[2].split("..") + if (nparts[0]=="(e-_p)"): + em_new.append(float(nparts[-1])) + if (nparts[0]=="(e+_p)"): + ep_new.append(float(nparts[-1])) + + if (len(arr)>0 and arr[0]=='Time:'): + time_new.append(float(arr[1])) + +f = h5py.File('blob.h5', 'r') + +Nsteps = len(f.keys()) +print(list(f['Step0'].keys())) + +for i in range (Nsteps): + print (i) + fig = plt.figure(dpi=300, figsize=(8,8), facecolor='white') + + densMax = max(np.max(f['Step'+str(i)]['fN_1']),np.max(f['Step'+str(i)]['fN_2'])) + print(densMax) + ax1 = fig.add_axes([0.05,0.05,0.4,0.4]) + im1=ax1.pcolormesh(f['Step'+str(i)]['X1'],f['Step'+str(i)]['X2'],f['Step'+str(i)]['fN_1'],cmap='turbo',vmin=0,vmax=1.0) + ax1.set_title(r"$N_1$") + ax1.vlines(0,-10.0,10.0,color='white') + + ax1 = fig.add_axes([0.48,0.05,0.4,0.4]) + ax1.pcolormesh(f['Step'+str(i)]['X1'],f['Step'+str(i)]['X2'],f['Step'+str(i)]['fN_2'],cmap='turbo',vmin=0,vmax=1.0) + ax1.set_yticklabels([]) + ax1.set_title(r"$N_2$") + 
ax1.vlines(0,-10.0,10.0,color='white') + + ax4cb = fig.add_axes([0.89, 0.05, 0.01, 0.4]) + cbar4 = fig.colorbar(im1,cax=ax4cb) + + ax1= fig.add_axes([0.05,0.5,0.83,0.4]) + ax1.plot(time_new,em_new, color='blue', label=r'$e^-$, new') + ax1.plot(time_new,ep_new, color='red', label=r'$e^+$, new') + ax1.legend() + ax1.set_ylim(0,1.8e5) + ax1.set_xlim(0,100) + ax1.vlines(i, 0,1.8e5, color='green',linewidth=0.6) + + + fig.savefig("%05d"%i+".png",dpi=300,bbox_inches='tight') + plt.close() diff --git a/setups/srpic/blob/blob.toml b/setups/srpic/blob/blob.toml new file mode 100644 index 000000000..7c03b1f9e --- /dev/null +++ b/setups/srpic/blob/blob.toml @@ -0,0 +1,66 @@ +[simulation] + name = "blob" + engine = "srpic" + runtime = 100.0 + + [simulation.domain] + decomposition = [2,1,1] + +[grid] + resolution = [1024, 1024] + extent = [[-10.0, 10.0], [-10.0, 10.0]] + + [grid.metric] + metric = "minkowski" + + [grid.boundaries] + fields = [["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"]] + +[scales] + larmor0 = 1.0 + skindepth0 = 1.0 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 16.0 + + [[particles.species]] + label = "e-_p" + mass = 1.0 + charge = -1.0 + maxnpart = 1e7 + + [[particles.species]] + label = "e+_p" + mass = 1.0 + charge = 1.0 + maxnpart = 1e7 + +[setup] + temp_1 = 1e-4 + x1c = -5.0 + x2c = 0.0 + v_max = 50.0 + dr = 1.0 + +[output] + format = "hdf5" + interval_time = 1.0 + + [output.fields] + quantities = ["N_1", "N_2", "B", "E"] + + [output.particles] + enable = false + + [output.spectra] + enable = false + +[diagnostics] + colored_stdout = false diff --git a/setups/srpic/blob/nparts.py b/setups/srpic/blob/nparts.py new file mode 100644 index 000000000..e759422c0 --- /dev/null +++ b/setups/srpic/blob/nparts.py @@ -0,0 +1,38 @@ +import h5py +import numpy as np +import matplotlib.pyplot as plt + +f = open("report", "r") +Lines = f.readlines() +f.close() + +em_new = [] +ep_new = [] 
+time_new = [] +for i in range (len(Lines)): + line = Lines[i] + line = line.strip() + arr = line.split() + + if (len(arr)>0 and arr[0]=='species'): + nparts = arr[2].split("..") + if (nparts[0]=="(e-_p)"): + em_new.append(float(nparts[-1])) + if (nparts[0]=="(e+_p)"): + ep_new.append(float(nparts[-1])) + + if (len(arr)>0 and arr[0]=='Time:'): + time_new.append(float(arr[1])) + + +fig = plt.figure(dpi=300, figsize=(8,8), facecolor='white') + +ax1= fig.add_axes([0.05,0.5,0.83,0.4]) +ax1.plot(time_new,em_new, color='blue', label=r'$e^-$, new') +ax1.plot(time_new,ep_new, color='red', label=r'$e^+$, new') +ax1.legend() +ax1.set_ylim(0,1.8e5) +ax1.set_xlim(0,100) + +fig.savefig("nparts.png",dpi=300,bbox_inches='tight') +plt.close() diff --git a/setups/srpic/blob/pgen.hpp b/setups/srpic/blob/pgen.hpp new file mode 100644 index 000000000..38b3db1c5 --- /dev/null +++ b/setups/srpic/blob/pgen.hpp @@ -0,0 +1,104 @@ +#ifndef PROBLEM_GENERATOR_H +#define PROBLEM_GENERATOR_H + +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "arch/traits.h" + +#include "archetypes/energy_dist.h" +#include "archetypes/particle_injector.h" +#include "archetypes/problem_generator.h" +#include "framework/domain/domain.h" +#include "framework/domain/metadomain.h" + +namespace user { + using namespace ntt; + + template + struct CounterstreamEnergyDist : public arch::EnergyDistribution { + CounterstreamEnergyDist(const M& metric, real_t v_max) + : arch::EnergyDistribution { metric } + , v_max { v_max } {} + + Inline void operator()(const coord_t& x_Ph, + vec_t& v, + unsigned short sp) const override { + v[0] = v_max; + } + + private: + const real_t v_max; + }; + + template + struct GaussianDist : public arch::SpatialDistribution { + GaussianDist(const M& metric, real_t x1c, real_t x2c, real_t dr) + : arch::SpatialDistribution { metric } + , x1c { x1c } + , x2c { x2c } + , dr { dr } {} + + // to properly scale the number density, the probability should be 
normalized to 1 + Inline auto operator()(const coord_t& x_Ph) const -> real_t override { + if (math::abs(x_Ph[0] - x1c) < dr && math::abs(x_Ph[1] - x2c) < dr){ + return 1.0; + }else{ + return 0.0; + } + } + + private: + const real_t x1c, x2c, dr; + }; + + template + struct PGen : public arch::ProblemGenerator { + + // compatibility traits for the problem generator + static constexpr auto engines = traits::compatible_with::value; + static constexpr auto metrics = traits::compatible_with::value; + static constexpr auto dimensions = + traits::compatible_with::value; + + // for easy access to variables in the child class + using arch::ProblemGenerator::D; + using arch::ProblemGenerator::C; + using arch::ProblemGenerator::params; + + const real_t temp_1, x1c, x2c, dr, v_max; + + inline PGen(const SimulationParams& p, const Metadomain& global_domain) + : arch::ProblemGenerator { p } + , temp_1 { p.template get("setup.temp_1") } + , x1c { p.template get("setup.x1c") } + , x2c { p.template get("setup.x2c") } + , v_max { p.template get("setup.v_max") } + , dr { p.template get("setup.dr") } {} + + inline void InitPrtls(Domain& local_domain) { + const auto energy_dist = CounterstreamEnergyDist( + local_domain.mesh.metric, + v_max); + const auto spatial_dist = GaussianDist(local_domain.mesh.metric, + x1c, + x2c, + dr); + const auto injector = + arch::NonUniformInjector( + energy_dist, + spatial_dist, + { 1, 2 }); + + arch::InjectNonUniform>( + params, + local_domain, + injector, + 1.0); + } + }; + +} // namespace user + +#endif From d7f92f0ae4ea7a7c4407625c8822f417eb8810d1 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Sun, 5 Jan 2025 00:37:22 -0500 Subject: [PATCH 22/52] added function to remove dead particles --- src/engines/srpic.hpp | 8 + src/framework/domain/comm_mpi.hpp | 114 +-------- src/framework/domain/communications.cpp | 306 ++++++++++++++++++++---- src/framework/domain/metadomain.h | 1 + 4 files changed, 275 insertions(+), 154 deletions(-) diff --git 
a/src/engines/srpic.hpp b/src/engines/srpic.hpp index d751c712a..686ed0c35 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -175,6 +175,14 @@ namespace ntt { ParticleInjector(dom); timers.stop("Injector"); } + + if (step % 10 == 0 && step > 0){ + + timers.start("RemoveDead"); + m_metadomain.RemoveDeadParticles(dom, &timers); + timers.stop("RemoveDead"); + } + } /* algorithm substeps --------------------------------------------------- */ diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 82308e107..7b9a22eee 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -544,12 +544,6 @@ namespace comm { const auto n_alive = npart_per_tag_arr[ParticleTag::alive]; const auto n_dead = npart_per_tag_arr[ParticleTag::dead]; - // Debug test: print send and recv count - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Total send: %d, Total recv: %d \n", rank, total_send, total_recv); - } /* Brief on recv buffers: Each recv buffer contains all the received arrays of a given type. 
The different physical quantities are stored next to each other @@ -710,12 +704,6 @@ namespace comm { if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and (recv_count > 0)) { - // Debug: Print the rank and type of mpi operation performed - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing sendrecv operation \n", rank); - } MPI_Sendrecv(send_buffer_int_h.data(), send_count * NINTS, mpi::get_type(), @@ -765,12 +753,6 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if ((send_rank >= 0) and (send_count > 0)) { - // Debug: Print the rank and type of mpi operation performed - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing send operation \n", rank); - } MPI_Send(send_buffer_int_h.data(), send_count * NINTS, mpi::get_type(), @@ -796,12 +778,6 @@ namespace comm { 0, MPI_COMM_WORLD); } else if ((recv_rank >= 0) and (recv_count > 0)) { - // Debug: Print the rank and type of mpi operation performed - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing recv operation \n", rank); - } MPI_Recv(recv_buffer_int_h.data() + receive_offset_int, recv_count * NINTS, mpi::get_type(), @@ -833,59 +809,8 @@ namespace comm { } current_received += recv_count; iteration++; + - // Debug test: Print recv buffer before and after - /* - { - int total_ranks; - MPI_Comm_size(MPI_COMM_WORLD, &total_ranks); - for (int allranks=0; allranks current_offset("current_offset", species.ntags()); auto &this_tag_offset = tag_offset; - auto n_alive = npart_per_tag_arr[ParticleTag::alive]; - if constexpr (D == Dim::_1D){ Kokkos::parallel_for( "PermuteVector and Displace", @@ -781,7 +779,7 @@ namespace ntt { // tag = 1->N (excluding dead and alive) else{ const auto idx_permute_vec = this_tag_offset(current_tag) - - n_alive + + total_alive + Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); @@ -810,7 +808,7 @@ namespace ntt { // tag = 1->N (excluding dead and 
alive) else{ const auto idx_permute_vec = this_tag_offset(current_tag) - - n_alive + + total_alive + Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); @@ -841,7 +839,7 @@ namespace ntt { // tag = 1->N (excluding dead and alive) else{ const auto idx_permute_vec = this_tag_offset(current_tag) - - n_alive + + total_alive + Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); @@ -915,42 +913,6 @@ namespace ntt { }); } - /* - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - if (rank == 1 && species.label() == "e+_b") - { - // Copy the tag array to host - auto tag_h = Kokkos::create_mirror_view(species.tag); - Kokkos::deep_copy(tag_h, species.tag); - std::cout << "Tag locs before send" << std::endl; - for (std::size_t i { 0 }; i < species.npart(); i++) { - if (tag_h(i) != ParticleTag::alive) - std::cout <<" Tag: " << tag_h(i) << " loc: "<< i << std::endl; - } - - // Print allocation vector after copying to host - auto allocation_vector_h = Kokkos::create_mirror_view(allocation_vector); - std::cout << "Total holes: " << total_holes << " Total recv: " << total_recv << std::endl; - Kokkos::deep_copy(allocation_vector_h, allocation_vector); - for (std::size_t i { 0 }; i < total_recv; ++i) { - std::cout << "Rank: " << rank << " Allocation vector: " << allocation_vector_h(i) << std::endl; - } - // Print the permute vector as well - auto permute_vector_h = Kokkos::create_mirror_view(permute_vector); - Kokkos::deep_copy(permute_vector_h, permute_vector); - for (std::size_t i { 0 }; i < total_holes; ++i) { - std::cout << "Rank: " << rank << " Permuted vector: " << permute_vector_h(i) << - " tag: " << tag_h(permute_vector_h(i)) << std::endl; - } - } - */ - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - std::cout << "Rank: " << rank << " Total sent: " << total_holes - total_dead << " Total recv: " << total_recv << std::endl; - } - // Communicate the arrays comm::CommunicateParticlesBuffer(species, permute_vector, allocation_vector, this_tag_offset, 
npart_per_tag_arr, npart_per_tag_arr_recv, @@ -959,6 +921,268 @@ namespace ntt { } } + /* + Function to remove dead particles from the domain + */ + template + void Metadomain::RemoveDeadParticles(Domain& domain, + timer::Timers* timers){ + MPI_Barrier(MPI_COMM_WORLD); + for (auto& species : domain.species) { + auto [npart_per_tag_arr, + tag_offset] = species.npart_per_tag(); + auto npart = static_cast(species.npart()); + auto total_alive = static_cast( + npart_per_tag_arr[ParticleTag::alive]); + auto total_dead = static_cast( + npart_per_tag_arr[ParticleTag::dead]); + + if (total_dead != 0){ + // Check that only alive and dead particles are present + for (std::size_t i { 0 }; i < species.ntags(); i++) { + if (i != ParticleTag::alive && i != ParticleTag::dead){ + raise::FatalIf(npart_per_tag_arr[i] != 0, + "Particle tags can only be dead or alive at this point", + HERE); + } + } + + // Get the indices of all alive particles + auto &this_ux1 = species.ux1; + auto &this_ux2 = species.ux2; + auto &this_ux3 = species.ux3; + auto &this_weight = species.weight; + auto &this_phi = species.phi; + auto &this_i1 = species.i1; + auto &this_i1_prev = species.i1_prev; + auto &this_i2 = species.i2; + auto &this_i3 = species.i3; + auto &this_i2_prev = species.i2_prev; + auto &this_i3_prev = species.i3_prev; + auto &this_dx1 = species.dx1; + auto &this_dx1_prev = species.dx1_prev; + auto &this_dx2 = species.dx2; + auto &this_dx3 = species.dx3; + auto &this_dx2_prev = species.dx2_prev; + auto &this_dx3_prev = species.dx3_prev; + auto &this_tag = species.tag; + + // Create buffers to store alive particles + Kokkos::View buffer_ctr("buffer_ctr", 1); + Kokkos::View buffer_int("buffer_int", total_alive); + Kokkos::View buffer_real("buffer_real", total_alive); + Kokkos::View buffer_prtldx("buffer_prtldx",total_alive); + + // Simulaneously update i1, u1, dx1 + Kokkos::parallel_for( + "CopyToBuffer i1 u1 dx1", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ 
+ const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_int(idx) = this_i1(p); + buffer_real(idx) = this_ux1(p); + buffer_prtldx(idx) = this_dx1(p); + } + }); + + Kokkos::parallel_for( + "i1 u1 dx1 from Buffer", + total_alive, + Lambda(index_t p) { + this_i1(p) = buffer_int(p); + this_ux1(p) = buffer_real(p); + this_dx1(p) = buffer_prtldx(p); + }); + + // Update i1_prev, dx1_prev, u2 + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer i1_prev dx1_prev u2", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_real(idx) = this_ux2(p); + buffer_prtldx(idx) = this_dx1_prev(p); + buffer_int(idx) = this_i1_prev(p); + } + }); + + Kokkos::parallel_for( + "i1_prev u2 dx1_prev from Buffer", + total_alive, + Lambda(index_t p) { + this_i1_prev(p) = buffer_int(p); + this_ux2(p) = buffer_real(p); + this_dx1_prev(p) = buffer_prtldx(p); + }); + + // Update u3 + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer u3", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_real(idx) = this_ux3(p); + } + }); + + Kokkos::parallel_for( + "u3 from Buffer", + total_alive, + Lambda(index_t p) { + this_ux3(p) = buffer_real(p); + }); + + + // Update weight + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer weight", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_real(idx) = this_weight(p); + } + }); + + Kokkos::parallel_for( + "weight from Buffer", + total_alive, + Lambda(index_t p) { + this_weight(p) = buffer_real(p); + }); + + // Update i2, dx2, i2_prev, dx2_prev + if constexpr(D == Dim::_2D || D == Dim::_3D){ + // i2, dx2 + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer i2 dx2", + 
total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_int(idx) = this_i2(p); + buffer_prtldx(idx) = this_dx2(p); + } + }); + + Kokkos::parallel_for( + "i2 dx2 from Buffer", + total_alive, + Lambda(index_t p) { + this_i2(p) = buffer_int(p); + this_dx2(p) = buffer_prtldx(p); + }); + + // i2_prev, dx2_prev + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer i2_prev dx2_prev", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_int(idx) = this_i2_prev(p); + buffer_prtldx(idx) = this_dx2_prev(p); + } + }); + + Kokkos::parallel_for( + "i2_prev dx2_prev from Buffer", + total_alive, + Lambda(index_t p) { + this_i2_prev(p) = buffer_int(p); + this_dx2_prev(p) = buffer_prtldx(p); + }); + + } + + // Update i3, dx3, i3_prev, dx3_prev + if constexpr(D == Dim::_3D){ + // i3, dx3 + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer i3 dx3", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_int(idx) = this_i3(p); + buffer_prtldx(idx) = this_dx3(p); + } + }); + + Kokkos::parallel_for( + "i3 dx3 from Buffer", + total_alive, + Lambda(index_t p) { + this_i3(p) = buffer_int(p); + this_dx3(p) = buffer_prtldx(p); + }); + + // i3_prev, dx3_prev + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer i3_prev dx3_prev", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_int(idx) = this_i3_prev(p); + buffer_prtldx(idx) = this_dx3_prev(p); + } + }); + + Kokkos::parallel_for( + "i3_prev dx3_prev from Buffer", + total_alive, + Lambda(index_t p) { + this_i3_prev(p) = buffer_int(p); + this_dx3_prev(p) = buffer_prtldx(p); + }); + } + + // 
phi + if constexpr(D == Dim::_2D && M::CoordType != Coord::Cart){ + Kokkos::deep_copy(buffer_ctr, 0); + Kokkos::parallel_for( + "CopyToBuffer phi", + total_alive, + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); + buffer_real(idx) = this_phi(p); + } + }); + + Kokkos::parallel_for( + "phi from Buffer", + total_alive, + Lambda(index_t p) { + this_phi(p) = buffer_real(p); + }); + + } + + // tags + Kokkos::parallel_for( + "Make tags alive", + total_alive, + Lambda(index_t p) { + this_tag(p) = ParticleTag::alive; + }); + species.set_npart(total_alive); + } + } + return; + } + template struct Metadomain>; template struct Metadomain>; template struct Metadomain>; diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 9e94bf89f..6bd3d29d8 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -91,6 +91,7 @@ namespace ntt { void CommunicateParticles(Domain&, timer::Timers*); void CommunicateParticlesBuffer(Domain&, timer::Timers*); void SetParticleIDs(Domain&); + void RemoveDeadParticles(Domain&, timer::Timers* ); /** * @param global_ndomains total number of domains From 39f6c9f64f5dd9f5df34734036277ecf81a57c84 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Sun, 5 Jan 2025 23:44:32 -0500 Subject: [PATCH 23/52] testing: remove dead prtls --- src/engines/srpic.hpp | 2 +- src/framework/domain/comm_mpi.hpp | 27 ++ src/framework/domain/communications.cpp | 348 ++++++++---------------- 3 files changed, 138 insertions(+), 239 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 686ed0c35..b54327076 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -176,7 +176,7 @@ namespace ntt { timers.stop("Injector"); } - if (step % 10 == 0 && step > 0){ + if (step % 100 == 0 && step > 0){ timers.start("RemoveDead"); m_metadomain.RemoveDeadParticles(dom, &timers); diff --git 
a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 7b9a22eee..aa35ce2a6 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -918,6 +918,33 @@ namespace comm { return; } +/* + Function to copy the alive particle data the arrays to a buffer and then back + to the particle arrays +*/ + template + void MoveDeadToEnd(array_t& arr, + Kokkos::View indices_alive) { + auto n_alive = indices_alive.extent(0); + auto buffer = Kokkos::View("buffer", n_alive); + Kokkos::parallel_for( + "PopulateBufferAlive", + n_alive, + Lambda(const std::size_t p) { + buffer(p) = arr(indices_alive(p)); + }); + + Kokkos::parallel_for( + "CopyBufferToArr", + n_alive, + Lambda(const std::size_t p) { + arr(p) = buffer(p); + }); + + return; + } + + } // namespace comm #endif // FRAMEWORK_DOMAIN_COMM_MPI_HPP diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 433a97fb8..ee0d37f10 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -927,7 +927,6 @@ namespace ntt { template void Metadomain::RemoveDeadParticles(Domain& domain, timer::Timers* timers){ - MPI_Barrier(MPI_COMM_WORLD); for (auto& species : domain.species) { auto [npart_per_tag_arr, tag_offset] = species.npart_per_tag(); @@ -936,250 +935,123 @@ namespace ntt { npart_per_tag_arr[ParticleTag::alive]); auto total_dead = static_cast( npart_per_tag_arr[ParticleTag::dead]); - - if (total_dead != 0){ - // Check that only alive and dead particles are present - for (std::size_t i { 0 }; i < species.ntags(); i++) { - if (i != ParticleTag::alive && i != ParticleTag::dead){ - raise::FatalIf(npart_per_tag_arr[i] != 0, - "Particle tags can only be dead or alive at this point", - HERE); - } - } - - // Get the indices of all alive particles - auto &this_ux1 = species.ux1; - auto &this_ux2 = species.ux2; - auto &this_ux3 = species.ux3; - auto &this_weight = species.weight; - auto &this_phi = 
species.phi; - auto &this_i1 = species.i1; - auto &this_i1_prev = species.i1_prev; - auto &this_i2 = species.i2; - auto &this_i3 = species.i3; - auto &this_i2_prev = species.i2_prev; - auto &this_i3_prev = species.i3_prev; - auto &this_dx1 = species.dx1; - auto &this_dx1_prev = species.dx1_prev; - auto &this_dx2 = species.dx2; - auto &this_dx3 = species.dx3; - auto &this_dx2_prev = species.dx2_prev; - auto &this_dx3_prev = species.dx3_prev; - auto &this_tag = species.tag; - - // Create buffers to store alive particles - Kokkos::View buffer_ctr("buffer_ctr", 1); - Kokkos::View buffer_int("buffer_int", total_alive); - Kokkos::View buffer_real("buffer_real", total_alive); - Kokkos::View buffer_prtldx("buffer_prtldx",total_alive); - - // Simulaneously update i1, u1, dx1 - Kokkos::parallel_for( - "CopyToBuffer i1 u1 dx1", - total_alive, - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_int(idx) = this_i1(p); - buffer_real(idx) = this_ux1(p); - buffer_prtldx(idx) = this_dx1(p); - } - }); - - Kokkos::parallel_for( - "i1 u1 dx1 from Buffer", - total_alive, - Lambda(index_t p) { - this_i1(p) = buffer_int(p); - this_ux1(p) = buffer_real(p); - this_dx1(p) = buffer_prtldx(p); - }); - - // Update i1_prev, dx1_prev, u2 - Kokkos::deep_copy(buffer_ctr, 0); - Kokkos::parallel_for( - "CopyToBuffer i1_prev dx1_prev u2", - total_alive, - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_real(idx) = this_ux2(p); - buffer_prtldx(idx) = this_dx1_prev(p); - buffer_int(idx) = this_i1_prev(p); - } - }); - - Kokkos::parallel_for( - "i1_prev u2 dx1_prev from Buffer", - total_alive, - Lambda(index_t p) { - this_i1_prev(p) = buffer_int(p); - this_ux2(p) = buffer_real(p); - this_dx1_prev(p) = buffer_prtldx(p); - }); - - // Update u3 - Kokkos::deep_copy(buffer_ctr, 0); - Kokkos::parallel_for( - "CopyToBuffer u3", - total_alive, 
- Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_real(idx) = this_ux3(p); - } - }); - - Kokkos::parallel_for( - "u3 from Buffer", - total_alive, - Lambda(index_t p) { - this_ux3(p) = buffer_real(p); - }); - - - // Update weight - Kokkos::deep_copy(buffer_ctr, 0); - Kokkos::parallel_for( - "CopyToBuffer weight", - total_alive, - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_real(idx) = this_weight(p); - } - }); - - Kokkos::parallel_for( - "weight from Buffer", - total_alive, - Lambda(index_t p) { - this_weight(p) = buffer_real(p); - }); - - // Update i2, dx2, i2_prev, dx2_prev - if constexpr(D == Dim::_2D || D == Dim::_3D){ - // i2, dx2 - Kokkos::deep_copy(buffer_ctr, 0); - Kokkos::parallel_for( - "CopyToBuffer i2 dx2", - total_alive, - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_int(idx) = this_i2(p); - buffer_prtldx(idx) = this_dx2(p); - } - }); - - Kokkos::parallel_for( - "i2 dx2 from Buffer", - total_alive, - Lambda(index_t p) { - this_i2(p) = buffer_int(p); - this_dx2(p) = buffer_prtldx(p); - }); - - // i2_prev, dx2_prev - Kokkos::deep_copy(buffer_ctr, 0); - Kokkos::parallel_for( - "CopyToBuffer i2_prev dx2_prev", - total_alive, - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_int(idx) = this_i2_prev(p); - buffer_prtldx(idx) = this_dx2_prev(p); - } - }); - - Kokkos::parallel_for( - "i2_prev dx2_prev from Buffer", - total_alive, - Lambda(index_t p) { - this_i2_prev(p) = buffer_int(p); - this_dx2_prev(p) = buffer_prtldx(p); - }); + // Check that only alive and dead particles are present + for (std::size_t i { 0 }; i < species.ntags(); i++) { + if (i != ParticleTag::alive && i != ParticleTag::dead){ + 
raise::FatalIf(npart_per_tag_arr[i] != 0, + "Particle tags can only be dead or alive at this point", + HERE); } - - // Update i3, dx3, i3_prev, dx3_prev - if constexpr(D == Dim::_3D){ - // i3, dx3 - Kokkos::deep_copy(buffer_ctr, 0); - Kokkos::parallel_for( - "CopyToBuffer i3 dx3", - total_alive, - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_int(idx) = this_i3(p); - buffer_prtldx(idx) = this_dx3(p); - } - }); - - Kokkos::parallel_for( - "i3 dx3 from Buffer", - total_alive, - Lambda(index_t p) { - this_i3(p) = buffer_int(p); - this_dx3(p) = buffer_prtldx(p); - }); - - // i3_prev, dx3_prev - Kokkos::deep_copy(buffer_ctr, 0); - Kokkos::parallel_for( - "CopyToBuffer i3_prev dx3_prev", - total_alive, - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&buffer_ctr(0), 1); - buffer_int(idx) = this_i3_prev(p); - buffer_prtldx(idx) = this_dx3_prev(p); + } + { + int rank, totranks; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &totranks); + for (std::size_t current_rank=0; current_rank indices_alive("indices_alive", total_alive); + Kokkos::View alive_counter("counter_alive", 1); + Kokkos::deep_copy(alive_counter, 0); + Kokkos::parallel_for( + "Indices of Alive Particles", + species.npart(), + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&alive_counter(0), 1); + indices_alive(idx) = p; } - - // tags - Kokkos::parallel_for( - "Make tags alive", - total_alive, - Lambda(index_t p) { - this_tag(p) = ParticleTag::alive; - }); - species.set_npart(total_alive); + }); + // Sanity check: alive_counter must be equal to total_alive + auto alive_counter_h = Kokkos::create_mirror_view(alive_counter); + Kokkos::deep_copy(alive_counter_h, alive_counter); + raise::FatalIf(alive_counter_h(0) != total_alive, + "Error in finding alive particles", + HERE); + 
comm::MoveDeadToEnd(species.i1, indices_alive); + comm::MoveDeadToEnd(species.i1_prev, indices_alive); + comm::MoveDeadToEnd(species.dx1_prev, indices_alive); + comm::MoveDeadToEnd(species.ux1, indices_alive); + comm::MoveDeadToEnd(species.ux2, indices_alive); + comm::MoveDeadToEnd(species.ux3, indices_alive); + comm::MoveDeadToEnd(species.weight, indices_alive); + // Update i2, dx2, i2_prev, dx2_prev + if constexpr(D == Dim::_2D || D == Dim::_3D){ + comm::MoveDeadToEnd(species.i2, indices_alive); + comm::MoveDeadToEnd(species.i2_prev, indices_alive); + comm::MoveDeadToEnd(species.dx2, indices_alive); + comm::MoveDeadToEnd(species.dx2_prev, indices_alive); + if constexpr(D == Dim::_2D && M::CoordType != Coord::Cart){ + comm::MoveDeadToEnd(species.phi, indices_alive); + } } + // Update i3, dx3, i3_prev, dx3_prev + if constexpr(D == Dim::_3D){ + comm::MoveDeadToEnd(species.i3, indices_alive); + comm::MoveDeadToEnd(species.i3_prev, indices_alive); + comm::MoveDeadToEnd(species.dx3, indices_alive); + comm::MoveDeadToEnd(species.dx3_prev, indices_alive); + } + // tags + Kokkos::parallel_for( + "Make tags alive", + total_alive, + Lambda(index_t p) { + this_tag(p) = ParticleTag::alive; + }); + species.set_npart(total_alive); + + + int rank, totranks; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &totranks); + for (std::size_t current_rank=0; current_rank Date: Mon, 6 Jan 2025 01:39:18 -0500 Subject: [PATCH 24/52] testing removedeadparticles() --- src/framework/domain/communications.cpp | 79 +++++++++++++++++++++---- 1 file changed, 66 insertions(+), 13 deletions(-) diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index ee0d37f10..6450aa4b7 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -923,6 +923,33 @@ namespace ntt { /* Function to remove dead particles from the domain + + Consider the following particle quantity array + 
<---xxx---x---xx---xx-----------xx----x--> (qty) + - = alive + x = dead + ntot = nalive + ndead + + (1) Copy all alive particle data to buffer + <---xxx---x---xx---xx-----------xx----x--> (qty) + | + | + v + <--------------------------> buffer + (nalive) + + (2) Copy from buffer to the beginning of the array + overwritting all particles + <--------------------------> buffer + (nalive) + | + | + v + <--------------------------xx----x--> (qty) + ^ + (nalive) + + (3) Set npart to nalive */ template void Metadomain::RemoveDeadParticles(Domain& domain, @@ -930,11 +957,11 @@ namespace ntt { for (auto& species : domain.species) { auto [npart_per_tag_arr, tag_offset] = species.npart_per_tag(); - auto npart = static_cast(species.npart()); - auto total_alive = static_cast( - npart_per_tag_arr[ParticleTag::alive]); - auto total_dead = static_cast( - npart_per_tag_arr[ParticleTag::dead]); + const auto npart = static_cast(species.npart()); + const auto total_alive = static_cast( + npart_per_tag_arr[ParticleTag::alive]); + const auto total_dead = static_cast( + npart_per_tag_arr[ParticleTag::dead]); // Check that only alive and dead particles are present for (std::size_t i { 0 }; i < species.ntags(); i++) { @@ -945,6 +972,14 @@ namespace ntt { } } { + auto [npart_per_tag_arr_, + tag_offset_] = species.npart_per_tag(); + auto npart_ = static_cast(species.npart()); + auto total_alive_ = static_cast( + npart_per_tag_arr_[ParticleTag::alive]); + auto total_dead_ = static_cast( + npart_per_tag_arr_[ParticleTag::dead]); + int rank, totranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &totranks); @@ -952,8 +987,8 @@ namespace ntt { if (rank == current_rank && species.label() == "e-_p"){ std::cout << "Before removing dead particles" << std::endl; std::cout << "Rank: " << rank << std::endl; - std::cout << "Total alive: " << total_alive << std::endl; - std::cout << "Total dead: " << total_dead << std::endl; + std::cout << "Total alive: " << total_alive_ << 
std::endl; + std::cout << "Total dead: " << total_dead_ << std::endl; std::cout << "Total particles: " << npart << std::endl; for (std::size_t i { 0 }; i < species.ntags(); i++) { std::cout << "Tag: " << i << " count: " << npart_per_tag_arr[i] << std::endl; @@ -1000,6 +1035,7 @@ namespace ntt { raise::FatalIf(alive_counter_h(0) != total_alive, "Error in finding alive particles", HERE); + comm::MoveDeadToEnd(species.i1, indices_alive); comm::MoveDeadToEnd(species.i1_prev, indices_alive); comm::MoveDeadToEnd(species.dx1_prev, indices_alive); @@ -1024,25 +1060,41 @@ namespace ntt { comm::MoveDeadToEnd(species.dx3, indices_alive); comm::MoveDeadToEnd(species.dx3_prev, indices_alive); } - // tags + // tags (set first total_alive to alive and rest to dead) Kokkos::parallel_for( "Make tags alive", total_alive, Lambda(index_t p) { this_tag(p) = ParticleTag::alive; }); + + Kokkos::parallel_for( + "Make tags dead", + total_dead, + Lambda(index_t p) { + this_tag(total_alive + p) = ParticleTag::dead; + }); + species.set_npart(total_alive); - - + + { + auto [npart_per_tag_arr_, + tag_offset_] = species.npart_per_tag(); + auto npart_ = static_cast(species.npart()); + auto total_alive_ = static_cast( + npart_per_tag_arr_[ParticleTag::alive]); + auto total_dead_ = static_cast( + npart_per_tag_arr_[ParticleTag::dead]); + int rank, totranks; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &totranks); for (std::size_t current_rank=0; current_rank Date: Mon, 6 Jan 2025 23:17:18 -0500 Subject: [PATCH 25/52] fixed dead particle removal bug --- src/engines/srpic.hpp | 5 +- src/framework/domain/comm_mpi.hpp | 27 ------ src/framework/domain/communications.cpp | 112 ++++++++++++++++-------- 3 files changed, 77 insertions(+), 67 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index b54327076..fd1ca226a 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -176,11 +176,12 @@ namespace ntt { timers.stop("Injector"); } - if (step % 100 == 
0 && step > 0){ - + if (step % 1 == 0 && step > 0){ + MPI_Barrier(MPI_COMM_WORLD); timers.start("RemoveDead"); m_metadomain.RemoveDeadParticles(dom, &timers); timers.stop("RemoveDead"); + MPI_Barrier(MPI_COMM_WORLD); } } diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index aa35ce2a6..7b9a22eee 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -918,33 +918,6 @@ namespace comm { return; } -/* - Function to copy the alive particle data the arrays to a buffer and then back - to the particle arrays -*/ - template - void MoveDeadToEnd(array_t& arr, - Kokkos::View indices_alive) { - auto n_alive = indices_alive.extent(0); - auto buffer = Kokkos::View("buffer", n_alive); - Kokkos::parallel_for( - "PopulateBufferAlive", - n_alive, - Lambda(const std::size_t p) { - buffer(p) = arr(indices_alive(p)); - }); - - Kokkos::parallel_for( - "CopyBufferToArr", - n_alive, - Lambda(const std::size_t p) { - arr(p) = buffer(p); - }); - - return; - } - - } // namespace comm #endif // FRAMEWORK_DOMAIN_COMM_MPI_HPP diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 6450aa4b7..9f7b088f0 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -921,6 +921,32 @@ namespace ntt { } } + +/* + Function to copy the alive particle data the arrays to a buffer and then back + to the particle arrays +*/ + template + void MoveDeadToEnd(array_t& arr, + Kokkos::View indices_alive) { + auto n_alive = indices_alive.extent(0); + auto buffer = Kokkos::View("buffer", n_alive); + Kokkos::parallel_for( + "PopulateBufferAlive", + n_alive, + Lambda(const std::size_t p) { + buffer(p) = arr(indices_alive(p)); + }); + + Kokkos::parallel_for( + "CopyBufferToArr", + n_alive, + Lambda(const std::size_t p) { + arr(p) = buffer(p); + }); + return; + } + /* Function to remove dead particles from the domain @@ -989,33 +1015,33 @@ namespace ntt { std::cout 
<< "Rank: " << rank << std::endl; std::cout << "Total alive: " << total_alive_ << std::endl; std::cout << "Total dead: " << total_dead_ << std::endl; - std::cout << "Total particles: " << npart << std::endl; + std::cout << "Total particles: " << npart_ << std::endl; for (std::size_t i { 0 }; i < species.ntags(); i++) { - std::cout << "Tag: " << i << " count: " << npart_per_tag_arr[i] << std::endl; + std::cout << "Tag: " << i << " count: " << npart_per_tag_arr_[i] << std::endl; } } MPI_Barrier(MPI_COMM_WORLD); } } // Get the indices of all alive particles - auto &this_ux1 = species.ux1; - auto &this_ux2 = species.ux2; - auto &this_ux3 = species.ux3; - auto &this_weight = species.weight; - auto &this_phi = species.phi; - auto &this_i1 = species.i1; - auto &this_i1_prev = species.i1_prev; - auto &this_i2 = species.i2; - auto &this_i3 = species.i3; - auto &this_i2_prev = species.i2_prev; - auto &this_i3_prev = species.i3_prev; - auto &this_dx1 = species.dx1; - auto &this_dx1_prev = species.dx1_prev; - auto &this_dx2 = species.dx2; - auto &this_dx3 = species.dx3; - auto &this_dx2_prev = species.dx2_prev; - auto &this_dx3_prev = species.dx3_prev; - auto &this_tag = species.tag; + auto &this_i1 = species.i1; + auto &this_i2 = species.i2; + auto &this_i3 = species.i3; + auto &this_i1_prev = species.i1_prev; + auto &this_i2_prev = species.i2_prev; + auto &this_i3_prev = species.i3_prev; + auto &this_dx1 = species.dx1; + auto &this_dx2 = species.dx2; + auto &this_dx3 = species.dx3; + auto &this_dx1_prev = species.dx1_prev; + auto &this_dx2_prev = species.dx2_prev; + auto &this_dx3_prev = species.dx3_prev; + auto &this_ux1 = species.ux1; + auto &this_ux2 = species.ux2; + auto &this_ux3 = species.ux3; + auto &this_weight = species.weight; + auto &this_phi = species.phi; + auto &this_tag = species.tag; // Find indices of tag = alive particles Kokkos::View indices_alive("indices_alive", total_alive); Kokkos::View alive_counter("counter_alive", 1); @@ -1036,29 +1062,29 @@ 
namespace ntt { "Error in finding alive particles", HERE); - comm::MoveDeadToEnd(species.i1, indices_alive); - comm::MoveDeadToEnd(species.i1_prev, indices_alive); - comm::MoveDeadToEnd(species.dx1_prev, indices_alive); - comm::MoveDeadToEnd(species.ux1, indices_alive); - comm::MoveDeadToEnd(species.ux2, indices_alive); - comm::MoveDeadToEnd(species.ux3, indices_alive); - comm::MoveDeadToEnd(species.weight, indices_alive); + MoveDeadToEnd(species.i1, indices_alive); + MoveDeadToEnd(species.dx1, indices_alive); + MoveDeadToEnd(species.dx1_prev, indices_alive); + MoveDeadToEnd(species.ux1, indices_alive); + MoveDeadToEnd(species.ux2, indices_alive); + MoveDeadToEnd(species.ux3, indices_alive); + MoveDeadToEnd(species.weight, indices_alive); // Update i2, dx2, i2_prev, dx2_prev if constexpr(D == Dim::_2D || D == Dim::_3D){ - comm::MoveDeadToEnd(species.i2, indices_alive); - comm::MoveDeadToEnd(species.i2_prev, indices_alive); - comm::MoveDeadToEnd(species.dx2, indices_alive); - comm::MoveDeadToEnd(species.dx2_prev, indices_alive); + MoveDeadToEnd(species.i2, indices_alive); + MoveDeadToEnd(species.i2_prev, indices_alive); + MoveDeadToEnd(species.dx2, indices_alive); + MoveDeadToEnd(species.dx2_prev, indices_alive); if constexpr(D == Dim::_2D && M::CoordType != Coord::Cart){ - comm::MoveDeadToEnd(species.phi, indices_alive); + MoveDeadToEnd(species.phi, indices_alive); } } // Update i3, dx3, i3_prev, dx3_prev if constexpr(D == Dim::_3D){ - comm::MoveDeadToEnd(species.i3, indices_alive); - comm::MoveDeadToEnd(species.i3_prev, indices_alive); - comm::MoveDeadToEnd(species.dx3, indices_alive); - comm::MoveDeadToEnd(species.dx3_prev, indices_alive); + MoveDeadToEnd(species.i3, indices_alive); + MoveDeadToEnd(species.i3_prev, indices_alive); + MoveDeadToEnd(species.dx3, indices_alive); + MoveDeadToEnd(species.dx3_prev, indices_alive); } // tags (set first total_alive to alive and rest to dead) Kokkos::parallel_for( @@ -1077,6 +1103,16 @@ namespace ntt { 
species.set_npart(total_alive); + std::tie(npart_per_tag_arr, + tag_offset) = species.npart_per_tag(); + raise::FatalIf(npart_per_tag_arr[ParticleTag::alive] != total_alive, + "Error in removing dead particles: alive count doesn't match", + HERE); + raise::FatalIf(npart_per_tag_arr[ParticleTag::dead] != 0, + "Error in removing dead particles: not all particles are dead", + HERE); + + { auto [npart_per_tag_arr_, tag_offset_] = species.npart_per_tag(); @@ -1095,9 +1131,9 @@ namespace ntt { std::cout << "Rank: " << rank << std::endl; std::cout << "Total alive: " << total_alive_ << std::endl; std::cout << "Total dead: " << total_dead_ << std::endl; - std::cout << "Total particles: " << npart << std::endl; + std::cout << "Total particles: " << npart_ << std::endl; for (std::size_t i { 0 }; i < species.ntags(); i++) { - std::cout << "Tag: " << i << " count: " << npart_per_tag_arr[i] << std::endl; + std::cout << "Tag: " << i << " count: " << npart_per_tag_arr_[i] << std::endl; } } MPI_Barrier(MPI_COMM_WORLD); From c719c1f8bcee71d9300ed7b537f2a7b6e2a30f70 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 7 Jan 2025 06:59:25 -0500 Subject: [PATCH 26/52] print mpi ranks during sendrecv --- src/framework/domain/comm_mpi.hpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 7b9a22eee..cb3e18caa 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -564,6 +564,31 @@ namespace comm { auto iteration = 0; auto current_received = 0; + + { + // For debugging purposes + // Loop over all mpi processes + int rank, maxranks; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &maxranks); + for (auto i = 0; i < maxranks; ++i) { + MPI_Barrier(MPI_COMM_WORLD); + if (rank == i) { + for (auto &direction : dir::Directions::all){ + const auto send_rank = send_ranks[iteration]; + const auto recv_rank = recv_ranks[iteration]; + 
const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); + const auto send_count = npart_per_tag_arr[tag_send]; + const auto recv_count = npart_per_tag_arr_recv[tag_recv]; + printf("Current MPI rank %d, send rank %d recv rank %d, ", rank, + send_rank, recv_rank); + printf("send count %d, recv count %d\n", send_count, recv_count); + } + } + } + } + + for (auto& direction : dir::Directions::all) { const auto send_rank = send_ranks[iteration]; const auto recv_rank = recv_ranks[iteration]; From e43b7162d2f3e8d76df28d841140d75005d66d4e Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 7 Jan 2025 11:53:24 -0500 Subject: [PATCH 27/52] changed communications in fields --- src/framework/domain/comm_mpi.hpp | 216 +++++++++++-------- src/framework/domain/communications.cpp | 265 ++++-------------------- 2 files changed, 173 insertions(+), 308 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index cb3e18caa..1395c8191 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -182,16 +182,13 @@ namespace comm { } } - auto send_fld_h = Kokkos::create_mirror_view(send_fld); - auto recv_fld_h = Kokkos::create_mirror_view(recv_fld); - Kokkos::deep_copy(send_fld_h, send_fld); if (send_rank >= 0 && recv_rank >= 0) { - MPI_Sendrecv(send_fld_h.data(), + MPI_Sendrecv(send_fld.data(), nsend, mpi::get_type(), send_rank, 0, - recv_fld_h.data(), + recv_fld.data(), nrecv, mpi::get_type(), recv_rank, @@ -199,7 +196,7 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if (send_rank >= 0) { - MPI_Send(send_fld_h.data(), + MPI_Send(send_fld.data(), nsend, mpi::get_type(), send_rank, @@ -207,8 +204,7 @@ namespace comm { MPI_COMM_WORLD); } else if (recv_rank >= 0) { - auto recv_fld_h = Kokkos::create_mirror_view(recv_fld); - MPI_Recv(recv_fld_h.data(), + MPI_Recv(recv_fld.data(), nrecv, mpi::get_type(), recv_rank, @@ -218,7 +214,6 @@ namespace comm { } else { raise::Error("CommunicateField 
called with negative ranks", HERE); } - Kokkos::deep_copy(recv_fld, recv_fld_h); if (recv_rank >= 0) { @@ -297,67 +292,35 @@ namespace comm { const range_tuple_t& recv_slice) { const std::size_t send_count = send_slice.second - send_slice.first; const std::size_t recv_count = recv_slice.second - recv_slice.first; - // Make arrays on the host - auto send_arr_h = Kokkos::create_mirror_view(Kokkos::subview(arr, send_slice)); - Kokkos::deep_copy(send_arr_h, Kokkos::subview(arr, send_slice)); - auto recv_arr_h = Kokkos::create_mirror_view(Kokkos::subview(arr, recv_slice)); if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and (recv_count > 0)) { - MPI_Sendrecv(send_arr_h.data(), + MPI_Sendrecv(arr.data() + send_slice.first, send_count, mpi::get_type(), send_rank, 0, - recv_arr_h.data(), + arr.data() + recv_slice.first, recv_count, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - //MPI_Sendrecv(arr.data() + send_slice.first, - // send_count, - // mpi::get_type(), - // send_rank, - // 0, - // arr.data() + recv_slice.first, - // recv_count, - // mpi::get_type(), - // recv_rank, - // 0, - // MPI_COMM_WORLD, - // MPI_STATUS_IGNORE); } else if ((send_rank >= 0) and (send_count > 0)) { - MPI_Send( send_arr_h.data(), - send_count, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - //MPI_Send(arr.data() + send_slice.first, - // send_count, - // mpi::get_type(), - // send_rank, - // 0, - // MPI_COMM_WORLD); + MPI_Send(arr.data() + send_slice.first, + send_count, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); } else if ((recv_rank >= 0) and (recv_count > 0)) { - MPI_Recv( recv_arr_h.data(), - recv_count, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - //MPI_Recv(arr.data() + recv_slice.first, - // recv_count, - // mpi::get_type(), - // recv_rank, - // 0, - // MPI_COMM_WORLD, - // MPI_STATUS_IGNORE); - } - if ((recv_rank >= 0) and (recv_count > 0)) { - Kokkos::deep_copy(Kokkos::subview(arr, recv_slice), 
recv_arr_h); + MPI_Recv(arr.data() + recv_slice.first, + recv_count, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); } } @@ -544,6 +507,12 @@ namespace comm { const auto n_alive = npart_per_tag_arr[ParticleTag::alive]; const auto n_dead = npart_per_tag_arr[ParticleTag::dead]; + // Debug test: print send and recv count + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Total send: %d, Total recv: %d \n", rank, total_send, total_recv); + } /* Brief on recv buffers: Each recv buffer contains all the received arrays of a given type. The different physical quantities are stored next to each other @@ -564,31 +533,6 @@ namespace comm { auto iteration = 0; auto current_received = 0; - - { - // For debugging purposes - // Loop over all mpi processes - int rank, maxranks; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &maxranks); - for (auto i = 0; i < maxranks; ++i) { - MPI_Barrier(MPI_COMM_WORLD); - if (rank == i) { - for (auto &direction : dir::Directions::all){ - const auto send_rank = send_ranks[iteration]; - const auto recv_rank = recv_ranks[iteration]; - const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); - const auto send_count = npart_per_tag_arr[tag_send]; - const auto recv_count = npart_per_tag_arr_recv[tag_recv]; - printf("Current MPI rank %d, send rank %d recv rank %d, ", rank, - send_rank, recv_rank); - printf("send count %d, recv count %d\n", send_count, recv_count); - } - } - } - } - - for (auto& direction : dir::Directions::all) { const auto send_rank = send_ranks[iteration]; const auto recv_rank = recv_ranks[iteration]; @@ -729,6 +673,12 @@ namespace comm { if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and (recv_count > 0)) { + // Debug: Print the rank and type of mpi operation performed + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Performing sendrecv operation \n", rank); + } 
MPI_Sendrecv(send_buffer_int_h.data(), send_count * NINTS, mpi::get_type(), @@ -778,6 +728,12 @@ namespace comm { MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if ((send_rank >= 0) and (send_count > 0)) { + // Debug: Print the rank and type of mpi operation performed + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Performing send operation \n", rank); + } MPI_Send(send_buffer_int_h.data(), send_count * NINTS, mpi::get_type(), @@ -803,6 +759,12 @@ namespace comm { 0, MPI_COMM_WORLD); } else if ((recv_rank >= 0) and (recv_count > 0)) { + // Debug: Print the rank and type of mpi operation performed + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //printf("MPI rank: %d, Performing recv operation \n", rank); + } MPI_Recv(recv_buffer_int_h.data() + receive_offset_int, recv_count * NINTS, mpi::get_type(), @@ -834,8 +796,59 @@ namespace comm { } current_received += recv_count; iteration++; - + // Debug test: Print recv buffer before and after + /* + { + int total_ranks; + MPI_Comm_size(MPI_COMM_WORLD, &total_ranks); + for (int allranks=0; allranks current_offset("current_offset", species.ntags()); auto &this_tag_offset = tag_offset; + auto n_alive = npart_per_tag_arr[ParticleTag::alive]; + if constexpr (D == Dim::_1D){ Kokkos::parallel_for( "PermuteVector and Displace", @@ -779,7 +781,7 @@ namespace ntt { // tag = 1->N (excluding dead and alive) else{ const auto idx_permute_vec = this_tag_offset(current_tag) - - total_alive + + n_alive + Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); @@ -808,7 +810,7 @@ namespace ntt { // tag = 1->N (excluding dead and alive) else{ const auto idx_permute_vec = this_tag_offset(current_tag) - - total_alive + + n_alive + Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); @@ -839,7 +841,7 @@ namespace ntt { // tag = 1->N (excluding dead and alive) else{ const auto idx_permute_vec = this_tag_offset(current_tag) - - total_alive + + n_alive + Kokkos::atomic_fetch_add( ¤t_offset(current_tag), 1); 
@@ -913,235 +915,48 @@ namespace ntt { }); } - // Communicate the arrays - comm::CommunicateParticlesBuffer(species, permute_vector, allocation_vector, - this_tag_offset, npart_per_tag_arr, npart_per_tag_arr_recv, - send_ranks, recv_ranks); -#endif - } - } - - -/* - Function to copy the alive particle data the arrays to a buffer and then back - to the particle arrays -*/ - template - void MoveDeadToEnd(array_t& arr, - Kokkos::View indices_alive) { - auto n_alive = indices_alive.extent(0); - auto buffer = Kokkos::View("buffer", n_alive); - Kokkos::parallel_for( - "PopulateBufferAlive", - n_alive, - Lambda(const std::size_t p) { - buffer(p) = arr(indices_alive(p)); - }); - - Kokkos::parallel_for( - "CopyBufferToArr", - n_alive, - Lambda(const std::size_t p) { - arr(p) = buffer(p); - }); - return; - } - - /* - Function to remove dead particles from the domain - - Consider the following particle quantity array - <---xxx---x---xx---xx-----------xx----x--> (qty) - - = alive - x = dead - ntot = nalive + ndead - - (1) Copy all alive particle data to buffer - <---xxx---x---xx---xx-----------xx----x--> (qty) - | - | - v - <--------------------------> buffer - (nalive) - - (2) Copy from buffer to the beginning of the array - overwritting all particles - <--------------------------> buffer - (nalive) - | - | - v - <--------------------------xx----x--> (qty) - ^ - (nalive) - - (3) Set npart to nalive - */ - template - void Metadomain::RemoveDeadParticles(Domain& domain, - timer::Timers* timers){ - for (auto& species : domain.species) { - auto [npart_per_tag_arr, - tag_offset] = species.npart_per_tag(); - const auto npart = static_cast(species.npart()); - const auto total_alive = static_cast( - npart_per_tag_arr[ParticleTag::alive]); - const auto total_dead = static_cast( - npart_per_tag_arr[ParticleTag::dead]); - - // Check that only alive and dead particles are present - for (std::size_t i { 0 }; i < species.ntags(); i++) { - if (i != ParticleTag::alive && i != 
ParticleTag::dead){ - raise::FatalIf(npart_per_tag_arr[i] != 0, - "Particle tags can only be dead or alive at this point", - HERE); - } - } + /* + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank == 1 && species.label() == "e+_b") { - auto [npart_per_tag_arr_, - tag_offset_] = species.npart_per_tag(); - auto npart_ = static_cast(species.npart()); - auto total_alive_ = static_cast( - npart_per_tag_arr_[ParticleTag::alive]); - auto total_dead_ = static_cast( - npart_per_tag_arr_[ParticleTag::dead]); + // Copy the tag array to host + auto tag_h = Kokkos::create_mirror_view(species.tag); + Kokkos::deep_copy(tag_h, species.tag); + std::cout << "Tag locs before send" << std::endl; + for (std::size_t i { 0 }; i < species.npart(); i++) { + if (tag_h(i) != ParticleTag::alive) + std::cout <<" Tag: " << tag_h(i) << " loc: "<< i << std::endl; + } - int rank, totranks; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &totranks); - for (std::size_t current_rank=0; current_rank indices_alive("indices_alive", total_alive); - Kokkos::View alive_counter("counter_alive", 1); - Kokkos::deep_copy(alive_counter, 0); - Kokkos::parallel_for( - "Indices of Alive Particles", - species.npart(), - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&alive_counter(0), 1); - indices_alive(idx) = p; + // Print the permute vector as well + auto permute_vector_h = Kokkos::create_mirror_view(permute_vector); + Kokkos::deep_copy(permute_vector_h, permute_vector); + for (std::size_t i { 0 }; i < total_holes; ++i) { + std::cout << "Rank: " << rank << " Permuted vector: " << permute_vector_h(i) << + " tag: " << tag_h(permute_vector_h(i)) << std::endl; } - }); - // Sanity check: alive_counter must be equal to total_alive - auto alive_counter_h = Kokkos::create_mirror_view(alive_counter); - Kokkos::deep_copy(alive_counter_h, alive_counter); - raise::FatalIf(alive_counter_h(0) != total_alive, - "Error in finding alive 
particles", - HERE); - - MoveDeadToEnd(species.i1, indices_alive); - MoveDeadToEnd(species.dx1, indices_alive); - MoveDeadToEnd(species.dx1_prev, indices_alive); - MoveDeadToEnd(species.ux1, indices_alive); - MoveDeadToEnd(species.ux2, indices_alive); - MoveDeadToEnd(species.ux3, indices_alive); - MoveDeadToEnd(species.weight, indices_alive); - // Update i2, dx2, i2_prev, dx2_prev - if constexpr(D == Dim::_2D || D == Dim::_3D){ - MoveDeadToEnd(species.i2, indices_alive); - MoveDeadToEnd(species.i2_prev, indices_alive); - MoveDeadToEnd(species.dx2, indices_alive); - MoveDeadToEnd(species.dx2_prev, indices_alive); - if constexpr(D == Dim::_2D && M::CoordType != Coord::Cart){ - MoveDeadToEnd(species.phi, indices_alive); } - } - // Update i3, dx3, i3_prev, dx3_prev - if constexpr(D == Dim::_3D){ - MoveDeadToEnd(species.i3, indices_alive); - MoveDeadToEnd(species.i3_prev, indices_alive); - MoveDeadToEnd(species.dx3, indices_alive); - MoveDeadToEnd(species.dx3_prev, indices_alive); - } - // tags (set first total_alive to alive and rest to dead) - Kokkos::parallel_for( - "Make tags alive", - total_alive, - Lambda(index_t p) { - this_tag(p) = ParticleTag::alive; - }); - - Kokkos::parallel_for( - "Make tags dead", - total_dead, - Lambda(index_t p) { - this_tag(total_alive + p) = ParticleTag::dead; - }); - - species.set_npart(total_alive); - - std::tie(npart_per_tag_arr, - tag_offset) = species.npart_per_tag(); - raise::FatalIf(npart_per_tag_arr[ParticleTag::alive] != total_alive, - "Error in removing dead particles: alive count doesn't match", - HERE); - raise::FatalIf(npart_per_tag_arr[ParticleTag::dead] != 0, - "Error in removing dead particles: not all particles are dead", - HERE); - - - { - auto [npart_per_tag_arr_, - tag_offset_] = species.npart_per_tag(); - auto npart_ = static_cast(species.npart()); - auto total_alive_ = static_cast( - npart_per_tag_arr_[ParticleTag::alive]); - auto total_dead_ = static_cast( - npart_per_tag_arr_[ParticleTag::dead]); - - int rank, 
totranks; + */ + { + int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &totranks); - for (std::size_t current_rank=0; current_rank(species, permute_vector, allocation_vector, + this_tag_offset, npart_per_tag_arr, npart_per_tag_arr_recv, + send_ranks, recv_ranks); +#endif + } } template struct Metadomain>; From 42a6ea2668e31b58b0830ad6f48d4cfffa7024b8 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Tue, 7 Jan 2025 12:13:53 -0500 Subject: [PATCH 28/52] small change in metadomain header --- src/engines/srpic.hpp | 9 --------- src/framework/domain/metadomain.h | 1 - 2 files changed, 10 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index fd1ca226a..d751c712a 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -175,15 +175,6 @@ namespace ntt { ParticleInjector(dom); timers.stop("Injector"); } - - if (step % 1 == 0 && step > 0){ - MPI_Barrier(MPI_COMM_WORLD); - timers.start("RemoveDead"); - m_metadomain.RemoveDeadParticles(dom, &timers); - timers.stop("RemoveDead"); - MPI_Barrier(MPI_COMM_WORLD); - } - } /* algorithm substeps --------------------------------------------------- */ diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 6bd3d29d8..9e94bf89f 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -91,7 +91,6 @@ namespace ntt { void CommunicateParticles(Domain&, timer::Timers*); void CommunicateParticlesBuffer(Domain&, timer::Timers*); void SetParticleIDs(Domain&); - void RemoveDeadParticles(Domain&, timer::Timers* ); /** * @param global_ndomains total number of domains From a5704740678b49aa7c819754d67cb3df8ebb07b8 Mon Sep 17 00:00:00 2001 From: Sasha Chernoglazov Date: Wed, 8 Jan 2025 20:25:15 -0500 Subject: [PATCH 29/52] correct communications with boundaries --- src/framework/domain/comm_mpi.hpp | 56 +++++++++++++------------ src/framework/domain/communications.cpp | 11 ++--- src/global/utils/progressbar.cpp | 4 +- 
3 files changed, 35 insertions(+), 36 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 1395c8191..ce38a8261 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -53,7 +53,6 @@ namespace comm { (recv_rank == rank && recv_idx != idx), "Multiple-domain single-rank communication not yet implemented", HERE); - if ((send_idx == idx) and (recv_idx == idx)) { // trivial copy if sending to self and receiving from self @@ -456,7 +455,8 @@ namespace comm { std::vector npart_per_tag_arr, std::vector npart_per_tag_arr_recv, std::vector send_ranks, - std::vector recv_ranks) { + std::vector recv_ranks, + const dir::dirs_t& legal_directions) { // Pointers to the particle data arrays auto &this_ux1 = species.ux1; auto &this_ux2 = species.ux2; @@ -533,13 +533,17 @@ namespace comm { auto iteration = 0; auto current_received = 0; - for (auto& direction : dir::Directions::all) { + for (const auto& direction : legal_directions) { const auto send_rank = send_ranks[iteration]; const auto recv_rank = recv_ranks[iteration]; const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); const auto send_count = npart_per_tag_arr[tag_send]; const auto recv_count = npart_per_tag_arr_recv[tag_recv]; + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + } if (send_rank < 0 and recv_rank < 0) { continue; } @@ -677,50 +681,50 @@ namespace comm { { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing sendrecv operation \n", rank); + //printf("MPI rank: %d, Performing sendrecv operation, direction %d \n", rank, direction); } - MPI_Sendrecv(send_buffer_int_h.data(), + MPI_Sendrecv(send_buffer_int.data(), send_count * NINTS, mpi::get_type(), send_rank, 0, - recv_buffer_int_h.data() + receive_offset_int, + recv_buffer_int.data() + receive_offset_int, recv_count*NINTS, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, 
MPI_STATUS_IGNORE); - MPI_Sendrecv(send_buffer_real_h.data(), + MPI_Sendrecv(send_buffer_real.data(), send_count * NREALS, mpi::get_type(), send_rank, 0, - recv_buffer_real_h.data() + receive_offset_real, + recv_buffer_real.data() + receive_offset_real, recv_count*NREALS, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Sendrecv(send_buffer_prtldx_h.data(), + MPI_Sendrecv(send_buffer_prtldx.data(), send_count * NFLOATS, mpi::get_type(), send_rank, 0, - recv_buffer_prtldx_h.data() + receive_offset_prtldx, + recv_buffer_prtldx.data() + receive_offset_prtldx, recv_count*NFLOATS, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Sendrecv(send_buffer_long_h.data(), + MPI_Sendrecv(send_buffer_long.data(), send_count * NLONGS, mpi::get_type(), send_rank, 0, - recv_buffer_long_h.data() + receive_offset_long, + recv_buffer_long.data() + receive_offset_long, recv_count*NLONGS, mpi::get_type(), recv_rank, @@ -732,61 +736,61 @@ namespace comm { { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing send operation \n", rank); + //printf("MPI rank: %d, Performing send operation, direction %d \n", rank, direction); } - MPI_Send(send_buffer_int_h.data(), + MPI_Send(send_buffer_int.data(), send_count * NINTS, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); - MPI_Send(send_buffer_real_h.data(), + MPI_Send(send_buffer_real.data(), send_count * NREALS, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); - MPI_Send(send_buffer_prtldx_h.data(), + MPI_Send(send_buffer_prtldx.data(), send_count * NFLOATS, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); - MPI_Send(send_buffer_long_h.data(), + MPI_Send(send_buffer_long.data(), send_count * NLONGS, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); - } else if ((recv_rank >= 0) and (recv_count > 0)) { + } else if ((recv_rank >= 0) and (recv_count > 0)) { // Debug: Print the rank and type of mpi operation performed { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - 
//printf("MPI rank: %d, Performing recv operation \n", rank); + //printf("MPI rank: %d, Performing recv operation, direction %d \n", rank, direction); } - MPI_Recv(recv_buffer_int_h.data() + receive_offset_int, + MPI_Recv(recv_buffer_int.data() + receive_offset_int, recv_count * NINTS, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Recv(recv_buffer_real_h.data() + receive_offset_real, + MPI_Recv(recv_buffer_real.data() + receive_offset_real, recv_count * NREALS, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Recv(recv_buffer_prtldx_h.data() + receive_offset_prtldx, + MPI_Recv(recv_buffer_prtldx.data() + receive_offset_prtldx, recv_count * NFLOATS, mpi::get_type(), recv_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Recv(recv_buffer_long_h.data() + receive_offset_long, + MPI_Recv(recv_buffer_long.data() + receive_offset_long, recv_count * NLONGS, mpi::get_type(), recv_rank, @@ -850,9 +854,9 @@ namespace comm { */ } // end over direction loop - Kokkos::deep_copy(recv_buffer_int, recv_buffer_int_h); + /*Kokkos::deep_copy(recv_buffer_int, recv_buffer_int_h); Kokkos::deep_copy(recv_buffer_real, recv_buffer_real_h); - Kokkos::deep_copy(recv_buffer_prtldx, recv_buffer_prtldx_h); + Kokkos::deep_copy(recv_buffer_prtldx, recv_buffer_prtldx_h);*/ if constexpr (D == Dim::_1D) { Kokkos::parallel_for( @@ -949,10 +953,8 @@ namespace comm { this_particleID(idx) = recv_buffer_long(NLONGS * p + 0); }); } - species.set_npart(species.npart() + std::max(permute_vector.extent(0), allocation_vector.extent(0)) - permute_vector.extent(0)); - /* { int rank; diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index d1ce06609..dc62338f7 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -686,6 +686,7 @@ namespace ntt { auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); auto shifts_in_x2_h = Kokkos::create_mirror_view(shifts_in_x2); 
auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); + dir::dirs_t legal_directions; // Get receive counts + displacements for (auto& direction : dir::Directions::all) { @@ -703,6 +704,7 @@ namespace ntt { const auto nsend = npart_per_tag_arr[tag_send]; std::size_t nrecv = 0; + legal_directions.push_back(direction); send_ranks.push_back(send_rank); recv_ranks.push_back(recv_rank); send_inds.push_back(send_ind); @@ -945,16 +947,11 @@ namespace ntt { } } */ - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - std::cout << "Rank: " << rank << " Total sent: " << total_holes - total_dead << " Total recv: " << total_recv << std::endl; - } - + // Communicate the arrays comm::CommunicateParticlesBuffer(species, permute_vector, allocation_vector, this_tag_offset, npart_per_tag_arr, npart_per_tag_arr_recv, - send_ranks, recv_ranks); + send_ranks, recv_ranks, legal_directions); #endif } } diff --git a/src/global/utils/progressbar.cpp b/src/global/utils/progressbar.cpp index 38f65a790..74f952382 100644 --- a/src/global/utils/progressbar.cpp +++ b/src/global/utils/progressbar.cpp @@ -52,10 +52,10 @@ namespace pbar { } auto to_human_readable(long double t, const std::string& u) -> std::string { - const auto [tt, tu] = std::pair{t, u};//normalize_duration_fmt(t, u); + const auto [tt, tu] = normalize_duration_fmt(t, u); const auto t1 = static_cast(tt); const auto t2 = tt - static_cast(t1); - const auto [tt2, tu2] = std::pair{t2, tu};//normalize_duration_fmt(t2, tu); + const auto [tt2, tu2] = normalize_duration_fmt(t2, tu); return fmt::format("%d%s %d%s", t1, tu.c_str(), static_cast(tt2), tu2.c_str()); } From d63595f3a326eed5fb27268bb02af15b93f98a4a Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Wed, 8 Jan 2025 22:47:55 -0500 Subject: [PATCH 30/52] added dead particle function --- src/engines/srpic.hpp | 8 ++ src/framework/domain/communications.cpp | 170 ++++++++++++++++++++++++ 2 files changed, 178 insertions(+) diff --git a/src/engines/srpic.hpp 
b/src/engines/srpic.hpp index d751c712a..0489d8508 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -175,6 +175,14 @@ namespace ntt { ParticleInjector(dom); timers.stop("Injector"); } + + if (step % 100 == 0 && step > 0){ + MPI_Barrier(MPI_COMM_WORLD); + timers.start("RemoveDead"); + m_metadomain.RemoveDeadParticles(dom, &timers); + timers.stop("RemoveDead"); + MPI_Barrier(MPI_COMM_WORLD); + } } /* algorithm substeps --------------------------------------------------- */ diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index dc62338f7..390c27fa8 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -956,6 +956,176 @@ namespace ntt { } } + /* + Function to copy the alive particle data the arrays to a buffer and then back + to the particle arrays +*/ + template + void MoveDeadToEnd(array_t& arr, + Kokkos::View indices_alive) { + auto n_alive = indices_alive.extent(0); + auto buffer = Kokkos::View("buffer", n_alive); + Kokkos::parallel_for( + "PopulateBufferAlive", + n_alive, + Lambda(const std::size_t p) { + buffer(p) = arr(indices_alive(p)); + }); + + Kokkos::parallel_for( + "CopyBufferToArr", + n_alive, + Lambda(const std::size_t p) { + arr(p) = buffer(p); + }); + return; + } + + /* + Function to remove dead particles from the domain + + Consider the following particle quantity array + <---xxx---x---xx---xx-----------xx----x--> (qty) + - = alive + x = dead + ntot = nalive + ndead + + (1) Copy all alive particle data to buffer + <---xxx---x---xx---xx-----------xx----x--> (qty) + | + | + v + <--------------------------> buffer + (nalive) + + (2) Copy from buffer to the beginning of the array + overwritting all particles + <--------------------------> buffer + (nalive) + | + | + v + <--------------------------xx----x--> (qty) + ^ + (nalive) + + (3) Set npart to nalive + */ + template + void Metadomain::RemoveDeadParticles(Domain& domain, + timer::Timers* 
timers){ + for (auto& species : domain.species) { + auto [npart_per_tag_arr, + tag_offset] = species.npart_per_tag(); + const auto npart = static_cast(species.npart()); + const auto total_alive = static_cast( + npart_per_tag_arr[ParticleTag::alive]); + const auto total_dead = static_cast( + npart_per_tag_arr[ParticleTag::dead]); + + // Check that only alive and dead particles are present + for (std::size_t i { 0 }; i < species.ntags(); i++) { + if (i != ParticleTag::alive && i != ParticleTag::dead){ + raise::FatalIf(npart_per_tag_arr[i] != 0, + "Particle tags can only be dead or alive at this point", + HERE); + } + } + + // Get the indices of all alive particles + auto &this_i1 = species.i1; + auto &this_i2 = species.i2; + auto &this_i3 = species.i3; + auto &this_i1_prev = species.i1_prev; + auto &this_i2_prev = species.i2_prev; + auto &this_i3_prev = species.i3_prev; + auto &this_dx1 = species.dx1; + auto &this_dx2 = species.dx2; + auto &this_dx3 = species.dx3; + auto &this_dx1_prev = species.dx1_prev; + auto &this_dx2_prev = species.dx2_prev; + auto &this_dx3_prev = species.dx3_prev; + auto &this_ux1 = species.ux1; + auto &this_ux2 = species.ux2; + auto &this_ux3 = species.ux3; + auto &this_weight = species.weight; + auto &this_phi = species.phi; + auto &this_tag = species.tag; + // Find indices of tag = alive particles + Kokkos::View indices_alive("indices_alive", total_alive); + Kokkos::View alive_counter("counter_alive", 1); + Kokkos::deep_copy(alive_counter, 0); + Kokkos::parallel_for( + "Indices of Alive Particles", + species.npart(), + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive){ + const auto idx = Kokkos::atomic_fetch_add(&alive_counter(0), 1); + indices_alive(idx) = p; + } + }); + // Sanity check: alive_counter must be equal to total_alive + auto alive_counter_h = Kokkos::create_mirror_view(alive_counter); + Kokkos::deep_copy(alive_counter_h, alive_counter); + raise::FatalIf(alive_counter_h(0) != total_alive, + "Error in finding alive 
particles", + HERE); + + MoveDeadToEnd(species.i1, indices_alive); + MoveDeadToEnd(species.dx1, indices_alive); + MoveDeadToEnd(species.dx1_prev, indices_alive); + MoveDeadToEnd(species.ux1, indices_alive); + MoveDeadToEnd(species.ux2, indices_alive); + MoveDeadToEnd(species.ux3, indices_alive); + MoveDeadToEnd(species.weight, indices_alive); + // Update i2, dx2, i2_prev, dx2_prev + if constexpr(D == Dim::_2D || D == Dim::_3D){ + MoveDeadToEnd(species.i2, indices_alive); + MoveDeadToEnd(species.i2_prev, indices_alive); + MoveDeadToEnd(species.dx2, indices_alive); + MoveDeadToEnd(species.dx2_prev, indices_alive); + if constexpr(D == Dim::_2D && M::CoordType != Coord::Cart){ + MoveDeadToEnd(species.phi, indices_alive); + } + } + // Update i3, dx3, i3_prev, dx3_prev + if constexpr(D == Dim::_3D){ + MoveDeadToEnd(species.i3, indices_alive); + MoveDeadToEnd(species.i3_prev, indices_alive); + MoveDeadToEnd(species.dx3, indices_alive); + MoveDeadToEnd(species.dx3_prev, indices_alive); + } + // tags (set first total_alive to alive and rest to dead) + Kokkos::parallel_for( + "Make tags alive", + total_alive, + Lambda(index_t p) { + this_tag(p) = ParticleTag::alive; + }); + + Kokkos::parallel_for( + "Make tags dead", + total_dead, + Lambda(index_t p) { + this_tag(total_alive + p) = ParticleTag::dead; + }); + + species.set_npart(total_alive); + + std::tie(npart_per_tag_arr, + tag_offset) = species.npart_per_tag(); + raise::FatalIf(npart_per_tag_arr[ParticleTag::alive] != total_alive, + "Error in removing dead particles: alive count doesn't match", + HERE); + raise::FatalIf(npart_per_tag_arr[ParticleTag::dead] != 0, + "Error in removing dead particles: not all particles are dead", + HERE); + + } + + return; + } + template struct Metadomain>; template struct Metadomain>; template struct Metadomain>; From 8fc0cd6dd2137e0ccd0b69f66ade66d3b9c12480 Mon Sep 17 00:00:00 2001 From: Siddhant Solanki Date: Wed, 8 Jan 2025 23:28:45 -0500 Subject: [PATCH 31/52] added header --- 
src/framework/domain/metadomain.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 9e94bf89f..9e2c2bb9d 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -91,6 +91,7 @@ namespace ntt { void CommunicateParticles(Domain&, timer::Timers*); void CommunicateParticlesBuffer(Domain&, timer::Timers*); void SetParticleIDs(Domain&); + void RemoveDeadParticles(Domain& ,timer::Timers* ); /** * @param global_ndomains total number of domains From 0ef23120b66a85dc4a3a2feb6303b4eb76185e03 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 22 Jan 2025 15:19:34 -0500 Subject: [PATCH 32/52] hdf5_root -- optionally set --- CMakeLists.txt | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2618a0cb2..efd240993 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,23 +101,12 @@ if(${output}) include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/adios2Config.cmake) find_or_fetch_dependency(adios2 FALSE) if(NOT DEFINED ENV{HDF5_ROOT}) - set(USE_CUSTOM_HDF5 OFF) if(DEFINED ENV{CONDA_PREFIX}) execute_process(COMMAND bash -c "conda list | grep \"hdf5\" -q" RESULT_VARIABLE HDF5_INSTALLED) if(HDF5_INSTALLED EQUAL 0) set(HDF5_ROOT $ENV{CONDA_PREFIX}) - else() - set(USE_CUSTOM_HDF5 ON) endif() - else() - set(USE_CUSTOM_HDF5 ON) - endif() - if(USE_CUSTOM_HDF5) - message( - FATAL_ERROR - "HDF5_ROOT is not set. 
Please set it to the root of the HDF5 installation" - ) endif() endif() find_package(HDF5 REQUIRED) From 16a4086ccd1f8ef8fb54fc02d2682b11bc8693c0 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 22 Jan 2025 15:20:30 -0500 Subject: [PATCH 33/52] sort_interval -> clear_interval --- input.example.toml | 13 ++- setups/srpic/blob/blob.toml | 40 ++++---- setups/srpic/em_vacuum/em_vacuum.toml | 22 ++--- setups/srpic/langmuir/langmuir.toml | 36 +++---- setups/srpic/magnetar/magnetar.toml | 96 +++++++++---------- setups/srpic/magnetosphere/magnetosphere.toml | 58 +++++------ setups/srpic/monopole/monopole.toml | 56 +++++------ setups/srpic/shock/shock.toml | 30 +++--- setups/srpic/turbulence/turbulence.toml | 34 +++---- setups/srpic/weibel/weibel.toml | 48 +++++----- 10 files changed, 216 insertions(+), 217 deletions(-) diff --git a/input.example.toml b/input.example.toml index 5ee34d65d..2f6d2b285 100644 --- a/input.example.toml +++ b/input.example.toml @@ -105,7 +105,7 @@ # @note: In spherical, bondaries in theta/phi are set automatically (only specify bc @ [rmin, rmax]) [["ATMOSPHERE", "ABSORB"]] # @note: In GR, the horizon boundary is set automatically (only specify bc @ rmax): [["ABSORB"]] particles = "" - + [grid.boundaries.absorb] # Size of the absorption layer in physical (code) units: # @type: float @@ -119,7 +119,7 @@ coeff = "" [grid.boundaries.atmosphere] - # @required: if ATMOSPHERE is one of the boundaries + # @required: if ATMOSPHERE is one of the boundaries # Temperature of the atmosphere in units of m0 c^2 # @type: float temperature = "" @@ -210,7 +210,7 @@ # @type: float: ~1 # @default: 1.0 correction = "" - + # @inferred: # - dt [= CFL * dx0] # @brief: timestep duration @@ -252,12 +252,11 @@ # @type: bool # @default: false use_weights = "" - # Timesteps between particle re-sorting: + # Timesteps between particle re-sorting (removing dead particles): # @type: unsigned int # @default: 100 - # @note: When MPI is enable, particles are sorted every step. 
- # @note: When `sort_interval` == 0, the sorting is disabled. - sort_interval = "" + # @note: set to 0 to disable re-sorting + clear_interval = "" # @inferred: # - nspec diff --git a/setups/srpic/blob/blob.toml b/setups/srpic/blob/blob.toml index 7c03b1f9e..7a047f348 100644 --- a/setups/srpic/blob/blob.toml +++ b/setups/srpic/blob/blob.toml @@ -1,24 +1,24 @@ [simulation] - name = "blob" - engine = "srpic" + name = "blob" + engine = "srpic" runtime = 100.0 [simulation.domain] - decomposition = [2,1,1] + decomposition = [2, 1, 1] [grid] resolution = [1024, 1024] - extent = [[-10.0, 10.0], [-10.0, 10.0]] + extent = [[-10.0, 10.0], [-10.0, 10.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 1.0 + larmor0 = 1.0 skindepth0 = 1.0 [algorithms] @@ -31,26 +31,26 @@ ppc0 = 16.0 [[particles.species]] - label = "e-_p" - mass = 1.0 - charge = -1.0 + label = "e-_p" + mass = 1.0 + charge = -1.0 maxnpart = 1e7 [[particles.species]] - label = "e+_p" - mass = 1.0 - charge = 1.0 + label = "e+_p" + mass = 1.0 + charge = 1.0 maxnpart = 1e7 [setup] - temp_1 = 1e-4 - x1c = -5.0 - x2c = 0.0 - v_max = 50.0 - dr = 1.0 - + temp_1 = 1e-4 + x1c = -5.0 + x2c = 0.0 + v_max = 50.0 + dr = 1.0 + [output] - format = "hdf5" + format = "hdf5" interval_time = 1.0 [output.fields] diff --git a/setups/srpic/em_vacuum/em_vacuum.toml b/setups/srpic/em_vacuum/em_vacuum.toml index 156c8d308..23381b1c6 100644 --- a/setups/srpic/em_vacuum/em_vacuum.toml +++ b/setups/srpic/em_vacuum/em_vacuum.toml @@ -1,21 +1,21 @@ [simulation] - name = "em_vacuum" - engine = "srpic" + name = "em_vacuum" + engine = "srpic" runtime = 2.0 [grid] resolution = [256, 512] - extent = [[-1.0, 1.0], [-2.0, 2.0]] + extent = [[-1.0, 1.0], [-2.0, 2.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] 
particles = [["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 0.1 + larmor0 = 0.1 skindepth0 = 0.01 [algorithms] @@ -28,12 +28,12 @@ [setup] amplitude = 1.0 - kx1 = 1 - kx2 = 1 - kx3 = 0 - + kx1 = 1 + kx2 = 1 + kx3 = 0 + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.1 [output.fields] diff --git a/setups/srpic/langmuir/langmuir.toml b/setups/srpic/langmuir/langmuir.toml index 2f3520fc5..b054a940d 100644 --- a/setups/srpic/langmuir/langmuir.toml +++ b/setups/srpic/langmuir/langmuir.toml @@ -1,21 +1,21 @@ [simulation] - name = "langmuir" - engine = "srpic" + name = "langmuir" + engine = "srpic" runtime = 1.0 [grid] resolution = [2048, 512] - extent = [[0.0, 1.0], [0.0, 0.25]] + extent = [[0.0, 1.0], [0.0, 0.25]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 0.1 + larmor0 = 0.1 skindepth0 = 0.01 [algorithms] @@ -28,24 +28,24 @@ ppc0 = 14.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e7 + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e7 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e7 + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e7 [setup] vmax = 0.1 - nx1 = 4 - nx2 = 2 - + nx1 = 4 + nx2 = 2 + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.0025 [output.fields] diff --git a/setups/srpic/magnetar/magnetar.toml b/setups/srpic/magnetar/magnetar.toml index 2a2260af5..fab2eb01c 100644 --- a/setups/srpic/magnetar/magnetar.toml +++ b/setups/srpic/magnetar/magnetar.toml @@ -1,17 +1,17 @@ [simulation] - name = "magnetar" - engine = "srpic" + name = "magnetar" + engine = "srpic" runtime = 50.0 [grid] - resolution = [2048,1024] - extent = [[1.0, 400.0]] + resolution = [2048, 1024] + extent = [[1.0, 400.0]] [grid.metric] metric = "qspherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields 
= [["ATMOSPHERE", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] [grid.boundaries.absorb] @@ -19,13 +19,13 @@ [grid.boundaries.atmosphere] temperature = 0.1 - density = 40.0 - height = 0.02 - species = [1, 2] - ds = 0.5 + density = 40.0 + height = 0.02 + species = [1, 2] + ds = 0.5 [scales] - larmor0 = 1e-5 + larmor0 = 1e-5 skindepth0 = 0.01 [algorithms] @@ -36,59 +36,59 @@ [algorithms.gca] e_ovr_b_max = 0.9 - larmor_max = 100.0 + larmor_max = 100.0 [particles] - ppc0 = 4.0 - use_weights = true - sort_interval = 100 + ppc0 = 4.0 + use_weights = true + clear_interval = 100 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [setup] - Bsurf = 1.0 - omega = 0.0125 - pp_thres = 10.0 + Bsurf = 1.0 + omega = 0.0125 + pp_thres = 10.0 gamma_pairs = 1.75 [output] @@ -96,7 +96,7 @@ [output.fields] interval_time = 0.5 - quantities = ["N_1", "N_2", "N_3", "N_4", "N_5", "N_6", "B", "E", "J"] + quantities = ["N_1", 
"N_2", "N_3", "N_4", "N_5", "N_6", "B", "E", "J"] [output.particles] enable = false diff --git a/setups/srpic/magnetosphere/magnetosphere.toml b/setups/srpic/magnetosphere/magnetosphere.toml index 34e04b02d..4c7c9117d 100644 --- a/setups/srpic/magnetosphere/magnetosphere.toml +++ b/setups/srpic/magnetosphere/magnetosphere.toml @@ -1,31 +1,31 @@ [simulation] - name = "magnetosphere" - engine = "srpic" + name = "magnetosphere" + engine = "srpic" runtime = 60.0 [grid] resolution = [2048, 1024] - extent = [[1.0, 50.0]] + extent = [[1.0, 50.0]] [grid.metric] metric = "qspherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["ATMOSPHERE", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] - + [grid.boundaries.absorb] ds = 1.0 [grid.boundaries.atmosphere] temperature = 0.1 - density = 10.0 - height = 0.02 - species = [1, 2] - ds = 2.0 - + density = 10.0 + height = 0.02 + species = [1, 2] + ds = 2.0 + [scales] - larmor0 = 2e-5 + larmor0 = 2e-5 skindepth0 = 0.01 [algorithms] @@ -36,37 +36,37 @@ [algorithms.gca] e_ovr_b_max = 0.9 - larmor_max = 1.0 + larmor_max = 1.0 [particles] - ppc0 = 5.0 - use_weights = true - sort_interval = 100 + ppc0 = 5.0 + use_weights = true + clear_interval = 100 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e8 - pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e8 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e8 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e8 + pusher = "Boris,GCA" [setup] - Bsurf = 1.0 + Bsurf = 1.0 period = 60.0 [output] format = "hdf5" - + [output.fields] interval_time = 0.1 - quantities = ["N_1", "N_2", "E", "B", "T00"] + quantities = ["N_1", "N_2", "E", "B", "T00"] [output.particles] enable = false @@ -75,5 +75,5 @@ enable = false [diagnostics] - interval = 50 + interval = 50 colored_stdout = true diff --git a/setups/srpic/monopole/monopole.toml 
b/setups/srpic/monopole/monopole.toml index 169837489..cf735fce8 100644 --- a/setups/srpic/monopole/monopole.toml +++ b/setups/srpic/monopole/monopole.toml @@ -1,31 +1,31 @@ [simulation] - name = "monopole" - engine = "srpic" + name = "monopole" + engine = "srpic" runtime = 60.0 [grid] resolution = [2048, 1024] - extent = [[1.0, 50.0]] + extent = [[1.0, 50.0]] [grid.metric] metric = "qspherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["ATMOSPHERE", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] - + [grid.boundaries.absorb] ds = 1.0 [grid.boundaries.atmosphere] temperature = 0.1 - density = 10.0 - height = 0.02 - species = [1, 2] - ds = 2.0 - + density = 10.0 + height = 0.02 + species = [1, 2] + ds = 2.0 + [scales] - larmor0 = 2e-5 + larmor0 = 2e-5 skindepth0 = 0.01 [algorithms] @@ -36,38 +36,38 @@ [algorithms.gca] e_ovr_b_max = 0.9 - larmor_max = 1.0 + larmor_max = 1.0 [particles] - ppc0 = 5.0 - use_weights = true - sort_interval = 100 + ppc0 = 5.0 + use_weights = true + clear_interval = 100 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 + label = "e-" + mass = 1.0 + charge = -1.0 maxnpart = 1e8 - pusher = "Boris,GCA" + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 + label = "e+" + mass = 1.0 + charge = 1.0 maxnpart = 1e8 - pusher = "Boris,GCA" + pusher = "Boris,GCA" [setup] - Bsurf = 1.0 + Bsurf = 1.0 period = 60.0 [output] format = "hdf5" - + [output.fields] interval_time = 0.1 - quantities = ["N_1", "N_2", "E", "B", "T00"] - mom_smooth = 2 + quantities = ["N_1", "N_2", "E", "B", "T00"] + mom_smooth = 2 [output.particles] enable = false @@ -76,5 +76,5 @@ enable = false [diagnostics] - interval = 50 + interval = 50 colored_stdout = true diff --git a/setups/srpic/shock/shock.toml b/setups/srpic/shock/shock.toml index f48edb2d6..7b2cdde2c 100644 --- a/setups/srpic/shock/shock.toml +++ b/setups/srpic/shock/shock.toml @@ -1,21 +1,21 @@ [simulation] - name = "shock" - engine = 
"srpic" + name = "shock" + engine = "srpic" runtime = 50.0 [grid] resolution = [2048, 128] - extent = [[0.0, 10.0], [-0.3125, 0.3125]] + extent = [[0.0, 10.0], [-0.3125, 0.3125]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["CONDUCTOR", "ABSORB"], ["PERIODIC"]] + fields = [["CONDUCTOR", "ABSORB"], ["PERIODIC"]] particles = [["REFLECT", "ABSORB"], ["PERIODIC"]] - + [scales] - larmor0 = 1e-2 + larmor0 = 1e-2 skindepth0 = 1e-2 [algorithms] @@ -28,24 +28,24 @@ ppc0 = 16.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 + label = "e-" + mass = 1.0 + charge = -1.0 maxnpart = 1e8 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 + label = "e+" + mass = 1.0 + charge = 1.0 maxnpart = 1e8 [setup] - drift_ux = 0.1 + drift_ux = 0.1 temperature = 1e-3 [output] interval_time = 0.1 - format = "hdf5" - + format = "hdf5" + [output.fields] quantities = ["N_1", "N_2", "E", "B", "T0i_1", "T0i_2", "J"] diff --git a/setups/srpic/turbulence/turbulence.toml b/setups/srpic/turbulence/turbulence.toml index a28afde15..a1f8e29c1 100644 --- a/setups/srpic/turbulence/turbulence.toml +++ b/setups/srpic/turbulence/turbulence.toml @@ -1,21 +1,21 @@ [simulation] - name = "turbulence" - engine = "srpic" + name = "turbulence" + engine = "srpic" runtime = 20.0 [grid] resolution = [184, 184, 184] - extent = [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]] + extent = [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 0.02 + larmor0 = 0.02 skindepth0 = 0.02 [algorithms] @@ -28,22 +28,22 @@ ppc0 = 32.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e8 + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e8 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e8 + label = 
"e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e8 [setup] - + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.1 - + [output.fields] quantities = ["N_1", "N_2", "E", "B", "J", "T00_1", "T00_2"] diff --git a/setups/srpic/weibel/weibel.toml b/setups/srpic/weibel/weibel.toml index c8e2506f6..23d119b24 100644 --- a/setups/srpic/weibel/weibel.toml +++ b/setups/srpic/weibel/weibel.toml @@ -1,21 +1,21 @@ [simulation] - name = "weibel" - engine = "srpic" + name = "weibel" + engine = "srpic" runtime = 100.0 [grid] resolution = [512, 512] - extent = [[-10.0, 10.0], [-10.0, 10.0]] + extent = [[-10.0, 10.0], [-10.0, 10.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 1.0 + larmor0 = 1.0 skindepth0 = 1.0 [algorithms] @@ -28,37 +28,37 @@ ppc0 = 16.0 [[particles.species]] - label = "e-_p" - mass = 1.0 - charge = -1.0 + label = "e-_p" + mass = 1.0 + charge = -1.0 maxnpart = 1e7 [[particles.species]] - label = "e+_p" - mass = 1.0 - charge = 1.0 + label = "e+_p" + mass = 1.0 + charge = 1.0 maxnpart = 1e7 [[particles.species]] - label = "e-_b" - mass = 1.0 - charge = -1.0 + label = "e-_b" + mass = 1.0 + charge = -1.0 maxnpart = 1e7 [[particles.species]] - label = "e+_b" - mass = 1.0 - charge = 1.0 + label = "e+_b" + mass = 1.0 + charge = 1.0 maxnpart = 1e7 [setup] - drift_u_1 = 0.2 - drift_u_2 = 0.2 - temp_1 = 1e-4 - temp_2 = 1e-4 - + drift_u_1 = 0.2 + drift_u_2 = 0.2 + temp_1 = 1e-4 + temp_2 = 1e-4 + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.25 [output.fields] From e2644a691eab9107b9cd338f2804e82391576a80 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 22 Jan 2025 15:21:41 -0500 Subject: [PATCH 34/52] rm old sorting + added new comm --- src/engines/engine.hpp | 6 +- src/engines/engine_printer.cpp | 4 +- src/engines/engine_run.cpp | 15 +- src/engines/srpic.hpp | 23 +- 
src/framework/containers/particles.cpp | 184 +++-- src/framework/containers/particles.h | 23 +- src/framework/domain/comm_mpi.hpp | 895 ++++++------------------ src/framework/domain/communications.cpp | 755 ++++---------------- src/framework/domain/domain.h | 11 +- src/framework/domain/metadomain.cpp | 27 - src/framework/domain/metadomain.h | 6 +- src/framework/domain/output.cpp | 13 +- src/framework/parameters.cpp | 19 +- src/framework/tests/parameters.cpp | 105 +-- src/global/arch/directions.h | 126 ++-- src/global/arch/kokkos_aliases.cpp | 52 +- src/global/defaults.h | 8 +- src/global/global.cpp | 12 +- src/global/global.h | 2 +- src/global/utils/diag.cpp | 11 +- src/global/utils/diag.h | 6 +- src/global/utils/timer.cpp | 13 +- src/kernels/particle_pusher_sr.hpp | 2 +- 23 files changed, 674 insertions(+), 1644 deletions(-) diff --git a/src/engines/engine.hpp b/src/engines/engine.hpp index 5b7caa502..dac553dcd 100644 --- a/src/engines/engine.hpp +++ b/src/engines/engine.hpp @@ -55,10 +55,12 @@ namespace ntt { static_assert(user::PGen::is_pgen, "unrecognized problem generator"); protected: -#if MPI_ENABLED +#if defined(OUTPUT_ENABLED) + #if defined(MPI_ENABLED) adios2::ADIOS m_adios { MPI_COMM_WORLD }; -#else + #else adios2::ADIOS m_adios; + #endif #endif SimulationParams m_params; diff --git a/src/engines/engine_printer.cpp b/src/engines/engine_printer.cpp index 2608ea2f6..4b6ed42d7 100644 --- a/src/engines/engine_printer.cpp +++ b/src/engines/engine_printer.cpp @@ -105,8 +105,8 @@ namespace ntt { color::RESET); } - auto bytes_to_human_readable(std::size_t bytes) - -> std::pair { + auto bytes_to_human_readable( + std::size_t bytes) -> std::pair { const std::vector units { "B", "KB", "MB", "GB", "TB" }; std::size_t unit_idx = 0; auto size = static_cast(bytes); diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index bec5b8652..1db2de2ca 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -26,8 +26,8 @@ namespace ntt { 
"CurrentFiltering", "CurrentDeposit", "ParticlePusher", "FieldBoundaries", "ParticleBoundaries", "Communications", - "Injector", "Sorting", - "Custom", "Output", + "Injector", "Custom", + "PrtlClear", "Output", "Checkpoint" }, []() { Kokkos::fence(); @@ -37,9 +37,9 @@ namespace ntt { const auto diag_interval = m_params.get( "diagnostics.interval"); - auto time_history = pbar::DurationHistory { 1000 }; - const auto sort_interval = m_params.template get( - "particles.sort_interval"); + auto time_history = pbar::DurationHistory { 1000 }; + const auto clear_interval = m_params.template get( + "particles.clear_interval"); // main algorithm loop while (step < max_steps) { @@ -56,7 +56,8 @@ namespace ntt { }); timers.stop("Custom"); } - auto print_sorting = (sort_interval > 0 and step % sort_interval == 0); + auto print_prtl_clear = (clear_interval > 0 and + step % clear_interval == 0 and step > 0); // advance time & step time += dt; @@ -109,7 +110,7 @@ namespace ntt { m_metadomain.species_labels(), m_metadomain.l_npart_perspec(), m_metadomain.l_maxnpart_perspec(), - print_sorting, + print_prtl_clear, print_output, print_checkpoint, m_params.get("diagnostics.colored_stdout")); diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 0489d8508..b54291540 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -80,8 +80,8 @@ namespace ntt { "algorithms.toggles.fieldsolver"); const auto deposit_enabled = m_params.template get( "algorithms.toggles.deposit"); - const auto sort_interval = m_params.template get( - "particles.sort_interval"); + const auto clear_interval = m_params.template get( + "particles.clear_interval"); if (step == 0) { // communicate fields and apply BCs on the first timestep @@ -126,15 +126,8 @@ namespace ntt { timers.stop("CurrentFiltering"); } - // Tags are assigned by now - if (step == 0){ - m_metadomain.SetParticleIDs(dom); - } - timers.start("Communications"); - if ((sort_interval > 0) and (step % sort_interval == 0)) { - 
m_metadomain.CommunicateParticlesBuffer(dom, &timers); - } + m_metadomain.CommunicateParticles(dom); timers.stop("Communications"); } @@ -176,12 +169,10 @@ namespace ntt { timers.stop("Injector"); } - if (step % 100 == 0 && step > 0){ - MPI_Barrier(MPI_COMM_WORLD); - timers.start("RemoveDead"); - m_metadomain.RemoveDeadParticles(dom, &timers); - timers.stop("RemoveDead"); - MPI_Barrier(MPI_COMM_WORLD); + if (clear_interval > 0 and step % clear_interval == 0 and step > 0) { + timers.start("PrtlClear"); + m_metadomain.RemoveDeadParticles(dom); + timers.stop("PrtlClear"); } } diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index 1cb63bf43..758118d6c 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -10,7 +10,9 @@ #include #include +#include +#include #include #include @@ -47,9 +49,6 @@ namespace ntt { tag = array_t { label + "_tag", maxnpart }; tag_h = Kokkos::create_mirror_view(tag); - particleID = array_t {label + "_particleID", maxnpart}; - particleID_h = Kokkos::create_mirror_view(particleID); - for (unsigned short n { 0 }; n < npld; ++n) { pld.push_back(array_t("pld", maxnpart)); pld_h.push_back(Kokkos::create_mirror_view(pld[n])); @@ -81,93 +80,150 @@ namespace ntt { } template - auto Particles::npart_per_tag() const -> std::pair, - array_t>{ + auto Particles::NpartsPerTagAndOffsets() const + -> std::pair, array_t> { auto this_tag = tag; - array_t npart_tag("npart_tags", ntags()); + const auto num_tags = ntags(); + array_t npptag("nparts_per_tag", ntags()); - // Print tag_h array - auto tag_host = Kokkos::create_mirror_view(tag); - Kokkos::deep_copy(tag_host, tag); - auto npart_tag_scatter = Kokkos::Experimental::create_scatter_view(npart_tag); + // count # of particles per each tag + auto npptag_scat = Kokkos::Experimental::create_scatter_view(npptag); Kokkos::parallel_for( "NpartPerTag", - npart(), + rangeActiveParticles(), Lambda(index_t p) { - auto 
npart_tag_scatter_access = npart_tag_scatter.access(); - npart_tag_scatter_access((int)(this_tag(p))) += 1; + auto npptag_acc = npptag_scat.access(); + if (this_tag(p) < 0 || this_tag(p) >= num_tags) { + raise::KernelError(HERE, "Invalid tag value"); + } + npptag_acc(this_tag(p)) += 1; }); - Kokkos::Experimental::contribute(npart_tag, npart_tag_scatter); + Kokkos::Experimental::contribute(npptag, npptag_scat); + + // copy the count to a vector on the host + auto npptag_h = Kokkos::create_mirror_view(npptag); + Kokkos::deep_copy(npptag_h, npptag); + std::vector npptag_vec(num_tags); + for (auto t { 0u }; t < num_tags; ++t) { + npptag_vec[t] = npptag_h(t); + } - auto npart_tag_host = Kokkos::create_mirror_view(npart_tag); - Kokkos::deep_copy(npart_tag_host, npart_tag); - array_t tag_offset("tag_offset", ntags()); - auto tag_offset_host = Kokkos::create_mirror_view(tag_offset); + // count the offsets on the host and copy to device + array_t tag_offset("tag_offset", num_tags - 3); + auto tag_offset_h = Kokkos::create_mirror_view(tag_offset); - std::vector npart_tag_vec(ntags()); - for (std::size_t t { 0 }; t < ntags(); ++t) { - npart_tag_vec[t] = npart_tag_host(t); - tag_offset_host(t) = (t > 0) ? npart_tag_vec[t - 1] : 0; - } - for (std::size_t t { 0 }; t < ntags(); ++t) { - tag_offset_host(t) += (t > 0) ? tag_offset_host(t - 1) : 0; + for (auto t { 0u }; t < num_tags - 3; ++t) { + tag_offset_h(t) = npptag_vec[t + 2] + (t > 0u ? 
tag_offset_h(t - 1) : 0); } - Kokkos::deep_copy(tag_offset, tag_offset_host); - return std::make_pair(npart_tag_vec, tag_offset); + Kokkos::deep_copy(tag_offset, tag_offset_h); + + return { npptag_vec, tag_offset }; } - template - auto Particles::SortByTags() -> std::vector { - if (npart() == 0 || is_sorted()) { - return npart_per_tag().first; - } - using KeyType = array_t; - using BinOp = sort::BinTag; - BinOp bin_op(ntags()); - auto slice = range_tuple_t(0, npart()); - Kokkos::BinSort Sorter(Kokkos::subview(tag, slice), bin_op, false); - Sorter.create_permute_vector(); + template + void RemoveDeadInArray(array_t& arr, + const array_t& indices_alive) { + auto n_alive = indices_alive.extent(0); + auto buffer = Kokkos::View("buffer", n_alive); + Kokkos::parallel_for( + "PopulateBufferAlive", + n_alive, + Lambda(index_t p) { buffer(p) = arr(indices_alive(p)); }); - Sorter.sort(Kokkos::subview(i1, slice)); - Sorter.sort(Kokkos::subview(dx1, slice)); - Sorter.sort(Kokkos::subview(i1_prev, slice)); - Sorter.sort(Kokkos::subview(dx1_prev, slice)); - Sorter.sort(Kokkos::subview(ux1, slice)); - Sorter.sort(Kokkos::subview(ux2, slice)); - Sorter.sort(Kokkos::subview(ux3, slice)); + Kokkos::deep_copy( + Kokkos::subview(arr, std::make_pair(static_cast(0), n_alive)), + buffer); + } + + template + void Particles::RemoveDead() { + const auto n_part = npart(); + std::size_t n_alive = 0, n_dead = 0; + auto& this_tag = tag; + + Kokkos::parallel_reduce( + "CountDeadAlive", + rangeActiveParticles(), + Lambda(index_t p, std::size_t & nalive, std::size_t & ndead) { + nalive += (this_tag(p) == ParticleTag::alive); + ndead += (this_tag(p) == ParticleTag::dead); + if (this_tag(p) != ParticleTag::alive and this_tag(p) != ParticleTag::dead) { + raise::KernelError(HERE, "wrong particle tag"); + } + }, + n_alive, + n_dead); + + array_t indices_alive { "indices_alive", n_alive }; + array_t alive_counter { "counter_alive", 1 }; - Sorter.sort(Kokkos::subview(tag, slice)); - 
Sorter.sort(Kokkos::subview(weight, slice)); + Kokkos::parallel_for( + "AliveIndices", + rangeActiveParticles(), + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive) { + const auto idx = Kokkos::atomic_fetch_add(&alive_counter(0), 1); + indices_alive(idx) = p; + } + }); - for (unsigned short n { 0 }; n < npld(); ++n) { - Sorter.sort(Kokkos::subview(pld[n], slice)); + { + auto alive_counter_h = Kokkos::create_mirror_view(alive_counter); + Kokkos::deep_copy(alive_counter_h, alive_counter); + raise::ErrorIf(alive_counter_h(0) != n_alive, + "error in finding alive particle indices", + HERE); } - if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { - Sorter.sort(Kokkos::subview(i2, slice)); - Sorter.sort(Kokkos::subview(dx2, slice)); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + RemoveDeadInArray(i1, indices_alive); + RemoveDeadInArray(i1_prev, indices_alive); + RemoveDeadInArray(dx1, indices_alive); + RemoveDeadInArray(dx1_prev, indices_alive); + } - Sorter.sort(Kokkos::subview(i2_prev, slice)); - Sorter.sort(Kokkos::subview(dx2_prev, slice)); + if constexpr (D == Dim::_2D or D == Dim::_3D) { + RemoveDeadInArray(i2, indices_alive); + RemoveDeadInArray(i2_prev, indices_alive); + RemoveDeadInArray(dx2, indices_alive); + RemoveDeadInArray(dx2_prev, indices_alive); } + if constexpr (D == Dim::_3D) { - Sorter.sort(Kokkos::subview(i3, slice)); - Sorter.sort(Kokkos::subview(dx3, slice)); + RemoveDeadInArray(i3, indices_alive); + RemoveDeadInArray(i3_prev, indices_alive); + RemoveDeadInArray(dx3, indices_alive); + RemoveDeadInArray(dx3_prev, indices_alive); + } + + RemoveDeadInArray(ux1, indices_alive); + RemoveDeadInArray(ux2, indices_alive); + RemoveDeadInArray(ux3, indices_alive); + RemoveDeadInArray(weight, indices_alive); - Sorter.sort(Kokkos::subview(i3_prev, slice)); - Sorter.sort(Kokkos::subview(dx3_prev, slice)); + if constexpr (D == Dim::_2D && C != Coord::Cart) { + RemoveDeadInArray(phi, indices_alive); } - if ((D == Dim::_2D) && (C 
!= Coord::Cart)) { - Sorter.sort(Kokkos::subview(phi, slice)); + for (auto& payload : pld) { + RemoveDeadInArray(payload, indices_alive); } - auto np_per_tag_tag_offset = npart_per_tag(); - const auto np_per_tag = np_per_tag_tag_offset.first; - set_npart(np_per_tag[(short)(ParticleTag::alive)]); + Kokkos::Experimental::fill( + "TagAliveParticles", + AccelExeSpace(), + Kokkos::subview(this_tag, + std::make_pair(static_cast(0), n_alive)), + ParticleTag::alive); + + Kokkos::Experimental::fill( + "TagDeadParticles", + AccelExeSpace(), + Kokkos::subview(this_tag, std::make_pair(n_alive, n_alive + n_dead)), + ParticleTag::dead); + set_npart(n_alive); m_is_sorted = true; - return np_per_tag; } template diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index 131ff45c0..3ae68b402 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -64,8 +64,6 @@ namespace ntt { std::vector> pld; // phi coordinate (for axisymmetry) array_t phi; - // Array to store the particle ids - array_t particleID; // host mirrors array_mirror_t i1_h, i2_h, i3_h; @@ -75,7 +73,6 @@ namespace ntt { array_mirror_t phi_h; array_mirror_t tag_h; std::vector> pld_h; - array_mirror_t particleID_h; // for empty allocation Particles() {} @@ -181,7 +178,6 @@ namespace ntt { footprint += sizeof(prtldx_t) * dx2_prev.extent(0); footprint += sizeof(prtldx_t) * dx3_prev.extent(0); footprint += sizeof(short) * tag.extent(0); - footprint += sizeof(long) * particleID.extent(0); for (auto& p : pld) { footprint += sizeof(real_t) * p.extent(0); } @@ -191,9 +187,19 @@ namespace ntt { /** * @brief Count the number of particles with a specific tag. - * @return The vector of counts for each tag. + * @return The vector of counts for each tag + offsets + * @note For instance, given the counts: 0 -> n0, 1 -> n1, 2 -> n2, 3 -> n3, + * ... it returns: + * ... [n0, n1, n2, n3, ...] of size ntags + * ... [n2, n2 + n3, n2 + n3 + n4, ...] 
of size ntags - 3 + * ... so in buffer array: + * ... tag=2 particles are offset by 0 + * ... tag=3 particles are offset by n2 + * ... tag=4 particles are offset by n2 + n3 + * ... etc. */ - auto npart_per_tag() const -> std::pair, array_t>; + auto NpartsPerTagAndOffsets() const + -> std::pair, array_t>; /* setters -------------------------------------------------------------- */ /** @@ -216,10 +222,9 @@ namespace ntt { } /** - * @brief Sort particles by their tags. - * @return The vector of counts per each tag. + * @brief Move dead particles to the end of arrays */ - auto SortByTags() -> std::vector; + void RemoveDead(); /** * @brief Copy particle data from device to host. diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index ce38a8261..370c02b18 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -24,6 +24,7 @@ #include #include +#include #include namespace comm { @@ -283,50 +284,10 @@ namespace comm { } } - template - void CommunicateParticleQuantity(array_t& arr, - int send_rank, - int recv_rank, - const range_tuple_t& send_slice, - const range_tuple_t& recv_slice) { - const std::size_t send_count = send_slice.second - send_slice.first; - const std::size_t recv_count = recv_slice.second - recv_slice.first; - if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and - (recv_count > 0)) { - MPI_Sendrecv(arr.data() + send_slice.first, - send_count, - mpi::get_type(), - send_rank, - 0, - arr.data() + recv_slice.first, - recv_count, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } else if ((send_rank >= 0) and (send_count > 0)) { - MPI_Send(arr.data() + send_slice.first, - send_count, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - } else if ((recv_rank >= 0) and (recv_count > 0)) { - MPI_Recv(arr.data() + recv_slice.first, - recv_count, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } - } - - void 
ParticleSendRecvCount(int send_rank, - int recv_rank, - const std::size_t& send_count, - std::size_t& recv_count) { + void ParticleSendRecvCount(int send_rank, + int recv_rank, + std::size_t send_count, + std::size_t& recv_count) { if ((send_rank >= 0) && (recv_rank >= 0)) { MPI_Sendrecv(&send_count, 1, @@ -356,644 +317,246 @@ namespace comm { } template - auto CommunicateParticles(Particles& species, - int send_rank, - int recv_rank, - const range_tuple_t& send_slice, - std::size_t& index_last) -> std::size_t { - if ((send_rank < 0) && (recv_rank < 0)) { - raise::Error("No send or recv in CommunicateParticles", HERE); - } - std::size_t recv_count { 0 }; - ParticleSendRecvCount(send_rank, - recv_rank, - send_slice.second - send_slice.first, - recv_count); - raise::FatalIf((index_last + recv_count) >= species.maxnpart(), - "Too many particles to receive (cannot fit into maxptl)", - HERE); - const auto recv_slice = range_tuple_t({ index_last, index_last + recv_count }); - CommunicateParticleQuantity(species.i1, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.dx1, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.i1_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.dx1_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - if constexpr (D == Dim::_2D || D == Dim::_3D) { - CommunicateParticleQuantity(species.i2, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.dx2, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.i2_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.dx2_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - } - if constexpr (D == Dim::_3D) { - CommunicateParticleQuantity(species.i3, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.dx3, - send_rank, - 
recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.i3_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.dx3_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - } - CommunicateParticleQuantity(species.ux1, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.ux2, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.ux3, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.weight, - send_rank, - recv_rank, - send_slice, - recv_slice); - if constexpr (D == Dim::_2D and C != Coord::Cart) { - CommunicateParticleQuantity(species.phi, - send_rank, - recv_rank, - send_slice, - recv_slice); - } - for (auto p { 0 }; p < species.npld(); ++p) { - CommunicateParticleQuantity(species.pld[p], - send_rank, - recv_rank, - send_slice, - recv_slice); - } - return recv_count; - } + void CommunicateParticles(Particles& species, + Kokkos::View outgoing_indices, + Kokkos::View tag_offsets, + std::vector npptag_vec, + std::vector npptag_recv_vec, + std::vector send_ranks, + std::vector recv_ranks, + const dir::dirs_t& dirs_to_comm) { + // Pointers to the particle data arrays + auto& this_i1 = species.i1; + auto& this_i1_prev = species.i1_prev; + auto& this_i2 = species.i2; + auto& this_i2_prev = species.i2_prev; + auto& this_i3 = species.i3; + auto& this_i3_prev = species.i3_prev; + auto& this_dx1 = species.dx1; + auto& this_dx1_prev = species.dx1_prev; + auto& this_dx2 = species.dx2; + auto& this_dx2_prev = species.dx2_prev; + auto& this_dx3 = species.dx3; + auto& this_dx3_prev = species.dx3_prev; + auto& this_phi = species.phi; + auto& this_ux1 = species.ux1; + auto& this_ux2 = species.ux2; + auto& this_ux3 = species.ux3; + auto& this_weight = species.weight; + auto& this_tag = species.tag; + + // number of arrays of each type to send/recv + const unsigned short NREALS = 4 + static_cast( + D == Dim::_2D and 
C != Coord::Cart); + const unsigned short NINTS = 2 * static_cast(D); + const unsigned short NPRTLDX = 2 * static_cast(D); + const unsigned short NPLD = species.npld(); + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + // buffers to store recv data + const auto npart_alive = npptag_vec[ParticleTag::alive]; + const auto npart_dead = npptag_vec[ParticleTag::dead]; + const auto npart_send = outgoing_indices.extent(0) - npart_dead; + const auto npart_recv = std::accumulate(npptag_recv_vec.begin(), + npptag_recv_vec.end(), + static_cast(0)); - template - void CommunicateParticlesBuffer(Particles& species, - Kokkos::View permute_vector, - Kokkos::View allocation_vector, - Kokkos::View tag_offset, - std::vector npart_per_tag_arr, - std::vector npart_per_tag_arr_recv, - std::vector send_ranks, - std::vector recv_ranks, - const dir::dirs_t& legal_directions) { - // Pointers to the particle data arrays - auto &this_ux1 = species.ux1; - auto &this_ux2 = species.ux2; - auto &this_ux3 = species.ux3; - auto &this_weight = species.weight; - auto &this_phi = species.phi; - auto &this_i1 = species.i1; - auto &this_i1_prev = species.i1_prev; - auto &this_i2 = species.i2; - auto &this_i3 = species.i3; - auto &this_i2_prev = species.i2_prev; - auto &this_i3_prev = species.i3_prev; - auto &this_dx1 = species.dx1; - auto &this_dx1_prev = species.dx1_prev; - auto &this_dx2 = species.dx2; - auto &this_dx3 = species.dx3; - auto &this_dx2_prev = species.dx2_prev; - auto &this_dx3_prev = species.dx3_prev; - auto &this_tag = species.tag; - auto &this_particleID = species.particleID; - - // Number of arrays of each type to send/recv - auto NREALS = 4; - auto NINTS = 2; - auto NFLOATS = 2; - auto NLONGS = 2; - if constexpr (D == Dim::_2D) { - if (C != Coord::Cart) { - NREALS = 5; - NINTS = 4; - NFLOATS = 4; - this_phi = species.phi; - } else { - NREALS = 4; - NINTS = 4; - NFLOATS = 4; - } - } - if constexpr (D == Dim::_3D) { - NREALS = 4; - NINTS = 6; - NFLOATS = 6; - } + Kokkos::View 
recv_buff_int { "recv_buff_int", npart_recv * NINTS }; + Kokkos::View recv_buff_real { "recv_buff_real", npart_recv * NREALS }; + Kokkos::View recv_buff_prtldx { "recv_buff_prtldx", + npart_recv * NPRTLDX }; - // Now make buffers to store recevied data (don't need global send buffers) - const auto total_send = permute_vector.extent(0) - npart_per_tag_arr[ParticleTag::dead]; - const auto total_recv = allocation_vector.extent(0); - const auto n_alive = npart_per_tag_arr[ParticleTag::alive]; - const auto n_dead = npart_per_tag_arr[ParticleTag::dead]; - - // Debug test: print send and recv count - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Total send: %d, Total recv: %d \n", rank, total_send, total_recv); - } - /* - Brief on recv buffers: Each recv buffer contains all the received arrays of - a given type. The different physical quantities are stored next to each other - to avoid cache misses. The array is structured as follows: - E.g., - recv_buffer_int: | qty1 | qty2 | ... | qtyNINTS | qty1 | qty2 | ... | qtyNINTS | ... 
- <-------particle to recv1------> <-------particle to recv2--------> - <----------------------------------total_recv----------------------------> - */ - Kokkos::View recv_buffer_int("recv_buffer_int", total_recv * NINTS); - Kokkos::View recv_buffer_real("recv_buffer_real", total_recv * NREALS); - Kokkos::View recv_buffer_prtldx("recv_buffer_prtldx",total_recv * NFLOATS); - Kokkos::View recv_buffer_long("recv_buffer_long", total_recv * NLONGS); - auto recv_buffer_int_h = Kokkos::create_mirror_view(recv_buffer_int); - auto recv_buffer_real_h = Kokkos::create_mirror_view(recv_buffer_real); - auto recv_buffer_prtldx_h = Kokkos::create_mirror_view(recv_buffer_prtldx); - auto recv_buffer_long_h = Kokkos::create_mirror_view(recv_buffer_long); - - auto iteration = 0; + auto iteration = 0; auto current_received = 0; - for (const auto& direction : legal_directions) { - const auto send_rank = send_ranks[iteration]; - const auto recv_rank = recv_ranks[iteration]; - const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); - const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); - const auto send_count = npart_per_tag_arr[tag_send]; - const auto recv_count = npart_per_tag_arr_recv[tag_recv]; - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - } + + for (const auto& direction : dirs_to_comm) { + const auto send_rank = send_ranks[iteration]; + const auto recv_rank = recv_ranks[iteration]; + const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); + const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); + const auto npart_send_in = npptag_vec[tag_send]; + const auto npart_recv_in = npptag_recv_vec[tag_recv - 2]; if (send_rank < 0 and recv_rank < 0) { continue; } - Kokkos::View send_buffer_int("send_buffer_int", send_count * NINTS); - Kokkos::View send_buffer_real("send_buffer_real", send_count * NREALS); - Kokkos::View send_buffer_prtldx("send_buffer_prtldx",send_count * NFLOATS); - Kokkos::View send_buffer_long("send_buffer_long", send_count * NLONGS); - 
auto send_buffer_int_h = Kokkos::create_mirror_view(send_buffer_int); - auto send_buffer_real_h = Kokkos::create_mirror_view(send_buffer_real); - auto send_buffer_prtldx_h = Kokkos::create_mirror_view(send_buffer_prtldx); - auto send_buffer_long_h = Kokkos::create_mirror_view(send_buffer_long); - - // Need different constexpr parallel fors for different dims - if constexpr(D == Dim::_1D) { - Kokkos::parallel_for( - "PopulateSendBuffer", - send_count, - Lambda(const std::size_t p){ - const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); - send_buffer_int(NINTS * p + 0) = this_i1(idx); - send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); - send_buffer_real(NREALS * p + 0) = this_ux1(idx); - send_buffer_real(NREALS * p + 1) = this_ux2(idx); - send_buffer_real(NREALS * p + 2) = this_ux3(idx); - send_buffer_real(NREALS * p + 3) = this_weight(idx); - send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); - send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); - send_buffer_long(NLONGS * p + 0) = this_particleID(idx); - send_buffer_long(NLONGS * p + 1) = this_tag(idx); - this_tag(idx) = ParticleTag::dead; - }); - } - if constexpr(D == Dim::_2D && C == Coord::Cart) { - Kokkos::parallel_for( - "PopulateSendBuffer", - send_count, - Lambda(const std::size_t p){ - const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); - send_buffer_int(NINTS * p + 0) = this_i1(idx); - send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); - send_buffer_int(NINTS * p + 2) = this_i2(idx); - send_buffer_int(NINTS * p + 3) = this_i2_prev(idx); - send_buffer_real(NREALS * p + 0) = this_ux1(idx); - send_buffer_real(NREALS * p + 1) = this_ux2(idx); - send_buffer_real(NREALS * p + 2) = this_ux3(idx); - send_buffer_real(NREALS * p + 3) = this_weight(idx); - send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); - send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); - send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); - send_buffer_prtldx(NFLOATS * p + 3) = 
this_dx2_prev(idx); - send_buffer_long(NLONGS * p + 0) = this_particleID(idx); - send_buffer_long(NLONGS * p + 1) = this_tag(idx); - this_tag(idx) = ParticleTag::dead; - }); - } - if constexpr(D == Dim::_2D && C != Coord::Cart) { - Kokkos::parallel_for( - "PopulateSendBuffer", - send_count, - Lambda(const std::size_t p){ - const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); - send_buffer_int(NINTS * p + 0) = this_i1(idx); - send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); - send_buffer_int(NINTS * p + 2) = this_i2(idx); - send_buffer_int(NINTS * p + 3) = this_i2_prev(idx); - send_buffer_real(NREALS * p + 0) = this_ux1(idx); - send_buffer_real(NREALS * p + 1) = this_ux2(idx); - send_buffer_real(NREALS * p + 2) = this_ux3(idx); - send_buffer_real(NREALS * p + 3) = this_weight(idx); - send_buffer_real(NREALS * p + 4) = this_phi(idx); - send_buffer_prtldx(NFLOATS * p + 0) = this_dx1(idx); - send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); - send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); - send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); - send_buffer_long(NLONGS * p + 0) = this_particleID(idx); - send_buffer_long(NLONGS * p + 1) = this_tag(idx); - this_tag(idx) = ParticleTag::dead; - }); - } - if constexpr(D == Dim::_3D) { - Kokkos::parallel_for( - "PopulateSendBuffer", - send_count, - Lambda(const std::size_t p){ - const auto idx = permute_vector(tag_offset(tag_send) - n_alive + p); - send_buffer_int(NINTS * p + 0) = this_i1(idx); - send_buffer_int(NINTS * p + 1) = this_i1_prev(idx); - send_buffer_int(NINTS * p + 2) = this_i2(idx); - send_buffer_int(NINTS * p + 3) = this_i2_prev(idx); - send_buffer_int(NINTS * p + 4) = this_i3(idx); - send_buffer_int(NINTS * p + 5) = this_i3_prev(idx); - send_buffer_real(NREALS * p + 0) = this_ux1(idx); - send_buffer_real(NREALS * p + 1) = this_ux2(idx); - send_buffer_real(NREALS * p + 2) = this_ux3(idx); - send_buffer_real(NREALS * p + 3) = this_weight(idx); - send_buffer_prtldx(NFLOATS * p 
+ 0) = this_dx1(idx); - send_buffer_prtldx(NFLOATS * p + 1) = this_dx1_prev(idx); - send_buffer_prtldx(NFLOATS * p + 2) = this_dx2(idx); - send_buffer_prtldx(NFLOATS * p + 3) = this_dx2_prev(idx); - send_buffer_prtldx(NFLOATS * p + 4) = this_dx3(idx); - send_buffer_prtldx(NFLOATS * p + 5) = this_dx3_prev(idx); - send_buffer_long(NLONGS * p + 0) = this_particleID(idx); - send_buffer_long(NLONGS * p + 1) = this_tag(idx); - this_tag(idx) = ParticleTag::dead; - }); - } - - auto tag_offset_h = Kokkos::create_mirror_view(tag_offset); - Kokkos::deep_copy(tag_offset_h, tag_offset); - /* - Brief on receive offset: - The receive buffer looks like this - <-----------------------------------> - |NINT|NINT|NINT|NINT|NINT|NINT|NINT|NINT|...xnrecv - <--------><--------><--------><--------> - recv1 recv2 recv3 recv4 - |________| - ^ ^ - offset offset + nrecv - */ - const auto receive_offset_int = current_received * NINTS; - const auto receive_offset_real = current_received * NREALS; - const auto receive_offset_prtldx = current_received * NFLOATS; - const auto receive_offset_long = current_received * NLONGS; - // Comms - // Make host arrays for send and recv buffers - Kokkos::deep_copy(send_buffer_int_h, send_buffer_int); - Kokkos::deep_copy(send_buffer_real_h, send_buffer_real); - Kokkos::deep_copy(send_buffer_prtldx_h, send_buffer_prtldx); - Kokkos::deep_copy(send_buffer_long_h, send_buffer_long); - - if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and - (recv_count > 0)) { - // Debug: Print the rank and type of mpi operation performed - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing sendrecv operation, direction %d \n", rank, direction); - } - MPI_Sendrecv(send_buffer_int.data(), - send_count * NINTS, - mpi::get_type(), - send_rank, - 0, - recv_buffer_int.data() + receive_offset_int, - recv_count*NINTS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - MPI_Sendrecv(send_buffer_real.data(), - 
send_count * NREALS, - mpi::get_type(), - send_rank, - 0, - recv_buffer_real.data() + receive_offset_real, - recv_count*NREALS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - MPI_Sendrecv(send_buffer_prtldx.data(), - send_count * NFLOATS, - mpi::get_type(), - send_rank, - 0, - recv_buffer_prtldx.data() + receive_offset_prtldx, - recv_count*NFLOATS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - MPI_Sendrecv(send_buffer_long.data(), - send_count * NLONGS, - mpi::get_type(), - send_rank, - 0, - recv_buffer_long.data() + receive_offset_long, - recv_count*NLONGS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } else if ((send_rank >= 0) and (send_count > 0)) { - // Debug: Print the rank and type of mpi operation performed - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing send operation, direction %d \n", rank, direction); - } - MPI_Send(send_buffer_int.data(), - send_count * NINTS, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - MPI_Send(send_buffer_real.data(), - send_count * NREALS, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - MPI_Send(send_buffer_prtldx.data(), - send_count * NFLOATS, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - MPI_Send(send_buffer_long.data(), - send_count * NLONGS, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - } else if ((recv_rank >= 0) and (recv_count > 0)) { - // Debug: Print the rank and type of mpi operation performed - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("MPI rank: %d, Performing recv operation, direction %d \n", rank, direction); - } - MPI_Recv(recv_buffer_int.data() + receive_offset_int, - recv_count * NINTS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - MPI_Recv(recv_buffer_real.data() + receive_offset_real, - recv_count * NREALS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - 
MPI_STATUS_IGNORE); - MPI_Recv(recv_buffer_prtldx.data() + receive_offset_prtldx, - recv_count * NFLOATS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - MPI_Recv(recv_buffer_long.data() + receive_offset_long, - recv_count * NLONGS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } - current_received += recv_count; - iteration++; - - // Debug test: Print recv buffer before and after - /* - { - int total_ranks; - MPI_Comm_size(MPI_COMM_WORLD, &total_ranks); - for (int allranks=0; allranks send_buff_int { "send_buff_int", npart_send_in * NINTS }; + Kokkos::View send_buff_real { "send_buff_real", + npart_send_in * NREALS }; + Kokkos::View send_buff_prtldx { "send_buff_prtldx", + npart_send_in * NPRTLDX }; + Kokkos::parallel_for( + "PopulateSendBuffer", + npart_send_in, + Lambda(index_t p) { + const auto idx = outgoing_indices( + (tag_send > 2 ? tag_offsets(tag_send - 3) : 0) + npart_dead + p); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + send_buff_int(NINTS * p + 0) = this_i1(idx); + send_buff_int(NINTS * p + 1) = this_i1_prev(idx); + send_buff_prtldx(NPRTLDX * p + 0) = this_dx1(idx); + send_buff_prtldx(NPRTLDX * p + 1) = this_dx1_prev(idx); } - auto allocation_vector_h = Kokkos::create_mirror_view(allocation_vector); - Kokkos::deep_copy(allocation_vector_h, allocation_vector); - - for (int i=0; i= 0) and (recv_rank >= 0) and (npart_send_in > 0) and + (npart_recv_in > 0)) { + raise::ErrorIf(recv_offset_int + npart_recv_in * NINTS > + recv_buff_int.extent(0), + "incorrect # of recv particles", + HERE); + MPI_Sendrecv(send_buff_int.data(), + npart_send_in * NINTS, + mpi::get_type(), + send_rank, + 0, + recv_buff_int.data() + recv_offset_int, + npart_recv_in * NINTS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Sendrecv(send_buff_real.data(), + npart_send_in * NREALS, + mpi::get_type(), + send_rank, + 0, + recv_buff_real.data() + recv_offset_real, + 
npart_recv_in * NREALS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Sendrecv(send_buff_prtldx.data(), + npart_send_in * NPRTLDX, + mpi::get_type(), + send_rank, + 0, + recv_buff_prtldx.data() + recv_offset_prtldx, + npart_recv_in * NPRTLDX, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } else if ((send_rank >= 0) and (npart_send_in > 0)) { + MPI_Send(send_buff_int.data(), + npart_send_in * NINTS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + MPI_Send(send_buff_real.data(), + npart_send_in * NREALS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + MPI_Send(send_buff_prtldx.data(), + npart_send_in * NPRTLDX, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + } else if ((recv_rank >= 0) and (npart_recv_in > 0)) { + raise::ErrorIf(recv_offset_int + npart_recv_in * NINTS > + recv_buff_int.extent(0), + "incorrect # of recv particles", + HERE); + MPI_Recv(recv_buff_int.data() + recv_offset_int, + npart_recv_in * NINTS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Recv(recv_buff_real.data() + recv_offset_real, + npart_recv_in * NREALS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Recv(recv_buff_prtldx.data() + recv_offset_prtldx, + npart_recv_in * NPRTLDX, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); } - } - */ - - } // end over direction loop - /*Kokkos::deep_copy(recv_buffer_int, recv_buffer_int_h); - Kokkos::deep_copy(recv_buffer_real, recv_buffer_real_h); - Kokkos::deep_copy(recv_buffer_prtldx, recv_buffer_prtldx_h);*/ - if constexpr (D == Dim::_1D) - { - Kokkos::parallel_for( - "PopulateFromRecvBuffer", - total_recv, - Lambda(const std::size_t p){ - auto idx = allocation_vector(p); - this_tag(idx) = ParticleTag::alive; - this_i1(idx) = recv_buffer_int(NINTS * p + 0); - this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); - this_ux1(idx) = recv_buffer_real(NREALS * 
p + 0); - this_ux2(idx) = recv_buffer_real(NREALS * p + 1); - this_ux3(idx) = recv_buffer_real(NREALS * p + 2); - this_weight(idx) = recv_buffer_real(NREALS * p + 3); - this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); - this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); - this_particleID(idx) = recv_buffer_long(NLONGS * p + 0); - }); - } + current_received += npart_recv_in; + iteration++; - if constexpr (D == Dim::_2D && C == Coord::Cart) - { - Kokkos::parallel_for( - "PopulateFromRecvBuffer", - total_recv, - Lambda(const std::size_t p){ - auto idx = allocation_vector(p); - this_tag(idx) = ParticleTag::alive; - this_i1(idx) = recv_buffer_int(NINTS * p + 0); - this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); - this_i2(idx) = recv_buffer_int(NINTS * p + 2); - this_i2_prev(idx) = recv_buffer_int(NINTS * p + 3); - this_ux1(idx) = recv_buffer_real(NREALS * p + 0); - this_ux2(idx) = recv_buffer_real(NREALS * p + 1); - this_ux3(idx) = recv_buffer_real(NREALS * p + 2); - this_weight(idx) = recv_buffer_real(NREALS * p + 3); - this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); - this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); - this_dx2(idx) = recv_buffer_prtldx(NFLOATS * p + 2); - this_dx2_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 3); - this_particleID(idx) = recv_buffer_long(NLONGS * p + 0); - }); - } + } // end direction loop - if constexpr (D == Dim::_2D && C != Coord::Cart) - { - Kokkos::parallel_for( - "PopulateFromRecvBuffer", - total_recv, - Lambda(const std::size_t p){ - auto idx = allocation_vector(p); - this_tag(idx) = ParticleTag::alive; - this_i1(idx) = recv_buffer_int(NINTS * p + 0); - this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); - this_i2(idx) = recv_buffer_int(NINTS * p + 2); - this_i2_prev(idx) = recv_buffer_int(NINTS * p + 3); - this_ux1(idx) = recv_buffer_real(NREALS * p + 0); - this_ux2(idx) = recv_buffer_real(NREALS * p + 1); - this_ux3(idx) = recv_buffer_real(NREALS * p + 2); - this_weight(idx) = 
recv_buffer_real(NREALS * p + 3); - this_phi(idx) = recv_buffer_real(NREALS * p + 4); - this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); - this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); - this_dx2(idx) = recv_buffer_prtldx(NFLOATS * p + 2); - this_dx2_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 3); - this_particleID(idx) = recv_buffer_long(NLONGS * p + 0); - }); - } + const auto npart = species.npart(); + const auto npart_holes = outgoing_indices.extent(0); - if constexpr (D == Dim::_3D) - { - Kokkos::parallel_for( + Kokkos::parallel_for( "PopulateFromRecvBuffer", - total_recv, - Lambda(const std::size_t p){ - auto idx = allocation_vector(p); - this_tag(idx) = ParticleTag::alive; - this_i1(idx) = recv_buffer_int(NINTS * p + 0); - this_i1_prev(idx) = recv_buffer_int(NINTS * p + 1); - this_i2(idx) = recv_buffer_int(NINTS * p + 2); - this_i2_prev(idx) = recv_buffer_int(NINTS * p + 3); - this_i3(idx) = recv_buffer_int(NINTS * p + 4); - this_i3_prev(idx) = recv_buffer_int(NINTS * p + 5); - this_ux1(idx) = recv_buffer_real(NREALS * p + 0); - this_ux2(idx) = recv_buffer_real(NREALS * p + 1); - this_ux3(idx) = recv_buffer_real(NREALS * p + 2); - this_weight(idx) = recv_buffer_real(NREALS * p + 3); - this_dx1(idx) = recv_buffer_prtldx(NFLOATS * p + 0); - this_dx1_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 1); - this_dx2(idx) = recv_buffer_prtldx(NFLOATS * p + 2); - this_dx2_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 3); - this_dx3(idx) = recv_buffer_prtldx(NFLOATS * p + 4); - this_dx3_prev(idx) = recv_buffer_prtldx(NFLOATS * p + 5); - this_particleID(idx) = recv_buffer_long(NLONGS * p + 0); - }); - } - species.set_npart(species.npart() + std::max(permute_vector.extent(0), - allocation_vector.extent(0)) - permute_vector.extent(0)); - /* - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - // Print the total number of particles after each pass - int species_npart = species.npart(); - int global_species_npart = 0; - // Reduce all local sums into 
global_sum on rank 0 - MPI_Reduce(&species_npart, &global_species_npart, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - int total_ranks; - MPI_Comm_size(MPI_COMM_WORLD, &total_ranks); - for (int allranks=0; allranks= npart_holes ? npart + p - npart_holes + : outgoing_indices(p)); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + this_i1(idx) = recv_buff_int(NINTS * p + 0); + this_i1_prev(idx) = recv_buff_int(NINTS * p + 1); + this_dx1(idx) = recv_buff_prtldx(NPRTLDX * p + 0); + this_dx1_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 1); } - } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + this_i2(idx) = recv_buff_int(NINTS * p + 2); + this_i2_prev(idx) = recv_buff_int(NINTS * p + 3); + this_dx2(idx) = recv_buff_prtldx(NPRTLDX * p + 2); + this_dx2_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 3); + } + if constexpr (D == Dim::_3D) { + this_i3(idx) = recv_buff_int(NINTS * p + 4); + this_i3_prev(idx) = recv_buff_int(NINTS * p + 5); + this_dx3(idx) = recv_buff_prtldx(NPRTLDX * p + 4); + this_dx3_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 5); + } + this_ux1(idx) = recv_buff_real(NREALS * p + 0); + this_ux2(idx) = recv_buff_real(NREALS * p + 1); + this_ux3(idx) = recv_buff_real(NREALS * p + 2); + this_weight(idx) = recv_buff_real(NREALS * p + 3); + if constexpr (D == Dim::_2D and C != Coord::Cart) { + this_phi(idx) = recv_buff_real(NREALS * p + 4); + } + this_tag(idx) = ParticleTag::alive; + }); + + if (npart_recv > npart_holes) { + species.set_npart(npart + npart_recv - npart_holes); } - */ - return; -} + } } // namespace comm diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 390c27fa8..6175cc4bb 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -24,6 +24,8 @@ #include "framework/domain/comm_nompi.hpp" #endif +#include + #include #include @@ -33,10 +35,10 @@ namespace ntt { using comm_params_t = std::pair>; template - auto GetSendRecvRanks(Metadomain* 
metadomain, - Domain& domain, - dir::direction_t direction) - -> std::pair { + auto GetSendRecvRanks( + Metadomain* metadomain, + Domain& domain, + dir::direction_t direction) -> std::pair { Domain* send_to_nghbr_ptr = nullptr; Domain* recv_from_nghbr_ptr = nullptr; // set pointers to the correct send/recv domains @@ -86,8 +88,8 @@ namespace ntt { } else { // no communication necessary return { - {0, -1}, - {0, -1} + { 0, -1 }, + { 0, -1 } }; } #if defined(MPI_ENABLED) @@ -110,17 +112,17 @@ namespace ntt { (void)send_rank; (void)recv_rank; return { - {send_ind, send_rank}, - {recv_ind, recv_rank} + { send_ind, send_rank }, + { recv_ind, recv_rank } }; } template - auto GetSendRecvParams(Metadomain* metadomain, - Domain& domain, - dir::direction_t direction, - bool synchronize) - -> std::pair { + auto GetSendRecvParams( + Metadomain* metadomain, + Domain& domain, + dir::direction_t direction, + bool synchronize) -> std::pair { const auto [send_indrank, recv_indrank] = GetSendRecvRanks(metadomain, domain, direction); const auto [send_ind, send_rank] = send_indrank; @@ -129,8 +131,8 @@ namespace ntt { const auto is_receiving = (recv_rank >= 0); if (not(is_sending or is_receiving)) { return { - {{ 0, -1 }, {}}, - {{ 0, -1 }, {}} + { { 0, -1 }, {} }, + { { 0, -1 }, {} } }; } auto send_slice = std::vector {}; @@ -196,8 +198,8 @@ namespace ntt { } return { - {{ send_ind, send_rank }, send_slice}, - {{ recv_ind, recv_rank }, recv_slice}, + { { send_ind, send_rank }, send_slice }, + { { recv_ind, recv_rank }, recv_slice }, }; } @@ -492,638 +494,177 @@ namespace ntt { } template - void Metadomain::CommunicateParticles(Domain& domain, - timer::Timers* timers) { - raise::ErrorIf(timers == nullptr, - "Timers not passed when Comm::Prtl called", - HERE); - logger::Checkpoint("Communicating particles\n", HERE); - for (auto& species : domain.species) { - // at this point particles should already by tagged in the pusher - timers->start("Sorting"); - const auto npart_per_tag = 
species.SortByTags(); - timers->stop("Sorting"); + void Metadomain::CommunicateParticles(Domain& domain) { #if defined(MPI_ENABLED) - timers->start("Communications"); - // only necessary when MPI is enabled - /** - * index_last - * | - * alive new dead tag1 tag2 v dead - * [ 11111111 000000000 222222222 3333333 .... nnnnnnn 00000000 ... ] - * ^ ^ - * | | - * tag_offset[tag1] -----+ +----- tag_offset[tag1] + npart_per_tag[tag1] - * "send_pmin" "send_pmax" (after last element) - */ - auto tag_offset { npart_per_tag }; - for (std::size_t i { 1 }; i < tag_offset.size(); ++i) { - tag_offset[i] += tag_offset[i - 1]; - } - for (std::size_t i { 0 }; i < tag_offset.size(); ++i) { - tag_offset[i] -= npart_per_tag[i]; - } - auto index_last = tag_offset[tag_offset.size() - 1] + - npart_per_tag[npart_per_tag.size() - 1]; - for (auto& direction : dir::Directions::all) { - const auto [send_params, - recv_params] = GetSendRecvParams(this, domain, direction, true); - const auto [send_indrank, send_slice] = send_params; - const auto [recv_indrank, recv_slice] = recv_params; - const auto [send_ind, send_rank] = send_indrank; - const auto [recv_ind, recv_rank] = recv_indrank; - if (send_rank < 0 and recv_rank < 0) { - continue; - } - const auto send_dir_tag = mpi::PrtlSendTag::dir2tag(direction); - const auto nsend = npart_per_tag[send_dir_tag]; - const auto send_pmin = tag_offset[send_dir_tag]; - const auto send_pmax = tag_offset[send_dir_tag] + nsend; - const auto recv_count = comm::CommunicateParticles( - species, - send_rank, - recv_rank, - { send_pmin, send_pmax }, - index_last); - if (recv_count > 0) { - if constexpr (D == Dim::_1D) { - int shift_in_x1 { 0 }; - if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active(in::x1); - } - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i1_prev = species.i1_prev; - Kokkos::parallel_for( - 
"CommunicateParticles", - recv_count, - Lambda(index_t p) { - this_tag(index_last + p) = ParticleTag::alive; - this_i1(index_last + p) += shift_in_x1; - this_i1_prev(index_last + p) += shift_in_x1; - }); - } else if constexpr (D == Dim::_2D) { - int shift_in_x1 { 0 }, shift_in_x2 { 0 }; - if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active()[0]; - } - if ((-direction)[1] == -1) { - shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); - } else if ((-direction)[1] == 1) { - shift_in_x2 = domain.mesh.n_active(in::x2); - } - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; - Kokkos::parallel_for( - "CommunicateParticles", - recv_count, - Lambda(index_t p) { - this_tag(index_last + p) = ParticleTag::alive; - this_i1(index_last + p) += shift_in_x1; - this_i2(index_last + p) += shift_in_x2; - this_i1_prev(index_last + p) += shift_in_x1; - this_i2_prev(index_last + p) += shift_in_x2; - }); - } else if constexpr (D == Dim::_3D) { - int shift_in_x1 { 0 }, shift_in_x2 { 0 }, shift_in_x3 { 0 }; - if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active(in::x1); - } - if ((-direction)[1] == -1) { - shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); - } else if ((-direction)[1] == 1) { - shift_in_x2 = domain.mesh.n_active(in::x2); - } - if ((-direction)[2] == -1) { - shift_in_x3 = -subdomain(recv_ind).mesh.n_active(in::x3); - } else if ((-direction)[2] == 1) { - shift_in_x3 = domain.mesh.n_active(in::x3); - } - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i3 = species.i3; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; - auto& this_i3_prev = 
species.i3_prev; - Kokkos::parallel_for( - "CommunicateParticles", - recv_count, - Lambda(index_t p) { - this_tag(index_last + p) = ParticleTag::alive; - this_i1(index_last + p) += shift_in_x1; - this_i2(index_last + p) += shift_in_x2; - this_i3(index_last + p) += shift_in_x3; - this_i1_prev(index_last + p) += shift_in_x1; - this_i2_prev(index_last + p) += shift_in_x2; - this_i3_prev(index_last + p) += shift_in_x3; - }); - } - index_last += recv_count; - species.set_npart(index_last); - } - Kokkos::deep_copy( - Kokkos::subview(species.tag, std::make_pair(send_pmin, send_pmax)), - ParticleTag::dead); - } - timers->stop("Communications"); - // !TODO: maybe there is a way to not sort twice - timers->start("Sorting"); - species.set_unsorted(); - species.SortByTags(); - timers->stop("Sorting"); -#endif - } - } - - /* - New function to communicate particles using a buffer - */ - template - void Metadomain::CommunicateParticlesBuffer(Domain& domain, - timer::Timers* timers) { - raise::ErrorIf(timers == nullptr, - "Timers not passed when Comm::Prtl called", - HERE); logger::Checkpoint("Communicating particles\n", HERE); for (auto& species : domain.species) { - /* - Brief on arrays - npart_per_tag_arr (vector): | dead count| alive count | tag=1 count | tag=2 count | ... - <--------------------------size = ntags()--------------------------> - tag_offset (Kokkos::View): | 0 | dead count | dead + alive count | dead + alive + tag=1 count | ... - <--------------------------size = ntags()--------------------------> - npart_per_tag_arr_recv (vector): | 0 | 0 | nrecv1 | nrecv2 | ... 
- <--------------------------size = ntags()--------------------------> - */ - auto [npart_per_tag_arr, - tag_offset] = species.npart_per_tag(); - auto npart = static_cast(species.npart()); - auto total_alive = static_cast( - npart_per_tag_arr[ParticleTag::alive]); - auto total_dead = static_cast( - npart_per_tag_arr[ParticleTag::dead]); - auto total_holes = static_cast(npart - total_alive); - auto total_recv = static_cast(0); - + const auto ntags = species.ntags(); + + // at this point particles should already be tagged in the pusher + auto [npptag_vec, tag_offsets] = species.NpartsPerTagAndOffsets(); + const auto npart_dead = npptag_vec[ParticleTag::dead]; + const auto npart_alive = npptag_vec[ParticleTag::alive]; + + const auto npart = species.npart(); + const auto npart_holes = npart - npart_alive; + + // # of particles to receive per each tag (direction) + std::vector npptag_recv_vec(ntags - 2, 0); + // coordinate shifts per each direction + array_t shifts_in_x1("shifts_in_x1", ntags - 2); + array_t shifts_in_x2("shifts_in_x2", ntags - 2); + array_t shifts_in_x3("shifts_in_x3", ntags - 2); + // all directions requiring communication + dir::dirs_t dirs_to_comm; + + // ranks & indices of meshblock to send/recv from std::vector send_ranks, send_inds; std::vector recv_ranks, recv_inds; - // at this point particles should already by tagged in the pusher -#if defined(MPI_ENABLED) - std::vector npart_per_tag_arr_recv(species.ntags(), 0); - Kokkos::View shifts_in_x1("shifts_in_x1", species.ntags()); - Kokkos::View shifts_in_x2("shifts_in_x2", species.ntags()); - Kokkos::View shifts_in_x3("shifts_in_x3", species.ntags()); - auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); - auto shifts_in_x2_h = Kokkos::create_mirror_view(shifts_in_x2); - auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); - dir::dirs_t legal_directions; - // Get receive counts + displacements - for (auto& direction : dir::Directions::all) { + // total # of reaceived particles 
from all directions + std::size_t npart_recv = 0u; + + for (const auto& direction : dir::Directions::all) { + // tags corresponding to the direction (both send & recv) const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); + + // get indices & ranks of send/recv meshblocks const auto [send_params, recv_params] = GetSendRecvParams(this, domain, direction, true); const auto [send_indrank, send_slice] = send_params; const auto [recv_indrank, recv_slice] = recv_params; const auto [send_ind, send_rank] = send_indrank; const auto [recv_ind, recv_rank] = recv_indrank; - if (send_rank < 0 and recv_rank < 0) { + + // skip if no communication is necessary + const auto is_sending = (send_rank >= 0); + const auto is_receiving = (recv_rank >= 0); + if (not is_sending and not is_receiving) { continue; } - const auto nsend = npart_per_tag_arr[tag_send]; - std::size_t nrecv = 0; - - legal_directions.push_back(direction); + dirs_to_comm.push_back(direction); send_ranks.push_back(send_rank); recv_ranks.push_back(recv_rank); send_inds.push_back(send_ind); recv_inds.push_back(recv_ind); - comm::ParticleSendRecvCount(send_rank, recv_rank, nsend, nrecv); - total_recv += nrecv; - npart_per_tag_arr_recv[tag_recv] = nrecv; - // Perform displacements before sending - if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { - if ((-direction)[0] == -1) { - shifts_in_x1_h(tag_recv) = subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shifts_in_x1_h(tag_recv) = -domain.mesh.n_active(in::x1); - } - } - if constexpr (D == Dim::_2D || D == Dim::_3D) { - if ((-direction)[1] == -1) { - shifts_in_x2_h(tag_recv) = subdomain(recv_ind).mesh.n_active(in::x2); - } else if ((-direction)[1] == 1) { - shifts_in_x2_h(tag_recv) = -domain.mesh.n_active(in::x2); - } - } - if constexpr (D == Dim::_3D) { - if ((-direction)[2] == -1) { - shifts_in_x3_h(tag_recv) = subdomain(recv_ind).mesh.n_active(in::x3); - } 
else if ((-direction)[2] == 1) { - shifts_in_x3_h(tag_recv) = -domain.mesh.n_active(in::x3); - } - } - } // end directions loop - Kokkos::deep_copy(shifts_in_x1, shifts_in_x1_h); - Kokkos::deep_copy(shifts_in_x2, shifts_in_x2_h); - Kokkos::deep_copy(shifts_in_x3, shifts_in_x3_h); - - raise::FatalIf((npart + total_recv) >= species.maxnpart(), - "Too many particles to receive (cannot fit into maxptl)", - HERE); - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i1_prev = species.i1_prev; - auto& this_i2 = species.i2; - auto& this_i2_prev = species.i2_prev; - auto& this_i3 = species.i3; - auto& this_i3_prev = species.i3_prev; + // record the # of particles to-be-sent + const auto nsend = npptag_vec[tag_send]; - /* - Brief on permute vector: It contains the sorted indices of tag != alive particles - E.g., consider the following tag array - species.tag = [ 0, 0, 1, 0, 2, 3,...] - Then, permute vector will look something like - permute_vector = [0, 1, 3, ..., 4, ..., ... 5, ... ] - |<--------- >| |<----->| |<----->| .... - tag=dead ct tag=2 ct tag=3 ct - */ - Kokkos::View permute_vector("permute_vector", total_holes); - Kokkos::View current_offset("current_offset", species.ntags()); - auto &this_tag_offset = tag_offset; + // request the # of particles to-be-received ... + // ... 
and send the # of particles to-be-sent + std::size_t nrecv = 0; + comm::ParticleSendRecvCount(send_rank, recv_rank, nsend, nrecv); + npart_recv += nrecv; + npptag_recv_vec[tag_recv - 2] = nrecv; - auto n_alive = npart_per_tag_arr[ParticleTag::alive]; + raise::ErrorIf((npart + npart_recv) >= species.maxnpart(), + "Too many particles to receive (cannot fit into maxptl)", + HERE); - if constexpr (D == Dim::_1D){ - Kokkos::parallel_for( - "PermuteVector and Displace", - species.npart(), - Lambda(index_t p) { - const auto current_tag = this_tag(p); - if (current_tag != ParticleTag::alive){ - // dead tags only - if (current_tag == ParticleTag::dead) { - const auto idx_permute_vec = Kokkos::atomic_fetch_add( - ¤t_offset(current_tag), - 1); - permute_vector(idx_permute_vec) = p; - } - // tag = 1->N (excluding dead and alive) - else{ - const auto idx_permute_vec = this_tag_offset(current_tag) - - n_alive + - Kokkos::atomic_fetch_add( - ¤t_offset(current_tag), - 1); - permute_vector(idx_permute_vec) = p; - this_i1(p) += shifts_in_x1(current_tag); - this_i1_prev(p) += shifts_in_x1(current_tag); + // if sending, record displacements to apply before + // ... tag_send - 2: because we only shift tags > 2 (i.e. 
no dead/alive) + if (is_sending) { + if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { + auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); + if (direction[0] == -1) { + // sending backwards in x1 (add sx1 of target meshblock) + shifts_in_x1_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( + in::x1); + } else if (direction[0] == 1) { + // sending forward in x1 (subtract sx1 of source meshblock) + shifts_in_x1_h(tag_send - 2) = -domain.mesh.n_active(in::x1); } + Kokkos::deep_copy(shifts_in_x1, shifts_in_x1_h); } - }); - } - - if constexpr (D == Dim::_2D){ - Kokkos::parallel_for( - "PermuteVector and Displace", - species.npart(), - Lambda(index_t p) { - const auto current_tag = this_tag(p); - if (current_tag != ParticleTag::alive){ - // dead tags only - if (current_tag == ParticleTag::dead) { - const auto idx_permute_vec = Kokkos::atomic_fetch_add( - ¤t_offset(current_tag), - 1); - permute_vector(idx_permute_vec) = p; + if constexpr (D == Dim::_2D || D == Dim::_3D) { + auto shifts_in_x2_h = Kokkos::create_mirror_view(shifts_in_x2); + if (direction[1] == -1) { + shifts_in_x2_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( + in::x2); + } else if (direction[1] == 1) { + shifts_in_x2_h(tag_send - 2) = -domain.mesh.n_active(in::x2); } - // tag = 1->N (excluding dead and alive) - else{ - const auto idx_permute_vec = this_tag_offset(current_tag) - - n_alive + - Kokkos::atomic_fetch_add( - ¤t_offset(current_tag), - 1); - permute_vector(idx_permute_vec) = p; - this_i1(p) += shifts_in_x1(current_tag); - this_i1_prev(p) += shifts_in_x1(current_tag); - this_i2(p) += shifts_in_x2(current_tag); - this_i2_prev(p) += shifts_in_x2(current_tag); + Kokkos::deep_copy(shifts_in_x2, shifts_in_x2_h); + } + if constexpr (D == Dim::_3D) { + auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); + if (direction[2] == -1) { + shifts_in_x3_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( + in::x3); + } else if (direction[2] == 1) { + 
shifts_in_x3_h(tag_send - 2) = -domain.mesh.n_active(in::x3); } + Kokkos::deep_copy(shifts_in_x3, shifts_in_x3_h); } - }); - } + } + } // end directions loop - if constexpr (D == Dim::_3D){ + auto& this_tag = species.tag; + auto& this_i1 = species.i1; + auto& this_i1_prev = species.i1_prev; + auto& this_i2 = species.i2; + auto& this_i2_prev = species.i2_prev; + auto& this_i3 = species.i3; + auto& this_i3_prev = species.i3_prev; + + array_t outgoing_indices("outgoing_indices", + npart - npart_alive); + + array_t current_offset("current_offset", ntags); Kokkos::parallel_for( - "PermuteVector and Displace", - species.npart(), + "OutgoingIndicesAndDisplace", + species.rangeActiveParticles(), Lambda(index_t p) { - const auto current_tag = this_tag(p); - if (current_tag != ParticleTag::alive){ - // dead tags only - if (current_tag == ParticleTag::dead) { - const auto idx_permute_vec = Kokkos::atomic_fetch_add( - ¤t_offset(current_tag), - 1); - permute_vector(idx_permute_vec) = p; + if (this_tag(p) != ParticleTag::alive) { + // dead or to-be-sent + const auto idx_for_tag = + Kokkos::atomic_fetch_add(¤t_offset(this_tag(p)), 1) + + (this_tag(p) != ParticleTag::dead ? npart_dead : 0) + + (this_tag(p) > 2 ? 
tag_offsets(this_tag(p) - 3) : 0); + if (idx_for_tag >= npart - npart_alive) { + raise::KernelError(HERE, + "Outgoing indices idx exceeds the array size"); } - // tag = 1->N (excluding dead and alive) - else{ - const auto idx_permute_vec = this_tag_offset(current_tag) - - n_alive + - Kokkos::atomic_fetch_add( - ¤t_offset(current_tag), - 1); - permute_vector(idx_permute_vec) = p; - this_i1(p) += shifts_in_x1(current_tag); - this_i1_prev(p) += shifts_in_x1(current_tag); - this_i2(p) += shifts_in_x2(current_tag); - this_i2_prev(p) += shifts_in_x2(current_tag); - this_i3(p) += shifts_in_x3(current_tag); - this_i3_prev(p) += shifts_in_x3(current_tag); + outgoing_indices(idx_for_tag) = p; + // apply offsets + if (this_tag(p) != ParticleTag::dead) { + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + this_i1(p) += shifts_in_x1(this_tag(p) - 2); + this_i1_prev(p) += shifts_in_x1(this_tag(p) - 2); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + this_i2(p) += shifts_in_x2(this_tag(p) - 2); + this_i2_prev(p) += shifts_in_x2(this_tag(p) - 2); + } + if constexpr (D == Dim::_3D) { + this_i3(p) += shifts_in_x3(this_tag(p) - 2); + this_i3_prev(p) += shifts_in_x3(this_tag(p) - 2); + } } } }); - } - - - - // Sanity check: npart_per_tag must be equal to the current offset except tag=alive - auto current_offset_h = Kokkos::create_mirror_view(current_offset); - Kokkos::deep_copy(current_offset_h, current_offset); - for (std::size_t i { 0 }; i < species.ntags(); ++i) { - if (i != ParticleTag::alive){ - raise::FatalIf(current_offset_h(i) != npart_per_tag_arr[i], - "Error in permute vector construction", - HERE); - } - else{ - raise::FatalIf(current_offset_h(i) != 0, - "Error in permute vector construction", - HERE); - } - } - - /* - Brief on allocation vector: It contains the indices of holes that are filled - by the particles received from other domains - case 1: total_recv > nholes - allocation_vector = | i1 | i2 | i3 | .... | npart | npart + 1 | ... 
- <-------total_holes------> <---total_recv - nholes--> - (same as permuute vector) (extra particles appended at end) - case 2: total_recv <= nholes - allocation_vector = | i1 | i2 | i3 | .... - <----total_recv-----> - (same as permuute vector) - */ - Kokkos::View allocation_vector("allocation_vector", total_recv); - if (total_recv > total_holes) - { - // Fill the first bit with the permute vector; these are the holes to be filled - Kokkos::parallel_for( - "AllocationVector", - total_holes, - Lambda(index_t p) { - allocation_vector(p) = permute_vector(p); - }); - - // Now allocate the rest to the end of the array - Kokkos::parallel_for( - "AllocationVector", - total_recv - total_holes, - Lambda(index_t p) { - allocation_vector(total_holes + p) = static_cast(npart + p); - }); - } - else - { Kokkos::parallel_for( - "AllocationVector", - total_recv, - Lambda(index_t p) { - allocation_vector(p) = permute_vector(p); - }); - } - - /* - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - if (rank == 1 && species.label() == "e+_b") - { - // Copy the tag array to host - auto tag_h = Kokkos::create_mirror_view(species.tag); - Kokkos::deep_copy(tag_h, species.tag); - std::cout << "Tag locs before send" << std::endl; - for (std::size_t i { 0 }; i < species.npart(); i++) { - if (tag_h(i) != ParticleTag::alive) - std::cout <<" Tag: " << tag_h(i) << " loc: "<< i << std::endl; - } - - // Print allocation vector after copying to host - auto allocation_vector_h = Kokkos::create_mirror_view(allocation_vector); - std::cout << "Total holes: " << total_holes << " Total recv: " << total_recv << std::endl; - Kokkos::deep_copy(allocation_vector_h, allocation_vector); - for (std::size_t i { 0 }; i < total_recv; ++i) { - std::cout << "Rank: " << rank << " Allocation vector: " << allocation_vector_h(i) << std::endl; - } - // Print the permute vector as well - auto permute_vector_h = Kokkos::create_mirror_view(permute_vector); - Kokkos::deep_copy(permute_vector_h, permute_vector); - for 
(std::size_t i { 0 }; i < total_holes; ++i) { - std::cout << "Rank: " << rank << " Permuted vector: " << permute_vector_h(i) << - " tag: " << tag_h(permute_vector_h(i)) << std::endl; - } - } - */ - - // Communicate the arrays - comm::CommunicateParticlesBuffer(species, permute_vector, allocation_vector, - this_tag_offset, npart_per_tag_arr, npart_per_tag_arr_recv, - send_ranks, recv_ranks, legal_directions); + comm::CommunicateParticles(species, + outgoing_indices, + tag_offsets, + npptag_vec, + npptag_recv_vec, + send_ranks, + recv_ranks, + dirs_to_comm); + species.set_unsorted(); + } // end species loop +#else + (void)domain; #endif - } } - /* - Function to copy the alive particle data the arrays to a buffer and then back - to the particle arrays -*/ - template - void MoveDeadToEnd(array_t& arr, - Kokkos::View indices_alive) { - auto n_alive = indices_alive.extent(0); - auto buffer = Kokkos::View("buffer", n_alive); - Kokkos::parallel_for( - "PopulateBufferAlive", - n_alive, - Lambda(const std::size_t p) { - buffer(p) = arr(indices_alive(p)); - }); - - Kokkos::parallel_for( - "CopyBufferToArr", - n_alive, - Lambda(const std::size_t p) { - arr(p) = buffer(p); - }); - return; - } - - /* - Function to remove dead particles from the domain - - Consider the following particle quantity array - <---xxx---x---xx---xx-----------xx----x--> (qty) - - = alive - x = dead - ntot = nalive + ndead - - (1) Copy all alive particle data to buffer - <---xxx---x---xx---xx-----------xx----x--> (qty) - | - | - v - <--------------------------> buffer - (nalive) - - (2) Copy from buffer to the beginning of the array - overwritting all particles - <--------------------------> buffer - (nalive) - | - | - v - <--------------------------xx----x--> (qty) - ^ - (nalive) - - (3) Set npart to nalive - */ template - void Metadomain::RemoveDeadParticles(Domain& domain, - timer::Timers* timers){ + void Metadomain::RemoveDeadParticles(Domain& domain) { for (auto& species : domain.species) { - auto 
[npart_per_tag_arr, - tag_offset] = species.npart_per_tag(); - const auto npart = static_cast(species.npart()); - const auto total_alive = static_cast( - npart_per_tag_arr[ParticleTag::alive]); - const auto total_dead = static_cast( - npart_per_tag_arr[ParticleTag::dead]); - - // Check that only alive and dead particles are present - for (std::size_t i { 0 }; i < species.ntags(); i++) { - if (i != ParticleTag::alive && i != ParticleTag::dead){ - raise::FatalIf(npart_per_tag_arr[i] != 0, - "Particle tags can only be dead or alive at this point", - HERE); - } - } - - // Get the indices of all alive particles - auto &this_i1 = species.i1; - auto &this_i2 = species.i2; - auto &this_i3 = species.i3; - auto &this_i1_prev = species.i1_prev; - auto &this_i2_prev = species.i2_prev; - auto &this_i3_prev = species.i3_prev; - auto &this_dx1 = species.dx1; - auto &this_dx2 = species.dx2; - auto &this_dx3 = species.dx3; - auto &this_dx1_prev = species.dx1_prev; - auto &this_dx2_prev = species.dx2_prev; - auto &this_dx3_prev = species.dx3_prev; - auto &this_ux1 = species.ux1; - auto &this_ux2 = species.ux2; - auto &this_ux3 = species.ux3; - auto &this_weight = species.weight; - auto &this_phi = species.phi; - auto &this_tag = species.tag; - // Find indices of tag = alive particles - Kokkos::View indices_alive("indices_alive", total_alive); - Kokkos::View alive_counter("counter_alive", 1); - Kokkos::deep_copy(alive_counter, 0); - Kokkos::parallel_for( - "Indices of Alive Particles", - species.npart(), - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive){ - const auto idx = Kokkos::atomic_fetch_add(&alive_counter(0), 1); - indices_alive(idx) = p; - } - }); - // Sanity check: alive_counter must be equal to total_alive - auto alive_counter_h = Kokkos::create_mirror_view(alive_counter); - Kokkos::deep_copy(alive_counter_h, alive_counter); - raise::FatalIf(alive_counter_h(0) != total_alive, - "Error in finding alive particles", - HERE); - - MoveDeadToEnd(species.i1, 
indices_alive); - MoveDeadToEnd(species.dx1, indices_alive); - MoveDeadToEnd(species.dx1_prev, indices_alive); - MoveDeadToEnd(species.ux1, indices_alive); - MoveDeadToEnd(species.ux2, indices_alive); - MoveDeadToEnd(species.ux3, indices_alive); - MoveDeadToEnd(species.weight, indices_alive); - // Update i2, dx2, i2_prev, dx2_prev - if constexpr(D == Dim::_2D || D == Dim::_3D){ - MoveDeadToEnd(species.i2, indices_alive); - MoveDeadToEnd(species.i2_prev, indices_alive); - MoveDeadToEnd(species.dx2, indices_alive); - MoveDeadToEnd(species.dx2_prev, indices_alive); - if constexpr(D == Dim::_2D && M::CoordType != Coord::Cart){ - MoveDeadToEnd(species.phi, indices_alive); - } - } - // Update i3, dx3, i3_prev, dx3_prev - if constexpr(D == Dim::_3D){ - MoveDeadToEnd(species.i3, indices_alive); - MoveDeadToEnd(species.i3_prev, indices_alive); - MoveDeadToEnd(species.dx3, indices_alive); - MoveDeadToEnd(species.dx3_prev, indices_alive); - } - // tags (set first total_alive to alive and rest to dead) - Kokkos::parallel_for( - "Make tags alive", - total_alive, - Lambda(index_t p) { - this_tag(p) = ParticleTag::alive; - }); - - Kokkos::parallel_for( - "Make tags dead", - total_dead, - Lambda(index_t p) { - this_tag(total_alive + p) = ParticleTag::dead; - }); - - species.set_npart(total_alive); - - std::tie(npart_per_tag_arr, - tag_offset) = species.npart_per_tag(); - raise::FatalIf(npart_per_tag_arr[ParticleTag::alive] != total_alive, - "Error in removing dead particles: alive count doesn't match", - HERE); - raise::FatalIf(npart_per_tag_arr[ParticleTag::dead] != 0, - "Error in removing dead particles: not all particles are dead", - HERE); - + species.RemoveDead(); } - - return; } template struct Metadomain>; diff --git a/src/framework/domain/domain.h b/src/framework/domain/domain.h index 397907fef..bc7c6e4b5 100644 --- a/src/framework/domain/domain.h +++ b/src/framework/domain/domain.h @@ -65,7 +65,7 @@ namespace ntt { Mesh mesh; Fields fields; std::vector> species; - 
random_number_pool_t random_pool { constant::RandomSeed }; + random_number_pool_t random_pool; /** * @brief constructor for "empty" allocation of non-local domain placeholders @@ -81,6 +81,7 @@ namespace ntt { : mesh { ncells, extent, metric_params } , fields {} , species {} + , random_pool { constant::RandomSeed } , m_index { index } , m_offset_ndomains { offset_ndomains } , m_offset_ncells { offset_ncells } {} @@ -95,6 +96,7 @@ namespace ntt { : mesh { ncells, extent, metric_params } , fields { ncells } , species { species_params.begin(), species_params.end() } + , random_pool { constant::RandomSeed + static_cast(index) } , m_index { index } , m_offset_ndomains { offset_ndomains } , m_offset_ncells { offset_ncells } {} @@ -144,8 +146,7 @@ namespace ntt { } /* setters -------------------------------------------------------------- */ - auto set_neighbor_idx(const dir::direction_t& dir, unsigned int idx) - -> void { + auto set_neighbor_idx(const dir::direction_t& dir, unsigned int idx) -> void { m_neighbor_idx[dir] = idx; } @@ -163,8 +164,8 @@ namespace ntt { }; template - inline auto operator<<(std::ostream& os, const Domain& domain) - -> std::ostream& { + inline auto operator<<(std::ostream& os, + const Domain& domain) -> std::ostream& { os << "Domain #" << domain.index(); #if defined(MPI_ENABLED) os << " [MPI rank: " << domain.mpi_rank() << "]"; diff --git a/src/framework/domain/metadomain.cpp b/src/framework/domain/metadomain.cpp index a01296823..ec8561a9a 100644 --- a/src/framework/domain/metadomain.cpp +++ b/src/framework/domain/metadomain.cpp @@ -399,33 +399,6 @@ namespace ntt { #endif } - // Function to assign a unique ID to each particle - template - void Metadomain::SetParticleIDs(Domain& domain){ - for (auto& species : domain.species) { - auto &this_particleID = species.particleID; - auto &this_tag = species.tag; - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - const auto offset_per_rank = static_cast(1e9 * rank); - std::size_t current_particleID = 
0; - Kokkos::View counter_view("current_particleID", 1); - Kokkos::deep_copy(counter_view, current_particleID); - - Kokkos::parallel_for( - "Set Particle IDs", - species.npart(), - Lambda(const std::size_t p){ - if (this_tag(p) == ParticleTag::alive) - { - Kokkos::atomic_increment(&counter_view(0)); - this_particleID(p) = offset_per_rank + static_cast(counter_view(0)); - } - }); - } - return; - } - template struct Metadomain>; template struct Metadomain>; template struct Metadomain>; diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 9e2c2bb9d..5177571d0 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -88,10 +88,8 @@ namespace ntt { void CommunicateFields(Domain&, CommTags); void SynchronizeFields(Domain&, CommTags, const range_tuple_t& = { 0, 0 }); - void CommunicateParticles(Domain&, timer::Timers*); - void CommunicateParticlesBuffer(Domain&, timer::Timers*); - void SetParticleIDs(Domain&); - void RemoveDeadParticles(Domain& ,timer::Timers* ); + void CommunicateParticles(Domain&); + void RemoveDeadParticles(Domain&); /** * @param global_ndomains total number of domains diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 4a6b2c908..c39f0c67f 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -107,7 +107,6 @@ namespace ntt { } } - template void ComputeMoments(const SimulationParams& params, const Mesh& mesh, @@ -475,16 +474,14 @@ namespace ntt { for (const auto& prtl : g_writer.speciesWriters()) { auto& species = local_domain->species[prtl.species() - 1]; if (not species.is_sorted()) { - species.SortByTags(); + species.RemoveDead(); } const std::size_t nout = species.npart() / prtl_stride; array_t buff_x1, buff_x2, buff_x3; - array_t buff_ux1, buff_ux2, buff_ux3; - array_t buff_wei; - buff_wei = array_t { "w", nout }; - buff_ux1 = array_t { "u1", nout }; - buff_ux2 = array_t { "u2", nout }; - buff_ux3 = array_t { "u3", 
nout }; + array_t buff_ux1 { "u1", nout }; + array_t buff_ux2 { "ux2", nout }; + array_t buff_ux3 { "ux3", nout }; + array_t buff_wei { "w", nout }; if constexpr (M::Dim == Dim::_1D or M::Dim == Dim::_2D or M::Dim == Dim::_3D) { buff_x1 = array_t { "x1", nout }; diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index b667b5ac9..af7a773ed 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -31,10 +31,10 @@ namespace ntt { template - auto get_dx0_V0(const std::vector& resolution, - const boundaries_t& extent, - const std::map& params) - -> std::pair { + auto get_dx0_V0( + const std::vector& resolution, + const boundaries_t& extent, + const std::map& params) -> std::pair { const auto metric = M(resolution, extent, params); const auto dx0 = metric.dxMin(); coord_t x_corner { ZERO }; @@ -445,15 +445,8 @@ namespace ntt { defaults::gr::pusher_niter)); } /* [particles] ---------------------------------------------------------- */ -#if defined(MPI_ENABLED) - const std::size_t sort_interval = 1; -#else - const std::size_t sort_interval = toml::find_or(toml_data, - "particles", - "sort_interval", - defaults::sort_interval); -#endif - set("particles.sort_interval", sort_interval); + set("particles.clear_interval", + toml::find_or(toml_data, "particles", "clear_interval", defaults::clear_interval)); /* [output] ------------------------------------------------------------- */ // fields diff --git a/src/framework/tests/parameters.cpp b/src/framework/tests/parameters.cpp index 393cd2409..1a4228642 100644 --- a/src/framework/tests/parameters.cpp +++ b/src/framework/tests/parameters.cpp @@ -48,7 +48,7 @@ const auto mink_1d = u8R"( [particles] ppc0 = 10.0 - sort_interval = 100 + clear_interval = 100 [[particles.species]] label = "e-" @@ -134,7 +134,7 @@ const auto sph_2d = u8R"( [particles] ppc0 = 25.0 use_weights = true - sort_interval = 50 + clear_interval = 50 [[particles.species]] @@ -199,7 +199,7 @@ const auto qks_2d = u8R"( 
[particles] ppc0 = 4.0 - sort_interval = 100 + clear_interval = 100 [[particles.species]] label = "e-" @@ -269,7 +269,7 @@ auto main(int argc, char* argv[]) -> int { (real_t)0.0078125, "scales.V0"); boundaries_t fbc = { - {FldsBC::PERIODIC, FldsBC::PERIODIC} + { FldsBC::PERIODIC, FldsBC::PERIODIC } }; assert_equal( params_mink_1d.get>("grid.boundaries.fields")[0].first, @@ -345,8 +345,8 @@ auto main(int argc, char* argv[]) -> int { "simulation.engine"); boundaries_t fbc = { - {FldsBC::ATMOSPHERE, FldsBC::ABSORB}, - { FldsBC::AXIS, FldsBC::AXIS} + { FldsBC::ATMOSPHERE, FldsBC::ABSORB }, + { FldsBC::AXIS, FldsBC::AXIS } }; assert_equal(params_sph_2d.get("scales.B0"), @@ -480,9 +480,9 @@ auto main(int argc, char* argv[]) -> int { "grid.metric.ks_rh"); const auto expect = std::map { - {"r0", 0.0}, - { "h", 0.25}, - { "a", 0.99} + { "r0", 0.0 }, + { "h", 0.25 }, + { "a", 0.99 } }; auto read = params_qks_2d.get>( "grid.metric.params"); @@ -501,8 +501,8 @@ auto main(int argc, char* argv[]) -> int { "algorithms.gr.pusher_niter"); boundaries_t pbc = { - {PrtlBC::HORIZON, PrtlBC::ABSORB}, - { PrtlBC::AXIS, PrtlBC::AXIS} + { PrtlBC::HORIZON, PrtlBC::ABSORB }, + { PrtlBC::AXIS, PrtlBC::AXIS } }; assert_equal(params_qks_2d.get("scales.B0"), @@ -579,86 +579,3 @@ auto main(int argc, char* argv[]) -> int { return 0; } - -// const auto mink_1d = R"( -// [simulation] -// name = "" -// engine = "" -// runtime = "" - -// [grid] -// resolution = "" -// extent = "" - -// [grid.metric] -// metric = "" -// qsph_r0 = "" -// qsph_h = "" -// ks_a = "" - -// [grid.boundaries] -// fields = "" -// particles = "" -// absorb_d = "" -// absorb_coeff = "" - -// [scales] -// larmor0 = "" -// skindepth0 = "" - -// [algorithms] -// current_filters = "" - -// [algorithms.toggles] -// fieldsolver = "" -// deposit = "" - -// [algorithms.timestep] -// CFL = "" -// correction = "" - -// [algorithms.gr] -// pusher_eps = "" -// pusher_niter = "" - -// [algorithms.gca] -// e_ovr_b_max = "" -// larmor_max = "" 
- -// [algorithms.synchrotron] -// gamma_rad = "" - -// [particles] -// ppc0 = "" -// use_weights = "" -// sort_interval = "" - -// [[particles.species]] -// label = "" -// mass = "" -// charge = "" -// maxnpart = "" -// pusher = "" -// n_payloads = "" -// cooling = "" -// [setup] - -// [output] -// fields = "" -// particles = "" -// format = "" -// mom_smooth = "" -// fields_stride = "" -// prtl_stride = "" -// interval = "" -// interval_time = "" - -// [output.debug] -// as_is = "" -// ghosts = "" - -// [diagnostics] -// interval = "" -// log_level = "" -// blocking_timers = "" -// )"_toml; diff --git a/src/global/arch/directions.h b/src/global/arch/directions.h index 19cf182d6..ccd4e67b0 100644 --- a/src/global/arch/directions.h +++ b/src/global/arch/directions.h @@ -132,8 +132,8 @@ namespace dir { using dirs_t = std::vector>; template - inline auto operator<<(std::ostream& os, const direction_t& dir) - -> std::ostream& { + inline auto operator<<(std::ostream& os, + const direction_t& dir) -> std::ostream& { for (auto& d : dir) { os << std::setw(2) << std::left; if (d > 0) { @@ -175,81 +175,81 @@ namespace dir { template <> struct Directions { inline static const dirs_t all = { - {-1, -1}, - {-1, 0}, - {-1, 1}, - { 0, -1}, - { 0, 1}, - { 1, -1}, - { 1, 0}, - { 1, 1} + { -1, -1 }, + { -1, 0 }, + { -1, 1 }, + { 0, -1 }, + { 0, 1 }, + { 1, -1 }, + { 1, 0 }, + { 1, 1 } }; inline static const dirs_t orth = { - {-1, 0}, - { 0, -1}, - { 0, 1}, - { 1, 0} + { -1, 0 }, + { 0, -1 }, + { 0, 1 }, + { 1, 0 } }; inline static const dirs_t unique = { - { 0, 1}, - { 1, 1}, - { 1, 0}, - {-1, 1} + { 0, 1 }, + { 1, 1 }, + { 1, 0 }, + { -1, 1 } }; }; template <> struct Directions { inline static const dirs_t all = { - {-1, -1, -1}, - {-1, -1, 0}, - {-1, -1, 1}, - {-1, 0, -1}, - {-1, 0, 0}, - {-1, 0, 1}, - {-1, 1, -1}, - {-1, 1, 0}, - {-1, 1, 1}, - { 0, -1, -1}, - { 0, -1, 0}, - { 0, -1, 1}, - { 0, 0, -1}, - { 0, 0, 1}, - { 0, 1, -1}, - { 0, 1, 0}, - { 0, 1, 1}, - { 1, -1, -1}, - { 
1, -1, 0}, - { 1, -1, 1}, - { 1, 0, -1}, - { 1, 0, 0}, - { 1, 0, 1}, - { 1, 1, -1}, - { 1, 1, 0}, - { 1, 1, 1} + { -1, -1, -1 }, + { -1, -1, 0 }, + { -1, -1, 1 }, + { -1, 0, -1 }, + { -1, 0, 0 }, + { -1, 0, 1 }, + { -1, 1, -1 }, + { -1, 1, 0 }, + { -1, 1, 1 }, + { 0, -1, -1 }, + { 0, -1, 0 }, + { 0, -1, 1 }, + { 0, 0, -1 }, + { 0, 0, 1 }, + { 0, 1, -1 }, + { 0, 1, 0 }, + { 0, 1, 1 }, + { 1, -1, -1 }, + { 1, -1, 0 }, + { 1, -1, 1 }, + { 1, 0, -1 }, + { 1, 0, 0 }, + { 1, 0, 1 }, + { 1, 1, -1 }, + { 1, 1, 0 }, + { 1, 1, 1 } }; inline static const dirs_t orth = { - {-1, 0, 0}, - { 0, -1, 0}, - { 0, 0, -1}, - { 0, 0, 1}, - { 0, 1, 0}, - { 1, 0, 0} + { -1, 0, 0 }, + { 0, -1, 0 }, + { 0, 0, -1 }, + { 0, 0, 1 }, + { 0, 1, 0 }, + { 1, 0, 0 } }; inline static const dirs_t unique = { - { 0, 0, 1}, - { 0, 1, 0}, - { 1, 0, 0}, - { 1, 1, 0}, - {-1, 1, 0}, - { 0, 1, 1}, - { 0, -1, 1}, - { 1, 0, 1}, - {-1, 0, 1}, - { 1, 1, 1}, - {-1, 1, 1}, - { 1, -1, 1}, - { 1, 1, -1} + { 0, 0, 1 }, + { 0, 1, 0 }, + { 1, 0, 0 }, + { 1, 1, 0 }, + { -1, 1, 0 }, + { 0, 1, 1 }, + { 0, -1, 1 }, + { 1, 0, 1 }, + { -1, 0, 1 }, + { 1, 1, 1 }, + { -1, 1, 1 }, + { 1, -1, 1 }, + { 1, 1, -1 } }; }; diff --git a/src/global/arch/kokkos_aliases.cpp b/src/global/arch/kokkos_aliases.cpp index 4311a40bd..6c15e3d52 100644 --- a/src/global/arch/kokkos_aliases.cpp +++ b/src/global/arch/kokkos_aliases.cpp @@ -5,18 +5,18 @@ #include template <> -auto CreateRangePolicy(const tuple_t& i1, - const tuple_t& i2) - -> range_t { +auto CreateRangePolicy( + const tuple_t& i1, + const tuple_t& i2) -> range_t { index_t i1min = i1[0]; index_t i1max = i2[0]; return Kokkos::RangePolicy(i1min, i1max); } template <> -auto CreateRangePolicy(const tuple_t& i1, - const tuple_t& i2) - -> range_t { +auto CreateRangePolicy( + const tuple_t& i1, + const tuple_t& i2) -> range_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -26,9 +26,9 @@ auto CreateRangePolicy(const tuple_t& i1, } template <> -auto 
CreateRangePolicy(const tuple_t& i1, - const tuple_t& i2) - -> range_t { +auto CreateRangePolicy( + const tuple_t& i1, + const tuple_t& i2) -> range_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -41,18 +41,18 @@ auto CreateRangePolicy(const tuple_t& i1, } template <> -auto CreateRangePolicyOnHost(const tuple_t& i1, - const tuple_t& i2) - -> range_h_t { +auto CreateRangePolicyOnHost( + const tuple_t& i1, + const tuple_t& i2) -> range_h_t { index_t i1min = i1[0]; index_t i1max = i2[0]; return Kokkos::RangePolicy(i1min, i1max); } template <> -auto CreateRangePolicyOnHost(const tuple_t& i1, - const tuple_t& i2) - -> range_h_t { +auto CreateRangePolicyOnHost( + const tuple_t& i1, + const tuple_t& i2) -> range_h_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -62,9 +62,9 @@ auto CreateRangePolicyOnHost(const tuple_t& i1, } template <> -auto CreateRangePolicyOnHost(const tuple_t& i1, - const tuple_t& i2) - -> range_h_t { +auto CreateRangePolicyOnHost( + const tuple_t& i1, + const tuple_t& i2) -> range_h_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -76,11 +76,11 @@ auto CreateRangePolicyOnHost(const tuple_t& i1, { i1max, i2max, i3max }); } -// auto WaitAndSynchronize(bool debug_only) -> void { -// if (debug_only) { -// #ifndef DEBUG -// return; -// #endif -// } -// Kokkos::fence(); -// } \ No newline at end of file +auto WaitAndSynchronize(bool debug_only) -> void { + if (debug_only) { +#ifndef DEBUG + return; +#endif + } + Kokkos::fence(); +} diff --git a/src/global/defaults.h b/src/global/defaults.h index be92acbf9..b7b0107e7 100644 --- a/src/global/defaults.h +++ b/src/global/defaults.h @@ -22,9 +22,9 @@ namespace ntt::defaults { const unsigned short current_filters = 0; - const std::string em_pusher = "Boris"; - const std::string ph_pusher = "Photon"; - const std::size_t sort_interval = 100; + const std::string em_pusher = "Boris"; + const std::string ph_pusher = "Photon"; + const 
std::size_t clear_interval = 100; namespace qsph { const real_t r0 = 0.0; @@ -45,7 +45,7 @@ namespace ntt::defaults { const real_t ds_frac = 0.01; const real_t coeff = 1.0; } // namespace absorb - } // namespace bc + } // namespace bc namespace output { const std::string format = "hdf5"; diff --git a/src/global/global.cpp b/src/global/global.cpp index 434740446..ec22fd2f3 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -9,17 +9,7 @@ void ntt::GlobalInitialize(int argc, char* argv[]) { Kokkos::initialize(argc, argv); #if defined(MPI_ENABLED) - int required = MPI_THREAD_MULTIPLE; - int provided; - MPI_Init_thread(&argc, - &argv, - required, - &provided); - if (provided != required) { - std::cerr << "MPI_Init_thread() did not provide the requested threading support." << std::endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - //MPI_Init(&argc, &argv); + MPI_Init(&argc, &argv); #endif // MPI_ENABLED } diff --git a/src/global/global.h b/src/global/global.h index ad524fb0e..dad6afccc 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -209,7 +209,7 @@ namespace Timer { PrintTitle = 1 << 1, AutoConvert = 1 << 2, PrintOutput = 1 << 3, - PrintSorting = 1 << 4, + PrintPrtlClear = 1 << 4, PrintCheckpoint = 1 << 5, PrintNormed = 1 << 6, Default = PrintNormed | PrintTotal | PrintTitle | AutoConvert, diff --git a/src/global/utils/diag.cpp b/src/global/utils/diag.cpp index 0a499dd56..c053cdacf 100644 --- a/src/global/utils/diag.cpp +++ b/src/global/utils/diag.cpp @@ -21,8 +21,9 @@ #include namespace diag { - auto npart_stats(std::size_t npart, std::size_t maxnpart) - -> std::vector> { + auto npart_stats( + std::size_t npart, + std::size_t maxnpart) -> std::vector> { auto stats = std::vector>(); #if !defined(MPI_ENABLED) stats.push_back( @@ -84,7 +85,7 @@ namespace diag { const std::vector& species_labels, const std::vector& species_npart, const std::vector& species_maxnpart, - bool print_sorting, + bool print_prtl_clear, bool print_output, bool 
print_checkpoint, bool print_colors) { @@ -96,8 +97,8 @@ namespace diag { if (species_labels.size() == 0) { diag_flags ^= Diag::Species; } - if (print_sorting) { - timer_flags |= Timer::PrintSorting; + if (print_prtl_clear) { + timer_flags |= Timer::PrintPrtlClear; } if (print_output) { timer_flags |= Timer::PrintOutput; diff --git a/src/global/utils/diag.h b/src/global/utils/diag.h index 9951602f8..30cca5705 100644 --- a/src/global/utils/diag.h +++ b/src/global/utils/diag.h @@ -34,9 +34,9 @@ namespace diag { * @param species_labels (vector of particle labels) * @param npart (per each species) * @param maxnpart (per each species) - * @param sorting_step (if true, particles were sorted) - * @param output_step (if true, output was written) - * @param checkpoint_step (if true, checkpoint was written) + * @param prtlclear (if true, dead particles were removed) + * @param output (if true, output was written) + * @param checkpoint (if true, checkpoint was written) * @param colorful_print (if true, print with colors) */ void printDiagnostics(std::size_t, diff --git a/src/global/utils/timer.cpp b/src/global/utils/timer.cpp index b5f4408ca..7d5a9bebd 100644 --- a/src/global/utils/timer.cpp +++ b/src/global/utils/timer.cpp @@ -127,10 +127,11 @@ namespace timer { return timer_stats; } - auto Timers::printAll(TimerFlags flags, std::size_t npart, std::size_t ncells) const - -> std::string { - const std::vector extras { "Sorting", "Output", "Checkpoint" }; - const auto stats = gather(extras, npart, ncells); + auto Timers::printAll(TimerFlags flags, + std::size_t npart, + std::size_t ncells) const -> std::string { + const std::vector extras { "PrtlClear", "Output", "Checkpoint" }; + const auto stats = gather(extras, npart, ncells); if (stats.empty()) { return ""; } @@ -253,8 +254,8 @@ namespace timer { } } - // print extra timers for output/checkpoint/sorting - const std::vector extras_f { Timer::PrintSorting, + // print extra timers for output/checkpoint/prtlClear + const 
std::vector extras_f { Timer::PrintPrtlClear, Timer::PrintOutput, Timer::PrintCheckpoint }; for (auto i { 0u }; i < extras.size(); ++i) { diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 0deb73c6f..b4808f12a 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -90,7 +90,7 @@ namespace kernel::sr { Force(const F& pgen_force) : Force { pgen_force, - {ZERO, ZERO, ZERO}, + { ZERO, ZERO, ZERO }, ZERO, ZERO } { From 541633ead6dfde4444836c901059e9b0221e2e0a Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 22 Jan 2025 15:21:53 -0500 Subject: [PATCH 35/52] toml schema --- .taplo.toml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .taplo.toml diff --git a/.taplo.toml b/.taplo.toml new file mode 100644 index 000000000..423a47594 --- /dev/null +++ b/.taplo.toml @@ -0,0 +1,6 @@ +[formatting] + align_entries = true + indent_tables = true + indent_entries = true + trailing_newline = true + align_comments = true From 9d0c8dbd5f85983d5ef05b970d99ae9742cd4f66 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 22 Jan 2025 15:22:41 -0500 Subject: [PATCH 36/52] nix shells --- dev/nix/adios2.nix | 61 ++++++++++++++++++++++++++++++++++++++++++++++ dev/nix/shell.nix | 56 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 dev/nix/adios2.nix create mode 100644 dev/nix/shell.nix diff --git a/dev/nix/adios2.nix b/dev/nix/adios2.nix new file mode 100644 index 000000000..19c706aa4 --- /dev/null +++ b/dev/nix/adios2.nix @@ -0,0 +1,61 @@ +{ + pkgs ? import { }, + hdf5 ? false, + mpi ? 
false, +}: + +let + name = "adios2"; + version = "2.10.2"; +in +pkgs.stdenv.mkDerivation { + pname = "${name}${if hdf5 then "-hdf5" else ""}${if mpi then "-mpi" else ""}"; + version = "${version}"; + src = pkgs.fetchgit { + url = "https://github.com/ornladios/ADIOS2/"; + rev = "v${version}"; + sha256 = "sha256-NVyw7xoPutXeUS87jjVv1YxJnwNGZAT4QfkBLzvQbwg="; + }; + + nativeBuildInputs = + with pkgs; + [ + cmake + libgcc + perl + breakpointHook + ] + ++ (if mpi then [ openmpi ] else [ ]); + + buildInputs = if hdf5 then (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5 ]) else [ ]; + + configurePhase = '' + cmake -B build $src \ + -D CMAKE_CXX_STANDARD=17 \ + -D CMAKE_CXX_EXTENSIONS=OFF \ + -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ + -D BUILD_SHARED_LIBS=ON \ + -D ADIOS2_USE_HDF5=${if hdf5 then "ON" else "OFF"} \ + -D ADIOS2_USE_Python=OFF \ + -D ADIOS2_USE_Fortran=OFF \ + -D ADIOS2_USE_ZeroMQ=OFF \ + -D BUILD_TESTING=OFF \ + -D ADIOS2_BUILD_EXAMPLES=OFF \ + -D ADIOS2_USE_MPI=${if mpi then "ON" else "OFF"} \ + -D ADIOS2_HAVE_HDF5_VOL=OFF \ + -D CMAKE_BUILD_TYPE=Release + ''; + + buildPhase = '' + cmake --build build -j + ''; + + installPhase = '' + sed -i '/if(CMAKE_INSTALL_COMPONENT/,/^[[:space:]]&endif()$/d' build/cmake/install/post/cmake_install.cmake + cmake --install build --prefix $out + chmod +x build/cmake/install/post/generate-adios2-config.sh + sh build/cmake/install/post/generate-adios2-config.sh $out + ''; + + enableParallelBuilding = true; +} diff --git a/dev/nix/shell.nix b/dev/nix/shell.nix new file mode 100644 index 000000000..22358a837 --- /dev/null +++ b/dev/nix/shell.nix @@ -0,0 +1,56 @@ +{ + pkgs ? import { }, + mpi ? false, + hdf5 ? 
false, +}: + +let + name = "entity-dev"; + compilerPkg = pkgs.gcc13; + compilerCXX = "g++"; + compilerCC = "gcc"; + adios2Pkg = (pkgs.callPackage ./adios2.nix { inherit pkgs mpi hdf5; }); +in +pkgs.mkShell { + name = "${name}-env"; + nativeBuildInputs = + with pkgs; + [ + zlib + cmake + + compilerPkg + + clang-tools + + adios2Pkg + python312 + python312Packages.jupyter + + cmake-format + neocmakelsp + black + pyright + taplo + vscode-langservers-extracted + ] + ++ (if mpi then [ pkgs.openmpi ] else [ ]) + ++ (if hdf5 then (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5 ]) else [ ]); + + LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath ([ + pkgs.clang19Stdenv.cc.cc + pkgs.zlib + ]); + + shellHook = '' + BLUE='\033[0;34m' + NC='\033[0m' + export CC=$(which ${compilerCC}) + export CXX=$(which ${compilerCXX}) + export CMAKE_CXX_COMPILER=$(which ${compilerCXX}) + export CMAKE_C_COMPILER=$(which ${compilerCC}) + + echo "" + echo -e "${name} nix-shell activated: ''\${BLUE}$(which ${compilerCXX})''\${NC}" + ''; +} From 5d1d8f71bcc24b7a48bfb1d8928b6355be027b3c Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 22 Jan 2025 15:23:57 -0500 Subject: [PATCH 37/52] comment --- src/framework/domain/comm_mpi.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 370c02b18..eb77ecbb3 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -345,6 +345,8 @@ namespace comm { auto& this_weight = species.weight; auto& this_tag = species.tag; + // @TODO_1.2.0: communicate payloads + // number of arrays of each type to send/recv const unsigned short NREALS = 4 + static_cast( D == Dim::_2D and C != Coord::Cart); From 179928f6f99a5bcb4c4aa1cd37795a9e66c9a6cb Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 23 Jan 2025 09:12:58 -0500 Subject: [PATCH 38/52] plds are now 2d array --- src/checkpoint/reader.cpp | 38 ++++-- src/checkpoint/reader.h | 8 ++ src/checkpoint/tests/CMakeLists.txt | 2 +- 
src/checkpoint/tests/checkpoint-mpi.cpp | 149 ++++++++++++++++-------- src/checkpoint/writer.cpp | 33 +++++- src/checkpoint/writer.h | 8 ++ src/framework/containers/particles.cpp | 102 +++++----------- src/framework/containers/particles.h | 35 ++---- src/framework/domain/checkpoint.cpp | 32 ++--- src/framework/domain/comm_mpi.hpp | 1 + src/framework/tests/particles.cpp | 8 +- src/global/arch/mpi_tags.h | 13 ++- 12 files changed, 250 insertions(+), 179 deletions(-) diff --git a/src/checkpoint/reader.cpp b/src/checkpoint/reader.cpp index e89b7d384..208972561 100644 --- a/src/checkpoint/reader.cpp +++ b/src/checkpoint/reader.cpp @@ -35,16 +35,17 @@ namespace checkpoint { reader.Get(field_var, array_h.data(), adios2::Mode::Sync); Kokkos::deep_copy(array, array_h); } else { - raise::Error(fmt::format("Field variable: %s not found", field.c_str()), HERE); + raise::Error(fmt::format("Field variable: %s not found", field.c_str()), + HERE); } } - auto ReadParticleCount(adios2::IO& io, - adios2::Engine& reader, - unsigned short s, - std::size_t local_dom, - std::size_t ndomains) - -> std::pair { + auto ReadParticleCount( + adios2::IO& io, + adios2::Engine& reader, + unsigned short s, + std::size_t local_dom, + std::size_t ndomains) -> std::pair { logger::Checkpoint(fmt::format("Reading particle count for: %d", s + 1), HERE); auto npart_var = io.InquireVariable( fmt::format("s%d_npart", s + 1)); @@ -109,6 +110,29 @@ namespace checkpoint { } } + void ReadParticlePayloads(adios2::IO& io, + adios2::Engine& reader, + unsigned short s, + array_t& array, + std::size_t nplds, + std::size_t count, + std::size_t offset) { + logger::Checkpoint(fmt::format("Reading quantity: s%d_plds", s + 1), HERE); + auto var = io.InquireVariable(fmt::format("s%d_plds", s + 1)); + if (var) { + var.SetSelection(adios2::Box({ offset, 0 }, { count, nplds })); + const auto slice = std::pair { 0, count }; + auto array_h = Kokkos::create_mirror_view(array); + reader.Get(var, + Kokkos::subview(array_h, 
slice, range_tuple_t(0, nplds)).data(), + adios2::Mode::Sync); + Kokkos::deep_copy(Kokkos::subview(array, slice, range_tuple_t(0, nplds)), + Kokkos::subview(array_h, slice, range_tuple_t(0, nplds))); + } else { + raise::Error(fmt::format("Variable: s%d_plds not found", s + 1), HERE); + } + } + template void ReadFields(adios2::IO&, adios2::Engine&, const std::string&, diff --git a/src/checkpoint/reader.h b/src/checkpoint/reader.h index 2ea11bdb1..e5a91ab75 100644 --- a/src/checkpoint/reader.h +++ b/src/checkpoint/reader.h @@ -45,6 +45,14 @@ namespace checkpoint { std::size_t, std::size_t); + void ReadParticlePayloads(adios2::IO&, + adios2::Engine&, + unsigned short, + array_t&, + std::size_t, + std::size_t, + std::size_t); + } // namespace checkpoint #endif // CHECKPOINT_READER_H diff --git a/src/checkpoint/tests/CMakeLists.txt b/src/checkpoint/tests/CMakeLists.txt index 10836554b..54400652e 100644 --- a/src/checkpoint/tests/CMakeLists.txt +++ b/src/checkpoint/tests/CMakeLists.txt @@ -25,5 +25,5 @@ endfunction() if(NOT ${mpi}) gen_test(checkpoint-nompi) else() - # gen_test(checkpoint-mpi) + gen_test(checkpoint-mpi) endif() diff --git a/src/checkpoint/tests/checkpoint-mpi.cpp b/src/checkpoint/tests/checkpoint-mpi.cpp index 3ce4bab14..f97202ab1 100644 --- a/src/checkpoint/tests/checkpoint-mpi.cpp +++ b/src/checkpoint/tests/checkpoint-mpi.cpp @@ -39,36 +39,53 @@ auto main(int argc, char* argv[]) -> int { // | | | // | 0 | 1 | // |------|------| - constexpr auto g_nx1 = 20; - constexpr auto g_nx2 = 15; - constexpr auto g_nx1_gh = g_nx1 + 4 * N_GHOSTS; - constexpr auto g_nx2_gh = g_nx2 + 4 * N_GHOSTS; + const std::size_t g_nx1 = 20; + const std::size_t g_nx2 = 15; + const std::size_t g_nx1_gh = g_nx1 + 4 * N_GHOSTS; + const std::size_t g_nx2_gh = g_nx2 + 4 * N_GHOSTS; - constexpr auto l_nx1 = 10; - constexpr auto l_nx2 = (rank < 2) ? 10 : 5; + const std::size_t l_nx1 = 10; + const std::size_t l_nx2 = (rank < 2) ? 
10 : 5; - constexpr auto l_nx1_gh = l_nx1 + 2 * N_GHOSTS; - constexpr auto l_nx2_gh = l_nx2 + 2 * N_GHOSTS; + const std::size_t l_nx1_gh = l_nx1 + 2 * N_GHOSTS; + const std::size_t l_nx2_gh = l_nx2 + 2 * N_GHOSTS; - constexpr auto l_corner_x1 = (rank % 2) * l_nx1; - constexpr auto l_corner_x2 = (rank / 2) * l_nx2; + const std::size_t l_corner_x1 = (rank % 2 == 0) ? 0 : l_nx1_gh; + const std::size_t l_corner_x2 = (rank < 2) ? 0 : l_nx2_gh; - constexpr auto i1min = N_GHOSTS; - constexpr auto i2min = N_GHOSTS; - constexpr auto i1max = l_nx1 + N_GHOSTS; - constexpr auto i2max = l_nx2 + N_GHOSTS; + const std::size_t i1min = N_GHOSTS; + const std::size_t i2min = N_GHOSTS; + const std::size_t i1max = l_nx1 + N_GHOSTS; + const std::size_t i2max = l_nx2 + N_GHOSTS; - constexpr auto npart1 = (rank % 2 + rank) * 23 + 100; - constexpr auto npart2 = (rank % 2 + rank) * 37 + 100; + const std::size_t npart1 = (rank % 2 + rank) * 23 + 100; + const std::size_t npart2 = (rank % 2 + rank) * 37 + 100; + + std::size_t npart1_offset = 0; + std::size_t npart2_offset = 0; + + std::size_t npart1_globtot = 0; + std::size_t npart2_globtot = 0; + + for (auto r = 0; r < rank - 1; ++r) { + npart1_offset += (r % 2 + r) * 23 + 100; + npart2_offset += (r % 2 + r) * 37 + 100; + } + + for (auto r = 0; r < size; ++r) { + npart1_globtot += (r % 2 + r) * 23 + 100; + npart2_globtot += (r % 2 + r) * 37 + 100; + } // init data ndfield_t field1 { "fld1", l_nx1_gh, l_nx2_gh }; ndfield_t field2 { "fld2", l_nx1_gh, l_nx2_gh }; - array_t i1 { "i_1", npart1 }; - array_t u1 { "u_1", npart1 }; - array_t i2 { "i_2", npart2 }; - array_t u2 { "u_2", npart2 }; + array_t i1 { "i_1", npart1 }; + array_t u1 { "u_1", npart1 }; + array_t i2 { "i_2", npart2 }; + array_t u2 { "u_2", npart2 }; + array_t plds1 { "plds_1", npart1, 3 }; { // fill data @@ -93,8 +110,11 @@ auto main(int argc, char* argv[]) -> int { "fillPrtl1", npart1, Lambda(index_t p) { - u1(p) = static_cast(p); - i1(p) = static_cast(p); + u1(p) = 
static_cast(p); + i1(p) = static_cast(p); + plds1(p, 0) = static_cast(p); + plds1(p, 1) = static_cast(p * p); + plds1(p, 2) = static_cast(p * p * p); }); Kokkos::parallel_for( "fillPrtl2", @@ -115,8 +135,9 @@ auto main(int argc, char* argv[]) -> int { writer.defineFieldVariables(SimEngine::GRPIC, { g_nx1_gh, g_nx2_gh }, { l_corner_x1, l_corner_x2 }, - { l_nx1, l_nx2 }); - writer.defineParticleVariables(Coord::Sph, Dim::_2D, 2, { 0, 0 }); + { l_nx1_gh, l_nx2_gh }); + + writer.defineParticleVariables(Coord::Sph, Dim::_2D, 2, { 3, 0 }); writer.beginSaving(0, 0.0); @@ -126,41 +147,66 @@ auto main(int argc, char* argv[]) -> int { writer.savePerDomainVariable("s1_npart", 1, 0, npart1); writer.savePerDomainVariable("s2_npart", 1, 0, npart2); - writer.saveParticleQuantity("s1_i1", npart1, 0, npart1, i1); - writer.saveParticleQuantity("s1_ux1", npart1, 0, npart1, u1); - writer.saveParticleQuantity("s2_i1", npart2, 0, npart2, i2); - writer.saveParticleQuantity("s2_ux1", npart2, 0, npart2, u2); + writer.saveParticleQuantity("s1_i1", + npart1_globtot, + npart1_offset, + npart1, + i1); + writer.saveParticleQuantity("s1_ux1", + npart1_globtot, + npart1_offset, + npart1, + u1); + writer.saveParticleQuantity("s2_i1", + npart2_globtot, + npart2_offset, + npart2, + i2); + writer.saveParticleQuantity("s2_ux1", + npart2_globtot, + npart2_offset, + npart2, + u2); + + writer.saveParticlePayloads("s1_plds", + 3, + npart1_globtot, + npart1_offset, + npart1, + plds1); writer.endSaving(); } { // read checkpoint - ndfield_t field1_read { "fld1_read", nx1_gh, nx2_gh, nx3_gh }; - ndfield_t field2_read { "fld2_read", nx1_gh, nx2_gh, nx3_gh }; + ndfield_t field1_read { "fld1_read", l_nx1_gh, l_nx2_gh }; + ndfield_t field2_read { "fld2_read", l_nx1_gh, l_nx2_gh }; - array_t i1_read { "i_1", npart1 }; - array_t u1_read { "u_1", npart1 }; - array_t i2_read { "i_2", npart2 }; - array_t u2_read { "u_2", npart2 }; + array_t i1_read { "i_1", npart1 }; + array_t u1_read { "u_1", npart1 }; + array_t 
i2_read { "i_2", npart2 }; + array_t u2_read { "u_2", npart2 }; + array_t plds1_read { "plds_1", npart1, 3 }; adios2::IO io = adios.DeclareIO("checkpointRead"); adios2::Engine reader = io.Open("checkpoints/step-00000000.bp", adios2::Mode::Read); reader.BeginStep(); - auto fieldRange = adios2::Box({ 0, 0, 0, 0 }, - { nx1_gh, nx2_gh, nx3_gh, 6 }); - ReadFields(io, reader, "em", fieldRange, field1_read); - ReadFields(io, reader, "em0", fieldRange, field2_read); + auto fieldRange = adios2::Box({ l_corner_x1, l_corner_x2, 0 }, + { l_nx1_gh, l_nx2_gh, 6 }); + ReadFields(io, reader, "em", fieldRange, field1_read); + ReadFields(io, reader, "em0", fieldRange, field2_read); - auto [nprtl1, noff1] = ReadParticleCount(io, reader, 0, 0, 1); - auto [nprtl2, noff2] = ReadParticleCount(io, reader, 1, 0, 1); + auto [nprtl1, noff1] = ReadParticleCount(io, reader, 0, rank, size); + auto [nprtl2, noff2] = ReadParticleCount(io, reader, 1, rank, size); ReadParticleData(io, reader, "ux1", 0, u1_read, nprtl1, noff1); ReadParticleData(io, reader, "ux1", 1, u2_read, nprtl2, noff2); ReadParticleData(io, reader, "i1", 0, i1_read, nprtl1, noff1); ReadParticleData(io, reader, "i1", 1, i2_read, nprtl2, noff2); + ReadParticlePayloads(io, reader, 0, plds1_read, 3, nprtl1, noff1); reader.EndStep(); reader.Close(); @@ -168,15 +214,13 @@ auto main(int argc, char* argv[]) -> int { // check the validity Kokkos::parallel_for( "checkFields", - CreateRangePolicy({ 0, 0, 0 }, { nx1_gh, nx2_gh, nx3_gh }), - Lambda(index_t i1, index_t i2, index_t i3) { + CreateRangePolicy({ 0, 0 }, { l_nx1_gh, l_nx2_gh }), + Lambda(index_t i1, index_t i2) { for (int i = 0; i < 6; ++i) { - if (not cmp::AlmostEqual(field1(i1, i2, i3, i), - field1_read(i1, i2, i3, i))) { + if (not cmp::AlmostEqual(field1(i1, i2, i), field1_read(i1, i2, i))) { raise::KernelError(HERE, "Field1 read failed"); } - if (not cmp::AlmostEqual(field2(i1, i2, i3, i), - field2_read(i1, i2, i3, i))) { + if (not cmp::AlmostEqual(field2(i1, i2, i), 
field2_read(i1, i2, i))) { raise::KernelError(HERE, "Field2 read failed"); } } @@ -184,12 +228,12 @@ auto main(int argc, char* argv[]) -> int { raise::ErrorIf(npart1 != nprtl1, "Particle count 1 mismatch", HERE); raise::ErrorIf(npart2 != nprtl2, "Particle count 2 mismatch", HERE); - raise::ErrorIf(noff1 != 0, "Particle offset 1 mismatch", HERE); - raise::ErrorIf(noff2 != 0, "Particle offset 2 mismatch", HERE); + raise::ErrorIf(noff1 != npart1_offset, "Particle offset 1 mismatch", HERE); + raise::ErrorIf(noff2 != npart2_offset, "Particle offset 2 mismatch", HERE); Kokkos::parallel_for( "checkPrtl1", - npart1, + nprtl1, Lambda(index_t p) { if (not cmp::AlmostEqual(u1(p), u1_read(p))) { raise::KernelError(HERE, "u1 read failed"); @@ -197,10 +241,15 @@ auto main(int argc, char* argv[]) -> int { if (i1(p) != i1_read(p)) { raise::KernelError(HERE, "i1 read failed"); } + for (auto l = 0; l < 3; ++l) { + if (not cmp::AlmostEqual(plds1(p, l), plds1_read(p, l))) { + raise::KernelError(HERE, "plds1 read failed"); + } + } }); Kokkos::parallel_for( "checkPrtl2", - npart2, + nprtl2, Lambda(index_t p) { if (not cmp::AlmostEqual(u2(p), u2_read(p))) { raise::KernelError(HERE, "u2 read failed"); diff --git a/src/checkpoint/writer.cpp b/src/checkpoint/writer.cpp index 9ef0b51c7..a12e3ef26 100644 --- a/src/checkpoint/writer.cpp +++ b/src/checkpoint/writer.cpp @@ -84,6 +84,7 @@ namespace checkpoint { { adios2::UnknownDim }, { adios2::UnknownDim }, { adios2::UnknownDim }); + for (auto d { 0u }; d < dim; ++d) { m_io.DefineVariable(fmt::format("s%d_i%d", s + 1, d + 1), { adios2::UnknownDim }, @@ -102,18 +103,21 @@ namespace checkpoint { { adios2::UnknownDim }, { adios2::UnknownDim }); } + if (dim == Dim::_2D and C != ntt::Coord::Cart) { m_io.DefineVariable(fmt::format("s%d_phi", s + 1), { adios2::UnknownDim }, { adios2::UnknownDim }, { adios2::UnknownDim }); } + for (auto d { 0u }; d < 3; ++d) { m_io.DefineVariable(fmt::format("s%d_ux%d", s + 1, d + 1), { adios2::UnknownDim }, { 
adios2::UnknownDim }, { adios2::UnknownDim }); } + m_io.DefineVariable(fmt::format("s%d_tag", s + 1), { adios2::UnknownDim }, { adios2::UnknownDim }, @@ -122,11 +126,11 @@ namespace checkpoint { { adios2::UnknownDim }, { adios2::UnknownDim }, { adios2::UnknownDim }); - for (auto p { 0u }; p < nplds[s]; ++p) { - m_io.DefineVariable(fmt::format("s%d_pld%d", s + 1, p + 1), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); + if (nplds[s] > 0) { + m_io.DefineVariable(fmt::format("s%d_plds", s + 1), + { adios2::UnknownDim, nplds[s] }, + { adios2::UnknownDim, 0 }, + { adios2::UnknownDim, nplds[s] }); } } } @@ -238,6 +242,25 @@ namespace checkpoint { m_writer.Put(var, data_sub.data(), adios2::Mode::Sync); } + void Writer::saveParticlePayloads(const std::string& quantity, + std::size_t nplds, + std::size_t glob_total, + std::size_t loc_offset, + std::size_t loc_size, + const array_t& data) { + const auto slice = range_tuple_t(0, loc_size); + auto var = m_io.InquireVariable(quantity); + + var.SetShape({ glob_total, nplds }); + var.SetSelection( + adios2::Box({ loc_offset, 0 }, { loc_size, nplds })); + + auto data_h = Kokkos::create_mirror_view(data); + Kokkos::deep_copy(data_h, data); + auto data_sub = Kokkos::subview(data_h, slice, range_tuple_t(0, nplds)); + m_writer.Put(var, data_sub.data(), adios2::Mode::Sync); + } + template void Writer::savePerDomainVariable(const std::string&, std::size_t, std::size_t, diff --git a/src/checkpoint/writer.h b/src/checkpoint/writer.h index 34b5f043f..346bee24a 100644 --- a/src/checkpoint/writer.h +++ b/src/checkpoint/writer.h @@ -69,10 +69,18 @@ namespace checkpoint { std::size_t, const array_t&); + void saveParticlePayloads(const std::string&, + std::size_t, + std::size_t, + std::size_t, + std::size_t, + const array_t&); + void defineFieldVariables(const ntt::SimEngine&, const std::vector&, const std::vector&, const std::vector&); + void defineParticleVariables(const ntt::Coord&, Dimension, std::size_t, diff 
--git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index 758118d6c..fc8214824 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -28,55 +28,43 @@ namespace ntt { const Cooling& cooling, unsigned short npld) : ParticleSpecies(index, label, m, ch, maxnpart, pusher, use_gca, cooling, npld) { - i1 = array_t { label + "_i1", maxnpart }; - i1_h = Kokkos::create_mirror_view(i1); - dx1 = array_t { label + "_dx1", maxnpart }; - dx1_h = Kokkos::create_mirror_view(dx1); - - i1_prev = array_t { label + "_i1_prev", maxnpart }; - dx1_prev = array_t { label + "_dx1_prev", maxnpart }; - - ux1 = array_t { label + "_ux1", maxnpart }; - ux1_h = Kokkos::create_mirror_view(ux1); - ux2 = array_t { label + "_ux2", maxnpart }; - ux2_h = Kokkos::create_mirror_view(ux2); - ux3 = array_t { label + "_ux3", maxnpart }; - ux3_h = Kokkos::create_mirror_view(ux3); - - weight = array_t { label + "_w", maxnpart }; - weight_h = Kokkos::create_mirror_view(weight); - - tag = array_t { label + "_tag", maxnpart }; - tag_h = Kokkos::create_mirror_view(tag); - - for (unsigned short n { 0 }; n < npld; ++n) { - pld.push_back(array_t("pld", maxnpart)); - pld_h.push_back(Kokkos::create_mirror_view(pld[n])); - } - if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { - i2 = array_t { label + "_i2", maxnpart }; - i2_h = Kokkos::create_mirror_view(i2); - dx2 = array_t { label + "_dx2", maxnpart }; - dx2_h = Kokkos::create_mirror_view(dx2); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + i1 = array_t { label + "_i1", maxnpart }; + dx1 = array_t { label + "_dx1", maxnpart }; + i1_prev = array_t { label + "_i1_prev", maxnpart }; + dx1_prev = array_t { label + "_dx1_prev", maxnpart }; + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + i2 = array_t { label + "_i2", maxnpart }; + dx2 = array_t { label + "_dx2", maxnpart }; i2_prev = array_t { label + "_i2_prev", maxnpart }; dx2_prev = array_t { label + 
"_dx2_prev", maxnpart }; } - if ((D == Dim::_2D) && (C != Coord::Cart)) { - phi = array_t { label + "_phi", maxnpart }; - phi_h = Kokkos::create_mirror_view(phi); - } if constexpr (D == Dim::_3D) { - i3 = array_t { label + "_i3", maxnpart }; - i3_h = Kokkos::create_mirror_view(i3); - dx3 = array_t { label + "_dx3", maxnpart }; - dx3_h = Kokkos::create_mirror_view(dx3); - + i3 = array_t { label + "_i3", maxnpart }; + dx3 = array_t { label + "_dx3", maxnpart }; i3_prev = array_t { label + "_i3_prev", maxnpart }; dx3_prev = array_t { label + "_dx3_prev", maxnpart }; } + + ux1 = array_t { label + "_ux1", maxnpart }; + ux2 = array_t { label + "_ux2", maxnpart }; + ux3 = array_t { label + "_ux3", maxnpart }; + + weight = array_t { label + "_w", maxnpart }; + + tag = array_t { label + "_tag", maxnpart }; + + if (npld > 0) { + pld = array_t { label + "_pld", maxnpart, npld }; + } + + if ((D == Dim::_2D) && (C != Coord::Cart)) { + phi = array_t { label + "_phi", maxnpart }; + } } template @@ -205,9 +193,10 @@ namespace ntt { RemoveDeadInArray(phi, indices_alive); } - for (auto& payload : pld) { - RemoveDeadInArray(payload, indices_alive); - } + // for (auto& payload : pld) { + // // @TODO_1.2.0: fix + // RemoveDeadInArray(payload, indices_alive); + // } Kokkos::Experimental::fill( "TagAliveParticles", @@ -226,35 +215,6 @@ namespace ntt { m_is_sorted = true; } - template - void Particles::SyncHostDevice() { - Kokkos::deep_copy(i1_h, i1); - Kokkos::deep_copy(dx1_h, dx1); - Kokkos::deep_copy(ux1_h, ux1); - Kokkos::deep_copy(ux2_h, ux2); - Kokkos::deep_copy(ux3_h, ux3); - - Kokkos::deep_copy(tag_h, tag); - Kokkos::deep_copy(weight_h, weight); - - for (auto n { 0 }; n < npld(); ++n) { - Kokkos::deep_copy(pld_h[n], pld[n]); - } - - if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { - Kokkos::deep_copy(i2_h, i2); - Kokkos::deep_copy(dx2_h, dx2); - } - if constexpr (D == Dim::_3D) { - Kokkos::deep_copy(i3_h, i3); - Kokkos::deep_copy(dx3_h, dx3); - } - - if ((D == Dim::_2D) && (C 
!= Coord::Cart)) { - Kokkos::deep_copy(phi_h, phi); - } - } - template struct Particles; template struct Particles; template struct Particles; diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index 3ae68b402..9024fef1e 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -48,31 +48,22 @@ namespace ntt { public: // Cell indices of the current particle - array_t i1, i2, i3; + array_t i1, i2, i3; // Displacement of a particle within the cell - array_t dx1, dx2, dx3; + array_t dx1, dx2, dx3; // Three spatial components of the covariant 4-velocity (physical units) - array_t ux1, ux2, ux3; + array_t ux1, ux2, ux3; // Particle weights. - array_t weight; + array_t weight; // Previous timestep coordinates - array_t i1_prev, i2_prev, i3_prev; - array_t dx1_prev, dx2_prev, dx3_prev; + array_t i1_prev, i2_prev, i3_prev; + array_t dx1_prev, dx2_prev, dx3_prev; // Array to tag the particles - array_t tag; - // Array to store the particle load - std::vector> pld; + array_t tag; + // Array to store the particle payloads + array_t pld; // phi coordinate (for axisymmetry) - array_t phi; - - // host mirrors - array_mirror_t i1_h, i2_h, i3_h; - array_mirror_t dx1_h, dx2_h, dx3_h; - array_mirror_t ux1_h, ux2_h, ux3_h; - array_mirror_t weight_h; - array_mirror_t phi_h; - array_mirror_t tag_h; - std::vector> pld_h; + array_t phi; // for empty allocation Particles() {} @@ -178,10 +169,8 @@ namespace ntt { footprint += sizeof(prtldx_t) * dx2_prev.extent(0); footprint += sizeof(prtldx_t) * dx3_prev.extent(0); footprint += sizeof(short) * tag.extent(0); - for (auto& p : pld) { - footprint += sizeof(real_t) * p.extent(0); - } - footprint += sizeof(real_t) * phi.extent(0); + footprint += sizeof(real_t) * pld.extent(0) * pld.extent(1); + footprint += sizeof(real_t) * phi.extent(0); return footprint; } diff --git a/src/framework/domain/checkpoint.cpp b/src/framework/domain/checkpoint.cpp index 3d309c090..6dfb137db 
100644 --- a/src/framework/domain/checkpoint.cpp +++ b/src/framework/domain/checkpoint.cpp @@ -242,13 +242,13 @@ namespace ntt { local_domain->species[s].weight); auto nplds = local_domain->species[s].npld(); - for (auto p { 0u }; p < nplds; ++p) { - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_pld%d", s + 1, p + 1), - glob_tot, - offset, - npart, - local_domain->species[s].pld[p]); + if (nplds > 0) { + g_checkpoint_writer.saveParticlePayloads(fmt::format("s%d_plds", s + 1), + nplds, + glob_tot, + offset, + npart, + local_domain->species[s].pld); } } } @@ -451,14 +451,16 @@ namespace ntt { domain.species[s].weight, loc_npart, offset_npart); - for (auto p { 0u }; p < domain.species[s].npld(); ++p) { - checkpoint::ReadParticleData(io, - reader, - fmt::format("pld%d", p + 1), - s, - domain.species[s].pld[p], - loc_npart, - offset_npart); + + const auto nplds = domain.species[s].npld(); + if (nplds > 0) { + checkpoint::ReadParticlePayloads(io, + reader, + s, + domain.species[s].pld, + nplds, + loc_npart, + offset_npart); } domain.species[s].set_npart(loc_npart); } // species loop diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index eb77ecbb3..4f6e04eec 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -17,6 +17,7 @@ #include "arch/directions.h" #include "arch/kokkos_aliases.h" #include "arch/mpi_aliases.h" +#include "arch/mpi_tags.h" #include "utils/error.h" #include "framework/containers/particles.h" diff --git a/src/framework/tests/particles.cpp b/src/framework/tests/particles.cpp index dabcc062f..535198286 100644 --- a/src/framework/tests/particles.cpp +++ b/src/framework/tests/particles.cpp @@ -46,10 +46,8 @@ void testParticles(const int& index, raise::ErrorIf(p.tag.extent(0) != maxnpart, "tag incorrectly allocated", HERE); raise::ErrorIf(p.weight.extent(0) != maxnpart, "weight incorrectly allocated", HERE); - raise::ErrorIf(p.pld.size() != npld, "Number of payloads 
mismatch", HERE); - for (unsigned short n { 0 }; n < npld; ++n) { - raise::ErrorIf(p.pld[n].extent(0) != maxnpart, "pld incorrectly allocated", HERE); - } + raise::ErrorIf(p.pld.extent(1) != npld, "pld incorrectly allocated", HERE); + raise::ErrorIf(p.pld.extent(0) != maxnpart, "pld incorrectly allocated", HERE); if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { raise::ErrorIf(p.i2.extent(0) != maxnpart, "i2 incorrectly allocated", HERE); @@ -139,4 +137,4 @@ auto main(int argc, char** argv) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/global/arch/mpi_tags.h b/src/global/arch/mpi_tags.h index 0916542d4..aaf38a8f4 100644 --- a/src/global/arch/mpi_tags.h +++ b/src/global/arch/mpi_tags.h @@ -7,6 +7,8 @@ * @namespaces: * - mpi:: */ +#ifndef GLOBAL_ARCH_MPI_TAGS_H +#define GLOBAL_ARCH_MPI_TAGS_H #include "global.h" @@ -188,8 +190,13 @@ namespace mpi { tag; } - Inline auto SendTag(short tag, bool im1, bool ip1, bool jm1, bool jp1, bool km1, bool kp1) - -> short { + Inline auto SendTag(short tag, + bool im1, + bool ip1, + bool jm1, + bool jp1, + bool km1, + bool kp1) -> short { return ((im1 && jm1 && km1) * (PrtlSendTag::im1_jm1_km1 - 1) + (im1 && jm1 && kp1) * (PrtlSendTag::im1_jm1_kp1 - 1) + (im1 && jp1 && km1) * (PrtlSendTag::im1_jp1_km1 - 1) + @@ -226,3 +233,5 @@ namespace mpi { tag; } } // namespace mpi + +#endif // GLOBAL_ARCH_MPI_TAGS_H From ad026630c8814cff062683a9c6e27b693ac26e57 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 23 Jan 2025 14:52:57 -0500 Subject: [PATCH 39/52] comm kernels --- src/framework/domain/comm_mpi.hpp | 145 ++++------- src/framework/domain/communications.cpp | 55 ++--- src/kernels/comm.hpp | 309 ++++++++++++++++++++++++ 3 files changed, 369 insertions(+), 140 deletions(-) create mode 100644 src/kernels/comm.hpp diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 4f6e04eec..ff4984fa3 100644 --- a/src/framework/domain/comm_mpi.hpp +++ 
b/src/framework/domain/comm_mpi.hpp @@ -22,6 +22,8 @@ #include "framework/containers/particles.h" +#include "kernels/comm.hpp" + #include #include @@ -318,34 +320,14 @@ namespace comm { } template - void CommunicateParticles(Particles& species, - Kokkos::View outgoing_indices, - Kokkos::View tag_offsets, - std::vector npptag_vec, - std::vector npptag_recv_vec, - std::vector send_ranks, - std::vector recv_ranks, - const dir::dirs_t& dirs_to_comm) { - // Pointers to the particle data arrays - auto& this_i1 = species.i1; - auto& this_i1_prev = species.i1_prev; - auto& this_i2 = species.i2; - auto& this_i2_prev = species.i2_prev; - auto& this_i3 = species.i3; - auto& this_i3_prev = species.i3_prev; - auto& this_dx1 = species.dx1; - auto& this_dx1_prev = species.dx1_prev; - auto& this_dx2 = species.dx2; - auto& this_dx2_prev = species.dx2_prev; - auto& this_dx3 = species.dx3; - auto& this_dx3_prev = species.dx3_prev; - auto& this_phi = species.phi; - auto& this_ux1 = species.ux1; - auto& this_ux2 = species.ux2; - auto& this_ux3 = species.ux3; - auto& this_weight = species.weight; - auto& this_tag = species.tag; - + void CommunicateParticles(Particles& species, + array_t outgoing_indices, + array_t tag_offsets, + std::vector npptag_vec, + std::vector npptag_recv_vec, + std::vector send_ranks, + std::vector recv_ranks, + const dir::dirs_t& dirs_to_comm) { // @TODO_1.2.0: communicate payloads // number of arrays of each type to send/recv @@ -365,10 +347,9 @@ namespace comm { npptag_recv_vec.end(), static_cast(0)); - Kokkos::View recv_buff_int { "recv_buff_int", npart_recv * NINTS }; - Kokkos::View recv_buff_real { "recv_buff_real", npart_recv * NREALS }; - Kokkos::View recv_buff_prtldx { "recv_buff_prtldx", - npart_recv * NPRTLDX }; + array_t recv_buff_int { "recv_buff_int", npart_recv * NINTS }; + array_t recv_buff_real { "recv_buff_real", npart_recv * NREALS }; + array_t recv_buff_prtldx { "recv_buff_prtldx", npart_recv * NPRTLDX }; auto iteration = 0; auto 
current_received = 0; @@ -383,44 +364,26 @@ namespace comm { if (send_rank < 0 and recv_rank < 0) { continue; } - Kokkos::View send_buff_int { "send_buff_int", npart_send_in * NINTS }; - Kokkos::View send_buff_real { "send_buff_real", - npart_send_in * NREALS }; - Kokkos::View send_buff_prtldx { "send_buff_prtldx", - npart_send_in * NPRTLDX }; + array_t send_buff_int { "send_buff_int", npart_send_in * NINTS }; + array_t send_buff_real { "send_buff_real", npart_send_in * NREALS }; + array_t send_buff_prtldx { "send_buff_prtldx", + npart_send_in * NPRTLDX }; + // clang-format off Kokkos::parallel_for( "PopulateSendBuffer", npart_send_in, - Lambda(index_t p) { - const auto idx = outgoing_indices( - (tag_send > 2 ? tag_offsets(tag_send - 3) : 0) + npart_dead + p); - if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { - send_buff_int(NINTS * p + 0) = this_i1(idx); - send_buff_int(NINTS * p + 1) = this_i1_prev(idx); - send_buff_prtldx(NPRTLDX * p + 0) = this_dx1(idx); - send_buff_prtldx(NPRTLDX * p + 1) = this_dx1_prev(idx); - } - if constexpr (D == Dim::_2D or D == Dim::_3D) { - send_buff_int(NINTS * p + 2) = this_i2(idx); - send_buff_int(NINTS * p + 3) = this_i2_prev(idx); - send_buff_prtldx(NPRTLDX * p + 2) = this_dx2(idx); - send_buff_prtldx(NPRTLDX * p + 3) = this_dx2_prev(idx); - } - if constexpr (D == Dim::_3D) { - send_buff_int(NINTS * p + 4) = this_i3(idx); - send_buff_int(NINTS * p + 5) = this_i3_prev(idx); - send_buff_prtldx(NPRTLDX * p + 4) = this_dx3(idx); - send_buff_prtldx(NPRTLDX * p + 5) = this_dx3_prev(idx); - } - send_buff_real(NREALS * p + 0) = this_ux1(idx); - send_buff_real(NREALS * p + 1) = this_ux2(idx); - send_buff_real(NREALS * p + 2) = this_ux3(idx); - send_buff_real(NREALS * p + 3) = this_weight(idx); - if constexpr (D == Dim::_2D and C != Coord::Cart) { - send_buff_real(NREALS * p + 4) = this_phi(idx); - } - this_tag(idx) = ParticleTag::dead; - }); + kernel::comm::PopulatePrtlSendBuffer_kernel( + send_buff_int, send_buff_real, 
send_buff_prtldx, + NINTS, NREALS, NPRTLDX, + (tag_send > 2 ? tag_offsets(tag_send - 3) : 0) + npart_dead, + species.i1, species.i1_prev, species.dx1, species.dx1_prev, + species.i2, species.i2_prev, species.dx2, species.dx2_prev, + species.i3, species.i3_prev, species.dx3, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.weight, species.phi, species.tag, + outgoing_indices, tag_offsets) + ); + // clang-format on const auto recv_offset_int = current_received * NINTS; const auto recv_offset_real = current_received * NREALS; @@ -519,43 +482,25 @@ namespace comm { } // end direction loop - const auto npart = species.npart(); - const auto npart_holes = outgoing_indices.extent(0); - + // clang-format off Kokkos::parallel_for( "PopulateFromRecvBuffer", npart_recv, - Lambda(const std::size_t p) { - const auto idx = (p >= npart_holes ? npart + p - npart_holes - : outgoing_indices(p)); - if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { - this_i1(idx) = recv_buff_int(NINTS * p + 0); - this_i1_prev(idx) = recv_buff_int(NINTS * p + 1); - this_dx1(idx) = recv_buff_prtldx(NPRTLDX * p + 0); - this_dx1_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 1); - } - if constexpr (D == Dim::_2D or D == Dim::_3D) { - this_i2(idx) = recv_buff_int(NINTS * p + 2); - this_i2_prev(idx) = recv_buff_int(NINTS * p + 3); - this_dx2(idx) = recv_buff_prtldx(NPRTLDX * p + 2); - this_dx2_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 3); - } - if constexpr (D == Dim::_3D) { - this_i3(idx) = recv_buff_int(NINTS * p + 4); - this_i3_prev(idx) = recv_buff_int(NINTS * p + 5); - this_dx3(idx) = recv_buff_prtldx(NPRTLDX * p + 4); - this_dx3_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 5); - } - this_ux1(idx) = recv_buff_real(NREALS * p + 0); - this_ux2(idx) = recv_buff_real(NREALS * p + 1); - this_ux3(idx) = recv_buff_real(NREALS * p + 2); - this_weight(idx) = recv_buff_real(NREALS * p + 3); - if constexpr (D == Dim::_2D and C != Coord::Cart) { - this_phi(idx) = recv_buff_real(NREALS 
* p + 4); - } - this_tag(idx) = ParticleTag::alive; - }); + kernel::comm::ExtractReceivedPrtls_kernel( + recv_buff_int, recv_buff_real, recv_buff_prtldx, + NINTS, NREALS, NPRTLDX, + species.npart(), + species.i1, species.i1_prev, species.dx1, species.dx1_prev, + species.i2, species.i2_prev, species.dx2, species.dx2_prev, + species.i3, species.i3_prev, species.dx3, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.weight, species.phi, species.tag, + outgoing_indices) + ); + // clang-format on + const auto npart = species.npart(); + const auto npart_holes = outgoing_indices.extent(0); if (npart_recv > npart_holes) { species.set_npart(npart + npart_recv - npart_holes); } diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 6175cc4bb..fc065ab9d 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -20,6 +20,7 @@ #include "arch/mpi_tags.h" #include "framework/domain/comm_mpi.hpp" + #include "kernels/comm.hpp" #else #include "framework/domain/comm_nompi.hpp" #endif @@ -601,50 +602,24 @@ namespace ntt { } } // end directions loop - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i1_prev = species.i1_prev; - auto& this_i2 = species.i2; - auto& this_i2_prev = species.i2_prev; - auto& this_i3 = species.i3; - auto& this_i3_prev = species.i3_prev; + array_t outgoing_indices { "outgoing_indices", + npart - npart_alive }; - array_t outgoing_indices("outgoing_indices", - npart - npart_alive); - - array_t current_offset("current_offset", ntags); + // clang-format off Kokkos::parallel_for( "OutgoingIndicesAndDisplace", species.rangeActiveParticles(), - Lambda(index_t p) { - if (this_tag(p) != ParticleTag::alive) { - // dead or to-be-sent - const auto idx_for_tag = - Kokkos::atomic_fetch_add(¤t_offset(this_tag(p)), 1) + - (this_tag(p) != ParticleTag::dead ? npart_dead : 0) + - (this_tag(p) > 2 ? 
tag_offsets(this_tag(p) - 3) : 0); - if (idx_for_tag >= npart - npart_alive) { - raise::KernelError(HERE, - "Outgoing indices idx exceeds the array size"); - } - outgoing_indices(idx_for_tag) = p; - // apply offsets - if (this_tag(p) != ParticleTag::dead) { - if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { - this_i1(p) += shifts_in_x1(this_tag(p) - 2); - this_i1_prev(p) += shifts_in_x1(this_tag(p) - 2); - } - if constexpr (D == Dim::_2D or D == Dim::_3D) { - this_i2(p) += shifts_in_x2(this_tag(p) - 2); - this_i2_prev(p) += shifts_in_x2(this_tag(p) - 2); - } - if constexpr (D == Dim::_3D) { - this_i3(p) += shifts_in_x3(this_tag(p) - 2); - this_i3_prev(p) += shifts_in_x3(this_tag(p) - 2); - } - } - } - }); + kernel::comm::PrepareOutgoingPrtls_kernel( + shifts_in_x1, shifts_in_x2, shifts_in_x3, + outgoing_indices, + npart, npart_alive, npart_dead, ntags, + species.i1, species.i1_prev, + species.i2, species.i2_prev, + species.i3, species.i3_prev, + species.tag, tag_offsets) + ); + // clang-format on + comm::CommunicateParticles(species, outgoing_indices, tag_offsets, diff --git a/src/kernels/comm.hpp b/src/kernels/comm.hpp new file mode 100644 index 000000000..c470cd4f6 --- /dev/null +++ b/src/kernels/comm.hpp @@ -0,0 +1,309 @@ +/** + * @file kernels/comm.hpp + * @brief Kernels used during communications + * @implements + * - kernel::comm::PrepareOutgoingPrtls_kernel<> + * - kernel::comm::PopulatePrtlSendBuffer_kernel<> + * - kernel::comm::ExtractReceivedPrtls_kernel<> + * @namespaces: + * - kernel::comm:: + */ + +#ifndef KERNELS_COMM_HPP +#define KERNELS_COMM_HPP + +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" + +#include + +namespace kernel::comm { + using namespace ntt; + + template + class PrepareOutgoingPrtls_kernel { + const array_t shifts_in_x1, shifts_in_x2, shifts_in_x3; + array_t outgoing_indices; + + const std::size_t npart, npart_alive, npart_dead, ntags; + + array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; + 
const array_t tag; + + const array_t tag_offsets; + + array_t current_offset; + + public: + PrepareOutgoingPrtls_kernel(const array_t& shifts_in_x1, + const array_t& shifts_in_x2, + const array_t& shifts_in_x3, + array_t& outgoing_indices, + std::size_t npart, + std::size_t npart_alive, + std::size_t npart_dead, + std::size_t ntags, + array_t& i1, + array_t& i1_prev, + array_t& i2, + array_t& i2_prev, + array_t& i3, + array_t& i3_prev, + const array_t& tag, + const array_t& tag_offsets) + : shifts_in_x1 { shifts_in_x1 } + , shifts_in_x2 { shifts_in_x2 } + , shifts_in_x3 { shifts_in_x3 } + , outgoing_indices { outgoing_indices } + , npart { npart } + , npart_alive { npart_alive } + , npart_dead { npart_dead } + , ntags { ntags } + , i1 { i1 } + , i1_prev { i1_prev } + , i2 { i2 } + , i2_prev { i2_prev } + , i3 { i3 } + , i3_prev { i3_prev } + , tag { tag } + , tag_offsets { tag_offsets } + , current_offset { "current_offset", ntags } {} + + Inline void operator()(index_t p) const { + if (tag(p) != ParticleTag::alive) { + // dead or to-be-sent + const auto idx_for_tag = Kokkos::atomic_fetch_add(¤t_offset(tag(p)), + 1) + + (tag(p) != ParticleTag::dead ? npart_dead : 0) + + (tag(p) > 2 ? 
tag_offsets(tag(p) - 3) : 0); + if (idx_for_tag >= npart - npart_alive) { + raise::KernelError(HERE, "Outgoing indices idx exceeds the array size"); + } + outgoing_indices(idx_for_tag) = p; + // apply offsets + if (tag(p) != ParticleTag::dead) { + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + i1(p) += shifts_in_x1(tag(p) - 2); + i1_prev(p) += shifts_in_x1(tag(p) - 2); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + i2(p) += shifts_in_x2(tag(p) - 2); + i2_prev(p) += shifts_in_x2(tag(p) - 2); + } + if constexpr (D == Dim::_3D) { + i3(p) += shifts_in_x3(tag(p) - 2); + i3_prev(p) += shifts_in_x3(tag(p) - 2); + } + } + } + } + }; + + template + class PopulatePrtlSendBuffer_kernel { + array_t send_buff_int; + array_t send_buff_real; + array_t send_buff_prtldx; + + const unsigned short NINTS, NREALS, NPRTLDX; + const std::size_t idx_offset; + + const array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; + const array_t ux1, ux2, ux3, weight, phi; + const array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; + array_t tag; + const array_t outgoing_indices, tag_offsets; + + public: + PopulatePrtlSendBuffer_kernel(array_t& send_buff_int, + array_t& send_buff_real, + array_t& send_buff_prtldx, + unsigned short NINTS, + unsigned short NREALS, + unsigned short NPRTLDX, + std::size_t idx_offset, + const array_t& i1, + const array_t& i1_prev, + const array_t& dx1, + const array_t& dx1_prev, + const array_t& i2, + const array_t& i2_prev, + const array_t& dx2, + const array_t& dx2_prev, + const array_t& i3, + const array_t& i3_prev, + const array_t& dx3, + const array_t& dx3_prev, + const array_t& ux1, + const array_t& ux2, + const array_t& ux3, + const array_t& weight, + const array_t& phi, + array_t& tag, + const array_t& outgoing_indices, + const array_t& tag_offsets) + : send_buff_int { send_buff_int } + , send_buff_real { send_buff_real } + , send_buff_prtldx { send_buff_prtldx } + , NINTS { NINTS } + , NREALS { NREALS } + , NPRTLDX { NPRTLDX } + , 
idx_offset { idx_offset } + , i1 { i1 } + , i1_prev { i1_prev } + , dx1 { dx1 } + , dx1_prev { dx1_prev } + , i2 { i2 } + , i2_prev { i2_prev } + , dx2 { dx2 } + , dx2_prev { dx2_prev } + , i3 { i3 } + , i3_prev { i3_prev } + , dx3 { dx3 } + , dx3_prev { dx3_prev } + , ux1 { ux1 } + , ux2 { ux2 } + , ux3 { ux3 } + , weight { weight } + , phi { phi } + , tag { tag } + , outgoing_indices { outgoing_indices } + , tag_offsets { tag_offsets } {} + + Inline void operator()(index_t p) const { + const auto idx = outgoing_indices(idx_offset + p); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + send_buff_int(NINTS * p + 0) = i1(idx); + send_buff_int(NINTS * p + 1) = i1_prev(idx); + send_buff_prtldx(NPRTLDX * p + 0) = dx1(idx); + send_buff_prtldx(NPRTLDX * p + 1) = dx1_prev(idx); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + send_buff_int(NINTS * p + 2) = i2(idx); + send_buff_int(NINTS * p + 3) = i2_prev(idx); + send_buff_prtldx(NPRTLDX * p + 2) = dx2(idx); + send_buff_prtldx(NPRTLDX * p + 3) = dx2_prev(idx); + } + if constexpr (D == Dim::_3D) { + send_buff_int(NINTS * p + 4) = i3(idx); + send_buff_int(NINTS * p + 5) = i3_prev(idx); + send_buff_prtldx(NPRTLDX * p + 4) = dx3(idx); + send_buff_prtldx(NPRTLDX * p + 5) = dx3_prev(idx); + } + send_buff_real(NREALS * p + 0) = ux1(idx); + send_buff_real(NREALS * p + 1) = ux2(idx); + send_buff_real(NREALS * p + 2) = ux3(idx); + send_buff_real(NREALS * p + 3) = weight(idx); + if constexpr (D == Dim::_2D and C != Coord::Cart) { + send_buff_real(NREALS * p + 4) = phi(idx); + } + tag(idx) = ParticleTag::dead; + } + }; + + template + class ExtractReceivedPrtls_kernel { + const array_t recv_buff_int; + const array_t recv_buff_real; + const array_t recv_buff_prtldx; + + const unsigned short NINTS, NREALS, NPRTLDX; + const std::size_t npart, npart_holes; + + array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; + array_t ux1, ux2, ux3, weight, phi; + array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; + array_t tag; 
+ const array_t outgoing_indices; + + public: + ExtractReceivedPrtls_kernel(const array_t& recv_buff_int, + const array_t& recv_buff_real, + const array_t& recv_buff_prtldx, + unsigned short NINTS, + unsigned short NREALS, + unsigned short NPRTLDX, + std::size_t npart, + array_t& i1, + array_t& i1_prev, + array_t& dx1, + array_t& dx1_prev, + array_t& i2, + array_t& i2_prev, + array_t& dx2, + array_t& dx2_prev, + array_t& i3, + array_t& i3_prev, + array_t& dx3, + array_t& dx3_prev, + array_t& ux1, + array_t& ux2, + array_t& ux3, + array_t& weight, + array_t& phi, + array_t& tag, + const array_t& outgoing_indices) + : recv_buff_int { recv_buff_int } + , recv_buff_real { recv_buff_real } + , recv_buff_prtldx { recv_buff_prtldx } + , NINTS { NINTS } + , NREALS { NREALS } + , NPRTLDX { NPRTLDX } + , npart { npart } + , npart_holes { outgoing_indices.extent(0) } + , i1 { i1 } + , i1_prev { i1_prev } + , dx1 { dx1 } + , dx1_prev { dx1_prev } + , i2 { i2 } + , i2_prev { i2_prev } + , dx2 { dx2 } + , dx2_prev { dx2_prev } + , i3 { i3 } + , i3_prev { i3_prev } + , dx3 { dx3 } + , dx3_prev { dx3_prev } + , ux1 { ux1 } + , ux2 { ux2 } + , ux3 { ux3 } + , weight { weight } + , phi { phi } + , tag { tag } {} + + Inline void operator()(index_t p) const { + const auto idx = (p >= npart_holes ? 
npart + p - npart_holes + : outgoing_indices(p)); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + i1(idx) = recv_buff_int(NINTS * p + 0); + i1_prev(idx) = recv_buff_int(NINTS * p + 1); + dx1(idx) = recv_buff_prtldx(NPRTLDX * p + 0); + dx1_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 1); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + i2(idx) = recv_buff_int(NINTS * p + 2); + i2_prev(idx) = recv_buff_int(NINTS * p + 3); + dx2(idx) = recv_buff_prtldx(NPRTLDX * p + 2); + dx2_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 3); + } + if constexpr (D == Dim::_3D) { + i3(idx) = recv_buff_int(NINTS * p + 4); + i3_prev(idx) = recv_buff_int(NINTS * p + 5); + dx3(idx) = recv_buff_prtldx(NPRTLDX * p + 4); + dx3_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 5); + } + ux1(idx) = recv_buff_real(NREALS * p + 0); + ux2(idx) = recv_buff_real(NREALS * p + 1); + ux3(idx) = recv_buff_real(NREALS * p + 2); + weight(idx) = recv_buff_real(NREALS * p + 3); + if constexpr (D == Dim::_2D and C != Coord::Cart) { + phi(idx) = recv_buff_real(NREALS * p + 4); + } + tag(idx) = ParticleTag::alive; + } + }; + +} // namespace kernel::comm + +#endif // KERNELS_COMM_HPP From ea4366bc69ef3ca366368ead9c200b28b9d5c502 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 23 Jan 2025 15:20:50 -0500 Subject: [PATCH 40/52] minor fix in tags --- src/framework/domain/comm_mpi.hpp | 2 +- src/kernels/comm.hpp | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index ff4984fa3..70c175972 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -381,7 +381,7 @@ namespace comm { species.i3, species.i3_prev, species.dx3, species.dx3_prev, species.ux1, species.ux2, species.ux3, species.weight, species.phi, species.tag, - outgoing_indices, tag_offsets) + outgoing_indices) ); // clang-format on diff --git a/src/kernels/comm.hpp b/src/kernels/comm.hpp index 
c470cd4f6..7446d285a 100644 --- a/src/kernels/comm.hpp +++ b/src/kernels/comm.hpp @@ -74,10 +74,15 @@ namespace kernel::comm { Inline void operator()(index_t p) const { if (tag(p) != ParticleTag::alive) { // dead or to-be-sent - const auto idx_for_tag = Kokkos::atomic_fetch_add(&current_offset(tag(p)), - 1) + - (tag(p) != ParticleTag::dead ? npart_dead : 0) + - (tag(p) > 2 ? tag_offsets(tag(p) - 3) : 0); + auto idx_for_tag = Kokkos::atomic_fetch_add(&current_offset(tag(p)), 1); + if (tag(p) != ParticleTag::dead) { + idx_for_tag += npart_dead; + } + if (tag(p) > 2) { + idx_for_tag += tag_offsets(tag(p) - 3); + } + // (tag(p) != ParticleTag::dead ? npart_dead : 0) + + // (tag(p) > 2 ? tag_offsets(tag(p) - 3) : 0); if (idx_for_tag >= npart - npart_alive) { raise::KernelError(HERE, "Outgoing indices idx exceeds the array size"); } @@ -114,7 +119,7 @@ namespace kernel::comm { const array_t ux1, ux2, ux3, weight, phi; const array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; array_t tag; - const array_t outgoing_indices, tag_offsets; + const array_t outgoing_indices; public: PopulatePrtlSendBuffer_kernel(array_t& send_buff_int, @@ -142,8 +147,7 @@ namespace kernel::comm { const array_t& weight, const array_t& phi, array_t& tag, - const array_t& outgoing_indices, - const array_t& tag_offsets) + const array_t& outgoing_indices) : send_buff_int { send_buff_int } , send_buff_real { send_buff_real } , send_buff_prtldx { send_buff_prtldx } @@ -169,8 +173,7 @@ namespace kernel::comm { , weight { weight } , phi { phi } , tag { tag } - , outgoing_indices { outgoing_indices } - , tag_offsets { tag_offsets } {} + , outgoing_indices { outgoing_indices } {} Inline void operator()(index_t p) const { const auto idx = outgoing_indices(idx_offset + p); From 29ef300bd920a7f3e6285e6697f73105d7321416 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 23 Jan 2025 15:44:41 -0500 Subject: [PATCH 41/52] proper tag_offsets access --- dev/nix/shell.nix | 2 +- src/framework/containers/particles.cpp | 13 
+++++++------ src/framework/domain/comm_mpi.hpp | 8 ++++++-- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/dev/nix/shell.nix b/dev/nix/shell.nix index 22358a837..f9d48fbfd 100644 --- a/dev/nix/shell.nix +++ b/dev/nix/shell.nix @@ -38,7 +38,7 @@ pkgs.mkShell { ++ (if hdf5 then (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5 ]) else [ ]); LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath ([ - pkgs.clang19Stdenv.cc.cc + pkgs.stdenv.cc.cc pkgs.zlib ]); diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index fc8214824..235358760 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -97,15 +97,16 @@ namespace ntt { } // count the offsets on the host and copy to device - array_t tag_offset("tag_offset", num_tags - 3); - auto tag_offset_h = Kokkos::create_mirror_view(tag_offset); + array_t tag_offsets("tag_offsets", num_tags - 3); + auto tag_offsets_h = Kokkos::create_mirror_view(tag_offsets); - for (auto t { 0u }; t < num_tags - 3; ++t) { - tag_offset_h(t) = npptag_vec[t + 2] + (t > 0u ? 
tag_offset_h(t - 1) : 0); + tag_offsets_h(0) = npptag_vec[2]; + for (auto t { 1u }; t < num_tags - 3; ++t) { + tag_offsets_h(t) = npptag_vec[t + 2] + tag_offsets_h(t - 1); } - Kokkos::deep_copy(tag_offset, tag_offset_h); + Kokkos::deep_copy(tag_offsets, tag_offsets_h); - return { npptag_vec, tag_offset }; + return { npptag_vec, tag_offsets }; } template diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 70c175972..68248db13 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -368,14 +368,18 @@ namespace comm { array_t send_buff_real { "send_buff_real", npart_send_in * NREALS }; array_t send_buff_prtldx { "send_buff_prtldx", npart_send_in * NPRTLDX }; + + std::size_t idx_offset = npart_dead; + if (tag_send > 2) { + idx_offset += tag_offsets(tag_send - 3); + } // clang-format off Kokkos::parallel_for( "PopulateSendBuffer", npart_send_in, kernel::comm::PopulatePrtlSendBuffer_kernel( send_buff_int, send_buff_real, send_buff_prtldx, - NINTS, NREALS, NPRTLDX, - (tag_send > 2 ? 
tag_offsets(tag_send - 3) : 0) + npart_dead, + NINTS, NREALS, NPRTLDX, idx_offset, species.i1, species.i1_prev, species.dx1, species.dx1_prev, species.i2, species.i2_prev, species.dx2, species.dx2_prev, species.i3, species.i3_prev, species.dx3, species.dx3_prev, From 3c8536392f38325df4338509b5ebb7838a884cc2 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 23 Jan 2025 17:39:41 -0500 Subject: [PATCH 42/52] minor --- src/framework/domain/comm_mpi.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 68248db13..24f17015f 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -320,14 +320,14 @@ namespace comm { } template - void CommunicateParticles(Particles& species, - array_t outgoing_indices, - array_t tag_offsets, - std::vector npptag_vec, - std::vector npptag_recv_vec, - std::vector send_ranks, - std::vector recv_ranks, - const dir::dirs_t& dirs_to_comm) { + void CommunicateParticles(Particles& species, + const array_t& outgoing_indices, + const array_t& tag_offsets, + const std::vector& npptag_vec, + const std::vector& npptag_recv_vec, + const std::vector& send_ranks, + const std::vector& recv_ranks, + const dir::dirs_t& dirs_to_comm) { // @TODO_1.2.0: communicate payloads // number of arrays of each type to send/recv From d376b6e6fb84b795dd1b94a58f0da41e715d4be7 Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 23 Jan 2025 17:54:38 -0500 Subject: [PATCH 43/52] minor bug: tag access --- src/framework/domain/comm_mpi.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 24f17015f..f001738cf 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -369,9 +369,12 @@ namespace comm { array_t send_buff_prtldx { "send_buff_prtldx", npart_send_in * NPRTLDX }; + auto tag_offsets_h = 
Kokkos::create_mirror_view(tag_offsets); + Kokkos::deep_copy(tag_offsets_h, tag_offsets); + std::size_t idx_offset = npart_dead; if (tag_send > 2) { - idx_offset += tag_offsets(tag_send - 3); + idx_offset += tag_offsets_h(tag_send - 3); } // clang-format off Kokkos::parallel_for( From f7ec06e55f202efb0c11c21eb3c3bd809e9e2329 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 23 Jan 2025 19:38:03 -0500 Subject: [PATCH 44/52] inline if patched --- extern/Kokkos | 2 +- extern/adios2 | 2 +- extern/plog | 2 +- src/kernels/comm.hpp | 8 ++++++-- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/extern/Kokkos b/extern/Kokkos index eb11070f6..abaec2e5d 160000 --- a/extern/Kokkos +++ b/extern/Kokkos @@ -1 +1 @@ -Subproject commit eb11070f67565b2e660659f5207f0363bdf3b882 +Subproject commit abaec2e5da1e15e367e48d2a3aa649770e8bcc72 diff --git a/extern/adios2 b/extern/adios2 index f80ad829d..b574cc9c2 160000 --- a/extern/adios2 +++ b/extern/adios2 @@ -1 +1 @@ -Subproject commit f80ad829d751241140c40923503e1888e27e22e1 +Subproject commit b574cc9c29b19448ed9f279c4966c97740328441 diff --git a/extern/plog b/extern/plog index 85a871b13..94899e0b9 160000 --- a/extern/plog +++ b/extern/plog @@ -1 +1 @@ -Subproject commit 85a871b13be0bd1a9e0110744fa60cc9bd1e8380 +Subproject commit 94899e0b926ac1b0f4750bfbd495167b4a6ae9ef diff --git a/src/kernels/comm.hpp b/src/kernels/comm.hpp index 7446d285a..a24ee897e 100644 --- a/src/kernels/comm.hpp +++ b/src/kernels/comm.hpp @@ -276,8 +276,12 @@ namespace kernel::comm { , tag { tag } {} Inline void operator()(index_t p) const { - const auto idx = (p >= npart_holes ? 
npart + p - npart_holes - : outgoing_indices(p)); + std::size_t idx; + if (p >= npart_holes) { + idx = npart + p - npart_holes; + } else { + idx = outgoing_indices(p); + } if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { i1(idx) = recv_buff_int(NINTS * p + 0); i1_prev(idx) = recv_buff_int(NINTS * p + 1); From fed4e62803ada0d808ba012d1772b37425a3c546 Mon Sep 17 00:00:00 2001 From: hayk Date: Fri, 24 Jan 2025 13:28:12 -0500 Subject: [PATCH 45/52] bug on gpus fixed --- setups/srpic/blob/pgen.hpp | 49 ++++++++-------- src/framework/containers/particles.cpp | 63 ++++++++++++++++++++- src/framework/containers/particles.h | 2 + src/framework/domain/comm_mpi.hpp | 75 +++++++++++++------------ src/framework/domain/communications.cpp | 43 +++++++------- src/kernels/comm.hpp | 8 ++- 6 files changed, 153 insertions(+), 87 deletions(-) diff --git a/setups/srpic/blob/pgen.hpp b/setups/srpic/blob/pgen.hpp index 38b3db1c5..f7b7d71b5 100644 --- a/setups/srpic/blob/pgen.hpp +++ b/setups/srpic/blob/pgen.hpp @@ -21,17 +21,17 @@ namespace user { CounterstreamEnergyDist(const M& metric, real_t v_max) : arch::EnergyDistribution { metric } , v_max { v_max } {} - + Inline void operator()(const coord_t& x_Ph, vec_t& v, unsigned short sp) const override { v[0] = v_max; } - + private: const real_t v_max; }; - + template struct GaussianDist : public arch::SpatialDistribution { GaussianDist(const M& metric, real_t x1c, real_t x2c, real_t dr) @@ -39,20 +39,20 @@ namespace user { , x1c { x1c } , x2c { x2c } , dr { dr } {} - + // to properly scale the number density, the probability should be normalized to 1 Inline auto operator()(const coord_t& x_Ph) const -> real_t override { - if (math::abs(x_Ph[0] - x1c) < dr && math::abs(x_Ph[1] - x2c) < dr){ - return 1.0; - }else{ - return 0.0; - } + if (math::abs(x_Ph[0] - x1c) < dr && math::abs(x_Ph[1] - x2c) < dr) { + return 1.0; + } else { + return 0.0; + } } private: const real_t x1c, x2c, dr; }; - + template struct PGen : public 
arch::ProblemGenerator { @@ -78,24 +78,23 @@ namespace user { , dr { p.template get("setup.dr") } {} inline void InitPrtls(Domain& local_domain) { - const auto energy_dist = CounterstreamEnergyDist( - local_domain.mesh.metric, - v_max); + const auto energy_dist = CounterstreamEnergyDist(local_domain.mesh.metric, + v_max); const auto spatial_dist = GaussianDist(local_domain.mesh.metric, - x1c, - x2c, - dr); - const auto injector = - arch::NonUniformInjector( - energy_dist, - spatial_dist, - { 1, 2 }); + x1c, + x2c, + dr); + const auto injector = + arch::NonUniformInjector( + energy_dist, + spatial_dist, + { 1, 2 }); arch::InjectNonUniform>( - params, - local_domain, - injector, - 1.0); + params, + local_domain, + injector, + 1.0); } }; diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index 235358760..d2eed1b81 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -4,6 +4,7 @@ #include "global.h" #include "arch/kokkos_aliases.h" +#include "utils/numeric.h" #include "utils/sorting.h" #include "framework/containers/species.h" @@ -12,6 +13,8 @@ #include #include +#include +#include #include #include #include @@ -72,7 +75,7 @@ namespace ntt { -> std::pair, array_t> { auto this_tag = tag; const auto num_tags = ntags(); - array_t npptag("nparts_per_tag", ntags()); + array_t npptag { "nparts_per_tag", ntags() }; // count # of particles per each tag auto npptag_scat = Kokkos::Experimental::create_scatter_view(npptag); @@ -100,7 +103,7 @@ namespace ntt { array_t tag_offsets("tag_offsets", num_tags - 3); auto tag_offsets_h = Kokkos::create_mirror_view(tag_offsets); - tag_offsets_h(0) = npptag_vec[2]; + tag_offsets_h(0) = npptag_vec[2]; // offset for tag = 3 for (auto t { 1u }; t < num_tags - 3; ++t) { tag_offsets_h(t) = npptag_vec[t + 2] + tag_offsets_h(t - 1); } @@ -133,7 +136,7 @@ namespace ntt { Kokkos::parallel_reduce( "CountDeadAlive", rangeActiveParticles(), - Lambda(index_t p, 
std::size_t & nalive, std::size_t & ndead) { + Lambda(index_t p, std::size_t& nalive, std::size_t& ndead) { nalive += (this_tag(p) == ParticleTag::alive); ndead += (this_tag(p) == ParticleTag::dead); if (this_tag(p) != ParticleTag::alive and this_tag(p) != ParticleTag::dead) { @@ -216,6 +219,60 @@ namespace ntt { m_is_sorted = true; } + // template + // void Particles::PrintTags() { + // auto tag_h = Kokkos::create_mirror_view(tag); + // Kokkos::deep_copy(tag_h, tag); + // auto i1_h = Kokkos::create_mirror_view(i1); + // Kokkos::deep_copy(i1_h, i1); + // auto dx1_h = Kokkos::create_mirror_view(dx1); + // Kokkos::deep_copy(dx1_h, dx1); + // std::cout << "species " << label() << " [npart = " << npart() << "]" + // << std::endl; + // std::cout << "idxs: "; + // for (auto i = 0; i < IMIN(tag_h.extent(0), 30); ++i) { + // std::cout << std::setw(3) << i << " "; + // if (i == npart() - 1) { + // std::cout << "| "; + // } + // } + // if (tag_h.extent(0) > 30) { + // std::cout << "... " << std::setw(3) << tag_h.extent(0) - 1; + // } + // std::cout << std::endl << "tags: "; + // for (auto i = 0; i < IMIN(tag_h.extent(0), 30); ++i) { + // std::cout << std::setw(3) << (short)tag_h(i) << " "; + // if (i == npart() - 1) { + // std::cout << "| "; + // } + // } + // if (tag_h.extent(0) > 30) { + // std::cout << "..." << std::setw(3) << (short)tag_h(tag_h.extent(0) - 1); + // } + // std::cout << std::endl << "i1s : "; + // for (auto i = 0; i < IMIN(i1_h.extent(0), 30); ++i) { + // std::cout << std::setw(3) << i1_h(i) << " "; + // if (i == npart() - 1) { + // std::cout << "| "; + // } + // } + // if (i1_h.extent(0) > 30) { + // std::cout << "..." 
<< std::setw(3) << i1_h(i1_h.extent(0) - 1); + // } + // std::cout << std::endl << "dx1s : "; + // for (auto i = 0; i < IMIN(dx1_h.extent(0), 30); ++i) { + // std::cout << std::setprecision(2) << std::setw(3) << dx1_h(i) << " "; + // if (i == npart() - 1) { + // std::cout << "| "; + // } + // } + // if (dx1_h.extent(0) > 30) { + // std::cout << "..." << std::setprecision(2) << std::setw(3) + // << dx1_h(dx1_h.extent(0) - 1); + // } + // std::cout << std::endl; + // } + template struct Particles; template struct Particles; template struct Particles; diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index 9024fef1e..d84bd0cc9 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -219,6 +219,8 @@ namespace ntt { * @brief Copy particle data from device to host. */ void SyncHostDevice(); + + // void PrintTags(); }; } // namespace ntt diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index f001738cf..b477e47f7 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -340,13 +340,12 @@ namespace comm { MPI_Comm_rank(MPI_COMM_WORLD, &rank); // buffers to store recv data - const auto npart_alive = npptag_vec[ParticleTag::alive]; - const auto npart_dead = npptag_vec[ParticleTag::dead]; - const auto npart_send = outgoing_indices.extent(0) - npart_dead; - const auto npart_recv = std::accumulate(npptag_recv_vec.begin(), + const auto npart_alive = npptag_vec[ParticleTag::alive]; + const auto npart_dead = npptag_vec[ParticleTag::dead]; + const auto npart_send = outgoing_indices.extent(0) - npart_dead; + const auto npart_recv = std::accumulate(npptag_recv_vec.begin(), npptag_recv_vec.end(), static_cast(0)); - array_t recv_buff_int { "recv_buff_int", npart_recv * NINTS }; array_t recv_buff_real { "recv_buff_real", npart_recv * NREALS }; array_t recv_buff_prtldx { "recv_buff_prtldx", npart_recv * NPRTLDX }; @@ -376,21 +375,23 @@ namespace 
comm { if (tag_send > 2) { idx_offset += tag_offsets_h(tag_send - 3); } - // clang-format off - Kokkos::parallel_for( - "PopulateSendBuffer", - npart_send_in, - kernel::comm::PopulatePrtlSendBuffer_kernel( - send_buff_int, send_buff_real, send_buff_prtldx, - NINTS, NREALS, NPRTLDX, idx_offset, - species.i1, species.i1_prev, species.dx1, species.dx1_prev, - species.i2, species.i2_prev, species.dx2, species.dx2_prev, - species.i3, species.i3_prev, species.dx3, species.dx3_prev, - species.ux1, species.ux2, species.ux3, - species.weight, species.phi, species.tag, - outgoing_indices) - ); - // clang-format on + if (npart_send_in > 0) { + // clang-format off + Kokkos::parallel_for( + "PopulatePrtlSendBuffer", + npart_send_in, + kernel::comm::PopulatePrtlSendBuffer_kernel( + send_buff_int, send_buff_real, send_buff_prtldx, + NINTS, NREALS, NPRTLDX, idx_offset, + species.i1, species.i1_prev, species.dx1, species.dx1_prev, + species.i2, species.i2_prev, species.dx2, species.dx2_prev, + species.i3, species.i3_prev, species.dx3, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.weight, species.phi, species.tag, + outgoing_indices) + ); + // clang-format on + } const auto recv_offset_int = current_received * NINTS; const auto recv_offset_real = current_received * NREALS; @@ -489,22 +490,24 @@ namespace comm { } // end direction loop - // clang-format off - Kokkos::parallel_for( - "PopulateFromRecvBuffer", - npart_recv, - kernel::comm::ExtractReceivedPrtls_kernel( - recv_buff_int, recv_buff_real, recv_buff_prtldx, - NINTS, NREALS, NPRTLDX, - species.npart(), - species.i1, species.i1_prev, species.dx1, species.dx1_prev, - species.i2, species.i2_prev, species.dx2, species.dx2_prev, - species.i3, species.i3_prev, species.dx3, species.dx3_prev, - species.ux1, species.ux2, species.ux3, - species.weight, species.phi, species.tag, - outgoing_indices) - ); - // clang-format on + if (npart_recv > 0) { + // clang-format off + Kokkos::parallel_for( + 
"ExtractReceivedPrtls", + npart_recv, + kernel::comm::ExtractReceivedPrtls_kernel( + recv_buff_int, recv_buff_real, recv_buff_prtldx, + NINTS, NREALS, NPRTLDX, + species.npart(), species.maxnpart(), + species.i1, species.i1_prev, species.dx1, species.dx1_prev, + species.i2, species.i2_prev, species.dx2, species.dx2_prev, + species.i3, species.i3_prev, species.dx3, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.weight, species.phi, species.tag, + outgoing_indices) + ); + // clang-format on + } const auto npart = species.npart(); const auto npart_holes = outgoing_indices.extent(0); diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index fc065ab9d..7dc5d285a 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -36,10 +36,10 @@ namespace ntt { using comm_params_t = std::pair>; template - auto GetSendRecvRanks( - Metadomain* metadomain, - Domain& domain, - dir::direction_t direction) -> std::pair { + auto GetSendRecvRanks(Metadomain* metadomain, + Domain& domain, + dir::direction_t direction) + -> std::pair { Domain* send_to_nghbr_ptr = nullptr; Domain* recv_from_nghbr_ptr = nullptr; // set pointers to the correct send/recv domains @@ -119,11 +119,11 @@ namespace ntt { } template - auto GetSendRecvParams( - Metadomain* metadomain, - Domain& domain, - dir::direction_t direction, - bool synchronize) -> std::pair { + auto GetSendRecvParams(Metadomain* metadomain, + Domain& domain, + dir::direction_t direction, + bool synchronize) + -> std::pair { const auto [send_indrank, recv_indrank] = GetSendRecvRanks(metadomain, domain, direction); const auto [send_ind, send_rank] = send_indrank; @@ -512,11 +512,15 @@ namespace ntt { // # of particles to receive per each tag (direction) std::vector npptag_recv_vec(ntags - 2, 0); // coordinate shifts per each direction - array_t shifts_in_x1("shifts_in_x1", ntags - 2); - array_t shifts_in_x2("shifts_in_x2", ntags - 2); - 
array_t shifts_in_x3("shifts_in_x3", ntags - 2); + array_t shifts_in_x1 { "shifts_in_x1", ntags - 2 }; + array_t shifts_in_x2 { "shifts_in_x2", ntags - 2 }; + array_t shifts_in_x3 { "shifts_in_x3", ntags - 2 }; + auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); + auto shifts_in_x2_h = Kokkos::create_mirror_view(shifts_in_x2); + auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); + // all directions requiring communication - dir::dirs_t dirs_to_comm; + dir::dirs_t dirs_to_comm; // ranks & indices of meshblock to send/recv from std::vector send_ranks, send_inds; @@ -568,7 +572,6 @@ namespace ntt { // ... tag_send - 2: because we only shift tags > 2 (i.e. no dead/alive) if (is_sending) { if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { - auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); if (direction[0] == -1) { // sending backwards in x1 (add sx1 of target meshblock) shifts_in_x1_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( @@ -577,37 +580,35 @@ namespace ntt { // sending forward in x1 (subtract sx1 of source meshblock) shifts_in_x1_h(tag_send - 2) = -domain.mesh.n_active(in::x1); } - Kokkos::deep_copy(shifts_in_x1, shifts_in_x1_h); } if constexpr (D == Dim::_2D || D == Dim::_3D) { - auto shifts_in_x2_h = Kokkos::create_mirror_view(shifts_in_x2); if (direction[1] == -1) { shifts_in_x2_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( in::x2); } else if (direction[1] == 1) { shifts_in_x2_h(tag_send - 2) = -domain.mesh.n_active(in::x2); } - Kokkos::deep_copy(shifts_in_x2, shifts_in_x2_h); } if constexpr (D == Dim::_3D) { - auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); if (direction[2] == -1) { shifts_in_x3_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( in::x3); } else if (direction[2] == 1) { shifts_in_x3_h(tag_send - 2) = -domain.mesh.n_active(in::x3); } - Kokkos::deep_copy(shifts_in_x3, shifts_in_x3_h); } } } // end directions loop + Kokkos::deep_copy(shifts_in_x1, 
shifts_in_x1_h); + Kokkos::deep_copy(shifts_in_x2, shifts_in_x2_h); + Kokkos::deep_copy(shifts_in_x3, shifts_in_x3_h); + array_t outgoing_indices { "outgoing_indices", npart - npart_alive }; - // clang-format off Kokkos::parallel_for( - "OutgoingIndicesAndDisplace", + "PrepareOutgoingPrtls", species.rangeActiveParticles(), kernel::comm::PrepareOutgoingPrtls_kernel( shifts_in_x1, shifts_in_x2, shifts_in_x3, diff --git a/src/kernels/comm.hpp b/src/kernels/comm.hpp index a24ee897e..eea79b08b 100644 --- a/src/kernels/comm.hpp +++ b/src/kernels/comm.hpp @@ -81,8 +81,6 @@ namespace kernel::comm { if (tag(p) > 2) { idx_for_tag += tag_offsets(tag(p) - 3); } - // (tag(p) != ParticleTag::dead ? npart_dead : 0) + - // (tag(p) > 2 ? tag_offsets(tag(p) - 3) : 0); if (idx_for_tag >= npart - npart_alive) { raise::KernelError(HERE, "Outgoing indices idx exceeds the array size"); } @@ -214,6 +212,7 @@ namespace kernel::comm { const unsigned short NINTS, NREALS, NPRTLDX; const std::size_t npart, npart_holes; + const std::size_t maxnpart; array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; array_t ux1, ux2, ux3, weight, phi; @@ -229,6 +228,7 @@ namespace kernel::comm { unsigned short NREALS, unsigned short NPRTLDX, std::size_t npart, + std::size_t maxnpart, array_t& i1, array_t& i1_prev, array_t& dx1, @@ -255,6 +255,7 @@ namespace kernel::comm { , NREALS { NREALS } , NPRTLDX { NPRTLDX } , npart { npart } + , maxnpart { maxnpart } , npart_holes { outgoing_indices.extent(0) } , i1 { i1 } , i1_prev { i1_prev } @@ -282,6 +283,9 @@ namespace kernel::comm { } else { idx = outgoing_indices(p); } + if (idx >= maxnpart) { + raise::KernelError(HERE, "Received particle index exceeds the array size"); + } if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { i1(idx) = recv_buff_int(NINTS * p + 0); i1_prev(idx) = recv_buff_int(NINTS * p + 1); From 7388d8e7f805910b1ec53c2f829ecd7aeb9a4e9d Mon Sep 17 00:00:00 2001 From: haykh Date: Fri, 24 Jan 2025 16:28:48 -0500 Subject: [PATCH 46/52] pld 
comm --- src/framework/domain/comm_mpi.hpp | 57 ++++++++++++++++++++++++++----- src/kernels/comm.hpp | 45 +++++++++++++++++------- 2 files changed, 81 insertions(+), 21 deletions(-) diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index f001738cf..dec321883 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -335,9 +335,7 @@ namespace comm { D == Dim::_2D and C != Coord::Cart); const unsigned short NINTS = 2 * static_cast(D); const unsigned short NPRTLDX = 2 * static_cast(D); - const unsigned short NPLD = species.npld(); - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + const unsigned short NPLDS = species.npld(); // buffers to store recv data const auto npart_alive = npptag_vec[ParticleTag::alive]; @@ -350,6 +348,11 @@ namespace comm { array_t recv_buff_int { "recv_buff_int", npart_recv * NINTS }; array_t recv_buff_real { "recv_buff_real", npart_recv * NREALS }; array_t recv_buff_prtldx { "recv_buff_prtldx", npart_recv * NPRTLDX }; + array_t recv_buff_pld; + + if (NPLDS > 0) { + recv_buff_pld = array_t { "recv_buff_pld", npart_recv * NPLDS }; + } auto iteration = 0; auto current_received = 0; @@ -368,6 +371,10 @@ namespace comm { array_t send_buff_real { "send_buff_real", npart_send_in * NREALS }; array_t send_buff_prtldx { "send_buff_prtldx", npart_send_in * NPRTLDX }; + array_t send_buff_pld; + if (NPLDS > 0) { + send_buff_pld = array_t { "send_buff_pld", npart_send_in * NPLDS }; + } auto tag_offsets_h = Kokkos::create_mirror_view(tag_offsets); Kokkos::deep_copy(tag_offsets_h, tag_offsets); @@ -381,13 +388,13 @@ namespace comm { "PopulateSendBuffer", npart_send_in, kernel::comm::PopulatePrtlSendBuffer_kernel( - send_buff_int, send_buff_real, send_buff_prtldx, - NINTS, NREALS, NPRTLDX, idx_offset, + send_buff_int, send_buff_real, send_buff_prtldx, send_buff_pld, + NINTS, NREALS, NPRTLDX, NPLDS, idx_offset, species.i1, species.i1_prev, species.dx1, species.dx1_prev, species.i2, 
species.i2_prev, species.dx2, species.dx2_prev, species.i3, species.i3_prev, species.dx3, species.dx3_prev, species.ux1, species.ux2, species.ux3, - species.weight, species.phi, species.tag, + species.weight, species.phi, species.pld, species.tag, outgoing_indices) ); // clang-format on @@ -395,6 +402,7 @@ namespace comm { const auto recv_offset_int = current_received * NINTS; const auto recv_offset_real = current_received * NREALS; const auto recv_offset_prtldx = current_received * NPRTLDX; + const auto recv_offset_pld = current_received * NPLDS; if ((send_rank >= 0) and (recv_rank >= 0) and (npart_send_in > 0) and (npart_recv_in > 0)) { @@ -438,6 +446,20 @@ namespace comm { 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + if (NPLDS > 0) { + MPI_Sendrecv(send_buff_pld.data(), + npart_send_in * NPLDS, + mpi::get_type(), + send_rank, + 0, + recv_buff_pld.data() + recv_offset_pld, + npart_recv_in * NPLDS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } } else if ((send_rank >= 0) and (npart_send_in > 0)) { MPI_Send(send_buff_int.data(), npart_send_in * NINTS, @@ -457,6 +479,14 @@ namespace comm { send_rank, 0, MPI_COMM_WORLD); + if (NPLDS > 0) { + MPI_Send(send_buff_pld.data(), + npart_send_in * NPLDS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + } } else if ((recv_rank >= 0) and (npart_recv_in > 0)) { raise::ErrorIf(recv_offset_int + npart_recv_in * NINTS > recv_buff_int.extent(0), @@ -483,6 +513,15 @@ namespace comm { 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + if (NPLDS > 0) { + MPI_Recv(recv_buff_pld.data() + recv_offset_pld, + npart_recv_in * NPLDS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } } current_received += npart_recv_in; iteration++; @@ -494,14 +533,14 @@ namespace comm { "PopulateFromRecvBuffer", npart_recv, kernel::comm::ExtractReceivedPrtls_kernel( - recv_buff_int, recv_buff_real, recv_buff_prtldx, - NINTS, NREALS, NPRTLDX, + recv_buff_int, recv_buff_real, recv_buff_prtldx, 
recv_buff_pld, + NINTS, NREALS, NPRTLDX, NPLDS, species.npart(), species.i1, species.i1_prev, species.dx1, species.dx1_prev, species.i2, species.i2_prev, species.dx2, species.dx2_prev, species.i3, species.i3_prev, species.dx3, species.dx3_prev, species.ux1, species.ux2, species.ux3, - species.weight, species.phi, species.tag, + species.weight, species.phi, species.pld, species.tag, outgoing_indices) ); // clang-format on diff --git a/src/kernels/comm.hpp b/src/kernels/comm.hpp index a24ee897e..e8fe96fb0 100644 --- a/src/kernels/comm.hpp +++ b/src/kernels/comm.hpp @@ -111,13 +111,15 @@ namespace kernel::comm { array_t send_buff_int; array_t send_buff_real; array_t send_buff_prtldx; + array_t send_buff_pld; - const unsigned short NINTS, NREALS, NPRTLDX; + const unsigned short NINTS, NREALS, NPRTLDX, NPLDS; const std::size_t idx_offset; const array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; - const array_t ux1, ux2, ux3, weight, phi; const array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; + const array_t ux1, ux2, ux3, weight, phi; + const array_t pld; array_t tag; const array_t outgoing_indices; @@ -125,9 +127,11 @@ namespace kernel::comm { PopulatePrtlSendBuffer_kernel(array_t& send_buff_int, array_t& send_buff_real, array_t& send_buff_prtldx, + array_t& send_buff_pld, unsigned short NINTS, unsigned short NREALS, unsigned short NPRTLDX, + unsigned short NPLDS, std::size_t idx_offset, const array_t& i1, const array_t& i1_prev, @@ -146,25 +150,28 @@ namespace kernel::comm { const array_t& ux3, const array_t& weight, const array_t& phi, + const array_t& pld, array_t& tag, const array_t& outgoing_indices) : send_buff_int { send_buff_int } , send_buff_real { send_buff_real } , send_buff_prtldx { send_buff_prtldx } + , send_buff_pld { send_buff_pld } , NINTS { NINTS } , NREALS { NREALS } , NPRTLDX { NPRTLDX } + , NPLDS { NPLDS } , idx_offset { idx_offset } , i1 { i1 } , i1_prev { i1_prev } - , dx1 { dx1 } - , dx1_prev { dx1_prev } , i2 { i2 } , i2_prev { i2_prev } - , 
dx2 { dx2 } - , dx2_prev { dx2_prev } , i3 { i3 } , i3_prev { i3_prev } + , dx1 { dx1 } + , dx1_prev { dx1_prev } + , dx2 { dx2 } + , dx2_prev { dx2_prev } , dx3 { dx3 } , dx3_prev { dx3_prev } , ux1 { ux1 } @@ -172,6 +179,7 @@ namespace kernel::comm { , ux3 { ux3 } , weight { weight } , phi { phi } + , pld { pld } , tag { tag } , outgoing_indices { outgoing_indices } {} @@ -202,6 +210,11 @@ namespace kernel::comm { if constexpr (D == Dim::_2D and C != Coord::Cart) { send_buff_real(NREALS * p + 4) = phi(idx); } + if (NPLDS > 0) { + for (auto l { 0u }; l < NPLDS; ++l) { + send_buff_pld(NPLDS * p + l) = pld(idx, l); + } + } tag(idx) = ParticleTag::dead; } }; @@ -211,13 +224,15 @@ namespace kernel::comm { const array_t recv_buff_int; const array_t recv_buff_real; const array_t recv_buff_prtldx; + const array_t recv_buff_pld; - const unsigned short NINTS, NREALS, NPRTLDX; + const unsigned short NINTS, NREALS, NPRTLDX, NPLDS; const std::size_t npart, npart_holes; array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; - array_t ux1, ux2, ux3, weight, phi; array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; + array_t ux1, ux2, ux3, weight, phi; + array_t pld; array_t tag; const array_t outgoing_indices; @@ -225,9 +240,11 @@ namespace kernel::comm { ExtractReceivedPrtls_kernel(const array_t& recv_buff_int, const array_t& recv_buff_real, const array_t& recv_buff_prtldx, + const array_t& recv_buff_pld, unsigned short NINTS, unsigned short NREALS, unsigned short NPRTLDX, + unsigned short NPLDS, std::size_t npart, array_t& i1, array_t& i1_prev, @@ -246,26 +263,29 @@ namespace kernel::comm { array_t& ux3, array_t& weight, array_t& phi, + array_t& pld, array_t& tag, const array_t& outgoing_indices) : recv_buff_int { recv_buff_int } , recv_buff_real { recv_buff_real } , recv_buff_prtldx { recv_buff_prtldx } + , recv_buff_pld { recv_buff_pld } , NINTS { NINTS } , NREALS { NREALS } , NPRTLDX { NPRTLDX } + , NPLDS { NPLDS } , npart { npart } , npart_holes { outgoing_indices.extent(0) } , i1 
{ i1 } , i1_prev { i1_prev } - , dx1 { dx1 } - , dx1_prev { dx1_prev } , i2 { i2 } , i2_prev { i2_prev } - , dx2 { dx2 } - , dx2_prev { dx2_prev } , i3 { i3 } , i3_prev { i3_prev } + , dx1 { dx1 } + , dx1_prev { dx1_prev } + , dx2 { dx2 } + , dx2_prev { dx2_prev } , dx3 { dx3 } , dx3_prev { dx3_prev } , ux1 { ux1 } @@ -273,6 +293,7 @@ namespace kernel::comm { , ux3 { ux3 } , weight { weight } , phi { phi } + , pld { pld } , tag { tag } {} Inline void operator()(index_t p) const { From 6aa7a8099d30e0e950d3f026d18578d4da71c728 Mon Sep 17 00:00:00 2001 From: haykh Date: Tue, 28 Jan 2025 15:15:17 -0500 Subject: [PATCH 47/52] minor bug fixed in kernel --- src/kernels/comm.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/kernels/comm.hpp b/src/kernels/comm.hpp index d33e2e006..b280ce38b 100644 --- a/src/kernels/comm.hpp +++ b/src/kernels/comm.hpp @@ -292,7 +292,8 @@ namespace kernel::comm { , weight { weight } , phi { phi } , pld { pld } - , tag { tag } {} + , tag { tag } + , outgoing_indices { outgoing_indices } {} Inline void operator()(index_t p) const { std::size_t idx; From 3071251053b08a42ee852f1e98a6553ad5030dc5 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 10:20:47 -0500 Subject: [PATCH 48/52] nix devshell --- dev/nix/adios2.nix | 69 ++++++++++++++++++++++++++++++---------------- dev/nix/kokkos.nix | 64 ++++++++++++++++++++++++++++++++++++++++++ dev/nix/shell.nix | 54 ++++++++++++++++-------------------- 3 files changed, 133 insertions(+), 54 deletions(-) create mode 100644 dev/nix/kokkos.nix diff --git a/dev/nix/adios2.nix b/dev/nix/adios2.nix index 19c706aa4..f2cd4ca43 100644 --- a/dev/nix/adios2.nix +++ b/dev/nix/adios2.nix @@ -7,6 +7,20 @@ let name = "adios2"; version = "2.10.2"; + cmakeFlags = { + CMAKE_CXX_STANDARD = "17"; + CMAKE_CXX_EXTENSIONS = "OFF"; + CMAKE_POSITION_INDEPENDENT_CODE = "TRUE"; + BUILD_SHARED_LIBS = "ON"; + ADIOS2_USE_HDF5 = if hdf5 then "ON" else "OFF"; + ADIOS2_USE_Python = "OFF"; + 
ADIOS2_USE_Fortran = "OFF"; + ADIOS2_USE_ZeroMQ = "OFF"; + BUILD_TESTING = "OFF"; + ADIOS2_BUILD_EXAMPLES = "OFF"; + ADIOS2_USE_MPI = if mpi then "ON" else "OFF"; + CMAKE_BUILD_TYPE = "Release"; + } // (if !mpi then { ADIOS2_HAVE_HDF5_VOL = "OFF"; } else { }); in pkgs.stdenv.mkDerivation { pname = "${name}${if hdf5 then "-hdf5" else ""}${if mpi then "-mpi" else ""}"; @@ -17,35 +31,44 @@ pkgs.stdenv.mkDerivation { sha256 = "sha256-NVyw7xoPutXeUS87jjVv1YxJnwNGZAT4QfkBLzvQbwg="; }; - nativeBuildInputs = - with pkgs; + nativeBuildInputs = with pkgs; [ + cmake + perl + ]; + + propagatedBuildInputs = [ - cmake - libgcc - perl - breakpointHook + pkgs.gcc13 ] - ++ (if mpi then [ openmpi ] else [ ]); - - buildInputs = if hdf5 then (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5 ]) else [ ]; + ++ (if hdf5 then (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5 ]) else [ ]) + ++ (if mpi then [ pkgs.openmpi ] else [ ]); configurePhase = '' - cmake -B build $src \ - -D CMAKE_CXX_STANDARD=17 \ - -D CMAKE_CXX_EXTENSIONS=OFF \ - -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ - -D BUILD_SHARED_LIBS=ON \ - -D ADIOS2_USE_HDF5=${if hdf5 then "ON" else "OFF"} \ - -D ADIOS2_USE_Python=OFF \ - -D ADIOS2_USE_Fortran=OFF \ - -D ADIOS2_USE_ZeroMQ=OFF \ - -D BUILD_TESTING=OFF \ - -D ADIOS2_BUILD_EXAMPLES=OFF \ - -D ADIOS2_USE_MPI=${if mpi then "ON" else "OFF"} \ - -D ADIOS2_HAVE_HDF5_VOL=OFF \ - -D CMAKE_BUILD_TYPE=Release + cmake -B build $src ${ + pkgs.lib.attrsets.foldlAttrs ( + acc: key: value: + acc + " -D ${key}=${value}" + ) "" cmakeFlags + } ''; + # configurePhase = + # '' + # cmake -B build $src \ + # -D CMAKE_CXX_STANDARD=17 \ + # -D CMAKE_CXX_EXTENSIONS=OFF \ + # -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ + # -D BUILD_SHARED_LIBS=ON \ + # -D ADIOS2_USE_HDF5=${if hdf5 then "ON" else "OFF"} \ + # -D ADIOS2_USE_Python=OFF \ + # -D ADIOS2_USE_Fortran=OFF \ + # -D ADIOS2_USE_ZeroMQ=OFF \ + # -D BUILD_TESTING=OFF \ + # -D ADIOS2_BUILD_EXAMPLES=OFF \ + # -D ADIOS2_USE_MPI=${if mpi then "ON" else 
"OFF"} \ + # -D CMAKE_BUILD_TYPE=Release + # '' + buildPhase = '' cmake --build build -j ''; diff --git a/dev/nix/kokkos.nix b/dev/nix/kokkos.nix new file mode 100644 index 000000000..cfe583c7a --- /dev/null +++ b/dev/nix/kokkos.nix @@ -0,0 +1,64 @@ +{ + pkgs ? import { }, + arch ? "native", + gpu ? "none", +}: + +let + gpuUpper = pkgs.lib.toUpper gpu; + name = "kokkos"; + version = "4.5.01"; + compilerPkgs = { + "HIP" = with pkgs.rocmPackages; [ + rocm-core + clr + rocthrust + rocprim + rocminfo + rocm-smi + ]; + "NONE" = [ + pkgs.gcc13 + ]; + }; + cmakeFlags = { + "HIP" = [ + "-D CMAKE_C_COMPILER=hipcc" + "-D CMAKE_CXX_COMPILER=hipcc" + ]; + "NONE" = [ ]; + }; + getArch = + _: + if gpu != "none" && arch == "native" then + throw "Please specify an architecture when the GPU support is enabled. Available architectures: https://kokkos.org/kokkos-core-wiki/keywords.html#architectures" + else + pkgs.lib.toUpper arch; + +in +pkgs.stdenv.mkDerivation { + pname = "${name}"; + version = "${version}"; + src = pkgs.fetchgit { + url = "https://github.com/kokkos/kokkos/"; + rev = "v${version}"; + sha256 = "sha256-cI2p+6J+8BRV5fXTDxxHTfh6P5PeeLUiF73o5zVysHQ="; + }; + + nativeBuildInputs = with pkgs; [ + cmake + ]; + + propagatedBuildInputs = compilerPkgs.${gpuUpper}; + + cmakeFlags = [ + "-D CMAKE_CXX_STANDARD=17" + "-D CMAKE_CXX_EXTENSIONS=OFF" + "-D CMAKE_POSITION_INDEPENDENT_CODE=TRUE" + "-D Kokkos_ARCH_${getArch { }}=ON" + (if gpu != "none" then "-D Kokkos_ENABLE_${gpuUpper}=ON" else "") + "-D CMAKE_BUILD_TYPE=Release" + ] ++ cmakeFlags.${gpuUpper}; + + enableParallelBuilding = true; +} diff --git a/dev/nix/shell.nix b/dev/nix/shell.nix index f9d48fbfd..219da0038 100644 --- a/dev/nix/shell.nix +++ b/dev/nix/shell.nix @@ -2,40 +2,36 @@ pkgs ? import { }, mpi ? false, hdf5 ? false, + gpu ? "none", + arch ? 
"native", }: let name = "entity-dev"; - compilerPkg = pkgs.gcc13; - compilerCXX = "g++"; - compilerCC = "gcc"; adios2Pkg = (pkgs.callPackage ./adios2.nix { inherit pkgs mpi hdf5; }); + kokkosPkg = (pkgs.callPackage ./kokkos.nix { inherit pkgs arch gpu; }); in pkgs.mkShell { name = "${name}-env"; - nativeBuildInputs = - with pkgs; - [ - zlib - cmake - - compilerPkg - - clang-tools - - adios2Pkg - python312 - python312Packages.jupyter - - cmake-format - neocmakelsp - black - pyright - taplo - vscode-langservers-extracted - ] - ++ (if mpi then [ pkgs.openmpi ] else [ ]) - ++ (if hdf5 then (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5 ]) else [ ]); + nativeBuildInputs = with pkgs; [ + zlib + cmake + + clang-tools + + adios2Pkg + kokkosPkg + + python312 + python312Packages.jupyter + + cmake-format + neocmakelsp + black + pyright + taplo + vscode-langservers-extracted + ]; LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath ([ pkgs.stdenv.cc.cc @@ -45,12 +41,8 @@ pkgs.mkShell { shellHook = '' BLUE='\033[0;34m' NC='\033[0m' - export CC=$(which ${compilerCC}) - export CXX=$(which ${compilerCXX}) - export CMAKE_CXX_COMPILER=$(which ${compilerCXX}) - export CMAKE_C_COMPILER=$(which ${compilerCC}) echo "" - echo -e "${name} nix-shell activated: ''\${BLUE}$(which ${compilerCXX})''\${NC}" + echo -e "${name} nix-shell activated" ''; } From 6aa045a4947854913112858fdfed36ca7100a144 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 10:51:45 -0500 Subject: [PATCH 49/52] pld reading in checkpoint fixed --- src/checkpoint/reader.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/checkpoint/reader.cpp b/src/checkpoint/reader.cpp index 208972561..9fc2d2640 100644 --- a/src/checkpoint/reader.cpp +++ b/src/checkpoint/reader.cpp @@ -98,7 +98,7 @@ namespace checkpoint { fmt::format("s%d_%s", s + 1, quantity.c_str())); if (var) { var.SetSelection(adios2::Box({ offset }, { count })); - const auto slice = std::pair { 0, count }; + const auto slice = 
range_tuple_t(0, count); auto array_h = Kokkos::create_mirror_view(array); reader.Get(var, Kokkos::subview(array_h, slice).data(), adios2::Mode::Sync); Kokkos::deep_copy(Kokkos::subview(array, slice), @@ -121,13 +121,12 @@ namespace checkpoint { auto var = io.InquireVariable(fmt::format("s%d_plds", s + 1)); if (var) { var.SetSelection(adios2::Box({ offset, 0 }, { count, nplds })); - const auto slice = std::pair { 0, count }; + const auto slice = range_tuple_t(0, count); auto array_h = Kokkos::create_mirror_view(array); reader.Get(var, Kokkos::subview(array_h, slice, range_tuple_t(0, nplds)).data(), adios2::Mode::Sync); - Kokkos::deep_copy(Kokkos::subview(array, slice, range_tuple_t(0, nplds)), - Kokkos::subview(array_h, slice, range_tuple_t(0, nplds))); + Kokkos::deep_copy(array, array_h); } else { raise::Error(fmt::format("Variable: s%d_plds not found", s + 1), HERE); } From 649e3285796f107a2699c38e3c740cf1c998d895 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 10:54:43 -0500 Subject: [PATCH 50/52] clean benchmark --- benchmark/benchmark.cpp | 274 ++-------------------------------------- legacy/benchmark.cpp | 273 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 282 insertions(+), 265 deletions(-) create mode 100644 legacy/benchmark.cpp diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 54fc17cf9..98306c92b 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -1,273 +1,17 @@ -#include "enums.h" #include "global.h" -#include "utils/error.h" - -#include "metrics/metric_base.h" -#include "metrics/minkowski.h" - -#include "framework/containers/species.h" -#include "framework/domain/domain.h" -#include "framework/domain/metadomain.h" - -#include - -#include "framework/domain/communications.cpp" -#include "mpi.h" -#include "mpi-ext.h" - -#define TIMER_START(label) \ - Kokkos::fence(); \ - auto start_##label = std::chrono::high_resolution_clock::now(); - -#define TIMER_STOP(label) \ - Kokkos::fence(); \ - auto 
stop_##label = std::chrono::high_resolution_clock::now(); \ - auto duration_##label = std::chrono::duration_cast( \ - stop_##label - start_##label) \ - .count(); \ - std::cout << "Timer [" #label "]: " << duration_##label << " microseconds" \ - << std::endl; - -/* - Test to check the performance of the new particle allocation scheme - - Create a metadomain object main() - - Set npart + initialize tags InitializeParticleArrays() - - 'Push' the particles by randomly updating the tags PushParticles() - - Communicate particles to neighbors and time the communication - - Compute the time taken for best of N iterations for the communication - */ -using namespace ntt; - -// Set npart and set the particle tags to alive -template -void InitializeParticleArrays(Domain& domain, const int npart) { - raise::ErrorIf(npart > domain.species[0].maxnpart(), - "Npart cannot be greater than maxnpart", - HERE); - const auto nspecies = domain.species.size(); - for (int i_spec = 0; i_spec < nspecies; i_spec++) { - domain.species[i_spec].set_npart(npart); - domain.species[i_spec].SyncHostDevice(); - auto& this_tag = domain.species[i_spec].tag; - Kokkos::parallel_for( - "Initialize particles", - npart, - Lambda(const std::size_t i) { this_tag(i) = ParticleTag::alive; }); - } - return; -} - -// Randomly reassign tags to particles for a fraction of particles -template -void PushParticles(Domain& domain, - const double send_frac, - const int seed_ind, - const int seed_tag) { - raise::ErrorIf(send_frac > 1.0, "send_frac cannot be greater than 1.0", HERE); - const auto nspecies = domain.species.size(); - for (int i_spec = 0; i_spec < nspecies; i_spec++) { - domain.species[i_spec].set_unsorted(); - const auto nparticles = domain.species[i_spec].npart(); - const auto nparticles_to_send = static_cast(send_frac * nparticles); - // Generate random indices to send - // Kokkos::Random_XorShift64_Pool<> random_pool(seed_ind); - Kokkos::View indices_to_send("indices_to_send", nparticles_to_send); - 
Kokkos::fill_random(indices_to_send, domain.random_pool, 0, nparticles); - // Generate random tags to send - // Kokkos::Random_XorShift64_Pool<> random_pool_tag(seed_tag); - Kokkos::View tags_to_send("tags_to_send", nparticles_to_send); - Kokkos::fill_random(tags_to_send, - domain.random_pool, - 0, - domain.species[i_spec].ntags()); - auto& this_tag = domain.species[i_spec].tag; - Kokkos::parallel_for( - "Push particles", - nparticles_to_send, - Lambda(const std::size_t i) { - auto prtl_to_send = indices_to_send(i); - auto tag_to_send = tags_to_send(i); - this_tag(prtl_to_send) = tag_to_send; - }); - domain.species[i_spec].npart_per_tag(); - domain.species[i_spec].SyncHostDevice(); - } - return; -} +#include +#include auto main(int argc, char* argv[]) -> int { - GlobalInitialize(argc, argv); - { - /* - MPI checks - */ - printf("Compile time check:\n"); -#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT - printf("This MPI library has CUDA-aware support.\n", MPIX_CUDA_AWARE_SUPPORT); -#elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT - printf("This MPI library does not have CUDA-aware support.\n"); -#else - printf("This MPI library cannot determine if there is CUDA-aware support.\n"); -#endif /* MPIX_CUDA_AWARE_SUPPORT */ -printf("Run time check:\n"); -#if defined(MPIX_CUDA_AWARE_SUPPORT) - if (1 == MPIX_Query_cuda_support()) { - printf("This MPI library has CUDA-aware support.\n"); - } else { - printf("This MPI library does not have CUDA-aware support.\n"); - } -#else /* !defined(MPIX_CUDA_AWARE_SUPPORT) */ - printf("This MPI library cannot determine if there is CUDA-aware support.\n"); -#endif /* MPIX_CUDA_AWARE_SUPPORT */ - - /* - Test to send and receive Kokkos arrays - */ - int sender_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &sender_rank); - - int neighbor_rank = 0; - if (sender_rank == 0) { - neighbor_rank = 1; - } - else if (sender_rank == 1) { - neighbor_rank = 0; - } - else { - raise::Error("This test is only for 2 ranks", HERE); 
- } - Kokkos::View send_array("send_array", 10); - Kokkos::View recv_array("recv_array", 10); - if (sender_rank == 0) { - Kokkos::deep_copy(send_array, 10); - } - else { - Kokkos::deep_copy(send_array, 20); - } - - auto send_array_host = Kokkos::create_mirror_view(send_array); - Kokkos::deep_copy(send_array_host, send_array); - auto host_recv_array = Kokkos::create_mirror_view(recv_array); - - MPI_Sendrecv(send_array.data(), send_array.extent(0), MPI_INT, neighbor_rank, 0, - recv_array.data(), recv_array.extent(0), MPI_INT, neighbor_rank, 0, - MPI_COMM_WORLD, MPI_STATUS_IGNORE); - - // Print the received array - Kokkos::deep_copy(host_recv_array, recv_array); - for (int i = 0; i < 10; ++i) { - printf("Rank %d: Received %d\n", sender_rank, host_recv_array(i)); - } - - - std::cout << "Constructing the domain" << std::endl; - // Create a Metadomain object - const unsigned int ndomains = 2; - const std::vector global_decomposition = { - {-1, -1, -1} - }; - const std::vector global_ncells = { 32, 32, 32 }; - const boundaries_t global_extent = { - {0.0, 3.0}, - {0.0, 3.0}, - {0.0, 3.0} - }; - const boundaries_t global_flds_bc = { - {FldsBC::PERIODIC, FldsBC::PERIODIC}, - {FldsBC::PERIODIC, FldsBC::PERIODIC}, - {FldsBC::PERIODIC, FldsBC::PERIODIC} - }; - const boundaries_t global_prtl_bc = { - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC} - }; - const std::map metric_params = {}; - const int maxnpart = argc > 1 ? 
std::stoi(argv[1]) : 1000; - const double npart_to_send_frac = 0.01; - const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); - auto species = ntt::ParticleSpecies(1u, - "test_e", - 1.0f, - 1.0f, - maxnpart, - ntt::PrtlPusher::BORIS, - false, - ntt::Cooling::NONE); - auto metadomain = Metadomain>( - ndomains, - global_decomposition, - global_ncells, - global_extent, - global_flds_bc, - global_prtl_bc, - metric_params, - { species }); - - const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; - auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); - auto timers = timer::Timers { { "Communication" }, nullptr, false }; - InitializeParticleArrays(*local_domain, npart); - // Timers for both the communication routines - auto total_time_elapsed_old = 0; - auto total_time_elapsed_new = 0; - - int seed_ind = 0; - int seed_tag = 1; - Kokkos::fence(); - - for (int i = 0; i < 10; ++i) { - { - // Push - seed_ind += 2; - seed_tag += 3; - PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); - // Sort new - Kokkos::fence(); - auto start_new = std::chrono::high_resolution_clock::now(); - metadomain.CommunicateParticlesBuffer(*local_domain, &timers); - auto stop_new = std::chrono::high_resolution_clock::now(); - auto duration_new = std::chrono::duration_cast( - stop_new - start_new) - .count(); - total_time_elapsed_new += duration_new; - Kokkos::fence(); - } - { - // Push - seed_ind += 2; - seed_tag += 3; - PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); - // Sort old - Kokkos::fence(); - auto start_old = std::chrono::high_resolution_clock::now(); - metadomain.CommunicateParticles(*local_domain, &timers); - auto stop_old = std::chrono::high_resolution_clock::now(); - auto duration_old = std::chrono::duration_cast( - stop_old - start_old) - .count(); - total_time_elapsed_old += duration_old; - Kokkos::fence(); - } - } - printf("Total time elapsed for old: %f us : %f us/prtl\n", - 
total_time_elapsed_old / 10.0, - total_time_elapsed_old / 10.0 * 1000 / npart); - printf("Total time elapsed for new: %f us : %f us/prtl\n", - total_time_elapsed_new / 10.0, - total_time_elapsed_new / 10.0 * 1000 / npart); + ntt::GlobalInitialize(argc, argv); + try { + // ... + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + GlobalFinalize(); + return 1; } GlobalFinalize(); return 0; } - -/* - Buggy behavior: - Consider a single domain with a single mpi rank - Particle tag arrays is set to [0, 0, 1, 1, 2, 3, ...] for a single domain - CommunicateParticles() discounts all the dead particles and reassigns the - other tags to alive - CommunicateParticlesBuffer() only keeps the ParticleTag::Alive particles - and discounts the rest -*/ diff --git a/legacy/benchmark.cpp b/legacy/benchmark.cpp new file mode 100644 index 000000000..54fc17cf9 --- /dev/null +++ b/legacy/benchmark.cpp @@ -0,0 +1,273 @@ +#include "enums.h" +#include "global.h" + +#include "utils/error.h" + +#include "metrics/metric_base.h" +#include "metrics/minkowski.h" + +#include "framework/containers/species.h" +#include "framework/domain/domain.h" +#include "framework/domain/metadomain.h" + +#include + +#include "framework/domain/communications.cpp" +#include "mpi.h" +#include "mpi-ext.h" + +#define TIMER_START(label) \ + Kokkos::fence(); \ + auto start_##label = std::chrono::high_resolution_clock::now(); + +#define TIMER_STOP(label) \ + Kokkos::fence(); \ + auto stop_##label = std::chrono::high_resolution_clock::now(); \ + auto duration_##label = std::chrono::duration_cast( \ + stop_##label - start_##label) \ + .count(); \ + std::cout << "Timer [" #label "]: " << duration_##label << " microseconds" \ + << std::endl; + +/* + Test to check the performance of the new particle allocation scheme + - Create a metadomain object main() + - Set npart + initialize tags InitializeParticleArrays() + - 'Push' the particles by randomly updating the tags PushParticles() + - 
Communicate particles to neighbors and time the communication + - Compute the time taken for best of N iterations for the communication + */ +using namespace ntt; + +// Set npart and set the particle tags to alive +template +void InitializeParticleArrays(Domain& domain, const int npart) { + raise::ErrorIf(npart > domain.species[0].maxnpart(), + "Npart cannot be greater than maxnpart", + HERE); + const auto nspecies = domain.species.size(); + for (int i_spec = 0; i_spec < nspecies; i_spec++) { + domain.species[i_spec].set_npart(npart); + domain.species[i_spec].SyncHostDevice(); + auto& this_tag = domain.species[i_spec].tag; + Kokkos::parallel_for( + "Initialize particles", + npart, + Lambda(const std::size_t i) { this_tag(i) = ParticleTag::alive; }); + } + return; +} + +// Randomly reassign tags to particles for a fraction of particles +template +void PushParticles(Domain& domain, + const double send_frac, + const int seed_ind, + const int seed_tag) { + raise::ErrorIf(send_frac > 1.0, "send_frac cannot be greater than 1.0", HERE); + const auto nspecies = domain.species.size(); + for (int i_spec = 0; i_spec < nspecies; i_spec++) { + domain.species[i_spec].set_unsorted(); + const auto nparticles = domain.species[i_spec].npart(); + const auto nparticles_to_send = static_cast(send_frac * nparticles); + // Generate random indices to send + // Kokkos::Random_XorShift64_Pool<> random_pool(seed_ind); + Kokkos::View indices_to_send("indices_to_send", nparticles_to_send); + Kokkos::fill_random(indices_to_send, domain.random_pool, 0, nparticles); + // Generate random tags to send + // Kokkos::Random_XorShift64_Pool<> random_pool_tag(seed_tag); + Kokkos::View tags_to_send("tags_to_send", nparticles_to_send); + Kokkos::fill_random(tags_to_send, + domain.random_pool, + 0, + domain.species[i_spec].ntags()); + auto& this_tag = domain.species[i_spec].tag; + Kokkos::parallel_for( + "Push particles", + nparticles_to_send, + Lambda(const std::size_t i) { + auto prtl_to_send = 
indices_to_send(i); + auto tag_to_send = tags_to_send(i); + this_tag(prtl_to_send) = tag_to_send; + }); + domain.species[i_spec].npart_per_tag(); + domain.species[i_spec].SyncHostDevice(); + } + return; +} + +auto main(int argc, char* argv[]) -> int { + GlobalInitialize(argc, argv); + { + /* + MPI checks + */ + printf("Compile time check:\n"); +#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT + printf("This MPI library has CUDA-aware support.\n", MPIX_CUDA_AWARE_SUPPORT); +#elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT + printf("This MPI library does not have CUDA-aware support.\n"); +#else + printf("This MPI library cannot determine if there is CUDA-aware support.\n"); +#endif /* MPIX_CUDA_AWARE_SUPPORT */ +printf("Run time check:\n"); +#if defined(MPIX_CUDA_AWARE_SUPPORT) + if (1 == MPIX_Query_cuda_support()) { + printf("This MPI library has CUDA-aware support.\n"); + } else { + printf("This MPI library does not have CUDA-aware support.\n"); + } +#else /* !defined(MPIX_CUDA_AWARE_SUPPORT) */ + printf("This MPI library cannot determine if there is CUDA-aware support.\n"); +#endif /* MPIX_CUDA_AWARE_SUPPORT */ + + /* + Test to send and receive Kokkos arrays + */ + int sender_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &sender_rank); + + int neighbor_rank = 0; + if (sender_rank == 0) { + neighbor_rank = 1; + } + else if (sender_rank == 1) { + neighbor_rank = 0; + } + else { + raise::Error("This test is only for 2 ranks", HERE); + } + Kokkos::View send_array("send_array", 10); + Kokkos::View recv_array("recv_array", 10); + if (sender_rank == 0) { + Kokkos::deep_copy(send_array, 10); + } + else { + Kokkos::deep_copy(send_array, 20); + } + + auto send_array_host = Kokkos::create_mirror_view(send_array); + Kokkos::deep_copy(send_array_host, send_array); + auto host_recv_array = Kokkos::create_mirror_view(recv_array); + + MPI_Sendrecv(send_array.data(), send_array.extent(0), MPI_INT, neighbor_rank, 0, + recv_array.data(), 
recv_array.extent(0), MPI_INT, neighbor_rank, 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + // Print the received array + Kokkos::deep_copy(host_recv_array, recv_array); + for (int i = 0; i < 10; ++i) { + printf("Rank %d: Received %d\n", sender_rank, host_recv_array(i)); + } + + + std::cout << "Constructing the domain" << std::endl; + // Create a Metadomain object + const unsigned int ndomains = 2; + const std::vector global_decomposition = { + {-1, -1, -1} + }; + const std::vector global_ncells = { 32, 32, 32 }; + const boundaries_t global_extent = { + {0.0, 3.0}, + {0.0, 3.0}, + {0.0, 3.0} + }; + const boundaries_t global_flds_bc = { + {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC} + }; + const boundaries_t global_prtl_bc = { + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC} + }; + const std::map metric_params = {}; + const int maxnpart = argc > 1 ? 
std::stoi(argv[1]) : 1000; + const double npart_to_send_frac = 0.01; + const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); + auto species = ntt::ParticleSpecies(1u, + "test_e", + 1.0f, + 1.0f, + maxnpart, + ntt::PrtlPusher::BORIS, + false, + ntt::Cooling::NONE); + auto metadomain = Metadomain>( + ndomains, + global_decomposition, + global_ncells, + global_extent, + global_flds_bc, + global_prtl_bc, + metric_params, + { species }); + + const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; + auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); + auto timers = timer::Timers { { "Communication" }, nullptr, false }; + InitializeParticleArrays(*local_domain, npart); + // Timers for both the communication routines + auto total_time_elapsed_old = 0; + auto total_time_elapsed_new = 0; + + int seed_ind = 0; + int seed_tag = 1; + Kokkos::fence(); + + for (int i = 0; i < 10; ++i) { + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort new + Kokkos::fence(); + auto start_new = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticlesBuffer(*local_domain, &timers); + auto stop_new = std::chrono::high_resolution_clock::now(); + auto duration_new = std::chrono::duration_cast( + stop_new - start_new) + .count(); + total_time_elapsed_new += duration_new; + Kokkos::fence(); + } + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort old + Kokkos::fence(); + auto start_old = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticles(*local_domain, &timers); + auto stop_old = std::chrono::high_resolution_clock::now(); + auto duration_old = std::chrono::duration_cast( + stop_old - start_old) + .count(); + total_time_elapsed_old += duration_old; + Kokkos::fence(); + } + } + printf("Total time elapsed for old: %f us : %f us/prtl\n", + 
total_time_elapsed_old / 10.0, + total_time_elapsed_old / 10.0 * 1000 / npart); + printf("Total time elapsed for new: %f us : %f us/prtl\n", + total_time_elapsed_new / 10.0, + total_time_elapsed_new / 10.0 * 1000 / npart); + } + GlobalFinalize(); + return 0; +} + +/* + Buggy behavior: + Consider a single domain with a single mpi rank + Particle tag arrays is set to [0, 0, 1, 1, 2, 3, ...] for a single domain + CommunicateParticles() discounts all the dead particles and reassigns the + other tags to alive + CommunicateParticlesBuffer() only keeps the ParticleTag::Alive particles + and discounts the rest +*/ From 3f2674c23088ca344a9e61135f79815672d610ad Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 11:05:27 -0500 Subject: [PATCH 51/52] cleanup prep for release --- dev/nix/adios2.nix | 17 ---- setups/srpic/blob/blob.toml | 66 ------------- setups/srpic/blob/pgen.hpp | 103 -------------------- setups/{srpic => tests}/blob/blob.py | 0 setups/tests/blob/blob.toml | 76 +++++++-------- setups/{srpic => tests}/blob/nparts.py | 0 setups/tests/blob/pgen.hpp | 126 +++++++++++-------------- src/framework/containers/particles.cpp | 54 ----------- 8 files changed, 92 insertions(+), 350 deletions(-) delete mode 100644 setups/srpic/blob/blob.toml delete mode 100644 setups/srpic/blob/pgen.hpp rename setups/{srpic => tests}/blob/blob.py (100%) rename setups/{srpic => tests}/blob/nparts.py (100%) diff --git a/dev/nix/adios2.nix b/dev/nix/adios2.nix index f2cd4ca43..8ec1fd36c 100644 --- a/dev/nix/adios2.nix +++ b/dev/nix/adios2.nix @@ -52,23 +52,6 @@ pkgs.stdenv.mkDerivation { } ''; - # configurePhase = - # '' - # cmake -B build $src \ - # -D CMAKE_CXX_STANDARD=17 \ - # -D CMAKE_CXX_EXTENSIONS=OFF \ - # -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ - # -D BUILD_SHARED_LIBS=ON \ - # -D ADIOS2_USE_HDF5=${if hdf5 then "ON" else "OFF"} \ - # -D ADIOS2_USE_Python=OFF \ - # -D ADIOS2_USE_Fortran=OFF \ - # -D ADIOS2_USE_ZeroMQ=OFF \ - # -D BUILD_TESTING=OFF \ - # -D 
ADIOS2_BUILD_EXAMPLES=OFF \ - # -D ADIOS2_USE_MPI=${if mpi then "ON" else "OFF"} \ - # -D CMAKE_BUILD_TYPE=Release - # '' - buildPhase = '' cmake --build build -j ''; diff --git a/setups/srpic/blob/blob.toml b/setups/srpic/blob/blob.toml deleted file mode 100644 index 7a047f348..000000000 --- a/setups/srpic/blob/blob.toml +++ /dev/null @@ -1,66 +0,0 @@ -[simulation] - name = "blob" - engine = "srpic" - runtime = 100.0 - - [simulation.domain] - decomposition = [2, 1, 1] - -[grid] - resolution = [1024, 1024] - extent = [[-10.0, 10.0], [-10.0, 10.0]] - - [grid.metric] - metric = "minkowski" - - [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] - particles = [["PERIODIC"], ["PERIODIC"]] - -[scales] - larmor0 = 1.0 - skindepth0 = 1.0 - -[algorithms] - current_filters = 4 - - [algorithms.timestep] - CFL = 0.5 - -[particles] - ppc0 = 16.0 - - [[particles.species]] - label = "e-_p" - mass = 1.0 - charge = -1.0 - maxnpart = 1e7 - - [[particles.species]] - label = "e+_p" - mass = 1.0 - charge = 1.0 - maxnpart = 1e7 - -[setup] - temp_1 = 1e-4 - x1c = -5.0 - x2c = 0.0 - v_max = 50.0 - dr = 1.0 - -[output] - format = "hdf5" - interval_time = 1.0 - - [output.fields] - quantities = ["N_1", "N_2", "B", "E"] - - [output.particles] - enable = false - - [output.spectra] - enable = false - -[diagnostics] - colored_stdout = false diff --git a/setups/srpic/blob/pgen.hpp b/setups/srpic/blob/pgen.hpp deleted file mode 100644 index f7b7d71b5..000000000 --- a/setups/srpic/blob/pgen.hpp +++ /dev/null @@ -1,103 +0,0 @@ -#ifndef PROBLEM_GENERATOR_H -#define PROBLEM_GENERATOR_H - -#include "enums.h" -#include "global.h" - -#include "arch/kokkos_aliases.h" -#include "arch/traits.h" - -#include "archetypes/energy_dist.h" -#include "archetypes/particle_injector.h" -#include "archetypes/problem_generator.h" -#include "framework/domain/domain.h" -#include "framework/domain/metadomain.h" - -namespace user { - using namespace ntt; - - template - struct CounterstreamEnergyDist : public 
arch::EnergyDistribution { - CounterstreamEnergyDist(const M& metric, real_t v_max) - : arch::EnergyDistribution { metric } - , v_max { v_max } {} - - Inline void operator()(const coord_t& x_Ph, - vec_t& v, - unsigned short sp) const override { - v[0] = v_max; - } - - private: - const real_t v_max; - }; - - template - struct GaussianDist : public arch::SpatialDistribution { - GaussianDist(const M& metric, real_t x1c, real_t x2c, real_t dr) - : arch::SpatialDistribution { metric } - , x1c { x1c } - , x2c { x2c } - , dr { dr } {} - - // to properly scale the number density, the probability should be normalized to 1 - Inline auto operator()(const coord_t& x_Ph) const -> real_t override { - if (math::abs(x_Ph[0] - x1c) < dr && math::abs(x_Ph[1] - x2c) < dr) { - return 1.0; - } else { - return 0.0; - } - } - - private: - const real_t x1c, x2c, dr; - }; - - template - struct PGen : public arch::ProblemGenerator { - - // compatibility traits for the problem generator - static constexpr auto engines = traits::compatible_with::value; - static constexpr auto metrics = traits::compatible_with::value; - static constexpr auto dimensions = - traits::compatible_with::value; - - // for easy access to variables in the child class - using arch::ProblemGenerator::D; - using arch::ProblemGenerator::C; - using arch::ProblemGenerator::params; - - const real_t temp_1, x1c, x2c, dr, v_max; - - inline PGen(const SimulationParams& p, const Metadomain& global_domain) - : arch::ProblemGenerator { p } - , temp_1 { p.template get("setup.temp_1") } - , x1c { p.template get("setup.x1c") } - , x2c { p.template get("setup.x2c") } - , v_max { p.template get("setup.v_max") } - , dr { p.template get("setup.dr") } {} - - inline void InitPrtls(Domain& local_domain) { - const auto energy_dist = CounterstreamEnergyDist(local_domain.mesh.metric, - v_max); - const auto spatial_dist = GaussianDist(local_domain.mesh.metric, - x1c, - x2c, - dr); - const auto injector = - arch::NonUniformInjector( - 
energy_dist, - spatial_dist, - { 1, 2 }); - - arch::InjectNonUniform>( - params, - local_domain, - injector, - 1.0); - } - }; - -} // namespace user - -#endif diff --git a/setups/srpic/blob/blob.py b/setups/tests/blob/blob.py similarity index 100% rename from setups/srpic/blob/blob.py rename to setups/tests/blob/blob.py diff --git a/setups/tests/blob/blob.toml b/setups/tests/blob/blob.toml index fffa5fff1..7a047f348 100644 --- a/setups/tests/blob/blob.toml +++ b/setups/tests/blob/blob.toml @@ -1,32 +1,25 @@ [simulation] - name = "blob-1x1x2" - engine = "srpic" - runtime = 5.0 + name = "blob" + engine = "srpic" + runtime = 100.0 [simulation.domain] - decomposition = [1, 1, 2] + decomposition = [2, 1, 1] [grid] - resolution = [128, 192, 64] - # extent = [[1.0, 10.0]] - extent = [[-2.0, 2.0], [-3.0, 3.0], [-1.0, 1.0]] + resolution = [1024, 1024] + extent = [[-10.0, 10.0], [-10.0, 10.0]] [grid.metric] - # metric = "qspherical" metric = "minkowski" [grid.boundaries] - # fields = [["ATMOSPHERE", "ABSORB"]] - # particles = [["ATMOSPHERE", "ABSORB"]] - fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] - particles = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] - - # [grid.boundaries.absorb] - # ds = 1.0 - + fields = [["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"]] + [scales] - larmor0 = 2e-5 - skindepth0 = 0.01 + larmor0 = 1.0 + skindepth0 = 1.0 [algorithms] current_filters = 4 @@ -35,32 +28,39 @@ CFL = 0.5 [particles] - ppc0 = 20.0 - # use_weights = true + ppc0 = 16.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e7 - pusher = "Boris" + label = "e-_p" + mass = 1.0 + charge = -1.0 + maxnpart = 1e7 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e7 - pusher = "Boris" + label = "e+_p" + mass = 1.0 + charge = 1.0 + maxnpart = 1e7 [setup] - xi_min = [0.55, 1.85, -0.25] - xi_max = [0.65, 2.3, -0.1] - v1 = [0.25, -0.55, 0.0] - v2 = [-0.75, -0.15, 0.0] - + temp_1 = 1e-4 + x1c = -5.0 + x2c = 0.0 
+ v_max = 50.0 + dr = 1.0 + [output] - format = "hdf5" - interval_time = 0.02 + format = "hdf5" + interval_time = 1.0 [output.fields] - quantities = ["Nppc_1", "Nppc_2", "E", "B", "J"] + quantities = ["N_1", "N_2", "B", "E"] + + [output.particles] + enable = false + + [output.spectra] + enable = false + +[diagnostics] + colored_stdout = false diff --git a/setups/srpic/blob/nparts.py b/setups/tests/blob/nparts.py similarity index 100% rename from setups/srpic/blob/nparts.py rename to setups/tests/blob/nparts.py diff --git a/setups/tests/blob/pgen.hpp b/setups/tests/blob/pgen.hpp index d07240bfd..f7b7d71b5 100644 --- a/setups/tests/blob/pgen.hpp +++ b/setups/tests/blob/pgen.hpp @@ -10,107 +10,89 @@ #include "archetypes/energy_dist.h" #include "archetypes/particle_injector.h" #include "archetypes/problem_generator.h" -#include "archetypes/spatial_dist.h" +#include "framework/domain/domain.h" #include "framework/domain/metadomain.h" -#include - namespace user { using namespace ntt; template - struct Beam : public arch::EnergyDistribution { - Beam(const M& metric, - const std::vector& v1_vec, - const std::vector& v2_vec) - : arch::EnergyDistribution { metric } { - std::copy(v1_vec.begin(), v1_vec.end(), v1); - std::copy(v2_vec.begin(), v2_vec.end(), v2); - } - - Inline void operator()(const coord_t&, - vec_t& v_Ph, - unsigned short sp) const override { - if (sp == 1) { - v_Ph[0] = v1[0]; - v_Ph[1] = v1[1]; - v_Ph[2] = v1[2]; - } else { - v_Ph[0] = v2[0]; - v_Ph[1] = v2[1]; - v_Ph[2] = v2[2]; - } + struct CounterstreamEnergyDist : public arch::EnergyDistribution { + CounterstreamEnergyDist(const M& metric, real_t v_max) + : arch::EnergyDistribution { metric } + , v_max { v_max } {} + + Inline void operator()(const coord_t& x_Ph, + vec_t& v, + unsigned short sp) const override { + v[0] = v_max; } private: - vec_t v1; - vec_t v2; + const real_t v_max; }; template - struct PointDistribution : public arch::SpatialDistribution { - PointDistribution(const M& metric, - const 
std::vector& xi_min, - const std::vector& xi_max) - : arch::SpatialDistribution { metric } { - std::copy(xi_min.begin(), xi_min.end(), x_min); - std::copy(xi_max.begin(), xi_max.end(), x_max); - } - + struct GaussianDist : public arch::SpatialDistribution { + GaussianDist(const M& metric, real_t x1c, real_t x2c, real_t dr) + : arch::SpatialDistribution { metric } + , x1c { x1c } + , x2c { x2c } + , dr { dr } {} + + // to properly scale the number density, the probability should be normalized to 1 Inline auto operator()(const coord_t& x_Ph) const -> real_t override { - auto fill = true; - for (auto d = 0u; d < M::Dim; ++d) { - fill &= x_Ph[d] > x_min[d] and x_Ph[d] < x_max[d]; + if (math::abs(x_Ph[0] - x1c) < dr && math::abs(x_Ph[1] - x2c) < dr) { + return 1.0; + } else { + return 0.0; } - return fill ? ONE : ZERO; } private: - tuple_t x_min; - tuple_t x_max; + const real_t x1c, x2c, dr; }; template struct PGen : public arch::ProblemGenerator { + // compatibility traits for the problem generator - static constexpr auto engines { traits::compatible_with::value }; - static constexpr auto metrics { - traits::compatible_with::value - }; - static constexpr auto dimensions { - traits::compatible_with::value - }; + static constexpr auto engines = traits::compatible_with::value; + static constexpr auto metrics = traits::compatible_with::value; + static constexpr auto dimensions = + traits::compatible_with::value; // for easy access to variables in the child class using arch::ProblemGenerator::D; using arch::ProblemGenerator::C; using arch::ProblemGenerator::params; - const std::vector xi_min; - const std::vector xi_max; - const std::vector v1; - const std::vector v2; - - inline PGen(const SimulationParams& p, const Metadomain& m) - : arch::ProblemGenerator(p) - , xi_min { p.template get>("setup.xi_min") } - , xi_max { p.template get>("setup.xi_max") } - , v1 { p.template get>("setup.v1") } - , v2 { p.template get>("setup.v2") } {} - - inline void InitPrtls(Domain& domain) { 
- const auto energy_dist = Beam(domain.mesh.metric, v1, v2); - const auto spatial_dist = PointDistribution(domain.mesh.metric, - xi_min, - xi_max); - const auto injector = arch::NonUniformInjector( - energy_dist, - spatial_dist, - { 1, 2 }); - - arch::InjectNonUniform>( + const real_t temp_1, x1c, x2c, dr, v_max; + + inline PGen(const SimulationParams& p, const Metadomain& global_domain) + : arch::ProblemGenerator { p } + , temp_1 { p.template get("setup.temp_1") } + , x1c { p.template get("setup.x1c") } + , x2c { p.template get("setup.x2c") } + , v_max { p.template get("setup.v_max") } + , dr { p.template get("setup.dr") } {} + + inline void InitPrtls(Domain& local_domain) { + const auto energy_dist = CounterstreamEnergyDist(local_domain.mesh.metric, + v_max); + const auto spatial_dist = GaussianDist(local_domain.mesh.metric, + x1c, + x2c, + dr); + const auto injector = + arch::NonUniformInjector( + energy_dist, + spatial_dist, + { 1, 2 }); + + arch::InjectNonUniform>( params, - domain, + local_domain, injector, 1.0); } diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index 50b410270..d78055824 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -235,60 +235,6 @@ namespace ntt { m_is_sorted = true; } - // template - // void Particles::PrintTags() { - // auto tag_h = Kokkos::create_mirror_view(tag); - // Kokkos::deep_copy(tag_h, tag); - // auto i1_h = Kokkos::create_mirror_view(i1); - // Kokkos::deep_copy(i1_h, i1); - // auto dx1_h = Kokkos::create_mirror_view(dx1); - // Kokkos::deep_copy(dx1_h, dx1); - // std::cout << "species " << label() << " [npart = " << npart() << "]" - // << std::endl; - // std::cout << "idxs: "; - // for (auto i = 0; i < IMIN(tag_h.extent(0), 30); ++i) { - // std::cout << std::setw(3) << i << " "; - // if (i == npart() - 1) { - // std::cout << "| "; - // } - // } - // if (tag_h.extent(0) > 30) { - // std::cout << "... 
" << std::setw(3) << tag_h.extent(0) - 1; - // } - // std::cout << std::endl << "tags: "; - // for (auto i = 0; i < IMIN(tag_h.extent(0), 30); ++i) { - // std::cout << std::setw(3) << (short)tag_h(i) << " "; - // if (i == npart() - 1) { - // std::cout << "| "; - // } - // } - // if (tag_h.extent(0) > 30) { - // std::cout << "..." << std::setw(3) << (short)tag_h(tag_h.extent(0) - 1); - // } - // std::cout << std::endl << "i1s : "; - // for (auto i = 0; i < IMIN(i1_h.extent(0), 30); ++i) { - // std::cout << std::setw(3) << i1_h(i) << " "; - // if (i == npart() - 1) { - // std::cout << "| "; - // } - // } - // if (i1_h.extent(0) > 30) { - // std::cout << "..." << std::setw(3) << i1_h(i1_h.extent(0) - 1); - // } - // std::cout << std::endl << "dx1s : "; - // for (auto i = 0; i < IMIN(dx1_h.extent(0), 30); ++i) { - // std::cout << std::setprecision(2) << std::setw(3) << dx1_h(i) << " "; - // if (i == npart() - 1) { - // std::cout << "| "; - // } - // } - // if (dx1_h.extent(0) > 30) { - // std::cout << "..." 
<< std::setprecision(2) << std::setw(3) - // << dx1_h(dx1_h.extent(0) - 1); - // } - // std::cout << std::endl; - // } - template struct Particles; template struct Particles; template struct Particles; From 2b6ef3c5542c53225c60e5bd21585cc105fbca58 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 29 Jan 2025 12:04:54 -0500 Subject: [PATCH 52/52] tested with/without mpi cpu+hip --- src/framework/tests/particles.cpp | 12 +++-- src/global/tests/kokkos_aliases.cpp | 6 +-- src/kernels/particle_pusher_sr.hpp | 70 ++++++++++++++--------------- src/kernels/tests/deposit.cpp | 40 ++++++----------- src/kernels/tests/prtl_bc.cpp | 18 ++++---- 5 files changed, 68 insertions(+), 78 deletions(-) diff --git a/src/framework/tests/particles.cpp b/src/framework/tests/particles.cpp index 535198286..6c4c227b5 100644 --- a/src/framework/tests/particles.cpp +++ b/src/framework/tests/particles.cpp @@ -46,8 +46,10 @@ void testParticles(const int& index, raise::ErrorIf(p.tag.extent(0) != maxnpart, "tag incorrectly allocated", HERE); raise::ErrorIf(p.weight.extent(0) != maxnpart, "weight incorrectly allocated", HERE); - raise::ErrorIf(p.pld.extent(1) != npld, "pld incorrectly allocated", HERE); - raise::ErrorIf(p.pld.extent(0) != maxnpart, "pld incorrectly allocated", HERE); + if (npld > 0) { + raise::ErrorIf(p.pld.extent(0) != maxnpart, "pld incorrectly allocated", HERE); + raise::ErrorIf(p.pld.extent(1) != npld, "pld incorrectly allocated", HERE); + } if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { raise::ErrorIf(p.i2.extent(0) != maxnpart, "i2 incorrectly allocated", HERE); @@ -115,7 +117,8 @@ auto main(int argc, char** argv) -> int { 0.0, 100, PrtlPusher::PHOTON, - Cooling::NONE); + Cooling::NONE, + 5); testParticles(4, "e+", 1.0, @@ -129,7 +132,8 @@ auto main(int argc, char** argv) -> int { 1.0, 100, PrtlPusher::BORIS, - Cooling::NONE); + Cooling::NONE, + 1); } catch (const std::exception& e) { std::cerr << "Error: " << e.what() << std::endl; Kokkos::finalize(); diff --git 
a/src/global/tests/kokkos_aliases.cpp b/src/global/tests/kokkos_aliases.cpp index 56a17c50f..909b6b30c 100644 --- a/src/global/tests/kokkos_aliases.cpp +++ b/src/global/tests/kokkos_aliases.cpp @@ -3,6 +3,7 @@ #include "global.h" #include +#include #include #include @@ -44,8 +45,7 @@ auto main(int argc, char* argv[]) -> int { { // scatter arrays & ranges array_t a { "a", 100 }; - scatter_array_t a_scatter = Kokkos::Experimental::create_scatter_view( - a); + auto a_scatter = Kokkos::Experimental::create_scatter_view(a); Kokkos::parallel_for( // range_t({ 0 }, { 100 }), CreateRangePolicy({ 0 }, { 100 }), @@ -87,4 +87,4 @@ auto main(int argc, char* argv[]) -> int { Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index b4808f12a..2e8a5f652 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -227,41 +227,41 @@ namespace kernel::sr { const real_t coeff_sync; public: - Pusher_kernel(const PrtlPusher::type& pusher, - bool GCA, - bool ext_force, - CoolingTags cooling, - const ndfield_t& EB, - unsigned short sp, - array_t& i1, - array_t& i2, - array_t& i3, - array_t& i1_prev, - array_t& i2_prev, - array_t& i3_prev, - array_t& dx1, - array_t& dx2, - array_t& dx3, - array_t& dx1_prev, - array_t& dx2_prev, - array_t& dx3_prev, - array_t& ux1, - array_t& ux2, - array_t& ux3, - array_t& phi, - array_t& tag, - const M& metric, - const F& force, - real_t time, - real_t coeff, - real_t dt, - int ni1, - int ni2, - int ni3, - const boundaries_t& boundaries, - real_t gca_larmor_max, - real_t gca_eovrb_max, - real_t coeff_sync) + Pusher_kernel(const PrtlPusher::type& pusher, + bool GCA, + bool ext_force, + CoolingTags cooling, + const randacc_ndfield_t& EB, + unsigned short sp, + array_t& i1, + array_t& i2, + array_t& i3, + array_t& i1_prev, + array_t& i2_prev, + array_t& i3_prev, + array_t& dx1, + array_t& dx2, + array_t& dx3, + array_t& dx1_prev, 
+ array_t& dx2_prev, + array_t& dx3_prev, + array_t& ux1, + array_t& ux2, + array_t& ux3, + array_t& phi, + array_t& tag, + const M& metric, + const F& force, + real_t time, + real_t coeff, + real_t dt, + int ni1, + int ni2, + int ni3, + const boundaries_t& boundaries, + real_t gca_larmor_max, + real_t gca_eovrb_max, + real_t coeff_sync) : pusher { pusher } , GCA { GCA } , ext_force { ext_force } diff --git a/src/kernels/tests/deposit.cpp b/src/kernels/tests/deposit.cpp index 9a8ae1cc6..ec364a313 100644 --- a/src/kernels/tests/deposit.cpp +++ b/src/kernels/tests/deposit.cpp @@ -29,8 +29,7 @@ void errorIf(bool condition, const std::string& message) { inline static constexpr auto epsilon = std::numeric_limits::epsilon(); -Inline auto equal(real_t a, real_t b, const char* msg = "", real_t acc = ONE) - -> bool { +Inline auto equal(real_t a, real_t b, const char* msg = "", real_t acc = ONE) -> bool { const auto eps = epsilon * acc; if (not cmp::AlmostEqual(a, b, eps)) { printf("%.12e != %.12e %s\n", a, b, msg); @@ -81,8 +80,6 @@ void testDeposit(const std::vector& res, array_t tag { "tag", 10 }; const real_t charge { 1.0 }, inv_dt { 1.0 }; - auto J_scat = Kokkos::Experimental::create_scatter_view(J); - const int i0 = 4, j0 = 4; const prtldx_t dxi = 0.53, dxf = 0.47; @@ -122,30 +119,19 @@ void testDeposit(const std::vector& res, put_value(weight, 1.0, 0); put_value(tag, ParticleTag::alive, 0); - Kokkos::parallel_for("CurrentsDeposit", - 10, + auto J_scat = Kokkos::Experimental::create_scatter_view(J); + + // clang-format off + Kokkos::parallel_for("CurrentsDeposit", 10, kernel::DepositCurrents_kernel(J_scat, - i1, - i2, - i3, - i1_prev, - i2_prev, - i3_prev, - dx1, - dx2, - dx3, - dx1_prev, - dx2_prev, - dx3_prev, - ux1, - ux2, - ux3, - phi, - weight, - tag, - metric, - charge, - inv_dt)); + i1, i2, i3, + i1_prev, i2_prev, i3_prev, + dx1, dx2, dx3, + dx1_prev, dx2_prev, dx3_prev, + ux1, ux2, ux3, + phi, weight, tag, + metric, charge, inv_dt)); + // clang-format on 
Kokkos::Experimental::contribute(J, J_scat); diff --git a/src/kernels/tests/prtl_bc.cpp b/src/kernels/tests/prtl_bc.cpp index c8f9eae04..14c1a9f54 100644 --- a/src/kernels/tests/prtl_bc.cpp +++ b/src/kernels/tests/prtl_bc.cpp @@ -201,9 +201,9 @@ void testPeriodicBC(const std::vector& res, // Particle boundaries auto boundaries = boundaries_t {}; boundaries = { - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC} + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC } }; real_t time = ZERO; @@ -343,18 +343,18 @@ auto main(int argc, char* argv[]) -> int { const std::vector res1d { 50 }; const boundaries_t ext1d { - {0.0, 1000.0}, + { 0.0, 1000.0 }, }; const std::vector res2d { 30, 20 }; const boundaries_t ext2d { - {-15.0, 15.0}, - {-10.0, 10.0}, + { -15.0, 15.0 }, + { -10.0, 10.0 }, }; const std::vector res3d { 10, 10, 10 }; const boundaries_t ext3d { - {0.0, 1.0}, - {0.0, 1.0}, - {0.0, 1.0} + { 0.0, 1.0 }, + { 0.0, 1.0 }, + { 0.0, 1.0 } }; testPeriodicBC>(res1d, ext1d, {}); testPeriodicBC>(res2d, ext2d, {});