diff --git a/.taplo.toml b/.taplo.toml new file mode 100644 index 000000000..423a47594 --- /dev/null +++ b/.taplo.toml @@ -0,0 +1,6 @@ +[formatting] + align_entries = true + indent_tables = true + indent_entries = true + trailing_newline = true + align_comments = true diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ee00d1b4..efd240993 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,23 +101,12 @@ if(${output}) include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/adios2Config.cmake) find_or_fetch_dependency(adios2 FALSE) if(NOT DEFINED ENV{HDF5_ROOT}) - set(USE_CUSTOM_HDF5 OFF) if(DEFINED ENV{CONDA_PREFIX}) execute_process(COMMAND bash -c "conda list | grep \"hdf5\" -q" RESULT_VARIABLE HDF5_INSTALLED) if(HDF5_INSTALLED EQUAL 0) set(HDF5_ROOT $ENV{CONDA_PREFIX}) - else() - set(USE_CUSTOM_HDF5 ON) endif() - else() - set(USE_CUSTOM_HDF5 ON) - endif() - if(USE_CUSTOM_HDF5) - message( - FATAL_ERROR - "HDF5_ROOT is not set. Please set it to the root of the HDF5 installation" - ) endif() endif() find_package(HDF5 REQUIRED) @@ -134,6 +123,9 @@ link_libraries(${DEPENDENCIES}) if(TESTS) # ---------------------------------- Tests --------------------------------- # include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/tests.cmake) +elseif(BENCHMARK) + # ------------------------------ Benchmark --------------------------------- # + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/benchmark.cmake) else() # ----------------------------------- GUI ---------------------------------- # if(${gui}) diff --git a/TASKLIST.md b/TASKLIST.md index 069a7deb2..c12f60f4c 100644 --- a/TASKLIST.md +++ b/TASKLIST.md @@ -3,3 +3,7 @@ - [ ] removing temporary variables in interpolation - [ ] passing by value vs const ref in metric - [ ] return physical coords one-by-one instead of by passing full vector + +### Things to look into + +1. _h fields in mpi communication diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp new file mode 100644 index 000000000..98306c92b --- /dev/null +++ b/benchmark/benchmark.cpp @@ -0,0 +1,17 @@ +#include "global.h" + +#include +#include + +auto main(int argc, char* argv[]) -> int { + ntt::GlobalInitialize(argc, argv); + try { + // ... + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + GlobalFinalize(); + return 1; + } + GlobalFinalize(); + return 0; +} diff --git a/cmake/benchmark.cmake b/cmake/benchmark.cmake new file mode 100644 index 000000000..d2e8ca47c --- /dev/null +++ b/cmake/benchmark.cmake @@ -0,0 +1,24 @@ +set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) + +add_subdirectory(${SRC_DIR}/global ${CMAKE_CURRENT_BINARY_DIR}/global) +add_subdirectory(${SRC_DIR}/metrics ${CMAKE_CURRENT_BINARY_DIR}/metrics) +add_subdirectory(${SRC_DIR}/kernels ${CMAKE_CURRENT_BINARY_DIR}/kernels) +add_subdirectory(${SRC_DIR}/archetypes ${CMAKE_CURRENT_BINARY_DIR}/archetypes) +add_subdirectory(${SRC_DIR}/framework ${CMAKE_CURRENT_BINARY_DIR}/framework) + +if(${output}) + add_subdirectory(${SRC_DIR}/output ${CMAKE_CURRENT_BINARY_DIR}/output) + add_subdirectory(${SRC_DIR}/checkpoint ${CMAKE_CURRENT_BINARY_DIR}/checkpoint) +endif() + +set(exec benchmark.xc) +set(src ${CMAKE_CURRENT_SOURCE_DIR}/benchmark/benchmark.cpp) + +add_executable(${exec} ${src}) + +set(libs ntt_global ntt_metrics ntt_kernels ntt_archetypes ntt_framework) +if(${output}) + list(APPEND libs ntt_output ntt_checkpoint) +endif() +add_dependencies(${exec} ${libs}) +target_link_libraries(${exec} PRIVATE ${libs} stdc++fs) diff --git a/dev/nix/adios2.nix b/dev/nix/adios2.nix new file mode 100644 index 000000000..8ec1fd36c --- /dev/null +++ b/dev/nix/adios2.nix @@ -0,0 +1,67 @@ +{ + pkgs ? import { }, + hdf5 ? false, + mpi ? false, +}: + +let + name = "adios2"; + version = "2.10.2"; + cmakeFlags = { + CMAKE_CXX_STANDARD = "17"; + CMAKE_CXX_EXTENSIONS = "OFF"; + CMAKE_POSITION_INDEPENDENT_CODE = "TRUE"; + BUILD_SHARED_LIBS = "ON"; + ADIOS2_USE_HDF5 = if hdf5 then "ON" else "OFF"; + ADIOS2_USE_Python = "OFF"; + ADIOS2_USE_Fortran = "OFF"; + ADIOS2_USE_ZeroMQ = "OFF"; + BUILD_TESTING = "OFF"; + ADIOS2_BUILD_EXAMPLES = "OFF"; + ADIOS2_USE_MPI = if mpi then "ON" else "OFF"; + CMAKE_BUILD_TYPE = "Release"; + } // (if !mpi then { ADIOS2_HAVE_HDF5_VOL = "OFF"; } else { }); +in +pkgs.stdenv.mkDerivation { + pname = "${name}${if hdf5 then "-hdf5" else ""}${if mpi then "-mpi" else ""}"; + version = "${version}"; + src = pkgs.fetchgit { + url = "https://github.com/ornladios/ADIOS2/"; + rev = "v${version}"; + sha256 = "sha256-NVyw7xoPutXeUS87jjVv1YxJnwNGZAT4QfkBLzvQbwg="; + }; + + nativeBuildInputs = with pkgs; [ + cmake + perl + ]; + + propagatedBuildInputs = + [ + pkgs.gcc13 + ] + ++ (if hdf5 then (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5 ]) else [ ]) + ++ (if mpi then [ pkgs.openmpi ] else [ ]); + + configurePhase = '' + cmake -B build $src ${ + pkgs.lib.attrsets.foldlAttrs ( + acc: key: value: + acc + " -D ${key}=${value}" + ) "" cmakeFlags + } + ''; + + buildPhase = '' + cmake --build build -j + ''; + + installPhase = '' + sed -i '/if(CMAKE_INSTALL_COMPONENT/,/^[[:space:]]&endif()$/d' build/cmake/install/post/cmake_install.cmake + cmake --install build --prefix $out + chmod +x build/cmake/install/post/generate-adios2-config.sh + sh build/cmake/install/post/generate-adios2-config.sh $out + ''; + + enableParallelBuilding = true; +} diff --git a/dev/nix/kokkos.nix b/dev/nix/kokkos.nix new file mode 100644 index 000000000..cfe583c7a --- /dev/null +++ b/dev/nix/kokkos.nix @@ -0,0 +1,64 @@ +{ + pkgs ? import { }, + arch ? "native", + gpu ? "none", +}: + +let + gpuUpper = pkgs.lib.toUpper gpu; + name = "kokkos"; + version = "4.5.01"; + compilerPkgs = { + "HIP" = with pkgs.rocmPackages; [ + rocm-core + clr + rocthrust + rocprim + rocminfo + rocm-smi + ]; + "NONE" = [ + pkgs.gcc13 + ]; + }; + cmakeFlags = { + "HIP" = [ + "-D CMAKE_C_COMPILER=hipcc" + "-D CMAKE_CXX_COMPILER=hipcc" + ]; + "NONE" = [ ]; + }; + getArch = + _: + if gpu != "none" && arch == "native" then + throw "Please specify an architecture when the GPU support is enabled. Available architectures: https://kokkos.org/kokkos-core-wiki/keywords.html#architectures" + else + pkgs.lib.toUpper arch; + +in +pkgs.stdenv.mkDerivation { + pname = "${name}"; + version = "${version}"; + src = pkgs.fetchgit { + url = "https://github.com/kokkos/kokkos/"; + rev = "v${version}"; + sha256 = "sha256-cI2p+6J+8BRV5fXTDxxHTfh6P5PeeLUiF73o5zVysHQ="; + }; + + nativeBuildInputs = with pkgs; [ + cmake + ]; + + propagatedBuildInputs = compilerPkgs.${gpuUpper}; + + cmakeFlags = [ + "-D CMAKE_CXX_STANDARD=17" + "-D CMAKE_CXX_EXTENSIONS=OFF" + "-D CMAKE_POSITION_INDEPENDENT_CODE=TRUE" + "-D Kokkos_ARCH_${getArch { }}=ON" + (if gpu != "none" then "-D Kokkos_ENABLE_${gpuUpper}=ON" else "") + "-D CMAKE_BUILD_TYPE=Release" + ] ++ cmakeFlags.${gpuUpper}; + + enableParallelBuilding = true; +} diff --git a/dev/nix/shell.nix b/dev/nix/shell.nix new file mode 100644 index 000000000..219da0038 --- /dev/null +++ b/dev/nix/shell.nix @@ -0,0 +1,48 @@ +{ + pkgs ? import { }, + mpi ? false, + hdf5 ? false, + gpu ? "none", + arch ? "native", +}: + +let + name = "entity-dev"; + adios2Pkg = (pkgs.callPackage ./adios2.nix { inherit pkgs mpi hdf5; }); + kokkosPkg = (pkgs.callPackage ./kokkos.nix { inherit pkgs arch gpu; }); +in +pkgs.mkShell { + name = "${name}-env"; + nativeBuildInputs = with pkgs; [ + zlib + cmake + + clang-tools + + adios2Pkg + kokkosPkg + + python312 + python312Packages.jupyter + + cmake-format + neocmakelsp + black + pyright + taplo + vscode-langservers-extracted + ]; + + LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath ([ + pkgs.stdenv.cc.cc + pkgs.zlib + ]); + + shellHook = '' + BLUE='\033[0;34m' + NC='\033[0m' + + echo "" + echo -e "${name} nix-shell activated" + ''; +} diff --git a/extern/Kokkos b/extern/Kokkos index 5fc08a9a7..abaec2e5d 160000 --- a/extern/Kokkos +++ b/extern/Kokkos @@ -1 +1 @@ -Subproject commit 5fc08a9a7da14d8530f8c7035d008ef63ddb4e5c +Subproject commit abaec2e5da1e15e367e48d2a3aa649770e8bcc72 diff --git a/extern/adios2 b/extern/adios2 index a6e8314cc..b574cc9c2 160000 --- a/extern/adios2 +++ b/extern/adios2 @@ -1 +1 @@ -Subproject commit a6e8314cc3c0b28d496b44dcd4f15685013b887b +Subproject commit b574cc9c29b19448ed9f279c4966c97740328441 diff --git a/extern/plog b/extern/plog index 85a871b13..94899e0b9 160000 --- a/extern/plog +++ b/extern/plog @@ -1 +1 @@ -Subproject commit 85a871b13be0bd1a9e0110744fa60cc9bd1e8380 +Subproject commit 94899e0b926ac1b0f4750bfbd495167b4a6ae9ef diff --git a/input.example.toml b/input.example.toml index 5ee34d65d..2f6d2b285 100644 --- a/input.example.toml +++ b/input.example.toml @@ -105,7 +105,7 @@ # @note: In spherical, bondaries in theta/phi are set automatically (only specify bc @ [rmin, rmax]) [["ATMOSPHERE", "ABSORB"]] # @note: In GR, the horizon boundary is set automatically (only specify bc @ rmax): [["ABSORB"]] particles = "" - + [grid.boundaries.absorb] # Size of the absorption layer in physical (code) units: # @type: float @@ -119,7 +119,7 @@ coeff = "" [grid.boundaries.atmosphere] - # @required: if ATMOSPHERE is one of the boundaries + # @required: if ATMOSPHERE is one of the boundaries # Temperature of the atmosphere in units of m0 c^2 # @type: float temperature = "" @@ -210,7 +210,7 @@ # @type: float: ~1 # @default: 1.0 correction = "" - + # @inferred: # - dt [= CFL * dx0] # @brief: timestep duration @@ -252,12 +252,11 @@ # @type: bool # @default: false use_weights = "" - # Timesteps between particle re-sorting: + # Timesteps between particle re-sorting (removing dead particles): # @type: unsigned int # @default: 100 - # @note: When MPI is enable, particles are sorted every step. - # @note: When `sort_interval` == 0, the sorting is disabled. - sort_interval = "" + # @note: set to 0 to disable re-sorting + clear_interval = "" # @inferred: # - nspec diff --git a/legacy/benchmark.cpp b/legacy/benchmark.cpp new file mode 100644 index 000000000..54fc17cf9 --- /dev/null +++ b/legacy/benchmark.cpp @@ -0,0 +1,273 @@ +#include "enums.h" +#include "global.h" + +#include "utils/error.h" + +#include "metrics/metric_base.h" +#include "metrics/minkowski.h" + +#include "framework/containers/species.h" +#include "framework/domain/domain.h" +#include "framework/domain/metadomain.h" + +#include + +#include "framework/domain/communications.cpp" +#include "mpi.h" +#include "mpi-ext.h" + +#define TIMER_START(label) \ + Kokkos::fence(); \ + auto start_##label = std::chrono::high_resolution_clock::now(); + +#define TIMER_STOP(label) \ + Kokkos::fence(); \ + auto stop_##label = std::chrono::high_resolution_clock::now(); \ + auto duration_##label = std::chrono::duration_cast( \ + stop_##label - start_##label) \ + .count(); \ + std::cout << "Timer [" #label "]: " << duration_##label << " microseconds" \ + << std::endl; + +/* + Test to check the performance of the new particle allocation scheme + - Create a metadomain object main() + - Set npart + initialize tags InitializeParticleArrays() + - 'Push' the particles by randomly updating the tags PushParticles() + - Communicate particles to neighbors and time the communication + - Compute the time taken for best of N iterations for the communication + */ +using namespace ntt; + +// Set npart and set the particle tags to alive +template +void InitializeParticleArrays(Domain& domain, const int npart) { + raise::ErrorIf(npart > domain.species[0].maxnpart(), + "Npart cannot be greater than maxnpart", + HERE); + const auto nspecies = domain.species.size(); + for (int i_spec = 0; i_spec < nspecies; i_spec++) { + domain.species[i_spec].set_npart(npart); + domain.species[i_spec].SyncHostDevice(); + auto& this_tag = domain.species[i_spec].tag; + Kokkos::parallel_for( + "Initialize particles", + npart, + Lambda(const std::size_t i) { this_tag(i) = ParticleTag::alive; }); + } + return; +} + +// Randomly reassign tags to particles for a fraction of particles +template +void PushParticles(Domain& domain, + const double send_frac, + const int seed_ind, + const int seed_tag) { + raise::ErrorIf(send_frac > 1.0, "send_frac cannot be greater than 1.0", HERE); + const auto nspecies = domain.species.size(); + for (int i_spec = 0; i_spec < nspecies; i_spec++) { + domain.species[i_spec].set_unsorted(); + const auto nparticles = domain.species[i_spec].npart(); + const auto nparticles_to_send = static_cast(send_frac * nparticles); + // Generate random indices to send + // Kokkos::Random_XorShift64_Pool<> random_pool(seed_ind); + Kokkos::View indices_to_send("indices_to_send", nparticles_to_send); + Kokkos::fill_random(indices_to_send, domain.random_pool, 0, nparticles); + // Generate random tags to send + // Kokkos::Random_XorShift64_Pool<> random_pool_tag(seed_tag); + Kokkos::View tags_to_send("tags_to_send", nparticles_to_send); + Kokkos::fill_random(tags_to_send, + domain.random_pool, + 0, + domain.species[i_spec].ntags()); + auto& this_tag = domain.species[i_spec].tag; + Kokkos::parallel_for( + "Push particles", + nparticles_to_send, + Lambda(const std::size_t i) { + auto prtl_to_send = indices_to_send(i); + auto tag_to_send = tags_to_send(i); + this_tag(prtl_to_send) = tag_to_send; + }); + domain.species[i_spec].npart_per_tag(); + domain.species[i_spec].SyncHostDevice(); + } + return; +} + +auto main(int argc, char* argv[]) -> int { + GlobalInitialize(argc, argv); + { + /* + MPI checks + */ + printf("Compile time check:\n"); +#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT + printf("This MPI library has CUDA-aware support.\n", MPIX_CUDA_AWARE_SUPPORT); +#elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT + printf("This MPI library does not have CUDA-aware support.\n"); +#else + printf("This MPI library cannot determine if there is CUDA-aware support.\n"); +#endif /* MPIX_CUDA_AWARE_SUPPORT */ +printf("Run time check:\n"); +#if defined(MPIX_CUDA_AWARE_SUPPORT) + if (1 == MPIX_Query_cuda_support()) { + printf("This MPI library has CUDA-aware support.\n"); + } else { + printf("This MPI library does not have CUDA-aware support.\n"); + } +#else /* !defined(MPIX_CUDA_AWARE_SUPPORT) */ + printf("This MPI library cannot determine if there is CUDA-aware support.\n"); +#endif /* MPIX_CUDA_AWARE_SUPPORT */ + + /* + Test to send and receive Kokkos arrays + */ + int sender_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &sender_rank); + + int neighbor_rank = 0; + if (sender_rank == 0) { + neighbor_rank = 1; + } + else if (sender_rank == 1) { + neighbor_rank = 0; + } + else { + raise::Error("This test is only for 2 ranks", HERE); + } + Kokkos::View send_array("send_array", 10); + Kokkos::View recv_array("recv_array", 10); + if (sender_rank == 0) { + Kokkos::deep_copy(send_array, 10); + } + else { + Kokkos::deep_copy(send_array, 20); + } + + auto send_array_host = Kokkos::create_mirror_view(send_array); + Kokkos::deep_copy(send_array_host, send_array); + auto host_recv_array = Kokkos::create_mirror_view(recv_array); + + MPI_Sendrecv(send_array.data(), send_array.extent(0), MPI_INT, neighbor_rank, 0, + recv_array.data(), recv_array.extent(0), MPI_INT, neighbor_rank, 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + // Print the received array + Kokkos::deep_copy(host_recv_array, recv_array); + for (int i = 0; i < 10; ++i) { + printf("Rank %d: Received %d\n", sender_rank, host_recv_array(i)); + } + + + std::cout << "Constructing the domain" << std::endl; + // Create a Metadomain object + const unsigned int ndomains = 2; + const std::vector global_decomposition = { + {-1, -1, -1} + }; + const std::vector global_ncells = { 32, 32, 32 }; + const boundaries_t global_extent = { + {0.0, 3.0}, + {0.0, 3.0}, + {0.0, 3.0} + }; + const boundaries_t global_flds_bc = { + {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC} + }; + const boundaries_t global_prtl_bc = { + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC} + }; + const std::map metric_params = {}; + const int maxnpart = argc > 1 ? std::stoi(argv[1]) : 1000; + const double npart_to_send_frac = 0.01; + const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); + auto species = ntt::ParticleSpecies(1u, + "test_e", + 1.0f, + 1.0f, + maxnpart, + ntt::PrtlPusher::BORIS, + false, + ntt::Cooling::NONE); + auto metadomain = Metadomain>( + ndomains, + global_decomposition, + global_ncells, + global_extent, + global_flds_bc, + global_prtl_bc, + metric_params, + { species }); + + const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; + auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); + auto timers = timer::Timers { { "Communication" }, nullptr, false }; + InitializeParticleArrays(*local_domain, npart); + // Timers for both the communication routines + auto total_time_elapsed_old = 0; + auto total_time_elapsed_new = 0; + + int seed_ind = 0; + int seed_tag = 1; + Kokkos::fence(); + + for (int i = 0; i < 10; ++i) { + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort new + Kokkos::fence(); + auto start_new = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticlesBuffer(*local_domain, &timers); + auto stop_new = std::chrono::high_resolution_clock::now(); + auto duration_new = std::chrono::duration_cast( + stop_new - start_new) + .count(); + total_time_elapsed_new += duration_new; + Kokkos::fence(); + } + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort old + Kokkos::fence(); + auto start_old = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticles(*local_domain, &timers); + auto stop_old = std::chrono::high_resolution_clock::now(); + auto duration_old = std::chrono::duration_cast( + stop_old - start_old) + .count(); + total_time_elapsed_old += duration_old; + Kokkos::fence(); + } + } + printf("Total time elapsed for old: %f us : %f us/prtl\n", + total_time_elapsed_old / 10.0, + total_time_elapsed_old / 10.0 * 1000 / npart); + printf("Total time elapsed for new: %f us : %f us/prtl\n", + total_time_elapsed_new / 10.0, + total_time_elapsed_new / 10.0 * 1000 / npart); + } + GlobalFinalize(); + return 0; +} + +/* + Buggy behavior: + Consider a single domain with a single mpi rank + Particle tag arrays is set to [0, 0, 1, 1, 2, 3, ...] for a single domain + CommunicateParticles() discounts all the dead particles and reassigns the + other tags to alive + CommunicateParticlesBuffer() only keeps the ParticleTag::Alive particles + and discounts the rest +*/ diff --git a/setups/srpic/em_vacuum/em_vacuum.toml b/setups/srpic/em_vacuum/em_vacuum.toml index 156c8d308..23381b1c6 100644 --- a/setups/srpic/em_vacuum/em_vacuum.toml +++ b/setups/srpic/em_vacuum/em_vacuum.toml @@ -1,21 +1,21 @@ [simulation] - name = "em_vacuum" - engine = "srpic" + name = "em_vacuum" + engine = "srpic" runtime = 2.0 [grid] resolution = [256, 512] - extent = [[-1.0, 1.0], [-2.0, 2.0]] + extent = [[-1.0, 1.0], [-2.0, 2.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 0.1 + larmor0 = 0.1 skindepth0 = 0.01 [algorithms] @@ -28,12 +28,12 @@ [setup] amplitude = 1.0 - kx1 = 1 - kx2 = 1 - kx3 = 0 - + kx1 = 1 + kx2 = 1 + kx3 = 0 + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.1 [output.fields] diff --git a/setups/srpic/langmuir/langmuir.toml b/setups/srpic/langmuir/langmuir.toml index 2f3520fc5..b054a940d 100644 --- a/setups/srpic/langmuir/langmuir.toml +++ b/setups/srpic/langmuir/langmuir.toml @@ -1,21 +1,21 @@ [simulation] - name = "langmuir" - engine = "srpic" + name = "langmuir" + engine = "srpic" runtime = 1.0 [grid] resolution = [2048, 512] - extent = [[0.0, 1.0], [0.0, 0.25]] + extent = [[0.0, 1.0], [0.0, 0.25]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 0.1 + larmor0 = 0.1 skindepth0 = 0.01 [algorithms] @@ -28,24 +28,24 @@ ppc0 = 14.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e7 + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e7 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e7 + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e7 [setup] vmax = 0.1 - nx1 = 4 - nx2 = 2 - + nx1 = 4 + nx2 = 2 + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.0025 [output.fields] diff --git a/setups/srpic/magnetar/magnetar.toml b/setups/srpic/magnetar/magnetar.toml index 2a2260af5..fab2eb01c 100644 --- a/setups/srpic/magnetar/magnetar.toml +++ b/setups/srpic/magnetar/magnetar.toml @@ -1,17 +1,17 @@ [simulation] - name = "magnetar" - engine = "srpic" + name = "magnetar" + engine = "srpic" runtime = 50.0 [grid] - resolution = [2048,1024] - extent = [[1.0, 400.0]] + resolution = [2048, 1024] + extent = [[1.0, 400.0]] [grid.metric] metric = "qspherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["ATMOSPHERE", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] [grid.boundaries.absorb] @@ -19,13 +19,13 @@ [grid.boundaries.atmosphere] temperature = 0.1 - density = 40.0 - height = 0.02 - species = [1, 2] - ds = 0.5 + density = 40.0 + height = 0.02 + species = [1, 2] + ds = 0.5 [scales] - larmor0 = 1e-5 + larmor0 = 1e-5 skindepth0 = 0.01 [algorithms] @@ -36,59 +36,59 @@ [algorithms.gca] e_ovr_b_max = 0.9 - larmor_max = 100.0 + larmor_max = 100.0 [particles] - ppc0 = 4.0 - use_weights = true - sort_interval = 100 + ppc0 = 4.0 + use_weights = true + clear_interval = 100 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [setup] - Bsurf = 1.0 - omega = 0.0125 - pp_thres = 10.0 + Bsurf = 1.0 + omega = 0.0125 + pp_thres = 10.0 gamma_pairs = 1.75 [output] @@ -96,7 +96,7 @@ [output.fields] interval_time = 0.5 - quantities = ["N_1", "N_2", "N_3", "N_4", "N_5", "N_6", "B", "E", "J"] + quantities = ["N_1", "N_2", "N_3", "N_4", "N_5", "N_6", "B", "E", "J"] [output.particles] enable = false diff --git a/setups/srpic/magnetosphere/magnetosphere.toml b/setups/srpic/magnetosphere/magnetosphere.toml index 34e04b02d..4c7c9117d 100644 --- a/setups/srpic/magnetosphere/magnetosphere.toml +++ b/setups/srpic/magnetosphere/magnetosphere.toml @@ -1,31 +1,31 @@ [simulation] - name = "magnetosphere" - engine = "srpic" + name = "magnetosphere" + engine = "srpic" runtime = 60.0 [grid] resolution = [2048, 1024] - extent = [[1.0, 50.0]] + extent = [[1.0, 50.0]] [grid.metric] metric = "qspherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["ATMOSPHERE", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] - + [grid.boundaries.absorb] ds = 1.0 [grid.boundaries.atmosphere] temperature = 0.1 - density = 10.0 - height = 0.02 - species = [1, 2] - ds = 2.0 - + density = 10.0 + height = 0.02 + species = [1, 2] + ds = 2.0 + [scales] - larmor0 = 2e-5 + larmor0 = 2e-5 skindepth0 = 0.01 [algorithms] @@ -36,37 +36,37 @@ [algorithms.gca] e_ovr_b_max = 0.9 - larmor_max = 1.0 + larmor_max = 1.0 [particles] - ppc0 = 5.0 - use_weights = true - sort_interval = 100 + ppc0 = 5.0 + use_weights = true + clear_interval = 100 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e8 - pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e8 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e8 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e8 + pusher = "Boris,GCA" [setup] - Bsurf = 1.0 + Bsurf = 1.0 period = 60.0 [output] format = "hdf5" - + [output.fields] interval_time = 0.1 - quantities = ["N_1", "N_2", "E", "B", "T00"] + quantities = ["N_1", "N_2", "E", "B", "T00"] [output.particles] enable = false @@ -75,5 +75,5 @@ enable = false [diagnostics] - interval = 50 + interval = 50 colored_stdout = true diff --git a/setups/srpic/monopole/monopole.toml b/setups/srpic/monopole/monopole.toml index 169837489..cf735fce8 100644 --- a/setups/srpic/monopole/monopole.toml +++ b/setups/srpic/monopole/monopole.toml @@ -1,31 +1,31 @@ [simulation] - name = "monopole" - engine = "srpic" + name = "monopole" + engine = "srpic" runtime = 60.0 [grid] resolution = [2048, 1024] - extent = [[1.0, 50.0]] + extent = [[1.0, 50.0]] [grid.metric] metric = "qspherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["ATMOSPHERE", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] - + [grid.boundaries.absorb] ds = 1.0 [grid.boundaries.atmosphere] temperature = 0.1 - density = 10.0 - height = 0.02 - species = [1, 2] - ds = 2.0 - + density = 10.0 + height = 0.02 + species = [1, 2] + ds = 2.0 + [scales] - larmor0 = 2e-5 + larmor0 = 2e-5 skindepth0 = 0.01 [algorithms] @@ -36,38 +36,38 @@ [algorithms.gca] e_ovr_b_max = 0.9 - larmor_max = 1.0 + larmor_max = 1.0 [particles] - ppc0 = 5.0 - use_weights = true - sort_interval = 100 + ppc0 = 5.0 + use_weights = true + clear_interval = 100 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 + label = "e-" + mass = 1.0 + charge = -1.0 maxnpart = 1e8 - pusher = "Boris,GCA" + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 + label = "e+" + mass = 1.0 + charge = 1.0 maxnpart = 1e8 - pusher = "Boris,GCA" + pusher = "Boris,GCA" [setup] - Bsurf = 1.0 + Bsurf = 1.0 period = 60.0 [output] format = "hdf5" - + [output.fields] interval_time = 0.1 - quantities = ["N_1", "N_2", "E", "B", "T00"] - mom_smooth = 2 + quantities = ["N_1", "N_2", "E", "B", "T00"] + mom_smooth = 2 [output.particles] enable = false @@ -76,5 +76,5 @@ enable = false [diagnostics] - interval = 50 + interval = 50 colored_stdout = true diff --git a/setups/srpic/shock/shock.toml b/setups/srpic/shock/shock.toml index f48edb2d6..7b2cdde2c 100644 --- a/setups/srpic/shock/shock.toml +++ b/setups/srpic/shock/shock.toml @@ -1,21 +1,21 @@ [simulation] - name = "shock" - engine = "srpic" + name = "shock" + engine = "srpic" runtime = 50.0 [grid] resolution = [2048, 128] - extent = [[0.0, 10.0], [-0.3125, 0.3125]] + extent = [[0.0, 10.0], [-0.3125, 0.3125]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["CONDUCTOR", "ABSORB"], ["PERIODIC"]] + fields = [["CONDUCTOR", "ABSORB"], ["PERIODIC"]] particles = [["REFLECT", "ABSORB"], ["PERIODIC"]] - + [scales] - larmor0 = 1e-2 + larmor0 = 1e-2 skindepth0 = 1e-2 [algorithms] @@ -28,24 +28,24 @@ ppc0 = 16.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 + label = "e-" + mass = 1.0 + charge = -1.0 maxnpart = 1e8 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 + label = "e+" + mass = 1.0 + charge = 1.0 maxnpart = 1e8 [setup] - drift_ux = 0.1 + drift_ux = 0.1 temperature = 1e-3 [output] interval_time = 0.1 - format = "hdf5" - + format = "hdf5" + [output.fields] quantities = ["N_1", "N_2", "E", "B", "T0i_1", "T0i_2", "J"] diff --git a/setups/srpic/turbulence/turbulence.toml b/setups/srpic/turbulence/turbulence.toml index a28afde15..a1f8e29c1 100644 --- a/setups/srpic/turbulence/turbulence.toml +++ b/setups/srpic/turbulence/turbulence.toml @@ -1,21 +1,21 @@ [simulation] - name = "turbulence" - engine = "srpic" + name = "turbulence" + engine = "srpic" runtime = 20.0 [grid] resolution = [184, 184, 184] - extent = [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]] + extent = [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 0.02 + larmor0 = 0.02 skindepth0 = 0.02 [algorithms] @@ -28,22 +28,22 @@ ppc0 = 32.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e8 + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e8 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e8 + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e8 [setup] - + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.1 - + [output.fields] quantities = ["N_1", "N_2", "E", "B", "J", "T00_1", "T00_2"] diff --git a/setups/srpic/weibel/weibel.toml b/setups/srpic/weibel/weibel.toml index c8e2506f6..23d119b24 100644 --- a/setups/srpic/weibel/weibel.toml +++ b/setups/srpic/weibel/weibel.toml @@ -1,21 +1,21 @@ [simulation] - name = "weibel" - engine = "srpic" + name = "weibel" + engine = "srpic" runtime = 100.0 [grid] resolution = [512, 512] - extent = [[-10.0, 10.0], [-10.0, 10.0]] + extent = [[-10.0, 10.0], [-10.0, 10.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 1.0 + larmor0 = 1.0 skindepth0 = 1.0 [algorithms] @@ -28,37 +28,37 @@ ppc0 = 16.0 [[particles.species]] - label = "e-_p" - mass = 1.0 - charge = -1.0 + label = "e-_p" + mass = 1.0 + charge = -1.0 maxnpart = 1e7 [[particles.species]] - label = "e+_p" - mass = 1.0 - charge = 1.0 + label = "e+_p" + mass = 1.0 + charge = 1.0 maxnpart = 1e7 [[particles.species]] - label = "e-_b" - mass = 1.0 - charge = -1.0 + label = "e-_b" + mass = 1.0 + charge = -1.0 maxnpart = 1e7 [[particles.species]] - label = "e+_b" - mass = 1.0 - charge = 1.0 + label = "e+_b" + mass = 1.0 + charge = 1.0 maxnpart = 1e7 [setup] - drift_u_1 = 0.2 - drift_u_2 = 0.2 - temp_1 = 1e-4 - temp_2 = 1e-4 - + drift_u_1 = 0.2 + drift_u_2 = 0.2 + temp_1 = 1e-4 + temp_2 = 1e-4 + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.25 [output.fields] diff --git a/setups/tests/blob/blob.py b/setups/tests/blob/blob.py new file mode 100644 index 000000000..77337d3b2 --- /dev/null +++ b/setups/tests/blob/blob.py @@ -0,0 +1,62 @@ +import h5py +import numpy as np +import matplotlib.pyplot as plt + +f = open("report", "r") +Lines = f.readlines() +f.close() + +em_new = [] +ep_new = [] +time_new = [] +for i in range (len(Lines)): + line = Lines[i] + line = line.strip() + arr = line.split() + + if (len(arr)>0 and arr[0]=='species'): + nparts = arr[2].split("..") + if (nparts[0]=="(e-_p)"): + em_new.append(float(nparts[-1])) + if (nparts[0]=="(e+_p)"): + ep_new.append(float(nparts[-1])) + + if (len(arr)>0 and arr[0]=='Time:'): + time_new.append(float(arr[1])) + +f = h5py.File('blob.h5', 'r') + +Nsteps = len(f.keys()) +print(list(f['Step0'].keys())) + +for i in range (Nsteps): + print (i) + fig = plt.figure(dpi=300, figsize=(8,8), facecolor='white') + + densMax = max(np.max(f['Step'+str(i)]['fN_1']),np.max(f['Step'+str(i)]['fN_2'])) + print(densMax) + ax1 = fig.add_axes([0.05,0.05,0.4,0.4]) + im1=ax1.pcolormesh(f['Step'+str(i)]['X1'],f['Step'+str(i)]['X2'],f['Step'+str(i)]['fN_1'],cmap='turbo',vmin=0,vmax=1.0) + ax1.set_title(r"$N_1$") + ax1.vlines(0,-10.0,10.0,color='white') + + ax1 = fig.add_axes([0.48,0.05,0.4,0.4]) + ax1.pcolormesh(f['Step'+str(i)]['X1'],f['Step'+str(i)]['X2'],f['Step'+str(i)]['fN_2'],cmap='turbo',vmin=0,vmax=1.0) + ax1.set_yticklabels([]) + ax1.set_title(r"$N_2$") + ax1.vlines(0,-10.0,10.0,color='white') + + ax4cb = fig.add_axes([0.89, 0.05, 0.01, 0.4]) + cbar4 = fig.colorbar(im1,cax=ax4cb) + + ax1= fig.add_axes([0.05,0.5,0.83,0.4]) + ax1.plot(time_new,em_new, color='blue', label=r'$e^-$, new') + ax1.plot(time_new,ep_new, color='red', label=r'$e^+$, new') + ax1.legend() + ax1.set_ylim(0,1.8e5) + ax1.set_xlim(0,100) + ax1.vlines(i, 0,1.8e5, color='green',linewidth=0.6) + + + fig.savefig("%05d"%i+".png",dpi=300,bbox_inches='tight') + plt.close() diff --git a/setups/tests/blob/blob.toml b/setups/tests/blob/blob.toml index fffa5fff1..7a047f348 100644 --- a/setups/tests/blob/blob.toml +++ b/setups/tests/blob/blob.toml @@ -1,32 +1,25 @@ [simulation] - name = "blob-1x1x2" - engine = "srpic" - runtime = 5.0 + name = "blob" + engine = "srpic" + runtime = 100.0 [simulation.domain] - decomposition = [1, 1, 2] + decomposition = [2, 1, 1] [grid] - resolution = [128, 192, 64] - # extent = [[1.0, 10.0]] - extent = [[-2.0, 2.0], [-3.0, 3.0], [-1.0, 1.0]] + resolution = [1024, 1024] + extent = [[-10.0, 10.0], [-10.0, 10.0]] [grid.metric] - # metric = "qspherical" metric = "minkowski" [grid.boundaries] - # fields = [["ATMOSPHERE", "ABSORB"]] - # particles = [["ATMOSPHERE", "ABSORB"]] - fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] - particles = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] - - # [grid.boundaries.absorb] - # ds = 1.0 - + fields = [["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"]] + [scales] - larmor0 = 2e-5 - skindepth0 = 0.01 + larmor0 = 1.0 + skindepth0 = 1.0 [algorithms] current_filters = 4 @@ -35,32 +28,39 @@ CFL = 0.5 [particles] - ppc0 = 20.0 - # use_weights = true + ppc0 = 16.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e7 - pusher = "Boris" + label = "e-_p" + mass = 1.0 + charge = -1.0 + maxnpart = 1e7 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e7 - pusher = "Boris" + label = "e+_p" + mass = 1.0 + charge = 1.0 + maxnpart = 1e7 [setup] - xi_min = [0.55, 1.85, -0.25] - xi_max = [0.65, 2.3, -0.1] - v1 = [0.25, -0.55, 0.0] - v2 = [-0.75, -0.15, 0.0] - + temp_1 = 1e-4 + x1c = -5.0 + x2c = 0.0 + v_max = 50.0 + dr = 1.0 + [output] - format = "hdf5" - interval_time = 0.02 + format = "hdf5" + interval_time = 1.0 [output.fields] - quantities = ["Nppc_1", "Nppc_2", "E", "B", "J"] + quantities = ["N_1", "N_2", "B", "E"] + + [output.particles] + enable = false + + [output.spectra] + enable = false + +[diagnostics] + colored_stdout = false diff --git a/setups/tests/blob/nparts.py b/setups/tests/blob/nparts.py new file mode 100644 index 000000000..e759422c0 --- /dev/null +++ b/setups/tests/blob/nparts.py @@ -0,0 +1,38 @@ +import h5py +import numpy as np +import matplotlib.pyplot as plt + +f = open("report", "r") +Lines = f.readlines() +f.close() + +em_new = [] +ep_new = [] +time_new = [] +for i in range (len(Lines)): + line = Lines[i] + line = line.strip() + arr = line.split() + + if (len(arr)>0 and arr[0]=='species'): + nparts = arr[2].split("..") + if (nparts[0]=="(e-_p)"): + em_new.append(float(nparts[-1])) + if (nparts[0]=="(e+_p)"): + ep_new.append(float(nparts[-1])) + + if (len(arr)>0 and arr[0]=='Time:'): + time_new.append(float(arr[1])) + + +fig = plt.figure(dpi=300, figsize=(8,8), facecolor='white') + +ax1= fig.add_axes([0.05,0.5,0.83,0.4]) +ax1.plot(time_new,em_new, color='blue', label=r'$e^-$, new') +ax1.plot(time_new,ep_new, color='red', label=r'$e^+$, new') +ax1.legend() +ax1.set_ylim(0,1.8e5) +ax1.set_xlim(0,100) + +fig.savefig("nparts.png",dpi=300,bbox_inches='tight') +plt.close() diff --git a/setups/tests/blob/pgen.hpp b/setups/tests/blob/pgen.hpp index d07240bfd..f7b7d71b5 100644 --- a/setups/tests/blob/pgen.hpp +++ b/setups/tests/blob/pgen.hpp @@ -10,107 +10,89 @@ #include "archetypes/energy_dist.h" #include "archetypes/particle_injector.h" #include "archetypes/problem_generator.h" -#include "archetypes/spatial_dist.h" +#include "framework/domain/domain.h" #include "framework/domain/metadomain.h" -#include - namespace user { using namespace ntt; template - struct Beam : public arch::EnergyDistribution { - Beam(const M& metric, - const std::vector& v1_vec, - const std::vector& v2_vec) - : arch::EnergyDistribution { metric } { - std::copy(v1_vec.begin(), v1_vec.end(), v1); - std::copy(v2_vec.begin(), v2_vec.end(), v2); - } - - Inline void operator()(const coord_t&, - vec_t& v_Ph, - unsigned short sp) const override { - if (sp == 1) { - v_Ph[0] = v1[0]; - v_Ph[1] = v1[1]; - v_Ph[2] = v1[2]; - } else { - v_Ph[0] = v2[0]; - v_Ph[1] = v2[1]; - v_Ph[2] = v2[2]; - } + struct CounterstreamEnergyDist : public arch::EnergyDistribution { + CounterstreamEnergyDist(const M& metric, real_t v_max) + : arch::EnergyDistribution { metric } + , v_max { v_max } {} + + Inline void operator()(const coord_t& x_Ph, + vec_t& v, + unsigned short sp) const override { + v[0] = v_max; } private: - vec_t v1; - vec_t v2; + const real_t v_max; }; template - struct PointDistribution : public arch::SpatialDistribution { - PointDistribution(const M& metric, - const std::vector& xi_min, - const std::vector& xi_max) - : arch::SpatialDistribution { metric } { - std::copy(xi_min.begin(), xi_min.end(), x_min); - std::copy(xi_max.begin(), xi_max.end(), x_max); - } - + struct GaussianDist : public arch::SpatialDistribution { + GaussianDist(const M& metric, real_t x1c, real_t x2c, real_t dr) + : arch::SpatialDistribution { metric } + , x1c { x1c } + , x2c { x2c } + , dr { dr } {} + + // to properly scale the number density, the probability should be normalized to 1 Inline auto operator()(const coord_t& x_Ph) const -> real_t override { - auto fill = true; - for (auto d = 0u; d < M::Dim; ++d) { - fill &= x_Ph[d] > x_min[d] and x_Ph[d] < x_max[d]; + if (math::abs(x_Ph[0] - x1c) < dr && math::abs(x_Ph[1] - x2c) < dr) { + return 1.0; + } else { + return 0.0; } - return fill ? ONE : ZERO; } private: - tuple_t x_min; - tuple_t x_max; + const real_t x1c, x2c, dr; }; template struct PGen : public arch::ProblemGenerator { + // compatibility traits for the problem generator - static constexpr auto engines { traits::compatible_with::value }; - static constexpr auto metrics { - traits::compatible_with::value - }; - static constexpr auto dimensions { - traits::compatible_with::value - }; + static constexpr auto engines = traits::compatible_with::value; + static constexpr auto metrics = traits::compatible_with::value; + static constexpr auto dimensions = + traits::compatible_with::value; // for easy access to variables in the child class using arch::ProblemGenerator::D; using arch::ProblemGenerator::C; using arch::ProblemGenerator::params; - const std::vector xi_min; - const std::vector xi_max; - const std::vector v1; - const std::vector v2; - - inline PGen(const SimulationParams& p, const Metadomain& m) - : arch::ProblemGenerator(p) - , xi_min { p.template get>("setup.xi_min") } - , xi_max { p.template get>("setup.xi_max") } - , v1 { p.template get>("setup.v1") } - , v2 { p.template get>("setup.v2") } {} - - inline void InitPrtls(Domain& domain) { - const auto energy_dist = Beam(domain.mesh.metric, v1, v2); - const auto spatial_dist = PointDistribution(domain.mesh.metric, - xi_min, - xi_max); - const auto injector = arch::NonUniformInjector( - energy_dist, - spatial_dist, - { 1, 2 }); - - arch::InjectNonUniform>( + const real_t temp_1, x1c, x2c, dr, v_max; + + inline PGen(const SimulationParams& p, const Metadomain& global_domain) + : arch::ProblemGenerator { p } + , temp_1 { p.template get("setup.temp_1") } + , x1c { p.template get("setup.x1c") } + , x2c { p.template get("setup.x2c") } + , v_max { p.template get("setup.v_max") } + , dr { p.template get("setup.dr") } {} + + inline void InitPrtls(Domain& local_domain) { + const auto energy_dist = CounterstreamEnergyDist(local_domain.mesh.metric, + v_max); + const auto spatial_dist = GaussianDist(local_domain.mesh.metric, + x1c, + x2c, + dr); + const auto injector = + arch::NonUniformInjector( + energy_dist, + spatial_dist, + { 1, 2 }); + + arch::InjectNonUniform>( params, - domain, + local_domain, injector, 1.0); } diff --git a/src/checkpoint/reader.cpp b/src/checkpoint/reader.cpp index e89b7d384..9fc2d2640 100644 --- a/src/checkpoint/reader.cpp +++ b/src/checkpoint/reader.cpp @@ -35,16 +35,17 @@ namespace checkpoint { reader.Get(field_var, array_h.data(), adios2::Mode::Sync); Kokkos::deep_copy(array, array_h); } else { - raise::Error(fmt::format("Field variable: %s not found", field.c_str()), HERE); + raise::Error(fmt::format("Field variable: %s not found", field.c_str()), + HERE); } } - auto ReadParticleCount(adios2::IO& io, - adios2::Engine& reader, - unsigned short s, - std::size_t local_dom, - std::size_t ndomains) - -> std::pair { + auto ReadParticleCount( + adios2::IO& io, + adios2::Engine& reader, + unsigned short s, + std::size_t local_dom, + std::size_t ndomains) -> std::pair { logger::Checkpoint(fmt::format("Reading particle count for: %d", s + 1), HERE); auto npart_var = io.InquireVariable( fmt::format("s%d_npart", s + 1)); @@ -97,7 +98,7 @@ namespace checkpoint { fmt::format("s%d_%s", s + 1, quantity.c_str())); if (var) { var.SetSelection(adios2::Box({ offset }, { count })); - const auto slice = std::pair { 0, count }; + const auto slice = range_tuple_t(0, count); auto array_h = Kokkos::create_mirror_view(array); reader.Get(var, Kokkos::subview(array_h, slice).data(), adios2::Mode::Sync); Kokkos::deep_copy(Kokkos::subview(array, slice), @@ -109,6 +110,28 @@ namespace checkpoint { } } + void ReadParticlePayloads(adios2::IO& io, + adios2::Engine& reader, + unsigned short s, + array_t& array, + std::size_t nplds, + std::size_t count, + std::size_t offset) { + logger::Checkpoint(fmt::format("Reading quantity: s%d_plds", s + 1), HERE); + auto var = io.InquireVariable(fmt::format("s%d_plds", s + 1)); + if (var) { + var.SetSelection(adios2::Box({ offset, 0 }, { count, nplds })); + const auto slice = range_tuple_t(0, count); + auto array_h = Kokkos::create_mirror_view(array); + reader.Get(var, + Kokkos::subview(array_h, slice, range_tuple_t(0, nplds)).data(), + adios2::Mode::Sync); + Kokkos::deep_copy(array, array_h); + } else { + raise::Error(fmt::format("Variable: s%d_plds not found", s + 1), HERE); + } + } + template void ReadFields(adios2::IO&, adios2::Engine&, const std::string&, diff --git a/src/checkpoint/reader.h b/src/checkpoint/reader.h index 2ea11bdb1..e5a91ab75 100644 --- a/src/checkpoint/reader.h +++ b/src/checkpoint/reader.h @@ -45,6 +45,14 @@ namespace checkpoint { std::size_t, std::size_t); + void ReadParticlePayloads(adios2::IO&, + adios2::Engine&, + unsigned short, + array_t&, + std::size_t, + std::size_t, + std::size_t); + } // namespace checkpoint #endif // CHECKPOINT_READER_H diff --git a/src/checkpoint/tests/CMakeLists.txt b/src/checkpoint/tests/CMakeLists.txt index 10836554b..54400652e 100644 --- a/src/checkpoint/tests/CMakeLists.txt +++ b/src/checkpoint/tests/CMakeLists.txt @@ -25,5 +25,5 @@ endfunction() if(NOT ${mpi}) gen_test(checkpoint-nompi) else() - # gen_test(checkpoint-mpi) + gen_test(checkpoint-mpi) endif() diff --git a/src/checkpoint/tests/checkpoint-mpi.cpp b/src/checkpoint/tests/checkpoint-mpi.cpp index 3ce4bab14..f97202ab1 100644 --- a/src/checkpoint/tests/checkpoint-mpi.cpp +++ b/src/checkpoint/tests/checkpoint-mpi.cpp @@ -39,36 +39,53 @@ auto main(int argc, char* argv[]) -> int { // | | | // | 0 | 1 | // |------|------| - constexpr auto g_nx1 = 20; - constexpr auto g_nx2 = 15; - constexpr auto g_nx1_gh = g_nx1 + 4 * N_GHOSTS; - constexpr auto g_nx2_gh = g_nx2 + 4 * N_GHOSTS; + const std::size_t g_nx1 = 20; + const std::size_t g_nx2 = 15; + const std::size_t g_nx1_gh = g_nx1 + 4 * N_GHOSTS; + const std::size_t g_nx2_gh = g_nx2 + 4 * N_GHOSTS; - constexpr auto l_nx1 = 10; - constexpr auto l_nx2 = (rank < 2) ? 10 : 5; + const std::size_t l_nx1 = 10; + const std::size_t l_nx2 = (rank < 2) ? 10 : 5; - constexpr auto l_nx1_gh = l_nx1 + 2 * N_GHOSTS; - constexpr auto l_nx2_gh = l_nx2 + 2 * N_GHOSTS; + const std::size_t l_nx1_gh = l_nx1 + 2 * N_GHOSTS; + const std::size_t l_nx2_gh = l_nx2 + 2 * N_GHOSTS; - constexpr auto l_corner_x1 = (rank % 2) * l_nx1; - constexpr auto l_corner_x2 = (rank / 2) * l_nx2; + const std::size_t l_corner_x1 = (rank % 2 == 0) ? 0 : l_nx1_gh; + const std::size_t l_corner_x2 = (rank < 2) ? 0 : l_nx2_gh; - constexpr auto i1min = N_GHOSTS; - constexpr auto i2min = N_GHOSTS; - constexpr auto i1max = l_nx1 + N_GHOSTS; - constexpr auto i2max = l_nx2 + N_GHOSTS; + const std::size_t i1min = N_GHOSTS; + const std::size_t i2min = N_GHOSTS; + const std::size_t i1max = l_nx1 + N_GHOSTS; + const std::size_t i2max = l_nx2 + N_GHOSTS; - constexpr auto npart1 = (rank % 2 + rank) * 23 + 100; - constexpr auto npart2 = (rank % 2 + rank) * 37 + 100; + const std::size_t npart1 = (rank % 2 + rank) * 23 + 100; + const std::size_t npart2 = (rank % 2 + rank) * 37 + 100; + + std::size_t npart1_offset = 0; + std::size_t npart2_offset = 0; + + std::size_t npart1_globtot = 0; + std::size_t npart2_globtot = 0; + + for (auto r = 0; r < rank - 1; ++r) { + npart1_offset += (r % 2 + r) * 23 + 100; + npart2_offset += (r % 2 + r) * 37 + 100; + } + + for (auto r = 0; r < size; ++r) { + npart1_globtot += (r % 2 + r) * 23 + 100; + npart2_globtot += (r % 2 + r) * 37 + 100; + } // init data ndfield_t field1 { "fld1", l_nx1_gh, l_nx2_gh }; ndfield_t field2 { "fld2", l_nx1_gh, l_nx2_gh }; - array_t i1 { "i_1", npart1 }; - array_t u1 { "u_1", npart1 }; - array_t i2 { "i_2", npart2 }; - array_t u2 { "u_2", npart2 }; + array_t i1 { "i_1", npart1 }; + array_t u1 { "u_1", npart1 }; + array_t i2 { "i_2", npart2 }; + array_t u2 { "u_2", npart2 }; + array_t plds1 { "plds_1", npart1, 3 }; { // fill data @@ -93,8 +110,11 @@ auto main(int argc, char* argv[]) -> int { "fillPrtl1", npart1, Lambda(index_t p) { - u1(p) = static_cast(p); - i1(p) = static_cast(p); + u1(p) = static_cast(p); + i1(p) = static_cast(p); + plds1(p, 0) = static_cast(p); + plds1(p, 1) = static_cast(p * p); + plds1(p, 2) = static_cast(p * p * p); }); Kokkos::parallel_for( "fillPrtl2", @@ -115,8 +135,9 @@ auto main(int argc, char* argv[]) -> int { writer.defineFieldVariables(SimEngine::GRPIC, { g_nx1_gh, g_nx2_gh }, { l_corner_x1, l_corner_x2 }, - { l_nx1, l_nx2 }); - writer.defineParticleVariables(Coord::Sph, Dim::_2D, 2, { 0, 0 }); + { l_nx1_gh, l_nx2_gh }); + + writer.defineParticleVariables(Coord::Sph, Dim::_2D, 2, { 3, 0 }); writer.beginSaving(0, 0.0); @@ -126,41 +147,66 @@ auto main(int argc, char* argv[]) -> int { writer.savePerDomainVariable("s1_npart", 1, 0, npart1); writer.savePerDomainVariable("s2_npart", 1, 0, npart2); - writer.saveParticleQuantity("s1_i1", npart1, 0, npart1, i1); - writer.saveParticleQuantity("s1_ux1", npart1, 0, npart1, u1); - writer.saveParticleQuantity("s2_i1", npart2, 0, npart2, i2); - writer.saveParticleQuantity("s2_ux1", npart2, 0, npart2, u2); + writer.saveParticleQuantity("s1_i1", + npart1_globtot, + npart1_offset, + npart1, + i1); + writer.saveParticleQuantity("s1_ux1", + npart1_globtot, + npart1_offset, + npart1, + u1); + writer.saveParticleQuantity("s2_i1", + npart2_globtot, + npart2_offset, + npart2, + i2); + writer.saveParticleQuantity("s2_ux1", + npart2_globtot, + npart2_offset, + npart2, + u2); + + writer.saveParticlePayloads("s1_plds", + 3, + npart1_globtot, + npart1_offset, + npart1, + plds1); writer.endSaving(); } { // read checkpoint - ndfield_t field1_read { "fld1_read", nx1_gh, nx2_gh, nx3_gh }; - ndfield_t field2_read { "fld2_read", nx1_gh, nx2_gh, nx3_gh }; + ndfield_t field1_read { "fld1_read", l_nx1_gh, l_nx2_gh }; + ndfield_t field2_read { "fld2_read", l_nx1_gh, l_nx2_gh }; - array_t i1_read { "i_1", npart1 }; - array_t u1_read { "u_1", npart1 }; - array_t i2_read { "i_2", npart2 }; - array_t u2_read { "u_2", npart2 }; + array_t i1_read { "i_1", npart1 }; + array_t u1_read { "u_1", npart1 }; + array_t i2_read { "i_2", npart2 }; + array_t u2_read { "u_2", npart2 }; + array_t plds1_read { "plds_1", npart1, 3 }; adios2::IO io = adios.DeclareIO("checkpointRead"); adios2::Engine reader = io.Open("checkpoints/step-00000000.bp", adios2::Mode::Read); reader.BeginStep(); - auto fieldRange = adios2::Box({ 0, 0, 0, 0 }, - { nx1_gh, nx2_gh, nx3_gh, 6 }); - ReadFields(io, reader, "em", fieldRange, field1_read); - ReadFields(io, reader, "em0", fieldRange, field2_read); + auto fieldRange = adios2::Box({ l_corner_x1, l_corner_x2, 0 }, + { l_nx1_gh, l_nx2_gh, 6 }); + ReadFields(io, reader, "em", fieldRange, field1_read); + ReadFields(io, reader, "em0", fieldRange, field2_read); - auto [nprtl1, noff1] = ReadParticleCount(io, reader, 0, 0, 1); - auto [nprtl2, noff2] = ReadParticleCount(io, reader, 1, 0, 1); + auto [nprtl1, noff1] = ReadParticleCount(io, reader, 0, rank, size); + auto [nprtl2, noff2] = ReadParticleCount(io, reader, 1, rank, size); ReadParticleData(io, reader, "ux1", 0, u1_read, nprtl1, noff1); ReadParticleData(io, reader, "ux1", 1, u2_read, nprtl2, noff2); ReadParticleData(io, reader, "i1", 0, i1_read, nprtl1, noff1); ReadParticleData(io, reader, "i1", 1, i2_read, nprtl2, noff2); + ReadParticlePayloads(io, reader, 0, plds1_read, 3, nprtl1, noff1); reader.EndStep(); reader.Close(); @@ -168,15 +214,13 @@ auto main(int argc, char* argv[]) -> int { // check the validity Kokkos::parallel_for( "checkFields", - CreateRangePolicy({ 0, 0, 0 }, { nx1_gh, nx2_gh, nx3_gh }), - Lambda(index_t i1, index_t i2, index_t i3) { + CreateRangePolicy({ 0, 0 }, { l_nx1_gh, l_nx2_gh }), + Lambda(index_t i1, index_t i2) { for (int i = 0; i < 6; ++i) { - if (not cmp::AlmostEqual(field1(i1, i2, i3, i), - field1_read(i1, i2, i3, i))) { + if (not cmp::AlmostEqual(field1(i1, i2, i), field1_read(i1, i2, i))) { raise::KernelError(HERE, "Field1 read failed"); } - if (not cmp::AlmostEqual(field2(i1, i2, i3, i), - field2_read(i1, i2, i3, i))) { + if (not cmp::AlmostEqual(field2(i1, i2, i), field2_read(i1, i2, i))) { raise::KernelError(HERE, "Field2 read failed"); } } @@ -184,12 +228,12 @@ auto main(int argc, char* argv[]) -> int { raise::ErrorIf(npart1 != nprtl1, "Particle count 1 mismatch", HERE); raise::ErrorIf(npart2 != nprtl2, "Particle count 2 mismatch", HERE); - raise::ErrorIf(noff1 != 0, "Particle offset 1 mismatch", HERE); - raise::ErrorIf(noff2 != 0, "Particle offset 2 mismatch", HERE); + raise::ErrorIf(noff1 != npart1_offset, "Particle offset 1 mismatch", HERE); + raise::ErrorIf(noff2 != npart2_offset, "Particle offset 2 mismatch", HERE); Kokkos::parallel_for( "checkPrtl1", - npart1, + nprtl1, Lambda(index_t p) { if (not cmp::AlmostEqual(u1(p), u1_read(p))) { raise::KernelError(HERE, "u1 read failed"); @@ -197,10 +241,15 @@ auto main(int argc, char* argv[]) -> int { if (i1(p) != i1_read(p)) { raise::KernelError(HERE, "i1 read failed"); } + for (auto l = 0; l < 3; ++l) { + if (not cmp::AlmostEqual(plds1(p, l), plds1_read(p, l))) { + raise::KernelError(HERE, "plds1 read failed"); + } + } }); Kokkos::parallel_for( "checkPrtl2", - npart2, + nprtl2, Lambda(index_t p) { if (not cmp::AlmostEqual(u2(p), u2_read(p))) { raise::KernelError(HERE, "u2 read failed"); diff --git a/src/checkpoint/writer.cpp b/src/checkpoint/writer.cpp index 9ef0b51c7..a12e3ef26 100644 --- a/src/checkpoint/writer.cpp +++ b/src/checkpoint/writer.cpp @@ -84,6 +84,7 @@ namespace checkpoint { { adios2::UnknownDim }, { adios2::UnknownDim }, { adios2::UnknownDim }); + for (auto d { 0u }; d < dim; ++d) { m_io.DefineVariable(fmt::format("s%d_i%d", s + 1, d + 1), { adios2::UnknownDim }, @@ -102,18 +103,21 @@ namespace checkpoint { { adios2::UnknownDim }, { adios2::UnknownDim }); } + if (dim == Dim::_2D and C != ntt::Coord::Cart) { m_io.DefineVariable(fmt::format("s%d_phi", s + 1), { adios2::UnknownDim }, { adios2::UnknownDim }, { adios2::UnknownDim }); } + for (auto d { 0u }; d < 3; ++d) { m_io.DefineVariable(fmt::format("s%d_ux%d", s + 1, d + 1), { adios2::UnknownDim }, { adios2::UnknownDim }, { adios2::UnknownDim }); } + m_io.DefineVariable(fmt::format("s%d_tag", s + 1), { adios2::UnknownDim }, { adios2::UnknownDim }, @@ -122,11 +126,11 @@ namespace checkpoint { { adios2::UnknownDim }, { adios2::UnknownDim }, { adios2::UnknownDim }); - for (auto p { 0u }; p < nplds[s]; ++p) { - m_io.DefineVariable(fmt::format("s%d_pld%d", s + 1, p + 1), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); + if (nplds[s] > 0) { + m_io.DefineVariable(fmt::format("s%d_plds", s + 1), + { adios2::UnknownDim, nplds[s] }, + { adios2::UnknownDim, 0 }, + { adios2::UnknownDim, nplds[s] }); } } } @@ -238,6 +242,25 @@ namespace checkpoint { m_writer.Put(var, data_sub.data(), adios2::Mode::Sync); } + void Writer::saveParticlePayloads(const std::string& quantity, + std::size_t nplds, + std::size_t glob_total, + std::size_t loc_offset, + std::size_t loc_size, + const array_t& data) { + const auto slice = range_tuple_t(0, loc_size); + auto var = m_io.InquireVariable(quantity); + + var.SetShape({ glob_total, nplds }); + var.SetSelection( + adios2::Box({ loc_offset, 0 }, { loc_size, nplds })); + + auto data_h = Kokkos::create_mirror_view(data); + Kokkos::deep_copy(data_h, data); + auto data_sub = Kokkos::subview(data_h, slice, range_tuple_t(0, nplds)); + m_writer.Put(var, data_sub.data(), adios2::Mode::Sync); + } + template void Writer::savePerDomainVariable(const std::string&, std::size_t, std::size_t, diff --git a/src/checkpoint/writer.h b/src/checkpoint/writer.h index 34b5f043f..346bee24a 100644 --- a/src/checkpoint/writer.h +++ b/src/checkpoint/writer.h @@ -69,10 +69,18 @@ namespace checkpoint { std::size_t, const array_t&); + void saveParticlePayloads(const std::string&, + std::size_t, + std::size_t, + std::size_t, + std::size_t, + const array_t&); + void defineFieldVariables(const ntt::SimEngine&, const std::vector&, const std::vector&, const std::vector&); + void defineParticleVariables(const ntt::Coord&, Dimension, std::size_t, diff --git a/src/engines/engine.hpp b/src/engines/engine.hpp index 5b7caa502..dac553dcd 100644 --- a/src/engines/engine.hpp +++ b/src/engines/engine.hpp @@ -55,10 +55,12 @@ namespace ntt { static_assert(user::PGen::is_pgen, "unrecognized problem generator"); protected: -#if MPI_ENABLED +#if defined(OUTPUT_ENABLED) + #if defined(MPI_ENABLED) adios2::ADIOS m_adios { MPI_COMM_WORLD }; -#else + #else adios2::ADIOS m_adios; + #endif #endif SimulationParams m_params; diff --git a/src/engines/engine_printer.cpp b/src/engines/engine_printer.cpp index 2608ea2f6..4b6ed42d7 100644 --- a/src/engines/engine_printer.cpp +++ b/src/engines/engine_printer.cpp @@ -105,8 +105,8 @@ namespace ntt { color::RESET); } - auto bytes_to_human_readable(std::size_t bytes) - -> std::pair { + auto bytes_to_human_readable( + std::size_t bytes) -> std::pair { const std::vector units { "B", "KB", "MB", "GB", "TB" }; std::size_t unit_idx = 0; auto size = static_cast(bytes); diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index bec5b8652..1db2de2ca 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -26,8 +26,8 @@ namespace ntt { "CurrentFiltering", "CurrentDeposit", "ParticlePusher", "FieldBoundaries", "ParticleBoundaries", "Communications", - "Injector", "Sorting", - "Custom", "Output", + "Injector", "Custom", + "PrtlClear", "Output", "Checkpoint" }, []() { Kokkos::fence(); @@ -37,9 +37,9 @@ namespace ntt { const auto diag_interval = m_params.get( "diagnostics.interval"); - auto time_history = pbar::DurationHistory { 1000 }; - const auto sort_interval = m_params.template get( - "particles.sort_interval"); + auto time_history = pbar::DurationHistory { 1000 }; + const auto clear_interval = m_params.template get( + "particles.clear_interval"); // main algorithm loop while (step < max_steps) { @@ -56,7 +56,8 @@ namespace ntt { }); timers.stop("Custom"); } - auto print_sorting = (sort_interval > 0 and step % sort_interval == 0); + auto print_prtl_clear = (clear_interval > 0 and + step % clear_interval == 0 and step > 0); // advance time & step time += dt; @@ -109,7 +110,7 @@ namespace ntt { m_metadomain.species_labels(), m_metadomain.l_npart_perspec(), m_metadomain.l_maxnpart_perspec(), - print_sorting, + print_prtl_clear, print_output, print_checkpoint, m_params.get("diagnostics.colored_stdout")); diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 78c8f371e..b54291540 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -80,8 +80,8 @@ namespace ntt { "algorithms.toggles.fieldsolver"); const auto deposit_enabled = m_params.template get( "algorithms.toggles.deposit"); - const auto sort_interval = m_params.template get( - "particles.sort_interval"); + const auto clear_interval = m_params.template get( + "particles.clear_interval"); if (step == 0) { // communicate fields and apply BCs on the first timestep @@ -102,6 +102,7 @@ namespace ntt { timers.start("FieldBoundaries"); FieldBoundaries(dom, BC::B); timers.stop("FieldBoundaries"); + Kokkos::fence(); } { @@ -126,9 +127,7 @@ namespace ntt { } timers.start("Communications"); - if ((sort_interval > 0) and (step % sort_interval == 0)) { - m_metadomain.CommunicateParticles(dom, &timers); - } + m_metadomain.CommunicateParticles(dom); timers.stop("Communications"); } @@ -169,6 +168,12 @@ namespace ntt { ParticleInjector(dom); timers.stop("Injector"); } + + if (clear_interval > 0 and step % clear_interval == 0 and step > 0) { + timers.start("PrtlClear"); + m_metadomain.RemoveDeadParticles(dom); + timers.stop("PrtlClear"); + } } /* algorithm substeps --------------------------------------------------- */ diff --git a/src/entity.cpp b/src/entity.cpp index 272635d68..79b2f1335 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -114,4 +114,4 @@ auto main(int argc, char* argv[]) -> int { } return 0; -} +} \ No newline at end of file diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index f0c64c4ee..d78055824 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -4,13 +4,18 @@ #include "global.h" #include "arch/kokkos_aliases.h" +#include "utils/numeric.h" #include "utils/sorting.h" #include "framework/containers/species.h" #include #include +#include +#include +#include +#include #include #include @@ -26,162 +31,208 @@ namespace ntt { const Cooling& cooling, unsigned short npld) : ParticleSpecies(index, label, m, ch, maxnpart, pusher, use_gca, cooling, npld) { - i1 = array_t { label + "_i1", maxnpart }; - i1_h = Kokkos::create_mirror_view(i1); - dx1 = array_t { label + "_dx1", maxnpart }; - dx1_h = Kokkos::create_mirror_view(dx1); - - i1_prev = array_t { label + "_i1_prev", maxnpart }; - dx1_prev = array_t { label + "_dx1_prev", maxnpart }; - - ux1 = array_t { label + "_ux1", maxnpart }; - ux1_h = Kokkos::create_mirror_view(ux1); - ux2 = array_t { label + "_ux2", maxnpart }; - ux2_h = Kokkos::create_mirror_view(ux2); - ux3 = array_t { label + "_ux3", maxnpart }; - ux3_h = Kokkos::create_mirror_view(ux3); - - weight = array_t { label + "_w", maxnpart }; - weight_h = Kokkos::create_mirror_view(weight); - - tag = array_t { label + "_tag", maxnpart }; - tag_h = Kokkos::create_mirror_view(tag); - - for (unsigned short n { 0 }; n < npld; ++n) { - pld.push_back(array_t("pld", maxnpart)); - pld_h.push_back(Kokkos::create_mirror_view(pld[n])); - } - if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { - i2 = array_t { label + "_i2", maxnpart }; - i2_h = Kokkos::create_mirror_view(i2); - dx2 = array_t { label + "_dx2", maxnpart }; - dx2_h = Kokkos::create_mirror_view(dx2); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + i1 = array_t { label + "_i1", maxnpart }; + dx1 = array_t { label + "_dx1", maxnpart }; + i1_prev = array_t { label + "_i1_prev", maxnpart }; + dx1_prev = array_t { label + "_dx1_prev", maxnpart }; + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + i2 = array_t { label + "_i2", maxnpart }; + dx2 = array_t { label + "_dx2", maxnpart }; i2_prev = array_t { label + "_i2_prev", maxnpart }; dx2_prev = array_t { label + "_dx2_prev", maxnpart }; } - if ((D == Dim::_2D) && (C != Coord::Cart)) { - phi = array_t { label + "_phi", maxnpart }; - phi_h = Kokkos::create_mirror_view(phi); - } if constexpr (D == Dim::_3D) { - i3 = array_t { label + "_i3", maxnpart }; - i3_h = Kokkos::create_mirror_view(i3); - dx3 = array_t { label + "_dx3", maxnpart }; - dx3_h = Kokkos::create_mirror_view(dx3); - + i3 = array_t { label + "_i3", maxnpart }; + dx3 = array_t { label + "_dx3", maxnpart }; i3_prev = array_t { label + "_i3_prev", maxnpart }; dx3_prev = array_t { label + "_dx3_prev", maxnpart }; } + + ux1 = array_t { label + "_ux1", maxnpart }; + ux2 = array_t { label + "_ux2", maxnpart }; + ux3 = array_t { label + "_ux3", maxnpart }; + + weight = array_t { label + "_w", maxnpart }; + + tag = array_t { label + "_tag", maxnpart }; + + if (npld > 0) { + pld = array_t { label + "_pld", maxnpart, npld }; + } + + if ((D == Dim::_2D) && (C != Coord::Cart)) { + phi = array_t { label + "_phi", maxnpart }; + } } template - auto Particles::npart_per_tag() const -> std::vector { + auto Particles::NpartsPerTagAndOffsets() const + -> std::pair, array_t> { auto this_tag = tag; - array_t npart_tag("npart_tags", ntags()); + const auto num_tags = ntags(); + array_t npptag { "nparts_per_tag", ntags() }; - auto npart_tag_scatter = Kokkos::Experimental::create_scatter_view(npart_tag); + // count # of particles per each tag + auto npptag_scat = Kokkos::Experimental::create_scatter_view(npptag); Kokkos::parallel_for( "NpartPerTag", - npart(), + rangeActiveParticles(), Lambda(index_t p) { - auto npart_tag_scatter_access = npart_tag_scatter.access(); - npart_tag_scatter_access((int)(this_tag(p))) += 1; + auto npptag_acc = npptag_scat.access(); + if (this_tag(p) < 0 || this_tag(p) >= num_tags) { + raise::KernelError(HERE, "Invalid tag value"); + } + npptag_acc(this_tag(p)) += 1; }); - Kokkos::Experimental::contribute(npart_tag, npart_tag_scatter); + Kokkos::Experimental::contribute(npptag, npptag_scat); + + // copy the count to a vector on the host + auto npptag_h = Kokkos::create_mirror_view(npptag); + Kokkos::deep_copy(npptag_h, npptag); + std::vector npptag_vec(num_tags); + for (auto t { 0u }; t < num_tags; ++t) { + npptag_vec[t] = npptag_h(t); + } - auto npart_tag_host = Kokkos::create_mirror_view(npart_tag); - Kokkos::deep_copy(npart_tag_host, npart_tag); + // count the offsets on the host and copy to device + array_t tag_offsets("tag_offsets", num_tags - 3); + auto tag_offsets_h = Kokkos::create_mirror_view(tag_offsets); - std::vector npart_tag_vec; - for (std::size_t t { 0 }; t < ntags(); ++t) { - npart_tag_vec.push_back(npart_tag_host(t)); + tag_offsets_h(0) = npptag_vec[2]; // offset for tag = 3 + for (auto t { 1u }; t < num_tags - 3; ++t) { + tag_offsets_h(t) = npptag_vec[t + 2] + tag_offsets_h(t - 1); } - return npart_tag_vec; + Kokkos::deep_copy(tag_offsets, tag_offsets_h); + + return { npptag_vec, tag_offsets }; + } + + template + void RemoveDeadInArray(array_t& arr, + const array_t& indices_alive) { + auto n_alive = indices_alive.extent(0); + auto buffer = Kokkos::View("buffer", n_alive); + Kokkos::parallel_for( + "PopulateBufferAlive", + n_alive, + Lambda(index_t p) { buffer(p) = arr(indices_alive(p)); }); + + Kokkos::deep_copy( + Kokkos::subview(arr, std::make_pair(static_cast(0), n_alive)), + buffer); + } + + template + void RemoveDeadInArray(array_t& arr, + const array_t& indices_alive) { + auto n_alive = indices_alive.extent(0); + auto buffer = array_t { "buffer", n_alive, arr.extent(1) }; + Kokkos::parallel_for( + "PopulateBufferAlive", + CreateRangePolicy({ 0, 0 }, { n_alive, arr.extent(1) }), + Lambda(index_t p, index_t l) { buffer(p, l) = arr(indices_alive(p), l); }); + + Kokkos::deep_copy( + Kokkos::subview(arr, + std::make_pair(static_cast(0), n_alive), + Kokkos::ALL), + buffer); } template - auto Particles::SortByTags() -> std::vector { - if (npart() == 0 || is_sorted()) { - return npart_per_tag(); - } - using KeyType = array_t; - using BinOp = sort::BinTag; - BinOp bin_op(ntags()); - auto slice = range_tuple_t(0, npart()); - Kokkos::BinSort Sorter(Kokkos::subview(tag, slice), bin_op, false); - Sorter.create_permute_vector(); - - Sorter.sort(Kokkos::subview(i1, slice)); - Sorter.sort(Kokkos::subview(dx1, slice)); - Sorter.sort(Kokkos::subview(i1_prev, slice)); - Sorter.sort(Kokkos::subview(dx1_prev, slice)); - Sorter.sort(Kokkos::subview(ux1, slice)); - Sorter.sort(Kokkos::subview(ux2, slice)); - Sorter.sort(Kokkos::subview(ux3, slice)); - - Sorter.sort(Kokkos::subview(tag, slice)); - Sorter.sort(Kokkos::subview(weight, slice)); - - for (unsigned short n { 0 }; n < npld(); ++n) { - Sorter.sort(Kokkos::subview(pld[n], slice)); - } + void Particles::RemoveDead() { + const auto n_part = npart(); + std::size_t n_alive = 0, n_dead = 0; + auto& this_tag = tag; + + Kokkos::parallel_reduce( + "CountDeadAlive", + rangeActiveParticles(), + Lambda(index_t p, std::size_t & nalive, std::size_t & ndead) { + nalive += (this_tag(p) == ParticleTag::alive); + ndead += (this_tag(p) == ParticleTag::dead); + if (this_tag(p) != ParticleTag::alive and this_tag(p) != ParticleTag::dead) { + raise::KernelError(HERE, "wrong particle tag"); + } + }, + n_alive, + n_dead); + + array_t indices_alive { "indices_alive", n_alive }; + array_t alive_counter { "counter_alive", 1 }; - if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { - Sorter.sort(Kokkos::subview(i2, slice)); - Sorter.sort(Kokkos::subview(dx2, slice)); + Kokkos::parallel_for( + "AliveIndices", + rangeActiveParticles(), + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive) { + const auto idx = Kokkos::atomic_fetch_add(&alive_counter(0), 1); + indices_alive(idx) = p; + } + }); - Sorter.sort(Kokkos::subview(i2_prev, slice)); - Sorter.sort(Kokkos::subview(dx2_prev, slice)); + { + auto alive_counter_h = Kokkos::create_mirror_view(alive_counter); + Kokkos::deep_copy(alive_counter_h, alive_counter); + raise::ErrorIf(alive_counter_h(0) != n_alive, + "error in finding alive particle indices", + HERE); } - if constexpr (D == Dim::_3D) { - Sorter.sort(Kokkos::subview(i3, slice)); - Sorter.sort(Kokkos::subview(dx3, slice)); - Sorter.sort(Kokkos::subview(i3_prev, slice)); - Sorter.sort(Kokkos::subview(dx3_prev, slice)); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + RemoveDeadInArray(i1, indices_alive); + RemoveDeadInArray(i1_prev, indices_alive); + RemoveDeadInArray(dx1, indices_alive); + RemoveDeadInArray(dx1_prev, indices_alive); } - if ((D == Dim::_2D) && (C != Coord::Cart)) { - Sorter.sort(Kokkos::subview(phi, slice)); + if constexpr (D == Dim::_2D or D == Dim::_3D) { + RemoveDeadInArray(i2, indices_alive); + RemoveDeadInArray(i2_prev, indices_alive); + RemoveDeadInArray(dx2, indices_alive); + RemoveDeadInArray(dx2_prev, indices_alive); } - const auto np_per_tag = npart_per_tag(); - set_npart(np_per_tag[(short)(ParticleTag::alive)]); + if constexpr (D == Dim::_3D) { + RemoveDeadInArray(i3, indices_alive); + RemoveDeadInArray(i3_prev, indices_alive); + RemoveDeadInArray(dx3, indices_alive); + RemoveDeadInArray(dx3_prev, indices_alive); + } - m_is_sorted = true; - return np_per_tag; - } + RemoveDeadInArray(ux1, indices_alive); + RemoveDeadInArray(ux2, indices_alive); + RemoveDeadInArray(ux3, indices_alive); + RemoveDeadInArray(weight, indices_alive); - template - void Particles::SyncHostDevice() { - Kokkos::deep_copy(i1_h, i1); - Kokkos::deep_copy(dx1_h, dx1); - Kokkos::deep_copy(ux1_h, ux1); - Kokkos::deep_copy(ux2_h, ux2); - Kokkos::deep_copy(ux3_h, ux3); - - Kokkos::deep_copy(tag_h, tag); - Kokkos::deep_copy(weight_h, weight); - - for (auto n { 0 }; n < npld(); ++n) { - Kokkos::deep_copy(pld_h[n], pld[n]); + if constexpr (D == Dim::_2D && C != Coord::Cart) { + RemoveDeadInArray(phi, indices_alive); } - if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { - Kokkos::deep_copy(i2_h, i2); - Kokkos::deep_copy(dx2_h, dx2); - } - if constexpr (D == Dim::_3D) { - Kokkos::deep_copy(i3_h, i3); - Kokkos::deep_copy(dx3_h, dx3); + if (npld() > 0) { + RemoveDeadInArray(pld, indices_alive); } - if ((D == Dim::_2D) && (C != Coord::Cart)) { - Kokkos::deep_copy(phi_h, phi); - } + Kokkos::Experimental::fill( + "TagAliveParticles", + AccelExeSpace(), + Kokkos::subview(this_tag, + std::make_pair(static_cast(0), n_alive)), + ParticleTag::alive); + + Kokkos::Experimental::fill( + "TagDeadParticles", + AccelExeSpace(), + Kokkos::subview(this_tag, std::make_pair(n_alive, n_alive + n_dead)), + ParticleTag::dead); + + set_npart(n_alive); + m_is_sorted = true; } template struct Particles; diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index b4831b64a..d84bd0cc9 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -48,31 +48,22 @@ namespace ntt { public: // Cell indices of the current particle - array_t i1, i2, i3; + array_t i1, i2, i3; // Displacement of a particle within the cell - array_t dx1, dx2, dx3; + array_t dx1, dx2, dx3; // Three spatial components of the covariant 4-velocity (physical units) - array_t ux1, ux2, ux3; + array_t ux1, ux2, ux3; // Particle weights. - array_t weight; + array_t weight; // Previous timestep coordinates - array_t i1_prev, i2_prev, i3_prev; - array_t dx1_prev, dx2_prev, dx3_prev; + array_t i1_prev, i2_prev, i3_prev; + array_t dx1_prev, dx2_prev, dx3_prev; // Array to tag the particles - array_t tag; - // Array to store the particle load - std::vector> pld; + array_t tag; + // Array to store the particle payloads + array_t pld; // phi coordinate (for axisymmetry) - array_t phi; - - // host mirrors - array_mirror_t i1_h, i2_h, i3_h; - array_mirror_t dx1_h, dx2_h, dx3_h; - array_mirror_t ux1_h, ux2_h, ux3_h; - array_mirror_t weight_h; - array_mirror_t phi_h; - array_mirror_t tag_h; - std::vector> pld_h; + array_t phi; // for empty allocation Particles() {} @@ -178,19 +169,26 @@ namespace ntt { footprint += sizeof(prtldx_t) * dx2_prev.extent(0); footprint += sizeof(prtldx_t) * dx3_prev.extent(0); footprint += sizeof(short) * tag.extent(0); - for (auto& p : pld) { - footprint += sizeof(real_t) * p.extent(0); - } - footprint += sizeof(real_t) * phi.extent(0); + footprint += sizeof(real_t) * pld.extent(0) * pld.extent(1); + footprint += sizeof(real_t) * phi.extent(0); return footprint; } /** * @brief Count the number of particles with a specific tag. - * @return The vector of counts for each tag. + * @return The vector of counts for each tag + offsets + * @note For instance, given the counts: 0 -> n0, 1 -> n1, 2 -> n2, 3 -> n3, + * ... it returns: + * ... [n0, n1, n2, n3, ...] of size ntags + * ... [n2, n2 + n3, n2 + n3 + n4, ...] of size ntags - 3 + * ... so in buffer array: + * ... tag=2 particles are offset by 0 + * ... tag=3 particles are offset by n2 + * ... tag=4 particles are offset by n2 + n3 + * ... etc. */ - [[nodiscard]] - auto npart_per_tag() const -> std::vector; + auto NpartsPerTagAndOffsets() const + -> std::pair, array_t>; /* setters -------------------------------------------------------------- */ /** @@ -213,15 +211,16 @@ namespace ntt { } /** - * @brief Sort particles by their tags. - * @return The vector of counts per each tag. + * @brief Move dead particles to the end of arrays */ - auto SortByTags() -> std::vector; + void RemoveDead(); /** * @brief Copy particle data from device to host. */ void SyncHostDevice(); + + // void PrintTags(); }; } // namespace ntt diff --git a/src/framework/domain/checkpoint.cpp b/src/framework/domain/checkpoint.cpp index 3d309c090..6dfb137db 100644 --- a/src/framework/domain/checkpoint.cpp +++ b/src/framework/domain/checkpoint.cpp @@ -242,13 +242,13 @@ namespace ntt { local_domain->species[s].weight); auto nplds = local_domain->species[s].npld(); - for (auto p { 0u }; p < nplds; ++p) { - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_pld%d", s + 1, p + 1), - glob_tot, - offset, - npart, - local_domain->species[s].pld[p]); + if (nplds > 0) { + g_checkpoint_writer.saveParticlePayloads(fmt::format("s%d_plds", s + 1), + nplds, + glob_tot, + offset, + npart, + local_domain->species[s].pld); } } } @@ -451,14 +451,16 @@ namespace ntt { domain.species[s].weight, loc_npart, offset_npart); - for (auto p { 0u }; p < domain.species[s].npld(); ++p) { - checkpoint::ReadParticleData(io, - reader, - fmt::format("pld%d", p + 1), - s, - domain.species[s].pld[p], - loc_npart, - offset_npart); + + const auto nplds = domain.species[s].npld(); + if (nplds > 0) { + checkpoint::ReadParticlePayloads(io, + reader, + s, + domain.species[s].pld, + nplds, + loc_npart, + offset_npart); } domain.species[s].set_npart(loc_npart); } // species loop diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 63dd8271a..e5bc2d21e 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -14,15 +14,20 @@ #include "enums.h" #include "global.h" +#include "arch/directions.h" #include "arch/kokkos_aliases.h" #include "arch/mpi_aliases.h" +#include "arch/mpi_tags.h" #include "utils/error.h" #include "framework/containers/particles.h" +#include "kernels/comm.hpp" + #include #include +#include #include namespace comm { @@ -52,10 +57,11 @@ namespace comm { (recv_rank == rank && recv_idx != idx), "Multiple-domain single-rank communication not yet implemented", HERE); - if ((send_idx == idx) and (recv_idx == idx)) { // trivial copy if sending to self and receiving from self + if (not additive) { + // simply filling the ghost cells if constexpr (D == Dim::_1D) { Kokkos::deep_copy(Kokkos::subview(fld, recv_slice[0], comps), @@ -65,6 +71,7 @@ namespace comm { Kokkos::subview(fld, recv_slice[0], recv_slice[1], comps), Kokkos::subview(fld, send_slice[0], send_slice[1], comps)); } else if constexpr (D == Dim::_3D) { + Kokkos::deep_copy( Kokkos::subview(fld, recv_slice[0], recv_slice[1], recv_slice[2], comps), Kokkos::subview(fld, send_slice[0], send_slice[1], send_slice[2], comps)); @@ -177,6 +184,7 @@ namespace comm { comps.second - comps.first); } } + if (send_rank >= 0 && recv_rank >= 0) { MPI_Sendrecv(send_fld.data(), nsend, @@ -197,6 +205,7 @@ namespace comm { send_rank, 0, MPI_COMM_WORLD); + } else if (recv_rank >= 0) { MPI_Recv(recv_fld.data(), nrecv, @@ -208,7 +217,9 @@ namespace comm { } else { raise::Error("CommunicateField called with negative ranks", HERE); } + if (recv_rank >= 0) { + // !TODO: perhaps directly recv to the fld? if (not additive) { if constexpr (D == Dim::_1D) { @@ -276,50 +287,10 @@ namespace comm { } } - template - void CommunicateParticleQuantity(array_t& arr, - int send_rank, - int recv_rank, - const range_tuple_t& send_slice, - const range_tuple_t& recv_slice) { - const std::size_t send_count = send_slice.second - send_slice.first; - const std::size_t recv_count = recv_slice.second - recv_slice.first; - if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and - (recv_count > 0)) { - MPI_Sendrecv(arr.data() + send_slice.first, - send_count, - mpi::get_type(), - send_rank, - 0, - arr.data() + recv_slice.first, - recv_count, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } else if ((send_rank >= 0) and (send_count > 0)) { - MPI_Send(arr.data() + send_slice.first, - send_count, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - } else if ((recv_rank >= 0) and (recv_count > 0)) { - MPI_Recv(arr.data() + recv_slice.first, - recv_count, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } - } - - void ParticleSendRecvCount(int send_rank, - int recv_rank, - const std::size_t& send_count, - std::size_t& recv_count) { + void ParticleSendRecvCount(int send_rank, + int recv_rank, + std::size_t send_count, + std::size_t& recv_count) { if ((send_rank >= 0) && (recv_rank >= 0)) { MPI_Sendrecv(&send_count, 1, @@ -349,96 +320,233 @@ namespace comm { } template - auto CommunicateParticles(Particles& species, - int send_rank, - int recv_rank, - const range_tuple_t& send_slice, - std::size_t& index_last) -> std::size_t { - if ((send_rank < 0) && (recv_rank < 0)) { - raise::Error("No send or recv in CommunicateParticles", HERE); - } - std::size_t recv_count { 0 }; - ParticleSendRecvCount(send_rank, - recv_rank, - send_slice.second - send_slice.first, - recv_count); - - raise::FatalIf((index_last + recv_count) >= species.maxnpart(), - "Too many particles to receive (cannot fit into maxptl)", - HERE); - const auto recv_slice = range_tuple_t({ index_last, index_last + recv_count }); - - CommunicateParticleQuantity(species.i1, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.dx1, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.i1_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.dx1_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - if constexpr (D == Dim::_2D || D == Dim::_3D) { - CommunicateParticleQuantity(species.i2, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.dx2, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.i2_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.dx2_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - } - if constexpr (D == Dim::_3D) { - CommunicateParticleQuantity(species.i3, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.dx3, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.i3_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.dx3_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - } - CommunicateParticleQuantity(species.ux1, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.ux2, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.ux3, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.weight, - send_rank, - recv_rank, - send_slice, - recv_slice); - if constexpr (D == Dim::_2D and C != Coord::Cart) { - CommunicateParticleQuantity(species.phi, - send_rank, - recv_rank, - send_slice, - recv_slice); + void CommunicateParticles(Particles& species, + const array_t& outgoing_indices, + const array_t& tag_offsets, + const std::vector& npptag_vec, + const std::vector& npptag_recv_vec, + const std::vector& send_ranks, + const std::vector& recv_ranks, + const dir::dirs_t& dirs_to_comm) { + // number of arrays of each type to send/recv + const unsigned short NREALS = 4 + static_cast( + D == Dim::_2D and C != Coord::Cart); + const unsigned short NINTS = 2 * static_cast(D); + const unsigned short NPRTLDX = 2 * static_cast(D); + const unsigned short NPLDS = species.npld(); + + // buffers to store recv data + const auto npart_alive = npptag_vec[ParticleTag::alive]; + const auto npart_dead = npptag_vec[ParticleTag::dead]; + const auto npart_send = outgoing_indices.extent(0) - npart_dead; + const auto npart_recv = std::accumulate(npptag_recv_vec.begin(), + npptag_recv_vec.end(), + static_cast(0)); + array_t recv_buff_int { "recv_buff_int", npart_recv * NINTS }; + array_t recv_buff_real { "recv_buff_real", npart_recv * NREALS }; + array_t recv_buff_prtldx { "recv_buff_prtldx", npart_recv * NPRTLDX }; + array_t recv_buff_pld; + + if (NPLDS > 0) { + recv_buff_pld = array_t { "recv_buff_pld", npart_recv * NPLDS }; } - for (auto p { 0 }; p < species.npld(); ++p) { - CommunicateParticleQuantity(species.pld[p], - send_rank, - recv_rank, - send_slice, - recv_slice); + + auto iteration = 0; + auto current_received = 0; + + for (const auto& direction : dirs_to_comm) { + const auto send_rank = send_ranks[iteration]; + const auto recv_rank = recv_ranks[iteration]; + const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); + const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); + const auto npart_send_in = npptag_vec[tag_send]; + const auto npart_recv_in = npptag_recv_vec[tag_recv - 2]; + if (send_rank < 0 and recv_rank < 0) { + continue; + } + array_t send_buff_int { "send_buff_int", npart_send_in * NINTS }; + array_t send_buff_real { "send_buff_real", npart_send_in * NREALS }; + array_t send_buff_prtldx { "send_buff_prtldx", + npart_send_in * NPRTLDX }; + array_t send_buff_pld; + if (NPLDS > 0) { + send_buff_pld = array_t { "send_buff_pld", npart_send_in * NPLDS }; + } + + auto tag_offsets_h = Kokkos::create_mirror_view(tag_offsets); + Kokkos::deep_copy(tag_offsets_h, tag_offsets); + + std::size_t idx_offset = npart_dead; + if (tag_send > 2) { + idx_offset += tag_offsets_h(tag_send - 3); + } + // clang-format off + Kokkos::parallel_for( + "PopulatePrtlSendBuffer", + npart_send_in, + kernel::comm::PopulatePrtlSendBuffer_kernel( + send_buff_int, send_buff_real, send_buff_prtldx, send_buff_pld, + NINTS, NREALS, NPRTLDX, NPLDS, idx_offset, + species.i1, species.i1_prev, species.dx1, species.dx1_prev, + species.i2, species.i2_prev, species.dx2, species.dx2_prev, + species.i3, species.i3_prev, species.dx3, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.weight, species.phi, species.pld, species.tag, + outgoing_indices) + ); + // clang-format on + + const auto recv_offset_int = current_received * NINTS; + const auto recv_offset_real = current_received * NREALS; + const auto recv_offset_prtldx = current_received * NPRTLDX; + const auto recv_offset_pld = current_received * NPLDS; + + if ((send_rank >= 0) and (recv_rank >= 0) and (npart_send_in > 0) and + (npart_recv_in > 0)) { + raise::ErrorIf(recv_offset_int + npart_recv_in * NINTS > + recv_buff_int.extent(0), + "incorrect # of recv particles", + HERE); + MPI_Sendrecv(send_buff_int.data(), + npart_send_in * NINTS, + mpi::get_type(), + send_rank, + 0, + recv_buff_int.data() + recv_offset_int, + npart_recv_in * NINTS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Sendrecv(send_buff_real.data(), + npart_send_in * NREALS, + mpi::get_type(), + send_rank, + 0, + recv_buff_real.data() + recv_offset_real, + npart_recv_in * NREALS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Sendrecv(send_buff_prtldx.data(), + npart_send_in * NPRTLDX, + mpi::get_type(), + send_rank, + 0, + recv_buff_prtldx.data() + recv_offset_prtldx, + npart_recv_in * NPRTLDX, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + if (NPLDS > 0) { + MPI_Sendrecv(send_buff_pld.data(), + npart_send_in * NPLDS, + mpi::get_type(), + send_rank, + 0, + recv_buff_pld.data() + recv_offset_pld, + npart_recv_in * NPLDS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } else if ((send_rank >= 0) and (npart_send_in > 0)) { + MPI_Send(send_buff_int.data(), + npart_send_in * NINTS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + MPI_Send(send_buff_real.data(), + npart_send_in * NREALS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + MPI_Send(send_buff_prtldx.data(), + npart_send_in * NPRTLDX, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + if (NPLDS > 0) { + MPI_Send(send_buff_pld.data(), + npart_send_in * NPLDS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + } + } else if ((recv_rank >= 0) and (npart_recv_in > 0)) { + raise::ErrorIf(recv_offset_int + npart_recv_in * NINTS > + recv_buff_int.extent(0), + "incorrect # of recv particles", + HERE); + MPI_Recv(recv_buff_int.data() + recv_offset_int, + npart_recv_in * NINTS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Recv(recv_buff_real.data() + recv_offset_real, + npart_recv_in * NREALS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Recv(recv_buff_prtldx.data() + recv_offset_prtldx, + npart_recv_in * NPRTLDX, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + if (NPLDS > 0) { + MPI_Recv(recv_buff_pld.data() + recv_offset_pld, + npart_recv_in * NPLDS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + current_received += npart_recv_in; + iteration++; + + } // end direction loop + + // clang-format off + Kokkos::parallel_for( + "PopulateFromRecvBuffer", + npart_recv, + kernel::comm::ExtractReceivedPrtls_kernel( + recv_buff_int, recv_buff_real, recv_buff_prtldx, recv_buff_pld, + NINTS, NREALS, NPRTLDX, NPLDS, + species.npart(), + species.i1, species.i1_prev, species.dx1, species.dx1_prev, + species.i2, species.i2_prev, species.dx2, species.dx2_prev, + species.i3, species.i3_prev, species.dx3, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.weight, species.phi, species.pld, species.tag, + outgoing_indices) + ); + // clang-format on + + const auto npart = species.npart(); + const auto npart_holes = outgoing_indices.extent(0); + if (npart_recv > npart_holes) { + species.set_npart(npart + npart_recv - npart_holes); } - return recv_count; } } // namespace comm diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 60524eedd..7dc5d285a 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -20,10 +20,13 @@ #include "arch/mpi_tags.h" #include "framework/domain/comm_mpi.hpp" + #include "kernels/comm.hpp" #else #include "framework/domain/comm_nompi.hpp" #endif +#include + #include #include @@ -86,8 +89,8 @@ namespace ntt { } else { // no communication necessary return { - {0, -1}, - {0, -1} + { 0, -1 }, + { 0, -1 } }; } #if defined(MPI_ENABLED) @@ -110,8 +113,8 @@ namespace ntt { (void)send_rank; (void)recv_rank; return { - {send_ind, send_rank}, - {recv_ind, recv_rank} + { send_ind, send_rank }, + { recv_ind, recv_rank } }; } @@ -129,8 +132,8 @@ namespace ntt { const auto is_receiving = (recv_rank >= 0); if (not(is_sending or is_receiving)) { return { - {{ 0, -1 }, {}}, - {{ 0, -1 }, {}} + { { 0, -1 }, {} }, + { { 0, -1 }, {} } }; } auto send_slice = std::vector {}; @@ -196,8 +199,8 @@ namespace ntt { } return { - {{ send_ind, send_rank }, send_slice}, - {{ recv_ind, recv_rank }, recv_slice}, + { { send_ind, send_rank }, send_slice }, + { { recv_ind, recv_rank }, recv_slice }, }; } @@ -492,157 +495,151 @@ namespace ntt { } template - void Metadomain::CommunicateParticles(Domain& domain, - timer::Timers* timers) { - raise::ErrorIf(timers == nullptr, - "Timers not passed when Comm::Prtl called", - HERE); + void Metadomain::CommunicateParticles(Domain& domain) { +#if defined(MPI_ENABLED) logger::Checkpoint("Communicating particles\n", HERE); for (auto& species : domain.species) { - // at this point particles should already by tagged in the pusher - timers->start("Sorting"); - const auto npart_per_tag = species.SortByTags(); - timers->stop("Sorting"); -#if defined(MPI_ENABLED) - timers->start("Communications"); - // only necessary when MPI is enabled - /** - * index_last - * | - * alive new dead tag1 tag2 v dead - * [ 11111111 000000000 222222222 3333333 .... nnnnnnn 00000000 ... ] - * ^ ^ - * | | - * tag_offset[tag1] -----+ +----- tag_offset[tag1] + npart_per_tag[tag1] - * "send_pmin" "send_pmax" (after last element) - */ - auto tag_offset { npart_per_tag }; - for (std::size_t i { 1 }; i < tag_offset.size(); ++i) { - tag_offset[i] += tag_offset[i - 1]; - } - for (std::size_t i { 0 }; i < tag_offset.size(); ++i) { - tag_offset[i] -= npart_per_tag[i]; - } - auto index_last = tag_offset[tag_offset.size() - 1] + - npart_per_tag[npart_per_tag.size() - 1]; - for (auto& direction : dir::Directions::all) { + const auto ntags = species.ntags(); + + // at this point particles should already be tagged in the pusher + auto [npptag_vec, tag_offsets] = species.NpartsPerTagAndOffsets(); + const auto npart_dead = npptag_vec[ParticleTag::dead]; + const auto npart_alive = npptag_vec[ParticleTag::alive]; + + const auto npart = species.npart(); + const auto npart_holes = npart - npart_alive; + + // # of particles to receive per each tag (direction) + std::vector npptag_recv_vec(ntags - 2, 0); + // coordinate shifts per each direction + array_t shifts_in_x1 { "shifts_in_x1", ntags - 2 }; + array_t shifts_in_x2 { "shifts_in_x2", ntags - 2 }; + array_t shifts_in_x3 { "shifts_in_x3", ntags - 2 }; + auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); + auto shifts_in_x2_h = Kokkos::create_mirror_view(shifts_in_x2); + auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); + + // all directions requiring communication + dir::dirs_t dirs_to_comm; + + // ranks & indices of meshblock to send/recv from + std::vector send_ranks, send_inds; + std::vector recv_ranks, recv_inds; + + // total # of reaceived particles from all directions + std::size_t npart_recv = 0u; + + for (const auto& direction : dir::Directions::all) { + // tags corresponding to the direction (both send & recv) + const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); + const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); + + // get indices & ranks of send/recv meshblocks const auto [send_params, recv_params] = GetSendRecvParams(this, domain, direction, true); const auto [send_indrank, send_slice] = send_params; const auto [recv_indrank, recv_slice] = recv_params; const auto [send_ind, send_rank] = send_indrank; const auto [recv_ind, recv_rank] = recv_indrank; - if (send_rank < 0 and recv_rank < 0) { + + // skip if no communication is necessary + const auto is_sending = (send_rank >= 0); + const auto is_receiving = (recv_rank >= 0); + if (not is_sending and not is_receiving) { continue; } - const auto send_dir_tag = mpi::PrtlSendTag::dir2tag(direction); - const auto nsend = npart_per_tag[send_dir_tag]; - const auto send_pmin = tag_offset[send_dir_tag]; - const auto send_pmax = tag_offset[send_dir_tag] + nsend; - const auto recv_count = comm::CommunicateParticles( - species, - send_rank, - recv_rank, - { send_pmin, send_pmax }, - index_last); - if (recv_count > 0) { - if constexpr (D == Dim::_1D) { - int shift_in_x1 { 0 }; - if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active(in::x1); - } - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i1_prev = species.i1_prev; - Kokkos::parallel_for( - "CommunicateParticles", - recv_count, - Lambda(index_t p) { - this_tag(index_last + p) = ParticleTag::alive; - this_i1(index_last + p) += shift_in_x1; - this_i1_prev(index_last + p) += shift_in_x1; - }); - } else if constexpr (D == Dim::_2D) { - int shift_in_x1 { 0 }, shift_in_x2 { 0 }; - if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active()[0]; - } - if ((-direction)[1] == -1) { - shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); - } else if ((-direction)[1] == 1) { - shift_in_x2 = domain.mesh.n_active(in::x2); - } - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; - Kokkos::parallel_for( - "CommunicateParticles", - recv_count, - Lambda(index_t p) { - this_tag(index_last + p) = ParticleTag::alive; - this_i1(index_last + p) += shift_in_x1; - this_i2(index_last + p) += shift_in_x2; - this_i1_prev(index_last + p) += shift_in_x1; - this_i2_prev(index_last + p) += shift_in_x2; - }); - } else if constexpr (D == Dim::_3D) { - int shift_in_x1 { 0 }, shift_in_x2 { 0 }, shift_in_x3 { 0 }; - if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active(in::x1); + dirs_to_comm.push_back(direction); + send_ranks.push_back(send_rank); + recv_ranks.push_back(recv_rank); + send_inds.push_back(send_ind); + recv_inds.push_back(recv_ind); + + // record the # of particles to-be-sent + const auto nsend = npptag_vec[tag_send]; + + // request the # of particles to-be-received ... + // ... and send the # of particles to-be-sent + std::size_t nrecv = 0; + comm::ParticleSendRecvCount(send_rank, recv_rank, nsend, nrecv); + npart_recv += nrecv; + npptag_recv_vec[tag_recv - 2] = nrecv; + + raise::ErrorIf((npart + npart_recv) >= species.maxnpart(), + "Too many particles to receive (cannot fit into maxptl)", + HERE); + + // if sending, record displacements to apply before + // ... tag_send - 2: because we only shift tags > 2 (i.e. no dead/alive) + if (is_sending) { + if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { + if (direction[0] == -1) { + // sending backwards in x1 (add sx1 of target meshblock) + shifts_in_x1_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( + in::x1); + } else if (direction[0] == 1) { + // sending forward in x1 (subtract sx1 of source meshblock) + shifts_in_x1_h(tag_send - 2) = -domain.mesh.n_active(in::x1); } - if ((-direction)[1] == -1) { - shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); - } else if ((-direction)[1] == 1) { - shift_in_x2 = domain.mesh.n_active(in::x2); + } + if constexpr (D == Dim::_2D || D == Dim::_3D) { + if (direction[1] == -1) { + shifts_in_x2_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( + in::x2); + } else if (direction[1] == 1) { + shifts_in_x2_h(tag_send - 2) = -domain.mesh.n_active(in::x2); } - if ((-direction)[2] == -1) { - shift_in_x3 = -subdomain(recv_ind).mesh.n_active(in::x3); - } else if ((-direction)[2] == 1) { - shift_in_x3 = domain.mesh.n_active(in::x3); + } + if constexpr (D == Dim::_3D) { + if (direction[2] == -1) { + shifts_in_x3_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( + in::x3); + } else if (direction[2] == 1) { + shifts_in_x3_h(tag_send - 2) = -domain.mesh.n_active(in::x3); } - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i3 = species.i3; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; - auto& this_i3_prev = species.i3_prev; - Kokkos::parallel_for( - "CommunicateParticles", - recv_count, - Lambda(index_t p) { - this_tag(index_last + p) = ParticleTag::alive; - this_i1(index_last + p) += shift_in_x1; - this_i2(index_last + p) += shift_in_x2; - this_i3(index_last + p) += shift_in_x3; - this_i1_prev(index_last + p) += shift_in_x1; - this_i2_prev(index_last + p) += shift_in_x2; - this_i3_prev(index_last + p) += shift_in_x3; - }); } - index_last += recv_count; - species.set_npart(index_last); } + } // end directions loop - Kokkos::deep_copy( - Kokkos::subview(species.tag, std::make_pair(send_pmin, send_pmax)), - ParticleTag::dead); - } - timers->stop("Communications"); - // !TODO: maybe there is a way to not sort twice - timers->start("Sorting"); + Kokkos::deep_copy(shifts_in_x1, shifts_in_x1_h); + Kokkos::deep_copy(shifts_in_x2, shifts_in_x2_h); + Kokkos::deep_copy(shifts_in_x3, shifts_in_x3_h); + + array_t outgoing_indices { "outgoing_indices", + npart - npart_alive }; + // clang-format off + Kokkos::parallel_for( + "PrepareOutgoingPrtls", + species.rangeActiveParticles(), + kernel::comm::PrepareOutgoingPrtls_kernel( + shifts_in_x1, shifts_in_x2, shifts_in_x3, + outgoing_indices, + npart, npart_alive, npart_dead, ntags, + species.i1, species.i1_prev, + species.i2, species.i2_prev, + species.i3, species.i3_prev, + species.tag, tag_offsets) + ); + // clang-format on + + comm::CommunicateParticles(species, + outgoing_indices, + tag_offsets, + npptag_vec, + npptag_recv_vec, + send_ranks, + recv_ranks, + dirs_to_comm); species.set_unsorted(); - species.SortByTags(); - timers->stop("Sorting"); + } // end species loop +#else + (void)domain; #endif + } + + template + void Metadomain::RemoveDeadParticles(Domain& domain) { + for (auto& species : domain.species) { + species.RemoveDead(); } } diff --git a/src/framework/domain/domain.h b/src/framework/domain/domain.h index 397907fef..bc7c6e4b5 100644 --- a/src/framework/domain/domain.h +++ b/src/framework/domain/domain.h @@ -65,7 +65,7 @@ namespace ntt { Mesh mesh; Fields fields; std::vector> species; - random_number_pool_t random_pool { constant::RandomSeed }; + random_number_pool_t random_pool; /** * @brief constructor for "empty" allocation of non-local domain placeholders @@ -81,6 +81,7 @@ namespace ntt { : mesh { ncells, extent, metric_params } , fields {} , species {} + , random_pool { constant::RandomSeed } , m_index { index } , m_offset_ndomains { offset_ndomains } , m_offset_ncells { offset_ncells } {} @@ -95,6 +96,7 @@ namespace ntt { : mesh { ncells, extent, metric_params } , fields { ncells } , species { species_params.begin(), species_params.end() } + , random_pool { constant::RandomSeed + static_cast(index) } , m_index { index } , m_offset_ndomains { offset_ndomains } , m_offset_ncells { offset_ncells } {} @@ -144,8 +146,7 @@ namespace ntt { } /* setters -------------------------------------------------------------- */ - auto set_neighbor_idx(const dir::direction_t& dir, unsigned int idx) - -> void { + auto set_neighbor_idx(const dir::direction_t& dir, unsigned int idx) -> void { m_neighbor_idx[dir] = idx; } @@ -163,8 +164,8 @@ namespace ntt { }; template - inline auto operator<<(std::ostream& os, const Domain& domain) - -> std::ostream& { + inline auto operator<<(std::ostream& os, + const Domain& domain) -> std::ostream& { os << "Domain #" << domain.index(); #if defined(MPI_ENABLED) os << " [MPI rank: " << domain.mpi_rank() << "]"; diff --git a/src/framework/domain/metadomain.cpp b/src/framework/domain/metadomain.cpp index 5e66bc366..ec8561a9a 100644 --- a/src/framework/domain/metadomain.cpp +++ b/src/framework/domain/metadomain.cpp @@ -46,6 +46,9 @@ namespace ntt { #if defined(MPI_ENABLED) MPI_Comm_size(MPI_COMM_WORLD, &g_mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &g_mpi_rank); + raise::ErrorIf(global_ndomains != g_mpi_size, + "Exactly 1 domain per MPI rank is allowed", + HERE); #endif initialValidityCheck(); diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 7b3042b5b..5177571d0 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -88,7 +88,8 @@ namespace ntt { void CommunicateFields(Domain&, CommTags); void SynchronizeFields(Domain&, CommTags, const range_tuple_t& = { 0, 0 }); - void CommunicateParticles(Domain&, timer::Timers*); + void CommunicateParticles(Domain&); + void RemoveDeadParticles(Domain&); /** * @param global_ndomains total number of domains diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index c7cb6bb65..c39f0c67f 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -271,7 +271,6 @@ namespace ntt { }); g_writer.writeMesh(dim, xc, xe); } - const auto output_asis = params.template get("output.debug.as_is"); // !TODO: this can probably be optimized to dump things at once for (auto& fld : g_writer.fieldWriters()) { @@ -475,16 +474,14 @@ namespace ntt { for (const auto& prtl : g_writer.speciesWriters()) { auto& species = local_domain->species[prtl.species() - 1]; if (not species.is_sorted()) { - species.SortByTags(); + species.RemoveDead(); } const std::size_t nout = species.npart() / prtl_stride; array_t buff_x1, buff_x2, buff_x3; - array_t buff_ux1, buff_ux2, buff_ux3; - array_t buff_wei; - buff_wei = array_t { "w", nout }; - buff_ux1 = array_t { "u1", nout }; - buff_ux2 = array_t { "u2", nout }; - buff_ux3 = array_t { "u3", nout }; + array_t buff_ux1 { "u1", nout }; + array_t buff_ux2 { "ux2", nout }; + array_t buff_ux3 { "ux3", nout }; + array_t buff_wei { "w", nout }; if constexpr (M::Dim == Dim::_1D or M::Dim == Dim::_2D or M::Dim == Dim::_3D) { buff_x1 = array_t { "x1", nout }; diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index b667b5ac9..af7a773ed 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -31,10 +31,10 @@ namespace ntt { template - auto get_dx0_V0(const std::vector& resolution, - const boundaries_t& extent, - const std::map& params) - -> std::pair { + auto get_dx0_V0( + const std::vector& resolution, + const boundaries_t& extent, + const std::map& params) -> std::pair { const auto metric = M(resolution, extent, params); const auto dx0 = metric.dxMin(); coord_t x_corner { ZERO }; @@ -445,15 +445,8 @@ namespace ntt { defaults::gr::pusher_niter)); } /* [particles] ---------------------------------------------------------- */ -#if defined(MPI_ENABLED) - const std::size_t sort_interval = 1; -#else - const std::size_t sort_interval = toml::find_or(toml_data, - "particles", - "sort_interval", - defaults::sort_interval); -#endif - set("particles.sort_interval", sort_interval); + set("particles.clear_interval", + toml::find_or(toml_data, "particles", "clear_interval", defaults::clear_interval)); /* [output] ------------------------------------------------------------- */ // fields diff --git a/src/framework/tests/parameters.cpp b/src/framework/tests/parameters.cpp index 393cd2409..1a4228642 100644 --- a/src/framework/tests/parameters.cpp +++ b/src/framework/tests/parameters.cpp @@ -48,7 +48,7 @@ const auto mink_1d = u8R"( [particles] ppc0 = 10.0 - sort_interval = 100 + clear_interval = 100 [[particles.species]] label = "e-" @@ -134,7 +134,7 @@ const auto sph_2d = u8R"( [particles] ppc0 = 25.0 use_weights = true - sort_interval = 50 + clear_interval = 50 [[particles.species]] @@ -199,7 +199,7 @@ const auto qks_2d = u8R"( [particles] ppc0 = 4.0 - sort_interval = 100 + clear_interval = 100 [[particles.species]] label = "e-" @@ -269,7 +269,7 @@ auto main(int argc, char* argv[]) -> int { (real_t)0.0078125, "scales.V0"); boundaries_t fbc = { - {FldsBC::PERIODIC, FldsBC::PERIODIC} + { FldsBC::PERIODIC, FldsBC::PERIODIC } }; assert_equal( params_mink_1d.get>("grid.boundaries.fields")[0].first, @@ -345,8 +345,8 @@ auto main(int argc, char* argv[]) -> int { "simulation.engine"); boundaries_t fbc = { - {FldsBC::ATMOSPHERE, FldsBC::ABSORB}, - { FldsBC::AXIS, FldsBC::AXIS} + { FldsBC::ATMOSPHERE, FldsBC::ABSORB }, + { FldsBC::AXIS, FldsBC::AXIS } }; assert_equal(params_sph_2d.get("scales.B0"), @@ -480,9 +480,9 @@ auto main(int argc, char* argv[]) -> int { "grid.metric.ks_rh"); const auto expect = std::map { - {"r0", 0.0}, - { "h", 0.25}, - { "a", 0.99} + { "r0", 0.0 }, + { "h", 0.25 }, + { "a", 0.99 } }; auto read = params_qks_2d.get>( "grid.metric.params"); @@ -501,8 +501,8 @@ auto main(int argc, char* argv[]) -> int { "algorithms.gr.pusher_niter"); boundaries_t pbc = { - {PrtlBC::HORIZON, PrtlBC::ABSORB}, - { PrtlBC::AXIS, PrtlBC::AXIS} + { PrtlBC::HORIZON, PrtlBC::ABSORB }, + { PrtlBC::AXIS, PrtlBC::AXIS } }; assert_equal(params_qks_2d.get("scales.B0"), @@ -579,86 +579,3 @@ auto main(int argc, char* argv[]) -> int { return 0; } - -// const auto mink_1d = R"( -// [simulation] -// name = "" -// engine = "" -// runtime = "" - -// [grid] -// resolution = "" -// extent = "" - -// [grid.metric] -// metric = "" -// qsph_r0 = "" -// qsph_h = "" -// ks_a = "" - -// [grid.boundaries] -// fields = "" -// particles = "" -// absorb_d = "" -// absorb_coeff = "" - -// [scales] -// larmor0 = "" -// skindepth0 = "" - -// [algorithms] -// current_filters = "" - -// [algorithms.toggles] -// fieldsolver = "" -// deposit = "" - -// [algorithms.timestep] -// CFL = "" -// correction = "" - -// [algorithms.gr] -// pusher_eps = "" -// pusher_niter = "" - -// [algorithms.gca] -// e_ovr_b_max = "" -// larmor_max = "" - -// [algorithms.synchrotron] -// gamma_rad = "" - -// [particles] -// ppc0 = "" -// use_weights = "" -// sort_interval = "" - -// [[particles.species]] -// label = "" -// mass = "" -// charge = "" -// maxnpart = "" -// pusher = "" -// n_payloads = "" -// cooling = "" -// [setup] - -// [output] -// fields = "" -// particles = "" -// format = "" -// mom_smooth = "" -// fields_stride = "" -// prtl_stride = "" -// interval = "" -// interval_time = "" - -// [output.debug] -// as_is = "" -// ghosts = "" - -// [diagnostics] -// interval = "" -// log_level = "" -// blocking_timers = "" -// )"_toml; diff --git a/src/framework/tests/particles.cpp b/src/framework/tests/particles.cpp index dabcc062f..6c4c227b5 100644 --- a/src/framework/tests/particles.cpp +++ b/src/framework/tests/particles.cpp @@ -46,9 +46,9 @@ void testParticles(const int& index, raise::ErrorIf(p.tag.extent(0) != maxnpart, "tag incorrectly allocated", HERE); raise::ErrorIf(p.weight.extent(0) != maxnpart, "weight incorrectly allocated", HERE); - raise::ErrorIf(p.pld.size() != npld, "Number of payloads mismatch", HERE); - for (unsigned short n { 0 }; n < npld; ++n) { - raise::ErrorIf(p.pld[n].extent(0) != maxnpart, "pld incorrectly allocated", HERE); + if (npld > 0) { + raise::ErrorIf(p.pld.extent(0) != maxnpart, "pld incorrectly allocated", HERE); + raise::ErrorIf(p.pld.extent(1) != npld, "pld incorrectly allocated", HERE); } if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { @@ -117,7 +117,8 @@ auto main(int argc, char** argv) -> int { 0.0, 100, PrtlPusher::PHOTON, - Cooling::NONE); + Cooling::NONE, + 5); testParticles(4, "e+", 1.0, @@ -131,7 +132,8 @@ auto main(int argc, char** argv) -> int { 1.0, 100, PrtlPusher::BORIS, - Cooling::NONE); + Cooling::NONE, + 1); } catch (const std::exception& e) { std::cerr << "Error: " << e.what() << std::endl; Kokkos::finalize(); @@ -139,4 +141,4 @@ auto main(int argc, char** argv) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/global/arch/directions.h b/src/global/arch/directions.h index 19cf182d6..ccd4e67b0 100644 --- a/src/global/arch/directions.h +++ b/src/global/arch/directions.h @@ -132,8 +132,8 @@ namespace dir { using dirs_t = std::vector>; template - inline auto operator<<(std::ostream& os, const direction_t& dir) - -> std::ostream& { + inline auto operator<<(std::ostream& os, + const direction_t& dir) -> std::ostream& { for (auto& d : dir) { os << std::setw(2) << std::left; if (d > 0) { @@ -175,81 +175,81 @@ namespace dir { template <> struct Directions { inline static const dirs_t all = { - {-1, -1}, - {-1, 0}, - {-1, 1}, - { 0, -1}, - { 0, 1}, - { 1, -1}, - { 1, 0}, - { 1, 1} + { -1, -1 }, + { -1, 0 }, + { -1, 1 }, + { 0, -1 }, + { 0, 1 }, + { 1, -1 }, + { 1, 0 }, + { 1, 1 } }; inline static const dirs_t orth = { - {-1, 0}, - { 0, -1}, - { 0, 1}, - { 1, 0} + { -1, 0 }, + { 0, -1 }, + { 0, 1 }, + { 1, 0 } }; inline static const dirs_t unique = { - { 0, 1}, - { 1, 1}, - { 1, 0}, - {-1, 1} + { 0, 1 }, + { 1, 1 }, + { 1, 0 }, + { -1, 1 } }; }; template <> struct Directions { inline static const dirs_t all = { - {-1, -1, -1}, - {-1, -1, 0}, - {-1, -1, 1}, - {-1, 0, -1}, - {-1, 0, 0}, - {-1, 0, 1}, - {-1, 1, -1}, - {-1, 1, 0}, - {-1, 1, 1}, - { 0, -1, -1}, - { 0, -1, 0}, - { 0, -1, 1}, - { 0, 0, -1}, - { 0, 0, 1}, - { 0, 1, -1}, - { 0, 1, 0}, - { 0, 1, 1}, - { 1, -1, -1}, - { 1, -1, 0}, - { 1, -1, 1}, - { 1, 0, -1}, - { 1, 0, 0}, - { 1, 0, 1}, - { 1, 1, -1}, - { 1, 1, 0}, - { 1, 1, 1} + { -1, -1, -1 }, + { -1, -1, 0 }, + { -1, -1, 1 }, + { -1, 0, -1 }, + { -1, 0, 0 }, + { -1, 0, 1 }, + { -1, 1, -1 }, + { -1, 1, 0 }, + { -1, 1, 1 }, + { 0, -1, -1 }, + { 0, -1, 0 }, + { 0, -1, 1 }, + { 0, 0, -1 }, + { 0, 0, 1 }, + { 0, 1, -1 }, + { 0, 1, 0 }, + { 0, 1, 1 }, + { 1, -1, -1 }, + { 1, -1, 0 }, + { 1, -1, 1 }, + { 1, 0, -1 }, + { 1, 0, 0 }, + { 1, 0, 1 }, + { 1, 1, -1 }, + { 1, 1, 0 }, + { 1, 1, 1 } }; inline static const dirs_t orth = { - {-1, 0, 0}, - { 0, -1, 0}, - { 0, 0, -1}, - { 0, 0, 1}, - { 0, 1, 0}, - { 1, 0, 0} + { -1, 0, 0 }, + { 0, -1, 0 }, + { 0, 0, -1 }, + { 0, 0, 1 }, + { 0, 1, 0 }, + { 1, 0, 0 } }; inline static const dirs_t unique = { - { 0, 0, 1}, - { 0, 1, 0}, - { 1, 0, 0}, - { 1, 1, 0}, - {-1, 1, 0}, - { 0, 1, 1}, - { 0, -1, 1}, - { 1, 0, 1}, - {-1, 0, 1}, - { 1, 1, 1}, - {-1, 1, 1}, - { 1, -1, 1}, - { 1, 1, -1} + { 0, 0, 1 }, + { 0, 1, 0 }, + { 1, 0, 0 }, + { 1, 1, 0 }, + { -1, 1, 0 }, + { 0, 1, 1 }, + { 0, -1, 1 }, + { 1, 0, 1 }, + { -1, 0, 1 }, + { 1, 1, 1 }, + { -1, 1, 1 }, + { 1, -1, 1 }, + { 1, 1, -1 } }; }; diff --git a/src/global/arch/kokkos_aliases.cpp b/src/global/arch/kokkos_aliases.cpp index 4311a40bd..6c15e3d52 100644 --- a/src/global/arch/kokkos_aliases.cpp +++ b/src/global/arch/kokkos_aliases.cpp @@ -5,18 +5,18 @@ #include template <> -auto CreateRangePolicy(const tuple_t& i1, - const tuple_t& i2) - -> range_t { +auto CreateRangePolicy( + const tuple_t& i1, + const tuple_t& i2) -> range_t { index_t i1min = i1[0]; index_t i1max = i2[0]; return Kokkos::RangePolicy(i1min, i1max); } template <> -auto CreateRangePolicy(const tuple_t& i1, - const tuple_t& i2) - -> range_t { +auto CreateRangePolicy( + const tuple_t& i1, + const tuple_t& i2) -> range_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -26,9 +26,9 @@ auto CreateRangePolicy(const tuple_t& i1, } template <> -auto CreateRangePolicy(const tuple_t& i1, - const tuple_t& i2) - -> range_t { +auto CreateRangePolicy( + const tuple_t& i1, + const tuple_t& i2) -> range_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -41,18 +41,18 @@ auto CreateRangePolicy(const tuple_t& i1, } template <> -auto CreateRangePolicyOnHost(const tuple_t& i1, - const tuple_t& i2) - -> range_h_t { +auto CreateRangePolicyOnHost( + const tuple_t& i1, + const tuple_t& i2) -> range_h_t { index_t i1min = i1[0]; index_t i1max = i2[0]; return Kokkos::RangePolicy(i1min, i1max); } template <> -auto CreateRangePolicyOnHost(const tuple_t& i1, - const tuple_t& i2) - -> range_h_t { +auto CreateRangePolicyOnHost( + const tuple_t& i1, + const tuple_t& i2) -> range_h_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -62,9 +62,9 @@ auto CreateRangePolicyOnHost(const tuple_t& i1, } template <> -auto CreateRangePolicyOnHost(const tuple_t& i1, - const tuple_t& i2) - -> range_h_t { +auto CreateRangePolicyOnHost( + const tuple_t& i1, + const tuple_t& i2) -> range_h_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -76,11 +76,11 @@ auto CreateRangePolicyOnHost(const tuple_t& i1, { i1max, i2max, i3max }); } -// auto WaitAndSynchronize(bool debug_only) -> void { -// if (debug_only) { -// #ifndef DEBUG -// return; -// #endif -// } -// Kokkos::fence(); -// } \ No newline at end of file +auto WaitAndSynchronize(bool debug_only) -> void { + if (debug_only) { +#ifndef DEBUG + return; +#endif + } + Kokkos::fence(); +} diff --git a/src/global/arch/mpi_tags.h b/src/global/arch/mpi_tags.h index 0916542d4..aaf38a8f4 100644 --- a/src/global/arch/mpi_tags.h +++ b/src/global/arch/mpi_tags.h @@ -7,6 +7,8 @@ * @namespaces: * - mpi:: */ +#ifndef GLOBAL_ARCH_MPI_TAGS_H +#define GLOBAL_ARCH_MPI_TAGS_H #include "global.h" @@ -188,8 +190,13 @@ namespace mpi { tag; } - Inline auto SendTag(short tag, bool im1, bool ip1, bool jm1, bool jp1, bool km1, bool kp1) - -> short { + Inline auto SendTag(short tag, + bool im1, + bool ip1, + bool jm1, + bool jp1, + bool km1, + bool kp1) -> short { return ((im1 && jm1 && km1) * (PrtlSendTag::im1_jm1_km1 - 1) + (im1 && jm1 && kp1) * (PrtlSendTag::im1_jm1_kp1 - 1) + (im1 && jp1 && km1) * (PrtlSendTag::im1_jp1_km1 - 1) + @@ -226,3 +233,5 @@ namespace mpi { tag; } } // namespace mpi + +#endif // GLOBAL_ARCH_MPI_TAGS_H diff --git a/src/global/defaults.h b/src/global/defaults.h index be92acbf9..b7b0107e7 100644 --- a/src/global/defaults.h +++ b/src/global/defaults.h @@ -22,9 +22,9 @@ namespace ntt::defaults { const unsigned short current_filters = 0; - const std::string em_pusher = "Boris"; - const std::string ph_pusher = "Photon"; - const std::size_t sort_interval = 100; + const std::string em_pusher = "Boris"; + const std::string ph_pusher = "Photon"; + const std::size_t clear_interval = 100; namespace qsph { const real_t r0 = 0.0; @@ -45,7 +45,7 @@ namespace ntt::defaults { const real_t ds_frac = 0.01; const real_t coeff = 1.0; } // namespace absorb - } // namespace bc + } // namespace bc namespace output { const std::string format = "hdf5"; diff --git a/src/global/global.h b/src/global/global.h index ad524fb0e..dad6afccc 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -209,7 +209,7 @@ namespace Timer { PrintTitle = 1 << 1, AutoConvert = 1 << 2, PrintOutput = 1 << 3, - PrintSorting = 1 << 4, + PrintPrtlClear = 1 << 4, PrintCheckpoint = 1 << 5, PrintNormed = 1 << 6, Default = PrintNormed | PrintTotal | PrintTitle | AutoConvert, diff --git a/src/global/tests/kokkos_aliases.cpp b/src/global/tests/kokkos_aliases.cpp index 56a17c50f..909b6b30c 100644 --- a/src/global/tests/kokkos_aliases.cpp +++ b/src/global/tests/kokkos_aliases.cpp @@ -3,6 +3,7 @@ #include "global.h" #include +#include #include #include @@ -44,8 +45,7 @@ auto main(int argc, char* argv[]) -> int { { // scatter arrays & ranges array_t a { "a", 100 }; - scatter_array_t a_scatter = Kokkos::Experimental::create_scatter_view( - a); + auto a_scatter = Kokkos::Experimental::create_scatter_view(a); Kokkos::parallel_for( // range_t({ 0 }, { 100 }), CreateRangePolicy({ 0 }, { 100 }), @@ -87,4 +87,4 @@ auto main(int argc, char* argv[]) -> int { Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/global/utils/diag.cpp b/src/global/utils/diag.cpp index 0a499dd56..c053cdacf 100644 --- a/src/global/utils/diag.cpp +++ b/src/global/utils/diag.cpp @@ -21,8 +21,9 @@ #include namespace diag { - auto npart_stats(std::size_t npart, std::size_t maxnpart) - -> std::vector> { + auto npart_stats( + std::size_t npart, + std::size_t maxnpart) -> std::vector> { auto stats = std::vector>(); #if !defined(MPI_ENABLED) stats.push_back( @@ -84,7 +85,7 @@ namespace diag { const std::vector& species_labels, const std::vector& species_npart, const std::vector& species_maxnpart, - bool print_sorting, + bool print_prtl_clear, bool print_output, bool print_checkpoint, bool print_colors) { @@ -96,8 +97,8 @@ namespace diag { if (species_labels.size() == 0) { diag_flags ^= Diag::Species; } - if (print_sorting) { - timer_flags |= Timer::PrintSorting; + if (print_prtl_clear) { + timer_flags |= Timer::PrintPrtlClear; } if (print_output) { timer_flags |= Timer::PrintOutput; diff --git a/src/global/utils/diag.h b/src/global/utils/diag.h index 9951602f8..30cca5705 100644 --- a/src/global/utils/diag.h +++ b/src/global/utils/diag.h @@ -34,9 +34,9 @@ namespace diag { * @param species_labels (vector of particle labels) * @param npart (per each species) * @param maxnpart (per each species) - * @param sorting_step (if true, particles were sorted) - * @param output_step (if true, output was written) - * @param checkpoint_step (if true, checkpoint was written) + * @param prtlclear (if true, dead particles were removed) + * @param output (if true, output was written) + * @param checkpoint (if true, checkpoint was written) * @param colorful_print (if true, print with colors) */ void printDiagnostics(std::size_t, diff --git a/src/global/utils/timer.cpp b/src/global/utils/timer.cpp index b5f4408ca..7d5a9bebd 100644 --- a/src/global/utils/timer.cpp +++ b/src/global/utils/timer.cpp @@ -127,10 +127,11 @@ namespace timer { return timer_stats; } - auto Timers::printAll(TimerFlags flags, std::size_t npart, std::size_t ncells) const - -> std::string { - const std::vector extras { "Sorting", "Output", "Checkpoint" }; - const auto stats = gather(extras, npart, ncells); + auto Timers::printAll(TimerFlags flags, + std::size_t npart, + std::size_t ncells) const -> std::string { + const std::vector extras { "PrtlClear", "Output", "Checkpoint" }; + const auto stats = gather(extras, npart, ncells); if (stats.empty()) { return ""; } @@ -253,8 +254,8 @@ namespace timer { } } - // print extra timers for output/checkpoint/sorting - const std::vector extras_f { Timer::PrintSorting, + // print extra timers for output/checkpoint/prtlClear + const std::vector extras_f { Timer::PrintPrtlClear, Timer::PrintOutput, Timer::PrintCheckpoint }; for (auto i { 0u }; i < extras.size(); ++i) { diff --git a/src/kernels/comm.hpp b/src/kernels/comm.hpp new file mode 100644 index 000000000..b280ce38b --- /dev/null +++ b/src/kernels/comm.hpp @@ -0,0 +1,341 @@ +/** + * @file kernels/comm.hpp + * @brief Kernels used during communications + * @implements + * - kernel::comm::PrepareOutgoingPrtls_kernel<> + * - kernel::comm::PopulatePrtlSendBuffer_kernel<> + * - kernel::comm::ExtractReceivedPrtls_kernel<> + * @namespaces: + * - kernel::comm:: + */ + +#ifndef KERNELS_COMM_HPP +#define KERNELS_COMM_HPP + +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" + +#include + +namespace kernel::comm { + using namespace ntt; + + template + class PrepareOutgoingPrtls_kernel { + const array_t shifts_in_x1, shifts_in_x2, shifts_in_x3; + array_t outgoing_indices; + + const std::size_t npart, npart_alive, npart_dead, ntags; + + array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; + const array_t tag; + + const array_t tag_offsets; + + array_t current_offset; + + public: + PrepareOutgoingPrtls_kernel(const array_t& shifts_in_x1, + const array_t& shifts_in_x2, + const array_t& shifts_in_x3, + array_t& outgoing_indices, + std::size_t npart, + std::size_t npart_alive, + std::size_t npart_dead, + std::size_t ntags, + array_t& i1, + array_t& i1_prev, + array_t& i2, + array_t& i2_prev, + array_t& i3, + array_t& i3_prev, + const array_t& tag, + const array_t& tag_offsets) + : shifts_in_x1 { shifts_in_x1 } + , shifts_in_x2 { shifts_in_x2 } + , shifts_in_x3 { shifts_in_x3 } + , outgoing_indices { outgoing_indices } + , npart { npart } + , npart_alive { npart_alive } + , npart_dead { npart_dead } + , ntags { ntags } + , i1 { i1 } + , i1_prev { i1_prev } + , i2 { i2 } + , i2_prev { i2_prev } + , i3 { i3 } + , i3_prev { i3_prev } + , tag { tag } + , tag_offsets { tag_offsets } + , current_offset { "current_offset", ntags } {} + + Inline void operator()(index_t p) const { + if (tag(p) != ParticleTag::alive) { + // dead or to-be-sent + auto idx_for_tag = Kokkos::atomic_fetch_add(¤t_offset(tag(p)), 1); + if (tag(p) != ParticleTag::dead) { + idx_for_tag += npart_dead; + } + if (tag(p) > 2) { + idx_for_tag += tag_offsets(tag(p) - 3); + } + if (idx_for_tag >= npart - npart_alive) { + raise::KernelError(HERE, "Outgoing indices idx exceeds the array size"); + } + outgoing_indices(idx_for_tag) = p; + // apply offsets + if (tag(p) != ParticleTag::dead) { + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + i1(p) += shifts_in_x1(tag(p) - 2); + i1_prev(p) += shifts_in_x1(tag(p) - 2); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + i2(p) += shifts_in_x2(tag(p) - 2); + i2_prev(p) += shifts_in_x2(tag(p) - 2); + } + if constexpr (D == Dim::_3D) { + i3(p) += shifts_in_x3(tag(p) - 2); + i3_prev(p) += shifts_in_x3(tag(p) - 2); + } + } + } + } + }; + + template + class PopulatePrtlSendBuffer_kernel { + array_t send_buff_int; + array_t send_buff_real; + array_t send_buff_prtldx; + array_t send_buff_pld; + + const unsigned short NINTS, NREALS, NPRTLDX, NPLDS; + const std::size_t idx_offset; + + const array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; + const array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; + const array_t ux1, ux2, ux3, weight, phi; + const array_t pld; + array_t tag; + const array_t outgoing_indices; + + public: + PopulatePrtlSendBuffer_kernel(array_t& send_buff_int, + array_t& send_buff_real, + array_t& send_buff_prtldx, + array_t& send_buff_pld, + unsigned short NINTS, + unsigned short NREALS, + unsigned short NPRTLDX, + unsigned short NPLDS, + std::size_t idx_offset, + const array_t& i1, + const array_t& i1_prev, + const array_t& dx1, + const array_t& dx1_prev, + const array_t& i2, + const array_t& i2_prev, + const array_t& dx2, + const array_t& dx2_prev, + const array_t& i3, + const array_t& i3_prev, + const array_t& dx3, + const array_t& dx3_prev, + const array_t& ux1, + const array_t& ux2, + const array_t& ux3, + const array_t& weight, + const array_t& phi, + const array_t& pld, + array_t& tag, + const array_t& outgoing_indices) + : send_buff_int { send_buff_int } + , send_buff_real { send_buff_real } + , send_buff_prtldx { send_buff_prtldx } + , send_buff_pld { send_buff_pld } + , NINTS { NINTS } + , NREALS { NREALS } + , NPRTLDX { NPRTLDX } + , NPLDS { NPLDS } + , idx_offset { idx_offset } + , i1 { i1 } + , i1_prev { i1_prev } + , i2 { i2 } + , i2_prev { i2_prev } + , i3 { i3 } + , i3_prev { i3_prev } + , dx1 { dx1 } + , dx1_prev { dx1_prev } + , dx2 { dx2 } + , dx2_prev { dx2_prev } + , dx3 { dx3 } + , dx3_prev { dx3_prev } + , ux1 { ux1 } + , ux2 { ux2 } + , ux3 { ux3 } + , weight { weight } + , phi { phi } + , pld { pld } + , tag { tag } + , outgoing_indices { outgoing_indices } {} + + Inline void operator()(index_t p) const { + const auto idx = outgoing_indices(idx_offset + p); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + send_buff_int(NINTS * p + 0) = i1(idx); + send_buff_int(NINTS * p + 1) = i1_prev(idx); + send_buff_prtldx(NPRTLDX * p + 0) = dx1(idx); + send_buff_prtldx(NPRTLDX * p + 1) = dx1_prev(idx); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + send_buff_int(NINTS * p + 2) = i2(idx); + send_buff_int(NINTS * p + 3) = i2_prev(idx); + send_buff_prtldx(NPRTLDX * p + 2) = dx2(idx); + send_buff_prtldx(NPRTLDX * p + 3) = dx2_prev(idx); + } + if constexpr (D == Dim::_3D) { + send_buff_int(NINTS * p + 4) = i3(idx); + send_buff_int(NINTS * p + 5) = i3_prev(idx); + send_buff_prtldx(NPRTLDX * p + 4) = dx3(idx); + send_buff_prtldx(NPRTLDX * p + 5) = dx3_prev(idx); + } + send_buff_real(NREALS * p + 0) = ux1(idx); + send_buff_real(NREALS * p + 1) = ux2(idx); + send_buff_real(NREALS * p + 2) = ux3(idx); + send_buff_real(NREALS * p + 3) = weight(idx); + if constexpr (D == Dim::_2D and C != Coord::Cart) { + send_buff_real(NREALS * p + 4) = phi(idx); + } + if (NPLDS > 0) { + for (auto l { 0u }; l < NPLDS; ++l) { + send_buff_pld(NPLDS * p + l) = pld(idx, l); + } + } + tag(idx) = ParticleTag::dead; + } + }; + + template + class ExtractReceivedPrtls_kernel { + const array_t recv_buff_int; + const array_t recv_buff_real; + const array_t recv_buff_prtldx; + const array_t recv_buff_pld; + + const unsigned short NINTS, NREALS, NPRTLDX, NPLDS; + const std::size_t npart, npart_holes; + + array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; + array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; + array_t ux1, ux2, ux3, weight, phi; + array_t pld; + array_t tag; + const array_t outgoing_indices; + + public: + ExtractReceivedPrtls_kernel(const array_t& recv_buff_int, + const array_t& recv_buff_real, + const array_t& recv_buff_prtldx, + const array_t& recv_buff_pld, + unsigned short NINTS, + unsigned short NREALS, + unsigned short NPRTLDX, + unsigned short NPLDS, + std::size_t npart, + array_t& i1, + array_t& i1_prev, + array_t& dx1, + array_t& dx1_prev, + array_t& i2, + array_t& i2_prev, + array_t& dx2, + array_t& dx2_prev, + array_t& i3, + array_t& i3_prev, + array_t& dx3, + array_t& dx3_prev, + array_t& ux1, + array_t& ux2, + array_t& ux3, + array_t& weight, + array_t& phi, + array_t& pld, + array_t& tag, + const array_t& outgoing_indices) + : recv_buff_int { recv_buff_int } + , recv_buff_real { recv_buff_real } + , recv_buff_prtldx { recv_buff_prtldx } + , recv_buff_pld { recv_buff_pld } + , NINTS { NINTS } + , NREALS { NREALS } + , NPRTLDX { NPRTLDX } + , NPLDS { NPLDS } + , npart { npart } + , npart_holes { outgoing_indices.extent(0) } + , i1 { i1 } + , i1_prev { i1_prev } + , i2 { i2 } + , i2_prev { i2_prev } + , i3 { i3 } + , i3_prev { i3_prev } + , dx1 { dx1 } + , dx1_prev { dx1_prev } + , dx2 { dx2 } + , dx2_prev { dx2_prev } + , dx3 { dx3 } + , dx3_prev { dx3_prev } + , ux1 { ux1 } + , ux2 { ux2 } + , ux3 { ux3 } + , weight { weight } + , phi { phi } + , pld { pld } + , tag { tag } + , outgoing_indices { outgoing_indices } {} + + Inline void operator()(index_t p) const { + std::size_t idx; + if (p >= npart_holes) { + idx = npart + p - npart_holes; + } else { + idx = outgoing_indices(p); + } + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + i1(idx) = recv_buff_int(NINTS * p + 0); + i1_prev(idx) = recv_buff_int(NINTS * p + 1); + dx1(idx) = recv_buff_prtldx(NPRTLDX * p + 0); + dx1_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 1); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + i2(idx) = recv_buff_int(NINTS * p + 2); + i2_prev(idx) = recv_buff_int(NINTS * p + 3); + dx2(idx) = recv_buff_prtldx(NPRTLDX * p + 2); + dx2_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 3); + } + if constexpr (D == Dim::_3D) { + i3(idx) = recv_buff_int(NINTS * p + 4); + i3_prev(idx) = recv_buff_int(NINTS * p + 5); + dx3(idx) = recv_buff_prtldx(NPRTLDX * p + 4); + dx3_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 5); + } + ux1(idx) = recv_buff_real(NREALS * p + 0); + ux2(idx) = recv_buff_real(NREALS * p + 1); + ux3(idx) = recv_buff_real(NREALS * p + 2); + weight(idx) = recv_buff_real(NREALS * p + 3); + if constexpr (D == Dim::_2D and C != Coord::Cart) { + phi(idx) = recv_buff_real(NREALS * p + 4); + } + if (NPLDS > 0) { + for (auto l { 0u }; l < NPLDS; ++l) { + pld(idx, l) = recv_buff_pld(NPLDS * p + l); + } + } + tag(idx) = ParticleTag::alive; + } + }; + +} // namespace kernel::comm + +#endif // KERNELS_COMM_HPP diff --git a/src/kernels/particle_moments.hpp b/src/kernels/particle_moments.hpp index 8b668a036..0621646ad 100644 --- a/src/kernels/particle_moments.hpp +++ b/src/kernels/particle_moments.hpp @@ -223,7 +223,6 @@ namespace kernel { } coeff *= weight(p) * smooth; } - auto buff_access = Buff.access(); if constexpr (D == Dim::_1D) { for (auto di1 { -window }; di1 <= window; ++di1) { diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 0deb73c6f..2e8a5f652 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -90,7 +90,7 @@ namespace kernel::sr { Force(const F& pgen_force) : Force { pgen_force, - {ZERO, ZERO, ZERO}, + { ZERO, ZERO, ZERO }, ZERO, ZERO } { @@ -227,41 +227,41 @@ namespace kernel::sr { const real_t coeff_sync; public: - Pusher_kernel(const PrtlPusher::type& pusher, - bool GCA, - bool ext_force, - CoolingTags cooling, - const ndfield_t& EB, - unsigned short sp, - array_t& i1, - array_t& i2, - array_t& i3, - array_t& i1_prev, - array_t& i2_prev, - array_t& i3_prev, - array_t& dx1, - array_t& dx2, - array_t& dx3, - array_t& dx1_prev, - array_t& dx2_prev, - array_t& dx3_prev, - array_t& ux1, - array_t& ux2, - array_t& ux3, - array_t& phi, - array_t& tag, - const M& metric, - const F& force, - real_t time, - real_t coeff, - real_t dt, - int ni1, - int ni2, - int ni3, - const boundaries_t& boundaries, - real_t gca_larmor_max, - real_t gca_eovrb_max, - real_t coeff_sync) + Pusher_kernel(const PrtlPusher::type& pusher, + bool GCA, + bool ext_force, + CoolingTags cooling, + const randacc_ndfield_t& EB, + unsigned short sp, + array_t& i1, + array_t& i2, + array_t& i3, + array_t& i1_prev, + array_t& i2_prev, + array_t& i3_prev, + array_t& dx1, + array_t& dx2, + array_t& dx3, + array_t& dx1_prev, + array_t& dx2_prev, + array_t& dx3_prev, + array_t& ux1, + array_t& ux2, + array_t& ux3, + array_t& phi, + array_t& tag, + const M& metric, + const F& force, + real_t time, + real_t coeff, + real_t dt, + int ni1, + int ni2, + int ni3, + const boundaries_t& boundaries, + real_t gca_larmor_max, + real_t gca_eovrb_max, + real_t coeff_sync) : pusher { pusher } , GCA { GCA } , ext_force { ext_force } diff --git a/src/kernels/tests/deposit.cpp b/src/kernels/tests/deposit.cpp index 9a8ae1cc6..ec364a313 100644 --- a/src/kernels/tests/deposit.cpp +++ b/src/kernels/tests/deposit.cpp @@ -29,8 +29,7 @@ void errorIf(bool condition, const std::string& message) { inline static constexpr auto epsilon = std::numeric_limits::epsilon(); -Inline auto equal(real_t a, real_t b, const char* msg = "", real_t acc = ONE) - -> bool { +Inline auto equal(real_t a, real_t b, const char* msg = "", real_t acc = ONE) -> bool { const auto eps = epsilon * acc; if (not cmp::AlmostEqual(a, b, eps)) { printf("%.12e != %.12e %s\n", a, b, msg); @@ -81,8 +80,6 @@ void testDeposit(const std::vector& res, array_t tag { "tag", 10 }; const real_t charge { 1.0 }, inv_dt { 1.0 }; - auto J_scat = Kokkos::Experimental::create_scatter_view(J); - const int i0 = 4, j0 = 4; const prtldx_t dxi = 0.53, dxf = 0.47; @@ -122,30 +119,19 @@ void testDeposit(const std::vector& res, put_value(weight, 1.0, 0); put_value(tag, ParticleTag::alive, 0); - Kokkos::parallel_for("CurrentsDeposit", - 10, + auto J_scat = Kokkos::Experimental::create_scatter_view(J); + + // clang-format off + Kokkos::parallel_for("CurrentsDeposit", 10, kernel::DepositCurrents_kernel(J_scat, - i1, - i2, - i3, - i1_prev, - i2_prev, - i3_prev, - dx1, - dx2, - dx3, - dx1_prev, - dx2_prev, - dx3_prev, - ux1, - ux2, - ux3, - phi, - weight, - tag, - metric, - charge, - inv_dt)); + i1, i2, i3, + i1_prev, i2_prev, i3_prev, + dx1, dx2, dx3, + dx1_prev, dx2_prev, dx3_prev, + ux1, ux2, ux3, + phi, weight, tag, + metric, charge, inv_dt)); + // clang-format on Kokkos::Experimental::contribute(J, J_scat); diff --git a/src/kernels/tests/prtl_bc.cpp b/src/kernels/tests/prtl_bc.cpp index c8f9eae04..14c1a9f54 100644 --- a/src/kernels/tests/prtl_bc.cpp +++ b/src/kernels/tests/prtl_bc.cpp @@ -201,9 +201,9 @@ void testPeriodicBC(const std::vector& res, // Particle boundaries auto boundaries = boundaries_t {}; boundaries = { - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC} + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC } }; real_t time = ZERO; @@ -343,18 +343,18 @@ auto main(int argc, char* argv[]) -> int { const std::vector res1d { 50 }; const boundaries_t ext1d { - {0.0, 1000.0}, + { 0.0, 1000.0 }, }; const std::vector res2d { 30, 20 }; const boundaries_t ext2d { - {-15.0, 15.0}, - {-10.0, 10.0}, + { -15.0, 15.0 }, + { -10.0, 10.0 }, }; const std::vector res3d { 10, 10, 10 }; const boundaries_t ext3d { - {0.0, 1.0}, - {0.0, 1.0}, - {0.0, 1.0} + { 0.0, 1.0 }, + { 0.0, 1.0 }, + { 0.0, 1.0 } }; testPeriodicBC>(res1d, ext1d, {}); testPeriodicBC>(res2d, ext2d, {});