diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index cd7119789..f60ee9061 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -1,27 +1,39 @@ name: Unit tests on: - pull_request: - branches: - - '**rc' - - 'master' + push: jobs: + check-commit: + runs-on: ubuntu-latest + outputs: + run_tests: ${{ steps.check_message.outputs.run_tests }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Check commit message + id: check_message + run: | + if git log -1 --pretty=%B | grep -q "RUNTEST"; then + echo "run_tests=true" >> "$GITHUB_OUTPUT" + else + echo "run_tests=false" >> "$GITHUB_OUTPUT" + fi tests: + needs: check-commit + if: needs.check-commit.outputs.run_tests == 'true' strategy: fail-fast: false matrix: - device: [amd-gpu, nvidia-gpu] + device: [cpu, amd-gpu, nvidia-gpu] precision: [double, single] - exclude: + exclude: # my AMD GPU doesn't support fp64 atomics : ( - device: amd-gpu precision: double - # my AMD GPU doesn't support fp64 atomics : ( runs-on: [self-hosted, "${{ matrix.device }}"] - if: contains(github.event.head_commit.message, 'totest') steps: - name: Checkout - uses: actions/checkout@v3.3.0 + uses: actions/checkout@v4 - name: Configure run: | if [ "${{ matrix.device }}" = "nvidia-gpu" ]; then @@ -35,6 +47,8 @@ jobs: fi elif [ "${{ matrix.device }}" = "amd-gpu" ]; then FLAGS="-D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_AMD_GFX1100=ON" + elif [ "${{ matrix.device }}" = "cpu" ]; then + FLAGS="-D mpi=ON" fi cmake -B build -D TESTS=ON -D output=ON -D precision=${{ matrix.precision }} $FLAGS - name: Compile diff --git a/.gitignore b/.gitignore index 53d09b648..9a167b9d5 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,8 @@ venv/ # CMake testing files Testing/ +tags +.clangd .schema.json *_old/ action-token diff --git a/.taplo.toml b/.taplo.toml new file mode 100644 index 000000000..423a47594 --- /dev/null +++ b/.taplo.toml @@ -0,0 +1,6 @@ +[formatting] + align_entries = true 
+ indent_tables = true + indent_entries = true + trailing_newline = true + align_comments = true diff --git a/CMakeLists.txt b/CMakeLists.txt index 62319559b..dd22b9308 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,12 +8,13 @@ project( VERSION 1.2.0 LANGUAGES CXX C) add_compile_options("-D ENTITY_VERSION=\"${PROJECT_VERSION}\"") -execute_process(COMMAND - bash -c "git diff --quiet src/ && echo $(git rev-parse HEAD) || echo $(git rev-parse HEAD)-mod" +execute_process( + COMMAND + bash -c + "git diff --quiet src/ && echo $(git rev-parse HEAD) || echo $(git rev-parse HEAD)-mod" WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" OUTPUT_VARIABLE GIT_HASH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE -) + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) message(STATUS "Git hash: ${GIT_HASH}") add_compile_options("-D ENTITY_GIT_HASH=\"${GIT_HASH}\"") @@ -25,56 +26,57 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/defaults.cmake) # defaults set(DEBUG - ${default_debug} - CACHE BOOL "Debug mode") + ${default_debug} + CACHE BOOL "Debug mode") set(precision - ${default_precision} - CACHE STRING "Precision") + ${default_precision} + CACHE STRING "Precision") set(pgen - ${default_pgen} - CACHE STRING "Problem generator") + ${default_pgen} + CACHE STRING "Problem generator") set(gui - ${default_gui} - CACHE BOOL "Use GUI [nttiny]") + ${default_gui} + CACHE BOOL "Use GUI [nttiny]") set(output - ${default_output} - CACHE BOOL "Enable output") + ${default_output} + CACHE BOOL "Enable output") set(mpi - ${default_mpi} - CACHE BOOL "Use MPI") + ${default_mpi} + CACHE BOOL "Use MPI") # -------------------------- Compilation settings -------------------------- # set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) if(${DEBUG} STREQUAL "OFF") set(CMAKE_BUILD_TYPE - Release - CACHE STRING "CMake build type") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG") + Release + CACHE STRING "CMake build type") + set(CMAKE_CXX_FLAGS + 
"${CMAKE_CXX_FLAGS} -DNDEBUG -Wno-unused-local-typedefs -Wno-unknown-cuda-version" + ) else() set(CMAKE_BUILD_TYPE - Debug - CACHE STRING "CMake build type") + Debug + CACHE STRING "CMake build type") set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -DDEBUG -Wall -Wextra -Wno-unknown-pragmas") + "${CMAKE_CXX_FLAGS} -DDEBUG -Wall -Wextra -Wno-unknown-pragmas") endif() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-local-typedefs") - # options set(precisions - "single" "double" - CACHE STRING "Precisions") + "single" "double" + CACHE STRING "Precisions") include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.cmake) # ------------------------- Third-Party Tests ------------------------------ # set(BUILD_TESTING - OFF - CACHE BOOL "Build tests") + OFF + CACHE BOOL "Build tests") # ------------------------ Third-party dependencies ------------------------ # include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/kokkosConfig.cmake) @@ -98,21 +100,13 @@ endif() if(${output}) include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/adios2Config.cmake) find_or_fetch_dependency(adios2 FALSE) - if (NOT DEFINED ENV{HDF5_ROOT}) - set(USE_CUSTOM_HDF5 OFF) - if (DEFINED ENV{CONDA_PREFIX}) + if(NOT DEFINED ENV{HDF5_ROOT}) + if(DEFINED ENV{CONDA_PREFIX}) execute_process(COMMAND bash -c "conda list | grep \"hdf5\" -q" - RESULT_VARIABLE HDF5_INSTALLED) - if (HDF5_INSTALLED EQUAL 0) + RESULT_VARIABLE HDF5_INSTALLED) + if(HDF5_INSTALLED EQUAL 0) set(HDF5_ROOT $ENV{CONDA_PREFIX}) - else() - set(USE_CUSTOM_HDF5 ON) endif() - else() - set(USE_CUSTOM_HDF5 ON) - endif() - if (USE_CUSTOM_HDF5) - message(FATAL_ERROR "HDF5_ROOT is not set. 
Please set it to the root of the HDF5 installation") endif() endif() find_package(HDF5 REQUIRED) @@ -129,6 +123,9 @@ link_libraries(${DEPENDENCIES}) if(TESTS) # ---------------------------------- Tests --------------------------------- # include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/tests.cmake) +elseif(BENCHMARK) + # ------------------------------ Benchmark --------------------------------- # + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/benchmark.cmake) else() # ----------------------------------- GUI ---------------------------------- # if(${gui}) diff --git a/README.md b/README.md index d6f4597f5..7287d52db 100644 --- a/README.md +++ b/README.md @@ -10,21 +10,29 @@ Our [detailed documentation](https://entity-toolkit.github.io/) includes everyth [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) -## Core developers (alphabetical) +## Lead developers -👀 __Yangyang Cai__ {[@StaticObserver](https://github.com/StaticObserver): GRPIC} +☕ __Hayk Hakobyan__ {[@haykh](https://github.com/haykh)} -💁‍♂️ __Alexander Chernoglazov__ {[@SChernoglazov](https://github.com/SChernoglazov): PIC} +🥔 __Jens Mahlmann__ {[@jmahlmann](https://github.com/jmahlmann)} -🍵 __Benjamin Crinquand__ {[@bcrinquand](https://github.com/bcrinquand): GRPIC, cubed-sphere} +💁‍♂️ __Alexander Chernoglazov__ {[@SChernoglazov](https://github.com/SChernoglazov)} + +🧋 __Alisa Galishnikova__ {[@alisagk](https://github.com/alisagk)} + +🐬 __Sasha Philippov__ {[@sashaph](https://github.com/sashaph)} -🧋 __Alisa Galishnikova__ {[@alisagk](https://github.com/alisagk): GRPIC} +## Contributors (alphabetical) -☕ __Hayk Hakobyan__ {[@haykh](https://github.com/haykh): framework, PIC, GRPIC, cubed-sphere} +🎸 __Ludwig Böss__ {[@LudwigBoess](https://github.com/LudwigBoess): PIC, framework} + +👀 __Yangyang Cai__ {[@StaticObserver](https://github.com/StaticObserver): GRPIC} + +🍵 __Benjamin Crinquand__
{[@bcrinquand](https://github.com/bcrinquand): GRPIC, cubed-sphere} -🥔 __Jens Mahlmann__ {[@jmahlmann](https://github.com/jmahlmann): framework, MPI, cubed-sphere} +🚂 __Evgeny Gorbunov__ {[@Alcauchy](https://github.com/Alcauchy): PIC, framework} -🐬 __Sasha Philippov__ {[@sashaph](https://github.com/sashaph): all-around} +:radio: __Siddhant Solanki__ {[@sidruns30](https://github.com/sidruns30): framework} 🤷 __Arno Vanthieghem__ {[@vanthieg](https://github.com/vanthieg): framework, PIC} diff --git a/TASKLIST.md b/TASKLIST.md deleted file mode 100644 index 069a7deb2..000000000 --- a/TASKLIST.md +++ /dev/null @@ -1,5 +0,0 @@ -### Performance improvements to try - -- [ ] removing temporary variables in interpolation -- [ ] passing by value vs const ref in metric -- [ ] return physical coords one-by-one instead of by passing full vector diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp new file mode 100644 index 000000000..98306c92b --- /dev/null +++ b/benchmark/benchmark.cpp @@ -0,0 +1,17 @@ +#include "global.h" + +#include +#include + +auto main(int argc, char* argv[]) -> int { + ntt::GlobalInitialize(argc, argv); + try { + // ...
+ } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + ntt::GlobalFinalize(); // qualified like ntt::GlobalInitialize; finalize before the error exit + return 1; + } + ntt::GlobalFinalize(); // normal exit path + return 0; +} diff --git a/cmake/MPIConfig.cmake b/cmake/MPIConfig.cmake index b426641ec..d1bfeaab2 100644 --- a/cmake/MPIConfig.cmake +++ b/cmake/MPIConfig.cmake @@ -1,3 +1,4 @@ find_package(MPI REQUIRED) include_directories(${MPI_CXX_INCLUDE_PATH}) -add_compile_options("-D MPI_ENABLED") \ No newline at end of file +add_compile_options("-D MPI_ENABLED") + diff --git a/cmake/adios2Config.cmake b/cmake/adios2Config.cmake index 16c0c30c7..5c480f3d8 100644 --- a/cmake/adios2Config.cmake +++ b/cmake/adios2Config.cmake @@ -1,15 +1,27 @@ # ----------------------------- Adios2 settings ---------------------------- # -set(ADIOS2_BUILD_EXAMPLES OFF CACHE BOOL "Build ADIOS2 examples") +set(ADIOS2_BUILD_EXAMPLES + OFF + CACHE BOOL "Build ADIOS2 examples") # Language support -set(ADIOS2_USE_Python OFF CACHE BOOL "Use Python for ADIOS2") -set(ADIOS2_USE_Fortran OFF CACHE BOOL "Use Fortran for ADIOS2") +set(ADIOS2_USE_Python + OFF + CACHE BOOL "Use Python for ADIOS2") +set(ADIOS2_USE_Fortran + OFF + CACHE BOOL "Use Fortran for ADIOS2") # Format/compression support -set(ADIOS2_USE_ZeroMQ OFF CACHE BOOL "Use ZeroMQ for ADIOS2") +set(ADIOS2_USE_ZeroMQ + OFF + CACHE BOOL "Use ZeroMQ for ADIOS2") -set(ADIOS2_USE_MPI ${mpi} CACHE BOOL "Use MPI for ADIOS2") +set(ADIOS2_USE_MPI + ${mpi} + CACHE BOOL "Use MPI for ADIOS2") -set(ADIOS2_USE_CUDA OFF CACHE BOOL "Use CUDA for ADIOS2") +set(ADIOS2_USE_CUDA + OFF + CACHE BOOL "Use CUDA for ADIOS2") add_compile_options("-D OUTPUT_ENABLED") diff --git a/cmake/benchmark.cmake b/cmake/benchmark.cmake new file mode 100644 index 000000000..d2e8ca47c --- /dev/null +++ b/cmake/benchmark.cmake @@ -0,0 +1,24 @@ +set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) + +add_subdirectory(${SRC_DIR}/global ${CMAKE_CURRENT_BINARY_DIR}/global) +add_subdirectory(${SRC_DIR}/metrics ${CMAKE_CURRENT_BINARY_DIR}/metrics)
+add_subdirectory(${SRC_DIR}/kernels ${CMAKE_CURRENT_BINARY_DIR}/kernels) +add_subdirectory(${SRC_DIR}/archetypes ${CMAKE_CURRENT_BINARY_DIR}/archetypes) +add_subdirectory(${SRC_DIR}/framework ${CMAKE_CURRENT_BINARY_DIR}/framework) + +if(${output}) + add_subdirectory(${SRC_DIR}/output ${CMAKE_CURRENT_BINARY_DIR}/output) + add_subdirectory(${SRC_DIR}/checkpoint ${CMAKE_CURRENT_BINARY_DIR}/checkpoint) +endif() + +set(exec benchmark.xc) +set(src ${CMAKE_CURRENT_SOURCE_DIR}/benchmark/benchmark.cpp) + +add_executable(${exec} ${src}) + +set(libs ntt_global ntt_metrics ntt_kernels ntt_archetypes ntt_framework) +if(${output}) + list(APPEND libs ntt_output ntt_checkpoint) +endif() +add_dependencies(${exec} ${libs}) +target_link_libraries(${exec} PRIVATE ${libs} stdc++fs) diff --git a/cmake/config.cmake b/cmake/config.cmake index fa18a87eb..58dd467e9 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -3,7 +3,10 @@ function(set_precision precision_name) list(FIND precisions ${precision_name} PRECISION_FOUND) if(${PRECISION_FOUND} EQUAL -1) - message(FATAL_ERROR "Invalid precision: ${precision_name}\nValid options are: ${precisions}") + message( + FATAL_ERROR + "Invalid precision: ${precision_name}\nValid options are: ${precisions}" + ) endif() if(${precision_name} STREQUAL "single") @@ -13,19 +16,31 @@ endfunction() # ---------------------------- Problem generator --------------------------- # function(set_problem_generator pgen_name) - file(GLOB_RECURSE PGENS "${CMAKE_CURRENT_SOURCE_DIR}/setups/**/pgen.hpp" "${CMAKE_CURRENT_SOURCE_DIR}/setups/pgen.hpp") + file(GLOB_RECURSE PGENS "${CMAKE_CURRENT_SOURCE_DIR}/setups/**/pgen.hpp" + "${CMAKE_CURRENT_SOURCE_DIR}/setups/pgen.hpp") foreach(PGEN ${PGENS}) get_filename_component(PGEN_NAME ${PGEN} DIRECTORY) - string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/setups/" "" PGEN_NAME ${PGEN_NAME}) - string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/setups" "" PGEN_NAME ${PGEN_NAME}) + string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/setups/" "" 
PGEN_NAME + ${PGEN_NAME}) + string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/setups" "" PGEN_NAME + ${PGEN_NAME}) list(APPEND PGEN_NAMES ${PGEN_NAME}) endforeach() list(FIND PGEN_NAMES ${pgen_name} PGEN_FOUND) if(NOT ${pgen_name} STREQUAL "." AND ${PGEN_FOUND} EQUAL -1) - message(FATAL_ERROR "Invalid problem generator: ${pgen_name}\nValid options are: ${PGEN_NAMES}") + message( + FATAL_ERROR + "Invalid problem generator: ${pgen_name}\nValid options are: ${PGEN_NAMES}" + ) endif() - set(PGEN ${pgen_name} PARENT_SCOPE) + set(PGEN + ${pgen_name} + PARENT_SCOPE) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/setups/${pgen_name}) - set(PGEN_FOUND TRUE PARENT_SCOPE) - set(problem_generators ${PGEN_NAMES} PARENT_SCOPE) + set(PGEN_FOUND + TRUE + PARENT_SCOPE) + set(problem_generators + ${PGEN_NAMES} + PARENT_SCOPE) endfunction() diff --git a/cmake/defaults.cmake b/cmake/defaults.cmake index f70120e0d..46b4609c5 100644 --- a/cmake/defaults.cmake +++ b/cmake/defaults.cmake @@ -1,62 +1,100 @@ # ----------------------------- Defaults ---------------------------------- # if(DEFINED ENV{Entity_ENABLE_DEBUG}) - set(default_debug $ENV{Entity_ENABLE_DEBUG} CACHE INTERNAL "Default flag for debug mode") + set(default_debug + $ENV{Entity_ENABLE_DEBUG} + CACHE INTERNAL "Default flag for debug mode") else() - set(default_debug OFF CACHE INTERNAL "Default flag for debug mode") + set(default_debug + OFF + CACHE INTERNAL "Default flag for debug mode") endif() set_property(CACHE default_debug PROPERTY TYPE BOOL) -set(default_engine "pic" CACHE INTERNAL "Default engine") -set(default_precision "single" CACHE INTERNAL "Default precision") -set(default_pgen "." 
CACHE INTERNAL "Default problem generator") -set(default_sr_metric "minkowski" CACHE INTERNAL "Default SR metric") -set(default_gr_metric "kerr_schild" CACHE INTERNAL "Default GR metric") +set(default_engine + "pic" + CACHE INTERNAL "Default engine") +set(default_precision + "single" + CACHE INTERNAL "Default precision") +set(default_pgen + "." + CACHE INTERNAL "Default problem generator") +set(default_sr_metric + "minkowski" + CACHE INTERNAL "Default SR metric") +set(default_gr_metric + "kerr_schild" + CACHE INTERNAL "Default GR metric") if(DEFINED ENV{Entity_ENABLE_OUTPUT}) - set(default_output $ENV{Entity_ENABLE_OUTPUT} CACHE INTERNAL "Default flag for output") + set(default_output + $ENV{Entity_ENABLE_OUTPUT} + CACHE INTERNAL "Default flag for output") else() - set(default_output OFF CACHE INTERNAL "Default flag for output") + set(default_output + OFF + CACHE INTERNAL "Default flag for output") endif() set_property(CACHE default_output PROPERTY TYPE BOOL) if(DEFINED ENV{Entity_ENABLE_GUI}) - set(default_gui $ENV{Entity_ENABLE_GUI} CACHE INTERNAL "Default flag for GUI") + set(default_gui + $ENV{Entity_ENABLE_GUI} + CACHE INTERNAL "Default flag for GUI") else() - set(default_gui OFF CACHE INTERNAL "Default flag for GUI") + set(default_gui + OFF + CACHE INTERNAL "Default flag for GUI") endif() set_property(CACHE default_gui PROPERTY TYPE BOOL) if(DEFINED ENV{Kokkos_ENABLE_CUDA}) - set(default_KOKKOS_ENABLE_CUDA $ENV{Kokkos_ENABLE_CUDA} CACHE INTERNAL "Default flag for CUDA") + set(default_KOKKOS_ENABLE_CUDA + $ENV{Kokkos_ENABLE_CUDA} + CACHE INTERNAL "Default flag for CUDA") else() - set(default_KOKKOS_ENABLE_CUDA OFF CACHE INTERNAL "Default flag for CUDA") + set(default_KOKKOS_ENABLE_CUDA + OFF + CACHE INTERNAL "Default flag for CUDA") endif() set_property(CACHE default_KOKKOS_ENABLE_CUDA PROPERTY TYPE BOOL) if(DEFINED ENV{Kokkos_ENABLE_HIP}) - set(default_KOKKOS_ENABLE_HIP $ENV{Kokkos_ENABLE_HIP} CACHE INTERNAL "Default flag for HIP") + 
set(default_KOKKOS_ENABLE_HIP + $ENV{Kokkos_ENABLE_HIP} + CACHE INTERNAL "Default flag for HIP") else() - set(default_KOKKOS_ENABLE_HIP OFF CACHE INTERNAL "Default flag for HIP") + set(default_KOKKOS_ENABLE_HIP + OFF + CACHE INTERNAL "Default flag for HIP") endif() set_property(CACHE default_KOKKOS_ENABLE_HIP PROPERTY TYPE BOOL) if(DEFINED ENV{Kokkos_ENABLE_OPENMP}) - set(default_KOKKOS_ENABLE_OPENMP $ENV{Kokkos_ENABLE_OPENMP} CACHE INTERNAL "Default flag for OpenMP") + set(default_KOKKOS_ENABLE_OPENMP + $ENV{Kokkos_ENABLE_OPENMP} + CACHE INTERNAL "Default flag for OpenMP") else() - set(default_KOKKOS_ENABLE_OPENMP OFF CACHE INTERNAL "Default flag for OpenMP") + set(default_KOKKOS_ENABLE_OPENMP + OFF + CACHE INTERNAL "Default flag for OpenMP") endif() set_property(CACHE default_KOKKOS_ENABLE_OPENMP PROPERTY TYPE BOOL) if(DEFINED ENV{Entity_ENABLE_MPI}) - set(default_mpi $ENV{Entity_ENABLE_MPI} CACHE INTERNAL "Default flag for MPI") + set(default_mpi + $ENV{Entity_ENABLE_MPI} + CACHE INTERNAL "Default flag for MPI") else() - set(default_mpi OFF CACHE INTERNAL "Default flag for MPI") + set(default_mpi + OFF + CACHE INTERNAL "Default flag for MPI") endif() set_property(CACHE default_mpi PROPERTY TYPE BOOL) diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index b143befdf..06a3e6a1f 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -1,24 +1,34 @@ -set(Kokkos_REPOSITORY https://github.com/kokkos/kokkos.git CACHE STRING "Kokkos repository") -set(plog_REPOSITORY https://github.com/SergiusTheBest/plog.git CACHE STRING "plog repository") -set(toml11_REPOSITORY https://github.com/ToruNiina/toml11 CACHE STRING "toml11 repository") +set(Kokkos_REPOSITORY + https://github.com/kokkos/kokkos.git + CACHE STRING "Kokkos repository") +set(plog_REPOSITORY + https://github.com/SergiusTheBest/plog.git + CACHE STRING "plog repository") -# set (adios2_REPOSITORY https://github.com/ornladios/ADIOS2.git CACHE STRING "ADIOS2 repository") +# set 
(adios2_REPOSITORY https://github.com/ornladios/ADIOS2.git CACHE STRING +# "ADIOS2 repository") function(check_internet_connection) if(OFFLINE STREQUAL "ON") - set(FETCHCONTENT_FULLY_DISCONNECTED ON CACHE BOOL "Connection status") + set(FETCHCONTENT_FULLY_DISCONNECTED + ON + CACHE BOOL "Connection status") message(STATUS "${Blue}Offline mode.${ColorReset}") else() execute_process( COMMAND ping 8.8.8.8 -c 2 RESULT_VARIABLE NO_CONNECTION - OUTPUT_QUIET - ) + OUTPUT_QUIET) if(NO_CONNECTION GREATER 0) - set(FETCHCONTENT_FULLY_DISCONNECTED ON CACHE BOOL "Connection status") - message(STATUS "${Red}No internet connection. Fetching disabled.${ColorReset}") + set(FETCHCONTENT_FULLY_DISCONNECTED + ON + CACHE BOOL "Connection status") + message( + STATUS "${Red}No internet connection. Fetching disabled.${ColorReset}") else() - set(FETCHCONTENT_FULLY_DISCONNECTED OFF CACHE BOOL "Connection status") + set(FETCHCONTENT_FULLY_DISCONNECTED + OFF + CACHE BOOL "Connection status") message(STATUS "${Green}Internet connection established.${ColorReset}") endif() endif() @@ -30,66 +40,92 @@ function(find_or_fetch_dependency package_name header_only) endif() if(NOT ${package_name}_FOUND) - if(DEFINED ${package_name}_REPOSITORY AND NOT FETCHCONTENT_FULLY_DISCONNECTED) + if(DEFINED ${package_name}_REPOSITORY AND NOT + FETCHCONTENT_FULLY_DISCONNECTED) # fetching package - message(STATUS "${Blue}${package_name} not found. Fetching from ${${package_name}_REPOSITORY}${ColorReset}") + message( + STATUS + "${Blue}${package_name} not found. 
Fetching from ${${package_name}_REPOSITORY}${ColorReset}" + ) include(FetchContent) if(${package_name} STREQUAL "Kokkos") FetchContent_Declare( ${package_name} GIT_REPOSITORY ${${package_name}_REPOSITORY} - GIT_TAG 4.3.00 - ) + GIT_TAG 4.3.00) else() - FetchContent_Declare( - ${package_name} - GIT_REPOSITORY ${${package_name}_REPOSITORY} - ) + FetchContent_Declare(${package_name} + GIT_REPOSITORY ${${package_name}_REPOSITORY}) endif() FetchContent_MakeAvailable(${package_name}) set(lower_pckg_name ${package_name}) string(TOLOWER ${lower_pckg_name} lower_pckg_name) - set(${package_name}_SRC ${CMAKE_CURRENT_BINARY_DIR}/_deps/${lower_pckg_name}-src CACHE PATH "Path to ${package_name} src") - set(${package_name}_FETCHED TRUE CACHE BOOL "Whether ${package_name} was fetched") + set(${package_name}_SRC + ${CMAKE_CURRENT_BINARY_DIR}/_deps/${lower_pckg_name}-src + CACHE PATH "Path to ${package_name} src") + set(${package_name}_FETCHED + TRUE + CACHE BOOL "Whether ${package_name} was fetched") message(STATUS "${Green}${package_name} fetched.${ColorReset}") else() # get as submodule - message(STATUS "${Yellow}${package_name} not found. Using as submodule.${ColorReset}") + message( + STATUS + "${Yellow}${package_name} not found. 
Using as submodule.${ColorReset}" + ) - set(${package_name}_FETCHED FALSE CACHE BOOL "Whether ${package_name} was fetched") + set(${package_name}_FETCHED + FALSE + CACHE BOOL "Whether ${package_name} was fetched") if(NOT FETCHCONTENT_FULLY_DISCONNECTED) - message(STATUS "${GREEN}Updating ${package_name} submodule.${ColorReset}") + message( + STATUS "${Green}Updating ${package_name} submodule.${ColorReset}") # Green, not GREEN: variable names are case-sensitive execute_process( - COMMAND git submodule update --init --remote ${CMAKE_CURRENT_SOURCE_DIR}/extern/${package_name} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - ) + COMMAND git submodule update --init --remote + ${CMAKE_CURRENT_SOURCE_DIR}/extern/${package_name} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extern/${package_name} extern/${package_name}) - set(${package_name}_SRC ${CMAKE_CURRENT_SOURCE_DIR}/extern/${package_name} CACHE PATH "Path to ${package_name} src") - set(${package_name}_BUILD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/build/extern/${package_name} CACHE PATH "Path to ${package_name} build") + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extern/${package_name} + extern/${package_name}) + set(${package_name}_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/extern/${package_name} + CACHE PATH "Path to ${package_name} src") + set(${package_name}_BUILD_DIR + ${CMAKE_CURRENT_SOURCE_DIR}/build/extern/${package_name} + CACHE PATH "Path to ${package_name} build") endif() else() message(STATUS "${Green}${package_name} found.${ColorReset}") - set(${package_name}_FETCHED FALSE CACHE BOOL "Whether ${package_name} was fetched") - set(${package_name}_VERSION ${${package_name}_VERSION} CACHE INTERNAL "${package_name} version") + set(${package_name}_FETCHED + FALSE + CACHE BOOL "Whether ${package_name} was fetched") + set(${package_name}_VERSION + ${${package_name}_VERSION} + CACHE INTERNAL "${package_name} version") endif() if(${package_name} STREQUAL "adios2") if(NOT DEFINED adios2_VERSION OR adios2_VERSION STREQUAL "")
- get_directory_property(adios2_VERSION DIRECTORY ${adios2_BUILD_DIR} DEFINITION ADIOS2_VERSION) - set(adios2_VERSION ${adios2_VERSION} CACHE INTERNAL "ADIOS2 version") + get_directory_property(adios2_VERSION DIRECTORY ${adios2_BUILD_DIR} + DEFINITION ADIOS2_VERSION) + set(adios2_VERSION + ${adios2_VERSION} + CACHE INTERNAL "ADIOS2 version") endif() endif() if(${package_name} STREQUAL "Kokkos") if(NOT DEFINED Kokkos_VERSION OR Kokkos_VERSION STREQUAL "") - get_directory_property(Kokkos_VERSION DIRECTORY ${Kokkos_SRC} DEFINITION Kokkos_VERSION) - set(Kokkos_VERSION ${Kokkos_VERSION} CACHE INTERNAL "Kokkos version") + get_directory_property(Kokkos_VERSION DIRECTORY ${Kokkos_SRC} DEFINITION + Kokkos_VERSION) + set(Kokkos_VERSION + ${Kokkos_VERSION} + CACHE INTERNAL "Kokkos version") endif() endif() endfunction() diff --git a/cmake/kokkosConfig.cmake b/cmake/kokkosConfig.cmake index 8928253ae..63c32622d 100644 --- a/cmake/kokkosConfig.cmake +++ b/cmake/kokkosConfig.cmake @@ -1,19 +1,41 @@ # ----------------------------- Kokkos settings ---------------------------- # if(${DEBUG} STREQUAL "OFF") - set(Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION ON CACHE BOOL "Kokkos aggressive vectorization") - set(Kokkos_ENABLE_COMPILER_WARNINGS OFF CACHE BOOL "Kokkos compiler warnings") - set(Kokkos_ENABLE_DEBUG OFF CACHE BOOL "Kokkos debug") - set(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK OFF CACHE BOOL "Kokkos debug bounds check") + set(Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION + ON + CACHE BOOL "Kokkos aggressive vectorization") + set(Kokkos_ENABLE_COMPILER_WARNINGS + OFF + CACHE BOOL "Kokkos compiler warnings") + set(Kokkos_ENABLE_DEBUG + OFF + CACHE BOOL "Kokkos debug") + set(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK + OFF + CACHE BOOL "Kokkos debug bounds check") else() - set(Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION OFF CACHE BOOL "Kokkos aggressive vectorization") - set(Kokkos_ENABLE_COMPILER_WARNINGS ON CACHE BOOL "Kokkos compiler warnings") - set(Kokkos_ENABLE_DEBUG ON CACHE BOOL "Kokkos debug") - 
set(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK ON CACHE BOOL "Kokkos debug bounds check") + set(Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION + OFF + CACHE BOOL "Kokkos aggressive vectorization") + set(Kokkos_ENABLE_COMPILER_WARNINGS + ON + CACHE BOOL "Kokkos compiler warnings") + set(Kokkos_ENABLE_DEBUG + ON + CACHE BOOL "Kokkos debug") + set(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK + ON + CACHE BOOL "Kokkos debug bounds check") endif() -set(Kokkos_ENABLE_HIP ${default_KOKKOS_ENABLE_HIP} CACHE BOOL "Enable HIP") -set(Kokkos_ENABLE_CUDA ${default_KOKKOS_ENABLE_CUDA} CACHE BOOL "Enable CUDA") -set(Kokkos_ENABLE_OPENMP ${default_KOKKOS_ENABLE_OPENMP} CACHE BOOL "Enable OpenMP") +set(Kokkos_ENABLE_HIP + ${default_KOKKOS_ENABLE_HIP} + CACHE BOOL "Enable HIP") +set(Kokkos_ENABLE_CUDA + ${default_KOKKOS_ENABLE_CUDA} + CACHE BOOL "Enable CUDA") +set(Kokkos_ENABLE_OPENMP + ${default_KOKKOS_ENABLE_OPENMP} + CACHE BOOL "Enable OpenMP") # set memory space if(${Kokkos_ENABLE_CUDA}) @@ -51,7 +73,11 @@ add_compile_options("-D HostExeSpace=${HOST_EXE_SPACE}") add_compile_options("-D HostMemSpace=${HOST_MEM_SPACE}") if(${BUILD_TESTING} STREQUAL "OFF") - set(Kokkos_ENABLE_TESTS OFF CACHE BOOL "Kokkos tests") + set(Kokkos_ENABLE_TESTS + OFF + CACHE BOOL "Kokkos tests") else() - set(Kokkos_ENABLE_TESTS ON CACHE BOOL "Kokkos tests") + set(Kokkos_ENABLE_TESTS + ON + CACHE BOOL "Kokkos tests") endif() diff --git a/cmake/report.cmake b/cmake/report.cmake index 6733dbcd4..13dde63f7 100644 --- a/cmake/report.cmake +++ b/cmake/report.cmake @@ -18,10 +18,22 @@ function(PadTo Text Padding Target Result) set(${rt} "${rt}") endif() - set(${Result} "${rt}" PARENT_SCOPE) + set(${Result} + "${rt}" + PARENT_SCOPE) endfunction() -function(PrintChoices Label Flag Choices Value Default Color OutputString Multiline Padding) +function( + PrintChoices + Label + Flag + Choices + Value + Default + Color + OutputString + Multiline + Padding) list(LENGTH "${Choices}" nchoices) set(rstring "") set(counter 0) @@ -35,14 +47,14 @@ 
function(PrintChoices Label Flag Choices Value Default Color OutputString Multil endif() set(rstring_i "${rstring_i}:") - PadTo("${rstring_i}" " " ${Padding} rstring_i) + padto("${rstring_i}" " " ${Padding} rstring_i) else() set(rstring_i "") if(NOT ${counter} EQUAL ${nchoices}) if(${Multiline} EQUAL 1) set(rstring_i "${rstring_i}\n") - PadTo("${rstring_i}" " " ${Padding} rstring_i) + padto("${rstring_i}" " " ${Padding} rstring_i) else() set(rstring_i "${rstring_i}/") endif() @@ -71,13 +83,16 @@ function(PrintChoices Label Flag Choices Value Default Color OutputString Multil set(rstring_i "") endforeach() - set(${OutputString} "${rstring}" PARENT_SCOPE) + set(${OutputString} + "${rstring}" + PARENT_SCOPE) endfunction() set(ON_OFF_VALUES "ON" "OFF") if(${PGEN_FOUND}) - PrintChoices("Problem generator" + printchoices( + "Problem generator" "pgen" "${problem_generators}" ${PGEN} @@ -85,11 +100,11 @@ if(${PGEN_FOUND}) "${Blue}" PGEN_REPORT 1 - 36 - ) + 36) endif() -PrintChoices("Precision" +printchoices( + "Precision" "precision" "${precisions}" ${precision} @@ -97,9 +112,9 @@ PrintChoices("Precision" "${Blue}" PRECISION_REPORT 1 - 36 -) -PrintChoices("Output" + 36) +printchoices( + "Output" "output" "${ON_OFF_VALUES}" ${output} @@ -107,9 +122,9 @@ PrintChoices("Output" "${Green}" OUTPUT_REPORT 0 - 36 -) -PrintChoices("GUI" + 36) +printchoices( + "GUI" "gui" "${ON_OFF_VALUES}" ${gui} @@ -117,9 +132,9 @@ PrintChoices("GUI" "${Green}" GUI_REPORT 0 - 36 -) -PrintChoices("MPI" + 36) +printchoices( + "MPI" "mpi" "${ON_OFF_VALUES}" ${mpi} @@ -127,9 +142,9 @@ PrintChoices("MPI" "${Green}" MPI_REPORT 0 - 42 -) -PrintChoices("Debug mode" + 42) +printchoices( + "Debug mode" "DEBUG" "${ON_OFF_VALUES}" ${DEBUG} @@ -137,10 +152,10 @@ PrintChoices("Debug mode" "${Green}" DEBUG_REPORT 0 - 42 -) + 42) -PrintChoices("CUDA" +printchoices( + "CUDA" "Kokkos_ENABLE_CUDA" "${ON_OFF_VALUES}" ${Kokkos_ENABLE_CUDA} @@ -148,9 +163,9 @@ PrintChoices("CUDA" "${Green}" CUDA_REPORT 0 - 42 -) 
-PrintChoices("HIP" + 42) +printchoices( + "HIP" "Kokkos_ENABLE_HIP" "${ON_OFF_VALUES}" ${Kokkos_ENABLE_HIP} @@ -158,9 +173,9 @@ PrintChoices("HIP" "${Green}" HIP_REPORT 0 - 42 -) -PrintChoices("OpenMP" + 42) +printchoices( + "OpenMP" "Kokkos_ENABLE_OPENMP" "${ON_OFF_VALUES}" ${Kokkos_ENABLE_OPENMP} @@ -168,10 +183,10 @@ PrintChoices("OpenMP" "${Green}" OPENMP_REPORT 0 - 42 -) + 42) -PrintChoices("C++ compiler" +printchoices( + "C++ compiler" "CMAKE_CXX_COMPILER" "${CMAKE_CXX_COMPILER} v${CMAKE_CXX_COMPILER_VERSION}" "${CMAKE_CXX_COMPILER} v${CMAKE_CXX_COMPILER_VERSION}" @@ -179,10 +194,10 @@ PrintChoices("C++ compiler" "${ColorReset}" CXX_COMPILER_REPORT 0 - 42 -) + 42) -PrintChoices("C compiler" +printchoices( + "C compiler" "CMAKE_C_COMPILER" "${CMAKE_C_COMPILER} v${CMAKE_C_COMPILER_VERSION}" "${CMAKE_C_COMPILER} v${CMAKE_C_COMPILER_VERSION}" @@ -190,21 +205,24 @@ PrintChoices("C compiler" "${ColorReset}" C_COMPILER_REPORT 0 - 42 -) + 42) get_cmake_property(_variableNames VARIABLES) -foreach (_variableName ${_variableNames}) - string(REGEX MATCH "Kokkos_ARCH_*" _isMatched ${_variableName}) - if(_isMatched) - get_property(isSet CACHE ${_variableName} PROPERTY VALUE) - if(isSet STREQUAL "ON") - string(REGEX REPLACE "Kokkos_ARCH_" "" ARCH ${_variableName}) - break() - endif() +foreach(_variableName ${_variableNames}) + string(REGEX MATCH "Kokkos_ARCH_*" _isMatched ${_variableName}) + if(_isMatched) + get_property( + isSet + CACHE ${_variableName} + PROPERTY VALUE) + if(isSet STREQUAL "ON") + string(REGEX REPLACE "Kokkos_ARCH_" "" ARCH ${_variableName}) + break() endif() + endif() endforeach() -PrintChoices("Architecture" +printchoices( + "Architecture" "Kokkos_ARCH_*" "${ARCH}" "${ARCH}" @@ -212,8 +230,7 @@ PrintChoices("Architecture" "${ColorReset}" ARCH_REPORT 0 - 42 -) + 42) if(${Kokkos_ENABLE_CUDA}) if("${CMAKE_CUDA_COMPILER}" STREQUAL "") @@ -225,11 +242,15 @@ if(${Kokkos_ENABLE_CUDA}) string(STRIP ${CUDACOMP} CUDACOMP) message(STATUS "CUDA compiler: 
${CUDACOMP}") - execute_process(COMMAND bash -c "${CUDACOMP} --version | grep release | sed -e 's/.*release //' -e 's/,.*//'" + execute_process( + COMMAND + bash -c + "${CUDACOMP} --version | grep release | sed -e 's/.*release //' -e 's/,.*//'" OUTPUT_VARIABLE CUDACOMP_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) - PrintChoices("CUDA compiler" + printchoices( + "CUDA compiler" "CMAKE_CUDA_COMPILER" "${CUDACOMP}" "${CUDACOMP}" @@ -237,28 +258,37 @@ if(${Kokkos_ENABLE_CUDA}) "${ColorReset}" CUDA_COMPILER_REPORT 0 - 42 - ) + 42) endif() -if (${Kokkos_ENABLE_HIP}) - execute_process(COMMAND bash -c "hipcc --version | grep HIP | cut -d ':' -f 2 | tr -d ' '" +if(${Kokkos_ENABLE_HIP}) + execute_process( + COMMAND bash -c "hipcc --version | grep HIP | cut -d ':' -f 2 | tr -d ' '" OUTPUT_VARIABLE ROCM_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) endif() set(DOT_SYMBOL "${ColorReset}.") -set(DOTTED_LINE_SYMBOL "${ColorReset}. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ") +set(DOTTED_LINE_SYMBOL + "${ColorReset}. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " +) -set(DASHED_LINE_SYMBOL "${ColorReset}....................................................................... ") +set(DASHED_LINE_SYMBOL + "${ColorReset}....................................................................... 
" +) if(NOT ${PROJECT_VERSION_TWEAK} EQUAL 0) - set(VERSION_SYMBOL "v${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}-rc${PROJECT_VERSION_TWEAK}") + set(VERSION_SYMBOL + "v${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}-rc${PROJECT_VERSION_TWEAK}" + ) else() - set(VERSION_SYMBOL "v${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} ") + set(VERSION_SYMBOL + "v${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} " + ) endif() -message("${Blue} __ __ +message( + "${Blue} __ __ /\\ \\__ __/\\ \\__ __ ___\\ \\ _\\/\\_\\ \\ _\\ __ __ / __ \\ / __ \\ \\ \\/\\/\\ \\ \\ \\/ /\\ \\/\\ \\ @@ -299,7 +329,7 @@ message(" ${DEBUG_REPORT}") message("${DASHED_LINE_SYMBOL}\nDependencies") -if (NOT "${CUDACOMP_VERSION}" STREQUAL "") +if(NOT "${CUDACOMP_VERSION}" STREQUAL "") message(" - CUDA:\tv${CUDACOMP_VERSION}") elseif(NOT "${ROCM_VERSION}" STREQUAL "") message(" - ROCm:\tv${ROCM_VERSION}") @@ -312,7 +342,8 @@ if(${HDF5_FOUND}) message(" - HDF5:\tv${HDF5_VERSION}") endif() -message("${DASHED_LINE_SYMBOL} +message( + "${DASHED_LINE_SYMBOL} Notes ${Dim}: Set flags with `cmake ... 
-D ${Magenta}${ColorReset}${Dim}=`, the ${Underline}default${ColorReset}${Dim} value : will be used unless the variable is explicitly set.${ColorReset} diff --git a/cmake/styling.cmake b/cmake/styling.cmake index fb9cfcc87..70c448fff 100644 --- a/cmake/styling.cmake +++ b/cmake/styling.cmake @@ -23,20 +23,17 @@ if(NOT WIN32) set(StrikeEnd "${Esc}[0m") endif() -# message("This is normal") -# message("${Red}This is Red${ColorReset}") -# message("${Green}This is Green${ColorReset}") -# message("${Yellow}This is Yellow${ColorReset}") -# message("${Blue}This is Blue${ColorReset}") -# message("${Magenta}This is Magenta${ColorReset}") -# message("${Cyan}This is Cyan${ColorReset}") -# message("${White}This is White${ColorReset}") -# message("${BoldRed}This is BoldRed${ColorReset}") -# message("${BoldGreen}This is BoldGreen${ColorReset}") -# message("${BoldYellow}This is BoldYellow${ColorReset}") -# message("${BoldBlue}This is BoldBlue${ColorReset}") +# message("This is normal") message("${Red}This is Red${ColorReset}") +# message("${Green}This is Green${ColorReset}") message("${Yellow}This is +# Yellow${ColorReset}") message("${Blue}This is Blue${ColorReset}") +# message("${Magenta}This is Magenta${ColorReset}") message("${Cyan}This is +# Cyan${ColorReset}") message("${White}This is White${ColorReset}") +# message("${BoldRed}This is BoldRed${ColorReset}") message("${BoldGreen}This is +# BoldGreen${ColorReset}") message("${BoldYellow}This is +# BoldYellow${ColorReset}") message("${BoldBlue}This is BoldBlue${ColorReset}") # message("${BoldMagenta}This is BoldMagenta${ColorReset}") -# message("${BoldCyan}This is BoldCyan${ColorReset}") -# message("${BoldWhite}This is BoldWhite\n\n${ColorReset}") +# message("${BoldCyan}This is BoldCyan${ColorReset}") message("${BoldWhite}This +# is BoldWhite\n\n${ColorReset}") + +# message() -# message() \ No newline at end of file diff --git a/cmake/tests.cmake b/cmake/tests.cmake index f1342f679..ca8ee69c4 100644 --- a/cmake/tests.cmake +++ 
b/cmake/tests.cmake @@ -8,27 +8,37 @@ add_subdirectory(${SRC_DIR}/metrics ${CMAKE_CURRENT_BINARY_DIR}/metrics) add_subdirectory(${SRC_DIR}/kernels ${CMAKE_CURRENT_BINARY_DIR}/kernels) add_subdirectory(${SRC_DIR}/archetypes ${CMAKE_CURRENT_BINARY_DIR}/archetypes) add_subdirectory(${SRC_DIR}/framework ${CMAKE_CURRENT_BINARY_DIR}/framework) -if (${output}) +if(${output}) add_subdirectory(${SRC_DIR}/output ${CMAKE_CURRENT_BINARY_DIR}/output) add_subdirectory(${SRC_DIR}/checkpoint ${CMAKE_CURRENT_BINARY_DIR}/checkpoint) endif() -if (${mpi}) +if(${mpi}) # tests with mpi - if (${output}) - add_subdirectory(${SRC_DIR}/output/tests ${CMAKE_CURRENT_BINARY_DIR}/output/tests) - add_subdirectory(${SRC_DIR}/framework/tests ${CMAKE_CURRENT_BINARY_DIR}/checkpoint/tests) - add_subdirectory(${SRC_DIR}/framework/tests ${CMAKE_CURRENT_BINARY_DIR}/framework/tests) + if(${output}) + add_subdirectory(${SRC_DIR}/output/tests + ${CMAKE_CURRENT_BINARY_DIR}/output/tests) + add_subdirectory(${SRC_DIR}/checkpoint/tests + ${CMAKE_CURRENT_BINARY_DIR}/checkpoint/tests) + add_subdirectory(${SRC_DIR}/framework/tests + ${CMAKE_CURRENT_BINARY_DIR}/framework/tests) endif() else() # tests without mpi - add_subdirectory(${SRC_DIR}/global/tests ${CMAKE_CURRENT_BINARY_DIR}/global/tests) - add_subdirectory(${SRC_DIR}/metrics/tests ${CMAKE_CURRENT_BINARY_DIR}/metrics/tests) - add_subdirectory(${SRC_DIR}/kernels/tests ${CMAKE_CURRENT_BINARY_DIR}/kernels/tests) - add_subdirectory(${SRC_DIR}/archetypes/tests ${CMAKE_CURRENT_BINARY_DIR}/archetypes/tests) - add_subdirectory(${SRC_DIR}/framework/tests ${CMAKE_CURRENT_BINARY_DIR}/framework/tests) - if (${output}) - add_subdirectory(${SRC_DIR}/output/tests ${CMAKE_CURRENT_BINARY_DIR}/output/tests) - add_subdirectory(${SRC_DIR}/checkpoint/tests ${CMAKE_CURRENT_BINARY_DIR}/checkpoint/tests) + add_subdirectory(${SRC_DIR}/global/tests + ${CMAKE_CURRENT_BINARY_DIR}/global/tests) + add_subdirectory(${SRC_DIR}/metrics/tests + ${CMAKE_CURRENT_BINARY_DIR}/metrics/tests) + 
add_subdirectory(${SRC_DIR}/kernels/tests + ${CMAKE_CURRENT_BINARY_DIR}/kernels/tests) + add_subdirectory(${SRC_DIR}/archetypes/tests + ${CMAKE_CURRENT_BINARY_DIR}/archetypes/tests) + add_subdirectory(${SRC_DIR}/framework/tests + ${CMAKE_CURRENT_BINARY_DIR}/framework/tests) + if(${output}) + add_subdirectory(${SRC_DIR}/output/tests + ${CMAKE_CURRENT_BINARY_DIR}/output/tests) + add_subdirectory(${SRC_DIR}/checkpoint/tests + ${CMAKE_CURRENT_BINARY_DIR}/checkpoint/tests) endif() endif() diff --git a/dev/nix/adios2.nix b/dev/nix/adios2.nix new file mode 100644 index 000000000..7218f894f --- /dev/null +++ b/dev/nix/adios2.nix @@ -0,0 +1,67 @@ +{ + pkgs ? import { }, + hdf5, + mpi, +}: + +let + name = "adios2"; + version = "2.10.2"; + cmakeFlags = { + CMAKE_CXX_STANDARD = "17"; + CMAKE_CXX_EXTENSIONS = "OFF"; + CMAKE_POSITION_INDEPENDENT_CODE = "TRUE"; + BUILD_SHARED_LIBS = "ON"; + ADIOS2_USE_HDF5 = if hdf5 then "ON" else "OFF"; + ADIOS2_USE_Python = "OFF"; + ADIOS2_USE_Fortran = "OFF"; + ADIOS2_USE_ZeroMQ = "OFF"; + BUILD_TESTING = "OFF"; + ADIOS2_BUILD_EXAMPLES = "OFF"; + ADIOS2_USE_MPI = if mpi then "ON" else "OFF"; + CMAKE_BUILD_TYPE = "Release"; + } // (if !mpi then { ADIOS2_HAVE_HDF5_VOL = "OFF"; } else { }); +in +pkgs.stdenv.mkDerivation { + pname = "${name}${if hdf5 then "-hdf5" else ""}${if mpi then "-mpi" else ""}"; + version = "${version}"; + src = pkgs.fetchgit { + url = "https://github.com/ornladios/ADIOS2/"; + rev = "v${version}"; + sha256 = "sha256-NVyw7xoPutXeUS87jjVv1YxJnwNGZAT4QfkBLzvQbwg="; + }; + + nativeBuildInputs = with pkgs; [ + cmake + perl + ]; + + propagatedBuildInputs = + [ + pkgs.gcc13 + ] + ++ (if hdf5 then (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5 ]) else [ ]) + ++ (if mpi then [ pkgs.openmpi ] else [ ]); + + configurePhase = '' + cmake -B build $src ${ + pkgs.lib.attrsets.foldlAttrs ( + acc: key: value: + acc + " -D ${key}=${value}" + ) "" cmakeFlags + } + ''; + + buildPhase = '' + cmake --build build -j + ''; + + installPhase = '' + 
sed -i '/if(CMAKE_INSTALL_COMPONENT/,/^[[:space:]]&endif()$/d' build/cmake/install/post/cmake_install.cmake + cmake --install build --prefix $out + chmod +x build/cmake/install/post/generate-adios2-config.sh + sh build/cmake/install/post/generate-adios2-config.sh $out + ''; + + enableParallelBuilding = true; +} diff --git a/dev/nix/kokkos.nix b/dev/nix/kokkos.nix new file mode 100644 index 000000000..6271604c5 --- /dev/null +++ b/dev/nix/kokkos.nix @@ -0,0 +1,63 @@ +{ + pkgs ? import { }, + arch, + gpu, +}: + +let + name = "kokkos"; + version = "4.5.01"; + compilerPkgs = { + "HIP" = with pkgs.rocmPackages; [ + rocm-core + clr + rocthrust + rocprim + rocminfo + rocm-smi + ]; + "NONE" = [ + pkgs.gcc13 + ]; + }; + cmakeFlags = { + "HIP" = [ + "-D CMAKE_C_COMPILER=hipcc" + "-D CMAKE_CXX_COMPILER=hipcc" + ]; + "NONE" = [ ]; + }; + getArch = + _: + if gpu != "NONE" && arch == "NATIVE" then + throw "Please specify an architecture when the GPU support is enabled. Available architectures: https://kokkos.org/kokkos-core-wiki/keywords.html#architectures" + else + arch; + +in +pkgs.stdenv.mkDerivation { + pname = "${name}"; + version = "${version}"; + src = pkgs.fetchgit { + url = "https://github.com/kokkos/kokkos/"; + rev = "${version}"; + sha256 = "sha256-cI2p+6J+8BRV5fXTDxxHTfh6P5PeeLUiF73o5zVysHQ="; + }; + + nativeBuildInputs = with pkgs; [ + cmake + ]; + + propagatedBuildInputs = compilerPkgs.${gpu}; + + cmakeFlags = [ + "-D CMAKE_CXX_STANDARD=17" + "-D CMAKE_CXX_EXTENSIONS=OFF" + "-D CMAKE_POSITION_INDEPENDENT_CODE=TRUE" + "-D Kokkos_ARCH_${getArch { }}=ON" + (if gpu != "none" then "-D Kokkos_ENABLE_${gpu}=ON" else "") + "-D CMAKE_BUILD_TYPE=Release" + ] ++ cmakeFlags.${gpu}; + + enableParallelBuilding = true; +} diff --git a/dev/nix/shell.nix b/dev/nix/shell.nix new file mode 100644 index 000000000..01d80298b --- /dev/null +++ b/dev/nix/shell.nix @@ -0,0 +1,95 @@ +{ + pkgs ? import { }, + gpu ? "NONE", + arch ? "NATIVE", + hdf5 ? true, + mpi ? 
false, +}: + +let + gpuUpper = pkgs.lib.toUpper gpu; + archUpper = pkgs.lib.toUpper arch; + name = "entity-dev"; + adios2Pkg = (pkgs.callPackage ./adios2.nix { inherit pkgs mpi hdf5; }); + kokkosPkg = ( + pkgs.callPackage ./kokkos.nix { + inherit pkgs; + arch = archUpper; + gpu = gpuUpper; + } + ); + envVars = { + compiler = rec { + NONE = { + CXX = "g++"; + CC = "gcc"; + }; + HIP = { + CXX = "hipcc"; + CC = "hipcc"; + }; + CUDA = NONE; + }; + kokkos = { + HIP = { + Kokkos_ENABLE_HIP = "ON"; + }; + CUDA = { + Kokkos_ENABLE_CUDA = "ON"; + }; + NONE = { }; + }; + }; +in +pkgs.mkShell { + name = "${name}-env"; + nativeBuildInputs = with pkgs; [ + zlib + cmake + + clang-tools + + adios2Pkg + kokkosPkg + + python312 + python312Packages.jupyter + + cmake-format + cmake-lint + neocmakelsp + black + pyright + taplo + vscode-langservers-extracted + ]; + + LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath ([ + pkgs.stdenv.cc.cc + pkgs.zlib + ]); + + shellHook = + '' + BLUE='\033[0;34m' + NC='\033[0m' + + echo "following environment variables are set:" + '' + + pkgs.lib.concatStringsSep "" ( + pkgs.lib.mapAttrsToList ( + category: vars: + pkgs.lib.concatStringsSep "" ( + pkgs.lib.mapAttrsToList (name: value: '' + export ${name}=${value} + echo -e " ''\${BLUE}${name}''\${NC}=${value}" + '') vars.${gpuUpper} + ) + ) envVars + ) + + '' + echo "" + echo -e "${name} nix-shell activated" + ''; + +} diff --git a/dev/runners/Dockerfile.runner.cpu b/dev/runners/Dockerfile.runner.cpu new file mode 100644 index 000000000..3c2cf4926 --- /dev/null +++ b/dev/runners/Dockerfile.runner.cpu @@ -0,0 +1,73 @@ +FROM ubuntu:22.04 + +ARG DEBIAN_FRONTEND=noninteractive + +# upgrade +RUN apt-get update && apt-get upgrade -y + +# cmake & build tools +RUN apt-get remove -y --purge cmake && \ + apt-get install -y sudo wget curl build-essential openmpi-bin openmpi-common libopenmpi-dev && \ + wget "https://github.com/Kitware/CMake/releases/download/v3.29.6/cmake-3.29.6-linux-x86_64.tar.gz" -P /opt && \ + tar 
xvf /opt/cmake-3.29.6-linux-x86_64.tar.gz -C /opt && \ + rm /opt/cmake-3.29.6-linux-x86_64.tar.gz +ENV PATH=/opt/cmake-3.29.6-linux-x86_64/bin:$PATH + +# adios2 +RUN apt-get update && apt-get install -y git libhdf5-openmpi-dev && \ + git clone https://github.com/ornladios/ADIOS2.git /opt/adios2-src && \ + cd /opt/adios2-src && \ + cmake -B build \ + -D CMAKE_CXX_STANDARD=17 \ + -D CMAKE_CXX_EXTENSIONS=OFF \ + -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ + -D BUILD_SHARED_LIBS=ON \ + -D ADIOS2_USE_HDF5=ON \ + -D ADIOS2_USE_Python=OFF \ + -D ADIOS2_USE_Fortran=OFF \ + -D ADIOS2_USE_ZeroMQ=OFF \ + -D BUILD_TESTING=OFF \ + -D ADIOS2_BUILD_EXAMPLES=OFF \ + -D ADIOS2_USE_MPI=ON \ + -D ADIOS2_HAVE_HDF5_VOL=OFF \ + -D CMAKE_INSTALL_PREFIX=/opt/adios2 && \ + cmake --build build -j && \ + cmake --install build && \ + rm -rf /opt/adios2-src + +ENV HDF5_ROOT=/usr +ENV ADIOS2_DIR=/opt/adios2 +ENV PATH=/opt/adios2/bin:$PATH + +# cleanup +RUN apt-get clean && \ + apt-get autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/cache/* && \ + rm -rf /var/lib/log/* && \ + rm -rf /var/lib/apt/lists/* + +ARG USER=runner +RUN useradd -ms /usr/bin/zsh $USER && \ + usermod -aG sudo $USER && \ + echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers + +USER $USER +ARG HOME=/home/$USER +WORKDIR $HOME + +# gh runner +ARG RUNNER_VERSION=2.317.0 +RUN mkdir actions-runner +WORKDIR $HOME/actions-runner + +RUN curl -o actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz \ + -L https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz && \ + tar xzf ./actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz && \ + sudo ./bin/installdependencies.sh + +ADD start.sh start.sh +RUN sudo chown $USER:$USER start.sh && \ + sudo chmod +x start.sh + +ENTRYPOINT ["./start.sh"] diff --git a/dev/runners/README.md b/dev/runners/README.md index 08d0cd176..957898fa7 100644 --- a/dev/runners/README.md +++ b/dev/runners/README.md @@ -19,3 +19,9 @@ docker run -e 
TOKEN= -e LABEL=nvidia-gpu --runtime=nvidia --gpus=all -dt docker build -t ghrunner:amd -f Dockerfile.runner.rocm . docker run -e TOKEN= -e LABEL=amd-gpu --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video -dt ghrunner:amd ``` + +### CPU + +```sh +docker build -t ghrunner:cpu -f Dockerfile.runner.cpu . +``` diff --git a/extern/Kokkos b/extern/Kokkos index 5fc08a9a7..175257a51 160000 --- a/extern/Kokkos +++ b/extern/Kokkos @@ -1 +1 @@ -Subproject commit 5fc08a9a7da14d8530f8c7035d008ef63ddb4e5c +Subproject commit 175257a51ff29a0059ec48bcd233ee096b2c0438 diff --git a/extern/adios2 b/extern/adios2 index e524dce1b..a19dad6ce 160000 --- a/extern/adios2 +++ b/extern/adios2 @@ -1 +1 @@ -Subproject commit e524dce1b72ccf75422cea6342ee2d64a6a87964 +Subproject commit a19dad6cecb00319825f20fd9f455ebbab903d34 diff --git a/extern/plog b/extern/plog index 85a871b13..e21baecd4 160000 --- a/extern/plog +++ b/extern/plog @@ -1 +1 @@ -Subproject commit 85a871b13be0bd1a9e0110744fa60cc9bd1e8380 +Subproject commit e21baecd4753f14da64ede979c5a19302618b752 diff --git a/input.example.toml b/input.example.toml index 06225024a..a49067811 100644 --- a/input.example.toml +++ b/input.example.toml @@ -90,11 +90,11 @@ # Boundary conditions for fields: # @required # @type: 1/2/3-size array of string tuples, each of size 1 or 2 - # @valid: "PERIODIC", "ABSORB", "ATMOSPHERE", "CUSTOM", "HORIZON" - # @example: [["CUSTOM", "ABSORB"]] (for 2D spherical [[rmin, rmax]]) - # @note: When periodic in any of the directions, you should only set one value [..., ["PERIODIC"], ...] 
- # @note: In spherical, bondaries in theta/phi are set automatically (only specify bc @ [rmin, rmax]) [["ATMOSPHERE", "ABSORB"]] - # @note: In GR, the horizon boundary is set automatically (only specify bc @ rmax): [["ABSORB"]] + # @valid: "PERIODIC", "MATCH", "FIXED", "ATMOSPHERE", "CUSTOM", "HORIZON" + # @example: [["CUSTOM", "MATCH"]] (for 2D spherical [[rmin, rmax]]) + # @note: When periodic in any of the directions, you should only set one value: [..., ["PERIODIC"], ...] + # @note: In spherical, bondaries in theta/phi are set automatically (only specify bc @ [rmin, rmax]): [["ATMOSPHERE", "MATCH"]] + # @note: In GR, the horizon boundary is set automatically (only specify bc @ rmax): [["MATCH"]] fields = "" # Boundary conditions for fields: # @required @@ -105,9 +105,9 @@ # @note: In spherical, bondaries in theta/phi are set automatically (only specify bc @ [rmin, rmax]) [["ATMOSPHERE", "ABSORB"]] # @note: In GR, the horizon boundary is set automatically (only specify bc @ rmax): [["ABSORB"]] particles = "" - - [grid.boundaries.absorb] - # Size of the absorption layer in physical (code) units: + + [grid.boundaries.match] + # Size of the matching layer for fields in physical (code) units: # @type: float # @default: 1% of the domain size (in shortest dimension) # @note: In spherical, this is the size of the layer in r from the outer wall @@ -118,8 +118,16 @@ # @default: 1.0 coeff = "" + [grid.boundaries.absorb] + # Size of the absorption layer for particles in physical (code) units: + # @type: float + # @default: 1% of the domain size (in shortest dimension) + # @note: In spherical, this is the size of the layer in r from the outer wall + # @note: In cartesian, this is the same for all dimensions where applicable + ds = "" + [grid.boundaries.atmosphere] - # @required: if ATMOSPHERE is one of the boundaries + # @required: if ATMOSPHERE is one of the boundaries # Temperature of the atmosphere in units of m0 c^2 # @type: float temperature = "" @@ -210,7 +218,7 @@ # 
@type: float: ~1 # @default: 1.0 correction = "" - + # @inferred: # - dt [= CFL * dx0] # @brief: timestep duration @@ -252,12 +260,11 @@ # @type: bool # @default: false use_weights = "" - # Timesteps between particle re-sorting: + # Timesteps between particle re-sorting (removing dead particles): # @type: unsigned int # @default: 100 - # @note: When MPI is enable, particles are sorted every step. - # @note: When `sort_interval` == 0, the sorting is disabled. - sort_interval = "" + # @note: set to 0 to disable re-sorting + clear_interval = "" # @inferred: # - nspec @@ -320,6 +327,10 @@ # @default: -1.0 (disabled) # @note: When `interval_time` < 0, the output is controlled by `interval`, otherwise by `interval_time` interval_time = "" + # Whether to output each timestep into separate files: + # @type: bool + # @default: true + separate_files = "" [output.fields] # Toggle for the field output: @@ -329,7 +340,7 @@ # Field quantities to output: # @type: array of strings # @valid: fields: "E", "B", "J", "divE" - # @valid: moments: "Rho", "Charge", "N", "Nppc", "T0i", "Tij" + # @valid: moments: "Rho", "Charge", "N", "Nppc", "T0i", "Tij", "Vi" # @valid: for GR: "D", "H", "divD", "A" # @default: [] # @note: For T, you can use unspecified indices, e.g., Tij, T0i, or specific ones, e.g., Ttt, T00, T02, T23 @@ -340,10 +351,6 @@ # @type: array of strings # @default: [] custom = "" - # @NOT_IMPLEMENTED: Stride for the output of fields: - # @type: unsigned short: > 1 - # @default: 1 - stride = "" # Smoothing window for the output of moments (e.g., "Rho", "Charge", "T", etc.): # @type: unsigned short # @default: 0 @@ -357,6 +364,11 @@ # @default: -1.0 (use `output.interval_time`) # @note: When `interval_time` < 0, the output is controlled by `interval`, otherwise by `interval_time` interval_time = "" + # Downsample factor for the output of fields: + # @type: array of unsigned int >= 1 + # @default: [1, 1, 1] + # @note: The output is downsampled by the given factors in each 
direction + downsampling = "" [output.particles] # Toggle for the particles output: diff --git a/setups/srpic/monopole/monopole.toml b/legacy/_monopole/monopole.toml similarity index 54% rename from setups/srpic/monopole/monopole.toml rename to legacy/_monopole/monopole.toml index 169837489..cf735fce8 100644 --- a/setups/srpic/monopole/monopole.toml +++ b/legacy/_monopole/monopole.toml @@ -1,31 +1,31 @@ [simulation] - name = "monopole" - engine = "srpic" + name = "monopole" + engine = "srpic" runtime = 60.0 [grid] resolution = [2048, 1024] - extent = [[1.0, 50.0]] + extent = [[1.0, 50.0]] [grid.metric] metric = "qspherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["ATMOSPHERE", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] - + [grid.boundaries.absorb] ds = 1.0 [grid.boundaries.atmosphere] temperature = 0.1 - density = 10.0 - height = 0.02 - species = [1, 2] - ds = 2.0 - + density = 10.0 + height = 0.02 + species = [1, 2] + ds = 2.0 + [scales] - larmor0 = 2e-5 + larmor0 = 2e-5 skindepth0 = 0.01 [algorithms] @@ -36,38 +36,38 @@ [algorithms.gca] e_ovr_b_max = 0.9 - larmor_max = 1.0 + larmor_max = 1.0 [particles] - ppc0 = 5.0 - use_weights = true - sort_interval = 100 + ppc0 = 5.0 + use_weights = true + clear_interval = 100 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 + label = "e-" + mass = 1.0 + charge = -1.0 maxnpart = 1e8 - pusher = "Boris,GCA" + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 + label = "e+" + mass = 1.0 + charge = 1.0 maxnpart = 1e8 - pusher = "Boris,GCA" + pusher = "Boris,GCA" [setup] - Bsurf = 1.0 + Bsurf = 1.0 period = 60.0 [output] format = "hdf5" - + [output.fields] interval_time = 0.1 - quantities = ["N_1", "N_2", "E", "B", "T00"] - mom_smooth = 2 + quantities = ["N_1", "N_2", "E", "B", "T00"] + mom_smooth = 2 [output.particles] enable = false @@ -76,5 +76,5 @@ enable = false [diagnostics] - interval = 50 + interval = 50 colored_stdout = true diff 
--git a/setups/srpic/monopole/pgen.hpp b/legacy/_monopole/pgen.hpp similarity index 97% rename from setups/srpic/monopole/pgen.hpp rename to legacy/_monopole/pgen.hpp index 389a6c6f7..ed8877b71 100644 --- a/setups/srpic/monopole/pgen.hpp +++ b/legacy/_monopole/pgen.hpp @@ -86,7 +86,7 @@ namespace user { inline PGen() {} - auto FieldDriver(real_t time) const -> DriveFields { + auto AtmFields(real_t time) const -> DriveFields { return DriveFields { time, Bsurf, Rstar, Omega }; } }; diff --git a/legacy/benchmark.cpp b/legacy/benchmark.cpp new file mode 100644 index 000000000..54fc17cf9 --- /dev/null +++ b/legacy/benchmark.cpp @@ -0,0 +1,273 @@ +#include "enums.h" +#include "global.h" + +#include "utils/error.h" + +#include "metrics/metric_base.h" +#include "metrics/minkowski.h" + +#include "framework/containers/species.h" +#include "framework/domain/domain.h" +#include "framework/domain/metadomain.h" + +#include + +#include "framework/domain/communications.cpp" +#include "mpi.h" +#include "mpi-ext.h" + +#define TIMER_START(label) \ + Kokkos::fence(); \ + auto start_##label = std::chrono::high_resolution_clock::now(); + +#define TIMER_STOP(label) \ + Kokkos::fence(); \ + auto stop_##label = std::chrono::high_resolution_clock::now(); \ + auto duration_##label = std::chrono::duration_cast( \ + stop_##label - start_##label) \ + .count(); \ + std::cout << "Timer [" #label "]: " << duration_##label << " microseconds" \ + << std::endl; + +/* + Test to check the performance of the new particle allocation scheme + - Create a metadomain object main() + - Set npart + initialize tags InitializeParticleArrays() + - 'Push' the particles by randomly updating the tags PushParticles() + - Communicate particles to neighbors and time the communication + - Compute the time taken for best of N iterations for the communication + */ +using namespace ntt; + +// Set npart and set the particle tags to alive +template +void InitializeParticleArrays(Domain& domain, const int npart) { + 
raise::ErrorIf(npart > domain.species[0].maxnpart(), + "Npart cannot be greater than maxnpart", + HERE); + const auto nspecies = domain.species.size(); + for (int i_spec = 0; i_spec < nspecies; i_spec++) { + domain.species[i_spec].set_npart(npart); + domain.species[i_spec].SyncHostDevice(); + auto& this_tag = domain.species[i_spec].tag; + Kokkos::parallel_for( + "Initialize particles", + npart, + Lambda(const std::size_t i) { this_tag(i) = ParticleTag::alive; }); + } + return; +} + +// Randomly reassign tags to particles for a fraction of particles +template +void PushParticles(Domain& domain, + const double send_frac, + const int seed_ind, + const int seed_tag) { + raise::ErrorIf(send_frac > 1.0, "send_frac cannot be greater than 1.0", HERE); + const auto nspecies = domain.species.size(); + for (int i_spec = 0; i_spec < nspecies; i_spec++) { + domain.species[i_spec].set_unsorted(); + const auto nparticles = domain.species[i_spec].npart(); + const auto nparticles_to_send = static_cast(send_frac * nparticles); + // Generate random indices to send + // Kokkos::Random_XorShift64_Pool<> random_pool(seed_ind); + Kokkos::View indices_to_send("indices_to_send", nparticles_to_send); + Kokkos::fill_random(indices_to_send, domain.random_pool, 0, nparticles); + // Generate random tags to send + // Kokkos::Random_XorShift64_Pool<> random_pool_tag(seed_tag); + Kokkos::View tags_to_send("tags_to_send", nparticles_to_send); + Kokkos::fill_random(tags_to_send, + domain.random_pool, + 0, + domain.species[i_spec].ntags()); + auto& this_tag = domain.species[i_spec].tag; + Kokkos::parallel_for( + "Push particles", + nparticles_to_send, + Lambda(const std::size_t i) { + auto prtl_to_send = indices_to_send(i); + auto tag_to_send = tags_to_send(i); + this_tag(prtl_to_send) = tag_to_send; + }); + domain.species[i_spec].npart_per_tag(); + domain.species[i_spec].SyncHostDevice(); + } + return; +} + +auto main(int argc, char* argv[]) -> int { + GlobalInitialize(argc, argv); + { + /* + MPI 
checks + */ + printf("Compile time check:\n"); +#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT + printf("This MPI library has CUDA-aware support.\n", MPIX_CUDA_AWARE_SUPPORT); +#elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT + printf("This MPI library does not have CUDA-aware support.\n"); +#else + printf("This MPI library cannot determine if there is CUDA-aware support.\n"); +#endif /* MPIX_CUDA_AWARE_SUPPORT */ +printf("Run time check:\n"); +#if defined(MPIX_CUDA_AWARE_SUPPORT) + if (1 == MPIX_Query_cuda_support()) { + printf("This MPI library has CUDA-aware support.\n"); + } else { + printf("This MPI library does not have CUDA-aware support.\n"); + } +#else /* !defined(MPIX_CUDA_AWARE_SUPPORT) */ + printf("This MPI library cannot determine if there is CUDA-aware support.\n"); +#endif /* MPIX_CUDA_AWARE_SUPPORT */ + + /* + Test to send and receive Kokkos arrays + */ + int sender_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &sender_rank); + + int neighbor_rank = 0; + if (sender_rank == 0) { + neighbor_rank = 1; + } + else if (sender_rank == 1) { + neighbor_rank = 0; + } + else { + raise::Error("This test is only for 2 ranks", HERE); + } + Kokkos::View send_array("send_array", 10); + Kokkos::View recv_array("recv_array", 10); + if (sender_rank == 0) { + Kokkos::deep_copy(send_array, 10); + } + else { + Kokkos::deep_copy(send_array, 20); + } + + auto send_array_host = Kokkos::create_mirror_view(send_array); + Kokkos::deep_copy(send_array_host, send_array); + auto host_recv_array = Kokkos::create_mirror_view(recv_array); + + MPI_Sendrecv(send_array.data(), send_array.extent(0), MPI_INT, neighbor_rank, 0, + recv_array.data(), recv_array.extent(0), MPI_INT, neighbor_rank, 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + // Print the received array + Kokkos::deep_copy(host_recv_array, recv_array); + for (int i = 0; i < 10; ++i) { + printf("Rank %d: Received %d\n", sender_rank, host_recv_array(i)); + } + + + std::cout << "Constructing the domain" 
<< std::endl; + // Create a Metadomain object + const unsigned int ndomains = 2; + const std::vector global_decomposition = { + {-1, -1, -1} + }; + const std::vector global_ncells = { 32, 32, 32 }; + const boundaries_t global_extent = { + {0.0, 3.0}, + {0.0, 3.0}, + {0.0, 3.0} + }; + const boundaries_t global_flds_bc = { + {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC}, + {FldsBC::PERIODIC, FldsBC::PERIODIC} + }; + const boundaries_t global_prtl_bc = { + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, + {PrtlBC::PERIODIC, PrtlBC::PERIODIC} + }; + const std::map metric_params = {}; + const int maxnpart = argc > 1 ? std::stoi(argv[1]) : 1000; + const double npart_to_send_frac = 0.01; + const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); + auto species = ntt::ParticleSpecies(1u, + "test_e", + 1.0f, + 1.0f, + maxnpart, + ntt::PrtlPusher::BORIS, + false, + ntt::Cooling::NONE); + auto metadomain = Metadomain>( + ndomains, + global_decomposition, + global_ncells, + global_extent, + global_flds_bc, + global_prtl_bc, + metric_params, + { species }); + + const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; + auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); + auto timers = timer::Timers { { "Communication" }, nullptr, false }; + InitializeParticleArrays(*local_domain, npart); + // Timers for both the communication routines + auto total_time_elapsed_old = 0; + auto total_time_elapsed_new = 0; + + int seed_ind = 0; + int seed_tag = 1; + Kokkos::fence(); + + for (int i = 0; i < 10; ++i) { + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort new + Kokkos::fence(); + auto start_new = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticlesBuffer(*local_domain, &timers); + auto stop_new = std::chrono::high_resolution_clock::now(); + auto duration_new = 
std::chrono::duration_cast( + stop_new - start_new) + .count(); + total_time_elapsed_new += duration_new; + Kokkos::fence(); + } + { + // Push + seed_ind += 2; + seed_tag += 3; + PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); + // Sort old + Kokkos::fence(); + auto start_old = std::chrono::high_resolution_clock::now(); + metadomain.CommunicateParticles(*local_domain, &timers); + auto stop_old = std::chrono::high_resolution_clock::now(); + auto duration_old = std::chrono::duration_cast( + stop_old - start_old) + .count(); + total_time_elapsed_old += duration_old; + Kokkos::fence(); + } + } + printf("Total time elapsed for old: %f us : %f us/prtl\n", + total_time_elapsed_old / 10.0, + total_time_elapsed_old / 10.0 * 1000 / npart); + printf("Total time elapsed for new: %f us : %f us/prtl\n", + total_time_elapsed_new / 10.0, + total_time_elapsed_new / 10.0 * 1000 / npart); + } + GlobalFinalize(); + return 0; +} + +/* + Buggy behavior: + Consider a single domain with a single mpi rank + Particle tag arrays is set to [0, 0, 1, 1, 2, 3, ...] 
for a single domain + CommunicateParticles() discounts all the dead particles and reassigns the + other tags to alive + CommunicateParticlesBuffer() only keeps the ParticleTag::Alive particles + and discounts the rest +*/ diff --git a/setups/CMakeLists.txt b/setups/CMakeLists.txt index b1753d7b8..c92c1d345 100644 --- a/setups/CMakeLists.txt +++ b/setups/CMakeLists.txt @@ -1,23 +1,25 @@ # ------------------------------ # @defines: ntt_pgen [INTERFACE] +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_pgen [required] +# +# * ntt_pgen [required] +# # @uses: -# - kokkos [required] -# - plog [required] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * mpi [optional] # ------------------------------ add_library(ntt_pgen INTERFACE) -target_link_libraries(ntt_pgen INTERFACE - ntt_global - ntt_framework - ntt_archetypes - ntt_kernels -) +target_link_libraries(ntt_pgen INTERFACE ntt_global ntt_framework + ntt_archetypes ntt_kernels) target_include_directories(ntt_pgen - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/${PGEN} -) \ No newline at end of file + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/${PGEN}) + diff --git a/setups/srpic/em_vacuum/em_vacuum.toml b/setups/srpic/em_vacuum/em_vacuum.toml index 156c8d308..23381b1c6 100644 --- a/setups/srpic/em_vacuum/em_vacuum.toml +++ b/setups/srpic/em_vacuum/em_vacuum.toml @@ -1,21 +1,21 @@ [simulation] - name = "em_vacuum" - engine = "srpic" + name = "em_vacuum" + engine = "srpic" runtime = 2.0 [grid] resolution = [256, 512] - extent = [[-1.0, 1.0], [-2.0, 2.0]] + extent = [[-1.0, 1.0], [-2.0, 2.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 0.1 + larmor0 = 0.1 skindepth0 = 0.01 [algorithms] @@ -28,12 +28,12 @@ [setup] amplitude = 1.0 - kx1 = 1 - kx2 = 1 - kx3 = 0 - + kx1 = 1 + kx2 = 1 + kx3 = 0 + [output] - format = "hdf5" + format = "hdf5" interval_time = 
0.1 [output.fields] diff --git a/setups/srpic/langmuir/langmuir.toml b/setups/srpic/langmuir/langmuir.toml index 2f3520fc5..b054a940d 100644 --- a/setups/srpic/langmuir/langmuir.toml +++ b/setups/srpic/langmuir/langmuir.toml @@ -1,21 +1,21 @@ [simulation] - name = "langmuir" - engine = "srpic" + name = "langmuir" + engine = "srpic" runtime = 1.0 [grid] resolution = [2048, 512] - extent = [[0.0, 1.0], [0.0, 0.25]] + extent = [[0.0, 1.0], [0.0, 0.25]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 0.1 + larmor0 = 0.1 skindepth0 = 0.01 [algorithms] @@ -28,24 +28,24 @@ ppc0 = 14.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e7 + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e7 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e7 + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e7 [setup] vmax = 0.1 - nx1 = 4 - nx2 = 2 - + nx1 = 4 + nx2 = 2 + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.0025 [output.fields] diff --git a/setups/srpic/magnetar/magnetar.toml b/setups/srpic/magnetar/magnetar.toml index 2a2260af5..fab2eb01c 100644 --- a/setups/srpic/magnetar/magnetar.toml +++ b/setups/srpic/magnetar/magnetar.toml @@ -1,17 +1,17 @@ [simulation] - name = "magnetar" - engine = "srpic" + name = "magnetar" + engine = "srpic" runtime = 50.0 [grid] - resolution = [2048,1024] - extent = [[1.0, 400.0]] + resolution = [2048, 1024] + extent = [[1.0, 400.0]] [grid.metric] metric = "qspherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["ATMOSPHERE", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] [grid.boundaries.absorb] @@ -19,13 +19,13 @@ [grid.boundaries.atmosphere] temperature = 0.1 - density = 40.0 - height = 0.02 - species = [1, 2] - ds = 0.5 + density = 40.0 + height = 0.02 + species = [1, 2] + ds = 0.5 
[scales] - larmor0 = 1e-5 + larmor0 = 1e-5 skindepth0 = 0.01 [algorithms] @@ -36,59 +36,59 @@ [algorithms.gca] e_ovr_b_max = 0.9 - larmor_max = 100.0 + larmor_max = 100.0 [particles] - ppc0 = 4.0 - use_weights = true - sort_interval = 100 + ppc0 = 4.0 + use_weights = true + clear_interval = 100 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 5e7 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 5e7 + pusher = "Boris,GCA" [setup] - Bsurf = 1.0 - omega = 0.0125 - pp_thres = 10.0 + Bsurf = 1.0 + omega = 0.0125 + pp_thres = 10.0 gamma_pairs = 1.75 [output] @@ -96,7 +96,7 @@ [output.fields] interval_time = 0.5 - quantities = ["N_1", "N_2", "N_3", "N_4", "N_5", "N_6", "B", "E", "J"] + quantities = ["N_1", "N_2", "N_3", "N_4", "N_5", "N_6", "B", "E", "J"] [output.particles] enable = false diff --git a/setups/srpic/magnetar/pgen.hpp b/setups/srpic/magnetar/pgen.hpp index cacbb7c9a..10f98ea5d 100644 --- a/setups/srpic/magnetar/pgen.hpp +++ b/setups/srpic/magnetar/pgen.hpp @@ -85,7 +85,7 
@@ namespace user { const real_t Bsurf, Rstar, Omega, gamma_pairs, pp_thres; InitFields init_flds; - + inline PGen(const SimulationParams& p, const Metadomain& m) : arch::ProblemGenerator(p) , global_domain { m } @@ -94,12 +94,11 @@ namespace user { , Omega { p.template get("setup.omega") } , pp_thres { p.template get("setup.pp_thres") } , gamma_pairs { p.template get("setup.gamma_pairs") } - , init_flds { Bsurf, Rstar } { - } + , init_flds { Bsurf, Rstar } {} inline PGen() {} - auto FieldDriver(real_t time) const -> DriveFields { + auto AtmFields(real_t time) const -> DriveFields { const real_t omega_t = Omega * ((ONE - math::tanh((static_cast(5.0) - time) * HALF)) * @@ -109,170 +108,172 @@ namespace user { return DriveFields { time, Bsurf, Rstar, omega_t }; } - void CustomPostStep(std::size_t , long double, Domain& domain) { - - // Ad-hoc PP kernel - { - - auto& species2_e = domain.species[2]; - auto& species2_p = domain.species[3]; - auto& species3_e = domain.species[4]; - auto& species3_p = domain.species[5]; - auto metric = domain.mesh.metric; - auto pp_thres_ = this->pp_thres; - auto gamma_pairs_ = this->gamma_pairs; - - for (std::size_t s { 0 }; s < 6; ++s) { - if (s == 1) { - continue; - } - - array_t elec_ind("elec_ind"); - array_t pos_ind("pos_ind"); - - auto offset_e = species3_e.npart(); - auto offset_p = species3_p.npart(); - - auto ux1_e = species3_e.ux1; - auto ux2_e = species3_e.ux2; - auto ux3_e = species3_e.ux3; - auto i1_e = species3_e.i1; - auto i2_e = species3_e.i2; - auto dx1_e = species3_e.dx1; - auto dx2_e = species3_e.dx2; - auto phi_e = species3_e.phi; - auto weight_e = species3_e.weight; - auto tag_e = species3_e.tag; - - auto ux1_p = species3_p.ux1; - auto ux2_p = species3_p.ux2; - auto ux3_p = species3_p.ux3; - auto i1_p = species3_p.i1; - auto i2_p = species3_p.i2; - auto dx1_p = species3_p.dx1; - auto dx2_p = species3_p.dx2; - auto phi_p = species3_p.phi; - auto weight_p = species3_p.weight; - auto tag_p = species3_p.tag; - - if (s == 
0) { - - offset_e = species2_e.npart(); - offset_p = species2_p.npart(); - - ux1_e = species2_e.ux1; - ux2_e = species2_e.ux2; - ux3_e = species2_e.ux3; - i1_e = species2_e.i1; - i2_e = species2_e.i2; - dx1_e = species2_e.dx1; - dx2_e = species2_e.dx2; - phi_e = species2_e.phi; - weight_e = species2_e.weight; - tag_e = species2_e.tag; - - ux1_p = species2_p.ux1; - ux2_p = species2_p.ux2; - ux3_p = species2_p.ux3; - i1_p = species2_p.i1; - i2_p = species2_p.i2; - dx1_p = species2_p.dx1; - dx2_p = species2_p.dx2; - phi_p = species2_p.phi; - weight_p = species2_p.weight; - tag_p = species2_p.tag; - - } - - auto& species = domain.species[s]; - auto ux1 = species.ux1; - auto ux2 = species.ux2; - auto ux3 = species.ux3; - auto i1 = species.i1; - auto i2 = species.i2; - auto dx1 = species.dx1; - auto dx2 = species.dx2; - auto phi = species.phi; - auto weight = species.weight; - auto tag = species.tag; - - Kokkos::parallel_for( - "InjectPairs", species.rangeActiveParticles(), Lambda(index_t p) { - if (tag(p) == ParticleTag::dead) { - return; - } + auto MatchFields(real_t) const -> InitFields { + return InitFields { Bsurf, Rstar }; + } - auto px = ux1(p); - auto py = ux2(p); - auto pz = ux3(p); - auto gamma = math::sqrt(ONE + SQR(px) + SQR(py) + SQR(pz)); - - const coord_t xCd{ - static_cast(i1(p)) + dx1(p), - static_cast(i2(p)) + dx2(p)}; - - coord_t xPh { ZERO }; - metric.template convert(xCd, xPh); - - if ((gamma > pp_thres_) && (math::sin(xPh[1]) > 0.1)) { - - auto new_gamma = gamma - 2.0 * gamma_pairs_; - auto new_fac = math::sqrt(SQR(new_gamma) - 1.0) / math::sqrt(SQR(gamma) - 1.0); - auto pair_fac = math::sqrt(SQR(gamma_pairs_) - 1.0) / math::sqrt(SQR(gamma) - 1.0); - - auto elec_p = Kokkos::atomic_fetch_add(&elec_ind(), 1); - auto pos_p = Kokkos::atomic_fetch_add(&pos_ind(), 1); - - i1_e(elec_p + offset_e) = i1(p); - dx1_e(elec_p + offset_e) = dx1(p); - i2_e(elec_p + offset_e) = i2(p); - dx2_e(elec_p + offset_e) = dx2(p); - phi_e(elec_p + offset_e) = phi(p); - 
ux1_e(elec_p + offset_e) = px * pair_fac; - ux2_e(elec_p + offset_e) = py * pair_fac; - ux3_e(elec_p + offset_e) = pz * pair_fac; - weight_e(elec_p + offset_e) = weight(p); - tag_e(elec_p + offset_e) = ParticleTag::alive; - - i1_p(pos_p + offset_p) = i1(p); - dx1_p(pos_p + offset_p) = dx1(p); - i2_p(pos_p + offset_p) = i2(p); - dx2_p(pos_p + offset_p) = dx2(p); - phi_p(pos_p + offset_p) = phi(p); - ux1_p(pos_p + offset_p) = px * pair_fac; - ux2_p(pos_p + offset_p) = py * pair_fac; - ux3_p(pos_p + offset_p) = pz * pair_fac; - weight_p(pos_p + offset_p) = weight(p); - tag_p(pos_p + offset_p) = ParticleTag::alive; - - ux1(p) *= new_fac; - ux2(p) *= new_fac; - ux3(p) *= new_fac; - } + void CustomPostStep(std::size_t, long double, Domain& domain) { - }); + // Ad-hoc PP kernel + { - auto elec_ind_h = Kokkos::create_mirror(elec_ind); - Kokkos::deep_copy(elec_ind_h, elec_ind); - if (s == 0) { - species2_e.set_npart(offset_e + elec_ind_h()); - } else { - species3_e.set_npart(offset_e + elec_ind_h()); - } + auto& species2_e = domain.species[2]; + auto& species2_p = domain.species[3]; + auto& species3_e = domain.species[4]; + auto& species3_p = domain.species[5]; + auto metric = domain.mesh.metric; + auto pp_thres_ = this->pp_thres; + auto gamma_pairs_ = this->gamma_pairs; - auto pos_ind_h = Kokkos::create_mirror(pos_ind); - Kokkos::deep_copy(pos_ind_h, pos_ind); - if (s == 0) { - species2_p.set_npart(offset_p + pos_ind_h()); - } else { - species3_p.set_npart(offset_p + pos_ind_h()); - } + for (std::size_t s { 0 }; s < 6; ++s) { + if (s == 1) { + continue; + } + array_t elec_ind("elec_ind"); + array_t pos_ind("pos_ind"); + + auto offset_e = species3_e.npart(); + auto offset_p = species3_p.npart(); + + auto ux1_e = species3_e.ux1; + auto ux2_e = species3_e.ux2; + auto ux3_e = species3_e.ux3; + auto i1_e = species3_e.i1; + auto i2_e = species3_e.i2; + auto dx1_e = species3_e.dx1; + auto dx2_e = species3_e.dx2; + auto phi_e = species3_e.phi; + auto weight_e = species3_e.weight; 
+ auto tag_e = species3_e.tag; + + auto ux1_p = species3_p.ux1; + auto ux2_p = species3_p.ux2; + auto ux3_p = species3_p.ux3; + auto i1_p = species3_p.i1; + auto i2_p = species3_p.i2; + auto dx1_p = species3_p.dx1; + auto dx2_p = species3_p.dx2; + auto phi_p = species3_p.phi; + auto weight_p = species3_p.weight; + auto tag_p = species3_p.tag; + + if (s == 0) { + + offset_e = species2_e.npart(); + offset_p = species2_p.npart(); + + ux1_e = species2_e.ux1; + ux2_e = species2_e.ux2; + ux3_e = species2_e.ux3; + i1_e = species2_e.i1; + i2_e = species2_e.i2; + dx1_e = species2_e.dx1; + dx2_e = species2_e.dx2; + phi_e = species2_e.phi; + weight_e = species2_e.weight; + tag_e = species2_e.tag; + + ux1_p = species2_p.ux1; + ux2_p = species2_p.ux2; + ux3_p = species2_p.ux3; + i1_p = species2_p.i1; + i2_p = species2_p.i2; + dx1_p = species2_p.dx1; + dx2_p = species2_p.dx2; + phi_p = species2_p.phi; + weight_p = species2_p.weight; + tag_p = species2_p.tag; + } + + auto& species = domain.species[s]; + auto ux1 = species.ux1; + auto ux2 = species.ux2; + auto ux3 = species.ux3; + auto i1 = species.i1; + auto i2 = species.i2; + auto dx1 = species.dx1; + auto dx2 = species.dx2; + auto phi = species.phi; + auto weight = species.weight; + auto tag = species.tag; + + Kokkos::parallel_for( + "InjectPairs", + species.rangeActiveParticles(), + Lambda(index_t p) { + if (tag(p) == ParticleTag::dead) { + return; + } + + auto px = ux1(p); + auto py = ux2(p); + auto pz = ux3(p); + auto gamma = math::sqrt(ONE + SQR(px) + SQR(py) + SQR(pz)); + + const coord_t xCd { static_cast(i1(p)) + dx1(p), + static_cast(i2(p)) + dx2(p) }; + + coord_t xPh { ZERO }; + metric.template convert(xCd, xPh); + + if ((gamma > pp_thres_) && (math::sin(xPh[1]) > 0.1)) { + + auto new_gamma = gamma - 2.0 * gamma_pairs_; + auto new_fac = math::sqrt(SQR(new_gamma) - 1.0) / + math::sqrt(SQR(gamma) - 1.0); + auto pair_fac = math::sqrt(SQR(gamma_pairs_) - 1.0) / + math::sqrt(SQR(gamma) - 1.0); + + auto elec_p = 
Kokkos::atomic_fetch_add(&elec_ind(), 1); + auto pos_p = Kokkos::atomic_fetch_add(&pos_ind(), 1); + + i1_e(elec_p + offset_e) = i1(p); + dx1_e(elec_p + offset_e) = dx1(p); + i2_e(elec_p + offset_e) = i2(p); + dx2_e(elec_p + offset_e) = dx2(p); + phi_e(elec_p + offset_e) = phi(p); + ux1_e(elec_p + offset_e) = px * pair_fac; + ux2_e(elec_p + offset_e) = py * pair_fac; + ux3_e(elec_p + offset_e) = pz * pair_fac; + weight_e(elec_p + offset_e) = weight(p); + tag_e(elec_p + offset_e) = ParticleTag::alive; + + i1_p(pos_p + offset_p) = i1(p); + dx1_p(pos_p + offset_p) = dx1(p); + i2_p(pos_p + offset_p) = i2(p); + dx2_p(pos_p + offset_p) = dx2(p); + phi_p(pos_p + offset_p) = phi(p); + ux1_p(pos_p + offset_p) = px * pair_fac; + ux2_p(pos_p + offset_p) = py * pair_fac; + ux3_p(pos_p + offset_p) = pz * pair_fac; + weight_p(pos_p + offset_p) = weight(p); + tag_p(pos_p + offset_p) = ParticleTag::alive; + + ux1(p) *= new_fac; + ux2(p) *= new_fac; + ux3(p) *= new_fac; + } + }); + + auto elec_ind_h = Kokkos::create_mirror(elec_ind); + Kokkos::deep_copy(elec_ind_h, elec_ind); + if (s == 0) { + species2_e.set_npart(offset_e + elec_ind_h()); + } else { + species3_e.set_npart(offset_e + elec_ind_h()); + } + auto pos_ind_h = Kokkos::create_mirror(pos_ind); + Kokkos::deep_copy(pos_ind_h, pos_ind); + if (s == 0) { + species2_p.set_npart(offset_p + pos_ind_h()); + } else { + species3_p.set_npart(offset_p + pos_ind_h()); } - } // Ad-hoc PP kernel } - + } // Ad-hoc PP kernel + } }; } // namespace user diff --git a/setups/srpic/magnetosphere/magnetosphere.toml b/setups/srpic/magnetosphere/magnetosphere.toml index 34e04b02d..4c7c9117d 100644 --- a/setups/srpic/magnetosphere/magnetosphere.toml +++ b/setups/srpic/magnetosphere/magnetosphere.toml @@ -1,31 +1,31 @@ [simulation] - name = "magnetosphere" - engine = "srpic" + name = "magnetosphere" + engine = "srpic" runtime = 60.0 [grid] resolution = [2048, 1024] - extent = [[1.0, 50.0]] + extent = [[1.0, 50.0]] [grid.metric] metric = "qspherical" 
[grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["ATMOSPHERE", "ABSORB"]] particles = [["ATMOSPHERE", "ABSORB"]] - + [grid.boundaries.absorb] ds = 1.0 [grid.boundaries.atmosphere] temperature = 0.1 - density = 10.0 - height = 0.02 - species = [1, 2] - ds = 2.0 - + density = 10.0 + height = 0.02 + species = [1, 2] + ds = 2.0 + [scales] - larmor0 = 2e-5 + larmor0 = 2e-5 skindepth0 = 0.01 [algorithms] @@ -36,37 +36,37 @@ [algorithms.gca] e_ovr_b_max = 0.9 - larmor_max = 1.0 + larmor_max = 1.0 [particles] - ppc0 = 5.0 - use_weights = true - sort_interval = 100 + ppc0 = 5.0 + use_weights = true + clear_interval = 100 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e8 - pusher = "Boris,GCA" + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e8 + pusher = "Boris,GCA" [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e8 - pusher = "Boris,GCA" + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e8 + pusher = "Boris,GCA" [setup] - Bsurf = 1.0 + Bsurf = 1.0 period = 60.0 [output] format = "hdf5" - + [output.fields] interval_time = 0.1 - quantities = ["N_1", "N_2", "E", "B", "T00"] + quantities = ["N_1", "N_2", "E", "B", "T00"] [output.particles] enable = false @@ -75,5 +75,5 @@ enable = false [diagnostics] - interval = 50 + interval = 50 colored_stdout = true diff --git a/setups/srpic/magnetosphere/pgen.hpp b/setups/srpic/magnetosphere/pgen.hpp index 681c4d6d1..64fe13cfe 100644 --- a/setups/srpic/magnetosphere/pgen.hpp +++ b/setups/srpic/magnetosphere/pgen.hpp @@ -86,9 +86,13 @@ namespace user { inline PGen() {} - auto FieldDriver(real_t time) const -> DriveFields { + auto AtmFields(real_t time) const -> DriveFields { return DriveFields { time, Bsurf, Rstar, Omega }; } + + auto MatchFields(real_t) const -> InitFields { + return InitFields { Bsurf, Rstar }; + } }; } // namespace user diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index f07b99878..b8f169521 
100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -5,15 +5,66 @@ #include "global.h" #include "arch/traits.h" +#include "utils/error.h" +#include "utils/numeric.h" #include "archetypes/energy_dist.h" #include "archetypes/particle_injector.h" #include "archetypes/problem_generator.h" #include "framework/domain/metadomain.h" +#include + namespace user { using namespace ntt; + template + struct InitFields { + /* + Sets up magnetic and electric field components for the simulation. + Must satisfy E = -v x B for Lorentz Force to be zero. + + @param bmag: magnetic field scaling + @param btheta: magnetic field polar angle + @param bphi: magnetic field azimuthal angle + @param drift_ux: drift velocity in the x direction + */ + InitFields(real_t bmag, real_t btheta, real_t bphi, real_t drift_ux) + : Bmag { bmag } + , Btheta { btheta * static_cast(convert::deg2rad) } + , Bphi { bphi * static_cast(convert::deg2rad) } + , Vx { drift_ux } {} + + // magnetic field components + Inline auto bx1(const coord_t&) const -> real_t { + return Bmag * math::cos(Btheta); + } + + Inline auto bx2(const coord_t&) const -> real_t { + return Bmag * math::sin(Btheta) * math::sin(Bphi); + } + + Inline auto bx3(const coord_t&) const -> real_t { + return Bmag * math::sin(Btheta) * math::cos(Bphi); + } + + // electric field components + Inline auto ex1(const coord_t&) const -> real_t { + return ZERO; + } + + Inline auto ex2(const coord_t&) const -> real_t { + return -Vx * Bmag * math::sin(Btheta) * math::cos(Bphi); + } + + Inline auto ex3(const coord_t&) const -> real_t { + return Vx * Bmag * math::sin(Btheta) * math::sin(Bphi); + } + + private: + const real_t Btheta, Bphi, Vx, Bmag; + }; + template struct PGen : public arch::ProblemGenerator { // compatibility traits for the problem generator @@ -30,20 +81,43 @@ namespace user { const real_t drift_ux, temperature; + const real_t Btheta, Bphi, Bmag; + InitFields init_flds; + inline PGen(const SimulationParams& p, const 
Metadomain& m) - : arch::ProblemGenerator(p) + : arch::ProblemGenerator { p } , drift_ux { p.template get("setup.drift_ux") } - , temperature { p.template get("setup.temperature") } {} + , temperature { p.template get("setup.temperature") } + , Bmag { p.template get("setup.Bmag", ZERO) } + , Btheta { p.template get("setup.Btheta", ZERO) } + , Bphi { p.template get("setup.Bphi", ZERO) } + , init_flds { Bmag, Btheta, Bphi, drift_ux } {} inline PGen() {} + auto FixFieldsConst(const bc_in&, const em& comp) const + -> std::pair { + if (comp == em::ex2) { + return { init_flds.ex2({ ZERO }), true }; + } else if (comp == em::ex3) { + return { init_flds.ex3({ ZERO }), true }; + } else { + return { ZERO, false }; + } + } + + auto MatchFields(real_t time) const -> InitFields { + return init_flds; + } + inline void InitPrtls(Domain& local_domain) { const auto energy_dist = arch::Maxwellian(local_domain.mesh.metric, local_domain.random_pool, temperature, -drift_ux, in::x1); - const auto injector = arch::UniformInjector( + + const auto injector = arch::UniformInjector( energy_dist, { 1, 2 }); arch::InjectUniform>( diff --git a/setups/srpic/shock/shock.py b/setups/srpic/shock/shock.py index 64224c728..dc1565572 100644 --- a/setups/srpic/shock/shock.py +++ b/setups/srpic/shock/shock.py @@ -2,7 +2,7 @@ import matplotlib.pyplot as plt import matplotlib as mpl -data = nt2r.Data("shock-03.h5") +data = nt2r.Data("shock.h5") def frame(ti, f): @@ -55,7 +55,7 @@ def frame(ti, f): axs = [fig.add_subplot(gs[i]) for i in range(len(quantities))] for ax, q in zip(axs, quantities): - q["compute"](f).coarsen(x=2, y=2).mean().plot( + q["compute"](f.isel(t=ti)).plot( ax=ax, cmap=q["cmap"], norm=q["norm"], diff --git a/setups/srpic/shock/shock.toml b/setups/srpic/shock/shock.toml index f48edb2d6..4ed3a2b9e 100644 --- a/setups/srpic/shock/shock.toml +++ b/setups/srpic/shock/shock.toml @@ -1,21 +1,21 @@ [simulation] - name = "shock" - engine = "srpic" + name = "shock" + engine = "srpic" runtime = 
50.0 [grid] resolution = [2048, 128] - extent = [[0.0, 10.0], [-0.3125, 0.3125]] + extent = [[0.0, 10.0], [-0.3125, 0.3125]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["CONDUCTOR", "ABSORB"], ["PERIODIC"]] + fields = [["ABSORB", "FIXED"], ["PERIODIC"]] particles = [["REFLECT", "ABSORB"], ["PERIODIC"]] - + [scales] - larmor0 = 1e-2 + larmor0 = 1e-2 skindepth0 = 1e-2 [algorithms] @@ -28,24 +28,27 @@ ppc0 = 16.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 + label = "e-" + mass = 1.0 + charge = -1.0 maxnpart = 1e8 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 + label = "e+" + mass = 1.0 + charge = 1.0 maxnpart = 1e8 [setup] - drift_ux = 0.1 + drift_ux = 0.1 temperature = 1e-3 + Bmag = 1.0 + Btheta = 0.0 + Bphi = 0.0 [output] interval_time = 0.1 - format = "hdf5" - + format = "hdf5" + [output.fields] quantities = ["N_1", "N_2", "E", "B", "T0i_1", "T0i_2", "J"] diff --git a/setups/srpic/turbulence/turbulence.toml b/setups/srpic/turbulence/turbulence.toml index a28afde15..a1f8e29c1 100644 --- a/setups/srpic/turbulence/turbulence.toml +++ b/setups/srpic/turbulence/turbulence.toml @@ -1,21 +1,21 @@ [simulation] - name = "turbulence" - engine = "srpic" + name = "turbulence" + engine = "srpic" runtime = 20.0 [grid] resolution = [184, 184, 184] - extent = [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]] + extent = [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 0.02 + larmor0 = 0.02 skindepth0 = 0.02 [algorithms] @@ -28,22 +28,22 @@ ppc0 = 32.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e8 + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e8 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e8 + label = "e+" + mass = 1.0 
+ charge = 1.0 + maxnpart = 1e8 [setup] - + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.1 - + [output.fields] quantities = ["N_1", "N_2", "E", "B", "J", "T00_1", "T00_2"] diff --git a/setups/srpic/weibel/weibel.toml b/setups/srpic/weibel/weibel.toml index c8e2506f6..23d119b24 100644 --- a/setups/srpic/weibel/weibel.toml +++ b/setups/srpic/weibel/weibel.toml @@ -1,21 +1,21 @@ [simulation] - name = "weibel" - engine = "srpic" + name = "weibel" + engine = "srpic" runtime = 100.0 [grid] resolution = [512, 512] - extent = [[-10.0, 10.0], [-10.0, 10.0]] + extent = [[-10.0, 10.0], [-10.0, 10.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 1.0 + larmor0 = 1.0 skindepth0 = 1.0 [algorithms] @@ -28,37 +28,37 @@ ppc0 = 16.0 [[particles.species]] - label = "e-_p" - mass = 1.0 - charge = -1.0 + label = "e-_p" + mass = 1.0 + charge = -1.0 maxnpart = 1e7 [[particles.species]] - label = "e+_p" - mass = 1.0 - charge = 1.0 + label = "e+_p" + mass = 1.0 + charge = 1.0 maxnpart = 1e7 [[particles.species]] - label = "e-_b" - mass = 1.0 - charge = -1.0 + label = "e-_b" + mass = 1.0 + charge = -1.0 maxnpart = 1e7 [[particles.species]] - label = "e+_b" - mass = 1.0 - charge = 1.0 + label = "e+_b" + mass = 1.0 + charge = 1.0 maxnpart = 1e7 [setup] - drift_u_1 = 0.2 - drift_u_2 = 0.2 - temp_1 = 1e-4 - temp_2 = 1e-4 - + drift_u_1 = 0.2 + drift_u_2 = 0.2 + temp_1 = 1e-4 + temp_2 = 1e-4 + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.25 [output.fields] diff --git a/setups/tests/blob/blob.py b/setups/tests/blob/blob.py new file mode 100644 index 000000000..77337d3b2 --- /dev/null +++ b/setups/tests/blob/blob.py @@ -0,0 +1,62 @@ +import h5py +import numpy as np +import matplotlib.pyplot as plt + +f = open("report", "r") +Lines = f.readlines() +f.close() + +em_new = [] +ep_new = [] +time_new = [] +for i in 
range (len(Lines)): + line = Lines[i] + line = line.strip() + arr = line.split() + + if (len(arr)>0 and arr[0]=='species'): + nparts = arr[2].split("..") + if (nparts[0]=="(e-_p)"): + em_new.append(float(nparts[-1])) + if (nparts[0]=="(e+_p)"): + ep_new.append(float(nparts[-1])) + + if (len(arr)>0 and arr[0]=='Time:'): + time_new.append(float(arr[1])) + +f = h5py.File('blob.h5', 'r') + +Nsteps = len(f.keys()) +print(list(f['Step0'].keys())) + +for i in range (Nsteps): + print (i) + fig = plt.figure(dpi=300, figsize=(8,8), facecolor='white') + + densMax = max(np.max(f['Step'+str(i)]['fN_1']),np.max(f['Step'+str(i)]['fN_2'])) + print(densMax) + ax1 = fig.add_axes([0.05,0.05,0.4,0.4]) + im1=ax1.pcolormesh(f['Step'+str(i)]['X1'],f['Step'+str(i)]['X2'],f['Step'+str(i)]['fN_1'],cmap='turbo',vmin=0,vmax=1.0) + ax1.set_title(r"$N_1$") + ax1.vlines(0,-10.0,10.0,color='white') + + ax1 = fig.add_axes([0.48,0.05,0.4,0.4]) + ax1.pcolormesh(f['Step'+str(i)]['X1'],f['Step'+str(i)]['X2'],f['Step'+str(i)]['fN_2'],cmap='turbo',vmin=0,vmax=1.0) + ax1.set_yticklabels([]) + ax1.set_title(r"$N_2$") + ax1.vlines(0,-10.0,10.0,color='white') + + ax4cb = fig.add_axes([0.89, 0.05, 0.01, 0.4]) + cbar4 = fig.colorbar(im1,cax=ax4cb) + + ax1= fig.add_axes([0.05,0.5,0.83,0.4]) + ax1.plot(time_new,em_new, color='blue', label=r'$e^-$, new') + ax1.plot(time_new,ep_new, color='red', label=r'$e^+$, new') + ax1.legend() + ax1.set_ylim(0,1.8e5) + ax1.set_xlim(0,100) + ax1.vlines(i, 0,1.8e5, color='green',linewidth=0.6) + + + fig.savefig("%05d"%i+".png",dpi=300,bbox_inches='tight') + plt.close() diff --git a/setups/tests/blob/blob.toml b/setups/tests/blob/blob.toml index fffa5fff1..7a047f348 100644 --- a/setups/tests/blob/blob.toml +++ b/setups/tests/blob/blob.toml @@ -1,32 +1,25 @@ [simulation] - name = "blob-1x1x2" - engine = "srpic" - runtime = 5.0 + name = "blob" + engine = "srpic" + runtime = 100.0 [simulation.domain] - decomposition = [1, 1, 2] + decomposition = [2, 1, 1] [grid] - resolution = 
[128, 192, 64] - # extent = [[1.0, 10.0]] - extent = [[-2.0, 2.0], [-3.0, 3.0], [-1.0, 1.0]] + resolution = [1024, 1024] + extent = [[-10.0, 10.0], [-10.0, 10.0]] [grid.metric] - # metric = "qspherical" metric = "minkowski" [grid.boundaries] - # fields = [["ATMOSPHERE", "ABSORB"]] - # particles = [["ATMOSPHERE", "ABSORB"]] - fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] - particles = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] - - # [grid.boundaries.absorb] - # ds = 1.0 - + fields = [["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"]] + [scales] - larmor0 = 2e-5 - skindepth0 = 0.01 + larmor0 = 1.0 + skindepth0 = 1.0 [algorithms] current_filters = 4 @@ -35,32 +28,39 @@ CFL = 0.5 [particles] - ppc0 = 20.0 - # use_weights = true + ppc0 = 16.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e7 - pusher = "Boris" + label = "e-_p" + mass = 1.0 + charge = -1.0 + maxnpart = 1e7 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e7 - pusher = "Boris" + label = "e+_p" + mass = 1.0 + charge = 1.0 + maxnpart = 1e7 [setup] - xi_min = [0.55, 1.85, -0.25] - xi_max = [0.65, 2.3, -0.1] - v1 = [0.25, -0.55, 0.0] - v2 = [-0.75, -0.15, 0.0] - + temp_1 = 1e-4 + x1c = -5.0 + x2c = 0.0 + v_max = 50.0 + dr = 1.0 + [output] - format = "hdf5" - interval_time = 0.02 + format = "hdf5" + interval_time = 1.0 [output.fields] - quantities = ["Nppc_1", "Nppc_2", "E", "B", "J"] + quantities = ["N_1", "N_2", "B", "E"] + + [output.particles] + enable = false + + [output.spectra] + enable = false + +[diagnostics] + colored_stdout = false diff --git a/setups/tests/blob/nparts.py b/setups/tests/blob/nparts.py new file mode 100644 index 000000000..e759422c0 --- /dev/null +++ b/setups/tests/blob/nparts.py @@ -0,0 +1,38 @@ +import h5py +import numpy as np +import matplotlib.pyplot as plt + +f = open("report", "r") +Lines = f.readlines() +f.close() + +em_new = [] +ep_new = [] +time_new = [] +for i in range (len(Lines)): + 
line = Lines[i] + line = line.strip() + arr = line.split() + + if (len(arr)>0 and arr[0]=='species'): + nparts = arr[2].split("..") + if (nparts[0]=="(e-_p)"): + em_new.append(float(nparts[-1])) + if (nparts[0]=="(e+_p)"): + ep_new.append(float(nparts[-1])) + + if (len(arr)>0 and arr[0]=='Time:'): + time_new.append(float(arr[1])) + + +fig = plt.figure(dpi=300, figsize=(8,8), facecolor='white') + +ax1= fig.add_axes([0.05,0.5,0.83,0.4]) +ax1.plot(time_new,em_new, color='blue', label=r'$e^-$, new') +ax1.plot(time_new,ep_new, color='red', label=r'$e^+$, new') +ax1.legend() +ax1.set_ylim(0,1.8e5) +ax1.set_xlim(0,100) + +fig.savefig("nparts.png",dpi=300,bbox_inches='tight') +plt.close() diff --git a/setups/tests/blob/pgen.hpp b/setups/tests/blob/pgen.hpp index d07240bfd..f7b7d71b5 100644 --- a/setups/tests/blob/pgen.hpp +++ b/setups/tests/blob/pgen.hpp @@ -10,107 +10,89 @@ #include "archetypes/energy_dist.h" #include "archetypes/particle_injector.h" #include "archetypes/problem_generator.h" -#include "archetypes/spatial_dist.h" +#include "framework/domain/domain.h" #include "framework/domain/metadomain.h" -#include - namespace user { using namespace ntt; template - struct Beam : public arch::EnergyDistribution { - Beam(const M& metric, - const std::vector& v1_vec, - const std::vector& v2_vec) - : arch::EnergyDistribution { metric } { - std::copy(v1_vec.begin(), v1_vec.end(), v1); - std::copy(v2_vec.begin(), v2_vec.end(), v2); - } - - Inline void operator()(const coord_t&, - vec_t& v_Ph, - unsigned short sp) const override { - if (sp == 1) { - v_Ph[0] = v1[0]; - v_Ph[1] = v1[1]; - v_Ph[2] = v1[2]; - } else { - v_Ph[0] = v2[0]; - v_Ph[1] = v2[1]; - v_Ph[2] = v2[2]; - } + struct CounterstreamEnergyDist : public arch::EnergyDistribution { + CounterstreamEnergyDist(const M& metric, real_t v_max) + : arch::EnergyDistribution { metric } + , v_max { v_max } {} + + Inline void operator()(const coord_t& x_Ph, + vec_t& v, + unsigned short sp) const override { + v[0] = v_max; } 
private: - vec_t v1; - vec_t v2; + const real_t v_max; }; template - struct PointDistribution : public arch::SpatialDistribution { - PointDistribution(const M& metric, - const std::vector& xi_min, - const std::vector& xi_max) - : arch::SpatialDistribution { metric } { - std::copy(xi_min.begin(), xi_min.end(), x_min); - std::copy(xi_max.begin(), xi_max.end(), x_max); - } - + struct GaussianDist : public arch::SpatialDistribution { + GaussianDist(const M& metric, real_t x1c, real_t x2c, real_t dr) + : arch::SpatialDistribution { metric } + , x1c { x1c } + , x2c { x2c } + , dr { dr } {} + + // to properly scale the number density, the probability should be normalized to 1 Inline auto operator()(const coord_t& x_Ph) const -> real_t override { - auto fill = true; - for (auto d = 0u; d < M::Dim; ++d) { - fill &= x_Ph[d] > x_min[d] and x_Ph[d] < x_max[d]; + if (math::abs(x_Ph[0] - x1c) < dr && math::abs(x_Ph[1] - x2c) < dr) { + return 1.0; + } else { + return 0.0; } - return fill ? ONE : ZERO; } private: - tuple_t x_min; - tuple_t x_max; + const real_t x1c, x2c, dr; }; template struct PGen : public arch::ProblemGenerator { + // compatibility traits for the problem generator - static constexpr auto engines { traits::compatible_with::value }; - static constexpr auto metrics { - traits::compatible_with::value - }; - static constexpr auto dimensions { - traits::compatible_with::value - }; + static constexpr auto engines = traits::compatible_with::value; + static constexpr auto metrics = traits::compatible_with::value; + static constexpr auto dimensions = + traits::compatible_with::value; // for easy access to variables in the child class using arch::ProblemGenerator::D; using arch::ProblemGenerator::C; using arch::ProblemGenerator::params; - const std::vector xi_min; - const std::vector xi_max; - const std::vector v1; - const std::vector v2; - - inline PGen(const SimulationParams& p, const Metadomain& m) - : arch::ProblemGenerator(p) - , xi_min { p.template 
get>("setup.xi_min") } - , xi_max { p.template get>("setup.xi_max") } - , v1 { p.template get>("setup.v1") } - , v2 { p.template get>("setup.v2") } {} - - inline void InitPrtls(Domain& domain) { - const auto energy_dist = Beam(domain.mesh.metric, v1, v2); - const auto spatial_dist = PointDistribution(domain.mesh.metric, - xi_min, - xi_max); - const auto injector = arch::NonUniformInjector( - energy_dist, - spatial_dist, - { 1, 2 }); - - arch::InjectNonUniform>( + const real_t temp_1, x1c, x2c, dr, v_max; + + inline PGen(const SimulationParams& p, const Metadomain& global_domain) + : arch::ProblemGenerator { p } + , temp_1 { p.template get("setup.temp_1") } + , x1c { p.template get("setup.x1c") } + , x2c { p.template get("setup.x2c") } + , v_max { p.template get("setup.v_max") } + , dr { p.template get("setup.dr") } {} + + inline void InitPrtls(Domain& local_domain) { + const auto energy_dist = CounterstreamEnergyDist(local_domain.mesh.metric, + v_max); + const auto spatial_dist = GaussianDist(local_domain.mesh.metric, + x1c, + x2c, + dr); + const auto injector = + arch::NonUniformInjector( + energy_dist, + spatial_dist, + { 1, 2 }); + + arch::InjectNonUniform>( params, - domain, + local_domain, injector, 1.0); } diff --git a/setups/wip/magpump/pgen.hpp b/setups/wip/magpump/pgen.hpp new file mode 100644 index 000000000..21d4c8882 --- /dev/null +++ b/setups/wip/magpump/pgen.hpp @@ -0,0 +1,170 @@ +#ifndef PROBLEM_GENERATOR_H +#define PROBLEM_GENERATOR_H + +#include "enums.h" +#include "global.h" + +#include "arch/traits.h" + +#include "archetypes/particle_injector.h" +#include "archetypes/problem_generator.h" +#include "framework/domain/metadomain.h" + +#include + +namespace user { + using namespace ntt; + + template + struct InitFields { + InitFields(real_t bsurf, real_t rstar) : Bsurf { bsurf }, Rstar { rstar } {} + + Inline auto bx1(const coord_t& x_Ph) const -> real_t { + return Bsurf * math::cos(x_Ph[1]) / CUBE(x_Ph[0] / Rstar); + } + + Inline auto bx2(const 
coord_t& x_Ph) const -> real_t { + return Bsurf * HALF * math::sin(x_Ph[1]) / CUBE(x_Ph[0] / Rstar); + } + + private: + const real_t Bsurf, Rstar; + }; + + template + struct DriveFields : public InitFields { + DriveFields(real_t time, real_t bsurf, real_t rstar) + : InitFields { bsurf, rstar } + , time { time } {} + + using InitFields::bx1; + using InitFields::bx2; + + Inline auto bx3(const coord_t&) const -> real_t { + return ZERO; + } + + Inline auto ex1(const coord_t&) const -> real_t { + return ZERO; + } + + Inline auto ex2(const coord_t& x_Ph) const -> real_t { + return ZERO; + } + + Inline auto ex3(const coord_t&) const -> real_t { + return ZERO; + } + + private: + const real_t time; + }; + + template + struct Inflow : public arch::EnergyDistribution { + Inflow(const M& metric, real_t vin) + : arch::EnergyDistribution { metric } + , vin { vin } {} + + Inline void operator()(const coord_t&, + vec_t& v_Ph, + unsigned short) const override { + v_Ph[0] = -vin; + } + + private: + const real_t vin; + }; + + template + struct Sphere : public arch::SpatialDistribution { + Sphere(const M& metric, real_t r0, real_t dr) + : arch::SpatialDistribution { metric } + , r0 { r0 } + , dr { dr } {} + + Inline auto operator()(const coord_t& x_Ph) const -> real_t override { + return math::exp(-SQR((x_Ph[0] - r0) / dr)) * + (x_Ph[1] > 0.25 && x_Ph[1] < constant::PI - 0.25); + } + + private: + const real_t r0, dr; + }; + + template + struct PGen : public arch::ProblemGenerator { + static constexpr auto engines { traits::compatible_with::value }; + static constexpr auto metrics { + traits::compatible_with::value + }; + static constexpr auto dimensions { traits::compatible_with::value }; + + using arch::ProblemGenerator::D; + using arch::ProblemGenerator::C; + using arch::ProblemGenerator::params; + + const real_t Bsurf, pump_period, pump_ampl, pump_radius, Rstar; + const real_t vin, drinj; + InitFields init_flds; + + inline PGen(const SimulationParams& p, const Metadomain& m) + : 
arch::ProblemGenerator { p } + , Bsurf { p.template get("setup.Bsurf", ONE) } + , pump_period { p.template get("setup.pump_period") } + , pump_ampl { p.template get("setup.pump_ampl") } + , pump_radius { p.template get("setup.pump_radius") } + , Rstar { m.mesh().extent(in::x1).first } + , vin { p.template get("setup.vin") } + , drinj { p.template get("setup.drinj") } + , init_flds { Bsurf, Rstar } {} + + auto FieldDriver(real_t time) const -> DriveFields { + return DriveFields { time, Bsurf, Rstar }; + } + + void CustomPostStep(std::size_t, long double time, Domain& domain) { + const real_t radius = pump_radius + + pump_ampl * + math::sin(time * constant::TWO_PI / pump_period); + const real_t dr = 1.0; + const auto& metric = domain.mesh.metric; + auto EM = domain.fields.em; + Kokkos::parallel_for( + "outerBC", + domain.mesh.rangeActiveCells(), + Lambda(index_t i1, index_t i2) { + const auto i1_ = COORD(i1), i2_ = COORD(i2); + const auto r = metric.template convert<1, Crd::Cd, Crd::Ph>(i1_); + if (r > radius - 5 * dr) { + const auto smooth = HALF * (ONE - math::tanh((r - radius) / dr)); + EM(i1, i2, em::ex1) = smooth * EM(i1, i2, em::ex1); + EM(i1, i2, em::ex2) = smooth * EM(i1, i2, em::ex2); + EM(i1, i2, em::ex3) = smooth * EM(i1, i2, em::ex3); + EM(i1, i2, em::bx1) = smooth * EM(i1, i2, em::bx1); + EM(i1, i2, em::bx2) = smooth * EM(i1, i2, em::bx2); + EM(i1, i2, em::bx3) = smooth * EM(i1, i2, em::bx3); + } + }); + + if (time < pump_period * 0.25) { + const auto energy_dist = Inflow(domain.mesh.metric, vin); + const auto spatial_dist = Sphere(domain.mesh.metric, radius, drinj); + const auto injector = arch::NonUniformInjector( + energy_dist, + spatial_dist, + { 1, 2 }); + + arch::InjectNonUniform>( + params, + domain, + injector, + ONE, + true); + } + } + }; + +} // namespace user + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d75094c2b..a41b84900 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,27 +1,30 @@ # 
------------------------------ # @defines: entity [STATIC/SHARED] +# # @sources: -# - entity.cpp +# +# * entity.cpp +# # @depends: -# - ntt_global [required] -# - ntt_framework [required] -# - ntt_metrics [required] -# - ntt_engine [required] -# - ntt_pgen [required] +# +# * ntt_global [required] +# * ntt_framework [required] +# * ntt_metrics [required] +# * ntt_engine [required] +# * ntt_pgen [required] +# # @uses: -# - kokkos [required] -# - plog [required] -# - toml11 [required] -# - ADIOS2 [optional] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * toml11 [required] +# * ADIOS2 [optional] +# * mpi [optional] # ------------------------------ - set(ENTITY ${PROJECT_NAME}.xc) set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SOURCES - ${SRC_DIR}/entity.cpp -) +set(SOURCES ${SRC_DIR}/entity.cpp) add_executable(${ENTITY} entity.cpp) # dependencies @@ -32,7 +35,7 @@ add_subdirectory(${SRC_DIR}/archetypes ${CMAKE_CURRENT_BINARY_DIR}/archetypes) add_subdirectory(${SRC_DIR}/framework ${CMAKE_CURRENT_BINARY_DIR}/framework) add_subdirectory(${SRC_DIR}/engines ${CMAKE_CURRENT_BINARY_DIR}/engines) -if (${output}) +if(${output}) add_subdirectory(${SRC_DIR}/output ${CMAKE_CURRENT_BINARY_DIR}/output) add_subdirectory(${SRC_DIR}/checkpoint ${CMAKE_CURRENT_BINARY_DIR}/checkpoint) endif() diff --git a/src/archetypes/CMakeLists.txt b/src/archetypes/CMakeLists.txt index 7883ba6a5..8e2f325af 100644 --- a/src/archetypes/CMakeLists.txt +++ b/src/archetypes/CMakeLists.txt @@ -1,13 +1,19 @@ # ------------------------------ # @defines: ntt_archetypes [INTERFACE] +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_global [required] -# - ntt_kernels [required] +# +# * ntt_global [required] +# * ntt_kernels [required] +# # @uses: -# - kokkos [required] -# - mpi [optional] +# +# * kokkos [required] +# * mpi [optional] # ------------------------------ add_library(ntt_archetypes INTERFACE) @@ -17,5 +23,5 @@ add_dependencies(ntt_archetypes ${libs}) 
target_link_libraries(ntt_archetypes INTERFACE ${libs}) target_include_directories(ntt_archetypes - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../ -) \ No newline at end of file + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../) + diff --git a/src/archetypes/tests/CMakeLists.txt b/src/archetypes/tests/CMakeLists.txt index 4ffc35322..694a6b4f9 100644 --- a/src/archetypes/tests/CMakeLists.txt +++ b/src/archetypes/tests/CMakeLists.txt @@ -1,9 +1,11 @@ # ------------------------------ # @brief: Generates tests for the `ntt_archetypes` module +# # @uses: -# - kokkos [required] -# - plog [required] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * mpi [optional] # ------------------------------ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) diff --git a/src/checkpoint/CMakeLists.txt b/src/checkpoint/CMakeLists.txt index d97bd4a34..fa641bfb5 100644 --- a/src/checkpoint/CMakeLists.txt +++ b/src/checkpoint/CMakeLists.txt @@ -1,23 +1,28 @@ # ------------------------------ # @defines: ntt_checkpoint [STATIC/SHARED] +# # @sources: -# - writer.cpp -# - reader.cpp +# +# * writer.cpp +# * reader.cpp +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_global [required] +# +# * ntt_global [required] +# # @uses: -# - kokkos [required] -# - ADIOS2 [required] -# - mpi [optional] +# +# * kokkos [required] +# * ADIOS2 [required] +# * mpi [optional] # ------------------------------ set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SOURCES - ${SRC_DIR}/writer.cpp - ${SRC_DIR}/reader.cpp -) +set(SOURCES ${SRC_DIR}/writer.cpp ${SRC_DIR}/reader.cpp) add_library(ntt_checkpoint ${SOURCES}) set(libs ntt_global) @@ -25,7 +30,7 @@ add_dependencies(ntt_checkpoint ${libs}) target_link_libraries(ntt_checkpoint PUBLIC ${libs}) target_link_libraries(ntt_checkpoint PRIVATE stdc++fs) -target_include_directories(ntt_checkpoint +target_include_directories( + ntt_checkpoint PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../ - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../ -) + INTERFACE 
${CMAKE_CURRENT_SOURCE_DIR}/../) diff --git a/src/checkpoint/reader.cpp b/src/checkpoint/reader.cpp index 66fcd6757..9fc2d2640 100644 --- a/src/checkpoint/reader.cpp +++ b/src/checkpoint/reader.cpp @@ -35,16 +35,17 @@ namespace checkpoint { reader.Get(field_var, array_h.data(), adios2::Mode::Sync); Kokkos::deep_copy(array, array_h); } else { - raise::Error(fmt::format("Field variable: %s not found", field), HERE); + raise::Error(fmt::format("Field variable: %s not found", field.c_str()), + HERE); } } - auto ReadParticleCount(adios2::IO& io, - adios2::Engine& reader, - unsigned short s, - std::size_t local_dom, - std::size_t ndomains) - -> std::pair { + auto ReadParticleCount( + adios2::IO& io, + adios2::Engine& reader, + unsigned short s, + std::size_t local_dom, + std::size_t ndomains) -> std::pair { logger::Checkpoint(fmt::format("Reading particle count for: %d", s + 1), HERE); auto npart_var = io.InquireVariable( fmt::format("s%d_npart", s + 1)); @@ -97,7 +98,7 @@ namespace checkpoint { fmt::format("s%d_%s", s + 1, quantity.c_str())); if (var) { var.SetSelection(adios2::Box({ offset }, { count })); - const auto slice = std::pair { 0, count }; + const auto slice = range_tuple_t(0, count); auto array_h = Kokkos::create_mirror_view(array); reader.Get(var, Kokkos::subview(array_h, slice).data(), adios2::Mode::Sync); Kokkos::deep_copy(Kokkos::subview(array, slice), @@ -109,6 +110,28 @@ namespace checkpoint { } } + void ReadParticlePayloads(adios2::IO& io, + adios2::Engine& reader, + unsigned short s, + array_t& array, + std::size_t nplds, + std::size_t count, + std::size_t offset) { + logger::Checkpoint(fmt::format("Reading quantity: s%d_plds", s + 1), HERE); + auto var = io.InquireVariable(fmt::format("s%d_plds", s + 1)); + if (var) { + var.SetSelection(adios2::Box({ offset, 0 }, { count, nplds })); + const auto slice = range_tuple_t(0, count); + auto array_h = Kokkos::create_mirror_view(array); + reader.Get(var, + Kokkos::subview(array_h, slice, range_tuple_t(0, 
nplds)).data(), + adios2::Mode::Sync); + Kokkos::deep_copy(array, array_h); + } else { + raise::Error(fmt::format("Variable: s%d_plds not found", s + 1), HERE); + } + } + template void ReadFields(adios2::IO&, adios2::Engine&, const std::string&, diff --git a/src/checkpoint/reader.h b/src/checkpoint/reader.h index 2ea11bdb1..e5a91ab75 100644 --- a/src/checkpoint/reader.h +++ b/src/checkpoint/reader.h @@ -45,6 +45,14 @@ namespace checkpoint { std::size_t, std::size_t); + void ReadParticlePayloads(adios2::IO&, + adios2::Engine&, + unsigned short, + array_t&, + std::size_t, + std::size_t, + std::size_t); + } // namespace checkpoint #endif // CHECKPOINT_READER_H diff --git a/src/checkpoint/tests/CMakeLists.txt b/src/checkpoint/tests/CMakeLists.txt index 3d7475a52..54400652e 100644 --- a/src/checkpoint/tests/CMakeLists.txt +++ b/src/checkpoint/tests/CMakeLists.txt @@ -1,9 +1,11 @@ # ------------------------------ # @brief: Generates tests for the `ntt_checkpoint` module +# # @uses: -# - kokkos [required] -# - adios2 [required] -# - mpi [optional] +# +# * kokkos [required] +# * adios2 [required] +# * mpi [optional] # ------------------------------ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) @@ -13,15 +15,15 @@ function(gen_test title) set(src ${title}.cpp) add_executable(${exec} ${src}) - set (libs ntt_checkpoint ntt_global) + set(libs ntt_checkpoint ntt_global) add_dependencies(${exec} ${libs}) target_link_libraries(${exec} PRIVATE ${libs} stdc++fs) add_test(NAME "CHECKPOINT::${title}" COMMAND "${exec}") endfunction() -if (NOT ${mpi}) +if(NOT ${mpi}) gen_test(checkpoint-nompi) else() - # gen_test(checkpoint-mpi) + gen_test(checkpoint-mpi) endif() diff --git a/src/checkpoint/tests/checkpoint-mpi.cpp b/src/checkpoint/tests/checkpoint-mpi.cpp new file mode 100644 index 000000000..f97202ab1 --- /dev/null +++ b/src/checkpoint/tests/checkpoint-mpi.cpp @@ -0,0 +1,291 @@ +#include "enums.h" +#include "global.h" + +#include "utils/comparators.h" + +#include
"checkpoint/reader.h" +#include "checkpoint/writer.h" + +#include +#include +#include +#include + +#include +#include +#include + +using namespace ntt; +using namespace checkpoint; + +// Removes the directory the checkpoint is written to. +void cleanup() { + namespace fs = std::filesystem; + fs::path temp_path { "checkpoints" }; + fs::remove_all(temp_path); +} + +/** + * Round-trip test of the checkpoint Writer and reader routines under MPI: + * each rank fills its local fields and particle arrays, saves them, reads + * them back and compares with the originals. Returns 0 on success and 1 if + * an exception was caught. + */ +auto main(int argc, char* argv[]) -> int { + // MPI goes first so that it is already up when Kokkos initializes + MPI_Init(&argc, &argv); + Kokkos::initialize(argc, argv); + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + int exit_code = 0; + + try { + // assuming 4 ranks + // |------|------| + // | 2 | 3 | + // |------|------| + // | | | + // | 0 | 1 | + // |------|------| + const std::size_t g_nx1 = 20; + const std::size_t g_nx2 = 15; + const std::size_t g_nx1_gh = g_nx1 + 4 * N_GHOSTS; + const std::size_t g_nx2_gh = g_nx2 + 4 * N_GHOSTS; + + const std::size_t l_nx1 = 10; + const std::size_t l_nx2 = (rank < 2) ? 10 : 5; + + const std::size_t l_nx1_gh = l_nx1 + 2 * N_GHOSTS; + const std::size_t l_nx2_gh = l_nx2 + 2 * N_GHOSTS; + + // corner of the local block in the global (ghosted) array; ranks 2 and 3 + // start right above the ghosted x2-extent of ranks 0 and 1 + const std::size_t l_corner_x1 = (rank % 2 == 0) ? 0 : l_nx1_gh; + const std::size_t l_corner_x2 = (rank < 2) ?
0 : g_nx2_gh - l_nx2_gh; + + const std::size_t i1min = N_GHOSTS; + const std::size_t i2min = N_GHOSTS; + const std::size_t i1max = l_nx1 + N_GHOSTS; + const std::size_t i2max = l_nx2 + N_GHOSTS; + + const std::size_t npart1 = (rank % 2 + rank) * 23 + 100; + const std::size_t npart2 = (rank % 2 + rank) * 37 + 100; + + std::size_t npart1_offset = 0; + std::size_t npart2_offset = 0; + + std::size_t npart1_globtot = 0; + std::size_t npart2_globtot = 0; + + // offset of this rank = number of particles on all the lower ranks + for (auto r = 0; r < rank; ++r) { + npart1_offset += (r % 2 + r) * 23 + 100; + npart2_offset += (r % 2 + r) * 37 + 100; + } + + for (auto r = 0; r < size; ++r) { + npart1_globtot += (r % 2 + r) * 23 + 100; + npart2_globtot += (r % 2 + r) * 37 + 100; + } + + // init data + ndfield_t field1 { "fld1", l_nx1_gh, l_nx2_gh }; + ndfield_t field2 { "fld2", l_nx1_gh, l_nx2_gh }; + + array_t i1 { "i_1", npart1 }; + array_t u1 { "u_1", npart1 }; + array_t i2 { "i_2", npart2 }; + array_t u2 { "u_2", npart2 }; + array_t plds1 { "plds_1", npart1, 3 }; + + { + // fill data + Kokkos::parallel_for( + "fillFlds", + CreateRangePolicy({ i1min, i2min }, { i1max, i2max }), + Lambda(index_t i1, index_t i2) { + field1(i1, i2, 0) = static_cast(i1 + i2); + field1(i1, i2, 1) = static_cast(i1 * i2); + field1(i1, i2, 2) = static_cast(i1 / i2); + field1(i1, i2, 3) = static_cast(i1 - i2); + field1(i1, i2, 4) = static_cast(i2 / i1); + field1(i1, i2, 5) = static_cast(i1); + field2(i1, i2, 0) = static_cast(-(i1 + i2)); + field2(i1, i2, 1) = static_cast(-(i1 * i2)); + field2(i1, i2, 2) = static_cast(-(i1 / i2)); + field2(i1, i2, 3) = static_cast(-(i1 - i2)); + field2(i1, i2, 4) = static_cast(-(i2 / i1)); + field2(i1, i2, 5) = static_cast(-i1); + }); + Kokkos::parallel_for( + "fillPrtl1", + npart1, + Lambda(index_t p) { + u1(p) = static_cast(p); + i1(p) = static_cast(p); + plds1(p, 0) = static_cast(p); + plds1(p, 1) = static_cast(p * p); + plds1(p, 2) = static_cast(p * p * p); + }); + Kokkos::parallel_for( + "fillPrtl2", + npart2, + Lambda(index_t p) { +
u2(p) = -static_cast(p); + i2(p) = -static_cast(p); + }); + } + + // one ADIOS2 instance on the communicator shared by all ranks, as in Engine + adios2::ADIOS adios { MPI_COMM_WORLD }; + + { + // write checkpoint + Writer writer; + writer.init(&adios, 0, 0.0, 1); + + writer.defineFieldVariables(SimEngine::GRPIC, + { g_nx1_gh, g_nx2_gh }, + { l_corner_x1, l_corner_x2 }, + { l_nx1_gh, l_nx2_gh }); + + writer.defineParticleVariables(Coord::Sph, Dim::_2D, 2, { 3, 0 }); + + writer.beginSaving(0, 0.0); + + writer.saveField("em", field1); + writer.saveField("em0", field2); + + // NOTE(review): the counts are read back below with (rank, size), but are + // saved here with (1, 0) on every rank -- confirm the two agree under MPI + writer.savePerDomainVariable("s1_npart", 1, 0, npart1); + writer.savePerDomainVariable("s2_npart", 1, 0, npart2); + + writer.saveParticleQuantity("s1_i1", + npart1_globtot, + npart1_offset, + npart1, + i1); + writer.saveParticleQuantity("s1_ux1", + npart1_globtot, + npart1_offset, + npart1, + u1); + writer.saveParticleQuantity("s2_i1", + npart2_globtot, + npart2_offset, + npart2, + i2); + writer.saveParticleQuantity("s2_ux1", + npart2_globtot, + npart2_offset, + npart2, + u2); + + writer.saveParticlePayloads("s1_plds", + 3, + npart1_globtot, + npart1_offset, + npart1, + plds1); + + writer.endSaving(); + } + + { + // read checkpoint + ndfield_t field1_read { "fld1_read", l_nx1_gh, l_nx2_gh }; + ndfield_t field2_read { "fld2_read", l_nx1_gh, l_nx2_gh }; + + array_t i1_read { "i_1", npart1 }; + array_t u1_read { "u_1", npart1 }; + array_t i2_read { "i_2", npart2 }; + array_t u2_read { "u_2", npart2 }; + array_t plds1_read { "plds_1", npart1, 3 }; + + adios2::IO io = adios.DeclareIO("checkpointRead"); + adios2::Engine reader = io.Open("checkpoints/step-00000000.bp", + adios2::Mode::Read); + reader.BeginStep(); + + auto fieldRange = adios2::Box({ l_corner_x1, l_corner_x2, 0 }, + { l_nx1_gh, l_nx2_gh, 6 }); + ReadFields(io, reader, "em", fieldRange, field1_read); + ReadFields(io, reader, "em0", fieldRange, field2_read); + + auto [nprtl1, noff1] = ReadParticleCount(io, reader, 0, rank, size); + auto [nprtl2, noff2] = ReadParticleCount(io, reader, 1, rank, size); + + ReadParticleData(io, reader, "ux1",
0, u1_read, nprtl1, noff1); + ReadParticleData(io, reader, "ux1", 1, u2_read, nprtl2, noff2); + ReadParticleData(io, reader, "i1", 0, i1_read, nprtl1, noff1); + ReadParticleData(io, reader, "i1", 1, i2_read, nprtl2, noff2); + ReadParticlePayloads(io, reader, 0, plds1_read, 3, nprtl1, noff1); + + reader.EndStep(); + reader.Close(); + + // check the validity + Kokkos::parallel_for( + "checkFields", + CreateRangePolicy({ 0, 0 }, { l_nx1_gh, l_nx2_gh }), + Lambda(index_t i1, index_t i2) { + for (int i = 0; i < 6; ++i) { + if (not cmp::AlmostEqual(field1(i1, i2, i), field1_read(i1, i2, i))) { + raise::KernelError(HERE, "Field1 read failed"); + } + if (not cmp::AlmostEqual(field2(i1, i2, i), field2_read(i1, i2, i))) { + raise::KernelError(HERE, "Field2 read failed"); + } + } + }); + + raise::ErrorIf(npart1 != nprtl1, "Particle count 1 mismatch", HERE); + raise::ErrorIf(npart2 != nprtl2, "Particle count 2 mismatch", HERE); + raise::ErrorIf(noff1 != npart1_offset, "Particle offset 1 mismatch", HERE); + raise::ErrorIf(noff2 != npart2_offset, "Particle offset 2 mismatch", HERE); + + Kokkos::parallel_for( + "checkPrtl1", + nprtl1, + Lambda(index_t p) { + if (not cmp::AlmostEqual(u1(p), u1_read(p))) { + raise::KernelError(HERE, "u1 read failed"); + } + if (i1(p) != i1_read(p)) { + raise::KernelError(HERE, "i1 read failed"); + } + for (auto l = 0; l < 3; ++l) { + if (not cmp::AlmostEqual(plds1(p, l), plds1_read(p, l))) { + raise::KernelError(HERE, "plds1 read failed"); + } + } + }); + Kokkos::parallel_for( + "checkPrtl2", + nprtl2, + Lambda(index_t p) { + if (not cmp::AlmostEqual(u2(p), u2_read(p))) { + raise::KernelError(HERE, "u2 read failed"); + } + if (i2(p) != i2_read(p)) { + raise::KernelError(HERE, "i2 read failed"); + } + }); + } + + } catch (std::exception& e) { + std::cerr << e.what() << std::endl; + exit_code = 1; + } + // every rank has to be done with the file before rank 0 removes it + MPI_Barrier(MPI_COMM_WORLD); + if (rank == 0) { + cleanup(); + } + // Kokkos is finalized before MPI (reverse of the initialization order) + Kokkos::finalize(); + MPI_Finalize(); + return exit_code; +} diff --git a/src/checkpoint/tests/checkpoint-nompi.cpp
b/src/checkpoint/tests/checkpoint-nompi.cpp index 8f7a522fd..23dbd8871 100644 --- a/src/checkpoint/tests/checkpoint-nompi.cpp +++ b/src/checkpoint/tests/checkpoint-nompi.cpp @@ -29,9 +29,9 @@ auto main(int argc, char* argv[]) -> int { try { constexpr auto nx1 = 10; constexpr auto nx1_gh = nx1 + 2 * N_GHOSTS; - constexpr auto nx2 = 10; + constexpr auto nx2 = 13; constexpr auto nx2_gh = nx2 + 2 * N_GHOSTS; - constexpr auto nx3 = 10; + constexpr auto nx3 = 9; constexpr auto nx3_gh = nx3 + 2 * N_GHOSTS; constexpr auto i1min = N_GHOSTS; constexpr auto i2min = N_GHOSTS; diff --git a/src/checkpoint/writer.cpp b/src/checkpoint/writer.cpp index 9ef0b51c7..a12e3ef26 100644 --- a/src/checkpoint/writer.cpp +++ b/src/checkpoint/writer.cpp @@ -84,6 +84,7 @@ namespace checkpoint { { adios2::UnknownDim }, { adios2::UnknownDim }, { adios2::UnknownDim }); + for (auto d { 0u }; d < dim; ++d) { m_io.DefineVariable(fmt::format("s%d_i%d", s + 1, d + 1), { adios2::UnknownDim }, @@ -102,18 +103,21 @@ namespace checkpoint { { adios2::UnknownDim }, { adios2::UnknownDim }); } + if (dim == Dim::_2D and C != ntt::Coord::Cart) { m_io.DefineVariable(fmt::format("s%d_phi", s + 1), { adios2::UnknownDim }, { adios2::UnknownDim }, { adios2::UnknownDim }); } + for (auto d { 0u }; d < 3; ++d) { m_io.DefineVariable(fmt::format("s%d_ux%d", s + 1, d + 1), { adios2::UnknownDim }, { adios2::UnknownDim }, { adios2::UnknownDim }); } + m_io.DefineVariable(fmt::format("s%d_tag", s + 1), { adios2::UnknownDim }, { adios2::UnknownDim }, @@ -122,11 +126,11 @@ namespace checkpoint { { adios2::UnknownDim }, { adios2::UnknownDim }, { adios2::UnknownDim }); - for (auto p { 0u }; p < nplds[s]; ++p) { - m_io.DefineVariable(fmt::format("s%d_pld%d", s + 1, p + 1), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); + if (nplds[s] > 0) { + m_io.DefineVariable(fmt::format("s%d_plds", s + 1), + { adios2::UnknownDim, nplds[s] }, + { adios2::UnknownDim, 0 }, + { adios2::UnknownDim, nplds[s] }); } } } 
@@ -238,6 +242,25 @@ namespace checkpoint { m_writer.Put(var, data_sub.data(), adios2::Mode::Sync); } + void Writer::saveParticlePayloads(const std::string& quantity, /* name of the variable looked up in m_io */ + std::size_t nplds, /* second dimension of the variable (number of columns written) */ + std::size_t glob_total, /* first dimension of the variable's global shape */ + std::size_t loc_offset, /* first row of the selection written by this call */ + std::size_t loc_size, /* number of rows written by this call */ + const array_t& data) { /* Writes the first loc_size rows of the 2D array `data` into rows [loc_offset, loc_offset + loc_size) of the variable `quantity`. */ + const auto slice = range_tuple_t(0, loc_size); + auto var = m_io.InquireVariable(quantity); + + var.SetShape({ glob_total, nplds }); /* global shape is (glob_total, nplds) */ + var.SetSelection( + adios2::Box({ loc_offset, 0 }, { loc_size, nplds })); + + auto data_h = Kokkos::create_mirror_view(data); + Kokkos::deep_copy(data_h, data); /* bring the whole array to the host mirror before slicing */ + auto data_sub = Kokkos::subview(data_h, slice, range_tuple_t(0, nplds)); + m_writer.Put(var, data_sub.data(), adios2::Mode::Sync); /* NOTE(review): handing over the raw pointer assumes the selected rows are laid out contiguously -- confirm for the layout of array_t */ + } + template void Writer::savePerDomainVariable(const std::string&, std::size_t, std::size_t, diff --git a/src/checkpoint/writer.h b/src/checkpoint/writer.h index 34b5f043f..346bee24a 100644 --- a/src/checkpoint/writer.h +++ b/src/checkpoint/writer.h @@ -69,10 +69,18 @@ namespace checkpoint { std::size_t, const array_t&); + void saveParticlePayloads(const std::string&, + std::size_t, + std::size_t, + std::size_t, + std::size_t, + const array_t&); + void defineFieldVariables(const ntt::SimEngine&, const std::vector&, const std::vector&, const std::vector&); + void defineParticleVariables(const ntt::Coord&, Dimension, std::size_t, diff --git a/src/engines/CMakeLists.txt b/src/engines/CMakeLists.txt index 2ab7289b2..6da2f4efd 100644 --- a/src/engines/CMakeLists.txt +++ b/src/engines/CMakeLists.txt @@ -1,37 +1,43 @@ # ------------------------------ # @defines: ntt_engines [STATIC/SHARED] +# # @sources: -# - engine_printer.cpp -# - engine_init.cpp -# - engine_run.cpp +# +# * engine_printer.cpp +# * engine_init.cpp +# * engine_run.cpp +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_global [required] -# - ntt_framework [required] -# - ntt_metrics [required] -# - ntt_kernels [required] -# - ntt_archetypes [required] -# - ntt_pgen [required] -# - 
ntt_output [optional] +# +# * ntt_global [required] +# * ntt_framework [required] +# * ntt_metrics [required] +# * ntt_kernels [required] +# * ntt_archetypes [required] +# * ntt_pgen [required] +# * ntt_output [optional] +# # @uses: -# - kokkos [required] -# - plog [required] -# - toml11 [required] -# - adios2 [optional] -# - hdf5 [optional] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * toml11 [required] +# * adios2 [optional] +# * hdf5 [optional] +# * mpi [optional] # ------------------------------ set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SOURCES - ${SRC_DIR}/engine_printer.cpp - ${SRC_DIR}/engine_init.cpp - ${SRC_DIR}/engine_run.cpp -) +set(SOURCES ${SRC_DIR}/engine_printer.cpp ${SRC_DIR}/engine_init.cpp + ${SRC_DIR}/engine_run.cpp) add_library(ntt_engines ${SOURCES}) -set(libs ntt_global ntt_framework ntt_metrics ntt_archetypes ntt_kernels ntt_pgen) +set(libs ntt_global ntt_framework ntt_metrics ntt_archetypes ntt_kernels + ntt_pgen) if(${output}) list(APPEND libs ntt_output hdf5::hdf5) endif() @@ -39,7 +45,7 @@ add_dependencies(ntt_engines ${libs}) target_link_libraries(ntt_engines PUBLIC ${libs}) target_compile_definitions(ntt_engines PRIVATE PGEN=\"${PGEN}\") -target_include_directories(ntt_engines +target_include_directories( + ntt_engines PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../ - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../ -) + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../) diff --git a/src/engines/engine.hpp b/src/engines/engine.hpp index 5b7caa502..dac553dcd 100644 --- a/src/engines/engine.hpp +++ b/src/engines/engine.hpp @@ -55,10 +55,12 @@ namespace ntt { static_assert(user::PGen::is_pgen, "unrecognized problem generator"); protected: -#if MPI_ENABLED +#if defined(OUTPUT_ENABLED) + #if defined(MPI_ENABLED) adios2::ADIOS m_adios { MPI_COMM_WORLD }; -#else + #else adios2::ADIOS m_adios; + #endif #endif SimulationParams m_params; diff --git a/src/engines/engine_init.cpp b/src/engines/engine_init.cpp index e4ce9fa5f..0239724e1 
100644 --- a/src/engines/engine_init.cpp +++ b/src/engines/engine_init.cpp @@ -50,6 +50,7 @@ namespace ntt { }); } } else { +#if defined(OUTPUT_ENABLED) // read simulation data from the checkpoint raise::ErrorIf( m_params.template get("checkpoint.start_step") == 0, @@ -57,6 +58,11 @@ namespace ntt { HERE); logger::Checkpoint("Resuming simulation from a checkpoint", HERE); m_metadomain.ContinueFromCheckpoint(&m_adios, m_params); +#else + raise::Error( + "Resuming simulation from a checkpoint requires -D output=ON", + HERE); +#endif } } } diff --git a/src/engines/engine_printer.cpp b/src/engines/engine_printer.cpp index 2608ea2f6..2a7ee4405 100644 --- a/src/engines/engine_printer.cpp +++ b/src/engines/engine_printer.cpp @@ -105,8 +105,8 @@ namespace ntt { color::RESET); } - auto bytes_to_human_readable(std::size_t bytes) - -> std::pair { + auto bytes_to_human_readable( + std::size_t bytes) -> std::pair { const std::vector units { "B", "KB", "MB", "GB", "TB" }; std::size_t unit_idx = 0; auto size = static_cast(bytes); @@ -415,13 +415,13 @@ namespace ntt { } } - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; + template void Engine>::print_report() const; + template void Engine>::print_report() const; + template void Engine>::print_report() const; + template void Engine>::print_report() const; + template void Engine>::print_report() const; + template void Engine>::print_report() const; + template void Engine>::print_report() const; + template void Engine>::print_report() const; } // namespace ntt diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index bec5b8652..506fd121d 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -26,8 +26,8 @@ namespace ntt { "CurrentFiltering", "CurrentDeposit", "ParticlePusher", "FieldBoundaries", "ParticleBoundaries", "Communications", - 
"Injector", "Sorting", - "Custom", "Output", + "Injector", "Custom", + "PrtlClear", "Output", "Checkpoint" }, []() { Kokkos::fence(); @@ -37,9 +37,9 @@ namespace ntt { const auto diag_interval = m_params.get( "diagnostics.interval"); - auto time_history = pbar::DurationHistory { 1000 }; - const auto sort_interval = m_params.template get( - "particles.sort_interval"); + auto time_history = pbar::DurationHistory { 1000 }; + const auto clear_interval = m_params.template get( + "particles.clear_interval"); // main algorithm loop while (step < max_steps) { @@ -56,7 +56,8 @@ namespace ntt { }); timers.stop("Custom"); } - auto print_sorting = (sort_interval > 0 and step % sort_interval == 0); + auto print_prtl_clear = (clear_interval > 0 and + step % clear_interval == 0 and step > 0); // advance time & step time += dt; @@ -109,7 +110,7 @@ namespace ntt { m_metadomain.species_labels(), m_metadomain.l_npart_perspec(), m_metadomain.l_maxnpart_perspec(), - print_sorting, + print_prtl_clear, print_output, print_checkpoint, m_params.get("diagnostics.colored_stdout")); @@ -119,12 +120,12 @@ namespace ntt { } } - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; - template class Engine>; + template void Engine>::run(); + template void Engine>::run(); + template void Engine>::run(); + template void Engine>::run(); + template void Engine>::run(); + template void Engine>::run(); + template void Engine>::run(); + template void Engine>::run(); } // namespace ntt diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 78c8f371e..9f5e4551f 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -42,7 +42,6 @@ #include #include -#include #include namespace ntt { @@ -80,8 +79,8 @@ namespace ntt { "algorithms.toggles.fieldsolver"); const auto deposit_enabled = m_params.template get( "algorithms.toggles.deposit"); - const auto sort_interval = 
m_params.template get( - "particles.sort_interval"); + const auto clear_interval = m_params.template get( + "particles.clear_interval"); if (step == 0) { // communicate fields and apply BCs on the first timestep @@ -102,6 +101,7 @@ namespace ntt { timers.start("FieldBoundaries"); FieldBoundaries(dom, BC::B); timers.stop("FieldBoundaries"); + Kokkos::fence(); } { @@ -126,9 +126,7 @@ namespace ntt { } timers.start("Communications"); - if ((sort_interval > 0) and (step % sort_interval == 0)) { - m_metadomain.CommunicateParticles(dom, &timers); - } + m_metadomain.CommunicateParticles(dom); timers.stop("Communications"); } @@ -169,6 +167,12 @@ namespace ntt { ParticleInjector(dom); timers.stop("Injector"); } + + if (clear_interval > 0 and step % clear_interval == 0 and step > 0) { + timers.start("PrtlClear"); + m_metadomain.RemoveDeadParticles(dom); + timers.stop("PrtlClear"); + } } /* algorithm substeps --------------------------------------------------- */ @@ -580,17 +584,17 @@ namespace ntt { void FieldBoundaries(domain_t& domain, BCTags tags) { for (auto& direction : dir::Directions::orth) { - if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::ABSORB) { - AbsorbFieldsIn(direction, domain, tags); + if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::MATCH) { + MatchFieldsIn(direction, domain, tags); } else if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::AXIS) { if (domain.mesh.flds_bc_in(direction) == FldsBC::AXIS) { AxisFieldsIn(direction, domain, tags); } } else if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::ATMOSPHERE) { AtmosphereFieldsIn(direction, domain, tags); - } else if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::CONDUCTOR) { - if (domain.mesh.flds_bc_in(direction) == FldsBC::CONDUCTOR) { - ConductorFieldsIn(direction, domain, tags); + } else if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::FIXED) { + if (domain.mesh.flds_bc_in(direction) == FldsBC::FIXED) { + FixedFieldsIn(direction, domain, tags); } } else 
if (m_metadomain.mesh().flds_bc_in(direction) == FldsBC::CUSTOM) { if (domain.mesh.flds_bc_in(direction) == FldsBC::CUSTOM) { @@ -602,14 +606,13 @@ namespace ntt { } // loop over directions } - void AbsorbFieldsIn(dir::direction_t direction, - domain_t& domain, - BCTags tags) { + void MatchFieldsIn(dir::direction_t direction, + domain_t& domain, + BCTags tags) { /** - * absorbing boundaries + * matching boundaries */ - const auto ds = m_params.template get( - "grid.boundaries.absorb.ds"); + const auto ds = m_params.template get("grid.boundaries.match.ds"); const auto dim = direction.get_dim(); real_t xg_min, xg_max, xg_edge; auto sign = direction.get_sign(); @@ -648,40 +651,49 @@ namespace ntt { range_min[d] = intersect_range[d].first; range_max[d] = intersect_range[d].second; } - if (dim == in::x1) { - Kokkos::parallel_for( - "AbsorbFields", - CreateRangePolicy(range_min, range_max), - kernel::AbsorbBoundaries_kernel(domain.fields.em, - domain.mesh.metric, - xg_edge, - ds, - tags)); - } else if (dim == in::x2) { - if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { - Kokkos::parallel_for( - "AbsorbFields", - CreateRangePolicy(range_min, range_max), - kernel::AbsorbBoundaries_kernel(domain.fields.em, - domain.mesh.metric, - xg_edge, - ds, - tags)); - } else { - raise::Error("Invalid dimension", HERE); - } - } else if (dim == in::x3) { - if constexpr (M::Dim == Dim::_3D) { + if constexpr (traits::has_member::value) { + auto match_fields = m_pgen.MatchFields(time); + if (dim == in::x1) { Kokkos::parallel_for( - "AbsorbFields", + "MatchFields", CreateRangePolicy(range_min, range_max), - kernel::AbsorbBoundaries_kernel(domain.fields.em, - domain.mesh.metric, - xg_edge, - ds, - tags)); - } else { - raise::Error("Invalid dimension", HERE); + kernel::bc::MatchBoundaries_kernel( + domain.fields.em, + match_fields, + domain.mesh.metric, + xg_edge, + ds, + tags)); + } else if (dim == in::x2) { + if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { + 
Kokkos::parallel_for( + "MatchFields", + CreateRangePolicy(range_min, range_max), + kernel::bc::MatchBoundaries_kernel( + domain.fields.em, + match_fields, + domain.mesh.metric, + xg_edge, + ds, + tags)); + } else { + raise::Error("Invalid dimension", HERE); + } + } else if (dim == in::x3) { + if constexpr (M::Dim == Dim::_3D) { + Kokkos::parallel_for( + "MatchFields", + CreateRangePolicy(range_min, range_max), + kernel::bc::MatchBoundaries_kernel( + domain.fields.em, + match_fields, + domain.mesh.metric, + xg_edge, + ds, + tags)); + } else { + raise::Error("Invalid dimension", HERE); + } } } } @@ -704,12 +716,121 @@ namespace ntt { Kokkos::parallel_for( "AxisBCFields", domain.mesh.n_all(in::x1), - kernel::AxisBoundaries_kernel(domain.fields.em, i2_min, tags)); + kernel::bc::AxisBoundaries_kernel(domain.fields.em, + i2_min, + tags)); } else { Kokkos::parallel_for( "AxisBCFields", domain.mesh.n_all(in::x1), - kernel::AxisBoundaries_kernel(domain.fields.em, i2_max, tags)); + kernel::bc::AxisBoundaries_kernel(domain.fields.em, + i2_max, + tags)); + } + } + + void FixedFieldsIn(dir::direction_t direction, + domain_t& domain, + BCTags tags) { + /** + * fixed field boundaries + */ + const auto sign = direction.get_sign(); + const auto dim = direction.get_dim(); + raise::ErrorIf(dim != in::x1 and M::CoordType != Coord::Cart, + "Fixed BCs only implemented for x1 in " + "non-cartesian coordinates", + HERE); + em normal_b_comp, tang_e_comp1, tang_e_comp2; + if (dim == in::x1) { + normal_b_comp = em::bx1; + tang_e_comp1 = em::ex2; + tang_e_comp2 = em::ex3; + } else if (dim == in::x2) { + normal_b_comp = em::bx2; + tang_e_comp1 = em::ex1; + tang_e_comp2 = em::ex3; + } else if (dim == in::x3) { + normal_b_comp = em::bx3; + tang_e_comp1 = em::ex1; + tang_e_comp2 = em::ex2; + } else { + raise::Error("Invalid dimension", HERE); + } + std::vector xi_min, xi_max; + const std::vector all_dirs { in::x1, in::x2, in::x3 }; + for (unsigned short d { 0 }; d < static_cast(M::Dim); ++d) { + 
const auto dd = all_dirs[d]; + if (dim == dd) { + if (sign > 0) { // + direction + xi_min.push_back(domain.mesh.n_all(dd) - N_GHOSTS); + xi_max.push_back(domain.mesh.n_all(dd)); + } else { // - direction + xi_min.push_back(0); + xi_max.push_back(N_GHOSTS); + } + } else { + xi_min.push_back(0); + xi_max.push_back(domain.mesh.n_all(dd)); + } + } + raise::ErrorIf(xi_min.size() != xi_max.size() or + xi_min.size() != static_cast(M::Dim), + "Invalid range size", + HERE); + std::vector comps; + if (tags & BC::E) { + comps.push_back(tang_e_comp1); + comps.push_back(tang_e_comp2); + } + if (tags & BC::B) { + comps.push_back(normal_b_comp); + } + if constexpr (traits::has_member::value) { + raise::Error("Non-const fixed fields not implemented", HERE); + } else if constexpr ( + traits::has_member::value) { + for (const auto& comp : comps) { + auto value = ZERO; + bool shouldset = false; + if constexpr ( + traits::has_member::value) { + // if fix field function present, read from it + const auto newset = m_pgen.FixFieldsConst( + (bc_in)(sign * ((short)dim + 1)), + (em)comp); + value = newset.first; + shouldset = newset.second; + } + if (shouldset) { + if constexpr (M::Dim == Dim::_1D) { + Kokkos::deep_copy( + Kokkos::subview(domain.fields.em, + std::make_pair(xi_min[0], xi_max[0]), + comp), + value); + } else if constexpr (M::Dim == Dim::_2D) { + Kokkos::deep_copy( + Kokkos::subview(domain.fields.em, + std::make_pair(xi_min[0], xi_max[0]), + std::make_pair(xi_min[1], xi_max[1]), + comp), + value); + } else if constexpr (M::Dim == Dim::_3D) { + Kokkos::deep_copy( + Kokkos::subview(domain.fields.em, + std::make_pair(xi_min[0], xi_max[0]), + std::make_pair(xi_min[1], xi_max[1]), + std::make_pair(xi_min[2], xi_max[2]), + comp), + value); + } else { + raise::Error("Invalid dimension", HERE); + } + } + } + } else { + raise::Error("Fixed fields not present (both const and non-const)", HERE); } } @@ -717,9 +838,9 @@ namespace ntt { domain_t& domain, BCTags tags) { /** - * atmosphere 
boundaries + * atmosphere field boundaries */ - if constexpr (traits::has_member::value) { + if constexpr (traits::has_member::value) { const auto [sign, dim, xg_min, xg_max] = get_atm_extent(direction); const auto dd = static_cast(dim); boundaries_t box; @@ -748,7 +869,7 @@ namespace ntt { range_min[d] = intersect_range[d].first; range_max[d] = intersect_range[d].second; } - auto field_driver = m_pgen.FieldDriver(time); + auto atm_fields = m_pgen.AtmFields(time); std::size_t il_edge; if (sign > 0) { il_edge = range_min[dd] - N_GHOSTS; @@ -761,9 +882,9 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::AtmosphereBoundaries_kernel( + kernel::bc::EnforcedBoundaries_kernel( domain.fields.em, - field_driver, + atm_fields, domain.mesh.metric, il_edge, tags)); @@ -771,9 +892,9 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::AtmosphereBoundaries_kernel( + kernel::bc::EnforcedBoundaries_kernel( domain.fields.em, - field_driver, + atm_fields, domain.mesh.metric, il_edge, tags)); @@ -784,9 +905,9 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::AtmosphereBoundaries_kernel( + kernel::bc::EnforcedBoundaries_kernel( domain.fields.em, - field_driver, + atm_fields, domain.mesh.metric, il_edge, tags)); @@ -794,9 +915,9 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::AtmosphereBoundaries_kernel( + kernel::bc::EnforcedBoundaries_kernel( domain.fields.em, - field_driver, + atm_fields, domain.mesh.metric, il_edge, tags)); @@ -810,9 +931,9 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::AtmosphereBoundaries_kernel( + kernel::bc::EnforcedBoundaries_kernel( domain.fields.em, - field_driver, + atm_fields, domain.mesh.metric, il_edge, tags)); @@ -820,9 +941,9 @@ namespace ntt { Kokkos::parallel_for( "AtmosphereBCFields", range, - kernel::AtmosphereBoundaries_kernel( + kernel::bc::EnforcedBoundaries_kernel( domain.fields.em, - 
field_driver, + atm_fields, domain.mesh.metric, il_edge, tags)); @@ -834,87 +955,7 @@ namespace ntt { raise::Error("Invalid dimension", HERE); } } else { - raise::Error("Field driver not implemented in PGEN for atmosphere BCs", - HERE); - } - } - - void ConductorFieldsIn(dir::direction_t direction, - domain_t& domain, - BCTags tags) { - const auto sign = direction.get_sign(); - const auto dim = direction.get_dim(); - raise::ErrorIf( - dim != in::x1 and M::CoordType != Coord::Cart, - "Conductor BCs only implemented for x1 in non-cartesian coordinates", - HERE); - em normal_b_comp, tang_e_comp1, tang_e_comp2; - if (dim == in::x1) { - normal_b_comp = em::bx1; - tang_e_comp1 = em::ex2; - tang_e_comp2 = em::ex3; - } else if (dim == in::x2) { - normal_b_comp = em::bx2; - tang_e_comp1 = em::ex1; - tang_e_comp2 = em::ex3; - } else if (dim == in::x3) { - normal_b_comp = em::bx3; - tang_e_comp1 = em::ex1; - tang_e_comp2 = em::ex2; - } else { - raise::Error("Invalid dimension", HERE); - } - std::vector xi_min, xi_max; - const std::vector all_dirs { in::x1, in::x2, in::x3 }; - for (unsigned short d { 0 }; d < static_cast(M::Dim); ++d) { - const auto dd = all_dirs[d]; - if (dim == dd) { - if (sign > 0) { // + direction - xi_min.push_back(domain.mesh.n_all(dd) - N_GHOSTS); - xi_max.push_back(domain.mesh.n_all(dd)); - } else { // - direction - xi_min.push_back(0); - xi_max.push_back(N_GHOSTS); - } - } else { - xi_min.push_back(0); - xi_max.push_back(domain.mesh.n_all(dd)); - } - } - raise::ErrorIf(xi_min.size() != xi_max.size() or - xi_min.size() != static_cast(M::Dim), - "Invalid range size", - HERE); - std::vector comps; - if (tags & BC::E) { - comps.push_back(tang_e_comp1); - comps.push_back(tang_e_comp2); - } - if (tags & BC::B) { - comps.push_back(normal_b_comp); - } - for (const auto& comp : comps) { - if constexpr (M::Dim == Dim::_1D) { - Kokkos::deep_copy(Kokkos::subview(domain.fields.em, - std::make_pair(xi_min[0], xi_max[0]), - comp), - ZERO); - } else if constexpr 
(M::Dim == Dim::_2D) { - Kokkos::deep_copy(Kokkos::subview(domain.fields.em, - std::make_pair(xi_min[0], xi_max[0]), - std::make_pair(xi_min[1], xi_max[1]), - comp), - ZERO); - } else if constexpr (M::Dim == Dim::_3D) { - Kokkos::deep_copy(Kokkos::subview(domain.fields.em, - std::make_pair(xi_min[0], xi_max[0]), - std::make_pair(xi_min[1], xi_max[1]), - std::make_pair(xi_min[2], xi_max[2]), - comp), - ZERO); - } else { - raise::Error("Invalid dimension", HERE); - } + raise::Error("Atm fields not implemented in PGEN for atmosphere BCs", HERE); } } diff --git a/src/entity.cpp b/src/entity.cpp index 272635d68..79b2f1335 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -114,4 +114,4 @@ auto main(int argc, char* argv[]) -> int { } return 0; -} +} \ No newline at end of file diff --git a/src/framework/CMakeLists.txt b/src/framework/CMakeLists.txt index 241780575..8802f696b 100644 --- a/src/framework/CMakeLists.txt +++ b/src/framework/CMakeLists.txt @@ -1,42 +1,48 @@ # ------------------------------ # @defines: ntt_framework [STATIC/SHARED] +# # @sources: -# - parameters.cpp -# - simulation.cpp -# - domain/grid.cpp -# - domain/metadomain.cpp -# - domain/communications.cpp -# - domain/checkpoint.cpp -# - containers/particles.cpp -# - containers/fields.cpp -# - domain/output.cpp +# +# * parameters.cpp +# * simulation.cpp +# * domain/grid.cpp +# * domain/metadomain.cpp +# * domain/communications.cpp +# * domain/checkpoint.cpp +# * containers/particles.cpp +# * containers/fields.cpp +# * domain/output.cpp +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_global [required] -# - ntt_metrics [required] -# - ntt_kernels [required] -# - ntt_output [optional] +# +# * ntt_global [required] +# * ntt_metrics [required] +# * ntt_kernels [required] +# * ntt_output [optional] +# # @uses: -# - kokkos [required] -# - plog [required] -# - toml11 [required] -# - ADIOS2 [optional] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * toml11 [required] +# * 
ADIOS2 [optional] +# * mpi [optional] # ------------------------------ set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SOURCES - ${SRC_DIR}/parameters.cpp - ${SRC_DIR}/simulation.cpp - ${SRC_DIR}/domain/grid.cpp - ${SRC_DIR}/domain/metadomain.cpp - ${SRC_DIR}/domain/communications.cpp - ${SRC_DIR}/domain/checkpoint.cpp - ${SRC_DIR}/containers/particles.cpp - ${SRC_DIR}/containers/fields.cpp -) -if (${output}) +set(SOURCES + ${SRC_DIR}/parameters.cpp + ${SRC_DIR}/simulation.cpp + ${SRC_DIR}/domain/grid.cpp + ${SRC_DIR}/domain/metadomain.cpp + ${SRC_DIR}/domain/communications.cpp + ${SRC_DIR}/containers/particles.cpp + ${SRC_DIR}/containers/fields.cpp) +if(${output}) list(APPEND SOURCES ${SRC_DIR}/domain/output.cpp) list(APPEND SOURCES ${SRC_DIR}/domain/checkpoint.cpp) endif() @@ -51,7 +57,7 @@ add_dependencies(ntt_framework ${libs}) target_link_libraries(ntt_framework PUBLIC ${libs}) target_link_libraries(ntt_framework PRIVATE stdc++fs) -target_include_directories(ntt_framework +target_include_directories( + ntt_framework PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../ - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../ -) + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../) diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index f0c64c4ee..d78055824 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -4,13 +4,18 @@ #include "global.h" #include "arch/kokkos_aliases.h" +#include "utils/numeric.h" #include "utils/sorting.h" #include "framework/containers/species.h" #include #include +#include +#include +#include +#include #include #include @@ -26,162 +31,208 @@ namespace ntt { const Cooling& cooling, unsigned short npld) : ParticleSpecies(index, label, m, ch, maxnpart, pusher, use_gca, cooling, npld) { - i1 = array_t { label + "_i1", maxnpart }; - i1_h = Kokkos::create_mirror_view(i1); - dx1 = array_t { label + "_dx1", maxnpart }; - dx1_h = Kokkos::create_mirror_view(dx1); - - i1_prev = array_t { label + 
"_i1_prev", maxnpart }; - dx1_prev = array_t { label + "_dx1_prev", maxnpart }; - - ux1 = array_t { label + "_ux1", maxnpart }; - ux1_h = Kokkos::create_mirror_view(ux1); - ux2 = array_t { label + "_ux2", maxnpart }; - ux2_h = Kokkos::create_mirror_view(ux2); - ux3 = array_t { label + "_ux3", maxnpart }; - ux3_h = Kokkos::create_mirror_view(ux3); - - weight = array_t { label + "_w", maxnpart }; - weight_h = Kokkos::create_mirror_view(weight); - - tag = array_t { label + "_tag", maxnpart }; - tag_h = Kokkos::create_mirror_view(tag); - - for (unsigned short n { 0 }; n < npld; ++n) { - pld.push_back(array_t("pld", maxnpart)); - pld_h.push_back(Kokkos::create_mirror_view(pld[n])); - } - if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { - i2 = array_t { label + "_i2", maxnpart }; - i2_h = Kokkos::create_mirror_view(i2); - dx2 = array_t { label + "_dx2", maxnpart }; - dx2_h = Kokkos::create_mirror_view(dx2); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + i1 = array_t { label + "_i1", maxnpart }; + dx1 = array_t { label + "_dx1", maxnpart }; + i1_prev = array_t { label + "_i1_prev", maxnpart }; + dx1_prev = array_t { label + "_dx1_prev", maxnpart }; + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + i2 = array_t { label + "_i2", maxnpart }; + dx2 = array_t { label + "_dx2", maxnpart }; i2_prev = array_t { label + "_i2_prev", maxnpart }; dx2_prev = array_t { label + "_dx2_prev", maxnpart }; } - if ((D == Dim::_2D) && (C != Coord::Cart)) { - phi = array_t { label + "_phi", maxnpart }; - phi_h = Kokkos::create_mirror_view(phi); - } if constexpr (D == Dim::_3D) { - i3 = array_t { label + "_i3", maxnpart }; - i3_h = Kokkos::create_mirror_view(i3); - dx3 = array_t { label + "_dx3", maxnpart }; - dx3_h = Kokkos::create_mirror_view(dx3); - + i3 = array_t { label + "_i3", maxnpart }; + dx3 = array_t { label + "_dx3", maxnpart }; i3_prev = array_t { label + "_i3_prev", maxnpart }; dx3_prev = array_t { label + "_dx3_prev", maxnpart }; } + + ux1 = array_t 
{ label + "_ux1", maxnpart }; + ux2 = array_t { label + "_ux2", maxnpart }; + ux3 = array_t { label + "_ux3", maxnpart }; + + weight = array_t { label + "_w", maxnpart }; + + tag = array_t { label + "_tag", maxnpart }; + + if (npld > 0) { + pld = array_t { label + "_pld", maxnpart, npld }; + } + + if ((D == Dim::_2D) && (C != Coord::Cart)) { + phi = array_t { label + "_phi", maxnpart }; + } } template - auto Particles::npart_per_tag() const -> std::vector { + auto Particles::NpartsPerTagAndOffsets() const + -> std::pair, array_t> { auto this_tag = tag; - array_t npart_tag("npart_tags", ntags()); + const auto num_tags = ntags(); + array_t npptag { "nparts_per_tag", ntags() }; - auto npart_tag_scatter = Kokkos::Experimental::create_scatter_view(npart_tag); + // count # of particles per each tag + auto npptag_scat = Kokkos::Experimental::create_scatter_view(npptag); Kokkos::parallel_for( "NpartPerTag", - npart(), + rangeActiveParticles(), Lambda(index_t p) { - auto npart_tag_scatter_access = npart_tag_scatter.access(); - npart_tag_scatter_access((int)(this_tag(p))) += 1; + auto npptag_acc = npptag_scat.access(); + if (this_tag(p) < 0 || this_tag(p) >= num_tags) { + raise::KernelError(HERE, "Invalid tag value"); + } + npptag_acc(this_tag(p)) += 1; }); - Kokkos::Experimental::contribute(npart_tag, npart_tag_scatter); + Kokkos::Experimental::contribute(npptag, npptag_scat); + + // copy the count to a vector on the host + auto npptag_h = Kokkos::create_mirror_view(npptag); + Kokkos::deep_copy(npptag_h, npptag); + std::vector npptag_vec(num_tags); + for (auto t { 0u }; t < num_tags; ++t) { + npptag_vec[t] = npptag_h(t); + } - auto npart_tag_host = Kokkos::create_mirror_view(npart_tag); - Kokkos::deep_copy(npart_tag_host, npart_tag); + // count the offsets on the host and copy to device + array_t tag_offsets("tag_offsets", num_tags - 3); + auto tag_offsets_h = Kokkos::create_mirror_view(tag_offsets); - std::vector npart_tag_vec; - for (std::size_t t { 0 }; t < ntags(); ++t) 
{ - npart_tag_vec.push_back(npart_tag_host(t)); + tag_offsets_h(0) = npptag_vec[2]; // offset for tag = 3 + for (auto t { 1u }; t < num_tags - 3; ++t) { + tag_offsets_h(t) = npptag_vec[t + 2] + tag_offsets_h(t - 1); } - return npart_tag_vec; + Kokkos::deep_copy(tag_offsets, tag_offsets_h); + + return { npptag_vec, tag_offsets }; + } + + template + void RemoveDeadInArray(array_t& arr, + const array_t& indices_alive) { + auto n_alive = indices_alive.extent(0); + auto buffer = Kokkos::View("buffer", n_alive); + Kokkos::parallel_for( + "PopulateBufferAlive", + n_alive, + Lambda(index_t p) { buffer(p) = arr(indices_alive(p)); }); + + Kokkos::deep_copy( + Kokkos::subview(arr, std::make_pair(static_cast(0), n_alive)), + buffer); + } + + template + void RemoveDeadInArray(array_t& arr, + const array_t& indices_alive) { + auto n_alive = indices_alive.extent(0); + auto buffer = array_t { "buffer", n_alive, arr.extent(1) }; + Kokkos::parallel_for( + "PopulateBufferAlive", + CreateRangePolicy({ 0, 0 }, { n_alive, arr.extent(1) }), + Lambda(index_t p, index_t l) { buffer(p, l) = arr(indices_alive(p), l); }); + + Kokkos::deep_copy( + Kokkos::subview(arr, + std::make_pair(static_cast(0), n_alive), + Kokkos::ALL), + buffer); } template - auto Particles::SortByTags() -> std::vector { - if (npart() == 0 || is_sorted()) { - return npart_per_tag(); - } - using KeyType = array_t; - using BinOp = sort::BinTag; - BinOp bin_op(ntags()); - auto slice = range_tuple_t(0, npart()); - Kokkos::BinSort Sorter(Kokkos::subview(tag, slice), bin_op, false); - Sorter.create_permute_vector(); - - Sorter.sort(Kokkos::subview(i1, slice)); - Sorter.sort(Kokkos::subview(dx1, slice)); - Sorter.sort(Kokkos::subview(i1_prev, slice)); - Sorter.sort(Kokkos::subview(dx1_prev, slice)); - Sorter.sort(Kokkos::subview(ux1, slice)); - Sorter.sort(Kokkos::subview(ux2, slice)); - Sorter.sort(Kokkos::subview(ux3, slice)); - - Sorter.sort(Kokkos::subview(tag, slice)); - Sorter.sort(Kokkos::subview(weight, slice)); - - 
for (unsigned short n { 0 }; n < npld(); ++n) { - Sorter.sort(Kokkos::subview(pld[n], slice)); - } + void Particles::RemoveDead() { + const auto n_part = npart(); + std::size_t n_alive = 0, n_dead = 0; + auto& this_tag = tag; + + Kokkos::parallel_reduce( + "CountDeadAlive", + rangeActiveParticles(), + Lambda(index_t p, std::size_t & nalive, std::size_t & ndead) { + nalive += (this_tag(p) == ParticleTag::alive); + ndead += (this_tag(p) == ParticleTag::dead); + if (this_tag(p) != ParticleTag::alive and this_tag(p) != ParticleTag::dead) { + raise::KernelError(HERE, "wrong particle tag"); + } + }, + n_alive, + n_dead); + + array_t indices_alive { "indices_alive", n_alive }; + array_t alive_counter { "counter_alive", 1 }; - if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { - Sorter.sort(Kokkos::subview(i2, slice)); - Sorter.sort(Kokkos::subview(dx2, slice)); + Kokkos::parallel_for( + "AliveIndices", + rangeActiveParticles(), + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive) { + const auto idx = Kokkos::atomic_fetch_add(&alive_counter(0), 1); + indices_alive(idx) = p; + } + }); - Sorter.sort(Kokkos::subview(i2_prev, slice)); - Sorter.sort(Kokkos::subview(dx2_prev, slice)); + { + auto alive_counter_h = Kokkos::create_mirror_view(alive_counter); + Kokkos::deep_copy(alive_counter_h, alive_counter); + raise::ErrorIf(alive_counter_h(0) != n_alive, + "error in finding alive particle indices", + HERE); } - if constexpr (D == Dim::_3D) { - Sorter.sort(Kokkos::subview(i3, slice)); - Sorter.sort(Kokkos::subview(dx3, slice)); - Sorter.sort(Kokkos::subview(i3_prev, slice)); - Sorter.sort(Kokkos::subview(dx3_prev, slice)); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + RemoveDeadInArray(i1, indices_alive); + RemoveDeadInArray(i1_prev, indices_alive); + RemoveDeadInArray(dx1, indices_alive); + RemoveDeadInArray(dx1_prev, indices_alive); } - if ((D == Dim::_2D) && (C != Coord::Cart)) { - Sorter.sort(Kokkos::subview(phi, slice)); + if constexpr (D == 
Dim::_2D or D == Dim::_3D) { + RemoveDeadInArray(i2, indices_alive); + RemoveDeadInArray(i2_prev, indices_alive); + RemoveDeadInArray(dx2, indices_alive); + RemoveDeadInArray(dx2_prev, indices_alive); } - const auto np_per_tag = npart_per_tag(); - set_npart(np_per_tag[(short)(ParticleTag::alive)]); + if constexpr (D == Dim::_3D) { + RemoveDeadInArray(i3, indices_alive); + RemoveDeadInArray(i3_prev, indices_alive); + RemoveDeadInArray(dx3, indices_alive); + RemoveDeadInArray(dx3_prev, indices_alive); + } - m_is_sorted = true; - return np_per_tag; - } + RemoveDeadInArray(ux1, indices_alive); + RemoveDeadInArray(ux2, indices_alive); + RemoveDeadInArray(ux3, indices_alive); + RemoveDeadInArray(weight, indices_alive); - template - void Particles::SyncHostDevice() { - Kokkos::deep_copy(i1_h, i1); - Kokkos::deep_copy(dx1_h, dx1); - Kokkos::deep_copy(ux1_h, ux1); - Kokkos::deep_copy(ux2_h, ux2); - Kokkos::deep_copy(ux3_h, ux3); - - Kokkos::deep_copy(tag_h, tag); - Kokkos::deep_copy(weight_h, weight); - - for (auto n { 0 }; n < npld(); ++n) { - Kokkos::deep_copy(pld_h[n], pld[n]); + if constexpr (D == Dim::_2D && C != Coord::Cart) { + RemoveDeadInArray(phi, indices_alive); } - if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { - Kokkos::deep_copy(i2_h, i2); - Kokkos::deep_copy(dx2_h, dx2); - } - if constexpr (D == Dim::_3D) { - Kokkos::deep_copy(i3_h, i3); - Kokkos::deep_copy(dx3_h, dx3); + if (npld() > 0) { + RemoveDeadInArray(pld, indices_alive); } - if ((D == Dim::_2D) && (C != Coord::Cart)) { - Kokkos::deep_copy(phi_h, phi); - } + Kokkos::Experimental::fill( + "TagAliveParticles", + AccelExeSpace(), + Kokkos::subview(this_tag, + std::make_pair(static_cast(0), n_alive)), + ParticleTag::alive); + + Kokkos::Experimental::fill( + "TagDeadParticles", + AccelExeSpace(), + Kokkos::subview(this_tag, std::make_pair(n_alive, n_alive + n_dead)), + ParticleTag::dead); + + set_npart(n_alive); + m_is_sorted = true; } template struct Particles; diff --git 
a/src/framework/containers/particles.h b/src/framework/containers/particles.h index b4831b64a..d84bd0cc9 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -48,31 +48,22 @@ namespace ntt { public: // Cell indices of the current particle - array_t i1, i2, i3; + array_t i1, i2, i3; // Displacement of a particle within the cell - array_t dx1, dx2, dx3; + array_t dx1, dx2, dx3; // Three spatial components of the covariant 4-velocity (physical units) - array_t ux1, ux2, ux3; + array_t ux1, ux2, ux3; // Particle weights. - array_t weight; + array_t weight; // Previous timestep coordinates - array_t i1_prev, i2_prev, i3_prev; - array_t dx1_prev, dx2_prev, dx3_prev; + array_t i1_prev, i2_prev, i3_prev; + array_t dx1_prev, dx2_prev, dx3_prev; // Array to tag the particles - array_t tag; - // Array to store the particle load - std::vector> pld; + array_t tag; + // Array to store the particle payloads + array_t pld; // phi coordinate (for axisymmetry) - array_t phi; - - // host mirrors - array_mirror_t i1_h, i2_h, i3_h; - array_mirror_t dx1_h, dx2_h, dx3_h; - array_mirror_t ux1_h, ux2_h, ux3_h; - array_mirror_t weight_h; - array_mirror_t phi_h; - array_mirror_t tag_h; - std::vector> pld_h; + array_t phi; // for empty allocation Particles() {} @@ -178,19 +169,26 @@ namespace ntt { footprint += sizeof(prtldx_t) * dx2_prev.extent(0); footprint += sizeof(prtldx_t) * dx3_prev.extent(0); footprint += sizeof(short) * tag.extent(0); - for (auto& p : pld) { - footprint += sizeof(real_t) * p.extent(0); - } - footprint += sizeof(real_t) * phi.extent(0); + footprint += sizeof(real_t) * pld.extent(0) * pld.extent(1); + footprint += sizeof(real_t) * phi.extent(0); return footprint; } /** * @brief Count the number of particles with a specific tag. - * @return The vector of counts for each tag. + * @return The vector of counts for each tag + offsets + * @note For instance, given the counts: 0 -> n0, 1 -> n1, 2 -> n2, 3 -> n3, + * ... 
it returns: + * ... [n0, n1, n2, n3, ...] of size ntags + * ... [n2, n2 + n3, n2 + n3 + n4, ...] of size ntags - 3 + * ... so in buffer array: + * ... tag=2 particles are offset by 0 + * ... tag=3 particles are offset by n2 + * ... tag=4 particles are offset by n2 + n3 + * ... etc. */ - [[nodiscard]] - auto npart_per_tag() const -> std::vector; + auto NpartsPerTagAndOffsets() const + -> std::pair, array_t>; /* setters -------------------------------------------------------------- */ /** @@ -213,15 +211,16 @@ namespace ntt { } /** - * @brief Sort particles by their tags. - * @return The vector of counts per each tag. + * @brief Move dead particles to the end of arrays */ - auto SortByTags() -> std::vector; + void RemoveDead(); /** * @brief Copy particle data from device to host. */ void SyncHostDevice(); + + // void PrintTags(); }; } // namespace ntt diff --git a/src/framework/domain/checkpoint.cpp b/src/framework/domain/checkpoint.cpp index 3d309c090..6dfb137db 100644 --- a/src/framework/domain/checkpoint.cpp +++ b/src/framework/domain/checkpoint.cpp @@ -242,13 +242,13 @@ namespace ntt { local_domain->species[s].weight); auto nplds = local_domain->species[s].npld(); - for (auto p { 0u }; p < nplds; ++p) { - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_pld%d", s + 1, p + 1), - glob_tot, - offset, - npart, - local_domain->species[s].pld[p]); + if (nplds > 0) { + g_checkpoint_writer.saveParticlePayloads(fmt::format("s%d_plds", s + 1), + nplds, + glob_tot, + offset, + npart, + local_domain->species[s].pld); } } } @@ -451,14 +451,16 @@ namespace ntt { domain.species[s].weight, loc_npart, offset_npart); - for (auto p { 0u }; p < domain.species[s].npld(); ++p) { - checkpoint::ReadParticleData(io, - reader, - fmt::format("pld%d", p + 1), - s, - domain.species[s].pld[p], - loc_npart, - offset_npart); + + const auto nplds = domain.species[s].npld(); + if (nplds > 0) { + checkpoint::ReadParticlePayloads(io, + reader, + s, + domain.species[s].pld, + nplds, + 
loc_npart, + offset_npart); } domain.species[s].set_npart(loc_npart); } // species loop diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 63dd8271a..e5bc2d21e 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -14,15 +14,20 @@ #include "enums.h" #include "global.h" +#include "arch/directions.h" #include "arch/kokkos_aliases.h" #include "arch/mpi_aliases.h" +#include "arch/mpi_tags.h" #include "utils/error.h" #include "framework/containers/particles.h" +#include "kernels/comm.hpp" + #include #include +#include #include namespace comm { @@ -52,10 +57,11 @@ namespace comm { (recv_rank == rank && recv_idx != idx), "Multiple-domain single-rank communication not yet implemented", HERE); - if ((send_idx == idx) and (recv_idx == idx)) { // trivial copy if sending to self and receiving from self + if (not additive) { + // simply filling the ghost cells if constexpr (D == Dim::_1D) { Kokkos::deep_copy(Kokkos::subview(fld, recv_slice[0], comps), @@ -65,6 +71,7 @@ namespace comm { Kokkos::subview(fld, recv_slice[0], recv_slice[1], comps), Kokkos::subview(fld, send_slice[0], send_slice[1], comps)); } else if constexpr (D == Dim::_3D) { + Kokkos::deep_copy( Kokkos::subview(fld, recv_slice[0], recv_slice[1], recv_slice[2], comps), Kokkos::subview(fld, send_slice[0], send_slice[1], send_slice[2], comps)); @@ -177,6 +184,7 @@ namespace comm { comps.second - comps.first); } } + if (send_rank >= 0 && recv_rank >= 0) { MPI_Sendrecv(send_fld.data(), nsend, @@ -197,6 +205,7 @@ namespace comm { send_rank, 0, MPI_COMM_WORLD); + } else if (recv_rank >= 0) { MPI_Recv(recv_fld.data(), nrecv, @@ -208,7 +217,9 @@ namespace comm { } else { raise::Error("CommunicateField called with negative ranks", HERE); } + if (recv_rank >= 0) { + // !TODO: perhaps directly recv to the fld? 
if (not additive) { if constexpr (D == Dim::_1D) { @@ -276,50 +287,10 @@ namespace comm { } } - template - void CommunicateParticleQuantity(array_t& arr, - int send_rank, - int recv_rank, - const range_tuple_t& send_slice, - const range_tuple_t& recv_slice) { - const std::size_t send_count = send_slice.second - send_slice.first; - const std::size_t recv_count = recv_slice.second - recv_slice.first; - if ((send_rank >= 0) and (recv_rank >= 0) and (send_count > 0) and - (recv_count > 0)) { - MPI_Sendrecv(arr.data() + send_slice.first, - send_count, - mpi::get_type(), - send_rank, - 0, - arr.data() + recv_slice.first, - recv_count, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } else if ((send_rank >= 0) and (send_count > 0)) { - MPI_Send(arr.data() + send_slice.first, - send_count, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - } else if ((recv_rank >= 0) and (recv_count > 0)) { - MPI_Recv(arr.data() + recv_slice.first, - recv_count, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } - } - - void ParticleSendRecvCount(int send_rank, - int recv_rank, - const std::size_t& send_count, - std::size_t& recv_count) { + void ParticleSendRecvCount(int send_rank, + int recv_rank, + std::size_t send_count, + std::size_t& recv_count) { if ((send_rank >= 0) && (recv_rank >= 0)) { MPI_Sendrecv(&send_count, 1, @@ -349,96 +320,233 @@ namespace comm { } template - auto CommunicateParticles(Particles& species, - int send_rank, - int recv_rank, - const range_tuple_t& send_slice, - std::size_t& index_last) -> std::size_t { - if ((send_rank < 0) && (recv_rank < 0)) { - raise::Error("No send or recv in CommunicateParticles", HERE); - } - std::size_t recv_count { 0 }; - ParticleSendRecvCount(send_rank, - recv_rank, - send_slice.second - send_slice.first, - recv_count); - - raise::FatalIf((index_last + recv_count) >= species.maxnpart(), - "Too many particles to receive (cannot fit into maxptl)", - HERE); - const auto 
recv_slice = range_tuple_t({ index_last, index_last + recv_count }); - - CommunicateParticleQuantity(species.i1, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.dx1, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.i1_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.dx1_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - if constexpr (D == Dim::_2D || D == Dim::_3D) { - CommunicateParticleQuantity(species.i2, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.dx2, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.i2_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.dx2_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - } - if constexpr (D == Dim::_3D) { - CommunicateParticleQuantity(species.i3, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.dx3, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.i3_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - CommunicateParticleQuantity(species.dx3_prev, - send_rank, - recv_rank, - send_slice, - recv_slice); - } - CommunicateParticleQuantity(species.ux1, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.ux2, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.ux3, send_rank, recv_rank, send_slice, recv_slice); - CommunicateParticleQuantity(species.weight, - send_rank, - recv_rank, - send_slice, - recv_slice); - if constexpr (D == Dim::_2D and C != Coord::Cart) { - CommunicateParticleQuantity(species.phi, - send_rank, - recv_rank, - send_slice, - recv_slice); + void CommunicateParticles(Particles& species, + const array_t& outgoing_indices, + const array_t& tag_offsets, + const std::vector& npptag_vec, 
+ const std::vector& npptag_recv_vec, + const std::vector& send_ranks, + const std::vector& recv_ranks, + const dir::dirs_t& dirs_to_comm) { + // number of arrays of each type to send/recv + const unsigned short NREALS = 4 + static_cast( + D == Dim::_2D and C != Coord::Cart); + const unsigned short NINTS = 2 * static_cast(D); + const unsigned short NPRTLDX = 2 * static_cast(D); + const unsigned short NPLDS = species.npld(); + + // buffers to store recv data + const auto npart_alive = npptag_vec[ParticleTag::alive]; + const auto npart_dead = npptag_vec[ParticleTag::dead]; + const auto npart_send = outgoing_indices.extent(0) - npart_dead; + const auto npart_recv = std::accumulate(npptag_recv_vec.begin(), + npptag_recv_vec.end(), + static_cast(0)); + array_t recv_buff_int { "recv_buff_int", npart_recv * NINTS }; + array_t recv_buff_real { "recv_buff_real", npart_recv * NREALS }; + array_t recv_buff_prtldx { "recv_buff_prtldx", npart_recv * NPRTLDX }; + array_t recv_buff_pld; + + if (NPLDS > 0) { + recv_buff_pld = array_t { "recv_buff_pld", npart_recv * NPLDS }; } - for (auto p { 0 }; p < species.npld(); ++p) { - CommunicateParticleQuantity(species.pld[p], - send_rank, - recv_rank, - send_slice, - recv_slice); + + auto iteration = 0; + auto current_received = 0; + + for (const auto& direction : dirs_to_comm) { + const auto send_rank = send_ranks[iteration]; + const auto recv_rank = recv_ranks[iteration]; + const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); + const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); + const auto npart_send_in = npptag_vec[tag_send]; + const auto npart_recv_in = npptag_recv_vec[tag_recv - 2]; + if (send_rank < 0 and recv_rank < 0) { + continue; + } + array_t send_buff_int { "send_buff_int", npart_send_in * NINTS }; + array_t send_buff_real { "send_buff_real", npart_send_in * NREALS }; + array_t send_buff_prtldx { "send_buff_prtldx", + npart_send_in * NPRTLDX }; + array_t send_buff_pld; + if (NPLDS > 0) { + send_buff_pld = 
array_t { "send_buff_pld", npart_send_in * NPLDS }; + } + + auto tag_offsets_h = Kokkos::create_mirror_view(tag_offsets); + Kokkos::deep_copy(tag_offsets_h, tag_offsets); + + std::size_t idx_offset = npart_dead; + if (tag_send > 2) { + idx_offset += tag_offsets_h(tag_send - 3); + } + // clang-format off + Kokkos::parallel_for( + "PopulatePrtlSendBuffer", + npart_send_in, + kernel::comm::PopulatePrtlSendBuffer_kernel( + send_buff_int, send_buff_real, send_buff_prtldx, send_buff_pld, + NINTS, NREALS, NPRTLDX, NPLDS, idx_offset, + species.i1, species.i1_prev, species.dx1, species.dx1_prev, + species.i2, species.i2_prev, species.dx2, species.dx2_prev, + species.i3, species.i3_prev, species.dx3, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.weight, species.phi, species.pld, species.tag, + outgoing_indices) + ); + // clang-format on + + const auto recv_offset_int = current_received * NINTS; + const auto recv_offset_real = current_received * NREALS; + const auto recv_offset_prtldx = current_received * NPRTLDX; + const auto recv_offset_pld = current_received * NPLDS; + + if ((send_rank >= 0) and (recv_rank >= 0) and (npart_send_in > 0) and + (npart_recv_in > 0)) { + raise::ErrorIf(recv_offset_int + npart_recv_in * NINTS > + recv_buff_int.extent(0), + "incorrect # of recv particles", + HERE); + MPI_Sendrecv(send_buff_int.data(), + npart_send_in * NINTS, + mpi::get_type(), + send_rank, + 0, + recv_buff_int.data() + recv_offset_int, + npart_recv_in * NINTS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Sendrecv(send_buff_real.data(), + npart_send_in * NREALS, + mpi::get_type(), + send_rank, + 0, + recv_buff_real.data() + recv_offset_real, + npart_recv_in * NREALS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Sendrecv(send_buff_prtldx.data(), + npart_send_in * NPRTLDX, + mpi::get_type(), + send_rank, + 0, + recv_buff_prtldx.data() + recv_offset_prtldx, + npart_recv_in * NPRTLDX, + 
mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + if (NPLDS > 0) { + MPI_Sendrecv(send_buff_pld.data(), + npart_send_in * NPLDS, + mpi::get_type(), + send_rank, + 0, + recv_buff_pld.data() + recv_offset_pld, + npart_recv_in * NPLDS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } else if ((send_rank >= 0) and (npart_send_in > 0)) { + MPI_Send(send_buff_int.data(), + npart_send_in * NINTS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + MPI_Send(send_buff_real.data(), + npart_send_in * NREALS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + MPI_Send(send_buff_prtldx.data(), + npart_send_in * NPRTLDX, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + if (NPLDS > 0) { + MPI_Send(send_buff_pld.data(), + npart_send_in * NPLDS, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); + } + } else if ((recv_rank >= 0) and (npart_recv_in > 0)) { + raise::ErrorIf(recv_offset_int + npart_recv_in * NINTS > + recv_buff_int.extent(0), + "incorrect # of recv particles", + HERE); + MPI_Recv(recv_buff_int.data() + recv_offset_int, + npart_recv_in * NINTS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Recv(recv_buff_real.data() + recv_offset_real, + npart_recv_in * NREALS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Recv(recv_buff_prtldx.data() + recv_offset_prtldx, + npart_recv_in * NPRTLDX, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + if (NPLDS > 0) { + MPI_Recv(recv_buff_pld.data() + recv_offset_pld, + npart_recv_in * NPLDS, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + current_received += npart_recv_in; + iteration++; + + } // end direction loop + + // clang-format off + Kokkos::parallel_for( + "PopulateFromRecvBuffer", + npart_recv, + kernel::comm::ExtractReceivedPrtls_kernel( + recv_buff_int, recv_buff_real, recv_buff_prtldx, 
recv_buff_pld, + NINTS, NREALS, NPRTLDX, NPLDS, + species.npart(), + species.i1, species.i1_prev, species.dx1, species.dx1_prev, + species.i2, species.i2_prev, species.dx2, species.dx2_prev, + species.i3, species.i3_prev, species.dx3, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.weight, species.phi, species.pld, species.tag, + outgoing_indices) + ); + // clang-format on + + const auto npart = species.npart(); + const auto npart_holes = outgoing_indices.extent(0); + if (npart_recv > npart_holes) { + species.set_npart(npart + npart_recv - npart_holes); } - return recv_count; } } // namespace comm diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index 60524eedd..7dc5d285a 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -20,10 +20,13 @@ #include "arch/mpi_tags.h" #include "framework/domain/comm_mpi.hpp" + #include "kernels/comm.hpp" #else #include "framework/domain/comm_nompi.hpp" #endif +#include + #include #include @@ -86,8 +89,8 @@ namespace ntt { } else { // no communication necessary return { - {0, -1}, - {0, -1} + { 0, -1 }, + { 0, -1 } }; } #if defined(MPI_ENABLED) @@ -110,8 +113,8 @@ namespace ntt { (void)send_rank; (void)recv_rank; return { - {send_ind, send_rank}, - {recv_ind, recv_rank} + { send_ind, send_rank }, + { recv_ind, recv_rank } }; } @@ -129,8 +132,8 @@ namespace ntt { const auto is_receiving = (recv_rank >= 0); if (not(is_sending or is_receiving)) { return { - {{ 0, -1 }, {}}, - {{ 0, -1 }, {}} + { { 0, -1 }, {} }, + { { 0, -1 }, {} } }; } auto send_slice = std::vector {}; @@ -196,8 +199,8 @@ namespace ntt { } return { - {{ send_ind, send_rank }, send_slice}, - {{ recv_ind, recv_rank }, recv_slice}, + { { send_ind, send_rank }, send_slice }, + { { recv_ind, recv_rank }, recv_slice }, }; } @@ -492,157 +495,151 @@ namespace ntt { } template - void Metadomain::CommunicateParticles(Domain& domain, - timer::Timers* timers) { - 
raise::ErrorIf(timers == nullptr, - "Timers not passed when Comm::Prtl called", - HERE); + void Metadomain::CommunicateParticles(Domain& domain) { +#if defined(MPI_ENABLED) logger::Checkpoint("Communicating particles\n", HERE); for (auto& species : domain.species) { - // at this point particles should already by tagged in the pusher - timers->start("Sorting"); - const auto npart_per_tag = species.SortByTags(); - timers->stop("Sorting"); -#if defined(MPI_ENABLED) - timers->start("Communications"); - // only necessary when MPI is enabled - /** - * index_last - * | - * alive new dead tag1 tag2 v dead - * [ 11111111 000000000 222222222 3333333 .... nnnnnnn 00000000 ... ] - * ^ ^ - * | | - * tag_offset[tag1] -----+ +----- tag_offset[tag1] + npart_per_tag[tag1] - * "send_pmin" "send_pmax" (after last element) - */ - auto tag_offset { npart_per_tag }; - for (std::size_t i { 1 }; i < tag_offset.size(); ++i) { - tag_offset[i] += tag_offset[i - 1]; - } - for (std::size_t i { 0 }; i < tag_offset.size(); ++i) { - tag_offset[i] -= npart_per_tag[i]; - } - auto index_last = tag_offset[tag_offset.size() - 1] + - npart_per_tag[npart_per_tag.size() - 1]; - for (auto& direction : dir::Directions::all) { + const auto ntags = species.ntags(); + + // at this point particles should already be tagged in the pusher + auto [npptag_vec, tag_offsets] = species.NpartsPerTagAndOffsets(); + const auto npart_dead = npptag_vec[ParticleTag::dead]; + const auto npart_alive = npptag_vec[ParticleTag::alive]; + + const auto npart = species.npart(); + const auto npart_holes = npart - npart_alive; + + // # of particles to receive per each tag (direction) + std::vector npptag_recv_vec(ntags - 2, 0); + // coordinate shifts per each direction + array_t shifts_in_x1 { "shifts_in_x1", ntags - 2 }; + array_t shifts_in_x2 { "shifts_in_x2", ntags - 2 }; + array_t shifts_in_x3 { "shifts_in_x3", ntags - 2 }; + auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); + auto shifts_in_x2_h = 
Kokkos::create_mirror_view(shifts_in_x2); + auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); + + // all directions requiring communication + dir::dirs_t dirs_to_comm; + + // ranks & indices of meshblock to send/recv from + std::vector send_ranks, send_inds; + std::vector recv_ranks, recv_inds; + + // total # of reaceived particles from all directions + std::size_t npart_recv = 0u; + + for (const auto& direction : dir::Directions::all) { + // tags corresponding to the direction (both send & recv) + const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); + const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); + + // get indices & ranks of send/recv meshblocks const auto [send_params, recv_params] = GetSendRecvParams(this, domain, direction, true); const auto [send_indrank, send_slice] = send_params; const auto [recv_indrank, recv_slice] = recv_params; const auto [send_ind, send_rank] = send_indrank; const auto [recv_ind, recv_rank] = recv_indrank; - if (send_rank < 0 and recv_rank < 0) { + + // skip if no communication is necessary + const auto is_sending = (send_rank >= 0); + const auto is_receiving = (recv_rank >= 0); + if (not is_sending and not is_receiving) { continue; } - const auto send_dir_tag = mpi::PrtlSendTag::dir2tag(direction); - const auto nsend = npart_per_tag[send_dir_tag]; - const auto send_pmin = tag_offset[send_dir_tag]; - const auto send_pmax = tag_offset[send_dir_tag] + nsend; - const auto recv_count = comm::CommunicateParticles( - species, - send_rank, - recv_rank, - { send_pmin, send_pmax }, - index_last); - if (recv_count > 0) { - if constexpr (D == Dim::_1D) { - int shift_in_x1 { 0 }; - if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active(in::x1); - } - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i1_prev = species.i1_prev; - Kokkos::parallel_for( - "CommunicateParticles", - 
recv_count, - Lambda(index_t p) { - this_tag(index_last + p) = ParticleTag::alive; - this_i1(index_last + p) += shift_in_x1; - this_i1_prev(index_last + p) += shift_in_x1; - }); - } else if constexpr (D == Dim::_2D) { - int shift_in_x1 { 0 }, shift_in_x2 { 0 }; - if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active()[0]; - } - if ((-direction)[1] == -1) { - shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); - } else if ((-direction)[1] == 1) { - shift_in_x2 = domain.mesh.n_active(in::x2); - } - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; - Kokkos::parallel_for( - "CommunicateParticles", - recv_count, - Lambda(index_t p) { - this_tag(index_last + p) = ParticleTag::alive; - this_i1(index_last + p) += shift_in_x1; - this_i2(index_last + p) += shift_in_x2; - this_i1_prev(index_last + p) += shift_in_x1; - this_i2_prev(index_last + p) += shift_in_x2; - }); - } else if constexpr (D == Dim::_3D) { - int shift_in_x1 { 0 }, shift_in_x2 { 0 }, shift_in_x3 { 0 }; - if ((-direction)[0] == -1) { - shift_in_x1 = -subdomain(recv_ind).mesh.n_active(in::x1); - } else if ((-direction)[0] == 1) { - shift_in_x1 = domain.mesh.n_active(in::x1); + dirs_to_comm.push_back(direction); + send_ranks.push_back(send_rank); + recv_ranks.push_back(recv_rank); + send_inds.push_back(send_ind); + recv_inds.push_back(recv_ind); + + // record the # of particles to-be-sent + const auto nsend = npptag_vec[tag_send]; + + // request the # of particles to-be-received ... + // ... 
and send the # of particles to-be-sent + std::size_t nrecv = 0; + comm::ParticleSendRecvCount(send_rank, recv_rank, nsend, nrecv); + npart_recv += nrecv; + npptag_recv_vec[tag_recv - 2] = nrecv; + + raise::ErrorIf((npart + npart_recv) >= species.maxnpart(), + "Too many particles to receive (cannot fit into maxptl)", + HERE); + + // if sending, record displacements to apply before + // ... tag_send - 2: because we only shift tags > 2 (i.e. no dead/alive) + if (is_sending) { + if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { + if (direction[0] == -1) { + // sending backwards in x1 (add sx1 of target meshblock) + shifts_in_x1_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( + in::x1); + } else if (direction[0] == 1) { + // sending forward in x1 (subtract sx1 of source meshblock) + shifts_in_x1_h(tag_send - 2) = -domain.mesh.n_active(in::x1); } - if ((-direction)[1] == -1) { - shift_in_x2 = -subdomain(recv_ind).mesh.n_active(in::x2); - } else if ((-direction)[1] == 1) { - shift_in_x2 = domain.mesh.n_active(in::x2); + } + if constexpr (D == Dim::_2D || D == Dim::_3D) { + if (direction[1] == -1) { + shifts_in_x2_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( + in::x2); + } else if (direction[1] == 1) { + shifts_in_x2_h(tag_send - 2) = -domain.mesh.n_active(in::x2); } - if ((-direction)[2] == -1) { - shift_in_x3 = -subdomain(recv_ind).mesh.n_active(in::x3); - } else if ((-direction)[2] == 1) { - shift_in_x3 = domain.mesh.n_active(in::x3); + } + if constexpr (D == Dim::_3D) { + if (direction[2] == -1) { + shifts_in_x3_h(tag_send - 2) = subdomain(send_ind).mesh.n_active( + in::x3); + } else if (direction[2] == 1) { + shifts_in_x3_h(tag_send - 2) = -domain.mesh.n_active(in::x3); } - auto& this_tag = species.tag; - auto& this_i1 = species.i1; - auto& this_i2 = species.i2; - auto& this_i3 = species.i3; - auto& this_i1_prev = species.i1_prev; - auto& this_i2_prev = species.i2_prev; - auto& this_i3_prev = species.i3_prev; - Kokkos::parallel_for( - 
"CommunicateParticles", - recv_count, - Lambda(index_t p) { - this_tag(index_last + p) = ParticleTag::alive; - this_i1(index_last + p) += shift_in_x1; - this_i2(index_last + p) += shift_in_x2; - this_i3(index_last + p) += shift_in_x3; - this_i1_prev(index_last + p) += shift_in_x1; - this_i2_prev(index_last + p) += shift_in_x2; - this_i3_prev(index_last + p) += shift_in_x3; - }); } - index_last += recv_count; - species.set_npart(index_last); } + } // end directions loop - Kokkos::deep_copy( - Kokkos::subview(species.tag, std::make_pair(send_pmin, send_pmax)), - ParticleTag::dead); - } - timers->stop("Communications"); - // !TODO: maybe there is a way to not sort twice - timers->start("Sorting"); + Kokkos::deep_copy(shifts_in_x1, shifts_in_x1_h); + Kokkos::deep_copy(shifts_in_x2, shifts_in_x2_h); + Kokkos::deep_copy(shifts_in_x3, shifts_in_x3_h); + + array_t outgoing_indices { "outgoing_indices", + npart - npart_alive }; + // clang-format off + Kokkos::parallel_for( + "PrepareOutgoingPrtls", + species.rangeActiveParticles(), + kernel::comm::PrepareOutgoingPrtls_kernel( + shifts_in_x1, shifts_in_x2, shifts_in_x3, + outgoing_indices, + npart, npart_alive, npart_dead, ntags, + species.i1, species.i1_prev, + species.i2, species.i2_prev, + species.i3, species.i3_prev, + species.tag, tag_offsets) + ); + // clang-format on + + comm::CommunicateParticles(species, + outgoing_indices, + tag_offsets, + npptag_vec, + npptag_recv_vec, + send_ranks, + recv_ranks, + dirs_to_comm); species.set_unsorted(); - species.SortByTags(); - timers->stop("Sorting"); + } // end species loop +#else + (void)domain; #endif + } + + template + void Metadomain::RemoveDeadParticles(Domain& domain) { + for (auto& species : domain.species) { + species.RemoveDead(); } } diff --git a/src/framework/domain/domain.h b/src/framework/domain/domain.h index 397907fef..bc7c6e4b5 100644 --- a/src/framework/domain/domain.h +++ b/src/framework/domain/domain.h @@ -65,7 +65,7 @@ namespace ntt { Mesh mesh; Fields 
fields; std::vector> species; - random_number_pool_t random_pool { constant::RandomSeed }; + random_number_pool_t random_pool; /** * @brief constructor for "empty" allocation of non-local domain placeholders @@ -81,6 +81,7 @@ namespace ntt { : mesh { ncells, extent, metric_params } , fields {} , species {} + , random_pool { constant::RandomSeed } , m_index { index } , m_offset_ndomains { offset_ndomains } , m_offset_ncells { offset_ncells } {} @@ -95,6 +96,7 @@ namespace ntt { : mesh { ncells, extent, metric_params } , fields { ncells } , species { species_params.begin(), species_params.end() } + , random_pool { constant::RandomSeed + static_cast(index) } , m_index { index } , m_offset_ndomains { offset_ndomains } , m_offset_ncells { offset_ncells } {} @@ -144,8 +146,7 @@ namespace ntt { } /* setters -------------------------------------------------------------- */ - auto set_neighbor_idx(const dir::direction_t& dir, unsigned int idx) - -> void { + auto set_neighbor_idx(const dir::direction_t& dir, unsigned int idx) -> void { m_neighbor_idx[dir] = idx; } @@ -163,8 +164,8 @@ namespace ntt { }; template - inline auto operator<<(std::ostream& os, const Domain& domain) - -> std::ostream& { + inline auto operator<<(std::ostream& os, + const Domain& domain) -> std::ostream& { os << "Domain #" << domain.index(); #if defined(MPI_ENABLED) os << " [MPI rank: " << domain.mpi_rank() << "]"; diff --git a/src/framework/domain/metadomain.cpp b/src/framework/domain/metadomain.cpp index 5e66bc366..ed4373df2 100644 --- a/src/framework/domain/metadomain.cpp +++ b/src/framework/domain/metadomain.cpp @@ -46,6 +46,9 @@ namespace ntt { #if defined(MPI_ENABLED) MPI_Comm_size(MPI_COMM_WORLD, &g_mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &g_mpi_rank); + raise::ErrorIf(global_ndomains != g_mpi_size, + "Exactly 1 domain per MPI rank is allowed", + HERE); #endif initialValidityCheck(); @@ -381,7 +384,7 @@ namespace ntt { #if defined(MPI_ENABLED) auto dx_mins = std::vector(g_ndomains); 
dx_mins[g_mpi_rank] = dx_min; - MPI_Allgather(&dx_mins[g_mpi_rank], + MPI_Allgather(&dx_min, 1, mpi::get_type(), dx_mins.data(), diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 027a2982d..5177571d0 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -21,7 +21,6 @@ #include "arch/kokkos_aliases.h" #include "utils/timer.h" -#include "checkpoint/writer.h" #include "framework/containers/species.h" #include "framework/domain/domain.h" #include "framework/domain/mesh.h" @@ -32,6 +31,7 @@ #endif // MPI_ENABLED #if defined(OUTPUT_ENABLED) + #include "checkpoint/writer.h" #include "output/writer.h" #include @@ -88,7 +88,8 @@ namespace ntt { void CommunicateFields(Domain&, CommTags); void SynchronizeFields(Domain&, CommTags, const range_tuple_t& = { 0, 0 }); - void CommunicateParticles(Domain&, timer::Timers*); + void CommunicateParticles(Domain&); + void RemoveDeadParticles(Domain&); /** * @param global_ndomains total number of domains diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 0918eb2d3..8fb756130 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -65,10 +65,13 @@ namespace ntt { g_writer.init(ptr_adios, params.template get("output.format"), - params.template get("simulation.name")); + params.template get("simulation.name"), + params.template get("output.separate_files")); g_writer.defineMeshLayout(glob_shape_with_ghosts, off_ncells_with_ghosts, loc_shape_with_ghosts, + params.template get>( + "output.fields.downsampling"), incl_ghosts, M::CoordType); const auto fields_to_write = params.template get>( @@ -213,45 +216,62 @@ namespace ntt { "local_domain is a placeholder", HERE); logger::Checkpoint("Writing output", HERE); - g_writer.beginWriting(current_step, current_time); if (write_fields) { + g_writer.beginWriting(WriteMode::Fields, current_step, current_time); const auto incl_ghosts = params.template 
get("output.debug.ghosts"); + const auto dwn = params.template get>( + "output.fields.downsampling"); for (unsigned short dim = 0; dim < M::Dim; ++dim) { - const auto is_last = local_domain->offset_ncells()[dim] + - local_domain->mesh.n_active()[dim] == - mesh().n_active()[dim]; - array_t xc { "Xc", - local_domain->mesh.n_active()[dim] + - (incl_ghosts ? 2 * N_GHOSTS : 0) }; - array_t xe { "Xe", - local_domain->mesh.n_active()[dim] + - (incl_ghosts ? 2 * N_GHOSTS : 0) + - (is_last ? 1 : 0) }; - const auto offset = (incl_ghosts ? N_GHOSTS : 0); - const auto ncells = local_domain->mesh.n_active()[dim]; - const auto& metric = local_domain->mesh.metric; + const auto l_size = local_domain->mesh.n_active()[dim]; + const auto l_offset = local_domain->offset_ncells()[dim]; + const auto g_size = mesh().n_active()[dim]; + + const auto dwn_in_dim = dwn[dim]; + + const double n = l_size; + const double d = dwn_in_dim; + const double l = l_offset; + const double f = math::ceil(l / d) * d - l; + + const auto first_cell = static_cast(f); + const auto l_size_dwn = static_cast(math::ceil((n - f) / d)); + + const auto is_last = l_offset + l_size == g_size; + + const auto add_ghost = (incl_ghosts ? 2 * N_GHOSTS : 0); + const auto add_last = (is_last ? 1 : 0); + + array_t xc { "Xc", l_size_dwn + add_ghost }; + array_t xe { "Xe", l_size_dwn + add_ghost + add_last }; + + const auto offset = (incl_ghosts ? 
N_GHOSTS : 0); + const auto ncells = l_size_dwn; + + const auto& metric = local_domain->mesh.metric; + Kokkos::parallel_for( "GenerateMesh", ncells, - Lambda(index_t i) { + Lambda(index_t i_dwn) { + const auto i = first_cell + i_dwn * dwn_in_dim; const auto i_ = static_cast(i); coord_t x_Cd { ZERO }, x_Ph { ZERO }; x_Cd[dim] = i_ + HALF; + // TODO : change to convert by component metric.template convert(x_Cd, x_Ph); - xc(offset + i) = x_Ph[dim]; - x_Cd[dim] = i_; + xc(offset + i_dwn) = x_Ph[dim]; + x_Cd[dim] = i_; metric.template convert(x_Cd, x_Ph); - xe(offset + i) = x_Ph[dim]; - if (is_last && i == ncells - 1) { + xe(offset + i_dwn) = x_Ph[dim]; + if (is_last && i_dwn == ncells - 1) { x_Cd[dim] = i_ + ONE; metric.template convert(x_Cd, x_Ph); - xe(offset + i + 1) = x_Ph[dim]; + xe(offset + i_dwn + 1) = x_Ph[dim]; } }); g_writer.writeMesh(dim, xc, xe); } - const auto output_asis = params.template get("output.debug.as_is"); // !TODO: this can probably be optimized to dump things at once for (auto& fld : g_writer.fieldWriters()) { @@ -308,6 +328,18 @@ namespace ntt { {}, local_domain->fields.bckp, c); + } else if (fld.id() == FldsID::V) { + if constexpr (S != SimEngine::GRPIC) { + ComputeMoments(params, + local_domain->mesh, + local_domain->species, + fld.species, + fld.comp[0], + local_domain->fields.bckp, + c); + } else { + raise::Error("Bulk velocity not supported for GRPIC", HERE); + } } else { raise::Error("Wrong moment requested for output", HERE); } @@ -337,16 +369,35 @@ namespace ntt { if (fld.is_moment()) { for (auto i = 0; i < 3; ++i) { const auto c = static_cast(addresses[i]); - raise::ErrorIf(fld.comp[i].size() != 2, - "Wrong # of components requested for moment", - HERE); - ComputeMoments(params, - local_domain->mesh, - local_domain->species, - fld.species, - fld.comp[i], - local_domain->fields.bckp, - c); + if (fld.id() == FldsID::T) { + raise::ErrorIf(fld.comp[i].size() != 2, + "Wrong # of components requested for moment", + HERE); + 
ComputeMoments(params, + local_domain->mesh, + local_domain->species, + fld.species, + fld.comp[i], + local_domain->fields.bckp, + c); + } else if (fld.id() == FldsID::V) { + raise::ErrorIf(fld.comp[i].size() != 1, + "Wrong # of components requested for 3vel", + HERE); + if constexpr (S == SimEngine::SRPIC) { + ComputeMoments(params, + local_domain->mesh, + local_domain->species, + fld.species, + fld.comp[i], + local_domain->fields.bckp, + c); + } else { + raise::Error("Bulk velocity not supported for GRPIC", HERE); + } + } else { + raise::Error("Wrong moment requested for output", HERE); + } } raise::ErrorIf(addresses[1] - addresses[0] != addresses[2] - addresses[1], @@ -355,6 +406,28 @@ namespace ntt { SynchronizeFields(*local_domain, Comm::Bckp, { addresses[0], addresses[2] + 1 }); + if constexpr (S == SimEngine::SRPIC) { + if (fld.id() == FldsID::V) { + // normalize 3vel * rho (combuted above) by rho + ComputeMoments(params, + local_domain->mesh, + local_domain->species, + fld.species, + {}, + local_domain->fields.bckp, + 0u); + SynchronizeFields(*local_domain, Comm::Bckp, { 0, 1 }); + Kokkos::parallel_for("NormalizeVectorByRho", + local_domain->mesh.rangeActiveCells(), + kernel::NormalizeVectorByRho_kernel( + local_domain->fields.bckp, + local_domain->fields.bckp, + 0, + addresses[0], + addresses[1], + addresses[2])); + } + } } else { // copy fields to bckp (:, 0, 1, 2) // if as-is specified ==> copy directly to 3, 4, 5 @@ -447,24 +520,24 @@ namespace ntt { } g_writer.writeField(names, local_domain->fields.bckp, addresses); } + g_writer.endWriting(WriteMode::Fields); } // end shouldWrite("fields", step, time) if (write_particles) { + g_writer.beginWriting(WriteMode::Particles, current_step, current_time); const auto prtl_stride = params.template get( "output.particles.stride"); for (const auto& prtl : g_writer.speciesWriters()) { auto& species = local_domain->species[prtl.species() - 1]; if (not species.is_sorted()) { - species.SortByTags(); + 
species.RemoveDead(); } const std::size_t nout = species.npart() / prtl_stride; array_t buff_x1, buff_x2, buff_x3; - array_t buff_ux1, buff_ux2, buff_ux3; - array_t buff_wei; - buff_wei = array_t { "w", nout }; - buff_ux1 = array_t { "u1", nout }; - buff_ux2 = array_t { "u2", nout }; - buff_ux3 = array_t { "u3", nout }; + array_t buff_ux1 { "u1", nout }; + array_t buff_ux2 { "ux2", nout }; + array_t buff_ux3 { "ux3", nout }; + array_t buff_wei { "w", nout }; if constexpr (M::Dim == Dim::_1D or M::Dim == Dim::_2D or M::Dim == Dim::_3D) { buff_x1 = array_t { "x1", nout }; @@ -527,9 +600,11 @@ namespace ntt { g_writer.writeParticleQuantity(buff_x3, glob_tot, offset, prtl.name("X", 3)); } } + g_writer.endWriting(WriteMode::Particles); } // end shouldWrite("particles", step, time) if (write_spectra) { + g_writer.beginWriting(WriteMode::Spectra, current_step, current_time); const auto log_bins = params.template get( "output.spectra.log_bins"); const auto n_bins = params.template get( @@ -593,9 +668,9 @@ namespace ntt { g_writer.writeSpectrum(dn, spec.name()); } g_writer.writeSpectrumBins(energy, "sEbn"); + g_writer.endWriting(WriteMode::Spectra); } // end shouldWrite("spectra", step, time) - g_writer.endWriting(); return true; } diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index 1d4672212..4a9b3056a 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -31,10 +31,10 @@ namespace ntt { template - auto get_dx0_V0(const std::vector& resolution, - const boundaries_t& extent, - const std::map& params) - -> std::pair { + auto get_dx0_V0( + const std::vector& resolution, + const boundaries_t& extent, + const std::map& params) -> std::pair { const auto metric = M(resolution, extent, params); const auto dx0 = metric.dxMin(); coord_t x_corner { ZERO }; @@ -47,7 +47,7 @@ namespace ntt { /* * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
- * Parameters that must not be changed during after the checkpoint restart + * Parameters that must not be changed during the checkpoint restart * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ void SimulationParams::setImmutableParams(const toml::value& toml_data) { @@ -322,7 +322,7 @@ namespace ntt { /* * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . - * Parameters that may be changed during after the checkpoint restart + * Parameters that may be changed during the checkpoint restart * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ void SimulationParams::setMutableParams(const toml::value& toml_data) { @@ -351,9 +351,9 @@ namespace ntt { auto atm_defined = false; for (const auto& bcs : flds_bc) { for (const auto& bc : bcs) { - if (fmt::toLower(bc) == "absorb") { - promiseToDefine("grid.boundaries.absorb.ds"); - promiseToDefine("grid.boundaries.absorb.coeff"); + if (fmt::toLower(bc) == "match") { + promiseToDefine("grid.boundaries.match.ds"); + promiseToDefine("grid.boundaries.match.coeff"); } if (fmt::toLower(bc) == "atmosphere") { raise::ErrorIf(atm_defined, @@ -386,7 +386,6 @@ namespace ntt { for (const auto& bc : bcs) { if (fmt::toLower(bc) == "absorb") { promiseToDefine("grid.boundaries.absorb.ds"); - promiseToDefine("grid.boundaries.absorb.coeff"); } if (fmt::toLower(bc) == "atmosphere") { raise::ErrorIf(atm_defined, @@ -445,15 +444,8 @@ namespace ntt { defaults::gr::pusher_niter)); } /* [particles] ---------------------------------------------------------- */ -#if defined(MPI_ENABLED) - const std::size_t sort_interval = 1; -#else - const std::size_t sort_interval = toml::find_or(toml_data, - "particles", - "sort_interval", - defaults::sort_interval); -#endif - set("particles.sort_interval", sort_interval); + set("particles.clear_interval", + toml::find_or(toml_data, "particles", "clear_interval", defaults::clear_interval)); /* [output] 
------------------------------------------------------------- */ // fields @@ -463,6 +455,9 @@ namespace ntt { toml::find_or(toml_data, "output", "interval", defaults::output::interval)); set("output.interval_time", toml::find_or(toml_data, "output", "interval_time", -1.0)); + set("output.separate_files", + toml::find_or(toml_data, "output", "separate_files", true)); + promiseToDefine("output.fields.interval"); promiseToDefine("output.fields.interval_time"); promiseToDefine("output.fields.enable"); @@ -494,19 +489,31 @@ namespace ntt { "fields", "mom_smooth", defaults::output::mom_smooth)); - set("output.fields.stride", - toml::find_or(toml_data, - "output", - "fields", - "stride", - defaults::output::flds_stride)); + auto field_dwn = toml::find_or(toml_data, + "output", + "fields", + "downsampling", + std::vector { 1, 1, 1 }); + raise::ErrorIf(field_dwn.size() > 3, "invalid `output.fields.downsampling`", HERE); + if (field_dwn.size() > dim) { + field_dwn.erase(field_dwn.begin() + (std::size_t)(dim), field_dwn.end()); + } + for (const auto& dwn : field_dwn) { + raise::ErrorIf(dwn == 0, "downsampling factor must be nonzero", HERE); + } + set("output.fields.downsampling", field_dwn); // particles + auto all_specs = std::vector {}; + const auto nspec = get("particles.nspec"); + for (auto i = 0u; i < nspec; ++i) { + all_specs.push_back(static_cast(i + 1)); + } const auto prtl_out = toml::find_or(toml_data, "output", "particles", "species", - std::vector {}); + all_specs); set("output.particles.species", prtl_out); set("output.particles.stride", toml::find_or(toml_data, @@ -561,8 +568,20 @@ namespace ntt { /* [output.debug] ------------------------------------------------------- */ set("output.debug.as_is", toml::find_or(toml_data, "output", "debug", "as_is", false)); - set("output.debug.ghosts", - toml::find_or(toml_data, "output", "debug", "ghosts", false)); + const auto output_ghosts = toml::find_or(toml_data, + "output", + "debug", + "ghosts", + false); + 
set("output.debug.ghosts", output_ghosts); + if (output_ghosts) { + for (const auto& dwn : field_dwn) { + raise::ErrorIf( + dwn != 1, + "full resolution required when outputting with ghost cells", + HERE); + } + } /* [checkpoint] --------------------------------------------------------- */ set("checkpoint.interval", @@ -711,6 +730,38 @@ namespace ntt { set("grid.boundaries.fields", flds_bc_pairwise); set("grid.boundaries.particles", prtl_bc_pairwise); + if (isPromised("grid.boundaries.match.ds")) { + if (coord_enum == Coord::Cart) { + auto min_extent = std::numeric_limits::max(); + for (const auto& e : extent_pairwise) { + min_extent = std::min(min_extent, e.second - e.first); + } + set("grid.boundaries.match.ds", + toml::find_or(toml_data, + "grid", + "boundaries", + "match", + "ds", + min_extent * defaults::bc::match::ds_frac)); + } else { + auto r_extent = extent_pairwise[0].second - extent_pairwise[0].first; + set("grid.boundaries.match.ds", + toml::find_or(toml_data, + "grid", + "boundaries", + "match", + "ds", + r_extent * defaults::bc::match::ds_frac)); + } + set("grid.boundaries.match.coeff", + toml::find_or(toml_data, + "grid", + "boundaries", + "match", + "coeff", + defaults::bc::match::coeff)); + } + if (isPromised("grid.boundaries.absorb.ds")) { if (coord_enum == Coord::Cart) { auto min_extent = std::numeric_limits::max(); @@ -734,13 +785,6 @@ namespace ntt { "ds", r_extent * defaults::bc::absorb::ds_frac)); } - set("grid.boundaries.absorb.coeff", - toml::find_or(toml_data, - "grid", - "boundaries", - "absorb", - "coeff", - defaults::bc::absorb::coeff)); } if (isPromised("grid.boundaries.atmosphere.temperature")) { diff --git a/src/framework/tests/CMakeLists.txt b/src/framework/tests/CMakeLists.txt index 56ad0783b..ce188e9f1 100644 --- a/src/framework/tests/CMakeLists.txt +++ b/src/framework/tests/CMakeLists.txt @@ -1,19 +1,23 @@ # ------------------------------ # @brief: Generates tests for the `ntt_framework` module +# # @uses: -# - kokkos [required] 
-# - plog [required] -# - toml11 [required] -# - mpi [optional] -# - adios2 [optional] +# +# * kokkos [required] +# * plog [required] +# * toml11 [required] +# * mpi [optional] +# * adios2 [optional] +# # !TODO: -# - add tests for mesh separately -# - add test for 3D metadomain +# +# * add tests for mesh separately +# * add test for 3D metadomain # ------------------------------ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) -function(gen_test title) +function(gen_test title is_parallel) set(exec test-framework-${title}.xc) set(src ${title}.cpp) add_executable(${exec} ${src}) @@ -22,24 +26,30 @@ function(gen_test title) add_dependencies(${exec} ${libs}) target_link_libraries(${exec} PRIVATE ${libs}) - add_test(NAME "FRAMEWORK::${title}" COMMAND "${exec}") + if(${is_parallel}) + add_test(NAME "FRAMEWORK::${title}" + COMMAND "${MPIEXEC_EXECUTABLE}" "${MPIEXEC_NUMPROC_FLAG}" "4" + "${exec}") + else() + add_test(NAME "FRAMEWORK::${title}" COMMAND "${exec}") + endif() endfunction() -if (${mpi}) - gen_test(comm_mpi) +if(${mpi}) + gen_test(comm_mpi true) else() - gen_test(parameters) - gen_test(particles) - gen_test(fields) - gen_test(grid_mesh) - if (${DEBUG}) - gen_test(metadomain) + gen_test(parameters false) + gen_test(particles false) + gen_test(fields false) + gen_test(grid_mesh false) + if(${DEBUG}) + gen_test(metadomain false) endif() - gen_test(comm_nompi) + gen_test(comm_nompi false) endif() - # this test is only run manually to ensure ... # ... command line args are working properly ... # ... 
and that the logging is done correctly -# gen_test(simulation) +# +# gen_test(simulation) diff --git a/src/framework/tests/parameters.cpp b/src/framework/tests/parameters.cpp index 8d30355b9..7cd5ce46a 100644 --- a/src/framework/tests/parameters.cpp +++ b/src/framework/tests/parameters.cpp @@ -32,7 +32,7 @@ const auto mink_1d = u8R"( fields = [["PERIODIC"]] particles = [["ABSORB", "ABSORB"]] - [grid.boundaries.absorb] + [grid.boundaries.match] coeff = 10.0 ds = 0.025 @@ -48,7 +48,7 @@ const auto mink_1d = u8R"( [particles] ppc0 = 10.0 - sort_interval = 100 + clear_interval = 100 [[particles.species]] label = "e-" @@ -73,13 +73,18 @@ const auto mink_1d = u8R"( mystr = "hi" [output] - fields = ["Rho", "J", "B"] - particles = ["X", "U"] format = "hdf5" - mom_smooth = 2 - fields_stride = 1 - prtl_stride = 100 - interval_time = 0.01 + + [output.fields] + quantities = ["Rho", "J", "B"] + mom_smooth = 2 + downsampling = [4, 5] + interval = 100 + + [output.particles] + species = [1, 2] + stride = 100 + interval_time = 0.01 )"_toml; const auto sph_2d = u8R"( @@ -96,10 +101,10 @@ const auto sph_2d = u8R"( metric = "spherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["ATMOSPHERE", "MATCH"]] particles = [["ATMOSPHERE", "ABSORB"]] - [grid.boundaries.absorb] + [grid.boundaries.match] coeff = 10.0 [grid.boundaries.atmosphere] @@ -129,7 +134,7 @@ const auto sph_2d = u8R"( [particles] ppc0 = 25.0 use_weights = true - sort_interval = 50 + clear_interval = 50 [[particles.species]] @@ -175,7 +180,7 @@ const auto qks_2d = u8R"( ks_a = 0.99 [grid.boundaries] - fields = [["ABSORB"]] + fields = [["MATCH"]] particles = [["ABSORB"]] [scales] @@ -194,7 +199,7 @@ const auto qks_2d = u8R"( [particles] ppc0 = 4.0 - sort_interval = 100 + clear_interval = 100 [[particles.species]] label = "e-" @@ -264,7 +269,7 @@ auto main(int argc, char* argv[]) -> int { (real_t)0.0078125, "scales.V0"); boundaries_t fbc = { - {FldsBC::PERIODIC, FldsBC::PERIODIC} + { FldsBC::PERIODIC, 
FldsBC::PERIODIC } }; assert_equal( params_mink_1d.get>("grid.boundaries.fields")[0].first, @@ -315,6 +320,13 @@ auto main(int argc, char* argv[]) -> int { assert_equal(params_mink_1d.get("setup.mystr"), "hi", "setup.mystr"); + + const auto output_stride = params_mink_1d.get>( + "output.fields.downsampling"); + assert_equal(output_stride.size(), + 1, + "output.fields.downsampling.size()"); + assert_equal(output_stride[0], 4, "output.fields.downsampling[0]"); } { @@ -333,8 +345,8 @@ auto main(int argc, char* argv[]) -> int { "simulation.engine"); boundaries_t fbc = { - {FldsBC::ATMOSPHERE, FldsBC::ABSORB}, - { FldsBC::AXIS, FldsBC::AXIS} + { FldsBC::ATMOSPHERE, FldsBC::MATCH }, + { FldsBC::AXIS, FldsBC::AXIS } }; assert_equal(params_sph_2d.get("scales.B0"), @@ -369,16 +381,16 @@ auto main(int argc, char* argv[]) -> int { fbc.size(), "grid.boundaries.fields.size()"); - // absorb coeffs + // match coeffs assert_equal( - params_sph_2d.get("grid.boundaries.absorb.ds"), - (real_t)(defaults::bc::absorb::ds_frac * 19.0), - "grid.boundaries.absorb.ds"); + params_sph_2d.get("grid.boundaries.match.ds"), + (real_t)(defaults::bc::match::ds_frac * 19.0), + "grid.boundaries.match.ds"); assert_equal( - params_sph_2d.get("grid.boundaries.absorb.coeff"), + params_sph_2d.get("grid.boundaries.match.coeff"), (real_t)10.0, - "grid.boundaries.absorb.coeff"); + "grid.boundaries.match.coeff"); assert_equal(params_sph_2d.get("particles.use_weights"), true, @@ -468,9 +480,9 @@ auto main(int argc, char* argv[]) -> int { "grid.metric.ks_rh"); const auto expect = std::map { - {"r0", 0.0}, - { "h", 0.25}, - { "a", 0.99} + { "r0", 0.0 }, + { "h", 0.25 }, + { "a", 0.99 } }; auto read = params_qks_2d.get>( "grid.metric.params"); @@ -489,8 +501,8 @@ auto main(int argc, char* argv[]) -> int { "algorithms.gr.pusher_niter"); boundaries_t pbc = { - {PrtlBC::HORIZON, PrtlBC::ABSORB}, - { PrtlBC::AXIS, PrtlBC::AXIS} + { PrtlBC::HORIZON, PrtlBC::ABSORB }, + { PrtlBC::AXIS, PrtlBC::AXIS } }; 
assert_equal(params_qks_2d.get("scales.B0"), @@ -525,16 +537,16 @@ auto main(int argc, char* argv[]) -> int { pbc.size(), "grid.boundaries.particles.size()"); - // absorb coeffs + // match coeffs assert_equal( - params_qks_2d.get("grid.boundaries.absorb.ds"), - (real_t)(defaults::bc::absorb::ds_frac * (100.0 - 0.8)), - "grid.boundaries.absorb.ds"); + params_qks_2d.get("grid.boundaries.match.ds"), + (real_t)(defaults::bc::match::ds_frac * (100.0 - 0.8)), + "grid.boundaries.match.ds"); assert_equal( - params_qks_2d.get("grid.boundaries.absorb.coeff"), - defaults::bc::absorb::coeff, - "grid.boundaries.absorb.coeff"); + params_qks_2d.get("grid.boundaries.match.coeff"), + defaults::bc::match::coeff, + "grid.boundaries.match.coeff"); const auto species = params_qks_2d.get>( "particles.species"); @@ -567,86 +579,3 @@ auto main(int argc, char* argv[]) -> int { return 0; } - -// const auto mink_1d = R"( -// [simulation] -// name = "" -// engine = "" -// runtime = "" - -// [grid] -// resolution = "" -// extent = "" - -// [grid.metric] -// metric = "" -// qsph_r0 = "" -// qsph_h = "" -// ks_a = "" - -// [grid.boundaries] -// fields = "" -// particles = "" -// absorb_d = "" -// absorb_coeff = "" - -// [scales] -// larmor0 = "" -// skindepth0 = "" - -// [algorithms] -// current_filters = "" - -// [algorithms.toggles] -// fieldsolver = "" -// deposit = "" - -// [algorithms.timestep] -// CFL = "" -// correction = "" - -// [algorithms.gr] -// pusher_eps = "" -// pusher_niter = "" - -// [algorithms.gca] -// e_ovr_b_max = "" -// larmor_max = "" - -// [algorithms.synchrotron] -// gamma_rad = "" - -// [particles] -// ppc0 = "" -// use_weights = "" -// sort_interval = "" - -// [[particles.species]] -// label = "" -// mass = "" -// charge = "" -// maxnpart = "" -// pusher = "" -// n_payloads = "" -// cooling = "" -// [setup] - -// [output] -// fields = "" -// particles = "" -// format = "" -// mom_smooth = "" -// fields_stride = "" -// prtl_stride = "" -// interval = "" -// 
interval_time = "" - -// [output.debug] -// as_is = "" -// ghosts = "" - -// [diagnostics] -// interval = "" -// log_level = "" -// blocking_timers = "" -// )"_toml; diff --git a/src/framework/tests/particles.cpp b/src/framework/tests/particles.cpp index dabcc062f..6c4c227b5 100644 --- a/src/framework/tests/particles.cpp +++ b/src/framework/tests/particles.cpp @@ -46,9 +46,9 @@ void testParticles(const int& index, raise::ErrorIf(p.tag.extent(0) != maxnpart, "tag incorrectly allocated", HERE); raise::ErrorIf(p.weight.extent(0) != maxnpart, "weight incorrectly allocated", HERE); - raise::ErrorIf(p.pld.size() != npld, "Number of payloads mismatch", HERE); - for (unsigned short n { 0 }; n < npld; ++n) { - raise::ErrorIf(p.pld[n].extent(0) != maxnpart, "pld incorrectly allocated", HERE); + if (npld > 0) { + raise::ErrorIf(p.pld.extent(0) != maxnpart, "pld incorrectly allocated", HERE); + raise::ErrorIf(p.pld.extent(1) != npld, "pld incorrectly allocated", HERE); } if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { @@ -117,7 +117,8 @@ auto main(int argc, char** argv) -> int { 0.0, 100, PrtlPusher::PHOTON, - Cooling::NONE); + Cooling::NONE, + 5); testParticles(4, "e+", 1.0, @@ -131,7 +132,8 @@ auto main(int argc, char** argv) -> int { 1.0, 100, PrtlPusher::BORIS, - Cooling::NONE); + Cooling::NONE, + 1); } catch (const std::exception& e) { std::cerr << "Error: " << e.what() << std::endl; Kokkos::finalize(); @@ -139,4 +141,4 @@ auto main(int argc, char** argv) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/global/CMakeLists.txt b/src/global/CMakeLists.txt index 334ce078d..97946f059 100644 --- a/src/global/CMakeLists.txt +++ b/src/global/CMakeLists.txt @@ -1,36 +1,38 @@ # ------------------------------ # @defines: ntt_global [STATIC/SHARED] +# # @sources: -# - global.cpp -# - arch/kokkos_aliases.cpp -# - utils/cargs.cpp -# - utils/param_container.cpp -# - utils/timer.cpp -# - utils/diag.cpp -# - utils/progressbar.cpp +# +# * 
global.cpp +# * arch/kokkos_aliases.cpp +# * utils/cargs.cpp +# * utils/param_container.cpp +# * utils/timer.cpp +# * utils/diag.cpp +# * utils/progressbar.cpp +# # @includes: -# - ./ +# +# * ./ +# # @uses: -# - kokkos [required] -# - plog [required] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * mpi [optional] # ------------------------------ set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SOURCES - ${SRC_DIR}/global.cpp - ${SRC_DIR}/arch/kokkos_aliases.cpp - ${SRC_DIR}/utils/cargs.cpp - ${SRC_DIR}/utils/timer.cpp - ${SRC_DIR}/utils/diag.cpp - ${SRC_DIR}/utils/progressbar.cpp -) -if (${output}) +set(SOURCES + ${SRC_DIR}/global.cpp ${SRC_DIR}/arch/kokkos_aliases.cpp + ${SRC_DIR}/utils/cargs.cpp ${SRC_DIR}/utils/timer.cpp + ${SRC_DIR}/utils/diag.cpp ${SRC_DIR}/utils/progressbar.cpp) +if(${output}) list(APPEND SOURCES ${SRC_DIR}/utils/param_container.cpp) endif() add_library(ntt_global ${SOURCES}) -target_include_directories(ntt_global +target_include_directories( + ntt_global PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR} -) + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(ntt_global PRIVATE stdc++fs) diff --git a/src/global/arch/directions.h b/src/global/arch/directions.h index 19cf182d6..ccd4e67b0 100644 --- a/src/global/arch/directions.h +++ b/src/global/arch/directions.h @@ -132,8 +132,8 @@ namespace dir { using dirs_t = std::vector>; template - inline auto operator<<(std::ostream& os, const direction_t& dir) - -> std::ostream& { + inline auto operator<<(std::ostream& os, + const direction_t& dir) -> std::ostream& { for (auto& d : dir) { os << std::setw(2) << std::left; if (d > 0) { @@ -175,81 +175,81 @@ namespace dir { template <> struct Directions { inline static const dirs_t all = { - {-1, -1}, - {-1, 0}, - {-1, 1}, - { 0, -1}, - { 0, 1}, - { 1, -1}, - { 1, 0}, - { 1, 1} + { -1, -1 }, + { -1, 0 }, + { -1, 1 }, + { 0, -1 }, + { 0, 1 }, + { 1, -1 }, + { 1, 0 }, + { 1, 1 } }; inline static const 
dirs_t orth = { - {-1, 0}, - { 0, -1}, - { 0, 1}, - { 1, 0} + { -1, 0 }, + { 0, -1 }, + { 0, 1 }, + { 1, 0 } }; inline static const dirs_t unique = { - { 0, 1}, - { 1, 1}, - { 1, 0}, - {-1, 1} + { 0, 1 }, + { 1, 1 }, + { 1, 0 }, + { -1, 1 } }; }; template <> struct Directions { inline static const dirs_t all = { - {-1, -1, -1}, - {-1, -1, 0}, - {-1, -1, 1}, - {-1, 0, -1}, - {-1, 0, 0}, - {-1, 0, 1}, - {-1, 1, -1}, - {-1, 1, 0}, - {-1, 1, 1}, - { 0, -1, -1}, - { 0, -1, 0}, - { 0, -1, 1}, - { 0, 0, -1}, - { 0, 0, 1}, - { 0, 1, -1}, - { 0, 1, 0}, - { 0, 1, 1}, - { 1, -1, -1}, - { 1, -1, 0}, - { 1, -1, 1}, - { 1, 0, -1}, - { 1, 0, 0}, - { 1, 0, 1}, - { 1, 1, -1}, - { 1, 1, 0}, - { 1, 1, 1} + { -1, -1, -1 }, + { -1, -1, 0 }, + { -1, -1, 1 }, + { -1, 0, -1 }, + { -1, 0, 0 }, + { -1, 0, 1 }, + { -1, 1, -1 }, + { -1, 1, 0 }, + { -1, 1, 1 }, + { 0, -1, -1 }, + { 0, -1, 0 }, + { 0, -1, 1 }, + { 0, 0, -1 }, + { 0, 0, 1 }, + { 0, 1, -1 }, + { 0, 1, 0 }, + { 0, 1, 1 }, + { 1, -1, -1 }, + { 1, -1, 0 }, + { 1, -1, 1 }, + { 1, 0, -1 }, + { 1, 0, 0 }, + { 1, 0, 1 }, + { 1, 1, -1 }, + { 1, 1, 0 }, + { 1, 1, 1 } }; inline static const dirs_t orth = { - {-1, 0, 0}, - { 0, -1, 0}, - { 0, 0, -1}, - { 0, 0, 1}, - { 0, 1, 0}, - { 1, 0, 0} + { -1, 0, 0 }, + { 0, -1, 0 }, + { 0, 0, -1 }, + { 0, 0, 1 }, + { 0, 1, 0 }, + { 1, 0, 0 } }; inline static const dirs_t unique = { - { 0, 0, 1}, - { 0, 1, 0}, - { 1, 0, 0}, - { 1, 1, 0}, - {-1, 1, 0}, - { 0, 1, 1}, - { 0, -1, 1}, - { 1, 0, 1}, - {-1, 0, 1}, - { 1, 1, 1}, - {-1, 1, 1}, - { 1, -1, 1}, - { 1, 1, -1} + { 0, 0, 1 }, + { 0, 1, 0 }, + { 1, 0, 0 }, + { 1, 1, 0 }, + { -1, 1, 0 }, + { 0, 1, 1 }, + { 0, -1, 1 }, + { 1, 0, 1 }, + { -1, 0, 1 }, + { 1, 1, 1 }, + { -1, 1, 1 }, + { 1, -1, 1 }, + { 1, 1, -1 } }; }; diff --git a/src/global/arch/kokkos_aliases.cpp b/src/global/arch/kokkos_aliases.cpp index 4311a40bd..6c15e3d52 100644 --- a/src/global/arch/kokkos_aliases.cpp +++ b/src/global/arch/kokkos_aliases.cpp @@ -5,18 +5,18 @@ #include template <> 
-auto CreateRangePolicy(const tuple_t& i1, - const tuple_t& i2) - -> range_t { +auto CreateRangePolicy( + const tuple_t& i1, + const tuple_t& i2) -> range_t { index_t i1min = i1[0]; index_t i1max = i2[0]; return Kokkos::RangePolicy(i1min, i1max); } template <> -auto CreateRangePolicy(const tuple_t& i1, - const tuple_t& i2) - -> range_t { +auto CreateRangePolicy( + const tuple_t& i1, + const tuple_t& i2) -> range_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -26,9 +26,9 @@ auto CreateRangePolicy(const tuple_t& i1, } template <> -auto CreateRangePolicy(const tuple_t& i1, - const tuple_t& i2) - -> range_t { +auto CreateRangePolicy( + const tuple_t& i1, + const tuple_t& i2) -> range_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -41,18 +41,18 @@ auto CreateRangePolicy(const tuple_t& i1, } template <> -auto CreateRangePolicyOnHost(const tuple_t& i1, - const tuple_t& i2) - -> range_h_t { +auto CreateRangePolicyOnHost( + const tuple_t& i1, + const tuple_t& i2) -> range_h_t { index_t i1min = i1[0]; index_t i1max = i2[0]; return Kokkos::RangePolicy(i1min, i1max); } template <> -auto CreateRangePolicyOnHost(const tuple_t& i1, - const tuple_t& i2) - -> range_h_t { +auto CreateRangePolicyOnHost( + const tuple_t& i1, + const tuple_t& i2) -> range_h_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -62,9 +62,9 @@ auto CreateRangePolicyOnHost(const tuple_t& i1, } template <> -auto CreateRangePolicyOnHost(const tuple_t& i1, - const tuple_t& i2) - -> range_h_t { +auto CreateRangePolicyOnHost( + const tuple_t& i1, + const tuple_t& i2) -> range_h_t { index_t i1min = i1[0]; index_t i1max = i2[0]; index_t i2min = i1[1]; @@ -76,11 +76,11 @@ auto CreateRangePolicyOnHost(const tuple_t& i1, { i1max, i2max, i3max }); } -// auto WaitAndSynchronize(bool debug_only) -> void { -// if (debug_only) { -// #ifndef DEBUG -// return; -// #endif -// } -// Kokkos::fence(); -// } \ No newline at end of file +auto 
WaitAndSynchronize(bool debug_only) -> void { + if (debug_only) { +#ifndef DEBUG + return; +#endif + } + Kokkos::fence(); +} diff --git a/src/global/arch/mpi_tags.h b/src/global/arch/mpi_tags.h index 0916542d4..aaf38a8f4 100644 --- a/src/global/arch/mpi_tags.h +++ b/src/global/arch/mpi_tags.h @@ -7,6 +7,8 @@ * @namespaces: * - mpi:: */ +#ifndef GLOBAL_ARCH_MPI_TAGS_H +#define GLOBAL_ARCH_MPI_TAGS_H #include "global.h" @@ -188,8 +190,13 @@ namespace mpi { tag; } - Inline auto SendTag(short tag, bool im1, bool ip1, bool jm1, bool jp1, bool km1, bool kp1) - -> short { + Inline auto SendTag(short tag, + bool im1, + bool ip1, + bool jm1, + bool jp1, + bool km1, + bool kp1) -> short { return ((im1 && jm1 && km1) * (PrtlSendTag::im1_jm1_km1 - 1) + (im1 && jm1 && kp1) * (PrtlSendTag::im1_jm1_kp1 - 1) + (im1 && jp1 && km1) * (PrtlSendTag::im1_jp1_km1 - 1) + @@ -226,3 +233,5 @@ namespace mpi { tag; } } // namespace mpi + +#endif // GLOBAL_ARCH_MPI_TAGS_H diff --git a/src/global/arch/traits.h b/src/global/arch/traits.h index e915bdf1a..4cde4fca5 100644 --- a/src/global/arch/traits.h +++ b/src/global/arch/traits.h @@ -10,7 +10,11 @@ * - traits::run_t, traits::to_string_t * - traits::pgen::init_flds_t * - traits::pgen::ext_force_t - * - traits::pgen::field_driver_t + * - traits::pgen::atm_fields_t + * - traits::pgen::match_fields_const_t + * - traits::pgen::match_fields_t + * - traits::pgen::fix_fields_const_t + * - traits::pgen::fix_fields_t * - traits::pgen::init_prtls_t * - traits::pgen::custom_fields_t * - traits::pgen::custom_field_output_t @@ -94,7 +98,19 @@ namespace traits { using ext_force_t = decltype(&T::ext_force); template - using field_driver_t = decltype(&T::FieldDriver); + using atm_fields_t = decltype(&T::AtmFields); + + template + using match_fields_t = decltype(&T::MatchFields); + + template + using match_fields_const_t = decltype(&T::MatchFieldsConst); + + template + using fix_fields_t = decltype(&T::FixFields); + + template + using fix_fields_const_t = 
decltype(&T::FixFieldsConst); template using custom_fields_t = decltype(&T::CustomFields); diff --git a/src/global/defaults.h b/src/global/defaults.h index ee9a65af5..f44fd1844 100644 --- a/src/global/defaults.h +++ b/src/global/defaults.h @@ -22,9 +22,9 @@ namespace ntt::defaults { const unsigned short current_filters = 0; - const std::string em_pusher = "Boris"; - const std::string ph_pusher = "Photon"; - const std::size_t sort_interval = 100; + const std::string em_pusher = "Boris"; + const std::string ph_pusher = "Photon"; + const std::size_t clear_interval = 100; namespace qsph { const real_t r0 = 0.0; @@ -41,17 +41,20 @@ namespace ntt::defaults { } // namespace gr namespace bc { - namespace absorb { + namespace match { const real_t ds_frac = 0.01; const real_t coeff = 1.0; + } // namespace match + + namespace absorb { + const real_t ds_frac = 0.01; } // namespace absorb - } // namespace bc + } // namespace bc namespace output { const std::string format = "hdf5"; const std::size_t interval = 100; const unsigned short mom_smooth = 0; - const unsigned short flds_stride = 1; const std::size_t prtl_stride = 100; const real_t spec_emin = 1e-3; const real_t spec_emax = 1e3; diff --git a/src/global/enums.h b/src/global/enums.h index 57822dec4..6c6a4ec8f 100644 --- a/src/global/enums.h +++ b/src/global/enums.h @@ -8,12 +8,12 @@ * - enum ntt::SimEngine // SRPIC, GRPIC * - enum ntt::PrtlBC // periodic, absorb, atmosphere, custom, * reflect, horizon, axis, sync - * - enum ntt::FldsBC // periodic, absorb, atmosphere, custom, - * conductor, horizon, axis, sync + * - enum ntt::FldsBC // periodic, match, fixed, atmosphere, + * custom, horizon, axis, sync * - enum ntt::PrtlPusher // boris, vay, photon, none * - enum ntt::Cooling // synchrotron, none * - enum ntt::FldsID // e, dive, d, divd, b, h, j, - * a, t, rho, charge, n, nppc, custom + * a, t, rho, charge, n, nppc, v, custom * @namespaces: * - ntt:: * @note Enums of the same type can be compared with each other and with 
strings @@ -215,10 +215,10 @@ namespace ntt { enum type : uint8_t { INVALID = 0, PERIODIC = 1, - ABSORB = 2, - ATMOSPHERE = 3, - CUSTOM = 4, - CONDUCTOR = 5, + MATCH = 2, + FIXED = 3, + ATMOSPHERE = 4, + CUSTOM = 5, HORIZON = 6, AXIS = 7, SYNC = 8, // <- SYNC means synchronization with other domains @@ -226,11 +226,10 @@ namespace ntt { constexpr FldsBC(uint8_t c) : enums_hidden::BaseEnum { c } {} - static constexpr type variants[] = { PERIODIC, ABSORB, ATMOSPHERE, CUSTOM, - CONDUCTOR, HORIZON, AXIS, SYNC }; - static constexpr const char* lookup[] = { "periodic", "absorb", - "atmosphere", "custom", - "conductor", "horizon", + static constexpr type variants[] = { PERIODIC, MATCH, FIXED, ATMOSPHERE, + CUSTOM, HORIZON, AXIS, SYNC }; + static constexpr const char* lookup[] = { "periodic", "match", "fixed", + "atmosphere", "custom", "horizon", "axis", "sync" }; static constexpr std::size_t total = sizeof(variants) / sizeof(variants[0]); }; @@ -288,17 +287,19 @@ namespace ntt { Charge = 11, N = 12, Nppc = 13, - Custom = 14, + V = 14, + Custom = 15, }; constexpr FldsID(uint8_t c) : enums_hidden::BaseEnum { c } {} - static constexpr type variants[] = { E, divE, D, divD, B, H, J, - A, T, Rho, Charge, N, Nppc, Custom }; - static constexpr const char* lookup[] = { "e", "dive", "d", "divd", - "b", "h", "j", "a", - "t", "rho", "charge", "n", - "nppc", "custom" }; + static constexpr type variants[] = { E, divE, D, divD, B, + H, J, A, T, Rho, + Charge, N, Nppc, V, Custom }; + static constexpr const char* lookup[] = { "e", "dive", "d", "divd", + "b", "h", "j", "a", + "t", "rho", "charge", "n", + "nppc", "v", "custom" }; static constexpr std::size_t total = sizeof(variants) / sizeof(variants[0]); }; diff --git a/src/global/global.h b/src/global/global.h index ad524fb0e..77fa8c51c 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -13,6 +13,10 @@ * - enum PrepareOutput * - enum CellLayer // allLayer, activeLayer, minGhostLayer, * minActiveLayer, maxActiveLayer, 
maxGhostLayer + * - enum Idx // U, D, T, XYZ, Sph, PU, PD + * - enum Crd // Cd, Ph, XYZ, Sph + * - enum in // x1, x2, x3 + * - enum bc_in // Px1, Mx1, Px2, Mx2, Px3, Mx3 * - type box_region_t * - files::LogFile, files::ErrFile, files::InfoFile * - type prtldx_t @@ -184,6 +188,15 @@ enum class in : unsigned short { x3 = 2, }; +enum class bc_in : short { + Mx1 = -1, + Px1 = 1, + Mx2 = -2, + Px2 = 2, + Mx3 = -3, + Px3 = 3, +}; + template using box_region_t = CellLayer[D]; @@ -209,7 +222,7 @@ namespace Timer { PrintTitle = 1 << 1, AutoConvert = 1 << 2, PrintOutput = 1 << 3, - PrintSorting = 1 << 4, + PrintPrtlClear = 1 << 4, PrintCheckpoint = 1 << 5, PrintNormed = 1 << 6, Default = PrintNormed | PrintTotal | PrintTitle | AutoConvert, @@ -249,6 +262,17 @@ namespace Comm { typedef int CommTags; +namespace WriteMode { + enum WriteModeTags_ { + None = 0, + Fields = 1 << 0, + Particles = 1 << 1, + Spectra = 1 << 2, + }; +} // namespace WriteMode + +typedef int WriteModeTags; + namespace BC { enum BCTags_ { None = 0, diff --git a/src/global/tests/CMakeLists.txt b/src/global/tests/CMakeLists.txt index e9e5de687..e30da20a0 100644 --- a/src/global/tests/CMakeLists.txt +++ b/src/global/tests/CMakeLists.txt @@ -1,11 +1,15 @@ # ------------------------------ # @brief: Generates tests for the `ntt_global` module +# # @uses: -# - kokkos [required] -# - plog [required] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * mpi [optional] +# # !TODO: -# - add optional tests for the `mpi_aliases.h` +# +# * add optional tests for the `mpi_aliases.h` # ------------------------------ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) diff --git a/src/global/tests/enums.cpp b/src/global/tests/enums.cpp index 1fc57398f..673efaf34 100644 --- a/src/global/tests/enums.cpp +++ b/src/global/tests/enums.cpp @@ -61,14 +61,14 @@ auto main() -> int { enum_str_t all_simulation_engines = { "srpic", "grpic" }; enum_str_t all_particle_bcs = { "periodic", "absorb", "atmosphere", "custom", 
"reflect", "horizon", "axis", "sync" }; - enum_str_t all_fields_bcs = { "periodic", "absorb", "atmosphere", "custom", - "horizon", "conductor", "axis", "sync" }; + enum_str_t all_fields_bcs = { "periodic", "match", "fixed", "atmosphere", + "custom", "horizon", "axis", "sync" }; enum_str_t all_particle_pushers = { "boris", "vay", "photon", "none" }; enum_str_t all_coolings = { "synchrotron", "none" }; - enum_str_t all_out_flds = { "e", "dive", "d", "divd", "b", - "h", "j", "a", "t", "rho", - "charge", "n", "nppc", "custom" }; + enum_str_t all_out_flds = { "e", "dive", "d", "divd", "b", + "h", "j", "a", "t", "rho", + "charge", "n", "nppc", "v", "custom" }; checkEnum(all_coords); checkEnum(all_metrics); diff --git a/src/global/tests/kokkos_aliases.cpp b/src/global/tests/kokkos_aliases.cpp index 56a17c50f..909b6b30c 100644 --- a/src/global/tests/kokkos_aliases.cpp +++ b/src/global/tests/kokkos_aliases.cpp @@ -3,6 +3,7 @@ #include "global.h" #include +#include #include #include @@ -44,8 +45,7 @@ auto main(int argc, char* argv[]) -> int { { // scatter arrays & ranges array_t a { "a", 100 }; - scatter_array_t a_scatter = Kokkos::Experimental::create_scatter_view( - a); + auto a_scatter = Kokkos::Experimental::create_scatter_view(a); Kokkos::parallel_for( // range_t({ 0 }, { 100 }), CreateRangePolicy({ 0 }, { 100 }), @@ -87,4 +87,4 @@ auto main(int argc, char* argv[]) -> int { Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/global/utils/diag.cpp b/src/global/utils/diag.cpp index 0a499dd56..c053cdacf 100644 --- a/src/global/utils/diag.cpp +++ b/src/global/utils/diag.cpp @@ -21,8 +21,9 @@ #include namespace diag { - auto npart_stats(std::size_t npart, std::size_t maxnpart) - -> std::vector> { + auto npart_stats( + std::size_t npart, + std::size_t maxnpart) -> std::vector> { auto stats = std::vector>(); #if !defined(MPI_ENABLED) stats.push_back( @@ -84,7 +85,7 @@ namespace diag { const std::vector& species_labels, const std::vector& 
species_npart, const std::vector& species_maxnpart, - bool print_sorting, + bool print_prtl_clear, bool print_output, bool print_checkpoint, bool print_colors) { @@ -96,8 +97,8 @@ namespace diag { if (species_labels.size() == 0) { diag_flags ^= Diag::Species; } - if (print_sorting) { - timer_flags |= Timer::PrintSorting; + if (print_prtl_clear) { + timer_flags |= Timer::PrintPrtlClear; } if (print_output) { timer_flags |= Timer::PrintOutput; diff --git a/src/global/utils/diag.h b/src/global/utils/diag.h index 9951602f8..30cca5705 100644 --- a/src/global/utils/diag.h +++ b/src/global/utils/diag.h @@ -34,9 +34,9 @@ namespace diag { * @param species_labels (vector of particle labels) * @param npart (per each species) * @param maxnpart (per each species) - * @param sorting_step (if true, particles were sorted) - * @param output_step (if true, output was written) - * @param checkpoint_step (if true, checkpoint was written) + * @param prtlclear (if true, dead particles were removed) + * @param output (if true, output was written) + * @param checkpoint (if true, checkpoint was written) * @param colorful_print (if true, print with colors) */ void printDiagnostics(std::size_t, diff --git a/src/global/utils/numeric.h b/src/global/utils/numeric.h index 0b09f6c11..719256d1d 100644 --- a/src/global/utils/numeric.h +++ b/src/global/utils/numeric.h @@ -91,4 +91,8 @@ namespace constant { inline constexpr double SQRT3 = 1.73205080756887729352; } // namespace constant +namespace convert { + inline constexpr double deg2rad = constant::PI / 180.0; +} // namespace convert + #endif // GLOBAL_UTILS_NUMERIC_H diff --git a/src/global/utils/timer.cpp b/src/global/utils/timer.cpp index b5f4408ca..a12d79b96 100644 --- a/src/global/utils/timer.cpp +++ b/src/global/utils/timer.cpp @@ -116,21 +116,23 @@ namespace timer { (timer.second / local_tot) * 100.0); timer_stats.insert( { name, - std::make_tuple(timer.second, - timer.second / static_cast(npart), - timer.second / static_cast(ncells), - 
pcent, - 0u) }); + std::make_tuple( + timer.second, + npart > 0 ? timer.second / static_cast(npart) : 0.0, + timer.second / static_cast(ncells), + pcent, + 0u) }); } timer_stats.insert({ "Total", std::make_tuple(local_tot, 0.0, 0.0, 100u, 0u) }); #endif return timer_stats; } - auto Timers::printAll(TimerFlags flags, std::size_t npart, std::size_t ncells) const - -> std::string { - const std::vector extras { "Sorting", "Output", "Checkpoint" }; - const auto stats = gather(extras, npart, ncells); + auto Timers::printAll(TimerFlags flags, + std::size_t npart, + std::size_t ncells) const -> std::string { + const std::vector extras { "PrtlClear", "Output", "Checkpoint" }; + const auto stats = gather(extras, npart, ncells); if (stats.empty()) { return ""; } @@ -253,8 +255,8 @@ namespace timer { } } - // print extra timers for output/checkpoint/sorting - const std::vector extras_f { Timer::PrintSorting, + // print extra timers for output/checkpoint/prtlClear + const std::vector extras_f { Timer::PrintPrtlClear, Timer::PrintOutput, Timer::PrintCheckpoint }; for (auto i { 0u }; i < extras.size(); ++i) { diff --git a/src/kernels/CMakeLists.txt b/src/kernels/CMakeLists.txt index d24dff0a4..c8a1f409f 100644 --- a/src/kernels/CMakeLists.txt +++ b/src/kernels/CMakeLists.txt @@ -1,13 +1,19 @@ # ------------------------------ # @defines: ntt_kernels [INTERFACE] +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_global [required] +# +# * ntt_global [required] +# # @uses: -# - kokkos [required] -# - plog [required] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * mpi [optional] # ------------------------------ add_library(ntt_kernels INTERFACE) @@ -17,5 +23,5 @@ add_dependencies(ntt_kernels ${libs}) target_link_libraries(ntt_kernels INTERFACE ${libs}) target_include_directories(ntt_kernels - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../ -) \ No newline at end of file + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../) + diff --git a/src/kernels/comm.hpp 
b/src/kernels/comm.hpp new file mode 100644 index 000000000..b280ce38b --- /dev/null +++ b/src/kernels/comm.hpp @@ -0,0 +1,341 @@ +/** + * @file kernels/comm.hpp + * @brief Kernels used during communications + * @implements + * - kernel::comm::PrepareOutgoingPrtls_kernel<> + * - kernel::comm::PopulatePrtlSendBuffer_kernel<> + * - kernel::comm::ExtractReceivedPrtls_kernel<> + * @namespaces: + * - kernel::comm:: + */ + +#ifndef KERNELS_COMM_HPP +#define KERNELS_COMM_HPP + +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" + +#include + +namespace kernel::comm { + using namespace ntt; + + template + class PrepareOutgoingPrtls_kernel { + const array_t shifts_in_x1, shifts_in_x2, shifts_in_x3; + array_t outgoing_indices; + + const std::size_t npart, npart_alive, npart_dead, ntags; + + array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; + const array_t tag; + + const array_t tag_offsets; + + array_t current_offset; + + public: + PrepareOutgoingPrtls_kernel(const array_t& shifts_in_x1, + const array_t& shifts_in_x2, + const array_t& shifts_in_x3, + array_t& outgoing_indices, + std::size_t npart, + std::size_t npart_alive, + std::size_t npart_dead, + std::size_t ntags, + array_t& i1, + array_t& i1_prev, + array_t& i2, + array_t& i2_prev, + array_t& i3, + array_t& i3_prev, + const array_t& tag, + const array_t& tag_offsets) + : shifts_in_x1 { shifts_in_x1 } + , shifts_in_x2 { shifts_in_x2 } + , shifts_in_x3 { shifts_in_x3 } + , outgoing_indices { outgoing_indices } + , npart { npart } + , npart_alive { npart_alive } + , npart_dead { npart_dead } + , ntags { ntags } + , i1 { i1 } + , i1_prev { i1_prev } + , i2 { i2 } + , i2_prev { i2_prev } + , i3 { i3 } + , i3_prev { i3_prev } + , tag { tag } + , tag_offsets { tag_offsets } + , current_offset { "current_offset", ntags } {} + + Inline void operator()(index_t p) const { + if (tag(p) != ParticleTag::alive) { + // dead or to-be-sent + auto idx_for_tag = Kokkos::atomic_fetch_add(&current_offset(tag(p)), 1); + if
(tag(p) != ParticleTag::dead) { + idx_for_tag += npart_dead; + } + if (tag(p) > 2) { + idx_for_tag += tag_offsets(tag(p) - 3); + } + if (idx_for_tag >= npart - npart_alive) { + raise::KernelError(HERE, "Outgoing indices idx exceeds the array size"); + } + outgoing_indices(idx_for_tag) = p; + // apply offsets + if (tag(p) != ParticleTag::dead) { + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + i1(p) += shifts_in_x1(tag(p) - 2); + i1_prev(p) += shifts_in_x1(tag(p) - 2); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + i2(p) += shifts_in_x2(tag(p) - 2); + i2_prev(p) += shifts_in_x2(tag(p) - 2); + } + if constexpr (D == Dim::_3D) { + i3(p) += shifts_in_x3(tag(p) - 2); + i3_prev(p) += shifts_in_x3(tag(p) - 2); + } + } + } + } + }; + + template + class PopulatePrtlSendBuffer_kernel { + array_t send_buff_int; + array_t send_buff_real; + array_t send_buff_prtldx; + array_t send_buff_pld; + + const unsigned short NINTS, NREALS, NPRTLDX, NPLDS; + const std::size_t idx_offset; + + const array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; + const array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; + const array_t ux1, ux2, ux3, weight, phi; + const array_t pld; + array_t tag; + const array_t outgoing_indices; + + public: + PopulatePrtlSendBuffer_kernel(array_t& send_buff_int, + array_t& send_buff_real, + array_t& send_buff_prtldx, + array_t& send_buff_pld, + unsigned short NINTS, + unsigned short NREALS, + unsigned short NPRTLDX, + unsigned short NPLDS, + std::size_t idx_offset, + const array_t& i1, + const array_t& i1_prev, + const array_t& dx1, + const array_t& dx1_prev, + const array_t& i2, + const array_t& i2_prev, + const array_t& dx2, + const array_t& dx2_prev, + const array_t& i3, + const array_t& i3_prev, + const array_t& dx3, + const array_t& dx3_prev, + const array_t& ux1, + const array_t& ux2, + const array_t& ux3, + const array_t& weight, + const array_t& phi, + const array_t& pld, + array_t& tag, + const array_t& outgoing_indices) + : 
send_buff_int { send_buff_int } + , send_buff_real { send_buff_real } + , send_buff_prtldx { send_buff_prtldx } + , send_buff_pld { send_buff_pld } + , NINTS { NINTS } + , NREALS { NREALS } + , NPRTLDX { NPRTLDX } + , NPLDS { NPLDS } + , idx_offset { idx_offset } + , i1 { i1 } + , i1_prev { i1_prev } + , i2 { i2 } + , i2_prev { i2_prev } + , i3 { i3 } + , i3_prev { i3_prev } + , dx1 { dx1 } + , dx1_prev { dx1_prev } + , dx2 { dx2 } + , dx2_prev { dx2_prev } + , dx3 { dx3 } + , dx3_prev { dx3_prev } + , ux1 { ux1 } + , ux2 { ux2 } + , ux3 { ux3 } + , weight { weight } + , phi { phi } + , pld { pld } + , tag { tag } + , outgoing_indices { outgoing_indices } {} + + Inline void operator()(index_t p) const { + const auto idx = outgoing_indices(idx_offset + p); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + send_buff_int(NINTS * p + 0) = i1(idx); + send_buff_int(NINTS * p + 1) = i1_prev(idx); + send_buff_prtldx(NPRTLDX * p + 0) = dx1(idx); + send_buff_prtldx(NPRTLDX * p + 1) = dx1_prev(idx); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + send_buff_int(NINTS * p + 2) = i2(idx); + send_buff_int(NINTS * p + 3) = i2_prev(idx); + send_buff_prtldx(NPRTLDX * p + 2) = dx2(idx); + send_buff_prtldx(NPRTLDX * p + 3) = dx2_prev(idx); + } + if constexpr (D == Dim::_3D) { + send_buff_int(NINTS * p + 4) = i3(idx); + send_buff_int(NINTS * p + 5) = i3_prev(idx); + send_buff_prtldx(NPRTLDX * p + 4) = dx3(idx); + send_buff_prtldx(NPRTLDX * p + 5) = dx3_prev(idx); + } + send_buff_real(NREALS * p + 0) = ux1(idx); + send_buff_real(NREALS * p + 1) = ux2(idx); + send_buff_real(NREALS * p + 2) = ux3(idx); + send_buff_real(NREALS * p + 3) = weight(idx); + if constexpr (D == Dim::_2D and C != Coord::Cart) { + send_buff_real(NREALS * p + 4) = phi(idx); + } + if (NPLDS > 0) { + for (auto l { 0u }; l < NPLDS; ++l) { + send_buff_pld(NPLDS * p + l) = pld(idx, l); + } + } + tag(idx) = ParticleTag::dead; + } + }; + + template + class ExtractReceivedPrtls_kernel { + const 
array_t recv_buff_int; + const array_t recv_buff_real; + const array_t recv_buff_prtldx; + const array_t recv_buff_pld; + + const unsigned short NINTS, NREALS, NPRTLDX, NPLDS; + const std::size_t npart, npart_holes; + + array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; + array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; + array_t ux1, ux2, ux3, weight, phi; + array_t pld; + array_t tag; + const array_t outgoing_indices; + + public: + ExtractReceivedPrtls_kernel(const array_t& recv_buff_int, + const array_t& recv_buff_real, + const array_t& recv_buff_prtldx, + const array_t& recv_buff_pld, + unsigned short NINTS, + unsigned short NREALS, + unsigned short NPRTLDX, + unsigned short NPLDS, + std::size_t npart, + array_t& i1, + array_t& i1_prev, + array_t& dx1, + array_t& dx1_prev, + array_t& i2, + array_t& i2_prev, + array_t& dx2, + array_t& dx2_prev, + array_t& i3, + array_t& i3_prev, + array_t& dx3, + array_t& dx3_prev, + array_t& ux1, + array_t& ux2, + array_t& ux3, + array_t& weight, + array_t& phi, + array_t& pld, + array_t& tag, + const array_t& outgoing_indices) + : recv_buff_int { recv_buff_int } + , recv_buff_real { recv_buff_real } + , recv_buff_prtldx { recv_buff_prtldx } + , recv_buff_pld { recv_buff_pld } + , NINTS { NINTS } + , NREALS { NREALS } + , NPRTLDX { NPRTLDX } + , NPLDS { NPLDS } + , npart { npart } + , npart_holes { outgoing_indices.extent(0) } + , i1 { i1 } + , i1_prev { i1_prev } + , i2 { i2 } + , i2_prev { i2_prev } + , i3 { i3 } + , i3_prev { i3_prev } + , dx1 { dx1 } + , dx1_prev { dx1_prev } + , dx2 { dx2 } + , dx2_prev { dx2_prev } + , dx3 { dx3 } + , dx3_prev { dx3_prev } + , ux1 { ux1 } + , ux2 { ux2 } + , ux3 { ux3 } + , weight { weight } + , phi { phi } + , pld { pld } + , tag { tag } + , outgoing_indices { outgoing_indices } {} + + Inline void operator()(index_t p) const { + std::size_t idx; + if (p >= npart_holes) { + idx = npart + p - npart_holes; + } else { + idx = outgoing_indices(p); + } + if constexpr (D == Dim::_1D or D == 
Dim::_2D or D == Dim::_3D) { + i1(idx) = recv_buff_int(NINTS * p + 0); + i1_prev(idx) = recv_buff_int(NINTS * p + 1); + dx1(idx) = recv_buff_prtldx(NPRTLDX * p + 0); + dx1_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 1); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + i2(idx) = recv_buff_int(NINTS * p + 2); + i2_prev(idx) = recv_buff_int(NINTS * p + 3); + dx2(idx) = recv_buff_prtldx(NPRTLDX * p + 2); + dx2_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 3); + } + if constexpr (D == Dim::_3D) { + i3(idx) = recv_buff_int(NINTS * p + 4); + i3_prev(idx) = recv_buff_int(NINTS * p + 5); + dx3(idx) = recv_buff_prtldx(NPRTLDX * p + 4); + dx3_prev(idx) = recv_buff_prtldx(NPRTLDX * p + 5); + } + ux1(idx) = recv_buff_real(NREALS * p + 0); + ux2(idx) = recv_buff_real(NREALS * p + 1); + ux3(idx) = recv_buff_real(NREALS * p + 2); + weight(idx) = recv_buff_real(NREALS * p + 3); + if constexpr (D == Dim::_2D and C != Coord::Cart) { + phi(idx) = recv_buff_real(NREALS * p + 4); + } + if (NPLDS > 0) { + for (auto l { 0u }; l < NPLDS; ++l) { + pld(idx, l) = recv_buff_pld(NPLDS * p + l); + } + } + tag(idx) = ParticleTag::alive; + } + }; + +} // namespace kernel::comm + +#endif // KERNELS_COMM_HPP diff --git a/src/kernels/fields_bcs.hpp b/src/kernels/fields_bcs.hpp index e617010b4..363ff3ad2 100644 --- a/src/kernels/fields_bcs.hpp +++ b/src/kernels/fields_bcs.hpp @@ -1,5 +1,12 @@ /** - * @brief: kernels/fields_bcs.hpp + * @file kernels/fields_bcs.hpp + * @brief Kernels used for field boundary conditions + * @implements + * - kernel::bc::MatchBoundaries_kernel<> + * - kernel::bc::AxisBoundaries_kernel<> + * - kernel::bc::EnforcedBoundaries_kernel<> + * @namespaces: + * - kernel::bc:: */ #ifndef KERNELS_FIELDS_BCS_HPP @@ -12,64 +19,162 @@ #include "utils/error.h" #include "utils/numeric.h" -namespace kernel { +namespace kernel::bc { using namespace ntt; - template - struct AbsorbBoundaries_kernel { + /* + * @tparam S: Simulation Engine + * @tparam I: Field Setter class + * @tparam M: 
Metric + * @tparam o: Orientation + * + * @brief Applies matching boundary conditions (with a smooth profile) in a specific direction. + * @note If a component is not specified in the field setter, it is ignored. + * @note It is supposed to only be called on the active side of the absorbing edge (so sign is not needed). + */ + template + struct MatchBoundaries_kernel { static_assert(M::is_metric, "M must be a metric class"); - static_assert(i <= static_cast(M::Dim), + static_assert(static_cast(o) < + static_cast(M::Dim), "Invalid component index"); + static constexpr idx_t i = static_cast(o) + 1u; + static constexpr bool defines_dx1 = traits::has_method::value; + static constexpr bool defines_dx2 = traits::has_method::value; + static constexpr bool defines_dx3 = traits::has_method::value; + static constexpr bool defines_ex1 = traits::has_method::value; + static constexpr bool defines_ex2 = traits::has_method::value; + static constexpr bool defines_ex3 = traits::has_method::value; + static constexpr bool defines_bx1 = traits::has_method::value; + static constexpr bool defines_bx2 = traits::has_method::value; + static constexpr bool defines_bx3 = traits::has_method::value; + static_assert( + (S == SimEngine::SRPIC and (defines_ex1 or defines_ex2 or defines_ex3 or + defines_bx1 or defines_bx2 or defines_bx3)) or + ((S == SimEngine::GRPIC) and (defines_dx1 or defines_dx2 or defines_dx3 or + defines_bx1 or defines_bx2 or defines_bx3)), + "none of the components of E/D or B are specified in PGEN"); ndfield_t Fld; + const I fset; const M metric; const real_t xg_edge; const real_t dx_abs; const BCTags tags; - AbsorbBoundaries_kernel(ndfield_t Fld, - const M& metric, - real_t xg_edge, - real_t dx_abs, - BCTags tags) + MatchBoundaries_kernel(ndfield_t Fld, + const I& fset, + const M& metric, + real_t xg_edge, + real_t dx_abs, + BCTags tags) : Fld { Fld } + , fset { fset } , metric { metric } , xg_edge { xg_edge } , dx_abs { dx_abs } , tags { tags } {} + Inline auto 
shape(const real_t& dx) const -> real_t { + return math::tanh(dx * FOUR / dx_abs); + } + Inline void operator()(index_t i1) const { if constexpr (M::Dim == Dim::_1D) { const auto i1_ = COORD(i1); - for (const auto comp : - { em::ex1, em::ex2, em::ex3, em::bx1, em::bx2, em::bx3 }) { - if ((comp == em::ex1) and not(tags & BC::Ex1)) { - continue; - } else if ((comp == em::ex2) and not(tags & BC::Ex2)) { - continue; - } else if ((comp == em::ex3) and not(tags & BC::Ex3)) { - continue; - } else if ((comp == em::bx1) and not(tags & BC::Bx1)) { - continue; - } else if ((comp == em::bx2) and not(tags & BC::Bx2)) { - continue; - } else if ((comp == em::bx3) and not(tags & BC::Bx3)) { - continue; - } - coord_t x_Cd { ZERO }; - if (comp == em::ex1 or comp == em::bx2 or comp == em::bx3) { - x_Cd[0] = i1_ + HALF; - } else if (comp == em::ex2 or comp == em::bx1 or comp == em::ex3) { - x_Cd[0] = i1_; - } - const auto dx = math::abs( - metric.template convert(x_Cd[i - 1]) - xg_edge); - Fld(i1, comp) *= math::tanh(dx / (INV_4 * dx_abs)); + + if constexpr (S == SimEngine::SRPIC) { + coord_t x_Ph_0 { ZERO }; + coord_t x_Ph_H { ZERO }; + metric.template convert({ i1_ }, x_Ph_0); + metric.template convert({ i1_ + HALF }, x_Ph_H); + + // SRPIC + auto ex1_U { ZERO }, ex2_U { ZERO }, ex3_U { ZERO }, bx1_U { ZERO }, + bx2_U { ZERO }, bx3_U { ZERO }; + if (tags & BC::E) { + if constexpr (defines_ex1) { + ex1_U = metric.template transform<1, Idx::T, Idx::U>( + { i1_ + HALF }, + fset.ex1(x_Ph_H)); + } + if constexpr (defines_ex2) { + ex2_U = metric.template transform<2, Idx::T, Idx::U>( + { i1_ }, + fset.ex2(x_Ph_0)); + } + if constexpr (defines_ex3) { + ex3_U = metric.template transform<3, Idx::T, Idx::U>( + { i1_ }, + fset.ex3(x_Ph_0)); + } + } + if (tags & BC::B) { + if constexpr (defines_bx1) { + bx1_U = metric.template transform<1, Idx::T, Idx::U>( + { i1_ }, + fset.bx1(x_Ph_0)); + } + if constexpr (defines_bx2) { + bx2_U = metric.template transform<2, Idx::T, Idx::U>( + { i1_ + HALF }, 
+ fset.bx2(x_Ph_H)); + } + if constexpr (defines_bx3) { + bx3_U = metric.template transform<3, Idx::T, Idx::U>( + { i1_ + HALF }, + fset.bx3(x_Ph_H)); + } + } + + if constexpr (defines_ex1 or defines_bx2 or defines_bx3) { + const auto dx = math::abs( + metric.template convert(i1_ + HALF) - xg_edge); + const auto s = shape(dx); + if constexpr (defines_ex1) { + if (tags & BC::E) { + Fld(i1, em::ex1) = s * Fld(i1, em::ex1) + (ONE - s) * ex1_U; + } + } + if constexpr (defines_bx2 or defines_bx3) { + if (tags & BC::B) { + if constexpr (defines_bx2) { + Fld(i1, em::bx2) = s * Fld(i1, em::bx2) + (ONE - s) * bx2_U; + } + if constexpr (defines_bx3) { + Fld(i1, em::bx3) = s * Fld(i1, em::bx3) + (ONE - s) * bx3_U; + } + } + } + } + if constexpr (defines_bx1 or defines_ex2 or defines_ex3) { + const auto dx = math::abs( + metric.template convert(i1_) - xg_edge); + const auto s = shape(dx); + if constexpr (defines_bx1) { + if (tags & BC::B) { + Fld(i1, em::bx1) = s * Fld(i1, em::bx1) + (ONE - s) * bx1_U; + } + } + if constexpr (defines_ex2 or defines_ex3) { + if (tags & BC::E) { + if constexpr (defines_ex2) { + Fld(i1, em::ex2) = s * Fld(i1, em::ex2) + (ONE - s) * ex2_U; + } + if constexpr (defines_ex3) { + Fld(i1, em::ex3) = s * Fld(i1, em::ex3) + (ONE - s) * ex3_U; + } + } + } + } + } else { + // GRPIC + raise::KernelError(HERE, "1D GRPIC not implemented"); } } else { raise::KernelError( HERE, - "AbsorbFields_kernel: 1D implementation called for D != 1"); + "MatchBoundaries_kernel: 1D implementation called for D != 1"); } } @@ -77,43 +182,129 @@ namespace kernel { if constexpr (M::Dim == Dim::_2D) { const auto i1_ = COORD(i1); const auto i2_ = COORD(i2); - for (const auto comp : - { em::ex1, em::ex2, em::ex3, em::bx1, em::bx2, em::bx3 }) { - if ((comp == em::ex1) and not(tags & BC::Ex1)) { - continue; - } else if ((comp == em::ex2) and not(tags & BC::Ex2)) { - continue; - } else if ((comp == em::ex3) and not(tags & BC::Ex3)) { - continue; - } else if ((comp == em::bx1) and 
not(tags & BC::Bx1)) { - continue; - } else if ((comp == em::bx2) and not(tags & BC::Bx2)) { - continue; - } else if ((comp == em::bx3) and not(tags & BC::Bx3)) { - continue; - } - coord_t x_Cd { ZERO }; - if (comp == em::ex1 or comp == em::bx2) { - x_Cd[0] = i1_ + HALF; - x_Cd[1] = i2_; - } else if (comp == em::ex2 or comp == em::bx1) { - x_Cd[0] = i1_; - x_Cd[1] = i2_ + HALF; - } else if (comp == em::ex3) { - x_Cd[0] = i1_; - x_Cd[1] = i2_; - } else if (comp == em::bx3) { - x_Cd[0] = i1_ + HALF; - x_Cd[1] = i2_ + HALF; - } - const auto dx = math::abs( - metric.template convert(x_Cd[i - 1]) - xg_edge); - Fld(i1, i2, comp) *= math::tanh(dx / (INV_4 * dx_abs)); + + if constexpr (S == SimEngine::SRPIC) { + // SRPIC + if constexpr (defines_ex1 or defines_bx2) { + coord_t x_Ph_H0 { ZERO }; + metric.template convert({ i1_ + HALF, i2_ }, x_Ph_H0); + // i1 + 1/2, i2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_ + HALF; + } else { + xi_Cd = i2_; + } + + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + + if constexpr (defines_ex1) { + if (tags & BC::E) { + const auto ex1_U = metric.template transform<1, Idx::T, Idx::U>( + { i1_ + HALF, i2_ }, + fset.ex1(x_Ph_H0)); + Fld(i1, i2, em::ex1) = s * Fld(i1, i2, em::ex1) + (ONE - s) * ex1_U; + } + } + if constexpr (defines_bx2) { + if (tags & BC::B) { + const auto bx2_U = metric.template transform<2, Idx::T, Idx::U>( + { i1_ + HALF, i2_ }, + fset.bx2(x_Ph_H0)); + Fld(i1, i2, em::bx2) = s * Fld(i1, i2, em::bx2) + (ONE - s) * bx2_U; + } + } + } + + if constexpr (defines_ex2 or defines_bx1) { + coord_t x_Ph_0H { ZERO }; + metric.template convert({ i1_, i2_ + HALF }, x_Ph_0H); + // i1, i2 + 1/2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_; + } else { + xi_Cd = i2_ + HALF; + } + + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + if constexpr (defines_ex2) { + if (tags & BC::E) { + auto ex2_U { ZERO }; 
+ ex2_U = metric.template transform<2, Idx::T, Idx::U>( + { i1_, i2_ + HALF }, + fset.ex2(x_Ph_0H)); + Fld(i1, i2, em::ex2) = s * Fld(i1, i2, em::ex2) + (ONE - s) * ex2_U; + } + } + if constexpr (defines_bx1) { + if (tags & BC::B) { + auto bx1_U { ZERO }; + bx1_U = metric.template transform<1, Idx::T, Idx::U>( + { i1_, i2_ + HALF }, + fset.bx1(x_Ph_0H)); + Fld(i1, i2, em::bx1) = s * Fld(i1, i2, em::bx1) + (ONE - s) * bx1_U; + } + } + } + + if constexpr (defines_ex3) { + if (tags & BC::E) { + auto ex3_U { ZERO }; + coord_t x_Ph_00 { ZERO }; + metric.template convert({ i1_, i2_ }, x_Ph_00); + ex3_U = metric.template transform<3, Idx::T, Idx::U>( + { i1_, i2_ }, + fset.ex3(x_Ph_00)); + // i1, i2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_; + } else { + xi_Cd = i2_; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + Fld(i1, i2, em::ex3) = s * Fld(i1, i2, em::ex3) + (ONE - s) * ex3_U; + } + } + + if constexpr (defines_bx3) { + if (tags & BC::B) { + auto bx3_U { ZERO }; + coord_t x_Ph_HH { ZERO }; + metric.template convert({ i1_ + HALF, i2_ + HALF }, + x_Ph_HH); + bx3_U = metric.template transform<3, Idx::T, Idx::U>( + { i1_ + HALF, i2_ + HALF }, + fset.bx3(x_Ph_HH)); + // i1 + 1/2, i2 + 1/2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_ + HALF; + } else { + xi_Cd = i2_ + HALF; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + // bx3 + Fld(i1, i2, em::bx3) = s * Fld(i1, i2, em::bx3) + (ONE - s) * bx3_U; + } + } + } else { + // GRPIC + raise::KernelError(HERE, "GRPIC not implemented"); } } else { raise::KernelError( HERE, - "AbsorbFields_kernel: 2D implementation called for D != 2"); + "MatchBoundaries_kernel: 2D implementation called for D != 2"); } } @@ -122,59 +313,184 @@ namespace kernel { const auto i1_ = COORD(i1); const auto i2_ = COORD(i2); const auto i3_ = COORD(i3); - for (const auto comp : - { em::ex1, em::ex2, 
em::ex3, em::bx1, em::bx2, em::bx3 }) { - if ((comp == em::ex1) and not(tags & BC::Ex1)) { - continue; - } else if ((comp == em::ex2) and not(tags & BC::Ex2)) { - continue; - } else if ((comp == em::ex3) and not(tags & BC::Ex3)) { - continue; - } else if ((comp == em::bx1) and not(tags & BC::Bx1)) { - continue; - } else if ((comp == em::bx2) and not(tags & BC::Bx2)) { - continue; - } else if ((comp == em::bx3) and not(tags & BC::Bx3)) { - continue; - } - coord_t x_Cd { ZERO }; - if (comp == em::ex1) { - x_Cd[0] = i1_ + HALF; - x_Cd[1] = i2_; - x_Cd[2] = i3_; - } else if (comp == em::ex2) { - x_Cd[0] = i1_; - x_Cd[1] = i2_ + HALF; - x_Cd[2] = i3_; - } else if (comp == em::ex3) { - x_Cd[0] = i1_; - x_Cd[1] = i2_; - x_Cd[2] = i3_ + HALF; - } else if (comp == em::bx1) { - x_Cd[0] = i1_; - x_Cd[1] = i2_ + HALF; - x_Cd[2] = i3_ + HALF; - } else if (comp == em::bx2) { - x_Cd[0] = i1_ + HALF; - x_Cd[1] = i2_; - x_Cd[2] = i3_ + HALF; - } else if (comp == em::bx3) { - x_Cd[0] = i1_ + HALF; - x_Cd[1] = i2_ + HALF; - x_Cd[2] = i3_; - } - const auto dx = math::abs( - metric.template convert(x_Cd[i - 1]) - xg_edge); - Fld(i1, i2, i3, comp) *= math::tanh(dx / (INV_4 * dx_abs)); + + if constexpr (S == SimEngine::SRPIC) { + // SRPIC + if constexpr (defines_ex1 or defines_ex2 or defines_ex3) { + if (tags & BC::E) { + if constexpr (defines_ex1) { + // i1 + 1/2, i2, i3 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_ + HALF; + } else if constexpr (o == in::x2) { + xi_Cd = i2_; + } else { + xi_Cd = i3_; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto ex1_U { ZERO }; + coord_t x_Ph_H00 { ZERO }; + metric.template convert({ i1_ + HALF, i2_, i3_ }, + x_Ph_H00); + ex1_U = metric.template transform<1, Idx::T, Idx::U>( + { i1_ + HALF, i2_, i3_ }, + fset.ex1(x_Ph_H00)); + Fld(i1, i2, i3, em::ex1) = s * Fld(i1, i2, i3, em::ex1) + + (ONE - s) * ex1_U; + } + + if constexpr (defines_ex2) { + // i1, i2 + 1/2, i3 + real_t 
xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_; + } else if constexpr (o == in::x2) { + xi_Cd = i2_ + HALF; + } else { + xi_Cd = i3_; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto ex2_U { ZERO }; + coord_t x_Ph_0H0 { ZERO }; + metric.template convert({ i1_, i2_ + HALF, i3_ }, + x_Ph_0H0); + ex2_U = metric.template transform<2, Idx::T, Idx::U>( + { i1_, i2_ + HALF, i3_ }, + fset.ex2(x_Ph_0H0)); + Fld(i1, i2, i3, em::ex2) = s * Fld(i1, i2, i3, em::ex2) + + (ONE - s) * ex2_U; + } + + if constexpr (defines_ex3) { + // i1, i2, i3 + 1/2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_; + } else if constexpr (o == in::x2) { + xi_Cd = i2_; + } else { + xi_Cd = i3_ + HALF; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto ex3_U { ZERO }; + coord_t x_Ph_00H { ZERO }; + metric.template convert({ i1_, i2_, i3_ + HALF }, + x_Ph_00H); + ex3_U = metric.template transform<3, Idx::T, Idx::U>( + { i1_, i2_, i3_ + HALF }, + fset.ex3(x_Ph_00H)); + Fld(i1, i2, i3, em::ex3) = s * Fld(i1, i2, i3, em::ex3) + + (ONE - s) * ex3_U; + } + } + } + + if constexpr (defines_bx1 or defines_bx2 or defines_bx3) { + if (tags & BC::B) { + if constexpr (defines_bx1) { + // i1, i2 + 1/2, i3 + 1/2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_; + } else if constexpr (o == in::x2) { + xi_Cd = i2_ + HALF; + } else { + xi_Cd = i3_ + HALF; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto bx1_U { ZERO }; + if constexpr (defines_bx1) { + coord_t x_Ph_0HH { ZERO }; + metric.template convert( + { i1_, i2_ + HALF, i3_ + HALF }, + x_Ph_0HH); + bx1_U = metric.template transform<1, Idx::T, Idx::U>( + { i1_, i2_ + HALF, i3_ + HALF }, + fset.bx1(x_Ph_0HH)); + } + // bx1 + Fld(i1, i2, i3, em::bx1) = s * Fld(i1, i2, i3, em::bx1) + + (ONE - s) * bx1_U; + } + + if constexpr (defines_bx2) { + // i1 + 
1/2, i2, i3 + 1/2 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_ + HALF; + } else if constexpr (o == in::x2) { + xi_Cd = i2_; + } else { + xi_Cd = i3_ + HALF; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto bx2_U { ZERO }; + coord_t x_Ph_H0H { ZERO }; + metric.template convert( + { i1_ + HALF, i2_, i3_ + HALF }, + x_Ph_H0H); + bx2_U = metric.template transform<2, Idx::T, Idx::U>( + { i1_ + HALF, i2_, i3_ + HALF }, + fset.bx2(x_Ph_H0H)); + Fld(i1, i2, i3, em::bx2) = s * Fld(i1, i2, i3, em::bx2) + + (ONE - s) * bx2_U; + } + + if constexpr (defines_bx3) { + // i1 + 1/2, i2 + 1/2, i3 + real_t xi_Cd; + if constexpr (o == in::x1) { + xi_Cd = i1_ + HALF; + } else if constexpr (o == in::x2) { + xi_Cd = i2_ + HALF; + } else { + xi_Cd = i3_; + } + const auto dx = math::abs( + metric.template convert(xi_Cd) - xg_edge); + const auto s = shape(dx); + auto bx3_U { ZERO }; + coord_t x_Ph_HH0 { ZERO }; + metric.template convert( + { i1_ + HALF, i2_ + HALF, i3_ }, + x_Ph_HH0); + bx3_U = metric.template transform<3, Idx::T, Idx::U>( + { i1_ + HALF, i2_ + HALF, i3_ }, + fset.bx3(x_Ph_HH0)); + Fld(i1, i2, i3, em::bx3) = s * Fld(i1, i2, i3, em::bx3) + + (ONE - s) * bx3_U; + } + } + } + } else { + // GRPIC + raise::KernelError(HERE, "GRPIC not implemented"); } } else { raise::KernelError( HERE, - "AbsorbFields_kernel: 3D implementation called for D != 3"); + "MatchBoundaries_kernel: 3D implementation called for D != 3"); } } }; + /* + * @tparam D: Dimension + * @tparam P: Positive/Negative direction + * + * @brief Applies boundary conditions near the polar axis + */ template struct AxisBoundaries_kernel { ndfield_t Fld; @@ -216,8 +532,16 @@ namespace kernel { } }; + /* + * @tparam I: Field Setter class + * @tparam M: Metric + * @tparam P: Positive/Negative direction + * @tparam O: Orientation + * + * @brief Applies enforced boundary conditions (fixed value) + */ template - struct AtmosphereBoundaries_kernel { 
+ struct EnforcedBoundaries_kernel { static constexpr Dimension D = M::Dim; static constexpr bool defines_ex1 = traits::has_method::value; static constexpr bool defines_ex2 = traits::has_method::value; @@ -226,31 +550,28 @@ namespace kernel { static constexpr bool defines_bx2 = traits::has_method::value; static constexpr bool defines_bx3 = traits::has_method::value; - static_assert(defines_ex1 and defines_ex2 and defines_ex3 and - defines_bx1 and defines_bx2 and defines_bx3, - "not all components of E or B are specified in PGEN"); + static_assert(defines_ex1 or defines_ex2 or defines_ex3 or defines_bx1 or + defines_bx2 or defines_bx3, + "none of the components of E or B are specified in PGEN"); static_assert(M::is_metric, "M must be a metric class"); static_assert(static_cast(O) < static_cast(M::Dim), "Invalid Orientation"); ndfield_t Fld; - const I finit; + const I fset; const M metric; const std::size_t i_edge; - const bool setE, setB; - AtmosphereBoundaries_kernel(ndfield_t& Fld, - const I& finit, - const M& metric, - std::size_t i_edge, - BCTags tags) + EnforcedBoundaries_kernel(ndfield_t& Fld, + const I& fset, + const M& metric, + std::size_t i_edge, + BCTags tags) : Fld { Fld } - , finit { finit } + , fset { fset } , metric { metric } - , i_edge { i_edge + N_GHOSTS } - , setE { tags & BC::Ex1 or tags & BC::Ex2 or tags & BC::Ex3 } - , setB { tags & BC::Bx1 or tags & BC::Bx2 or tags & BC::Bx3 } {} + , i_edge { i_edge + N_GHOSTS } {} Inline void operator()(index_t i1) const { if constexpr (D == Dim::_1D) { @@ -259,8 +580,8 @@ namespace kernel { coord_t x_Ph_H { ZERO }; metric.template convert({ i1_ }, x_Ph_0); metric.template convert({ i1_ + HALF }, x_Ph_H); - bool setEx1 = setE, setEx2 = setE, setEx3 = setE, setBx1 = setB, - setBx2 = setB, setBx3 = setB; + bool setEx1 = defines_ex1, setEx2 = defines_ex2, setEx3 = defines_ex3, + setBx1 = defines_bx1, setBx2 = defines_bx2, setBx3 = defines_bx3; if constexpr (O == in::x1) { // x1 -- normal // x2,x3 -- tangential 
@@ -276,35 +597,47 @@ namespace kernel { } else { raise::KernelError(HERE, "Invalid Orientation"); } - if (setEx1) { - Fld(i1, em::ex1) = metric.template transform<1, Idx::T, Idx::U>( - { i1_ + HALF }, - finit.ex1(x_Ph_H)); + if constexpr (defines_ex1) { + if (setEx1) { + Fld(i1, em::ex1) = metric.template transform<1, Idx::T, Idx::U>( + { i1_ + HALF }, + fset.ex1(x_Ph_H)); + } } - if (setEx2) { - Fld(i1, em::ex2) = metric.template transform<2, Idx::T, Idx::U>( - { i1_ }, - finit.ex2(x_Ph_0)); + if constexpr (defines_ex2) { + if (setEx2) { + Fld(i1, em::ex2) = metric.template transform<2, Idx::T, Idx::U>( + { i1_ }, + fset.ex2(x_Ph_0)); + } } - if (setEx3) { - Fld(i1, em::ex3) = metric.template transform<3, Idx::T, Idx::U>( - { i1_ }, - finit.ex3(x_Ph_0)); + if constexpr (defines_ex3) { + if (setEx3) { + Fld(i1, em::ex3) = metric.template transform<3, Idx::T, Idx::U>( + { i1_ }, + fset.ex3(x_Ph_0)); + } } - if (setBx1) { - Fld(i1, em::bx1) = metric.template transform<1, Idx::T, Idx::U>( - { i1_ }, - finit.bx1(x_Ph_0)); + if constexpr (defines_bx1) { + if (setBx1) { + Fld(i1, em::bx1) = metric.template transform<1, Idx::T, Idx::U>( + { i1_ }, + fset.bx1(x_Ph_0)); + } } - if (setBx2) { - Fld(i1, em::bx2) = metric.template transform<2, Idx::T, Idx::U>( - { i1_ + HALF }, - finit.bx2(x_Ph_H)); + if constexpr (defines_bx2) { + if (setBx2) { + Fld(i1, em::bx2) = metric.template transform<2, Idx::T, Idx::U>( + { i1_ + HALF }, + fset.bx2(x_Ph_H)); + } } - if (setBx3) { - Fld(i1, em::bx3) = metric.template transform<3, Idx::T, Idx::U>( - { i1_ + HALF }, - finit.bx3(x_Ph_H)); + if constexpr (defines_bx3) { + if (setBx3) { + Fld(i1, em::bx3) = metric.template transform<3, Idx::T, Idx::U>( + { i1_ + HALF }, + fset.bx3(x_Ph_H)); + } } } else { raise::KernelError(HERE, "Invalid Dimension"); @@ -324,8 +657,8 @@ namespace kernel { metric.template convert({ i1_ + HALF, i2_ }, x_Ph_H0); metric.template convert({ i1_ + HALF, i2_ + HALF }, x_Ph_HH); - bool setEx1 = setE, setEx2 = setE, 
setEx3 = setE, setBx1 = setB, - setBx2 = setB, setBx3 = setB; + bool setEx1 = defines_ex1, setEx2 = defines_ex2, setEx3 = defines_ex3, + setBx1 = defines_bx1, setBx2 = defines_bx2, setBx3 = defines_bx3; if constexpr (O == in::x1) { // x1 -- normal // x2,x3 -- tangential @@ -353,35 +686,47 @@ namespace kernel { } else { raise::KernelError(HERE, "Invalid Orientation"); } - if (setEx1) { - Fld(i1, i2, em::ex1) = metric.template transform<1, Idx::T, Idx::U>( - { i1_ + HALF, i2_ }, - finit.ex1(x_Ph_H0)); + if constexpr (defines_ex1) { + if (setEx1) { + Fld(i1, i2, em::ex1) = metric.template transform<1, Idx::T, Idx::U>( + { i1_ + HALF, i2_ }, + fset.ex1(x_Ph_H0)); + } } - if (setEx2) { - Fld(i1, i2, em::ex2) = metric.template transform<2, Idx::T, Idx::U>( - { i1_, i2_ + HALF }, - finit.ex2(x_Ph_0H)); + if constexpr (defines_ex2) { + if (setEx2) { + Fld(i1, i2, em::ex2) = metric.template transform<2, Idx::T, Idx::U>( + { i1_, i2_ + HALF }, + fset.ex2(x_Ph_0H)); + } } - if (setEx3) { - Fld(i1, i2, em::ex3) = metric.template transform<3, Idx::T, Idx::U>( - { i1_, i2_ }, - finit.ex3(x_Ph_00)); + if constexpr (defines_ex3) { + if (setEx3) { + Fld(i1, i2, em::ex3) = metric.template transform<3, Idx::T, Idx::U>( + { i1_, i2_ }, + fset.ex3(x_Ph_00)); + } } - if (setBx1) { - Fld(i1, i2, em::bx1) = metric.template transform<1, Idx::T, Idx::U>( - { i1_, i2_ + HALF }, - finit.bx1(x_Ph_0H)); + if constexpr (defines_bx1) { + if (setBx1) { + Fld(i1, i2, em::bx1) = metric.template transform<1, Idx::T, Idx::U>( + { i1_, i2_ + HALF }, + fset.bx1(x_Ph_0H)); + } } - if (setBx2) { - Fld(i1, i2, em::bx2) = metric.template transform<2, Idx::T, Idx::U>( - { i1_ + HALF, i2_ }, - finit.bx2(x_Ph_H0)); + if constexpr (defines_bx2) { + if (setBx2) { + Fld(i1, i2, em::bx2) = metric.template transform<2, Idx::T, Idx::U>( + { i1_ + HALF, i2_ }, + fset.bx2(x_Ph_H0)); + } } - if (setBx3) { - Fld(i1, i2, em::bx3) = metric.template transform<3, Idx::T, Idx::U>( - { i1_ + HALF, i2_ + HALF }, - 
finit.bx3(x_Ph_HH)); + if constexpr (defines_bx3) { + if (setBx3) { + Fld(i1, i2, em::bx3) = metric.template transform<3, Idx::T, Idx::U>( + { i1_ + HALF, i2_ + HALF }, + fset.bx3(x_Ph_HH)); + } } } else { raise::KernelError(HERE, "Invalid Dimension"); @@ -412,8 +757,8 @@ namespace kernel { x_Ph_H0H); metric.template convert({ i1_, i2_ + HALF, i3_ + HALF }, x_Ph_0HH); - bool setEx1 = setE, setEx2 = setE, setEx3 = setE, setBx1 = setB, - setBx2 = setB, setBx3 = setB; + bool setEx1 = defines_ex1, setEx2 = defines_ex2, setEx3 = defines_ex3, + setBx1 = defines_bx1, setBx2 = defines_bx2, setBx3 = defines_bx3; if constexpr (O == in::x1) { // x1 -- normal // x2,x3 -- tangential @@ -453,35 +798,47 @@ namespace kernel { } else { raise::KernelError(HERE, "Invalid Orientation"); } - if (setEx1) { - Fld(i1, i2, i3, em::ex1) = metric.template transform<1, Idx::T, Idx::U>( - { i1_ + HALF, i2_, i3_ }, - finit.ex1(x_Ph_H00)); + if constexpr (defines_ex1) { + if (setEx1) { + Fld(i1, i2, i3, em::ex1) = metric.template transform<1, Idx::T, Idx::U>( + { i1_ + HALF, i2_, i3_ }, + fset.ex1(x_Ph_H00)); + } } - if (setEx2) { - Fld(i1, i2, i3, em::ex2) = metric.template transform<2, Idx::T, Idx::U>( - { i1_, i2_ + HALF, i3_ }, - finit.ex2(x_Ph_0H0)); + if constexpr (defines_ex2) { + if (setEx2) { + Fld(i1, i2, i3, em::ex2) = metric.template transform<2, Idx::T, Idx::U>( + { i1_, i2_ + HALF, i3_ }, + fset.ex2(x_Ph_0H0)); + } } - if (setEx3) { - Fld(i1, i2, i3, em::ex3) = metric.template transform<3, Idx::T, Idx::U>( - { i1_, i2_, i3_ + HALF }, - finit.ex3(x_Ph_00H)); + if constexpr (defines_ex3) { + if (setEx3) { + Fld(i1, i2, i3, em::ex3) = metric.template transform<3, Idx::T, Idx::U>( + { i1_, i2_, i3_ + HALF }, + fset.ex3(x_Ph_00H)); + } } - if (setBx1) { - Fld(i1, i2, i3, em::bx1) = metric.template transform<1, Idx::T, Idx::U>( - { i1_, i2_ + HALF, i3_ + HALF }, - finit.bx1(x_Ph_0HH)); + if constexpr (defines_bx1) { + if (setBx1) { + Fld(i1, i2, i3, em::bx1) = metric.template 
transform<1, Idx::T, Idx::U>( + { i1_, i2_ + HALF, i3_ + HALF }, + fset.bx1(x_Ph_0HH)); + } } - if (setBx2) { - Fld(i1, i2, i3, em::bx2) = metric.template transform<2, Idx::T, Idx::U>( - { i1_ + HALF, i2_, i3_ + HALF }, - finit.bx2(x_Ph_H0H)); + if constexpr (defines_bx2) { + if (setBx2) { + Fld(i1, i2, i3, em::bx2) = metric.template transform<2, Idx::T, Idx::U>( + { i1_ + HALF, i2_, i3_ + HALF }, + fset.bx2(x_Ph_H0H)); + } } - if (setBx3) { - Fld(i1, i2, i3, em::bx3) = metric.template transform<3, Idx::T, Idx::U>( - { i1_ + HALF, i2_ + HALF, i3_ }, - finit.bx3(x_Ph_HH0)); + if constexpr (defines_bx3) { + if (setBx3) { + Fld(i1, i2, i3, em::bx3) = metric.template transform<3, Idx::T, Idx::U>( + { i1_ + HALF, i2_ + HALF, i3_ }, + fset.bx3(x_Ph_HH0)); + } } } else { raise::KernelError(HERE, "Invalid Dimension"); @@ -489,6 +846,6 @@ namespace kernel { } }; -} // namespace kernel +} // namespace kernel::bc #endif // KERNELS_FIELDS_BCS_HPP diff --git a/src/kernels/particle_moments.hpp b/src/kernels/particle_moments.hpp index 8b668a036..07a40d180 100644 --- a/src/kernels/particle_moments.hpp +++ b/src/kernels/particle_moments.hpp @@ -14,6 +14,7 @@ #include "global.h" #include "arch/kokkos_aliases.h" +#include "utils/comparators.h" #include "utils/error.h" #include "utils/numeric.h" @@ -40,8 +41,10 @@ namespace kernel { static_assert(M::is_metric, "M must be a metric class"); static constexpr auto D = M::Dim; - static_assert((F == FldsID::Rho) || (F == FldsID::Charge) || - (F == FldsID::N) || (F == FldsID::Nppc) || (F == FldsID::T), + static_assert(!((S == SimEngine::GRPIC) && (F == FldsID::V)), + "Bulk velocity not supported for GRPIC"); + static_assert((F == FldsID::Rho) || (F == FldsID::Charge) || (F == FldsID::N) || + (F == FldsID::Nppc) || (F == FldsID::T) || (F == FldsID::V), "Invalid field ID"); const unsigned short c1, c2; @@ -89,8 +92,8 @@ namespace kernel { std::size_t ni2, real_t inv_n0, unsigned short window) - : c1 { (components.size() == 2) ? 
components[0] - : static_cast(0) } + : c1 { (components.size() > 0) ? components[0] + : static_cast(0) } , c2 { (components.size() == 2) ? components[1] : static_cast(0) } , Buff { scatter_buff } @@ -200,11 +203,97 @@ namespace kernel { coeff *= u_Phys[c - 1]; } } + } else if constexpr (F == FldsID::V) { + real_t gamma { ZERO }; + // for bulk 3vel (tetrad basis) + vec_t u_Phys { ZERO }; + if constexpr (M::CoordType == Coord::Cart) { + u_Phys[0] = ux1(p); + u_Phys[1] = ux2(p); + u_Phys[2] = ux3(p); + } else { + coord_t x_Code { ZERO }; + x_Code[0] = static_cast(i1(p)) + static_cast(dx1(p)); + x_Code[1] = static_cast(i2(p)) + static_cast(dx2(p)); + if constexpr (D == Dim::_3D) { + x_Code[2] = static_cast(i3(p)) + static_cast(dx3(p)); + } else { + x_Code[2] = phi(p); + } + metric.template transform_xyz(x_Code, + { ux1(p), ux2(p), ux3(p) }, + u_Phys); + } + if (mass == ZERO) { + gamma = NORM(u_Phys[0], u_Phys[1], u_Phys[2]); + } else { + gamma = math::sqrt(ONE + NORM_SQR(u_Phys[0], u_Phys[1], u_Phys[2])); + } + // compute the corresponding moment + coeff = (mass == ZERO ? 
ONE : mass) * u_Phys[c1 - 1] / gamma; } else { // for other cases, use the `contrib` defined above coeff = contrib; } + if constexpr (F == FldsID::V) { + real_t gamma { ZERO }; + // for stress-energy tensor + vec_t u_Phys { ZERO }; + if constexpr (S == SimEngine::SRPIC) { + // SR + // stress-energy tensor for SR is computed in the tetrad (hatted) basis + if constexpr (M::CoordType == Coord::Cart) { + u_Phys[0] = ux1(p); + u_Phys[1] = ux2(p); + u_Phys[2] = ux3(p); + } else { + static_assert(D != Dim::_1D, "non-Cartesian SRPIC 1D"); + coord_t x_Code { ZERO }; + x_Code[0] = static_cast(i1(p)) + static_cast(dx1(p)); + x_Code[1] = static_cast(i2(p)) + static_cast(dx2(p)); + if constexpr (D == Dim::_3D) { + x_Code[2] = static_cast(i3(p)) + static_cast(dx3(p)); + } else { + x_Code[2] = phi(p); + } + metric.template transform_xyz( + x_Code, + { ux1(p), ux2(p), ux3(p) }, + u_Phys); + } + if (mass == ZERO) { + gamma = NORM(u_Phys[0], u_Phys[1], u_Phys[2]); + } else { + gamma = math::sqrt(ONE + NORM_SQR(u_Phys[0], u_Phys[1], u_Phys[2])); + } + } else { + // GR + // stress-energy tensor for GR is computed in contravariant basis + static_assert(D != Dim::_1D, "GRPIC 1D"); + coord_t x_Code { ZERO }; + x_Code[0] = static_cast(i1(p)) + static_cast(dx1(p)); + x_Code[1] = static_cast(i2(p)) + static_cast(dx2(p)); + if constexpr (D == Dim::_3D) { + x_Code[2] = static_cast(i3(p)) + static_cast(dx3(p)); + } + vec_t u_Cntrv { ZERO }; + // compute u_i u^i for energy + metric.template transform(x_Code, + { ux1(p), ux2(p), ux3(p) }, + u_Cntrv); + gamma = u_Cntrv[0] * ux1(p) + u_Cntrv[1] * ux2(p) + u_Cntrv[2] * ux3(p); + if (mass == ZERO) { + gamma = math::sqrt(gamma); + } else { + gamma = math::sqrt(ONE + gamma); + } + metric.template transform(x_Code, u_Cntrv, u_Phys); + } + // compute the corresponding moment + coeff = u_Phys[c1 - 1] / gamma; + } + if constexpr (F != FldsID::Nppc) { // for nppc calculation ... // ... 
do not take volume, weights or smoothing into account @@ -223,7 +312,6 @@ namespace kernel { } coeff *= weight(p) * smooth; } - auto buff_access = Buff.access(); if constexpr (D == Dim::_1D) { for (auto di1 { -window }; di1 <= window; ++di1) { @@ -289,6 +377,79 @@ namespace kernel { } }; + template + class NormalizeVectorByRho_kernel { + const ndfield_t Rho; + ndfield_t Vector; + const unsigned short c_rho, c_v1, c_v2, c_v3; + + public: + NormalizeVectorByRho_kernel(const ndfield_t& rho, + const ndfield_t& vector, + unsigned short crho, + unsigned short cv1, + unsigned short cv2, + unsigned short cv3) + : Rho { rho } + , Vector { vector } + , c_rho { crho } + , c_v1 { cv1 } + , c_v2 { cv2 } + , c_v3 { cv3 } { + raise::ErrorIf(c_rho >= N or c_v1 >= N or c_v2 >= N or c_v3 >= N, + "Invalid component index", + HERE); + raise::ErrorIf(c_rho == c_v1 or c_rho == c_v2 or c_rho == c_v3, + "Invalid component index", + HERE); + raise::ErrorIf(c_v1 == c_v2 or c_v1 == c_v3 or c_v2 == c_v3, + "Invalid component index", + HERE); + } + + Inline void operator()(index_t i1) const { + if constexpr (D == Dim::_1D) { + if (not cmp::AlmostZero(Rho(i1, c_rho))) { + Vector(i1, c_v1) /= Rho(i1, c_rho); + Vector(i1, c_v2) /= Rho(i1, c_rho); + Vector(i1, c_v3) /= Rho(i1, c_rho); + } + } else { + raise::KernelError( + HERE, + "1D implementation of NormalizeVectorByRho_kernel called for non-1D"); + } + } + + Inline void operator()(index_t i1, index_t i2) const { + if constexpr (D == Dim::_2D) { + if (not cmp::AlmostZero(Rho(i1, i2, c_rho))) { + Vector(i1, i2, c_v1) /= Rho(i1, i2, c_rho); + Vector(i1, i2, c_v2) /= Rho(i1, i2, c_rho); + Vector(i1, i2, c_v3) /= Rho(i1, i2, c_rho); + } + } else { + raise::KernelError( + HERE, + "2D implementation of NormalizeVectorByRho_kernel called for non-2D"); + } + } + + Inline void operator()(index_t i1, index_t i2, index_t i3) const { + if constexpr (D == Dim::_3D) { + if (not cmp::AlmostZero(Rho(i1, i2, i3, c_rho))) { + Vector(i1, i2, i3, c_v1) /= Rho(i1, 
i2, i3, c_rho); + Vector(i1, i2, i3, c_v2) /= Rho(i1, i2, i3, c_rho); + Vector(i1, i2, i3, c_v3) /= Rho(i1, i2, i3, c_rho); + } + } else { + raise::KernelError( + HERE, + "3D implementation of NormalizeVectorByRho_kernel called for non-3D"); + } + } + }; + } // namespace kernel #endif // KERNELS_PARTICLE_MOMENTS_HPP diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 0deb73c6f..2e8a5f652 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -90,7 +90,7 @@ namespace kernel::sr { Force(const F& pgen_force) : Force { pgen_force, - {ZERO, ZERO, ZERO}, + { ZERO, ZERO, ZERO }, ZERO, ZERO } { @@ -227,41 +227,41 @@ namespace kernel::sr { const real_t coeff_sync; public: - Pusher_kernel(const PrtlPusher::type& pusher, - bool GCA, - bool ext_force, - CoolingTags cooling, - const ndfield_t& EB, - unsigned short sp, - array_t& i1, - array_t& i2, - array_t& i3, - array_t& i1_prev, - array_t& i2_prev, - array_t& i3_prev, - array_t& dx1, - array_t& dx2, - array_t& dx3, - array_t& dx1_prev, - array_t& dx2_prev, - array_t& dx3_prev, - array_t& ux1, - array_t& ux2, - array_t& ux3, - array_t& phi, - array_t& tag, - const M& metric, - const F& force, - real_t time, - real_t coeff, - real_t dt, - int ni1, - int ni2, - int ni3, - const boundaries_t& boundaries, - real_t gca_larmor_max, - real_t gca_eovrb_max, - real_t coeff_sync) + Pusher_kernel(const PrtlPusher::type& pusher, + bool GCA, + bool ext_force, + CoolingTags cooling, + const randacc_ndfield_t& EB, + unsigned short sp, + array_t& i1, + array_t& i2, + array_t& i3, + array_t& i1_prev, + array_t& i2_prev, + array_t& i3_prev, + array_t& dx1, + array_t& dx2, + array_t& dx3, + array_t& dx1_prev, + array_t& dx2_prev, + array_t& dx3_prev, + array_t& ux1, + array_t& ux2, + array_t& ux3, + array_t& phi, + array_t& tag, + const M& metric, + const F& force, + real_t time, + real_t coeff, + real_t dt, + int ni1, + int ni2, + int ni3, + const boundaries_t& 
boundaries, + real_t gca_larmor_max, + real_t gca_eovrb_max, + real_t coeff_sync) : pusher { pusher } , GCA { GCA } , ext_force { ext_force } diff --git a/src/kernels/tests/CMakeLists.txt b/src/kernels/tests/CMakeLists.txt index e55dbc111..a41ea43ef 100644 --- a/src/kernels/tests/CMakeLists.txt +++ b/src/kernels/tests/CMakeLists.txt @@ -1,9 +1,11 @@ # ------------------------------ # @brief: Generates tests for the `ntt_kernels` module +# # @uses: -# - kokkos [required] -# - plog [required] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * mpi [optional] # ------------------------------ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) @@ -29,3 +31,4 @@ gen_test(fields_to_phys) gen_test(prtls_to_phys) gen_test(gca_pusher) gen_test(prtl_bc) +gen_test(flds_bc) diff --git a/src/kernels/tests/deposit.cpp b/src/kernels/tests/deposit.cpp index 9a8ae1cc6..ec364a313 100644 --- a/src/kernels/tests/deposit.cpp +++ b/src/kernels/tests/deposit.cpp @@ -29,8 +29,7 @@ void errorIf(bool condition, const std::string& message) { inline static constexpr auto epsilon = std::numeric_limits::epsilon(); -Inline auto equal(real_t a, real_t b, const char* msg = "", real_t acc = ONE) - -> bool { +Inline auto equal(real_t a, real_t b, const char* msg = "", real_t acc = ONE) -> bool { const auto eps = epsilon * acc; if (not cmp::AlmostEqual(a, b, eps)) { printf("%.12e != %.12e %s\n", a, b, msg); @@ -81,8 +80,6 @@ void testDeposit(const std::vector& res, array_t tag { "tag", 10 }; const real_t charge { 1.0 }, inv_dt { 1.0 }; - auto J_scat = Kokkos::Experimental::create_scatter_view(J); - const int i0 = 4, j0 = 4; const prtldx_t dxi = 0.53, dxf = 0.47; @@ -122,30 +119,19 @@ void testDeposit(const std::vector& res, put_value(weight, 1.0, 0); put_value(tag, ParticleTag::alive, 0); - Kokkos::parallel_for("CurrentsDeposit", - 10, + auto J_scat = Kokkos::Experimental::create_scatter_view(J); + + // clang-format off + Kokkos::parallel_for("CurrentsDeposit", 10, 
kernel::DepositCurrents_kernel(J_scat, - i1, - i2, - i3, - i1_prev, - i2_prev, - i3_prev, - dx1, - dx2, - dx3, - dx1_prev, - dx2_prev, - dx3_prev, - ux1, - ux2, - ux3, - phi, - weight, - tag, - metric, - charge, - inv_dt)); + i1, i2, i3, + i1_prev, i2_prev, i3_prev, + dx1, dx2, dx3, + dx1_prev, dx2_prev, dx3_prev, + ux1, ux2, ux3, + phi, weight, tag, + metric, charge, inv_dt)); + // clang-format on Kokkos::Experimental::contribute(J, J_scat); diff --git a/src/kernels/tests/flds_bc.cpp b/src/kernels/tests/flds_bc.cpp new file mode 100644 index 000000000..aba829e8b --- /dev/null +++ b/src/kernels/tests/flds_bc.cpp @@ -0,0 +1,210 @@ +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "utils/comparators.h" +#include "utils/error.h" + +#include "metrics/minkowski.h" + +#include "kernels/fields_bcs.hpp" + +#include + +#include +#include +#include + +using namespace ntt; +using namespace kernel::bc; +using namespace metric; + +void errorIf(bool condition, const std::string& message) { + if (condition) { + throw std::runtime_error(message); + } +} + +template +struct DummyFieldsBCs { + DummyFieldsBCs() {} + + Inline auto ex1(const coord_t&) const -> real_t { + return TWO; + } + + Inline auto ex2(const coord_t&) const -> real_t { + return THREE; + } + + Inline auto bx2(const coord_t&) const -> real_t { + return FOUR; + } + + Inline auto bx3(const coord_t&) const -> real_t { + return FIVE; + } +}; + +Inline auto equal(real_t a, real_t b, const char* msg, real_t acc) -> bool { + if (not(math::abs(a - b) < acc)) { + printf("%.12e != %.12e [%.12e] %s\n", a, b, math::abs(a - b), msg); + return false; + } + return true; +} + +template +void testFldsBCs(const std::vector& res) { + errorIf(res.size() != (unsigned short)D, "res.size() != D"); + boundaries_t sx; + for (const auto& r : res) { + sx.emplace_back(ZERO, r); + } + const auto metric = Minkowski { res, sx }; + auto fset = DummyFieldsBCs {}; + ndfield_t flds; + if constexpr (D == Dim::_1D) { 
+ flds = ndfield_t { "flds", res[0] + 2 * N_GHOSTS }; + } else if constexpr (D == Dim::_2D) { + flds = ndfield_t { "flds", res[0] + 2 * N_GHOSTS, res[1] + 2 * N_GHOSTS }; + } else if constexpr (D == Dim::_3D) { + flds = ndfield_t { "flds", + res[0] + 2 * N_GHOSTS, + res[1] + 2 * N_GHOSTS, + res[2] + 2 * N_GHOSTS }; + } + + range_t range; + + if constexpr (D == Dim::_1D) { + range = CreateRangePolicy({ res[0] / 2 + N_GHOSTS }, + { res[0] + 2 * N_GHOSTS }); + } else if constexpr (D == Dim::_2D) { + range = CreateRangePolicy({ res[0] / 2 + N_GHOSTS, 0 }, + { res[0] + 2 * N_GHOSTS, res[1] + N_GHOSTS }); + } else if constexpr (D == Dim::_3D) { + range = CreateRangePolicy( + { res[0] / 2 + N_GHOSTS, 0, 0 }, + { res[0] + 2 * N_GHOSTS, res[1] + N_GHOSTS, res[2] + N_GHOSTS }); + } + + const auto xg_edge = (real_t)(sx[0].second); + const auto dx_abs = (real_t)(res[0] / 10.0); + + Kokkos::parallel_for( + "MatchBoundaries_kernel", + range, + MatchBoundaries_kernel( + flds, + fset, + metric, + xg_edge, + dx_abs, + BC::E | BC::B)); + + if constexpr (D == Dim::_1D) { + Kokkos::parallel_for( + "MatchBoundaries_kernel", + CreateRangePolicy({ N_GHOSTS }, { res[0] + N_GHOSTS }), + Lambda(index_t i1) { + const auto x = static_cast(i1 - N_GHOSTS); + const auto factor1 = math::tanh( + FOUR * math::abs(x + HALF - xg_edge) / dx_abs); + const auto factor2 = math::tanh(FOUR * math::abs(x - xg_edge) / dx_abs); + if (not cmp::AlmostEqual(flds(i1, em::ex1), TWO * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, em::ex1), TWO * (ONE - factor1)); + raise::KernelError(HERE, "incorrect ex1"); + } + if (not cmp::AlmostEqual(flds(i1, em::ex2), THREE * (ONE - factor2))) { + printf("%f != %f\n", flds(i1, em::ex2), THREE * (ONE - factor2)); + raise::KernelError(HERE, "incorrect ex2"); + } + if (not cmp::AlmostEqual(flds(i1, em::bx2), FOUR * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, em::bx2), FOUR * (ONE - factor1)); + raise::KernelError(HERE, "incorrect bx2"); + } + if (not 
cmp::AlmostEqual(flds(i1, em::bx3), FIVE * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, em::bx3), FIVE * (ONE - factor1)); + raise::KernelError(HERE, "incorrect bx3"); + } + }); + } else if constexpr (D == Dim::_2D) { + Kokkos::parallel_for( + "MatchBoundaries_kernel", + CreateRangePolicy({ N_GHOSTS, N_GHOSTS }, + { res[0] + N_GHOSTS, res[1] + N_GHOSTS }), + Lambda(index_t i1, index_t i2) { + const auto x = static_cast(i1 - N_GHOSTS); + const auto factor1 = math::tanh( + FOUR * math::abs(x + HALF - xg_edge) / dx_abs); + const auto factor2 = math::tanh(FOUR * math::abs(x - xg_edge) / dx_abs); + if (not cmp::AlmostEqual(flds(i1, i2, em::ex1), TWO * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, i2, em::ex1), TWO * (ONE - factor1)); + raise::KernelError(HERE, "incorrect ex1"); + } + if (not cmp::AlmostEqual(flds(i1, i2, em::ex2), THREE * (ONE - factor2))) { + printf("%f != %f\n", flds(i1, i2, em::ex2), THREE * (ONE - factor2)); + raise::KernelError(HERE, "incorrect ex2"); + } + if (not cmp::AlmostEqual(flds(i1, i2, em::bx2), FOUR * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, i2, em::bx2), FOUR * (ONE - factor1)); + raise::KernelError(HERE, "incorrect bx2"); + } + if (not cmp::AlmostEqual(flds(i1, i2, em::bx3), FIVE * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, i2, em::bx3), FIVE * (ONE - factor1)); + raise::KernelError(HERE, "incorrect bx3"); + } + }); + } else if constexpr (D == Dim::_3D) { + Kokkos::parallel_for( + "MatchBoundaries_kernel", + CreateRangePolicy( + { N_GHOSTS, N_GHOSTS, N_GHOSTS }, + { res[0] + N_GHOSTS, res[1] + N_GHOSTS, res[2] + N_GHOSTS }), + Lambda(index_t i1, index_t i2, index_t i3) { + const auto x = static_cast(i1 - N_GHOSTS); + const auto factor1 = math::tanh( + FOUR * math::abs(x + HALF - xg_edge) / dx_abs); + const auto factor2 = math::tanh(FOUR * math::abs(x - xg_edge) / dx_abs); + if (not cmp::AlmostEqual(flds(i1, i2, i3, em::ex1), TWO * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, i2, i3, em::ex1), TWO 
* (ONE - factor1)); + raise::KernelError(HERE, "incorrect ex1"); + } + if (not cmp::AlmostEqual(flds(i1, i2, i3, em::ex2), + THREE * (ONE - factor2))) { + printf("%f != %f\n", flds(i1, i2, i3, em::ex2), THREE * (ONE - factor2)); + raise::KernelError(HERE, "incorrect ex2"); + } + if (not cmp::AlmostEqual(flds(i1, i2, i3, em::bx2), + FOUR * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, i2, i3, em::bx2), FOUR * (ONE - factor1)); + raise::KernelError(HERE, "incorrect bx2"); + } + if (not cmp::AlmostEqual(flds(i1, i2, i3, em::bx3), + FIVE * (ONE - factor1))) { + printf("%f != %f\n", flds(i1, i2, i3, em::bx3), FIVE * (ONE - factor1)); + raise::KernelError(HERE, "incorrect bx3"); + } + }); + } +} + +auto main(int argc, char* argv[]) -> int { + Kokkos::initialize(argc, argv); + + try { + using namespace ntt; + + testFldsBCs({ 24 }); + testFldsBCs({ 64, 32 }); + testFldsBCs({ 14, 22, 15 }); + + } catch (std::exception& e) { + std::cerr << e.what() << std::endl; + Kokkos::finalize(); + return 1; + } + Kokkos::finalize(); + return 0; +} diff --git a/src/kernels/tests/prtl_bc.cpp b/src/kernels/tests/prtl_bc.cpp index c8f9eae04..14c1a9f54 100644 --- a/src/kernels/tests/prtl_bc.cpp +++ b/src/kernels/tests/prtl_bc.cpp @@ -201,9 +201,9 @@ void testPeriodicBC(const std::vector& res, // Particle boundaries auto boundaries = boundaries_t {}; boundaries = { - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC} + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC } }; real_t time = ZERO; @@ -343,18 +343,18 @@ auto main(int argc, char* argv[]) -> int { const std::vector res1d { 50 }; const boundaries_t ext1d { - {0.0, 1000.0}, + { 0.0, 1000.0 }, }; const std::vector res2d { 30, 20 }; const boundaries_t ext2d { - {-15.0, 15.0}, - {-10.0, 10.0}, + { -15.0, 15.0 }, + { -10.0, 10.0 }, }; const std::vector res3d { 10, 10, 10 }; const boundaries_t ext3d 
{ - {0.0, 1.0}, - {0.0, 1.0}, - {0.0, 1.0} + { 0.0, 1.0 }, + { 0.0, 1.0 }, + { 0.0, 1.0 } }; testPeriodicBC>(res1d, ext1d, {}); testPeriodicBC>(res2d, ext2d, {}); diff --git a/src/metrics/CMakeLists.txt b/src/metrics/CMakeLists.txt index 0f303fcfc..e053bb61c 100644 --- a/src/metrics/CMakeLists.txt +++ b/src/metrics/CMakeLists.txt @@ -1,11 +1,17 @@ # ------------------------------ # @defines: ntt_metrics [INTERFACE] +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_global [required] +# +# * ntt_global [required] +# # @uses: -# - kokkos [required] +# +# * kokkos [required] # ------------------------------ add_library(ntt_metrics INTERFACE) @@ -15,5 +21,5 @@ add_dependencies(ntt_metrics ${libs}) target_link_libraries(ntt_metrics INTERFACE ${libs}) target_include_directories(ntt_metrics - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../ -) \ No newline at end of file + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../) + diff --git a/src/metrics/tests/CMakeLists.txt b/src/metrics/tests/CMakeLists.txt index 117cb3295..c997ab079 100644 --- a/src/metrics/tests/CMakeLists.txt +++ b/src/metrics/tests/CMakeLists.txt @@ -1,9 +1,11 @@ # ------------------------------ # @brief: Generates tests for the `ntt_metrics` module +# # @uses: -# - kokkos [required] -# - plog [required] -# - mpi [optional] +# +# * kokkos [required] +# * plog [required] +# * mpi [optional] # ------------------------------ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) @@ -13,7 +15,7 @@ function(gen_test title) set(src ${title}.cpp) add_executable(${exec} ${src}) - set (libs ntt_metrics) + set(libs ntt_metrics) add_dependencies(${exec} ${libs}) target_link_libraries(${exec} PRIVATE ${libs}) @@ -25,4 +27,5 @@ gen_test(vec_trans) gen_test(coord_trans) gen_test(sph-qsph) gen_test(ks-qks) -gen_test(sr-cart-sph) \ No newline at end of file +gen_test(sr-cart-sph) + diff --git a/src/metrics/tests/sr-cart-sph.cpp b/src/metrics/tests/sr-cart-sph.cpp index ec2f6ddc0..42aa5d639 100644 --- 
a/src/metrics/tests/sr-cart-sph.cpp +++ b/src/metrics/tests/sr-cart-sph.cpp @@ -123,30 +123,30 @@ auto main(int argc, char* argv[]) -> int { const auto res2d = std::vector { 64, 32 }; const auto res3d = std::vector { 64, 32, 16 }; const auto ext1dcart = boundaries_t { - {10.0, 20.0} + { 10.0, 20.0 } }; const auto ext2dcart = boundaries_t { - {0.0, 20.0}, - {0.0, 10.0} + { 0.0, 20.0 }, + { 0.0, 10.0 } }; const auto ext3dcart = boundaries_t { - {-2.0, 2.0}, - {-1.0, 1.0}, - {-0.5, 0.5} + { -2.0, 2.0 }, + { -1.0, 1.0 }, + { -0.5, 0.5 } }; const auto extsph = boundaries_t { - {1.0, 10.0}, - {0.0, constant::PI} + { 1.0, 10.0 }, + { 0.0, constant::PI } }; const auto params = std::map { - {"r0", -ONE}, - { "h", (real_t)0.25} + { "r0", -ONE }, + { "h", (real_t)0.25 } }; testMetric>({ 128 }, ext1dcart); testMetric>(res2d, ext2dcart, 200); testMetric>(res3d, ext3dcart, 500); - testMetric>(res2d, extsph, 10); + testMetric>(res2d, extsph, 100); testMetric>(res2d, extsph, 200, params); } catch (std::exception& e) { diff --git a/src/output/CMakeLists.txt b/src/output/CMakeLists.txt index 2c25631ec..81333e9ff 100644 --- a/src/output/CMakeLists.txt +++ b/src/output/CMakeLists.txt @@ -1,32 +1,38 @@ # ------------------------------ # @defines: ntt_output [STATIC/SHARED] +# # @sources: -# - writer.cpp -# - fields.cpp -# - utils/interpret_prompt.cpp +# +# * writer.cpp +# * fields.cpp +# * utils/interpret_prompt.cpp +# # @includes: -# - ../ +# +# * ../ +# # @depends: -# - ntt_global [required] +# +# * ntt_global [required] +# # @uses: -# - kokkos [required] -# - ADIOS2 [required] -# - mpi [optional] +# +# * kokkos [required] +# * ADIOS2 [required] +# * mpi [optional] # ------------------------------ set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SOURCES - ${SRC_DIR}/writer.cpp - ${SRC_DIR}/fields.cpp - ${SRC_DIR}/utils/interpret_prompt.cpp -) +set(SOURCES ${SRC_DIR}/writer.cpp ${SRC_DIR}/fields.cpp + ${SRC_DIR}/utils/interpret_prompt.cpp) add_library(ntt_output ${SOURCES}) set(libs 
ntt_global) add_dependencies(ntt_output ${libs}) target_link_libraries(ntt_output PUBLIC ${libs}) +target_link_libraries(ntt_output PRIVATE stdc++fs) -target_include_directories(ntt_output +target_include_directories( + ntt_output PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../ - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../ -) + INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../) diff --git a/src/output/fields.cpp b/src/output/fields.cpp index aa5a752d4..25267bdee 100644 --- a/src/output/fields.cpp +++ b/src/output/fields.cpp @@ -29,14 +29,21 @@ namespace out { } else { m_id = FldsID::Custom; } + // check compatibility + raise::ErrorIf(id() == FldsID::A and S != SimEngine::GRPIC, + "Output of A_phi not supported for non-GRPIC", + HERE); + raise::ErrorIf(id() == FldsID::V and S == SimEngine::GRPIC, + "Output of bulk 3-vel not supported for GRPIC", + HERE); // determine the species and components to output if (is_moment()) { species = InterpretSpecies(name); } else { species = {}; } - if (is_field() || is_current()) { - // always write all the field/current components + if (is_field() || is_current() || id() == FldsID::V) { + // always write all the field/current/bulk vel components comp = { { 1 }, { 2 }, { 3 } }; } else if (id() == FldsID::A) { // only write A3 @@ -44,6 +51,9 @@ namespace out { } else if (id() == FldsID::T) { // energy-momentum tensor comp = InterpretComponents({ name.substr(1, 1), name.substr(2, 1) }); + } else if (id() == FldsID::V) { + // energy-momentum tensor + comp = InterpretComponents({ name.substr(1, 1) }); } else { // scalar (Rho, divE, Custom, etc.) 
comp = {}; diff --git a/src/output/fields.h b/src/output/fields.h index a520a246d..0e8e31d08 100644 --- a/src/output/fields.h +++ b/src/output/fields.h @@ -43,7 +43,7 @@ namespace out { [[nodiscard]] auto is_moment() const -> bool { return (id() == FldsID::T || id() == FldsID::Rho || id() == FldsID::Nppc || - id() == FldsID::N || id() == FldsID::Charge); + id() == FldsID::N || id() == FldsID::Charge || id() == FldsID::V); } [[nodiscard]] @@ -94,7 +94,7 @@ namespace out { tmp += m_name.substr(1, 2); } else if (id() == FldsID::A) { tmp += "3"; - } else if (is_field()) { + } else if (is_field() || id() == FldsID::V) { tmp += "i"; } if (species.size() > 0) { diff --git a/src/output/tests/CMakeLists.txt b/src/output/tests/CMakeLists.txt index d33cc6c54..afc7950c4 100644 --- a/src/output/tests/CMakeLists.txt +++ b/src/output/tests/CMakeLists.txt @@ -1,30 +1,36 @@ # ------------------------------ # @brief: Generates tests for the `ntt_output` module +# # @uses: -# - kokkos [required] -# - mpi [optional] -# - adios2 [optional] -# !TODO: -# - add more proper write tests for ADIOS2 +# +# * kokkos [required] +# * mpi [optional] +# * adios2 [optional] # ------------------------------ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) -function(gen_test title) +function(gen_test title is_parallel) set(exec test-output-${title}.xc) set(src ${title}.cpp) add_executable(${exec} ${src}) - set (libs ntt_output ntt_global ntt_metrics ntt_framework) + set(libs ntt_output ntt_global ntt_metrics ntt_framework) add_dependencies(${exec} ${libs}) target_link_libraries(${exec} PRIVATE ${libs} stdc++fs) - add_test(NAME "OUTPUT::${title}" COMMAND "${exec}") + if(${is_parallel}) + add_test(NAME "OUTPUT::${title}" + COMMAND "${MPIEXEC_EXECUTABLE}" "${MPIEXEC_NUMPROC_FLAG}" "4" + "${exec}") + else() + add_test(NAME "OUTPUT::${title}" COMMAND "${exec}") + endif() endfunction() -if (NOT ${mpi}) - gen_test(fields) - gen_test(writer-nompi) +if(NOT ${mpi}) + gen_test(fields false) + 
gen_test(writer-nompi false) else() - gen_test(writer-mpi) -endif() \ No newline at end of file + gen_test(writer-mpi true) +endif() diff --git a/src/output/tests/writer-mpi.cpp b/src/output/tests/writer-mpi.cpp index 6b810fa22..f6d3ee88a 100644 --- a/src/output/tests/writer-mpi.cpp +++ b/src/output/tests/writer-mpi.cpp @@ -2,9 +2,7 @@ #include "global.h" #include "arch/mpi_aliases.h" -#include "utils/formatting.h" -#include "output/fields.h" #include "output/writer.h" #include @@ -14,7 +12,6 @@ #include #include -#include #include #include @@ -24,48 +21,160 @@ void cleanup() { fs::remove(tempfile_path); } +#define CEILDIV(a, b) \ + (static_cast(math::ceil(static_cast(a) / static_cast(b)))) + auto main(int argc, char* argv[]) -> int { Kokkos::initialize(argc, argv); MPI_Init(&argc, &argv); - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); + int mpi_rank, mpi_size; + MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); try { using namespace ntt; + constexpr auto nx1 = 10; + constexpr auto nx1_gh = nx1 + 2 * N_GHOSTS; + constexpr auto i1min = N_GHOSTS; + constexpr auto i1max = nx1 + N_GHOSTS; + constexpr auto dwn1 = 3; + + ndfield_t field { "fld", nx1_gh }; + std::vector field_names; + + { + // fill data + Kokkos::parallel_for( + "fill", + CreateRangePolicy({ i1min }, { i1max }), + Lambda(index_t i1) { + const auto i1_ = static_cast(i1); + field(i1, 0) = i1_; + field(i1, 1) = -i1_; + field(i1, 2) = SQR(i1_); + }); + } adios2::ADIOS adios { MPI_COMM_WORLD }; - auto writer = out::Writer(); - writer.init(&adios, "hdf5"); - writer.defineMeshLayout({ static_cast(size) * 10 }, - { static_cast(rank) * 10 }, - { 10 }, - false, - Coord::Cart); - writer.defineFieldOutputs(SimEngine::SRPIC, { "E" }); - - ndfield_t field { "fld", 10 + 2 * N_GHOSTS }; - Kokkos::parallel_for( - "fill", - CreateRangePolicy({ N_GHOSTS }, { 10 + N_GHOSTS }), - Lambda(index_t i1) { - field(i1, 0) = i1; - field(i1, 1) = 
-(real_t)(i1); - field(i1, 2) = i1 / 2; - }); - std::vector names; - std::vector addresses; - for (auto i = 0; i < 3; ++i) { - names.push_back(writer.fieldWriters()[0].name(i)); - addresses.push_back(i); + + { + // write + auto writer = out::Writer(); + writer.init(&adios, "hdf5", "test", false); + writer.defineMeshLayout({ static_cast(mpi_size) * nx1 }, + { static_cast(mpi_rank) * nx1 }, + { nx1 }, + { dwn1 }, + false, + Coord::Cart); + writer.defineFieldOutputs(SimEngine::SRPIC, { "E" }); + + std::vector addresses; + for (auto i = 0; i < 3; ++i) { + field_names.push_back(writer.fieldWriters()[0].name(i)); + addresses.push_back(i); + } + writer.beginWriting(WriteMode::Fields, 0, 0.0); + writer.writeField(field_names, field, addresses); + writer.endWriting(WriteMode::Fields); + + writer.beginWriting(WriteMode::Fields, 1, 0.1); + writer.writeField(field_names, field, addresses); + writer.endWriting(WriteMode::Fields); + adios.ExitComputationBlock(); } - writer.beginWriting("test", 0, 0.0); - writer.writeField(names, field, addresses); - writer.endWriting(); - writer.beginWriting("test", 1, 0.1); - writer.writeField(names, field, addresses); - writer.endWriting(); + adios.FlushAll(); + + { + // read + adios2::IO io = adios.DeclareIO("read-test"); + io.SetEngine("hdf5"); + adios2::Engine reader = io.Open("test.h5", adios2::Mode::Read, MPI_COMM_SELF); + raise::ErrorIf(io.InquireAttribute("NGhosts").Data()[0] != 0, + "NGhosts is not correct", + HERE); + raise::ErrorIf(io.InquireAttribute("Dimension").Data()[0] != 1, + "Dimension is not correct", + HERE); + for (std::size_t step = 0; reader.BeginStep() == adios2::StepStatus::OK; + ++step) { + std::size_t step_read; + long double time_read; + + reader.Get(io.InquireVariable("Step"), + &step_read, + adios2::Mode::Sync); + reader.Get(io.InquireVariable("Time"), + &time_read, + adios2::Mode::Sync); + raise::ErrorIf(step_read != step, "Step is not correct", HERE); + raise::ErrorIf((float)time_read != (float)step * 0.1f, + 
"Time is not correct", + HERE); + + const auto l_size = nx1; + const auto l_offset = nx1 * mpi_rank; + const auto g_size = nx1 * mpi_size; + + const double n = l_size; + const double d = dwn1; + const double l = l_offset; + const double f = math::ceil(l / d) * d - l; + + const auto first_cell = static_cast(f); + const auto l_size_dwn = static_cast(math::ceil((n - f) / d)); + const auto l_corner_dwn = static_cast(math::ceil(l / d)); + + array_t field_read {}; + int cntr = 0; + for (const auto& name : field_names) { + auto fieldVar = io.InquireVariable(name); + if (fieldVar) { + raise::ErrorIf(fieldVar.Shape().size() != 1, + fmt::format("%s is not 1D", name.c_str()), + HERE); + auto dims = fieldVar.Shape(); + std::size_t nx1_r = dims[0]; + raise::ErrorIf((nx1_r != CEILDIV(nx1 * mpi_size, dwn1)), + fmt::format("%s = %ld is not %d", + name.c_str(), + nx1_r, + CEILDIV(nx1 * mpi_size, dwn1)), + HERE); + + fieldVar.SetSelection( + adios2::Box({ l_corner_dwn }, { l_size_dwn })); + field_read = array_t(name, l_size_dwn); + auto field_read_h = Kokkos::create_mirror_view(field_read); + reader.Get(fieldVar, field_read_h.data(), adios2::Mode::Sync); + Kokkos::deep_copy(field_read, field_read_h); + + Kokkos::parallel_for( + "check", + CreateRangePolicy({ 0 }, { l_size_dwn }), + Lambda(index_t i1) { + if (not cmp::AlmostEqual( + field_read(i1), + field(i1 * dwn1 + first_cell + i1min, cntr))) { + printf("\n:::::::::::::::\nfield_read(%ld) = %f != " + "field(%ld, %d) = %f\n:::::::::::::::\n", + i1, + field_read(i1), + i1 * dwn1 + first_cell + i1min, + cntr, + field(i1 * dwn1 + first_cell + i1min, cntr)); + raise::KernelError(HERE, "Field is not read correctly"); + } + }); + } else { + raise::Error("Field not found", HERE); + } + ++cntr; + } + } + reader.Close(); + } } catch (std::exception& e) { std::cerr << e.what() << std::endl; @@ -81,3 +190,5 @@ auto main(int argc, char* argv[]) -> int { Kokkos::finalize(); return 0; } + +#undef CEILDIV diff --git 
a/src/output/tests/writer-nompi.cpp b/src/output/tests/writer-nompi.cpp index 25a9a2c51..8fb2ac026 100644 --- a/src/output/tests/writer-nompi.cpp +++ b/src/output/tests/writer-nompi.cpp @@ -3,7 +3,6 @@ #include "utils/formatting.h" -#include "output/fields.h" #include "output/writer.h" #include @@ -12,100 +11,194 @@ #include #include -#include #include #include +using namespace ntt; + void cleanup() { namespace fs = std::filesystem; fs::path tempfile_path { "test.h5" }; fs::remove(tempfile_path); } +#define CEILDIV(a, b) \ + (static_cast(math::ceil(static_cast(a) / static_cast(b)))) + auto main(int argc, char* argv[]) -> int { Kokkos::initialize(argc, argv); try { + constexpr auto nx1 = 10; + constexpr auto nx1_gh = nx1 + 2 * N_GHOSTS; + constexpr auto nx2 = 14; + constexpr auto nx2_gh = nx2 + 2 * N_GHOSTS; + constexpr auto nx3 = 17; + constexpr auto nx3_gh = nx3 + 2 * N_GHOSTS; + constexpr auto i1min = N_GHOSTS; + constexpr auto i2min = N_GHOSTS; + constexpr auto i3min = N_GHOSTS; + constexpr auto i1max = nx1 + N_GHOSTS; + constexpr auto i2max = nx2 + N_GHOSTS; + constexpr auto i3max = nx3 + N_GHOSTS; + + constexpr auto dwn1 = 2; + constexpr auto dwn2 = 1; + constexpr auto dwn3 = 5; + + ndfield_t field { "fld", nx1_gh, nx2_gh, nx3_gh }; + std::vector field_names; + + { + // fill data + Kokkos::parallel_for( + "fill", + CreateRangePolicy({ i1min, i2min, i3min }, + { i1max, i2max, i3max }), + Lambda(index_t i1, index_t i2, index_t i3) { + const auto i1_ = static_cast(i1); + const auto i2_ = static_cast(i2); + const auto i3_ = static_cast(i3); + field(i1, i2, i3, 0) = i1_; + field(i1, i2, i3, 1) = i2_; + field(i1, i2, i3, 2) = i3_; + }); + } + adios2::ADIOS adios; - using namespace ntt; - auto writer = out::Writer(); - writer.init(&adios, "hdf5", "test"); - writer.defineMeshLayout({ 10, 10, 10 }, - { 0, 0, 0 }, - { 10, 10, 10 }, - false, - Coord::Cart); - writer.defineFieldOutputs(SimEngine::SRPIC, { "E", "B", "Rho_1_3", "N_2" }); - - ndfield_t field { "fld", - 10 + 
2 * N_GHOSTS, - 10 + 2 * N_GHOSTS, - 10 + 2 * N_GHOSTS }; - Kokkos::parallel_for( - "fill", - CreateRangePolicy({ N_GHOSTS, N_GHOSTS, N_GHOSTS }, - { 10 + N_GHOSTS, 10 + N_GHOSTS, 10 + N_GHOSTS }), - Lambda(index_t i1, index_t i2, index_t i3) { - field(i1, i2, i3, 0) = i1 + i2 + i3; - field(i1, i2, i3, 1) = i1 * i2 / i3; - field(i1, i2, i3, 2) = i1 / i2 * i3; - }); - std::vector names; - std::vector addresses; - for (auto i = 0; i < 3; ++i) { - names.push_back(writer.fieldWriters()[0].name(i)); - addresses.push_back(i); + { + // write + auto writer = out::Writer(); + writer.init(&adios, "hdf5", "test", false); + writer.defineMeshLayout({ nx1, nx2, nx3 }, + { 0, 0, 0 }, + { nx1, nx2, nx3 }, + { dwn1, dwn2, dwn3 }, + false, + Coord::Cart); + writer.defineFieldOutputs(SimEngine::SRPIC, { "E", "B", "Rho_1_3", "N_2" }); + + std::vector addresses; + for (auto i = 0; i < 3; ++i) { + field_names.push_back(writer.fieldWriters()[0].name(i)); + addresses.push_back(i); + } + writer.beginWriting(WriteMode::Fields, 10, 123.0); + writer.writeField(field_names, field, addresses); + writer.endWriting(WriteMode::Fields); + + writer.beginWriting(WriteMode::Fields, 20, 123.4); + writer.writeField(field_names, field, addresses); + writer.endWriting(WriteMode::Fields); } - writer.beginWriting(0, 0.0); - writer.writeField(names, field, addresses); - writer.endWriting(); - writer.beginWriting(1, 0.1); - writer.writeField(names, field, addresses); - writer.endWriting(); + adios.FlushAll(); { // read - adios2::ADIOS adios; - adios2::IO io = adios.DeclareIO("read-test"); + adios2::IO io = adios.DeclareIO("read-test"); io.SetEngine("hdf5"); adios2::Engine reader = io.Open("test.h5", adios2::Mode::Read); + const auto layoutRight = io.InquireAttribute("LayoutRight").Data()[0] == + 1; - std::size_t step { 0 }; - long double time { 0.0 }; - reader.Get(io.InquireVariable("Step"), step); - reader.Get(io.InquireVariable("Time"), time); - raise::ErrorIf(step != 0, "Step is not 0", HERE); - 
raise::ErrorIf(time != 0.0, "Time is not 0.0", HERE); + raise::ErrorIf(io.InquireAttribute("NGhosts").Data()[0] != 0, + "NGhosts is not correct", + HERE); + raise::ErrorIf(io.InquireAttribute("Dimension").Data()[0] != 3, + "Dimension is not correct", + HERE); for (std::size_t step = 0; reader.BeginStep() == adios2::StepStatus::OK; ++step) { - std::size_t step_read; - adios2::Variable stepVar = io.InquireVariable( - "Step"); - reader.Get(stepVar, step_read); - + std::size_t step_read; long double time_read; - reader.Get(io.InquireVariable("Time"), time_read); - raise::ErrorIf(step_read != step, "Step is not correct", HERE); - raise::ErrorIf((float)time_read != (float)step / 10.0f, + + reader.Get(io.InquireVariable("Step"), + &step_read, + adios2::Mode::Sync); + reader.Get(io.InquireVariable("Time"), + &time_read, + adios2::Mode::Sync); + raise::ErrorIf(step_read != (step + 1) * 10, "Step is not correct", HERE); + raise::ErrorIf((float)time_read != 123 + (float)step * 0.4f, "Time is not correct", HERE); - for (const auto& name : names) { - auto data = io.InquireVariable(name); - raise::ErrorIf(data.Shape().size() != 3, - fmt::format("%s is not 3D", name.c_str()), - HERE); - - auto dims = data.Shape(); - std::size_t nx1 = dims[0]; - std::size_t nx2 = dims[1]; - std::size_t nx3 = dims[2]; - raise::ErrorIf((nx1 != 10) || (nx2 != 10) || (nx3 != 10), - fmt::format("%s is not 10x10x10", name.c_str()), - HERE); + array_t field_read {}; + + int cntr = 0; + for (const auto& name : field_names) { + auto fieldVar = io.InquireVariable(name); + if (fieldVar) { + raise::ErrorIf(fieldVar.Shape().size() != 3, + fmt::format("%s is not 3D", name.c_str()), + HERE); + + auto dims = fieldVar.Shape(); + std::size_t nx1_r = dims[0]; + std::size_t nx2_r = dims[1]; + std::size_t nx3_r = dims[2]; + if (!layoutRight) { + std::swap(nx1_r, nx3_r); + } + raise::ErrorIf((nx1_r != CEILDIV(nx1, dwn1)) || + (nx2_r != CEILDIV(nx2, dwn2)) || + (nx3_r != CEILDIV(nx3, dwn3)), + fmt::format("%s = 
%ldx%ldx%ld is not %dx%dx%d", + name.c_str(), + nx1_r, + nx2_r, + nx3_r, + CEILDIV(nx1, dwn1), + CEILDIV(nx2, dwn2), + CEILDIV(nx3, dwn3)), + HERE); + + if (!layoutRight) { + std::swap(nx1_r, nx3_r); + } + fieldVar.SetSelection( + adios2::Box({ 0, 0, 0 }, { nx1_r, nx2_r, nx3_r })); + if (!layoutRight) { + std::swap(nx1_r, nx3_r); + } + field_read = array_t(name, nx1_r, nx2_r, nx3_r); + auto field_read_h = Kokkos::create_mirror_view(field_read); + reader.Get(fieldVar, field_read_h.data(), adios2::Mode::Sync); + Kokkos::deep_copy(field_read, field_read_h); + + Kokkos::parallel_for( + "check", + CreateRangePolicy({ 0, 0, 0 }, { nx1_r, nx2_r, nx3_r }), + Lambda(index_t i1, index_t i2, index_t i3) { + if (not cmp::AlmostEqual(field_read(i1, i2, i3), + field(i1 * dwn1 + i1min, + i2 * dwn2 + i2min, + i3 * dwn3 + i3min, + cntr))) { + printf("\n:::::::::::::::\nfield_read(%ld, %ld, %ld) = %f != " + "field(%ld, %ld, %ld, %d) = %f\n:::::::::::::::\n", + i1, + i2, + i3, + field_read(i1, i2, i3), + i1 * dwn1 + i1min, + i2 * dwn2 + i2min, + i3 * dwn3 + i3min, + cntr, + field(i1 * dwn1 + i1min, + i2 * dwn2 + i2min, + i3 * dwn3 + i3min, + cntr)); + raise::KernelError(HERE, "Field is not read correctly"); + } + }); + } else { + raise::Error("Field not found", HERE); + } + ++cntr; } reader.EndStep(); } @@ -121,3 +214,5 @@ auto main(int argc, char* argv[]) -> int { Kokkos::finalize(); return 0; } + +#undef CEILDIV diff --git a/src/output/writer.cpp b/src/output/writer.cpp index 3d526b306..95965c864 100644 --- a/src/output/writer.cpp +++ b/src/output/writer.cpp @@ -18,6 +18,7 @@ #include #endif +#include #include #include @@ -25,9 +26,11 @@ namespace out { void Writer::init(adios2::ADIOS* ptr_adios, const std::string& engine, - const std::string& title) { - m_engine = engine; - p_adios = ptr_adios; + const std::string& title, + bool use_separate_files) { + m_separate_files = use_separate_files; + m_engine = engine; + p_adios = ptr_adios; raise::ErrorIf(p_adios == nullptr, "ADIOS 
pointer is null", HERE); @@ -36,7 +39,7 @@ namespace out { m_io.DefineVariable("Step"); m_io.DefineVariable("Time"); - m_fname = title + (m_engine == "hdf5" ? ".h5" : ".bp"); + m_fname = title; } void Writer::addTracker(const std::string& type, @@ -60,25 +63,45 @@ namespace out { m_mode = mode; } - void Writer::defineMeshLayout(const std::vector& glob_shape, - const std::vector& loc_corner, - const std::vector& loc_shape, - bool incl_ghosts, - Coord coords) { - m_flds_ghosts = incl_ghosts; + void Writer::defineMeshLayout(const std::vector& glob_shape, + const std::vector& loc_corner, + const std::vector& loc_shape, + const std::vector& dwn, + bool incl_ghosts, + Coord coords) { + m_flds_ghosts = incl_ghosts; + m_dwn = dwn; + m_flds_g_shape = glob_shape; m_flds_l_corner = loc_corner; m_flds_l_shape = loc_shape; + for (std::size_t i { 0 }; i < glob_shape.size(); ++i) { + raise::ErrorIf(dwn[i] != 1 && incl_ghosts, + "Downsampling with ghosts not supported", + HERE); + + const double g = glob_shape[i]; + const double d = m_dwn[i]; + const double l = loc_corner[i]; + const double n = loc_shape[i]; + const double f = math::ceil(l / d) * d - l; + m_flds_g_shape_dwn.push_back(static_cast(math::ceil(g / d))); + m_flds_l_corner_dwn.push_back(static_cast(math::ceil(l / d))); + m_flds_l_first.push_back(static_cast(f)); + m_flds_l_shape_dwn.push_back( + static_cast(math::ceil((n - f) / d))); + } + m_io.DefineAttribute("NGhosts", incl_ghosts ? 
N_GHOSTS : 0); m_io.DefineAttribute("Dimension", m_flds_g_shape.size()); m_io.DefineAttribute("Coordinates", std::string(coords.to_string())); for (std::size_t i { 0 }; i < m_flds_g_shape.size(); ++i) { // cell-centers - adios2::Dims g_shape = { m_flds_g_shape[i] }; - adios2::Dims l_corner = { m_flds_l_corner[i] }; - adios2::Dims l_shape = { m_flds_l_shape[i] }; + adios2::Dims g_shape = { m_flds_g_shape_dwn[i] }; + adios2::Dims l_corner = { m_flds_l_corner_dwn[i] }; + adios2::Dims l_shape = { m_flds_l_shape_dwn[i] }; m_io.DefineVariable("X" + std::to_string(i + 1), g_shape, l_corner, @@ -87,8 +110,8 @@ namespace out { // cell-edges const auto is_last = (m_flds_l_corner[i] + m_flds_l_shape[i] == m_flds_g_shape[i]); - adios2::Dims g_shape1 = { m_flds_g_shape[i] + 1 }; - adios2::Dims l_shape1 = { m_flds_l_shape[i] + (is_last ? 1 : 0) }; + adios2::Dims g_shape1 = { m_flds_g_shape_dwn[i] + 1 }; + adios2::Dims l_shape1 = { m_flds_l_shape_dwn[i] + (is_last ? 1 : 0) }; m_io.DefineVariable("X" + std::to_string(i + 1) + "e", g_shape1, l_corner, @@ -100,9 +123,9 @@ namespace out { Kokkos::LayoutRight>::value) { m_io.DefineAttribute("LayoutRight", 1); } else { - std::reverse(m_flds_g_shape.begin(), m_flds_g_shape.end()); - std::reverse(m_flds_l_corner.begin(), m_flds_l_corner.end()); - std::reverse(m_flds_l_shape.begin(), m_flds_l_shape.end()); + std::reverse(m_flds_g_shape_dwn.begin(), m_flds_g_shape_dwn.end()); + std::reverse(m_flds_l_corner_dwn.begin(), m_flds_l_corner_dwn.end()); + std::reverse(m_flds_l_shape_dwn.begin(), m_flds_l_shape_dwn.end()); m_io.DefineAttribute("LayoutRight", 0); } } @@ -110,8 +133,9 @@ namespace out { void Writer::defineFieldOutputs(const SimEngine& S, const std::vector& flds_out) { m_flds_writers.clear(); - raise::ErrorIf((m_flds_g_shape.size() == 0) || (m_flds_l_corner.size() == 0) || - (m_flds_l_shape.size() == 0), + raise::ErrorIf((m_flds_g_shape_dwn.size() == 0) || + (m_flds_l_corner_dwn.size() == 0) || + (m_flds_l_shape_dwn.size() == 0), 
"Mesh layout must be defined before field output", HERE); for (const auto& fld : flds_out) { @@ -119,17 +143,19 @@ namespace out { } for (const auto& fld : m_flds_writers) { if (fld.comp.size() == 0) { + // scalar m_io.DefineVariable(fld.name(), - m_flds_g_shape, - m_flds_l_corner, - m_flds_l_shape, + m_flds_g_shape_dwn, + m_flds_l_corner_dwn, + m_flds_l_shape_dwn, adios2::ConstantDims); } else { + // vector or tensor for (std::size_t i { 0 }; i < fld.comp.size(); ++i) { m_io.DefineVariable(fld.name(i), - m_flds_g_shape, - m_flds_l_corner, - m_flds_l_shape, + m_flds_g_shape_dwn, + m_flds_l_corner_dwn, + m_flds_l_shape_dwn, adios2::ConstantDims); } } @@ -178,48 +204,123 @@ namespace out { } template - void WriteField(adios2::IO& io, - adios2::Engine& writer, - const std::string& varname, - const ndfield_t& field, - std::size_t comp, - bool ghosts) { - auto var = io.InquireVariable(varname); - const auto gh_zones = ghosts ? 0 : N_GHOSTS; + void WriteField(adios2::IO& io, + adios2::Engine& writer, + const std::string& varname, + const ndfield_t& field, + std::size_t comp, + std::vector dwn, + std::vector first_cell, + bool ghosts) { + // when dwn != 1 in any direction, it is assumed that ghosts == false + auto var = io.InquireVariable(varname); + const auto gh_zones = ghosts ? 
0 : N_GHOSTS; + ndarray_t output_field {}; if constexpr (D == Dim::_1D) { - auto slice_i1 = range_tuple_t(gh_zones, field.extent(0) - gh_zones); - auto slice = Kokkos::subview(field, slice_i1, comp); - auto output_field = array_t("output_field", slice.extent(0)); - Kokkos::deep_copy(output_field, slice); - auto output_field_host = Kokkos::create_mirror_view(output_field); - Kokkos::deep_copy(output_field_host, output_field); - writer.Put(var, output_field_host); + if (ghosts || dwn[0] == 1) { + auto slice_i1 = range_tuple_t(gh_zones, field.extent(0) - gh_zones); + auto slice = Kokkos::subview(field, slice_i1, comp); + output_field = array_t { "output_field", slice.extent(0) }; + Kokkos::deep_copy(output_field, slice); + } else { + + const auto dwn1 = dwn[0]; + const double first_cell1_d = first_cell[0]; + const double nx1_full = field.extent(0) - 2 * N_GHOSTS; + const auto first_cell1 = first_cell[0]; + + const auto nx1_dwn = static_cast( + math::ceil((nx1_full - first_cell1_d) / dwn1)); + + output_field = array_t { "output_field", nx1_dwn }; + Kokkos::parallel_for( + "outputField", + nx1_dwn, + Lambda(index_t i1) { + output_field(i1) = field(first_cell1 + i1 * dwn1 + N_GHOSTS, comp); + }); + } } else if constexpr (D == Dim::_2D) { - auto slice_i1 = range_tuple_t(gh_zones, field.extent(0) - gh_zones); - auto slice_i2 = range_tuple_t(gh_zones, field.extent(1) - gh_zones); - auto slice = Kokkos::subview(field, slice_i1, slice_i2, comp); - auto output_field = array_t("output_field", + if (ghosts || (dwn[0] == 1 && dwn[1] == 1)) { + auto slice_i1 = range_tuple_t(gh_zones, field.extent(0) - gh_zones); + auto slice_i2 = range_tuple_t(gh_zones, field.extent(1) - gh_zones); + auto slice = Kokkos::subview(field, slice_i1, slice_i2, comp); + output_field = array_t { "output_field", slice.extent(0), - slice.extent(1)); - Kokkos::deep_copy(output_field, slice); - auto output_field_host = Kokkos::create_mirror_view(output_field); - Kokkos::deep_copy(output_field_host, 
output_field); - writer.Put(var, output_field_host); + slice.extent(1) }; + Kokkos::deep_copy(output_field, slice); + } else { + const auto dwn1 = dwn[0]; + const auto dwn2 = dwn[1]; + const double first_cell1_d = first_cell[0]; + const double first_cell2_d = first_cell[1]; + const double nx1_full = field.extent(0) - 2 * N_GHOSTS; + const double nx2_full = field.extent(1) - 2 * N_GHOSTS; + const auto first_cell1 = first_cell[0]; + const auto first_cell2 = first_cell[1]; + + const auto nx1_dwn = static_cast( + math::ceil((nx1_full - first_cell1_d) / dwn1)); + const auto nx2_dwn = static_cast( + math::ceil((nx2_full - first_cell2_d) / dwn2)); + output_field = array_t { "output_field", nx1_dwn, nx2_dwn }; + Kokkos::parallel_for( + "outputField", + CreateRangePolicy({ 0, 0 }, { nx1_dwn, nx2_dwn }), + Lambda(index_t i1, index_t i2) { + output_field(i1, i2) = field(first_cell1 + i1 * dwn1 + N_GHOSTS, + first_cell2 + i2 * dwn2 + N_GHOSTS, + comp); + }); + } } else if constexpr (D == Dim::_3D) { - auto slice_i1 = range_tuple_t(gh_zones, field.extent(0) - gh_zones); - auto slice_i2 = range_tuple_t(gh_zones, field.extent(1) - gh_zones); - auto slice_i3 = range_tuple_t(gh_zones, field.extent(2) - gh_zones); - auto slice = Kokkos::subview(field, slice_i1, slice_i2, slice_i3, comp); - auto output_field = array_t("output_field", - slice.extent(0), - slice.extent(1), - slice.extent(2)); - Kokkos::deep_copy(output_field, slice); - auto output_field_host = Kokkos::create_mirror_view(output_field); - Kokkos::deep_copy(output_field_host, output_field); - writer.Put(var, output_field_host); + if (ghosts || (dwn[0] == 1 && dwn[1] == 1 && dwn[2] == 1)) { + auto slice_i1 = range_tuple_t(gh_zones, field.extent(0) - gh_zones); + auto slice_i2 = range_tuple_t(gh_zones, field.extent(1) - gh_zones); + auto slice_i3 = range_tuple_t(gh_zones, field.extent(2) - gh_zones); + auto slice = Kokkos::subview(field, slice_i1, slice_i2, slice_i3, comp); + output_field = array_t { "output_field", + 
slice.extent(0), + slice.extent(1), + slice.extent(2) }; + Kokkos::deep_copy(output_field, slice); + } else { + const auto dwn1 = dwn[0]; + const auto dwn2 = dwn[1]; + const auto dwn3 = dwn[2]; + const double first_cell1_d = first_cell[0]; + const double first_cell2_d = first_cell[1]; + const double first_cell3_d = first_cell[2]; + const double nx1_full = field.extent(0) - 2 * N_GHOSTS; + const double nx2_full = field.extent(1) - 2 * N_GHOSTS; + const double nx3_full = field.extent(2) - 2 * N_GHOSTS; + const auto first_cell1 = first_cell[0]; + const auto first_cell2 = first_cell[1]; + const auto first_cell3 = first_cell[2]; + + const auto nx1_dwn = static_cast( + math::ceil((nx1_full - first_cell1_d) / dwn1)); + const auto nx2_dwn = static_cast( + math::ceil((nx2_full - first_cell2_d) / dwn2)); + const auto nx3_dwn = static_cast( + math::ceil((nx3_full - first_cell3_d) / dwn3)); + + output_field = array_t { "output_field", nx1_dwn, nx2_dwn, nx3_dwn }; + Kokkos::parallel_for( + "outputField", + CreateRangePolicy({ 0, 0, 0 }, { nx1_dwn, nx2_dwn, nx3_dwn }), + Lambda(index_t i1, index_t i2, index_t i3) { + output_field(i1, i2, i3) = field(first_cell1 + i1 * dwn1 + N_GHOSTS, + first_cell2 + i2 * dwn2 + N_GHOSTS, + first_cell3 + i3 * dwn3 + N_GHOSTS, + comp); + }); + } } + auto output_field_h = Kokkos::create_mirror_view(output_field); + Kokkos::deep_copy(output_field_h, output_field); + writer.Put(var, output_field_h); } template @@ -233,7 +334,14 @@ namespace out { "# of names != # of addresses ", HERE); for (std::size_t i { 0 }; i < addresses.size(); ++i) { - WriteField(m_io, m_writer, names[i], fld, addresses[i], m_flds_ghosts); + WriteField(m_io, + m_writer, + names[i], + fld, + addresses[i], + m_dwn, + m_flds_l_first, + m_flds_ghosts); } } @@ -307,33 +415,75 @@ namespace out { m_writer.Put(vare, xe_h); } - void Writer::beginWriting(std::size_t tstep, long double time) { + void Writer::beginWriting(WriteModeTags write_mode, + std::size_t tstep, + long double time) 
{ + raise::ErrorIf(write_mode == WriteMode::None, "None is not a valid mode", HERE); raise::ErrorIf(p_adios == nullptr, "ADIOS pointer is null", HERE); - p_adios->ExitComputationBlock(); - if (m_writing_mode) { + if (m_active_mode != WriteMode::None) { raise::Fatal("Already writing", HERE); } - m_writing_mode = true; + m_active_mode = write_mode; try { - m_writer = m_io.Open(m_fname, m_mode); + std::string filename; + const std::string ext = m_engine == "hdf5" ? "h5" : "bp"; + if (m_separate_files) { + std::string mode_str; + if (m_active_mode == WriteMode::Fields) { + mode_str = "fields"; + } else if (m_active_mode == WriteMode::Particles) { + mode_str = "particles"; + } else if (m_active_mode == WriteMode::Spectra) { + mode_str = "spectra"; + } else { + raise::Fatal("Unknown write mode", HERE); + } + CallOnce( + [](auto& main_path, auto& mode_path) { + const std::filesystem::path main { main_path }; + const std::filesystem::path mode { mode_path }; + if (!std::filesystem::exists(main_path)) { + std::filesystem::create_directory(main_path); + } + if (!std::filesystem::exists(main / mode)) { + std::filesystem::create_directory(main / mode); + } + }, + m_fname, + mode_str); + filename = fmt::format("%s/%s/%s.%08lu.%s", + m_fname.c_str(), + mode_str.c_str(), + mode_str.c_str(), + tstep, + ext.c_str()); + m_mode = adios2::Mode::Write; + } else { + filename = fmt::format("%s.%s", m_fname.c_str(), ext.c_str()); + m_mode = std::filesystem::exists(filename) ? 
adios2::Mode::Append + : adios2::Mode::Write; + } + m_writer = m_io.Open(filename, m_mode); } catch (std::exception& e) { raise::Fatal(e.what(), HERE); } - m_mode = adios2::Mode::Append; m_writer.BeginStep(); m_writer.Put(m_io.InquireVariable("Step"), &tstep); m_writer.Put(m_io.InquireVariable("Time"), &time); } - void Writer::endWriting() { + void Writer::endWriting(WriteModeTags write_mode) { + raise::ErrorIf(write_mode == WriteMode::None, "None is not a valid mode", HERE); raise::ErrorIf(p_adios == nullptr, "ADIOS pointer is null", HERE); - if (!m_writing_mode) { + if (m_active_mode == WriteMode::None) { raise::Fatal("Not writing", HERE); } - m_writing_mode = false; + if (m_active_mode != write_mode) { + raise::Fatal("Writing mode mismatch", HERE); + } + m_active_mode = WriteMode::None; m_writer.EndStep(); m_writer.Close(); - p_adios->EnterComputationBlock(); } template void Writer::writeField(const std::vector&, @@ -360,36 +510,48 @@ namespace out { const std::string&, const ndfield_t&, std::size_t, + std::vector, + std::vector, bool); template void WriteField(adios2::IO&, adios2::Engine&, const std::string&, const ndfield_t&, std::size_t, + std::vector, + std::vector, bool); template void WriteField(adios2::IO&, adios2::Engine&, const std::string&, const ndfield_t&, std::size_t, + std::vector, + std::vector, bool); template void WriteField(adios2::IO&, adios2::Engine&, const std::string&, const ndfield_t&, std::size_t, + std::vector, + std::vector, bool); template void WriteField(adios2::IO&, adios2::Engine&, const std::string&, const ndfield_t&, std::size_t, + std::vector, + std::vector, bool); template void WriteField(adios2::IO&, adios2::Engine&, const std::string&, const ndfield_t&, std::size_t, + std::vector, + std::vector, bool); } // namespace out diff --git a/src/output/writer.h b/src/output/writer.h index ba24a3d65..a8abf4b12 100644 --- a/src/output/writer.h +++ b/src/output/writer.h @@ -36,15 +36,28 @@ namespace out { adios2::Engine m_writer; 
adios2::Mode m_mode { adios2::Mode::Write }; + bool m_separate_files; + // global shape of the fields array to output - adios2::Dims m_flds_g_shape; + std::vector m_flds_g_shape; // local corner of the fields array to output - adios2::Dims m_flds_l_corner; + std::vector m_flds_l_corner; // local shape of the fields array to output - adios2::Dims m_flds_l_shape; - bool m_flds_ghosts; - std::string m_engine; - std::string m_fname; + std::vector m_flds_l_shape; + + // downsampling factors for each dimension + std::vector m_dwn; + // starting cell in each dimension (not including ghosts) + std::vector m_flds_l_first; + + // same but downsampled + adios2::Dims m_flds_g_shape_dwn; + adios2::Dims m_flds_l_corner_dwn; + adios2::Dims m_flds_l_shape_dwn; + + bool m_flds_ghosts; + std::string m_engine; + std::string m_fname; std::map m_trackers; @@ -52,7 +65,7 @@ namespace out { std::vector m_prtl_writers; std::vector m_spectra_writers; - bool m_writing_mode { false }; + WriteModeTags m_active_mode { WriteMode::None }; public: Writer() {} @@ -61,7 +74,7 @@ namespace out { Writer(Writer&&) = default; - void init(adios2::ADIOS*, const std::string&, const std::string&); + void init(adios2::ADIOS*, const std::string&, const std::string&, bool); void setMode(adios2::Mode); @@ -73,7 +86,8 @@ namespace out { void defineMeshLayout(const std::vector&, const std::vector&, const std::vector&, - bool incl_ghosts, + const std::vector&, + bool, Coord); void defineFieldOutputs(const SimEngine&, const std::vector&); @@ -94,8 +108,8 @@ namespace out { void writeSpectrum(const array_t&, const std::string&); void writeSpectrumBins(const array_t&, const std::string&); - void beginWriting(std::size_t, long double); - void endWriting(); + void beginWriting(WriteModeTags, std::size_t, long double); + void endWriting(WriteModeTags); /* getters -------------------------------------------------------------- */ auto fname() const -> const std::string& {