diff --git a/CMakeLists.txt b/CMakeLists.txt index f83e6637c..06a7690d7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,6 +48,10 @@ set(mpi ${default_mpi} CACHE BOOL "Use MPI") +set(gpu_aware_mpi + ${default_gpu_aware_mpi} + CACHE BOOL "Enable GPU-aware MPI") + # -------------------------- Compilation settings -------------------------- # set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -57,8 +61,7 @@ if(${DEBUG} STREQUAL "OFF") set(CMAKE_BUILD_TYPE Release CACHE STRING "CMake build type") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG " - "-Wno-unused-local-typedefs -Wno-unknown-cuda-version") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG") else() set(CMAKE_BUILD_TYPE Debug @@ -89,6 +92,29 @@ include_directories(${plog_SRC}/include) # -------------------------------- Main code ------------------------------- # set_precision(${precision}) +if("${Kokkos_DEVICES}" MATCHES "CUDA") + add_compile_options("-D CUDA_ENABLED") + set(DEVICE_ENABLED ON) + add_compile_options("-D DEVICE_ENABLED") +elseif("${Kokkos_DEVICES}" MATCHES "HIP") + add_compile_options("-D HIP_ENABLED") + set(DEVICE_ENABLED ON) + add_compile_options("-D DEVICE_ENABLED") +elseif("${Kokkos_DEVICES}" MATCHES "SYCL") + add_compile_options("-D SYCL_ENABLED") + set(DEVICE_ENABLED ON) + add_compile_options("-D DEVICE_ENABLED") +else() + set(DEVICE_ENABLED OFF) +endif() + +if(("${Kokkos_DEVICES}" MATCHES "CUDA") + OR ("${Kokkos_DEVICES}" MATCHES "HIP") + OR ("${Kokkos_DEVICES}" MATCHES "SYCL")) + set(DEVICE_ENABLED ON) +else() + set(DEVICE_ENABLED OFF) +endif() # MPI if(${mpi}) @@ -96,6 +122,15 @@ if(${mpi}) include_directories(${MPI_CXX_INCLUDE_PATH}) add_compile_options("-D MPI_ENABLED") set(DEPENDENCIES ${DEPENDENCIES} MPI::MPI_CXX) + if(${DEVICE_ENABLED}) + if(${gpu_aware_mpi}) + add_compile_options("-D GPU_AWARE_MPI") + endif() + else() + set(gpu_aware_mpi + OFF + CACHE BOOL "Use explicit copy when using MPI + GPU") + endif() endif() # Output diff --git a/README.md 
b/README.md index 7287d52db..915a5b358 100644 --- a/README.md +++ b/README.md @@ -10,33 +10,20 @@ Our [detailed documentation](https://entity-toolkit.github.io/) includes everyth [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) -## Lead developers - -☕ __Hayk Hakobyan__ {[@haykh](https://github.com/haykh)} - -🥔 __Jens Mahlmann__ {[@jmahlmann](https://github.com/jmahlmann)} - -💁‍♂️ __Alexander Chernoglazov__ {[@SChernoglazov](https://github.com/SChernoglazov)} - -🧋 __Alisa Galishnikova__ {[@alisagk](https://github.com/alisagk)} - -🐬 __Sasha Philippov__ {[@sashaph](https://github.com/sashaph)} - ## Contributors (alphabetical) -🎸 __Ludwig Böss__ {[@LudwigBoess](https://github.com/LudwigBoess): PIC, framework} - -👀 __Yangyang Cai__ {[@StaticObserver](https://github.com/StaticObserver): GRPIC} - -🍵 __Benjamin Crinquand__ {[@bcrinquand](https://github.com/bcrinquand): GRPIC, cubed-sphere} - -🚂 __Evgeny Gorbunov__ {[@Alcauchy](https://github.com/Alcauchy): PIC, framework} - -:radio: __Siddhant Solanki__ {[@sidruns30](https://github.com/sidruns30): framework} - -🤷 __Arno Vanthieghem__ {[@vanthieg](https://github.com/vanthieg): framework, PIC} - -😺 __Muni Zhou__ {[@munizhou](https://github.com/munizhou): PIC} +* :guitar: Ludwig Böss {[@LudwigBoess](https://github.com/LudwigBoess)} +* :eyes: Yangyang Cai {[@StaticObserver](https://github.com/StaticObserver)} +* :person_tipping_hand: Alexander Chernoglazov {[@SChernoglazov](https://github.com/SChernoglazov)} +* :tea: Benjamin Crinquand {[@bcrinquand](https://github.com/bcrinquand)} +* :bubble_tea: Alisa Galishnikova {[@alisagk](https://github.com/alisagk)} +* :locomotive: Evgeny Gorbunov {[@Alcauchy](https://github.com/Alcauchy)} +* :coffee: Hayk Hakobyan {[@haykh](https://github.com/haykh)} +* :potato: Jens Mahlmann {[@jmahlmann](https://github.com/jmahlmann)} +* :dolphin: Sasha Philippov {[@sashaph](https://github.com/sashaph)} 
+* :radio: Siddhant Solanki {[@sidruns30](https://github.com/sidruns30)} +* :shrug: Arno Vanthieghem {[@vanthieg](https://github.com/vanthieg)} +* :cat: Muni Zhou {[@munizhou](https://github.com/munizhou)} ## Branch policy diff --git a/cmake/defaults.cmake b/cmake/defaults.cmake index 30e605a5c..2bfa9a61c 100644 --- a/cmake/defaults.cmake +++ b/cmake/defaults.cmake @@ -63,4 +63,26 @@ else() CACHE INTERNAL "Default flag for MPI") endif() +if(DEFINED ENV{Entity_MPI_DEVICE_COPY}) + set(default_mpi_device_copy + $ENV{Entity_MPI_DEVICE_COPY} + CACHE INTERNAL "Default flag for copying from device to host for MPI") +else() + set(default_mpi_device_copy + OFF + CACHE INTERNAL "Default flag for copying from device to host for MPI") +endif() + set_property(CACHE default_mpi PROPERTY TYPE BOOL) + +if(DEFINED ENV{Entity_ENABLE_GPU_AWARE_MPI}) + set(default_gpu_aware_mpi + $ENV{Entity_ENABLE_GPU_AWARE_MPI} + CACHE INTERNAL "Default flag for GPU-aware MPI") +else() + set(default_gpu_aware_mpi + ON + CACHE INTERNAL "Default flag for GPU-aware MPI") +endif() + +set_property(CACHE default_gpu_aware_mpi PROPERTY TYPE BOOL) diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index a21ea00e8..1780bf97e 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -58,7 +58,7 @@ function(find_or_fetch_dependency package_name header_only mode) FetchContent_Declare( ${package_name} GIT_REPOSITORY ${${package_name}_REPOSITORY} - GIT_TAG 4.5.01) + GIT_TAG 4.6.01) else() FetchContent_Declare(${package_name} GIT_REPOSITORY ${${package_name}_REPOSITORY}) diff --git a/cmake/report.cmake b/cmake/report.cmake index 5a38b0dd5..b0e299d87 100644 --- a/cmake/report.cmake +++ b/cmake/report.cmake @@ -36,7 +36,7 @@ printchoices( ${default_precision} "${Blue}" PRECISION_REPORT - 36) + 46) printchoices( "Output" "output" @@ -45,7 +45,7 @@ printchoices( ${default_output} "${Green}" OUTPUT_REPORT - 36) + 46) printchoices( "MPI" "mpi" @@ -54,7 +54,18 @@ printchoices( OFF "${Green}" 
MPI_REPORT - 36) + 46) +if(${mpi} AND ${DEVICE_ENABLED}) + printchoices( + "GPU-aware MPI" + "gpu_aware_mpi" + "${ON_OFF_VALUES}" + ${gpu_aware_mpi} + OFF + "${Green}" + GPU_AWARE_MPI_REPORT + 46) +endif() printchoices( "Debug mode" "DEBUG" @@ -63,7 +74,7 @@ printchoices( OFF "${Green}" DEBUG_REPORT - 36) + 46) if(NOT ${PROJECT_VERSION_TWEAK} EQUAL 0) set(VERSION_SYMBOL "v${PROJECT_VERSION_MAJOR}." "${PROJECT_VERSION_MINOR}.") @@ -111,13 +122,23 @@ string(REPLACE ";" "+" Kokkos_DEVICES "${Kokkos_DEVICES}") string( APPEND REPORT_TEXT - " - ARCH [${Magenta}Kokkos_ARCH_***${ColorReset}]: ${Kokkos_ARCH}" + " - ARCH [${Magenta}Kokkos_ARCH_***${ColorReset}]: " + "${Kokkos_ARCH}" "\n" - " - DEVICES [${Magenta}Kokkos_ENABLE_***${ColorReset}]: ${Kokkos_DEVICES}" + " - DEVICES [${Magenta}Kokkos_ENABLE_***${ColorReset}]: " + "${Kokkos_DEVICES}" "\n" " " ${MPI_REPORT} - "\n" + "\n") + +if(${mpi} AND ${DEVICE_ENABLED}) + string(APPEND REPORT_TEXT " " ${GPU_AWARE_MPI_REPORT} "\n") +endif() + +string( + APPEND + REPORT_TEXT " " ${DEBUG_REPORT} "\n" @@ -155,7 +176,6 @@ if(${Kokkos_DEVICES} MATCHES "CUDA") COMMAND bash -c ${cmd} OUTPUT_VARIABLE CUDACOMP_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) - message(STATUS "CUDACOMP: ${CUDACOMP_VERSION}") string( APPEND REPORT_TEXT diff --git a/cmake/styling.cmake b/cmake/styling.cmake index 878cb44a4..5f1e4a7ad 100644 --- a/cmake/styling.cmake +++ b/cmake/styling.cmake @@ -86,8 +86,6 @@ function( if(${Padding} EQUAL 0) list(LENGTH "${Choices}" nchoices) math(EXPR lastchoice "${nchoices} - 1") - set(ncols 4) - math(EXPR lastcol "${ncols} - 1") set(longest 0) foreach(ch IN LISTS Choices) @@ -97,6 +95,13 @@ function( endif() endforeach() + if(longest GREATER 20) + set(ncols 3) + else() + set(ncols 4) + endif() + math(EXPR lastcol "${ncols} - 1") + set(counter 0) foreach(ch IN LISTS Choices) if(NOT ${Value} STREQUAL "") diff --git a/cmake/tests.cmake b/cmake/tests.cmake index 0e108d365..189cc2cc4 100644 --- a/cmake/tests.cmake +++ 
b/cmake/tests.cmake @@ -8,8 +8,8 @@ add_subdirectory(${SRC_DIR}/metrics ${CMAKE_CURRENT_BINARY_DIR}/metrics) add_subdirectory(${SRC_DIR}/kernels ${CMAKE_CURRENT_BINARY_DIR}/kernels) add_subdirectory(${SRC_DIR}/archetypes ${CMAKE_CURRENT_BINARY_DIR}/archetypes) add_subdirectory(${SRC_DIR}/framework ${CMAKE_CURRENT_BINARY_DIR}/framework) +add_subdirectory(${SRC_DIR}/output ${CMAKE_CURRENT_BINARY_DIR}/output) if(${output}) - add_subdirectory(${SRC_DIR}/output ${CMAKE_CURRENT_BINARY_DIR}/output) add_subdirectory(${SRC_DIR}/checkpoint ${CMAKE_CURRENT_BINARY_DIR}/checkpoint) endif() @@ -25,8 +25,9 @@ elseif(${mpi} AND ${output}) list(APPEND TEST_DIRECTORIES framework) endif() +list(APPEND TEST_DIRECTORIES output) + if(${output}) - list(APPEND TEST_DIRECTORIES output) list(APPEND TEST_DIRECTORIES checkpoint) endif() diff --git a/dev/nix/kokkos.nix b/dev/nix/kokkos.nix index 6271604c5..c83a489e5 100644 --- a/dev/nix/kokkos.nix +++ b/dev/nix/kokkos.nix @@ -1,12 +1,13 @@ { pkgs ? import { }, + stdenv, arch, gpu, }: let name = "kokkos"; - version = "4.5.01"; + pversion = "4.6.01"; compilerPkgs = { "HIP" = with pkgs.rocmPackages; [ rocm-core @@ -16,15 +17,26 @@ let rocminfo rocm-smi ]; + "CUDA" = with pkgs.cudaPackages; [ + cudatoolkit + cuda_cudart + ]; "NONE" = [ pkgs.gcc13 ]; }; - cmakeFlags = { + cmakeExtraFlags = { "HIP" = [ + "-D Kokkos_ENABLE_HIP=ON" + "-D Kokkos_ARCH_${getArch { }}=ON" "-D CMAKE_C_COMPILER=hipcc" "-D CMAKE_CXX_COMPILER=hipcc" ]; + "CUDA" = [ + "-D Kokkos_ENABLE_CUDA=ON" + "-D Kokkos_ARCH_${getArch { }}=ON" + "-D CMAKE_CXX_COMPILER=$WRAPPER_PATH" + ]; "NONE" = [ ]; }; getArch = @@ -35,13 +47,13 @@ let arch; in -pkgs.stdenv.mkDerivation { +pkgs.stdenv.mkDerivation rec { pname = "${name}"; - version = "${version}"; + version = "${pversion}"; src = pkgs.fetchgit { url = "https://github.com/kokkos/kokkos/"; - rev = "${version}"; - sha256 = "sha256-cI2p+6J+8BRV5fXTDxxHTfh6P5PeeLUiF73o5zVysHQ="; + rev = "${pversion}"; + sha256 = 
"sha256-+yszUbdHqhIkJZiGLZ9Ln4DYUosuJWKhO8FkbrY0/tY="; }; nativeBuildInputs = with pkgs; [ @@ -50,14 +62,42 @@ pkgs.stdenv.mkDerivation { propagatedBuildInputs = compilerPkgs.${gpu}; - cmakeFlags = [ - "-D CMAKE_CXX_STANDARD=17" - "-D CMAKE_CXX_EXTENSIONS=OFF" - "-D CMAKE_POSITION_INDEPENDENT_CODE=TRUE" - "-D Kokkos_ARCH_${getArch { }}=ON" - (if gpu != "none" then "-D Kokkos_ENABLE_${gpu}=ON" else "") - "-D CMAKE_BUILD_TYPE=Release" - ] ++ cmakeFlags.${gpu}; + patchPhase = + if gpu == "CUDA" then + '' + export WRAPPER_PATH="$(mktemp -d)/nvcc_wrapper" + cp ${src}/bin/nvcc_wrapper $WRAPPER_PATH + substituteInPlace $WRAPPER_PATH --replace-fail "#!/usr/bin/env bash" "#!${stdenv.shell}" + chmod +x "$WRAPPER_PATH" + '' + else + ""; + + configurePhase = '' + cmake -B build -D CMAKE_BUILD_TYPE=Release \ + -D CMAKE_CXX_STANDARD=17 \ + -D CMAKE_CXX_EXTENSIONS=OFF \ + -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ + ${pkgs.lib.concatStringsSep " " cmakeExtraFlags.${gpu}} \ + -D CMAKE_INSTALL_PREFIX=$out + ''; + + buildPhase = '' + cmake --build build -j + ''; + + installPhase = '' + cmake --install build + ''; + + # cmakeFlags = [ + # "-D CMAKE_CXX_STANDARD=17" + # "-D CMAKE_CXX_EXTENSIONS=OFF" + # "-D CMAKE_POSITION_INDEPENDENT_CODE=TRUE" + # "-D Kokkos_ARCH_${getArch { }}=ON" + # (if gpu != "none" then "-D Kokkos_ENABLE_${gpu}=ON" else "") + # "-D CMAKE_BUILD_TYPE=Release" + # ] ++ (cmakeExtraFlags.${gpu} src); - enableParallelBuilding = true; + # enableParallelBuilding = true; } diff --git a/dev/nix/shell.nix b/dev/nix/shell.nix index 01d80298b..be4b87bcb 100644 --- a/dev/nix/shell.nix +++ b/dev/nix/shell.nix @@ -1,5 +1,8 @@ { - pkgs ? import { }, + pkgs ? import { + config.allowUnfree = true; + config.cudaSupport = gpu == "CUDA"; + }, gpu ? "NONE", arch ? "NATIVE", hdf5 ? 
true, @@ -14,12 +17,13 @@ let kokkosPkg = ( pkgs.callPackage ./kokkos.nix { inherit pkgs; + stdenv = pkgs.stdenv; arch = archUpper; gpu = gpuUpper; } ); envVars = { - compiler = rec { + compiler = { NONE = { CXX = "g++"; CC = "gcc"; @@ -28,16 +32,7 @@ let CXX = "hipcc"; CC = "hipcc"; }; - CUDA = NONE; - }; - kokkos = { - HIP = { - Kokkos_ENABLE_HIP = "ON"; - }; - CUDA = { - Kokkos_ENABLE_CUDA = "ON"; - }; - NONE = { }; + CUDA = { }; }; }; in diff --git a/dev/runners/README.md b/dev/runners/README.md index 957898fa7..0aac76e2d 100644 --- a/dev/runners/README.md +++ b/dev/runners/README.md @@ -24,4 +24,5 @@ docker run -e TOKEN= -e LABEL=amd-gpu --device=/dev/kfd --device=/dev/dri ```sh docker build -t ghrunner:cpu -f Dockerfile.runner.cpu . +docker run -e TOKEN= -e LABEL=cpu -dt ghrunner:cpu ``` diff --git a/extern/Kokkos b/extern/Kokkos index 175257a51..1b1383c60 160000 --- a/extern/Kokkos +++ b/extern/Kokkos @@ -1 +1 @@ -Subproject commit 175257a51ff29a0059ec48bcd233ee096b2c0438 +Subproject commit 1b1383c6001f3bfe9fe309ca923c2d786600cc79 diff --git a/input.example.toml b/input.example.toml index f519a2799..4a671fab6 100644 --- a/input.example.toml +++ b/input.example.toml @@ -107,16 +107,14 @@ particles = "" [grid.boundaries.match] - # Size of the matching layer for fields in physical (code) units: - # @type: float + # Size of the matching layer in each direction for fields in physical (code) units: + # @type: float or array of tuples # @default: 1% of the domain size (in shortest dimension) # @note: In spherical, this is the size of the layer in r from the outer wall - # @note: In cartesian, this is the same for all dimensions where applicable + # @example: ds = 1.5 (will set the same for all directions) + # @example: ds = [[1.5], [2.0, 1.0], [1.1]] (will duplicate 1.5 for +/- x1 and 1.1 for +/- x3) + # @example: ds = [[], [1.5], []] (will only set for x2) ds = "" - # Absorption coefficient for fields: - # @type: float: -inf < ... 
< inf, != 0 - # @default: 1.0 - coeff = "" [grid.boundaries.absorb] # Size of the absorption layer for particles in physical (code) units: @@ -434,6 +432,28 @@ # @default: false ghosts = "" + [output.stats] + # Toggle for the stats output: + # @type: bool + # @default: true + enable = "" + # Number of timesteps between stat outputs (overridden if `output.stats.interval_time != -1`): + # @type: unsigned int: > 0 + # @default: 100 + interval = "" + # Physical (code) time interval between stat outputs: + # @type: float + # @default: -1.0 (use `output.stats.interval`) + # @note: When `interval_time` < 0, the output is controlled by `interval`, otherwise by `interval_time` + interval_time = "" + # Field quantities to output: + # @type: array of strings + # @valid: fields: "B^2", "E^2", "ExB" + # @valid: moments: "N", "Charge", "Rho", "T00", "T0i", "Tij" + # @default: ["B^2", "E^2", "ExB", "Rho", "T00"] + # @note: Same notation as for `output.fields.quantities` + quantities = "" + [checkpoint] # Number of timesteps between checkpoints: # @type: unsigned int: > 0 diff --git a/minimal/CMakeLists.txt b/minimal/CMakeLists.txt new file mode 100644 index 000000000..b21dd0fec --- /dev/null +++ b/minimal/CMakeLists.txt @@ -0,0 +1,180 @@ +# cmake-lint: disable=C0103,C0111,E1120,R0913,R0915 +cmake_minimum_required(VERSION 3.16) +cmake_policy(SET CMP0110 NEW) + +set(PROJECT_NAME minimal-test) + +project(${PROJECT_NAME} LANGUAGES CXX C) +set(CMAKE_CXX_EXTENSIONS OFF) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +if(NOT DEBUG) # Release unless DEBUG is explicitly enabled (unset counts as OFF) + set(CMAKE_BUILD_TYPE + Release + CACHE STRING "CMake build type") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG") +else() + set(CMAKE_BUILD_TYPE + Debug + CACHE STRING "CMake build type") + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -DDEBUG -Wall -Wextra -Wno-unknown-pragmas") +endif() + +set(BUILD_TESTING + OFF + CACHE BOOL "Build tests") + +set(MODES + "KOKKOS;ADIOS2_NOMPI" + CACHE STRING 
"Build modes") + +macro(find_kokkos) # macro, not function: Kokkos_DEVICES/Kokkos_ARCH must reach the caller's scope + find_package(Kokkos QUIET) + if(NOT Kokkos_FOUND) + include(FetchContent) + FetchContent_Declare( + Kokkos + GIT_REPOSITORY https://github.com/kokkos/kokkos.git + GIT_TAG 4.6.01) + FetchContent_MakeAvailable(Kokkos) + endif() + if(NOT DEFINED Kokkos_ARCH + OR Kokkos_ARCH STREQUAL "" + OR NOT DEFINED Kokkos_DEVICES + OR Kokkos_DEVICES STREQUAL "") + if(${Kokkos_FOUND}) + include(${Kokkos_DIR}/KokkosConfigCommon.cmake) + elseif(NOT ${Kokkos_BUILD_DIR} STREQUAL "") + include(${Kokkos_BUILD_DIR}/KokkosConfigCommon.cmake) + else() + message( + STATUS "${Red}Kokkos_DIR and Kokkos_BUILD_DIR not set.${ColorReset}") + endif() + endif() +endmacro() + +function(find_adios2) + find_package(adios2 QUIET) + if(NOT adios2_FOUND) + include(FetchContent) + FetchContent_Declare( + adios2 + GIT_REPOSITORY https://github.com/ornladios/ADIOS2.git + GIT_TAG 2.10.2) + FetchContent_MakeAvailable(adios2) + endif() +endfunction() + +if("KOKKOS" IN_LIST MODES) + set(libs "") + set(exec kokkos.xc) + set(src ${CMAKE_CURRENT_SOURCE_DIR}/kokkos.cpp) + + find_kokkos() + list(APPEND libs Kokkos::kokkos) + + add_executable(${exec} ${src}) + + target_link_libraries(${exec} ${libs}) +endif() + +if("ADIOS2_NOMPI" IN_LIST MODES) + set(libs stdc++fs) + set(exec adios2-nompi.xc) + set(src ${CMAKE_CURRENT_SOURCE_DIR}/adios2.cpp) + + find_kokkos() + find_adios2() + list(APPEND libs Kokkos::kokkos adios2::cxx11) + + add_executable(${exec} ${src}) + + target_link_libraries(${exec} ${libs}) +endif() + +if("ADIOS2_MPI" IN_LIST MODES) + set(libs stdc++fs) + set(exec adios2-mpi.xc) + set(src ${CMAKE_CURRENT_SOURCE_DIR}/adios2.cpp) + + find_package(MPI REQUIRED) + find_kokkos() + find_adios2() + list(APPEND libs MPI::MPI_CXX Kokkos::kokkos adios2::cxx11_mpi) + + add_executable(${exec} ${src}) + + target_include_directories(${exec} PUBLIC ${MPI_CXX_INCLUDE_PATH}) + target_compile_options(${exec} PUBLIC "-D MPI_ENABLED") + target_link_libraries(${exec} ${libs}) +endif() 
+ +if("MPI" IN_LIST MODES) + set(libs "") + set(exec mpi.xc) # distinct target name so MPI and MPI_SIMPLE can be enabled together + set(src ${CMAKE_CURRENT_SOURCE_DIR}/mpi-simple.cpp) + + find_package(MPI REQUIRED) + find_kokkos() + list(APPEND libs MPI::MPI_CXX Kokkos::kokkos) + + add_executable(${exec} ${src}) + + target_include_directories(${exec} PUBLIC ${MPI_CXX_INCLUDE_PATH}) + target_link_libraries(${exec} ${libs}) + + set(GPU_AWARE_MPI + ON + CACHE BOOL "Enable GPU-aware MPI support") + + if(("${Kokkos_DEVICES}" MATCHES "CUDA") + OR ("${Kokkos_DEVICES}" MATCHES "HIP") + OR ("${Kokkos_DEVICES}" MATCHES "SYCL")) + set(DEVICE_ENABLED ON) + target_compile_options(${exec} PRIVATE -DDEVICE_ENABLED) + else() + set(DEVICE_ENABLED OFF) + endif() + + if(${GPU_AWARE_MPI}) + target_compile_options(${exec} PRIVATE -DGPU_AWARE_MPI) + endif() +endif() + +if("MPI_SIMPLE" IN_LIST MODES) + set(libs "") + set(exec mpi-simple.xc) + set(src ${CMAKE_CURRENT_SOURCE_DIR}/mpi-simple.cpp) + + find_package(MPI REQUIRED) + find_kokkos() + list(APPEND libs MPI::MPI_CXX Kokkos::kokkos) + + add_executable(${exec} ${src}) + + target_include_directories(${exec} PUBLIC ${MPI_CXX_INCLUDE_PATH}) + target_link_libraries(${exec} ${libs}) + + set(GPU_AWARE_MPI + ON + CACHE BOOL "Enable GPU-aware MPI support") + + if(("${Kokkos_DEVICES}" MATCHES "CUDA") + OR ("${Kokkos_DEVICES}" MATCHES "HIP") + OR ("${Kokkos_DEVICES}" MATCHES "SYCL")) + set(DEVICE_ENABLED ON) + target_compile_options(${exec} PRIVATE -DDEVICE_ENABLED) + else() + set(DEVICE_ENABLED OFF) + endif() + + if(${GPU_AWARE_MPI}) + target_compile_options(${exec} PRIVATE -DGPU_AWARE_MPI) + endif() +endif() + +message(STATUS "Build modes: ${MODES}") diff --git a/minimal/README.md b/minimal/README.md new file mode 100644 index 000000000..b7e5691a2 --- /dev/null +++ b/minimal/README.md @@ -0,0 +1,21 @@ +# Minimal third-party tests + +These minimal tests are designed to test the third-party libraries outside of the `Entity` scope. 
These tests will show whether there is an issue with the way third-party libraries are installed (or the cluster is set up). + +To compile: + +```sh +cmake -B build -D MODES="MPI;MPI_SIMPLE;ADIOS2_NOMPI;ADIOS2_MPI" +cmake --build build -j +``` + +This will produce executables, one for each test, in the `build` directory. + +The `MODES` flag determines the tests it will generate and can be a subset of the following (separated with a `;`): + +- `MPI` test of pure MPI + Kokkos (can also add `-D GPU_AWARE_MPI=OFF` to disable the GPU-aware MPI explicitly); +- `MPI_SIMPLE` a simpler test of pure MPI + Kokkos; +- `ADIOS2_NOMPI` test of ADIOS2 library without MPI; +- `ADIOS2_MPI` same but with MPI. + +All tests also use `Kokkos`. To build `ADIOS2` or `Kokkos` in-tree, you may pass the regular `-D Kokkos_***` and `-D ADIOS2_***` flags to `cmake`. diff --git a/minimal/adios2.cpp b/minimal/adios2.cpp new file mode 100644 index 000000000..cd4ca3d6f --- /dev/null +++ b/minimal/adios2.cpp @@ -0,0 +1,438 @@ +#include +#include +#include + +#if defined(MPI_ENABLED) + #include + #define MPI_ROOT_RANK 0 +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +auto pad(const std::string&, std::size_t, char, bool = false) -> std::string; + +template +void CallOnce(Func, Args&&...); + +template +auto define_constdim_array(adios2::IO&, + const std::vector&, + const std::vector&, + const std::vector&) -> std::string; + +template +auto define_unknowndim_array(adios2::IO&) -> std::string; + +template +void put_constdim_array(adios2::IO&, adios2::Engine&, const A&, const std::string&); + +template +void put_unknowndim_array(adios2::IO&, + adios2::Engine&, + const Kokkos::View&, + std::size_t, + const std::string&); + +auto main(int argc, char** argv) -> int { + try { + Kokkos::initialize(argc, argv); +#if defined(MPI_ENABLED) + MPI_Init(&argc, &argv); + adios2::ADIOS adios { MPI_COMM_WORLD }; +#else + adios2::ADIOS adios; +#endif + + std::string engine = "hdf5"; + 
if (argc > 1) { + engine = std::string(argv[1]); + if (engine != "hdf5" && engine != "bp") { + throw std::invalid_argument("Engine must be either 'hdf5' or 'bp'"); + } + } + const std::string format = (engine == "hdf5") ? "h5" : "bp"; + + auto io = adios.DeclareIO("Test::Output"); + io.SetEngine(engine); + + io.DefineAttribute("Attr::Int", 42); + io.DefineAttribute("Attr::Float", 42.0f); + io.DefineAttribute("Attr::Double", 42.0); + io.DefineAttribute("Attr::String", engine); + + io.DefineVariable("Var::Int"); + io.DefineVariable("Var::Size_t"); + + int rank = 0, size = 1; +#if defined(MPI_ENABLED) + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); +#endif + + // global sizes + const std::size_t Sx_1d = (size - 1) * 1000 + 230; + const std::size_t Sx_2d = 100, Sy_2d = (size - 1) * 100 + 23; + const std::size_t Sx_3d = 10, Sy_3d = 10, Sz_3d = (size - 1) * 10 + 3; + + // local sizes + const std::size_t sx_1d = (rank != size - 1) ? 1000 : 230; + const std::size_t sx_2d = 100, sy_2d = (rank != size - 1) ? 100 : 23; + const std::size_t sx_3d = 10, sy_3d = 10, sz_3d = (rank != size - 1) ? 
10 : 3; + + // displacements + const std::size_t ox_1d = rank * 1000; + const std::size_t ox_2d = 0, oy_2d = rank * 100; + const std::size_t ox_3d = 0, oy_3d = 0, oz_3d = rank * 10; + + CallOnce( + [](auto&& size) { + std::cout << "Running ADIOS2 test" << std::endl; +#if defined(MPI_ENABLED) + std::cout << "- Number of MPI ranks: " << size << std::endl; +#else + (void)size; + std::cout << "- No MPI" << std::endl; +#endif + }, + size); + + std::vector vars; + + { + vars.push_back( + define_constdim_array(io, { Sx_1d }, { ox_1d }, { sx_1d })); + vars.push_back(define_constdim_array(io, + { Sx_2d, Sy_2d }, + { ox_2d, oy_2d }, + { sx_2d, sy_2d })); + vars.push_back(define_constdim_array(io, + { Sx_3d, Sy_3d, Sz_3d }, + { ox_3d, oy_3d, oz_3d }, + { sx_3d, sy_3d, sz_3d })); + vars.push_back( + define_constdim_array(io, { Sx_1d }, { ox_1d }, { sx_1d })); + vars.push_back(define_constdim_array(io, + { Sx_2d, Sy_2d }, + { ox_2d, oy_2d }, + { sx_2d, sy_2d })); + vars.push_back(define_constdim_array(io, + { Sx_3d, Sy_3d, Sz_3d }, + { ox_3d, oy_3d, oz_3d }, + { sx_3d, sy_3d, sz_3d })); + } + + { + vars.push_back(define_unknowndim_array(io)); + vars.push_back(define_unknowndim_array(io)); + vars.push_back(define_unknowndim_array(io)); + } + + Kokkos::View constdim_1d_f { "constdim_1d_f", sx_1d }; + Kokkos::View constdim_2d_f { "constdim_2d_f", sx_2d, sy_2d }; + Kokkos::View constdim_3d_f { "constdim_3d_f", sx_3d, sy_3d, sz_3d }; + + Kokkos::View constdim_1d_d { "constdim_1d_d", sx_1d }; + Kokkos::View constdim_2d_d { "constdim_2d_d", sx_2d, sy_2d }; + Kokkos::View constdim_3d_d { "constdim_3d_d", sx_3d, sy_3d, sz_3d }; + + { + // fill 1d + Kokkos::parallel_for( + "fill_constdim_1d_f", + Kokkos::RangePolicy<>(0, sx_1d), + KOKKOS_LAMBDA(std::size_t i) { + constdim_1d_f(i) = static_cast(ox_1d + i); + constdim_1d_d(i) = static_cast(ox_1d + i); + }); + + // fill 2d + Kokkos::parallel_for( + "fill_constdim_2d_f", + Kokkos::MDRangePolicy>({ 0, 0 }, { sx_2d, sy_2d }), + 
KOKKOS_LAMBDA(std::size_t i, std::size_t j) { + constdim_2d_f(i, j) = static_cast(ox_2d + i + (oy_2d + j) * Sx_2d); + constdim_2d_d(i, j) = static_cast(ox_2d + i + (oy_2d + j) * Sx_2d); + }); + + // fill 3d + Kokkos::parallel_for( + "fill_constdim_3d_f", + Kokkos::MDRangePolicy>({ 0, 0, 0 }, { sx_3d, sy_3d, sz_3d }), + KOKKOS_LAMBDA(std::size_t i, std::size_t j, std::size_t k) { + constdim_3d_f(i, j, k) = static_cast( + ox_3d + i + (oy_3d + j + (oz_3d + k) * Sy_3d) * Sx_3d); + constdim_3d_d(i, j, k) = static_cast( + ox_3d + i + (oy_3d + j + (oz_3d + k) * Sy_3d) * Sx_3d); + }); + } + + { + // test multiple file mode + const std::string path = "steps"; + CallOnce( + [](auto&& path) { + const std::filesystem::path parent_path { path }; + if (std::filesystem::exists(parent_path)) { + std::filesystem::remove_all(parent_path); + } + std::filesystem::create_directory(path); + }, + path); + for (auto step { 0u }; step < 5u; ++step) { + const std::string filename = path + "/step_" + + pad(std::to_string(step * 20u), 6, '0') + + "." 
+ format; + auto writer = io.Open(filename, adios2::Mode::Write); + writer.BeginStep(); + + { + // constant dim arrays + put_constdim_array(io, + writer, + constdim_1d_f, + vars[0]); + put_constdim_array(io, + writer, + constdim_2d_f, + vars[1]); + put_constdim_array(io, + writer, + constdim_3d_f, + vars[2]); + put_constdim_array(io, + writer, + constdim_1d_d, + vars[3]); + put_constdim_array(io, + writer, + constdim_2d_d, + vars[4]); + put_constdim_array(io, + writer, + constdim_3d_d, + vars[5]); + } + + { + // unknown dim arrays + const std::size_t nelems = static_cast( + (std::sin((step + 1 + rank) * 0.25) + 2.0) * 1000.0); + + Kokkos::View unknowndim_f { "unknowndim_f", nelems }; + Kokkos::View unknowndim_d { "unknowndim_d", nelems }; + Kokkos::View unknowndim_i { "unknowndim_i", nelems }; + + // fill unknown dim arrays + Kokkos::parallel_for( + "fill_unknowndim", + Kokkos::RangePolicy<>(0, nelems), + KOKKOS_LAMBDA(std::size_t i) { + unknowndim_f(i) = static_cast(i + step * 1000); + unknowndim_d(i) = static_cast(i + step * 1000); + unknowndim_i(i) = static_cast(i + step * 1000); + }); + + put_unknowndim_array(io, writer, unknowndim_f, nelems, vars[6]); + put_unknowndim_array(io, writer, unknowndim_d, nelems, vars[7]); + put_unknowndim_array(io, writer, unknowndim_i, nelems, vars[8]); + } + + writer.EndStep(); + writer.Close(); + } + } + { + // test single file mode + const std::string filename = "allsteps." 
+ format; + adios2::Mode mode = adios2::Mode::Write; + for (auto step { 0u }; step < 5u; ++step) { + auto writer = io.Open(filename, mode); + writer.BeginStep(); + + { + // constant dim arrays + put_constdim_array(io, + writer, + constdim_1d_f, + vars[0]); + put_constdim_array(io, + writer, + constdim_2d_f, + vars[1]); + put_constdim_array(io, + writer, + constdim_3d_f, + vars[2]); + put_constdim_array(io, + writer, + constdim_1d_d, + vars[3]); + put_constdim_array(io, + writer, + constdim_2d_d, + vars[4]); + put_constdim_array(io, + writer, + constdim_3d_d, + vars[5]); + } + + { + // unknown dim arrays + const std::size_t nelems = static_cast( + (std::sin((step + 1 + rank) * 0.25) + 2.0) * 1000.0); + + Kokkos::View unknowndim_f { "unknowndim_f", nelems }; + Kokkos::View unknowndim_d { "unknowndim_d", nelems }; + Kokkos::View unknowndim_i { "unknowndim_i", nelems }; + + // fill unknown dim arrays + Kokkos::parallel_for( + "fill_unknowndim", + Kokkos::RangePolicy<>(0, nelems), + KOKKOS_LAMBDA(std::size_t i) { + unknowndim_f(i) = static_cast(i + step * 1000); + unknowndim_d(i) = static_cast(i + step * 1000); + unknowndim_i(i) = static_cast(i + step * 1000); + }); + + put_unknowndim_array(io, writer, unknowndim_f, nelems, vars[6]); + put_unknowndim_array(io, writer, unknowndim_d, nelems, vars[7]); + put_unknowndim_array(io, writer, unknowndim_i, nelems, vars[8]); + } + + writer.EndStep(); + writer.Close(); + mode = adios2::Mode::Append; + } + } + } catch (const std::exception& e) { +#if defined(MPI_ENABLED) + if (MPI_COMM_WORLD != MPI_COMM_NULL) { + MPI_Finalize(); + } +#endif + if (Kokkos::is_initialized()) { + Kokkos::finalize(); + } + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + +#if defined(MPI_ENABLED) + MPI_Finalize(); +#endif + Kokkos::finalize(); + return 0; +} + +auto pad(const std::string& str, std::size_t n, char c, bool right) -> std::string { + if (n <= str.size()) { + return str; + } + if (right) { + return str + std::string(n - 
str.size(), c); + } + return std::string(n - str.size(), c) + str; +} + +#if !defined(MPI_ENABLED) + +template +void CallOnce(Func func, Args&&... args) { + func(std::forward(args)...); +} + +#else + +template +void CallOnce(Func func, Args&&... args) { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank == MPI_ROOT_RANK) { + func(std::forward(args)...); + } +} +#endif + +template +auto define_constdim_array(adios2::IO& io, + const std::vector& glob_shape, + const std::vector& loc_corner, + const std::vector& loc_shape) -> std::string { + const std::string arrname = "ConstantDimArr" + + std::to_string(glob_shape.size()) + + "D::" + std::string(typeid(T).name()); + io.DefineVariable(arrname, glob_shape, loc_corner, loc_shape, adios2::ConstantDims); + return arrname; +} + +template +auto define_unknowndim_array(adios2::IO& io) -> std::string { + const std::string arrname = "UnknownDimArr::" + std::string(typeid(T).name()); + io.DefineVariable(arrname, + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); + return arrname; +} + +template +void put_constdim_array(adios2::IO& io, + adios2::Engine& writer, + const A& array, + const std::string& varname) { + auto var = io.InquireVariable(varname); + if (!var) { + throw std::runtime_error("Variable not found: " + varname); + } + auto array_h = Kokkos::create_mirror_view(array); + Kokkos::deep_copy(array_h, array); + writer.Put(var, array_h, adios2::Mode::Sync); // Sync: copy now; array_h may be freed before EndStep() +} + +template +void put_unknowndim_array(adios2::IO& io, + adios2::Engine& writer, + const Kokkos::View& array, + std::size_t nelems, + const std::string& varname) { + auto var = io.InquireVariable(varname); + if (!var) { + throw std::runtime_error("Variable not found: " + varname); + } + std::size_t glob_nelems = nelems; + std::size_t offset_nelems = 0u; +#if defined(MPI_ENABLED) + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + std::vector all_nelems(size); + MPI_Allgather(&nelems, + 1, + 
MPI_UNSIGNED_LONG, + all_nelems.data(), + 1, + MPI_UNSIGNED_LONG, + MPI_COMM_WORLD); + glob_nelems = 0u; + for (int r = 0; r < size; ++r) { + if (r < rank) { + offset_nelems += all_nelems[r]; + } + glob_nelems += all_nelems[r]; + } +#endif + var.SetShape({ glob_nelems }); + var.SetSelection(adios2::Box({ offset_nelems }, { nelems })); + auto array_h = Kokkos::create_mirror_view(array); + Kokkos::deep_copy(array_h, array); + writer.Put(var, array_h, adios2::Mode::Sync); // Sync: copy now; the caller's views are destroyed before EndStep() +} diff --git a/minimal/kokkos.cpp b/minimal/kokkos.cpp new file mode 100644 index 000000000..2be2996a8 --- /dev/null +++ b/minimal/kokkos.cpp @@ -0,0 +1,58 @@ +#include + +#include +#include + +auto main(int argc, char** argv) -> int { + try { + Kokkos::initialize(argc, argv); + Kokkos::DefaultExecutionSpace {}.print_configuration(std::cout); + + std::cout << "1D views" << std::endl; + for (const auto& sz : { 100u, 10000u, 1000000u }) { + Kokkos::View view { "test_view", sz }; + Kokkos::parallel_for( + "fill_1d", + Kokkos::RangePolicy<>(0, sz), + KOKKOS_LAMBDA(std::size_t i) { view(i) = static_cast(i); }); + Kokkos::fence(); + std::cout << "- allocated " << view.size() << std::endl; + } + + std::cout << "2D views" << std::endl; + for (const auto& sz : { 10u, 100u, 1000u }) { + Kokkos::View view { "test_view", sz, 2 * sz }; + Kokkos::parallel_for( + "fill_2d", + Kokkos::MDRangePolicy>({ 0, 0 }, { sz, 2 * sz }), + KOKKOS_LAMBDA(std::size_t i, std::size_t j) { + view(i, j) = static_cast(i * 2 * sz + j); + }); + Kokkos::fence(); + std::cout << "- allocated " << view.size() << std::endl; + } + + std::cout << "3D views" << std::endl; + for (const auto& sz : { 10u, 100u }) { + Kokkos::View view { "test_view", sz, 2 * sz, 3 * sz }; + Kokkos::parallel_for( + "fill_3d", + Kokkos::MDRangePolicy>({ 0, 0, 0 }, { sz, 2 * sz, 3 * sz }), + KOKKOS_LAMBDA(std::size_t i, std::size_t j, std::size_t k) { + view(i, j, k) = static_cast(i * 2 * sz * 3 * sz + j * 3 * sz + k); + }); + Kokkos::fence(); + std::cout << "- allocated " << 
view.size() << std::endl; + } + + } catch (const std::exception& e) { + if (Kokkos::is_initialized()) { + Kokkos::finalize(); + } + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + + Kokkos::finalize(); + return 0; +} diff --git a/minimal/mpi-simple.cpp b/minimal/mpi-simple.cpp new file mode 100644 index 000000000..4663d73be --- /dev/null +++ b/minimal/mpi-simple.cpp @@ -0,0 +1,84 @@ +#include +#include + +#include +#include +#include + +auto main(int argc, char** argv) -> int { + try { + Kokkos::initialize(argc, argv); + MPI_Init(&argc, &argv); + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + const auto nelems = 500u; + const auto nsend = 10u; + const auto nrecv = 10u; + + if (rank == 0) { + std::cout << "Running the simple MPI communication test" << std::endl; + std::cout << "- Number of MPI ranks: " << size << std::endl; + std::cout << "- Number elements to send/recv (2D): " << nelems << "x" + << nsend << std::endl; +#if defined(GPU_AWARE_MPI) && defined(DEVICE_ENABLED) + std::cout << "- GPU-aware MPI is enabled" << std::endl; +#else + std::cout << "- GPU-aware MPI is disabled" << std::endl; +#endif + } + + Kokkos::View view("view", nelems, nelems); + Kokkos::View send("send", nsend, nelems); + Kokkos::View recv("recv", nrecv, nelems); + Kokkos::deep_copy( + send, + Kokkos::subview(view, std::make_pair(0u, nsend), Kokkos::ALL)); + +#if defined(GPU_AWARE_MPI) || !defined(DEVICE_ENABLED) + MPI_Sendrecv(send.data(), + nsend * nelems, + MPI_FLOAT, + (rank + 1) % size, + 0, + recv.data(), + nrecv * nelems, + MPI_FLOAT, + (rank - 1 + size) % size, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); +#else + auto send_h = Kokkos::create_mirror_view(send); + auto recv_h = Kokkos::create_mirror_view(recv); + Kokkos::deep_copy(send_h, send); + MPI_Sendrecv(send_h.data(), + nsend * nelems, + MPI_FLOAT, + (rank + 1) % size, + 0, + recv_h.data(), + nrecv * nelems, + MPI_FLOAT, + (rank - 1 + size) % size, + 0, + 
MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + Kokkos::deep_copy(recv, recv_h); +#endif + } catch (const std::exception& e) { + if (MPI_COMM_WORLD != MPI_COMM_NULL) { + MPI_Finalize(); + } + if (Kokkos::is_initialized()) { + Kokkos::finalize(); + } + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + + MPI_Finalize(); + Kokkos::finalize(); + return 0; +} diff --git a/minimal/mpi.cpp b/minimal/mpi.cpp new file mode 100644 index 000000000..821b5fcae --- /dev/null +++ b/minimal/mpi.cpp @@ -0,0 +1,306 @@ +#include +#include + +#include +#include +#include +#include +#include + +#define MPI_ROOT_RANK 0 +#define N_GHOSTS 2 + +template +void CallOnce(Func, Args&&...); + +template +using R = std::conditional_t< + D == 1, + T*, + std::conditional_t>>; + +template +void send_recv(int send_to, + int recv_from, + bool sendxmin, + const Kokkos::View[N]>& view, + std::size_t smallsize) { + const auto mpi_type = std::is_same_v ? MPI_FLOAT : MPI_DOUBLE; + std::size_t nsend = 0; + Kokkos::View[N]> send_buffer; + if (send_to == MPI_PROC_NULL) { + nsend = 0; + } else { + std::pair range = { 0, N_GHOSTS }; + if (not sendxmin) { + range = { view.extent(0) - N_GHOSTS, view.extent(0) }; + } + if constexpr (D == 1) { + nsend = N_GHOSTS * N; + send_buffer = Kokkos::View[N]> { + "comm_1d_send_buffer", N_GHOSTS + }; + Kokkos::deep_copy(send_buffer, Kokkos::subview(view, range, Kokkos::ALL)); + } else if constexpr (D == 2) { + nsend = N_GHOSTS * smallsize * N; + send_buffer = Kokkos::View[N]> { + "comm_2d_send_buffer", N_GHOSTS, smallsize + }; + Kokkos::deep_copy(send_buffer, + Kokkos::subview(view, range, Kokkos::ALL, Kokkos::ALL)); + } else if constexpr (D == 3) { + nsend = N_GHOSTS * smallsize * smallsize * N; + send_buffer = Kokkos::View[N]> { + "comm_3d_send_buffer", N_GHOSTS, smallsize, smallsize + }; + Kokkos::deep_copy( + send_buffer, + Kokkos::subview(view, range, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL)); + } + } + + std::size_t nrecv = 0; + Kokkos::View[N]> recv_buffer; + if 
(recv_from == MPI_PROC_NULL) { + nrecv = 0; + } else { + if constexpr (D == 1) { + nrecv = N_GHOSTS * N; + recv_buffer = Kokkos::View[N]> { + "comm_1d_recv_buffer", N_GHOSTS + }; + } else if constexpr (D == 2) { + nrecv = N_GHOSTS * smallsize * N; + recv_buffer = Kokkos::View[N]> { + "comm_2d_recv_buffer", N_GHOSTS, smallsize + }; + } else if constexpr (D == 3) { + nrecv = N_GHOSTS * smallsize * smallsize * N; + recv_buffer = Kokkos::View[N]> { + "comm_3d_recv_buffer", N_GHOSTS, smallsize, smallsize + }; + } + } + + if (nrecv == 0 and nsend == 0) { + throw std::invalid_argument( + "Both nsend and nrecv are zero, no communication to perform."); + } else if (nrecv > 0 and nsend > 0) { +#if defined(GPU_AWARE_MPI) || !defined(DEVICE_ENABLED) + MPI_Sendrecv(send_buffer.data(), + nsend, + mpi_type, + send_to, + 0, + recv_buffer.data(), + nrecv, + mpi_type, + recv_from, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); +#else + auto send_buffer_h = Kokkos::create_mirror_view(send_buffer); + auto recv_buffer_h = Kokkos::create_mirror_view(recv_buffer); + Kokkos::deep_copy(send_buffer_h, send_buffer); + MPI_Sendrecv(send_buffer_h.data(), + nsend, + mpi_type, + send_to, + 0, + recv_buffer_h.data(), + nrecv, + mpi_type, + recv_from, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + Kokkos::deep_copy(recv_buffer, recv_buffer_h); +#endif + } else if (nrecv > 0) { +#if defined(GPU_AWARE_MPI) || !defined(DEVICE_ENABLED) + MPI_Recv(recv_buffer.data(), + nrecv, + mpi_type, + recv_from, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); +#else + auto recv_buffer_h = Kokkos::create_mirror_view(recv_buffer); + MPI_Recv(recv_buffer_h.data(), + nrecv, + mpi_type, + recv_from, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + Kokkos::deep_copy(recv_buffer, recv_buffer_h); +#endif + } else if (nsend > 0) { +#if defined(GPU_AWARE_MPI) || !defined(DEVICE_ENABLED) + MPI_Send(send_buffer.data(), nsend, mpi_type, send_to, 0, MPI_COMM_WORLD); +#else + auto send_buffer_h = 
Kokkos::create_mirror_view(send_buffer); + Kokkos::deep_copy(send_buffer_h, send_buffer); + MPI_Send(send_buffer_h.data(), nsend, mpi_type, send_to, 0, MPI_COMM_WORLD); +#endif + } + + if (nrecv > 0) { + std::pair range = { view.extent(0) - N_GHOSTS, + view.extent(0) }; + if (not sendxmin) { + range = { 0, N_GHOSTS }; + } + if constexpr (D == 1) { + Kokkos::deep_copy(Kokkos::subview(view, range, Kokkos::ALL), recv_buffer); + } else if constexpr (D == 2) { + Kokkos::deep_copy(Kokkos::subview(view, range, Kokkos::ALL, Kokkos::ALL), + recv_buffer); + } else if constexpr (D == 3) { + Kokkos::deep_copy( + Kokkos::subview(view, range, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL), + recv_buffer); + } + } +} + +template +void comm(int rank, int size, std::size_t bigsize, std::size_t smallsize) { + static_assert(D <= 3 and D != 0, "Only dimensions 1, 2, and 3 are supported."); + static_assert(N == 3 or N == 6, "Only 3 or 6 last indices are supported."); + static_assert(std::is_same_v || std::is_same_v, + "Only float and double types are supported."); + + // smallsize must be the same for all ranks + if (bigsize < 2 * N_GHOSTS) { + throw std::invalid_argument( + "bigsize must be at least 2 * N_GHOSTS for communication to work."); + } + + Kokkos::View[N]> view; + + // define and fill the view + if constexpr (D == 1) { + view = Kokkos::View[N]> { + "comm_1d_view", bigsize + }; + Kokkos::parallel_for( + "fill_comm_1d_view", + Kokkos::MDRangePolicy>({ 0, 0 }, + { view.extent(0), view.extent(1) }), + KOKKOS_LAMBDA(std::size_t i, std::size_t c) { + view(i, c) = static_cast(i * c + rank); + }); + } else if constexpr (D == 2) { + view = Kokkos::View[N]> { + "comm_2d_view", bigsize, smallsize + }; + Kokkos::parallel_for( + "fill_comm_2d_view", + Kokkos::MDRangePolicy>( + { 0, 0, 0 }, + { view.extent(0), view.extent(1), view.extent(2) }), + KOKKOS_LAMBDA(std::size_t i, std::size_t j, std::size_t c) { + view(i, j, c) = static_cast(i * j * c + rank); + }); + } else if constexpr (D == 3) { + 
view = Kokkos::View[N]> { + "comm_3d_view", bigsize, smallsize, smallsize + }; + Kokkos::parallel_for( + "fill_comm_3d_view", + Kokkos::MDRangePolicy>( + { 0, 0, 0, 0 }, + { view.extent(0), view.extent(1), view.extent(2), view.extent(3) }), + KOKKOS_LAMBDA(std::size_t i, std::size_t j, std::size_t k, std::size_t c) { + view(i, j, k, c) = static_cast(i * j * k * c + rank); + }); + } + + // communicate + const int r_neighbor = (rank != size - 1) ? rank + 1 : MPI_PROC_NULL; + const int l_neighbor = (rank != 0) ? rank - 1 : MPI_PROC_NULL; + + send_recv(r_neighbor, l_neighbor, false, view, smallsize); + send_recv(l_neighbor, r_neighbor, true, view, smallsize); + + MPI_Barrier(MPI_COMM_WORLD); + CallOnce([]() { + std::cout << "Finished " << D << "D "; + if constexpr (std::is_same_v) { + std::cout << "float"; + } else { + std::cout << "double"; + } + std::cout << " communication test" << std::endl; + }); +} + +auto main(int argc, char** argv) -> int { + try { + Kokkos::initialize(argc, argv); + MPI_Init(&argc, &argv); + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + const std::size_t bigsize = (std::sin((rank + 1) * 0.25) + 2) * 1e3; + const std::size_t smallsize = 123; + + CallOnce( + [](auto&& size, auto&& bigsize, auto&& smallsize) { + std::cout << "Running the MPI communication test" << std::endl; + std::cout << "- Number of MPI ranks: " << size << std::endl; + std::cout << "- Big size: " << bigsize << std::endl; + std::cout << "- Small size: " << smallsize << std::endl; +#if defined(GPU_AWARE_MPI) && defined(DEVICE_ENABLED) + std::cout << "- GPU-aware MPI is enabled" << std::endl; +#else + std::cout << "- GPU-aware MPI is disabled" << std::endl; +#endif + }, + size, + bigsize, + smallsize); + + comm(rank, size, bigsize, smallsize); + comm(rank, size, bigsize, smallsize); + comm(rank, size, bigsize, smallsize); + + comm(rank, size, bigsize, smallsize); + comm(rank, size, bigsize, smallsize); + comm(rank, size, 
bigsize, smallsize); + + comm(rank, size, bigsize, smallsize); + comm(rank, size, bigsize, smallsize); + comm(rank, size, bigsize, smallsize); + + comm(rank, size, bigsize, smallsize); + comm(rank, size, bigsize, smallsize); + comm(rank, size, bigsize, smallsize); + } catch (const std::exception& e) { + if (MPI_COMM_WORLD != MPI_COMM_NULL) { + MPI_Finalize(); + } + if (Kokkos::is_initialized()) { + Kokkos::finalize(); + } + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + + MPI_Finalize(); + Kokkos::finalize(); + return 0; +} + +template +void CallOnce(Func func, Args&&... args) { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank == MPI_ROOT_RANK) { + func(std::forward(args)...); + } +} diff --git a/setups/srpic/em_vacuum/em_vacuum.py b/setups/legacy/em_vacuum/em_vacuum.py similarity index 100% rename from setups/srpic/em_vacuum/em_vacuum.py rename to setups/legacy/em_vacuum/em_vacuum.py diff --git a/setups/srpic/em_vacuum/em_vacuum.toml b/setups/legacy/em_vacuum/em_vacuum.toml similarity index 100% rename from setups/srpic/em_vacuum/em_vacuum.toml rename to setups/legacy/em_vacuum/em_vacuum.toml diff --git a/setups/srpic/em_vacuum/pgen.hpp b/setups/legacy/em_vacuum/pgen.hpp similarity index 100% rename from setups/srpic/em_vacuum/pgen.hpp rename to setups/legacy/em_vacuum/pgen.hpp diff --git a/setups/srpic/example/pgen.hpp b/setups/legacy/example/pgen.hpp similarity index 82% rename from setups/srpic/example/pgen.hpp rename to setups/legacy/example/pgen.hpp index 3739243cd..cf6c12b7a 100644 --- a/setups/srpic/example/pgen.hpp +++ b/setups/legacy/example/pgen.hpp @@ -44,31 +44,31 @@ namespace user { template struct ExtForce { - const std::vector species { 1, 2 }; + const std::vector species { 1, 2 }; ExtForce() = default; - Inline auto fx1(const unsigned short& sp, - const real_t& time, - const coord_t& x_Ph) const -> real_t { + Inline auto fx1(const spidx_t& sp, + const simtime_t& time, + const coord_t& x_Ph) const -> real_t { 
(void)sp; (void)time; (void)x_Ph; return ZERO; } - Inline auto fx2(const unsigned short& sp, - const real_t& time, - const coord_t& x_Ph) const -> real_t { + Inline auto fx2(const spidx_t& sp, + const simtime_t& time, + const coord_t& x_Ph) const -> real_t { (void)sp; (void)time; (void)x_Ph; return ZERO; } - Inline auto fx3(const unsigned short& sp, - const real_t& time, - const coord_t& x_Ph) const -> real_t { + Inline auto fx3(const spidx_t& sp, + const simtime_t& time, + const coord_t& x_Ph) const -> real_t { (void)sp; (void)time; (void)x_Ph; diff --git a/setups/srpic/langmuir/langmuir.py b/setups/legacy/langmuir/langmuir.py similarity index 100% rename from setups/srpic/langmuir/langmuir.py rename to setups/legacy/langmuir/langmuir.py diff --git a/setups/srpic/langmuir/langmuir.toml b/setups/legacy/langmuir/langmuir.toml similarity index 100% rename from setups/srpic/langmuir/langmuir.toml rename to setups/legacy/langmuir/langmuir.toml diff --git a/setups/srpic/langmuir/pgen.hpp b/setups/legacy/langmuir/pgen.hpp similarity index 98% rename from setups/srpic/langmuir/pgen.hpp rename to setups/legacy/langmuir/pgen.hpp index 2a23b17f7..28dbd24c5 100644 --- a/setups/srpic/langmuir/pgen.hpp +++ b/setups/legacy/langmuir/pgen.hpp @@ -37,7 +37,7 @@ namespace user { Inline void operator()(const coord_t& x_Ph, vec_t& v, - unsigned short sp) const override { + spidx_t sp) const override { if (sp == 1) { const auto k = math::sqrt(SQR(kx1) + SQR(kx2) + SQR(kx3)); if constexpr (M::Dim == Dim::_1D) { diff --git a/setups/srpic/magnetar/magnetar.py b/setups/legacy/magnetar/magnetar.py similarity index 100% rename from setups/srpic/magnetar/magnetar.py rename to setups/legacy/magnetar/magnetar.py diff --git a/setups/srpic/magnetar/magnetar.toml b/setups/legacy/magnetar/magnetar.toml similarity index 100% rename from setups/srpic/magnetar/magnetar.toml rename to setups/legacy/magnetar/magnetar.toml diff --git a/setups/srpic/magnetar/pgen.hpp b/setups/legacy/magnetar/pgen.hpp 
similarity index 100% rename from setups/srpic/magnetar/pgen.hpp rename to setups/legacy/magnetar/pgen.hpp diff --git a/setups/wip/magpump/pgen.hpp b/setups/legacy/magpump/pgen.hpp similarity index 98% rename from setups/wip/magpump/pgen.hpp rename to setups/legacy/magpump/pgen.hpp index 21d4c8882..045552aff 100644 --- a/setups/wip/magpump/pgen.hpp +++ b/setups/legacy/magpump/pgen.hpp @@ -68,7 +68,7 @@ namespace user { Inline void operator()(const coord_t&, vec_t& v_Ph, - unsigned short) const override { + spidx_t) const override { v_Ph[0] = -vin; } diff --git a/setups/wip/reconnection/pgen.hpp b/setups/legacy/rec-gravity/pgen.hpp similarity index 61% rename from setups/wip/reconnection/pgen.hpp rename to setups/legacy/rec-gravity/pgen.hpp index e97bc518a..e8c461418 100644 --- a/setups/wip/reconnection/pgen.hpp +++ b/setups/legacy/rec-gravity/pgen.hpp @@ -18,6 +18,40 @@ namespace user { using namespace ntt; + template + struct Gravity { + const std::vector species { 1, 2 }; + + Gravity(real_t f, real_t tscale, real_t ymid) + : force { f } + , tscale { tscale } + , ymid { ymid } {} + + Inline auto fx1(const spidx_t&, const simtime_t&, const coord_t&) const + -> real_t { + return ZERO; + } + + Inline auto fx2(const spidx_t&, const simtime_t& t, const coord_t& x_Ph) const + -> real_t { + const auto sign { (x_Ph[1] < ymid) ? 
ONE : -ONE }; + const auto t_ { static_cast(t) }; + if (t_ > tscale) { + return sign * force; + } else { + return sign * force * (ONE - math::cos(constant::PI * t_ / tscale)) / TWO; + } + } + + Inline auto fx3(const spidx_t&, const simtime_t&, const coord_t&) const + -> real_t { + return ZERO; + } + + private: + const real_t force, tscale, ymid; + }; + template struct CurrentLayer : public arch::SpatialDistribution { CurrentLayer(const M& metric, real_t width, real_t yi) @@ -41,7 +75,7 @@ namespace user { , y1 { y1 } , y2 { y2 } {} - Inline auto bx1(const coord_t& x_Ph) const -> real_t { + Inline auto bx3(const coord_t& x_Ph) const -> real_t { return Bmag * (math::tanh((x_Ph[1] - y1) / width) - math::tanh((x_Ph[1] - y2) / width) - 1); } @@ -67,11 +101,14 @@ namespace user { const real_t Bmag, width, overdensity, y1, y2, bg_temp; InitFields init_flds; + Gravity ext_force; + inline PGen(const SimulationParams& p, const Metadomain& m) : arch::ProblemGenerator(p) , Bmag { p.template get("setup.Bmag", 1.0) } , width { p.template get("setup.width") } , overdensity { p.template get("setup.overdensity") } + , bg_temp { p.template get("setup.bg_temp") } , y1 { m.mesh().extent(in::x2).first + INV_4 * (m.mesh().extent(in::x2).second - m.mesh().extent(in::x2).first) } @@ -79,7 +116,12 @@ namespace user { 3 * INV_4 * (m.mesh().extent(in::x2).second - m.mesh().extent(in::x2).first) } , init_flds { Bmag, width, y1, y2 } - , bg_temp { p.template get("setup.bg_temp") } {} + , ext_force { + p.template get("setup.fmag", 0.1) * + p.template get("scales.omegaB0"), + (m.mesh().extent(in::x1).second - m.mesh().extent(in::x1).first), + INV_2 * (m.mesh().extent(in::x2).second + m.mesh().extent(in::x2).first) + } {} inline PGen() {} @@ -91,51 +133,40 @@ namespace user { const auto injector = arch::UniformInjector( energy_dist, { 1, 2 }); - arch::InjectUniform>( - params, - local_domain, - injector, - HALF); - + arch::InjectUniform(params, + local_domain, + injector, + ONE); + // current 
layers const auto sigma = params.template get("scales.sigma0"); const auto c_omp = params.template get("scales.skindepth0"); const auto cs_drift_beta = math::sqrt(sigma) * c_omp / (width * overdensity); const auto cs_drift_gamma = ONE / math::sqrt(ONE - SQR(cs_drift_beta)); const auto cs_drift_u = cs_drift_beta * cs_drift_gamma; const auto cs_temp = HALF * sigma / overdensity; - // current layer #1 - auto edist_cs_1 = arch::Maxwellian(local_domain.mesh.metric, + + for (auto i = 0; i < 2; ++i) { + const auto drift_vel = (i == 0) ? cs_drift_u : -cs_drift_u; + const auto y_cs = (i == 0) ? y1 : y2; + auto edist_cs = arch::Maxwellian(local_domain.mesh.metric, local_domain.random_pool, cs_temp, - cs_drift_u, - in::x3, + drift_vel, + in::x1, false); - const auto sdist_cs_1 = CurrentLayer(local_domain.mesh.metric, width, y1); - const auto inj_cs_1 = arch::NonUniformInjector( - edist_cs_1, - sdist_cs_1, - { 1, 2 }); - arch::InjectNonUniform(params, + const auto sdist_cs = CurrentLayer(local_domain.mesh.metric, + width, + y_cs); + const auto inj_cs = arch::NonUniformInjector( + edist_cs, + sdist_cs, + { 1, 2 }); + arch::InjectNonUniform(params, local_domain, - inj_cs_1, + inj_cs, overdensity); - // current layer #2 - const auto edist_cs_2 = arch::Maxwellian(local_domain.mesh.metric, - local_domain.random_pool, - cs_temp, - -cs_drift_u, - in::x3, - false); - const auto sdist_cs_2 = CurrentLayer(local_domain.mesh.metric, width, y2); - const auto inj_cs_2 = arch::NonUniformInjector( - edist_cs_2, - sdist_cs_2, - { 1, 2 }); - arch::InjectNonUniform(params, - local_domain, - inj_cs_2, - overdensity); - } + } + } // namespace user }; } // namespace user diff --git a/setups/wip/rec-gravity/rec-gravity.toml b/setups/legacy/rec-gravity/rec-gravity.toml similarity index 58% rename from setups/wip/rec-gravity/rec-gravity.toml rename to setups/legacy/rec-gravity/rec-gravity.toml index f8d5b94ee..f29b090e3 100644 --- a/setups/wip/rec-gravity/rec-gravity.toml +++ 
b/setups/legacy/rec-gravity/rec-gravity.toml @@ -1,21 +1,21 @@ [simulation] - name = "rec-gravity" - engine = "srpic" + name = "rec-gravity" + engine = "srpic" runtime = 20.0 [grid] resolution = [2000, 4000] - extent = [[-0.5, 0.5], [-1.0, 1.0]] + extent = [[-0.5, 0.5], [-1.0, 1.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] particles = [["PERIODIC"], ["PERIODIC"]] - + [scales] - larmor0 = 3.1e-4 + larmor0 = 3.1e-4 skindepth0 = 1e-3 [algorithms] @@ -28,26 +28,26 @@ ppc0 = 8.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 + label = "e-" + mass = 1.0 + charge = -1.0 maxnpart = 1e8 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 + label = "e+" + mass = 1.0 + charge = 1.0 maxnpart = 1e8 [setup] - Bmag = 1.0 - width = 0.04 - bg_temp = 1e-4 + Bmag = 1.0 + width = 0.04 + bg_temp = 1e-4 overdensity = 3.0 - angle = 0.0 - + angle = 0.0 + [output] - format = "hdf5" + format = "hdf5" interval_time = 0.36 [output.fields] diff --git a/setups/srpic/shocktest/pgen.hpp b/setups/legacy/shocktest/pgen.hpp similarity index 100% rename from setups/srpic/shocktest/pgen.hpp rename to setups/legacy/shocktest/pgen.hpp diff --git a/setups/srpic/shocktest/shock.py b/setups/legacy/shocktest/shock.py similarity index 100% rename from setups/srpic/shocktest/shock.py rename to setups/legacy/shocktest/shock.py diff --git a/setups/srpic/shocktest/shock.toml b/setups/legacy/shocktest/shock.toml similarity index 100% rename from setups/srpic/shocktest/shock.toml rename to setups/legacy/shocktest/shock.toml diff --git a/setups/wip/spider/pgen.hpp b/setups/legacy/spider/pgen.hpp similarity index 100% rename from setups/wip/spider/pgen.hpp rename to setups/legacy/spider/pgen.hpp diff --git a/setups/srpic/magnetosphere/magnetosphere.toml b/setups/srpic/magnetosphere/magnetosphere.toml index 4c7c9117d..1a4af8a09 100644 --- a/setups/srpic/magnetosphere/magnetosphere.toml +++ 
b/setups/srpic/magnetosphere/magnetosphere.toml @@ -11,7 +11,7 @@ metric = "qspherical" [grid.boundaries] - fields = [["ATMOSPHERE", "ABSORB"]] + fields = [["ATMOSPHERE", "MATCH"]] particles = [["ATMOSPHERE", "ABSORB"]] [grid.boundaries.absorb] @@ -39,7 +39,7 @@ larmor_max = 1.0 [particles] - ppc0 = 5.0 + ppc0 = 10.0 use_weights = true clear_interval = 100 @@ -75,5 +75,4 @@ enable = false [diagnostics] - interval = 50 - colored_stdout = true + interval = 50 diff --git a/setups/srpic/reconnection/pgen.hpp b/setups/srpic/reconnection/pgen.hpp new file mode 100644 index 000000000..e92de5847 --- /dev/null +++ b/setups/srpic/reconnection/pgen.hpp @@ -0,0 +1,341 @@ +#ifndef PROBLEM_GENERATOR_H +#define PROBLEM_GENERATOR_H + +#include "enums.h" +#include "global.h" + +#include "arch/directions.h" +#include "arch/kokkos_aliases.h" +#include "arch/traits.h" +#include "utils/numeric.h" + +#include "archetypes/energy_dist.h" +#include "archetypes/particle_injector.h" +#include "archetypes/problem_generator.h" +#include "archetypes/spatial_dist.h" +#include "framework/domain/metadomain.h" + +#include "kernels/particle_moments.hpp" + +namespace user { + using namespace ntt; + + template + struct CurrentLayer : public arch::SpatialDistribution { + CurrentLayer(const M& metric, real_t cs_width, real_t center_x, real_t cs_y) + : arch::SpatialDistribution { metric } + , cs_width { cs_width } + , center_x { center_x } + , cs_y { cs_y } {} + + Inline auto operator()(const coord_t& x_Ph) const -> real_t override { + return ONE / SQR(math::cosh((x_Ph[1] - cs_y) / cs_width)) * + (ONE - math::exp(-SQR((x_Ph[0] - center_x) / cs_width))); + } + + private: + const real_t cs_width, center_x, cs_y; + }; + + // field initializer + template + struct InitFields { + InitFields(real_t bg_B, real_t bg_Bguide, real_t cs_width, real_t cs_y) + : bg_B { bg_B } + , bg_Bguide { bg_Bguide } + , cs_width { cs_width } + , cs_y { cs_y } {} + + Inline auto bx1(const coord_t& x_Ph) const -> real_t { + return 
bg_B * (math::tanh((x_Ph[1] - cs_y) / cs_width)); + } + + Inline auto bx3(const coord_t&) const -> real_t { + return bg_Bguide; + } + + private: + const real_t bg_B, bg_Bguide, cs_width, cs_y; + }; + + template + struct BoundaryFieldsInX1 { + BoundaryFieldsInX1(real_t bg_B, + real_t bg_Bguide, + real_t beta_rec, + real_t cs_width, + real_t cs_x, + real_t cs_y) + : bg_B { bg_B } + , bg_Bguide { bg_Bguide } + , beta_rec { beta_rec } + , cs_width { cs_width } + , cs_x { cs_x } + , cs_y { cs_y } {} + + Inline auto bx1(const coord_t& x_Ph) const -> real_t { + return bg_B * (math::tanh((x_Ph[1] - cs_y) / cs_width)); + } + + Inline auto bx2(const coord_t& x_Ph) const -> real_t { + return beta_rec * bg_B * (math::tanh((x_Ph[0] - cs_x) / cs_width)); + } + + Inline auto bx3(const coord_t&) const -> real_t { + return bg_Bguide; + } + + Inline auto ex1(const coord_t& x_Ph) const -> real_t { + return beta_rec * bg_Bguide * math::tanh((x_Ph[1] - cs_y) / cs_width); + } + + Inline auto ex2(const coord_t&) const -> real_t { + return ZERO; + } + + Inline auto ex3(const coord_t&) const -> real_t { + return -beta_rec * bg_B; + } + + private: + const real_t bg_B, bg_Bguide, beta_rec, cs_width, cs_x, cs_y; + }; + + template + struct BoundaryFieldsInX2 { + BoundaryFieldsInX2(real_t bg_B, real_t bg_Bguide, real_t cs_width, real_t cs_y) + : bg_B { bg_B } + , bg_Bguide { bg_Bguide } + , cs_width { cs_width } + , cs_y { cs_y } {} + + Inline auto bx1(const coord_t& x_Ph) const -> real_t { + return bg_B * (math::tanh((x_Ph[1] - cs_y) / cs_width)); + } + + Inline auto bx2(const coord_t&) const -> real_t { + return ZERO; + } + + Inline auto bx3(const coord_t&) const -> real_t { + return bg_Bguide; + } + + Inline auto ex1(const coord_t&) const -> real_t { + return ZERO; + } + + Inline auto ex2(const coord_t&) const -> real_t { + return ZERO; + } + + Inline auto ex3(const coord_t&) const -> real_t { + return ZERO; + } + + private: + const real_t bg_B, bg_Bguide, cs_width, cs_y; + }; + + // 
constant particle density for particle boundaries + template + struct ConstDens { + Inline auto operator()(const coord_t& x_Ph) const -> real_t { + return ONE; + } + }; + template + using spatial_dist_t = arch::Replenish>; + + template + struct PGen : public arch::ProblemGenerator { + // compatibility traits for the problem generator + static constexpr auto engines { traits::compatible_with::value }; + static constexpr auto metrics { traits::compatible_with::value }; + static constexpr auto dimensions { + traits::compatible_with::value + }; + + // for easy access to variables in the child class + using arch::ProblemGenerator::D; + using arch::ProblemGenerator::C; + using arch::ProblemGenerator::params; + + const real_t bg_B, bg_Bguide, bg_temperature, inj_ypad; + const real_t cs_width, cs_overdensity, cs_x, cs_y; + const real_t ymin, ymax; + const simtime_t t_open; + bool bc_opened { false }; + + Metadomain& metadomain; + + InitFields init_flds; + + inline PGen(const SimulationParams& p, Metadomain& m) + : arch::ProblemGenerator(p) + , bg_B { p.template get("setup.bg_B", 1.0) } + , bg_Bguide { p.template get("setup.bg_Bguide", 0.0) } + , bg_temperature { p.template get("setup.bg_temperature", 0.001) } + , inj_ypad { p.template get("setup.inj_ypad", (real_t)0.05) } + , cs_width { p.template get("setup.cs_width") } + , cs_overdensity { p.template get("setup.cs_overdensity") } + , cs_x { INV_2 * + (m.mesh().extent(in::x1).second + m.mesh().extent(in::x1).first) } + , cs_y { INV_2 * + (m.mesh().extent(in::x2).second + m.mesh().extent(in::x2).first) } + , ymin { m.mesh().extent(in::x2).first } + , ymax { m.mesh().extent(in::x2).second } + , t_open { p.template get( + "setup.t_open", + 1.5 * HALF * + (m.mesh().extent(in::x1).second - m.mesh().extent(in::x1).first)) } + , metadomain { m } + , init_flds { bg_B, bg_Bguide, cs_width, cs_y } {} + + inline PGen() {} + + auto MatchFieldsInX1(simtime_t) const -> BoundaryFieldsInX1 { + return BoundaryFieldsInX1 { bg_B, bg_Bguide, 
(real_t)0.1, + cs_width, cs_x, cs_y }; + } + + auto MatchFieldsInX2(simtime_t) const -> BoundaryFieldsInX2 { + return BoundaryFieldsInX2 { bg_B, bg_Bguide, cs_width, cs_y }; + } + + inline void InitPrtls(Domain& local_domain) { + // background + const auto energy_dist = arch::Maxwellian(local_domain.mesh.metric, + local_domain.random_pool, + bg_temperature); + const auto injector = arch::UniformInjector( + energy_dist, + { 1, 2 }); + arch::InjectUniform>( + params, + local_domain, + injector, + ONE); + + const auto sigma = params.template get("scales.sigma0"); + const auto c_omp = params.template get("scales.skindepth0"); + const auto cs_drift_beta = math::sqrt(sigma) * c_omp / + (cs_width * cs_overdensity); + const auto cs_drift_gamma = ONE / math::sqrt(ONE - SQR(cs_drift_beta)); + const auto cs_drift_u = cs_drift_beta * cs_drift_gamma; + const auto cs_temperature = HALF * sigma / cs_overdensity; + + // current layer + auto edist_cs = arch::Maxwellian(local_domain.mesh.metric, + local_domain.random_pool, + cs_temperature, + cs_drift_u, + in::x3, + false); + const auto sdist_cs = CurrentLayer(local_domain.mesh.metric, + cs_width, + cs_x, + cs_y); + const auto inj_cs = arch::NonUniformInjector( + edist_cs, + sdist_cs, + { 1, 2 }); + arch::InjectNonUniform(params, + local_domain, + inj_cs, + cs_overdensity); + } + + void CustomPostStep(timestep_t, simtime_t time, Domain& domain) { + // open boundaries if not yet opened at time = t_open + if ((t_open > 0.0) and (not bc_opened) and (time > t_open)) { + bc_opened = true; + metadomain.setFldsBC(bc_in::Mx1, FldsBC::MATCH); + metadomain.setPrtlBC(bc_in::Mx1, PrtlBC::ABSORB); + metadomain.setFldsBC(bc_in::Px1, FldsBC::MATCH); + metadomain.setPrtlBC(bc_in::Px1, PrtlBC::ABSORB); + } + + const auto energy_dist = arch::Maxwellian(domain.mesh.metric, + domain.random_pool, + bg_temperature); + + const auto dx = domain.mesh.metric.template sqrt_h_<1, 1>({}); + + boundaries_t inj_box_up, inj_box_down; + boundaries_t probe_box_up, 
probe_box_down; + inj_box_up.push_back(Range::All); + inj_box_down.push_back(Range::All); + probe_box_up.push_back(Range::All); + probe_box_down.push_back(Range::All); + inj_box_up.push_back({ ymax - inj_ypad - 10 * dx, ymax - inj_ypad }); + inj_box_down.push_back({ ymin + inj_ypad, ymin + inj_ypad + 10 * dx }); + probe_box_up.push_back({ ymax - inj_ypad - 10 * dx, ymax - inj_ypad }); + probe_box_down.push_back({ ymin + inj_ypad, ymin + inj_ypad + 10 * dx }); + + if constexpr (M::Dim == Dim::_3D) { + inj_box_up.push_back(Range::All); + inj_box_down.push_back(Range::All); + } + + { + // compute density of species #1 and #2 + const auto use_weights = params.template get( + "particles.use_weights"); + const auto ni2 = domain.mesh.n_active(in::x2); + const auto inv_n0 = ONE / params.template get("scales.n0"); + + auto scatter_buff = Kokkos::Experimental::create_scatter_view( + domain.fields.buff); + Kokkos::deep_copy(domain.fields.buff, ZERO); + for (const auto sp : std::vector { 1, 2 }) { + const auto& prtl_spec = domain.species[sp - 1]; + // clang-format off + Kokkos::parallel_for( + "ComputeMoments", + prtl_spec.rangeActiveParticles(), + kernel::ParticleMoments_kernel({}, scatter_buff, 0u, + prtl_spec.i1, prtl_spec.i2, prtl_spec.i3, + prtl_spec.dx1, prtl_spec.dx2, prtl_spec.dx3, + prtl_spec.ux1, prtl_spec.ux2, prtl_spec.ux3, + prtl_spec.phi, prtl_spec.weight, prtl_spec.tag, + prtl_spec.mass(), prtl_spec.charge(), + use_weights, + domain.mesh.metric, domain.mesh.flds_bc(), + ni2, inv_n0, 0u)); + // clang-format on + } + Kokkos::Experimental::contribute(domain.fields.buff, scatter_buff); + } + + const auto injector_up = arch::KeepConstantInjector( + energy_dist, + { 1, 2 }, + 0u, + probe_box_up); + const auto injector_down = arch::KeepConstantInjector( + energy_dist, + { 1, 2 }, + 0u, + probe_box_down); + + arch::InjectUniform( + params, + domain, + injector_up, + ONE, + params.template get("particles.use_weights"), + inj_box_up); + arch::InjectUniform( + params, + 
domain, + injector_down, + ONE, + params.template get("particles.use_weights"), + inj_box_down); + } + }; + +} // namespace user + +#endif diff --git a/setups/srpic/reconnection/reconnection.toml b/setups/srpic/reconnection/reconnection.toml new file mode 100644 index 000000000..db3dbde72 --- /dev/null +++ b/setups/srpic/reconnection/reconnection.toml @@ -0,0 +1,53 @@ +[simulation] + name = "reconnection" + engine = "srpic" + runtime = 10.0 + +[grid] + resolution = [1024, 512] + extent = [[-1.0, 1.0], [-0.5, 0.5]] + + [grid.metric] + metric = "minkowski" + + [grid.boundaries] + fields = [["MATCH", "MATCH"], ["MATCH", "MATCH"], ["PERIODIC"]] + particles = [["ABSORB", "ABSORB"], ["ABSORB", "ABSORB"], ["PERIODIC"]] + +[scales] + larmor0 = 2e-4 + skindepth0 = 2e-3 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 2.0 + + [[particles.species]] + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e7 + + [[particles.species]] + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e7 + +[setup] + Bmag = 1.0 + width = 0.01 + bg_temp = 1e-4 + overdensity = 3.0 + +[output] + format = "hdf5" + interval_time = 0.1 + + [output.fields] + quantities = ["N_1", "N_2", "E", "B", "J"] diff --git a/setups/srpic/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp index ad260bda0..6bd6f21a9 100644 --- a/setups/srpic/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -14,6 +14,7 @@ #include "archetypes/problem_generator.h" #include "framework/domain/metadomain.h" +#include #include namespace user { @@ -66,6 +67,7 @@ namespace user { const real_t Btheta, Bphi, Vx, Bmag; }; + template struct PGen : public arch::ProblemGenerator { // compatibility traits for the problem generator @@ -80,8 +82,10 @@ namespace user { using arch::ProblemGenerator::C; using arch::ProblemGenerator::params; + // domain properties + const real_t global_xmin, global_xmax; // gas properties - const real_t drift_ux, temperature, filling_fraction; + const real_t 
drift_ux, temperature, temperature_ratio, filling_fraction; // injector properties const real_t injector_velocity, injection_start, dt; const int injection_frequency; @@ -89,10 +93,13 @@ namespace user { real_t Btheta, Bphi, Bmag; InitFields init_flds; - inline PGen(const SimulationParams& p, const Metadomain& m) + inline PGen(const SimulationParams& p, const Metadomain& global_domain) : arch::ProblemGenerator { p } + , global_xmin { global_domain.mesh().extent(in::x1).first } + , global_xmax { global_domain.mesh().extent(in::x1).second } , drift_ux { p.template get("setup.drift_ux") } , temperature { p.template get("setup.temperature") } + , temperature_ratio { p.template get("setup.temperature_ratio") } , Bmag { p.template get("setup.Bmag", ZERO) } , Btheta { p.template get("setup.Btheta", ZERO) } , Bphi { p.template get("setup.Bphi", ZERO) } @@ -109,39 +116,52 @@ namespace user { return init_flds; } - auto ResetFields(const em& comp) const -> real_t { + auto FixFieldsConst(const bc_in&, const em& comp) const + -> std::pair { if (comp == em::ex1) { - return init_flds.ex1({ ZERO }); + return { init_flds.ex1({ ZERO }), true }; } else if (comp == em::ex2) { - return init_flds.ex2({ ZERO }); + return { ZERO, true }; } else if (comp == em::ex3) { - return init_flds.ex3({ ZERO }); + return { ZERO, true }; } else if (comp == em::bx1) { - return init_flds.bx1({ ZERO }); + return { init_flds.bx1({ ZERO }), true }; } else if (comp == em::bx2) { - return init_flds.bx2({ ZERO }); + return { init_flds.bx2({ ZERO }), true }; } else if (comp == em::bx3) { - return init_flds.bx3({ ZERO }); + return { init_flds.bx3({ ZERO }), true }; } else { raise::Error("Invalid component", HERE); - return ZERO; + return { ZERO, false }; } } inline void InitPrtls(Domain& local_domain) { + /* + * Plasma setup as partially filled box + * + * Plasma setup: + * + * global_xmin global_xmax + * | | + * V V + * |:::::::::::|..........................| + * ^ + * | + * filling_fraction + */ + // minimum 
and maximum position of particles - real_t xg_min = local_domain.mesh.extent(in::x1).first; - real_t xg_max = local_domain.mesh.extent(in::x1).first + - filling_fraction * (local_domain.mesh.extent(in::x1).second - - local_domain.mesh.extent(in::x1).first); + real_t xg_min = global_xmin; + real_t xg_max = global_xmin + filling_fraction * (global_xmax - global_xmin); // define box to inject into boundaries_t box; // loop over all dimensions - for (unsigned short d { 0 }; d < static_cast(M::Dim); ++d) { + for (auto d { 0u }; d < (unsigned int)M::Dim; ++d) { // compute the range for the x-direction - if (d == static_cast(in::x1)) { + if (d == static_cast(in::x1)) { box.push_back({ xg_min, xg_max }); } else { // inject into full range in other directions @@ -149,26 +169,26 @@ namespace user { } } - // spatial distribution of the particles - // -> hack to use the uniform distribution in NonUniformInjector - const auto spatial_dist = arch::Piston(local_domain.mesh.metric, - xg_min, - xg_max, - in::x1); + // species #1 -> e^- + // species #2 -> protons // energy distribution of the particles - const auto energy_dist = arch::Maxwellian(local_domain.mesh.metric, - local_domain.random_pool, - temperature, - -drift_ux, - in::x1); - - const auto injector = arch::NonUniformInjector( + const auto energy_dist = arch::TwoTemperatureMaxwellian( + local_domain.mesh.metric, + local_domain.random_pool, + { temperature_ratio * temperature * local_domain.species[1].mass() , + temperature }, + { 1, 2 }, + -drift_ux, + in::x1); + + // we want to set up a uniform density distribution + const auto injector = arch::UniformInjector( energy_dist, - spatial_dist, { 1, 2 }); - arch::InjectNonUniform>( + // inject uniformly within the defined box + arch::InjectUniform>( params, local_domain, injector, @@ -177,7 +197,22 @@ namespace user { box); } - void CustomPostStep(std::size_t step, long double time, Domain& domain) { + void CustomPostStep(timestep_t step, simtime_t time, Domain& domain) { + + 
/* + * Replenish plasma in a moving injector + * + * Injector setup: + * + * global_xmin purge/replenish global_xmax + * | x_init | | + * V v V V + * |:::::::::::;::::::::::|\\\\\\\\|......| + * xmin xmax + * ^ + * | + * moving injector + */ // check if the injector should be active if (step % injection_frequency != 0) { @@ -185,47 +220,40 @@ namespace user { } // initial position of injector - const auto x_init = domain.mesh.extent(in::x1).first + - filling_fraction * (domain.mesh.extent(in::x1).second - - domain.mesh.extent(in::x1).first); - - // check if injector is supposed to start moving already - const auto dt_inj = time - injection_start > ZERO ? time - injection_start - : ZERO; - - // compute the position of the injector - auto xmax = x_init + injector_velocity * (dt_inj + dt); - if (xmax >= domain.mesh.extent(in::x1).second) { - xmax = domain.mesh.extent(in::x1).second; + const auto x_init = global_xmin + + filling_fraction * (global_xmax - global_xmin); + + // compute the position of the injector after the current timestep + auto xmax = x_init + injector_velocity * + (std::max(time - injection_start, ZERO) + dt); + if (xmax >= global_xmax) { + xmax = global_xmax; } - // define box to inject into - boundaries_t box; - // loop over all dimension - for (auto d = 0u; d < M::Dim; ++d) { - if (d == 0) { - box.push_back({ xmax - drift_ux / math::sqrt(1 + SQR(drift_ux)) * dt - - injection_frequency * dt, - xmax }); - } else { - box.push_back(Range::All); - } + // compute the beginning of the injected region + auto xmin = xmax - injection_frequency * dt; + if (xmin <= global_xmin) { + xmin = global_xmin; } // define indice range to reset fields boundaries_t incl_ghosts; for (auto d = 0; d < M::Dim; ++d) { - incl_ghosts.push_back({ true, true }); + incl_ghosts.push_back({ false, false }); } - auto fields_box = box; - // check if the box is still inside the domain - if (xmax + injection_frequency * dt < domain.mesh.extent(in::x1).second) { - fields_box[0].second += 
injection_frequency * dt; - } else { - // if right side of the box is outside of the domain -> truncate box - fields_box[0].second = domain.mesh.extent(in::x1).second; + + // define box to reset fields + boundaries_t purge_box; + // loop over all dimension + for (auto d = 0u; d < M::Dim; ++d) { + if (d == 0) { + purge_box.push_back({ xmin, global_xmax }); + } else { + purge_box.push_back(Range::All); + } } - const auto extent = domain.mesh.ExtentToRange(fields_box, incl_ghosts); + + const auto extent = domain.mesh.ExtentToRange(purge_box, incl_ghosts); tuple_t x_min { 0 }, x_max { 0 }; for (auto d = 0; d < M::Dim; ++d) { x_min[d] = extent[d].first; @@ -239,20 +267,19 @@ namespace user { init_flds, domain.mesh.metric }); - /* tag particles inside the injection zone as dead */ + const auto& mesh = domain.mesh; // loop over particle species - for (std::size_t s { 0 }; s < 2; ++s) { - + for (auto s { 0u }; s < 2; ++s) { // get particle properties auto& species = domain.species[s]; auto i1 = species.i1; + auto dx1 = species.dx1; auto tag = species.tag; - // tag all particles with x > box[0].first as dead Kokkos::parallel_for( "RemoveParticles", species.rangeActiveParticles(), @@ -261,74 +288,56 @@ namespace user { if (tag(p) == ParticleTag::dead) { return; } - // select the x-coordinate index - auto x_i1 = i1(p); - // check if the particle is inside the box of new plasma - if (x_i1 >= x_min[0]) { + const auto x_Cd = static_cast(i1(p)) + + static_cast(dx1(p)); + const auto x_Ph = mesh.metric.template convert<1, Crd::Cd, Crd::XYZ>( + x_Cd); + + if (x_Ph > xmin) { tag(p) = ParticleTag::dead; } }); } /* - Inject piston of fresh plasma + Inject slab of fresh plasma */ + // define box to inject into + boundaries_t inj_box; + // loop over all dimension + for (auto d = 0u; d < M::Dim; ++d) { + if (d == 0) { + inj_box.push_back({ xmin, xmax }); + } else { + inj_box.push_back(Range::All); + } + } + // same maxwell distribution as above - const auto energy_dist = 
arch::Maxwellian(domain.mesh.metric, - domain.random_pool, - temperature, - -drift_ux, - in::x1); - // spatial distribution of the particles - // -> hack to use the uniform distribution in NonUniformInjector - const auto spatial_dist = arch::Piston(domain.mesh.metric, - box[0].first, - box[0].second, - in::x1); - - // inject piston of fresh plasma - const auto injector = arch::NonUniformInjector( + const auto energy_dist = arch::TwoTemperatureMaxwellian( + domain.mesh.metric, + domain.random_pool, + { temperature_ratio * temperature * domain.species[1].mass(), + temperature }, + { 1, 2 }, + -drift_ux, + in::x1); + + // we want to set up a uniform density distribution + const auto injector = arch::UniformInjector( energy_dist, - spatial_dist, { 1, 2 }); - // inject non-uniformly within the defined box - arch::InjectNonUniform(params, - domain, - injector, - ONE, - false, - box); - - /* - I thought this option would be better, but I can't get it to work - */ - - // const auto spatial_dist = arch::Replenish(domain.mesh.metric, - // domain.fields.bckp, - // box, - // TargetDensity, - // 1.0); - - // const auto injector = arch::NonUniformInjector( - // energy_dist, - // spatial_dist, - // {1, 2}); - - // const auto injector = arch::MovingInjector { - // domain.mesh.metric, - // domain.fields.bckp, - // energy_dist, - // box[0].first, - // box[0].second, - // 1.0, - // { 1, 2 } - // }; + // inject uniformly within the defined box + arch::InjectUniform>( + params, + domain, + injector, + 1.0, // target density + false, // no weights + inj_box); } }; - } // namespace user - #endif diff --git a/setups/srpic/shock/shock.toml b/setups/srpic/shock/shock.toml index ca19a4078..90678488a 100644 --- a/setups/srpic/shock/shock.toml +++ b/setups/srpic/shock/shock.toml @@ -1,11 +1,14 @@ [simulation] - name = "shock" + name = "shock_perp" engine = "srpic" - runtime = 30000.0 + runtime = 50.0 + + [simulation.domain] + decomposition = [1,-1] [grid] resolution = [4096, 128] - extent = 
[[0.0, 2000.0], [-31.25, 31.25]] + extent = [[0.0, 4.096], [-0.064, 0.064]] [grid.metric] metric = "minkowski" @@ -13,10 +16,11 @@ [grid.boundaries] fields = [["CONDUCTOR", "MATCH"], ["PERIODIC"]] particles = [["REFLECT", "ABSORB"], ["PERIODIC"]] + [scales] - larmor0 = 100.0 - skindepth0 = 1.0 + larmor0 = 0.057735 + skindepth0 = 0.01 [algorithms] current_filters = 8 @@ -31,26 +35,28 @@ label = "e-" mass = 1.0 charge = -1.0 - maxnpart = 1e8 + maxnpart = 8e7 [[particles.species]] label = "p+" - mass = 1.0 + mass = 100.0 charge = 1.0 - maxnpart = 1e8 + maxnpart = 8e7 [setup] - drift_ux = 0.1 # speed towards the wall [c] - temperature = 1e-4 # temeperature of maxwell distribution [m_e c^2] - Bmag = 1.0 # magnetic field strength as fraction of magnetisation - Btheta = 0.0 # magnetic field angle in the plane - Bphi = 0.0 # magnetic field angle out of plane - filling_fraction = 0.1 # fraction of the shock piston filled with plasma - injector_velocity = 1.0 # speed of injector [c] - injection_start = 0.0 # start time of moving injector + drift_ux = 0.15 # speed towards the wall [c] + temperature = 0.001683 # temperature of maxwell distribution [kB T / (m_i c^2)] + temperature_ratio = 1.0 # temperature ratio of electrons to protons + Bmag = 1.0 # magnetic field strength as fraction of magnetisation + Btheta = 63.0 # magnetic field angle in the plane + Bphi = 0.0 # magnetic field angle out of plane + filling_fraction = 0.1 # fraction of the shock piston filled with plasma + injector_velocity = 0.2 # speed of injector [c] + injection_start = 0.0 # start time of moving injector + injection_frequency = 100 # inject particles every 100 timesteps [output] - interval_time = 10.0 + interval_time = 0.1 format = "hdf5" [output.fields] @@ -62,4 +68,3 @@ [output.spectra] enable = false - diff --git a/setups/srpic/streaming/pgen.hpp b/setups/srpic/streaming/pgen.hpp new file mode 100644 index 000000000..ee14712de --- /dev/null +++ b/setups/srpic/streaming/pgen.hpp @@ -0,0 +1,112 @@ 
+#ifndef PROBLEM_GENERATOR_H +#define PROBLEM_GENERATOR_H + +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "arch/traits.h" +#include "utils/error.h" +#include "utils/numeric.h" + +#include "archetypes/energy_dist.h" +#include "archetypes/particle_injector.h" +#include "archetypes/problem_generator.h" +#include "framework/domain/domain.h" +#include "framework/domain/metadomain.h" + +namespace user { + using namespace ntt; + + template + struct PGen : public arch::ProblemGenerator { + + // compatibility traits for the problem generator + static constexpr auto engines = traits::compatible_with::value; + static constexpr auto metrics = traits::compatible_with::value; + static constexpr auto dimensions = + traits::compatible_with::value; + + // for easy access to variables in the child class + using arch::ProblemGenerator::D; + using arch::ProblemGenerator::C; + using arch::ProblemGenerator::params; + + using prmvec_t = std::vector; + + prmvec_t drifts_in_x, drifts_in_y, drifts_in_z; + prmvec_t densities, temperatures; + + inline PGen(const SimulationParams& p, const Metadomain& global_domain) + : arch::ProblemGenerator { p } + , drifts_in_x { p.template get("setup.drifts_in_x", prmvec_t {}) } + , drifts_in_y { p.template get("setup.drifts_in_y", prmvec_t {}) } + , drifts_in_z { p.template get("setup.drifts_in_z", prmvec_t {}) } + , densities { p.template get("setup.densities", prmvec_t {}) } + , temperatures { p.template get("setup.temperatures", prmvec_t {}) } { + const auto nspec = p.template get("particles.nspec"); + raise::ErrorIf(nspec % 2 != 0, + "Number of species must be even for this setup", + HERE); + for (auto n = 0u; n < nspec; n += 2) { + raise::ErrorIf( + global_domain.species_params()[n].charge() != + -global_domain.species_params()[n + 1].charge(), + "Charges of i-th and i+1-th species must be opposite for this setup", + HERE); + } + for (auto* specs : + { &drifts_in_x, &drifts_in_y, &drifts_in_z, &temperatures }) 
{ + if (specs->empty()) { + for (auto n = 0u; n < nspec; ++n) { + specs->push_back(ZERO); + } + } + raise::ErrorIf(specs->size() != nspec, + "Drift vector and/or temperature vector length does " + "not match number of species", + HERE); + } + if (densities.empty()) { + for (auto n = 0u; n < nspec; n += 2) { + densities.push_back(TWO / static_cast(nspec)); + } + } + raise::ErrorIf(densities.size() != nspec / 2, + "Density vector length must be half of the number of " + "species (per each pair of species)", + HERE); + } + + inline void InitPrtls(Domain& domain) { + const auto nspec = domain.species.size(); + for (auto n = 0u; n < nspec; n += 2) { + const auto drift_1 = prmvec_t { drifts_in_x[n], + drifts_in_y[n], + drifts_in_z[n] }; + const auto drift_2 = prmvec_t { drifts_in_x[n + 1], + drifts_in_y[n + 1], + drifts_in_z[n + 1] }; + const auto injector = arch::experimental:: + UniformInjector( + arch::experimental::Maxwellian(domain.mesh.metric, + domain.random_pool, + temperatures[n], + drift_1), + arch::experimental::Maxwellian(domain.mesh.metric, + domain.random_pool, + temperatures[n + 1], + drift_2), + { n + 1, n + 2 }); + arch::experimental::InjectUniform( + params, + domain, + injector, + densities[n / 2]); + } + } + }; + +} // namespace user + +#endif diff --git a/setups/srpic/streaming/twostream.toml b/setups/srpic/streaming/twostream.toml new file mode 100644 index 000000000..1b2334777 --- /dev/null +++ b/setups/srpic/streaming/twostream.toml @@ -0,0 +1,83 @@ +[simulation] + name = "twostream" + engine = "srpic" + runtime = 1000.0 + +[grid] + resolution = [12288] + extent = [[0.0, 100.0]] + + [grid.metric] + metric = "minkowski" + + [grid.boundaries] + fields = [["PERIODIC"]] + particles = [["PERIODIC"]] + +[scales] + larmor0 = 100.0 + skindepth0 = 10.0 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 16.0 + + [[particles.species]] + label = "e-Px" + mass = 1.0 + charge = -1.0 + maxnpart = 1e7 + + 
[[particles.species]] + label = "e+bg1" + mass = 1.0 + charge = 1.0 + maxnpart = 1e7 + pusher = "None" + + [[particles.species]] + label = "e-Mx" + mass = 1.0 + charge = -1.0 + maxnpart = 1e7 + + [[particles.species]] + label = "e+bg2" + mass = 1.0 + charge = 1.0 + maxnpart = 1e7 + pusher = "None" + +[setup] + # Drift 4-velocities for each species in all 3 directions + # @type: array of floats (length = nspec) + # @default: [ 0.0, ... ] + drifts_in_x = [0.1, 0.0, -0.1, 0.0] + drifts_in_y = [0.0, 0.0, 0.0, 0.0] + drifts_in_z = [0.0, 0.0, 0.0, 0.0] + # Pair-wise species densities in units of n0 + # @type: array of floats (length = nspec/2) + # @default: [ 2 / nspec, ... ] + densities = [0.5, 0.5] + # Species temperatures in units of m0 (c^2) + # @type: array of floats (length = nspec) + # @default: [ 0.0, ... ] + temperatures = [1e-4, 1e-4, 1e-4, 1e-4] + +[output] + interval_time = 2.0 + + [output.fields] + quantities = ["N_1", "N_3", "E", "B", "J", "T0i_1", "T0i_3"] + + [output.particles] + species = [1, 3] + stride = 10 + + [output.spectra] + enable = false diff --git a/setups/srpic/weibel/weibel.toml b/setups/srpic/streaming/weibel.toml similarity index 57% rename from setups/srpic/weibel/weibel.toml rename to setups/srpic/streaming/weibel.toml index 23d119b24..0d1f15bca 100644 --- a/setups/srpic/weibel/weibel.toml +++ b/setups/srpic/streaming/weibel.toml @@ -4,7 +4,7 @@ runtime = 100.0 [grid] - resolution = [512, 512] + resolution = [1024, 1024] extent = [[-10.0, 10.0], [-10.0, 10.0]] [grid.metric] @@ -52,23 +52,38 @@ maxnpart = 1e7 [setup] - drift_u_1 = 0.2 - drift_u_2 = 0.2 - temp_1 = 1e-4 - temp_2 = 1e-4 + # Drift 4-velocities for each species in all 3 directions + # @type: array of floats (length = nspec) + # @default: [ 0.0, ... 
] + drifts_in_x = [0.0, 0.0, 0.0, 0.0] + drifts_in_y = [0.0, 0.0, 0.0, 0.0] + drifts_in_z = [0.3, 0.3, -0.3, -0.3] + # Pair-wise species densities in units of n0 + # @type: array of floats (length = nspec/2) + # @default: [ 2 / nspec, ... ] + densities = [0.5, 0.5] + # Species temperatures in units of m0 (c^2) + # @type: array of floats (length = nspec) + # @default: [ 0.0, ... ] + temperatures = [1e-4, 1e-4, 1e-4, 1e-4] [output] - format = "hdf5" interval_time = 0.25 [output.fields] - quantities = ["N_1_2", "N_3_4", "B", "E", "T0i_1", "T0i_3"] + quantities = [ + "N_1_2", + "N_3_4", + "E", + "B", + "T0i_1", + "T0i_2", + "T0i_3", + "T0i_4", + ] [output.particles] enable = false [output.spectra] enable = false - -[diagnostics] - colored_stdout = true diff --git a/setups/srpic/turbulence/pgen.hpp b/setups/srpic/turbulence/pgen.hpp index bbd61cc3a..4725d67db 100644 --- a/setups/srpic/turbulence/pgen.hpp +++ b/setups/srpic/turbulence/pgen.hpp @@ -5,124 +5,258 @@ #include "global.h" #include "arch/kokkos_aliases.h" -#include "arch/traits.h" +#include "utils/error.h" #include "utils/numeric.h" #include "archetypes/energy_dist.h" #include "archetypes/particle_injector.h" #include "archetypes/problem_generator.h" +#include "framework/domain/domain.h" #include "framework/domain/metadomain.h" -#include -#include - -enum { - REAL = 0, - IMAG = 1 -}; +#if defined(MPI_ENABLED) + #include +#endif // MPI_ENABLED namespace user { using namespace ntt; + // initializing guide field and curl(B) = J_ext at the initial time step template - struct ExtForce { - ExtForce(array_t amplitudes, real_t SX1, real_t SX2, real_t SX3) - : amps { amplitudes } - , sx1 { SX1 } - , sx2 { SX2 } - , sx3 { SX3 } {} - - const std::vector species { 1, 2 }; - - ExtForce() = default; - - Inline auto fx1(const unsigned short&, - const real_t&, - const coord_t& x_Ph) const -> real_t { - real_t k01 = ONE * constant::TWO_PI / sx1; - real_t k02 = ZERO * constant::TWO_PI / sx2; - real_t k03 = ZERO * 
constant::TWO_PI / sx3; - real_t k04 = ONE; - real_t k11 = ZERO * constant::TWO_PI / sx1; - real_t k12 = ONE * constant::TWO_PI / sx2; - real_t k13 = ZERO * constant::TWO_PI / sx3; - real_t k14 = ONE; - real_t k21 = ZERO * constant::TWO_PI / sx1; - real_t k22 = ZERO * constant::TWO_PI / sx2; - real_t k23 = ONE * constant::TWO_PI / sx3; - real_t k24 = ONE; - - // return 0.1 * cos(2.0 * constant::TWO_PI * x_Ph[1]); - - return (k14 * amps(0, REAL) * - math::cos(k11 * x_Ph[0] + k12 * x_Ph[1] + k13 * x_Ph[2]) + - k14 * amps(0, IMAG) * - math::sin(k11 * x_Ph[0] + k12 * x_Ph[1] + k13 * x_Ph[2])) + - (k24 * amps(1, REAL) * - math::cos(k21 * x_Ph[0] + k22 * x_Ph[1] + k23 * x_Ph[2]) + - k24 * amps(1, IMAG) * - math::sin(k21 * x_Ph[0] + k22 * x_Ph[1] + k23 * x_Ph[2])); + struct InitFields { + InitFields(array_t& k, + array_t& a_real, + array_t& a_imag, + array_t& a_real_inv, + array_t& a_imag_inv) + : k { k } + , a_real { a_real } + , a_imag { a_imag } + , a_real_inv { a_real_inv } + , a_imag_inv { a_imag_inv } + , n_modes { a_real.size() } {}; + + Inline auto bx1(const coord_t& x_Ph) const -> real_t { + auto bx1_0 = ZERO; + for (auto i = 0; i < n_modes; i++) { + auto k_dot_r = k(0, i) * x_Ph[0] + k(1, i) * x_Ph[1]; + bx1_0 -= TWO * k(1, i) * + (a_real(i) * math::sin(k_dot_r) + a_imag(i) * math::cos(k_dot_r)); + bx1_0 -= TWO * k(1, i) * + (a_real_inv(i) * math::sin(k_dot_r) + + a_imag_inv(i) * math::cos(k_dot_r)); + } + return bx1_0; + } + + Inline auto bx2(const coord_t& x_Ph) const -> real_t { + auto bx2_0 = ZERO; + for (auto i = 0; i < n_modes; i++) { + auto k_dot_r = k(0, i) * x_Ph[0] + k(1, i) * x_Ph[1]; + bx2_0 += TWO * k(0, i) * + (a_real(i) * math::sin(k_dot_r) + a_imag(i) * math::cos(k_dot_r)); + bx2_0 += TWO * k(0, i) * + (a_real_inv(i) * math::sin(k_dot_r) + + a_imag_inv(i) * math::cos(k_dot_r)); + } + return bx2_0; + } + + Inline auto bx3(const coord_t&) const -> real_t { + return ONE; + } + + array_t k; + array_t a_real; + array_t a_imag; + array_t a_real_inv; + 
array_t a_imag_inv; + std::size_t n_modes; + }; + + inline auto init_pool(int seed) -> unsigned int { + if (seed < 0) { + unsigned int new_seed = static_cast(rand()); +#if defined(MPI_ENABLED) + MPI_Bcast(&new_seed, 1, MPI_UNSIGNED, MPI_ROOT_RANK, MPI_COMM_WORLD); +#endif // MPI_ENABLED + return new_seed; + } else { + return static_cast(seed); } + } - Inline auto fx2(const unsigned short&, - const real_t&, - const coord_t& x_Ph) const -> real_t { - real_t k01 = ONE * constant::TWO_PI / sx1; - real_t k02 = ZERO * constant::TWO_PI / sx2; - real_t k03 = ZERO * constant::TWO_PI / sx3; - real_t k04 = ONE; - real_t k11 = ZERO * constant::TWO_PI / sx1; - real_t k12 = ONE * constant::TWO_PI / sx2; - real_t k13 = ZERO * constant::TWO_PI / sx3; - real_t k14 = ONE; - real_t k21 = ZERO * constant::TWO_PI / sx1; - real_t k22 = ZERO * constant::TWO_PI / sx2; - real_t k23 = ONE * constant::TWO_PI / sx3; - real_t k24 = ONE; - return (k04 * amps(2, REAL) * - math::cos(k01 * x_Ph[0] + k02 * x_Ph[1] + k03 * x_Ph[2]) + - k04 * amps(2, IMAG) * - math::sin(k01 * x_Ph[0] + k02 * x_Ph[1] + k03 * x_Ph[2])) + - (k24 * amps(3, REAL) * - math::cos(k21 * x_Ph[0] + k22 * x_Ph[1] + k23 * x_Ph[2]) + - k24 * amps(3, IMAG) * - math::sin(k21 * x_Ph[0] + k22 * x_Ph[1] + k23 * x_Ph[2])); - // return ZERO; + template + inline auto init_wavenumbers() -> std::vector> { + if constexpr (D == Dim::_2D) { + return { + { 1, 0 }, + { 0, 1 }, + { 1, 1 }, + { -1, 1 } + }; + } else if constexpr (D == Dim::_3D) { + return { + { 1, 0, 1 }, + { 0, 1, 1 }, + { -1, 0, 1 }, + { 0, -1, 1 } + }; + } else { + raise::Error("Invalid dimension", HERE); + return {}; } + } + + // external current definition + template + struct ExternalCurrent { + ExternalCurrent(real_t dB, + real_t om0, + real_t g0, + std::vector>& wavenumbers, + random_number_pool_t& random_pool, + real_t Lx, + real_t Ly, + real_t Lz) + : wavenumbers { wavenumbers } + , n_modes { wavenumbers.size() } + , dB { dB } + , Lx { Lx } + , Ly { Ly } + , Lz { Lz } + , 
omega_0 { om0 } + , gamma_0 { g0 } + , k { "wavevector", D, n_modes } + , a_real { "a_real", n_modes } + , a_imag { "a_imag", n_modes } + , a_real_inv { "a_real_inv", n_modes } + , a_imag_inv { "a_imag_inv", n_modes } + , A0 { "A0", n_modes } { + // initializing wavevectors + auto k_host = Kokkos::create_mirror_view(k); + if constexpr (D == Dim::_2D) { + for (auto i = 0u; i < n_modes; i++) { + k_host(0, i) = constant::TWO_PI * wavenumbers[i][0] / Lx; + k_host(1, i) = constant::TWO_PI * wavenumbers[i][1] / Ly; + } + } + if constexpr (D == Dim::_3D) { + for (auto i = 0u; i < n_modes; i++) { + k_host(0, i) = constant::TWO_PI * wavenumbers[i][0] / Lx; + k_host(1, i) = constant::TWO_PI * wavenumbers[i][1] / Ly; + k_host(2, i) = constant::TWO_PI * wavenumbers[i][2] / Lz; + } + } + // initializing initial complex amplitudes + auto a_real_host = Kokkos::create_mirror_view(a_real); + auto a_imag_host = Kokkos::create_mirror_view(a_imag); + auto a_real_inv_host = Kokkos::create_mirror_view(a_real_inv); + auto a_imag_inv_host = Kokkos::create_mirror_view(a_imag_inv); + auto A0_host = Kokkos::create_mirror_view(A0); + + real_t prefac { ZERO }; + if constexpr (D == Dim::_2D) { + prefac = HALF; // HALF = 1/sqrt(twice modes due to reality condition * twice the frequencies due to sign change) + } else if constexpr (D == Dim::_3D) { + prefac = constant::SQRT2; // 1/sqrt(2) = 1/sqrt(twice modes due to reality condition) + } + for (auto i = 0u; i < n_modes; i++) { + auto k_perp = math::sqrt( + k_host(0, i) * k_host(0, i) + k_host(1, i) * k_host(1, i)); + auto phase = constant::TWO_PI / 6.; + A0_host(i) = dB / math::sqrt((real_t)n_modes) / k_perp * prefac; + a_real_host(i) = A0_host(i) * math::cos(phase); + a_imag_host(i) = A0_host(i) * math::sin(phase); + phase = constant::TWO_PI / 3; + a_imag_inv_host(i) = A0_host(i) * math::cos(phase); + a_real_inv_host(i) = A0_host(i) * math::sin(phase); + } - Inline auto fx3(const unsigned short&, - const real_t&, - const coord_t& x_Ph) const -> 
real_t { - real_t k01 = ONE * constant::TWO_PI / sx1; - real_t k02 = ZERO * constant::TWO_PI / sx2; - real_t k03 = ZERO * constant::TWO_PI / sx3; - real_t k04 = ONE; - real_t k11 = ZERO * constant::TWO_PI / sx1; - real_t k12 = ONE * constant::TWO_PI / sx2; - real_t k13 = ZERO * constant::TWO_PI / sx3; - real_t k14 = ONE; - real_t k21 = ZERO * constant::TWO_PI / sx1; - real_t k22 = ZERO * constant::TWO_PI / sx2; - real_t k23 = ONE * constant::TWO_PI / sx3; - real_t k24 = ONE; - return (k04 * amps(4, REAL) * - math::cos(k01 * x_Ph[0] + k02 * x_Ph[1] + k03 * x_Ph[2]) + - k04 * amps(4, IMAG) * - math::sin(k01 * x_Ph[0] + k02 * x_Ph[1] + k03 * x_Ph[2])) + - (k14 * amps(5, REAL) * - math::cos(k11 * x_Ph[0] + k12 * x_Ph[1] + k13 * x_Ph[2]) + - k14 * amps(5, IMAG) * - math::sin(k11 * x_Ph[0] + k12 * x_Ph[1] + k13 * x_Ph[2])); - // return ZERO; + Kokkos::deep_copy(a_real, a_real_host); + Kokkos::deep_copy(a_imag, a_imag_host); + Kokkos::deep_copy(a_real_inv, a_real_inv_host); + Kokkos::deep_copy(a_imag_inv, a_imag_inv_host); + Kokkos::deep_copy(A0, A0_host); + Kokkos::deep_copy(k, k_host); + }; + + Inline auto jx1(const coord_t& x_Ph) const -> real_t { + if constexpr (D == Dim::_2D) { + return ZERO; + } + if constexpr (D == Dim::_3D) { + real_t jx1_ant = ZERO; + for (auto i = 0u; i < n_modes; i++) { + auto k_dot_r = k(0, i) * x_Ph[0] + k(1, i) * x_Ph[1] + k(2, i) * x_Ph[2]; + jx1_ant -= TWO * k(0, i) * k(2, i) * + (a_real_inv(i) * math::cos(k_dot_r) - + a_imag_inv(i) * math::sin(k_dot_r)); + } + return jx1_ant; + } + } + + Inline auto jx2(const coord_t& x_Ph) const -> real_t { + if constexpr (D == Dim::_2D) { + return ZERO; + } else if constexpr (D == Dim::_3D) { + real_t jx2_ant = ZERO; + for (auto i = 0u; i < n_modes; i++) { + auto k_dot_r = k(0, i) * x_Ph[0] + k(1, i) * x_Ph[1] + k(2, i) * x_Ph[2]; + jx2_ant -= TWO * k(1, i) * k(2, i) * + (a_real_inv(i) * math::cos(k_dot_r) - + a_imag_inv(i) * math::sin(k_dot_r)); + } + return jx2_ant; + } + } + + Inline auto jx3(const 
coord_t& x_Ph) const -> real_t { + if constexpr (D == Dim::_2D) { + real_t jx3_ant = ZERO; + for (auto i = 0u; i < n_modes; i++) { + auto k_perp_sq = k(0, i) * k(0, i) + k(1, i) * k(1, i); + auto k_dot_r = k(0, i) * x_Ph[0] + k(1, i) * x_Ph[1]; + jx3_ant += TWO * k_perp_sq * + (a_real(i) * math::cos(k_dot_r) - + a_imag(i) * math::sin(k_dot_r)); + jx3_ant += TWO * k_perp_sq * + (a_real_inv(i) * math::cos(k_dot_r) - + a_imag_inv(i) * math::sin(k_dot_r)); + } + return jx3_ant; + } else if constexpr (D == Dim::_3D) { + real_t jx3_ant = ZERO; + for (auto i = 0u; i < n_modes; i++) { + auto k_perp_sq = k(0, i) * k(0, i) + k(1, i) * k(1, i); + auto k_dot_r = k(0, i) * x_Ph[0] + k(1, i) * x_Ph[1] + k(2, i) * x_Ph[2]; + jx3_ant += TWO * k_perp_sq * + (a_real_inv(i) * math::cos(k_dot_r) - + a_imag_inv(i) * math::sin(k_dot_r)); + } + return jx3_ant; + } } private: - array_t amps; - const real_t sx1, sx2, sx3; + const std::vector> wavenumbers; + const std::size_t n_modes; + const real_t dB, Lx, Ly, Lz; + + public: + const real_t omega_0, gamma_0; + array_t k; + array_t a_real; + array_t a_imag; + array_t a_real_inv; + array_t a_imag_inv; + array_t A0; }; template struct PGen : public arch::ProblemGenerator { + // compatibility traits for the problem generator static constexpr auto engines = traits::compatible_with::value; static constexpr auto metrics = traits::compatible_with::value; @@ -133,214 +267,145 @@ namespace user { using arch::ProblemGenerator::C; using arch::ProblemGenerator::params; - const real_t SX1, SX2, SX3; - const real_t temperature, machno; - const unsigned int nmodes; - const real_t amp0, phi0; - array_t amplitudes; - ExtForce ext_force; - const real_t dt; - - inline PGen(const SimulationParams& params, const Metadomain& global_domain) - : arch::ProblemGenerator { params } - , SX1 { global_domain.mesh().extent(in::x1).second - - global_domain.mesh().extent(in::x1).first } - , SX2 { global_domain.mesh().extent(in::x2).second - - 
global_domain.mesh().extent(in::x2).first } - , SX3 { global_domain.mesh().extent(in::x3).second - - global_domain.mesh().extent(in::x3).first } - // , SX1 { 2.0 } - // , SX2 { 2.0 } - // , SX3 { 2.0 } - , temperature { params.template get("problem.temperature", 0.1) } - , machno { params.template get("problem.machno", 0.1) } - , nmodes { params.template get("setup.nmodes", 6) } - , amp0 { machno * temperature / static_cast(nmodes) } - , phi0 { INV_4 } // !TODO: randomize - , amplitudes { "DrivingModes", nmodes } - , ext_force { amplitudes, SX1, SX2, SX3 } - , dt { params.template get("algorithms.timestep.dt") } { - Init(); - } - - void Init() { - // initializing amplitudes - auto amplitudes_ = amplitudes; - const auto amp0_ = amp0; - const auto phi0_ = phi0; - Kokkos::parallel_for( - "RandomAmplitudes", - amplitudes.extent(0), - Lambda(index_t i) { - amplitudes_(i, REAL) = amp0_ * math::cos(phi0_); - amplitudes_(i, IMAG) = amp0_ * math::sin(phi0_); - }); - } + const real_t temperature, dB, omega_0, gamma_0; + const real_t Lx, Ly, Lz, escape_dist; + const int random_seed; + std::vector> wavenumbers; + random_number_pool_t random_pool; + + // debugging, will delete later + real_t total_sum = ZERO; + real_t total_sum_inv = ZERO; + real_t number_of_timesteps = ZERO; + + ExternalCurrent ext_current; + InitFields init_flds; + + inline PGen(const SimulationParams& p, const Metadomain& global_domain) + : arch::ProblemGenerator { p } + , temperature { p.template get("setup.temperature") } + , dB { p.template get("setup.dB", ONE) } + , omega_0 { p.template get("setup.omega_0") } + , gamma_0 { p.template get("setup.gamma_0") } + , wavenumbers { init_wavenumbers() } + , random_seed { p.template get("setup.seed", -1) } + , random_pool { init_pool(random_seed) } + , Lx { global_domain.mesh().extent(in::x1).second - + global_domain.mesh().extent(in::x1).first } + , Ly { global_domain.mesh().extent(in::x2).second - + global_domain.mesh().extent(in::x2).first } + , Lz { 
global_domain.mesh().extent(in::x3).second - + global_domain.mesh().extent(in::x3).first } + , escape_dist { p.template get("setup.escape_dist", HALF * Lx) } + , ext_current { dB, omega_0, gamma_0, wavenumbers, random_pool, Lx, Ly, Lz } + , init_flds { ext_current.k, + ext_current.a_real, + ext_current.a_imag, + ext_current.a_real_inv, + ext_current.a_imag_inv } {} inline void InitPrtls(Domain& local_domain) { - { - const auto energy_dist = arch::Maxwellian(local_domain.mesh.metric, - local_domain.random_pool, - temperature); - const auto injector = arch::UniformInjector( - energy_dist, - { 1, 2 }); - const real_t ndens = 0.9; - arch::InjectUniform(params, - local_domain, - injector, - ndens); - } - - { - const auto energy_dist = arch::PowerlawDist(local_domain.mesh.metric, - local_domain.random_pool, - 0.1, 100.0, -3.0); - const auto injector = arch::UniformInjector( - energy_dist, - { 1, 2 }); - const real_t ndens = 0.1; - arch::InjectUniform(params, - local_domain, - injector, - ndens); - } - } - - void CustomPostStep(std::size_t time, long double, Domain& domain) { - auto omega0 = 0.6 * math::sqrt(temperature * machno * constant::TWO_PI / SX1); - auto gamma0 = 0.5 * math::sqrt(temperature * machno * constant::TWO_PI / SX2); - auto sigma0 = amp0 * math::sqrt(static_cast(nmodes) * gamma0); - auto pool = domain.random_pool; + const auto energy_dist = arch::Maxwellian(local_domain.mesh.metric, + local_domain.random_pool, + temperature); + const auto spatial_dist = arch::UniformInjector( + energy_dist, + { 1, 2 }); + arch::InjectUniform>( + params, + local_domain, + spatial_dist, + ONE); + }; + + void CustomPostStep(timestep_t, simtime_t, Domain& domain) { + // update amplitudes of antenna + const auto dt = params.template get("algorithms.timestep.dt"); + const auto& ext_curr = ext_current; Kokkos::parallel_for( - "RandomAmplitudes", - amplitudes.extent(0), + "Antenna amplitudes", + wavenumbers.size(), ClassLambda(index_t i) { - auto rand_gen = pool.get_state(); - 
const auto unr = Random(rand_gen) - HALF; - const auto uni = Random(rand_gen) - HALF; - pool.free_state(rand_gen); - const auto ampr_prev = amplitudes(i, REAL); - const auto ampi_prev = amplitudes(i, IMAG); - amplitudes(i, REAL) = (ampr_prev * math::cos(omega0 * dt) + - ampi_prev * math::sin(omega0 * dt)) * - math::exp(-gamma0 * dt) + - unr * sigma0; - amplitudes(i, IMAG) = (-ampr_prev * math::sin(omega0 * dt) + - ampi_prev * math::cos(omega0 * dt)) * - math::exp(-gamma0 * dt) + - uni * sigma0; + auto generator = random_pool.get_state(); + const auto u_imag = Random(generator) - HALF; + const auto u_real = Random(generator) - HALF; + const auto u_real_inv = Random(generator) - HALF; + const auto u_imag_inv = Random(generator) - HALF; + random_pool.free_state(generator); + + auto a_real_prev = ext_curr.a_real(i); + auto a_imag_prev = ext_curr.a_imag(i); + auto a_real_inv_prev = ext_curr.a_real_inv(i); + auto a_imag_inv_prev = ext_curr.a_imag_inv(i); + ext_curr.a_real(i) = (a_real_prev * math::cos(ext_curr.omega_0 * dt) + + a_imag_prev * math::sin(ext_curr.omega_0 * dt)) * + math::exp(-ext_curr.gamma_0 * dt) + + ext_curr.A0(i) * + math::sqrt(TWELVE * ext_curr.gamma_0 / dt) * + u_real * dt; + + ext_curr.a_imag(i) = (a_imag_prev * math::cos(ext_curr.omega_0 * dt) - + a_real_prev * math::sin(ext_curr.omega_0 * dt)) * + math::exp(-ext_curr.gamma_0 * dt) + + ext_curr.A0(i) * + math::sqrt(TWELVE * ext_curr.gamma_0 / dt) * + u_imag * dt; + + ext_curr.a_real_inv( + i) = (a_real_inv_prev * math::cos(-ext_curr.omega_0 * dt) + + a_imag_inv_prev * math::sin(-ext_curr.omega_0 * dt)) * + math::exp(-ext_curr.gamma_0 * dt) + + ext_curr.A0(i) * math::sqrt(TWELVE * ext_curr.gamma_0 / dt) * + u_real_inv * dt; + + ext_curr.a_imag_inv( + i) = (a_imag_inv_prev * math::cos(-ext_curr.omega_0 * dt) - + a_real_inv_prev * math::sin(-ext_curr.omega_0 * dt)) * + math::exp(-ext_curr.gamma_0 * dt) + + ext_curr.A0(i) * math::sqrt(TWELVE * ext_curr.gamma_0 / dt) * + u_imag_inv * dt; }); - // auto 
fext_en_total = ZERO; - // for (auto& species : domain.species) { - // auto pld = species.pld[0]; - // auto weight = species.weight; - // Kokkos::parallel_reduce( - // "ExtForceEnrg", - // species.rangeActiveParticles(), - // ClassLambda(index_t p, real_t & fext_en) { - // fext_en += pld(p) * weight(p); - // }, - // fext_en_total); - // } - - // auto pkin_en_total = ZERO; - // for (auto& species : domain.species) { - // auto ux1 = species.ux1; - // auto ux2 = species.ux2; - // auto ux3 = species.ux3; - // auto weight = species.weight; - // Kokkos::parallel_reduce( - // "KinEnrg", - // species.rangeActiveParticles(), - // ClassLambda(index_t p, real_t & pkin_en) { - // pkin_en += (math::sqrt(ONE + SQR(ux1(p)) + SQR(ux2(p)) + SQR(ux3(p))) - - // ONE) * - // weight(p); - // }, - // pkin_en_total); - // } - // // Weight the macroparticle integral by sim parameters - // pkin_en_total /= params.template get("scales.n0"); - - // std::ofstream myfile; - // if (time == 0) { - // myfile.open("fextenrg.txt"); - // } else { - // myfile.open("fextenrg.txt", std::ios_base::app); - // } - // myfile << fext_en_total << std::endl; - // myfile.close(); - - // if (time == 0) { - // myfile.open("kenrg.txt"); - // } else { - // myfile.open("kenrg.txt", std::ios_base::app); - // } - // myfile << pkin_en_total << std::endl; - // myfile.close(); - - // if constexpr (D == Dim::_3D) { - - // auto metric = domain.mesh.metric; - - // auto benrg_total = ZERO; - // auto EB = domain.fields.em; - // Kokkos::parallel_reduce( - // "BEnrg", - // domain.mesh.rangeActiveCells(), - // Lambda(index_t i1, index_t i2, index_t i3, real_t & benrg) { - // coord_t x_Cd { ZERO }; - // vec_t b_Cntrv { EB(i1, i2, i3, em::bx1), - // EB(i1, i2, i3, em::bx2), - // EB(i1, i2, i3, em::bx3) }; - // vec_t b_XYZ; - // metric.template transform(x_Cd, - // b_Cntrv, - // b_XYZ); - // benrg += (SQR(b_XYZ[0]) + SQR(b_XYZ[1]) + SQR(b_XYZ[2])); - // }, - // benrg_total); - // benrg_total *= params.template get("scales.sigma0") 
* HALF; - - // if (time == 0) { - // myfile.open("bsqenrg.txt"); - // } else { - // myfile.open("bsqenrg.txt", std::ios_base::app); - // } - // myfile << benrg_total << std::endl; - // myfile.close(); - // auto eenrg_total = ZERO; - // Kokkos::parallel_reduce( - // "BEnrg", - // domain.mesh.rangeActiveCells(), - // Lambda(index_t i1, index_t i2, index_t i3, real_t & eenrg) { - // coord_t x_Cd { ZERO }; - // vec_t e_Cntrv { EB(i1, i2, i3, em::ex1), - // EB(i1, i2, i3, em::ex2), - // EB(i1, i2, i3, em::ex3) }; - // vec_t e_XYZ; - // metric.template transform(x_Cd, - // e_Cntrv, - // e_XYZ); - // eenrg += (SQR(e_XYZ[0]) + SQR(e_XYZ[1]) + SQR(e_XYZ[2])); - // }, - // eenrg_total); - // eenrg_total *= params.template get("scales.sigma0") * HALF; - - - // if (time == 0) { - // myfile.open("esqenrg.txt"); - // } else { - // myfile.open("esqenrg.txt", std::ios_base::app); - // } - // myfile << eenrg_total << std::endl; - // myfile.close(); - // } + // particle escape (resample velocities) + const auto energy_dist = arch::Maxwellian(domain.mesh.metric, + domain.random_pool, + temperature); + for (const auto& sp : { 0, 1 }) { + if (domain.species[sp].npld() > 1) { + const auto& ux1 = domain.species[sp].ux1; + const auto& ux2 = domain.species[sp].ux2; + const auto& ux3 = domain.species[sp].ux3; + const auto& pld = domain.species[sp].pld; + const auto& tag = domain.species[sp].tag; + const auto L = escape_dist; + printf("Entering the escape loop %d, L = %f\n", sp, L); + Kokkos::parallel_for( + "UpdatePld", + domain.species[sp].npart(), + Lambda(index_t p) { + if (tag(p) == ParticleTag::dead) { + return; + } + const auto gamma = math::sqrt( + ONE + ux1(p) * ux1(p) + ux2(p) * ux2(p) + ux3(p) * ux3(p)); + pld(p, 0) += ux1(p) * dt / gamma; + pld(p, 1) += ux2(p) * dt / gamma; + // escape test on the accumulated displacement: compare |pld| (not abs of the comparison) against L along both x1 and x2 + if ((math::abs(pld(p, 0)) > L) or (math::abs(pld(p, 1)) > L)) { + coord_t x_Ph { ZERO }; + vec_t u_Mxw { ZERO }; + energy_dist(x_Ph, u_Mxw); + ux1(p) = u_Mxw[0]; + ux2(p) = u_Mxw[1]; + ux3(p) = u_Mxw[2]; +
pld(p, 0) = ZERO; + pld(p, 1) = ZERO; + } + }); + } + } } }; - } // namespace user -#endif \ No newline at end of file +#endif diff --git a/setups/srpic/turbulence/turbulence.toml b/setups/srpic/turbulence/turbulence.toml index a1f8e29c1..79cc641ef 100644 --- a/setups/srpic/turbulence/turbulence.toml +++ b/setups/srpic/turbulence/turbulence.toml @@ -1,22 +1,22 @@ [simulation] name = "turbulence" engine = "srpic" - runtime = 20.0 + runtime = 1200.0 [grid] - resolution = [184, 184, 184] - extent = [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]] + resolution = [1024, 1024] + extent = [[-128.0, 128.0], [-128.0, 128.0]] [grid.metric] metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] - particles = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] + fields = [["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"]] [scales] - larmor0 = 0.02 - skindepth0 = 0.02 + larmor0 = 1.0 + skindepth0 = 1.0 [algorithms] current_filters = 4 @@ -28,22 +28,38 @@ ppc0 = 32.0 [[particles.species]] - label = "e-" + label = "e-_p" mass = 1.0 charge = -1.0 - maxnpart = 1e8 + maxnpart = 2e7 [[particles.species]] - label = "e+" + label = "e+_p" mass = 1.0 charge = 1.0 - maxnpart = 1e8 + maxnpart = 2e7 [setup] + temperature = 1e0 + dB = 1.0 + omega_0 = 0.0156 + gamma_0 = 0.0078 + [output] format = "hdf5" - interval_time = 0.1 + interval_time = 12.0 [output.fields] - quantities = ["N_1", "N_2", "E", "B", "J", "T00_1", "T00_2"] + quantities = ["N_1_2", "J", "B", "E"] + + [output.particles] + enable = false + + [output.spectra] + enable = false + [output.stats] + enable = false + +[diagnostics] + colored_stdout = true diff --git a/setups/srpic/weibel/pgen.hpp b/setups/srpic/weibel/pgen.hpp deleted file mode 100644 index 21acc8032..000000000 --- a/setups/srpic/weibel/pgen.hpp +++ /dev/null @@ -1,75 +0,0 @@ -#ifndef PROBLEM_GENERATOR_H -#define PROBLEM_GENERATOR_H - -#include "enums.h" -#include "global.h" - -#include "arch/kokkos_aliases.h" -#include 
"arch/traits.h" - -#include "archetypes/energy_dist.h" -#include "archetypes/particle_injector.h" -#include "archetypes/problem_generator.h" -#include "framework/domain/domain.h" -#include "framework/domain/metadomain.h" - -namespace user { - using namespace ntt; - - template - struct PGen : public arch::ProblemGenerator { - - // compatibility traits for the problem generator - static constexpr auto engines = traits::compatible_with::value; - static constexpr auto metrics = traits::compatible_with::value; - static constexpr auto dimensions = - traits::compatible_with::value; - - // for easy access to variables in the child class - using arch::ProblemGenerator::D; - using arch::ProblemGenerator::C; - using arch::ProblemGenerator::params; - - const real_t temp_1, temp_2; - const real_t drift_u_1, drift_u_2; - - inline PGen(const SimulationParams& p, const Metadomain& global_domain) - : arch::ProblemGenerator { p } - , temp_1 { p.template get("setup.temp_1") } - , temp_2 { p.template get("setup.temp_2") } - , drift_u_1 { p.template get("setup.drift_u_1") } - , drift_u_2 { p.template get("setup.drift_u_2") } {} - - inline void InitPrtls(Domain& local_domain) { - const auto energy_dist_1 = arch::Maxwellian(local_domain.mesh.metric, - local_domain.random_pool, - temp_1, - -drift_u_1, - in::x3); - const auto energy_dist_2 = arch::Maxwellian(local_domain.mesh.metric, - local_domain.random_pool, - temp_2, - drift_u_2, - in::x3); - const auto injector_1 = arch::UniformInjector( - energy_dist_1, - { 1, 2 }); - const auto injector_2 = arch::UniformInjector( - energy_dist_2, - { 3, 4 }); - arch::InjectUniform>( - params, - local_domain, - injector_1, - HALF); - arch::InjectUniform>( - params, - local_domain, - injector_2, - HALF); - } - }; - -} // namespace user - -#endif diff --git a/setups/tests/blob/pgen.hpp b/setups/tests/blob/pgen.hpp index f7b7d71b5..9120e244f 100644 --- a/setups/tests/blob/pgen.hpp +++ b/setups/tests/blob/pgen.hpp @@ -24,7 +24,7 @@ namespace user { 
Inline void operator()(const coord_t& x_Ph, vec_t& v, - unsigned short sp) const override { + spidx_t sp) const override { v[0] = v_max; } diff --git a/setups/tests/deposit/deposit-mink.toml b/setups/tests/deposit/deposit-mink.toml new file mode 100644 index 000000000..2dc953896 --- /dev/null +++ b/setups/tests/deposit/deposit-mink.toml @@ -0,0 +1,71 @@ +[simulation] + name = "deposit-test-mink" + engine = "srpic" + runtime = 5.0 + +[grid] + resolution = [32, 32] + extent = [[0.0, 1.0], [0.0, 1.0]] + + [grid.metric] + metric = "minkowski" + + [grid.boundaries] + fields = [["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"]] + +[scales] + larmor0 = 0.1 + skindepth0 = 0.1 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 10.0 + + [[particles.species]] + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e2 + + [[particles.species]] + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e2 + +[setup] + x1_e = [0.25] + x2_e = [0.85] + x3_e = [0.33] + ux1_e = [0.6] + ux2_e = [-0.3] + ux3_e = [-0.2] + + x1_i = [0.25] + x2_i = [0.85] + x3_i = [0.33] + ux1_i = [-0.6] + ux2_i = [0.3] + ux3_i = [0.2] + +[output] + format = "hdf5" + interval = 5 + + [output.fields] + quantities = ["N_1", "N_2", "E", "B", "J"] + + [output.particles] + enable = false + + [output.spectra] + enable = false + +[checkpoint] + keep = 0 diff --git a/setups/tests/deposit/deposit-sr.toml b/setups/tests/deposit/deposit-sr.toml new file mode 100644 index 000000000..0e1648d12 --- /dev/null +++ b/setups/tests/deposit/deposit-sr.toml @@ -0,0 +1,71 @@ +[simulation] + name = "deposit-sr" + engine = "srpic" + runtime = 10.0 + +[grid] + resolution = [64, 64] + extent = [[1.0, 5.0]] + + [grid.metric] + metric = "qspherical" + + [grid.boundaries] + fields = [["FIXED", "FIXED"]] + particles = [["REFLECT", "REFLECT"]] + +[scales] + larmor0 = 0.1 + skindepth0 = 0.1 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + 
+[particles] + ppc0 = 10.0 + + [[particles.species]] + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e2 + + [[particles.species]] + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e2 + +[setup] + x1_e = [2.25] + x2_e = [1.25] + phi_e = [0.0] + ux1_e = [0.6] + ux2_e = [-0.3] + ux3_e = [-0.2] + + x1_i = [2.25] + x2_i = [1.25] + phi_i = [0.0] + ux1_i = [-0.6] + ux2_i = [0.3] + ux3_i = [0.2] + +[output] + format = "hdf5" + interval = 5 + + [output.fields] + quantities = ["N_1", "N_2", "E", "B", "J"] + + [output.particles] + enable = false + + [output.spectra] + enable = false + +[checkpoint] + keep = 0 diff --git a/setups/tests/deposit/deposit.toml b/setups/tests/deposit/deposit.toml deleted file mode 100644 index 04c23ce7d..000000000 --- a/setups/tests/deposit/deposit.toml +++ /dev/null @@ -1,53 +0,0 @@ -[simulation] - name = "deposit-test" - engine = "srpic" - runtime = 1.0 - -[grid] - resolution = [256, 256] - extent = [[0.0, 1.0], [0.0, 1.0]] - - [grid.metric] - metric = "minkowski" - - [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] - particles = [["PERIODIC"], ["PERIODIC"]] - -[scales] - larmor0 = 0.1 - skindepth0 = 0.1 - -[algorithms] - current_filters = 4 - - [algorithms.timestep] - CFL = 0.5 - -[particles] - ppc0 = 10.0 - - [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e2 - - [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e2 - -[setup] - -[output] - format = "hdf5" - interval_time = 0.01 - - [output.quantities] - quantities = ["N_1", "N_2", "E", "B", "J"] - -[diagnostics] - colored_stdout = true - blocking_timers = true diff --git a/setups/tests/deposit/pgen.hpp b/setups/tests/deposit/pgen.hpp index fd9a41c2e..0080af8fd 100644 --- a/setups/tests/deposit/pgen.hpp +++ b/setups/tests/deposit/pgen.hpp @@ -4,14 +4,8 @@ #include "enums.h" #include "global.h" -#include "arch/kokkos_aliases.h" #include "arch/traits.h" -#include "utils/comparators.h" -#include 
"utils/formatting.h" -#include "utils/log.h" -#include "utils/numeric.h" -#include "archetypes/energy_dist.h" #include "archetypes/particle_injector.h" #include "archetypes/problem_generator.h" #include "framework/domain/domain.h" @@ -26,10 +20,20 @@ namespace user { struct PGen : public arch::ProblemGenerator { // compatibility traits for the problem generator - static constexpr auto engines = traits::compatible_with::value; - static constexpr auto metrics = traits::compatible_with::value; - static constexpr auto dimensions = - traits::compatible_with::value; + static constexpr auto engines { + traits::compatible_with::value + }; + static constexpr auto metrics { + traits::compatible_with::value + }; + static constexpr auto dimensions { + traits::compatible_with::value + }; // for easy access to variables in the child class using arch::ProblemGenerator::D; @@ -44,88 +48,64 @@ namespace user { inline void InitPrtls(Domain& local_domain) { const auto empty = std::vector {}; - const auto x1s = params.template get>("setup.x1s", empty); - const auto y1s = params.template get>("setup.y1s", empty); - const auto z1s = params.template get>("setup.z1s", empty); - const auto ux1s = params.template get>("setup.ux1s", + const auto x1_e = params.template get>("setup.x1_e", empty); - const auto uy1s = params.template get>("setup.uy1s", + const auto x2_e = params.template get>("setup.x2_e", empty); - const auto uz1s = params.template get>("setup.uz1s", + const auto x3_e = params.template get>("setup.x3_e", empty); - - const auto x2s = params.template get>("setup.x2s", empty); - const auto y2s = params.template get>("setup.y2s", empty); - const auto z2s = params.template get>("setup.z2s", empty); - const auto ux2s = params.template get>("setup.ux2s", + const auto phi_e = params.template get>("setup.phi_e", + empty); + const auto ux1_e = params.template get>("setup.ux1_e", + empty); + const auto ux2_e = params.template get>("setup.ux2_e", + empty); + const auto ux3_e = 
params.template get>("setup.ux3_e", + empty); + + const auto x1_i = params.template get>("setup.x1_i", empty); - const auto uy2s = params.template get>("setup.uy2s", + const auto x2_i = params.template get>("setup.x2_i", empty); - const auto uz2s = params.template get>("setup.uz2s", + const auto x3_i = params.template get>("setup.x3_i", empty); - // std::vector x, y, z, ux_1, uy_1, uz_1, ux_2, uy_2, uz_2; - // x.push_back(0.85); - // x.push_back(0.123); - // if constexpr (D == Dim::_2D || D == Dim::_3D) { - // y.push_back(0.32); - // y.push_back(0.321); - // } - // if constexpr (D == Dim::_3D) { - // z.push_back(0.231); - // z.push_back(0.687); - // } - // ux_1.push_back(1.0); - // uy_1.push_back(-1.0); - // uz_1.push_back(0.0); - // ux_1.push_back(1.0); - // uy_1.push_back(-2.0); - // uz_1.push_back(1.0); - // - // ux_2.push_back(1.0); - // uy_2.push_back(1.0); - // uz_2.push_back(0.0); - // ux_2.push_back(-2.0); - // uy_2.push_back(3.0); - // uz_2.push_back(-1.0); - // - const std::map> data_1 { - { "x1", x1s}, - { "x2", y1s}, - { "x3", z1s}, - {"ux1", ux1s}, - {"ux2", uy1s}, - {"ux3", uz1s} + const auto phi_i = params.template get>("setup.phi_i", + empty); + const auto ux1_i = params.template get>("setup.ux1_i", + empty); + const auto ux2_i = params.template get>("setup.ux2_i", + empty); + const auto ux3_i = params.template get>("setup.ux3_i", + empty); + std::map> data_e { + { "x1", x1_e }, + { "x2", x2_e }, + { "ux1", ux1_e }, + { "ux2", ux2_e }, + { "ux3", ux3_e } }; - const std::map> data_2 { - { "x1", x2s}, - { "x2", y2s}, - { "x3", z2s}, - {"ux1", ux2s}, - {"ux2", uy2s}, - {"ux3", uz2s} + std::map> data_i { + { "x1", x1_i }, + { "x2", x2_i }, + { "ux1", ux1_i }, + { "ux2", ux2_i }, + { "ux3", ux3_i } }; - - arch::InjectGlobally(global_domain, local_domain, (arch::spidx_t)1, data_1); - arch::InjectGlobally(global_domain, local_domain, (arch::spidx_t)2, data_2); + if constexpr (M::CoordType == Coord::Cart or D == Dim::_3D) { + data_e["x3"] = x3_e; + 
data_i["x3"] = x3_i; + } else if constexpr (D == Dim::_2D) { + data_e["phi"] = phi_e; + data_i["phi"] = phi_i; + } + + arch::InjectGlobally(global_domain, local_domain, (spidx_t)1, data_e); + arch::InjectGlobally(global_domain, local_domain, (spidx_t)2, data_i); } - // void CustomPostStep(std::size_t, long double time, Domain& domain) { - // if (time >= 0.1) { - // for (auto& species : domain.species) { - // auto ux1 = species.ux1; - // auto ux2 = species.ux2; - // auto ux3 = species.ux3; - // Kokkos::parallel_for( - // "Stop", - // species.rangeActiveParticles(), - // Lambda(index_t p) { - // ux1(p) = ZERO; - // ux2(p) = ZERO; - // ux3(p) = ZERO; - // }); - // } - // } - // } + auto FixFieldsConst(const bc_in&, const em&) const -> std::pair { + return { ZERO, false }; + } }; } // namespace user diff --git a/setups/tests/deposit/plot-mink.py b/setups/tests/deposit/plot-mink.py new file mode 100644 index 000000000..9d6760613 --- /dev/null +++ b/setups/tests/deposit/plot-mink.py @@ -0,0 +1,62 @@ +import nt2 +import matplotlib.pyplot as plt +import matplotlib as mpl + +datas = [] +cpus = [1, 2, 3, 4, 5, 6, 8] +for i in cpus: + datas.append(nt2.Data(path=f"mink-np{i}")) + + +def plot(ti): + fig = plt.figure(figsize=(16, 7), dpi=300) + gs = mpl.gridspec.GridSpec(3, 7, figure=fig) + + for p, quant in enumerate(["Jx", "Jy", "Jz"]): + axs = [fig.add_subplot(gs[p, i]) for i in range(7)] + (datas[0].fields[quant]).isel(t=ti).plot( + ax=axs[0], + cmap="seismic", + add_colorbar=False, + norm=mpl.colors.SymLogNorm( + linthresh=1e-5, + linscale=1, + vmin=-1e-2, + vmax=1e-2, + ), + ) + for i, (d, ax) in enumerate(zip(datas[1:], axs[1:])): + (d.fields[quant] - datas[0].fields[quant]).isel(t=ti).plot( + ax=ax, + cmap="seismic", + add_colorbar=False, + norm=mpl.colors.SymLogNorm( + linthresh=1e-10, + linscale=1, + vmin=-1e-7, + vmax=1e-7, + ), + ) + + for i, ax in enumerate(axs): + ax.set_aspect(1) + if i > 0: + if p == 0: + ax.set_title(f"np{cpus[i]} - np1") + else: + 
ax.set_title(None) + ax.set_yticklabels([]) + ax.set_ylabel(None) + else: + if p == 0: + ax.set_title(f"np1") + else: + ax.set_title(None) + + if p != 2: + ax.set_xticklabels([]) + ax.set_xlabel(None) + + +nt2.export.makeFrames(plot, datas[0].fields.s[::4], "mink-diff", num_cpus=4) +nt2.export.makeMovie(framerate=10, input="mink-diff/", number=5, output="mink-diff.mp4") diff --git a/setups/tests/deposit/plot-sr.py b/setups/tests/deposit/plot-sr.py new file mode 100644 index 000000000..0ba5dff24 --- /dev/null +++ b/setups/tests/deposit/plot-sr.py @@ -0,0 +1,29 @@ +import nt2 +import matplotlib.pyplot as plt +import matplotlib as mpl + +data = nt2.Data(path=f"sr-np8") + + +def plot(ti): + fig = plt.figure(figsize=(9, 6), dpi=300) + gs = mpl.gridspec.GridSpec(1, 3, figure=fig) + axs = [fig.add_subplot(gs[0, i]) for i in range(3)] + for i, (ax, j) in enumerate(zip(axs, ["Jr", "Jth", "Jph"])): + data.fields.isel(t=ti)[j].polar.pcolor( + ax=ax, + cbar_position="top", + cbar_size="2%", + norm=mpl.colors.SymLogNorm(linthresh=1e-8, vmin=-1e-4, vmax=1e-4), + cmap="seismic", + ) + ax.set_title(None) + ax.add_artist(mpl.patches.Circle((0, 0), 1, color="k", alpha=0.2)) + ax.add_artist(mpl.patches.Circle((0, 0), 5, edgecolor="k", facecolor="none")) + if i > 0: + ax.set_yticklabels([]) + ax.set_ylabel(None) + + +nt2.export.makeFrames(plot, data.fields.s, "sr-dep", num_cpus=4) +nt2.export.makeMovie(framerate=10, input="sr-dep/", number=5, output="sr-dep.mp4") diff --git a/setups/tests/deposit/run-mink.sh b/setups/tests/deposit/run-mink.sh new file mode 100755 index 000000000..4b52d6642 --- /dev/null +++ b/setups/tests/deposit/run-mink.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +for i in {1..8}; do + if [ $i -eq 7 ]; then + continue + fi + run=$(echo "np${i}") + cp deposit-mink.toml deposit-mink-${run}.toml && \ + sed -i 's/name[[:space:]]*=[[:space:]]*".*\?"/name = "mink-'${run}'"/g' deposit-mink-${run}.toml && \ + mpiexec -np ${i} ./entity.xc -input deposit-mink-${run}.toml && \ 
+ rm deposit-mink-${run}.toml +done + +rm *.info *.err *.log *.csv diff --git a/setups/tests/injector/pgen.hpp b/setups/tests/injector/pgen.hpp index 17d7f9398..0b4a34a07 100644 --- a/setups/tests/injector/pgen.hpp +++ b/setups/tests/injector/pgen.hpp @@ -27,7 +27,7 @@ namespace user { Inline void operator()(const coord_t&, vec_t& v_Ph, - unsigned short) const override { + spidx_t) const override { v_Ph[0] = vmax * math::cos(phase); v_Ph[1] = vmax * math::sin(phase); } diff --git a/setups/wip/rec-gravity/pgen.hpp b/setups/wip/rec-gravity/pgen.hpp deleted file mode 100644 index a4f927113..000000000 --- a/setups/wip/rec-gravity/pgen.hpp +++ /dev/null @@ -1,211 +0,0 @@ -#ifndef PROBLEM_GENERATOR_H -#define PROBLEM_GENERATOR_H - -#include "enums.h" -#include "global.h" - -#include "arch/directions.h" -#include "arch/kokkos_aliases.h" -#include "arch/traits.h" -#include "utils/numeric.h" - -#include "archetypes/energy_dist.h" -#include "archetypes/particle_injector.h" -#include "archetypes/problem_generator.h" -#include "archetypes/spatial_dist.h" -#include "framework/domain/metadomain.h" - -namespace user { - using namespace ntt; - - template - struct Gravity { - const std::vector species { 1, 2 }; - - Gravity(real_t f, real_t tscale, real_t ymid) - : force { f } - , tscale { tscale } - , ymid { ymid } {} - - Inline auto fx1(const unsigned short&, const real_t&, const coord_t&) const - -> real_t { - return ZERO; - } - - Inline auto fx2(const unsigned short&, - const real_t& t, - const coord_t& x_Ph) const -> real_t { - const auto sign = (x_Ph[1] < ymid) ? 
ONE : -ONE; - if (t > tscale) { - return sign * force; - } else { - return sign * force * (ONE - math::cos(constant::PI * t / tscale)) / TWO; - } - } - - Inline auto fx3(const unsigned short&, const real_t&, const coord_t&) const - -> real_t { - return ZERO; - } - - private: - const real_t force, tscale, ymid; - }; - - template - struct CurrentLayer : public arch::SpatialDistribution { - CurrentLayer(const M& metric, real_t width, real_t yi) - : arch::SpatialDistribution { metric } - , width { width } - , yi { yi } {} - - Inline auto operator()(const coord_t& x_Ph) const -> real_t override { - return ONE / SQR(math::cosh((x_Ph[1] - yi) / width)); - } - - private: - const real_t yi, width; - }; - - template - struct InitFields { - InitFields(real_t Bmag, real_t width, real_t angle, real_t y1, real_t y2) - : Bmag { Bmag } - , width { width } - , angle { angle } - , y1 { y1 } - , y2 { y2 } {} - - Inline auto bx1(const coord_t& x_Ph) const -> real_t { - return Bmag * math::cos(angle) * - (math::tanh((x_Ph[1] - y1) / width) - - math::tanh((x_Ph[1] - y2) / width) - 1); - } - - Inline auto bx3(const coord_t& x_Ph) const -> real_t { - return Bmag * math::sin(angle) * - (math::tanh((x_Ph[1] - y1) / width) - - math::tanh((x_Ph[1] - y2) / width) - 1); - } - - private: - const real_t Bmag, width, angle, y1, y2; - }; - - template - struct PGen : public arch::ProblemGenerator { - // compatibility traits for the problem generator - static constexpr auto engines { traits::compatible_with::value }; - static constexpr auto metrics { traits::compatible_with::value }; - static constexpr auto dimensions { - traits::compatible_with::value - }; - - // for easy access to variables in the child class - using arch::ProblemGenerator::D; - using arch::ProblemGenerator::C; - using arch::ProblemGenerator::params; - - const real_t Bmag, width, angle, overdensity, y1, y2, bg_temp; - InitFields init_flds; - - Gravity ext_force; - - inline PGen(const SimulationParams& p, const Metadomain& m) - : 
arch::ProblemGenerator(p) - , Bmag { p.template get("setup.Bmag", 1.0) } - , width { p.template get("setup.width") } - , angle { p.template get("setup.angle") } - , overdensity { p.template get("setup.overdensity") } - , y1 { m.mesh().extent(in::x2).first + - INV_4 * - (m.mesh().extent(in::x2).second - m.mesh().extent(in::x2).first) } - , y2 { m.mesh().extent(in::x2).first + - 3 * INV_4 * - (m.mesh().extent(in::x2).second - m.mesh().extent(in::x2).first) } - , init_flds { Bmag, width, angle, y1, y2 } - , bg_temp { p.template get("setup.bg_temp") } - , ext_force { - p.template get("setup.fmag", 0.1), - (m.mesh().extent(in::x1).second - m.mesh().extent(in::x1).first), - INV_2 * (m.mesh().extent(in::x2).second + m.mesh().extent(in::x2).first) - } {} - - inline PGen() {} - - inline void InitPrtls(Domain& local_domain) { - // background - const auto energy_dist = arch::Maxwellian(local_domain.mesh.metric, - local_domain.random_pool, - bg_temp); - const auto injector = arch::UniformInjector( - energy_dist, - { 1, 2 }); - arch::InjectUniform(params, - local_domain, - injector, - HALF); - // record npart - const auto npart1 = local_domain.species[0].npart(); - const auto npart2 = local_domain.species[1].npart(); - - const auto sigma = params.template get("scales.sigma0"); - const auto c_omp = params.template get("scales.skindepth0"); - const auto cs_drift_beta = math::sqrt(sigma) * c_omp / (width * overdensity); - const auto cs_drift_gamma = ONE / math::sqrt(ONE - SQR(cs_drift_beta)); - const auto cs_drift_u = cs_drift_beta * cs_drift_gamma; - const auto cs_temp = HALF * sigma / overdensity; - // current layer #1 - auto edist_cs_1 = arch::Maxwellian(local_domain.mesh.metric, - local_domain.random_pool, - cs_temp, - cs_drift_u, - in::x3, - false); - const auto sdist_cs_1 = CurrentLayer(local_domain.mesh.metric, width, y1); - const auto inj_cs_1 = arch::NonUniformInjector( - edist_cs_1, - sdist_cs_1, - { 1, 2 }); - arch::InjectNonUniform(params, - local_domain, - inj_cs_1, - 
overdensity); - // current layer #2 - const auto edist_cs_2 = arch::Maxwellian(local_domain.mesh.metric, - local_domain.random_pool, - cs_temp, - -cs_drift_u, - in::x3, - false); - const auto sdist_cs_2 = CurrentLayer(local_domain.mesh.metric, width, y2); - const auto inj_cs_2 = arch::NonUniformInjector( - edist_cs_2, - sdist_cs_2, - { 1, 2 }); - arch::InjectNonUniform(params, - local_domain, - inj_cs_2, - overdensity); - auto ux1_1 = local_domain.species[0].ux1; - auto ux3_1 = local_domain.species[0].ux3; - auto ux1_2 = local_domain.species[1].ux1; - auto ux3_2 = local_domain.species[1].ux3; - Kokkos::parallel_for( - "TurnParticles", - CreateRangePolicy({ npart1 }, { local_domain.species[0].npart() }), - ClassLambda(index_t p) { - auto ux1_ = ux1_1(p), ux3_ = ux3_1(p); - ux1_1(p) = math::cos(angle) * ux1_ - math::sin(angle) * ux3_; - ux3_1(p) = math::sin(angle) * ux1_ + math::cos(angle) * ux3_; - - ux1_ = ux1_2(p), ux3_ = ux3_2(p); - ux1_2(p) = math::cos(angle) * ux1_ - math::sin(angle) * ux3_; - ux3_2(p) = math::sin(angle) * ux1_ + math::cos(angle) * ux3_; - }); - } // namespace user - }; - -} // namespace user - -#endif diff --git a/setups/wip/reconnection/reconnection.toml b/setups/wip/reconnection/reconnection.toml deleted file mode 100644 index fa7b049f4..000000000 --- a/setups/wip/reconnection/reconnection.toml +++ /dev/null @@ -1,53 +0,0 @@ -[simulation] - name = "reconnection" - engine = "srpic" - runtime = 10.0 - -[grid] - resolution = [1024, 2048] - extent = [[-1.0, 1.0], [-2.0, 2.0]] - - [grid.metric] - metric = "minkowski" - - [grid.boundaries] - fields = [["PERIODIC"], ["PERIODIC"]] - particles = [["PERIODIC"], ["PERIODIC"]] - -[scales] - larmor0 = 2e-4 - skindepth0 = 2e-3 - -[algorithms] - current_filters = 4 - - [algorithms.timestep] - CFL = 0.5 - -[particles] - ppc0 = 8.0 - - [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e7 - - [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e7 - 
-[setup] - Bmag = 1.0 - width = 0.01 - bg_temp = 1e-4 - overdensity = 3.0 - -[output] - format = "hdf5" - interval_time = 0.1 - - [output.fields] - quantities = ["N_1", "N_2", "E", "B", "J"] diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f9d921df0..715183b64 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,10 +22,7 @@ # * mpi [optional] # ------------------------------ -set(ENTITY ${PROJECT_NAME}.xc) set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SOURCES ${SRC_DIR}/entity.cpp) -add_executable(${ENTITY} entity.cpp) # dependencies add_subdirectory(${SRC_DIR}/global ${CMAKE_CURRENT_BINARY_DIR}/global) @@ -34,14 +31,18 @@ add_subdirectory(${SRC_DIR}/kernels ${CMAKE_CURRENT_BINARY_DIR}/kernels) add_subdirectory(${SRC_DIR}/archetypes ${CMAKE_CURRENT_BINARY_DIR}/archetypes) add_subdirectory(${SRC_DIR}/framework ${CMAKE_CURRENT_BINARY_DIR}/framework) add_subdirectory(${SRC_DIR}/engines ${CMAKE_CURRENT_BINARY_DIR}/engines) +add_subdirectory(${SRC_DIR}/output ${CMAKE_CURRENT_BINARY_DIR}/output) if(${output}) - add_subdirectory(${SRC_DIR}/output ${CMAKE_CURRENT_BINARY_DIR}/output) add_subdirectory(${SRC_DIR}/checkpoint ${CMAKE_CURRENT_BINARY_DIR}/checkpoint) endif() add_subdirectory(${SRC_DIR}/../setups ${CMAKE_CURRENT_BINARY_DIR}/setups) +set(ENTITY ${PROJECT_NAME}.xc) +set(SOURCES ${SRC_DIR}/entity.cpp) + +add_executable(${ENTITY} ${SOURCES}) set(libs ntt_global ntt_framework ntt_metrics ntt_engines ntt_pgen) add_dependencies(${ENTITY} ${libs}) target_link_libraries(${ENTITY} PUBLIC ${libs}) diff --git a/src/archetypes/energy_dist.h b/src/archetypes/energy_dist.h index 231ea5b0b..67f1bdf8a 100644 --- a/src/archetypes/energy_dist.h +++ b/src/archetypes/energy_dist.h @@ -45,7 +45,7 @@ namespace arch { // last argument -- is the species index (1, ..., nspec) Inline virtual void operator()(const coord_t&, vec_t& v, - unsigned short = 0) const { + spidx_t = 0) const { v[0] = ZERO; v[1] = ZERO; v[2] = ZERO; @@ -61,7 +61,7 @@ namespace arch { Inline void 
operator()(const coord_t&, vec_t& v, - unsigned short = 0) const override { + spidx_t = 0) const override { v[0] = ZERO; v[1] = ZERO; v[2] = ZERO; @@ -85,7 +85,7 @@ namespace arch { Inline void operator()(const coord_t& x_Code, vec_t& v, - unsigned short = 0) const override { + spidx_t = 0) const override { auto rand_gen = pool.get_state(); auto rand_X1 = Random(rand_gen); auto rand_gam = ONE; @@ -125,6 +125,103 @@ namespace arch { random_number_pool_t pool; }; + Inline void JuttnerSinge(vec_t& v, + const real_t& temp, + const random_number_pool_t& pool) { + auto rand_gen = pool.get_state(); + real_t randX1, randX2; + if (temp < static_cast(0.5)) { + // Juttner-Synge distribution using the Box-Muller method - non-relativistic + randX1 = Random(rand_gen); + while (cmp::AlmostZero(randX1)) { + randX1 = Random(rand_gen); + } + randX1 = math::sqrt(-TWO * math::log(randX1)); + randX2 = constant::TWO_PI * Random(rand_gen); + v[0] = randX1 * math::cos(randX2) * math::sqrt(temp); + + randX1 = Random(rand_gen); + while (cmp::AlmostZero(randX1)) { + randX1 = Random(rand_gen); + } + randX1 = math::sqrt(-TWO * math::log(randX1)); + randX2 = constant::TWO_PI * Random(rand_gen); + v[1] = randX1 * math::cos(randX2) * math::sqrt(temp); + + randX1 = Random(rand_gen); + while (cmp::AlmostZero(randX1)) { + randX1 = Random(rand_gen); + } + randX1 = math::sqrt(-TWO * math::log(randX1)); + randX2 = constant::TWO_PI * Random(rand_gen); + v[2] = randX1 * math::cos(randX2) * math::sqrt(temp); + } else { + // Juttner-Synge distribution using the Sobol method - relativistic + auto randu = ONE; + auto randeta = Random(rand_gen); + while (SQR(randeta) <= SQR(randu) + ONE) { + randX1 = Random(rand_gen) * Random(rand_gen) * + Random(rand_gen); + while (cmp::AlmostZero(randX1)) { + randX1 = Random(rand_gen) * Random(rand_gen) * + Random(rand_gen); + } + randu = -temp * math::log(randX1); + randX2 = Random(rand_gen); + while (cmp::AlmostZero(randX2)) { + randX2 = Random(rand_gen); + } + randeta = 
-temp * math::log(randX1 * randX2); + } + randX1 = Random(rand_gen); + randX2 = Random(rand_gen); + v[0] = randu * (TWO * randX1 - ONE); + v[2] = TWO * randu * math::sqrt(randX1 * (ONE - randX1)); + v[1] = v[2] * math::cos(constant::TWO_PI * randX2); + v[2] = v[2] * math::sin(constant::TWO_PI * randX2); + } + pool.free_state(rand_gen); + } + + template + Inline void SampleFromMaxwellian(vec_t& v, + const real_t& temperature, + const real_t& boost_velocity, + const in& boost_direction, + bool flip_velocity, + const random_number_pool_t& pool) { + if (cmp::AlmostZero(temperature)) { + v[0] = ZERO; + v[1] = ZERO; + v[2] = ZERO; + } else { + JuttnerSinge(v, temperature, pool); + } + if constexpr (CanBoost) { + // Boost a symmetric distribution to a relativistic speed using flipping + // method https://arxiv.org/pdf/1504.03910.pdf + // @note: boost only when using cartesian coordinates + if (not cmp::AlmostZero(boost_velocity)) { + const auto boost_dir = static_cast(boost_direction); + const auto boost_beta { boost_velocity / + math::sqrt(ONE + SQR(boost_velocity)) }; + const auto gamma { U2GAMMA(v[0], v[1], v[2]) }; + auto rand_gen = pool.get_state(); + if (-boost_beta * v[boost_dir] > gamma * Random(rand_gen)) { + v[boost_dir] = -v[boost_dir]; + } + pool.free_state(rand_gen); + v[boost_dir] = math::sqrt(ONE + SQR(boost_velocity)) * + (v[boost_dir] + boost_beta * gamma); + if (flip_velocity) { + v[0] = -v[0]; + v[1] = -v[1]; + v[2] = -v[2]; + } + } + } + } + template struct Maxwellian : public EnergyDistribution { using EnergyDistribution::metric; @@ -150,89 +247,76 @@ namespace arch { HERE); } - // Juttner-Synge distribution - Inline void JS(vec_t& v, const real_t& temp) const { - auto rand_gen = pool.get_state(); - real_t randX1, randX2; - if (temp < static_cast(0.5)) { - // Juttner-Synge distribution using the Box-Muller method - non-relativistic - randX1 = Random(rand_gen); - while (cmp::AlmostZero(randX1)) { - randX1 = Random(rand_gen); - } - randX1 = 
math::sqrt(-TWO * math::log(randX1)); - randX2 = constant::TWO_PI * Random(rand_gen); - v[0] = randX1 * math::cos(randX2) * math::sqrt(temp); - - randX1 = Random(rand_gen); - while (cmp::AlmostZero(randX1)) { - randX1 = Random(rand_gen); - } - randX1 = math::sqrt(-TWO * math::log(randX1)); - randX2 = constant::TWO_PI * Random(rand_gen); - v[1] = randX1 * math::cos(randX2) * math::sqrt(temp); - - randX1 = Random(rand_gen); - while (cmp::AlmostZero(randX1)) { - randX1 = Random(rand_gen); - } - randX1 = math::sqrt(-TWO * math::log(randX1)); - randX2 = constant::TWO_PI * Random(rand_gen); - v[2] = randX1 * math::cos(randX2) * math::sqrt(temp); - } else { - // Juttner-Synge distribution using the Sobol method - relativistic - auto randu = ONE; - auto randeta = Random(rand_gen); - while (SQR(randeta) <= SQR(randu) + ONE) { - randX1 = Random(rand_gen) * Random(rand_gen) * - Random(rand_gen); - while (cmp::AlmostZero(randX1)) { - randX1 = Random(rand_gen) * Random(rand_gen) * - Random(rand_gen); - } - randu = -temp * math::log(randX1); - randX2 = Random(rand_gen); - while (cmp::AlmostZero(randX2)) { - randX2 = Random(rand_gen); - } - randeta = -temp * math::log(randX1 * randX2); - } - randX1 = Random(rand_gen); - randX2 = Random(rand_gen); - v[0] = randu * (TWO * randX1 - ONE); - v[2] = TWO * randu * math::sqrt(randX1 * (ONE - randX1)); - v[1] = v[2] * math::cos(constant::TWO_PI * randX2); - v[2] = v[2] * math::sin(constant::TWO_PI * randX2); + Inline void operator()(const coord_t& x_Code, + vec_t& v, + spidx_t sp = 0) const override { + SampleFromMaxwellian(v, + temperature, + boost_velocity, + boost_direction, + not zero_current and + sp % 2 == 0, + pool); + if constexpr (S == SimEngine::GRPIC) { + // convert from the tetrad basis to covariant + vec_t v_Hat; + v_Hat[0] = v[0]; + v_Hat[1] = v[1]; + v_Hat[2] = v[2]; + metric.template transform(x_Code, v_Hat, v); } - pool.free_state(rand_gen); } - // Boost a symmetric distribution to a relativistic speed using flipping - // 
method https://arxiv.org/pdf/1504.03910.pdf - Inline void boost(vec_t& v) const { - const auto boost_dir = static_cast(boost_direction); - const auto boost_beta { boost_velocity / - math::sqrt(ONE + SQR(boost_velocity)) }; - const auto gamma { U2GAMMA(v[0], v[1], v[2]) }; - auto rand_gen = pool.get_state(); - if (-boost_beta * v[boost_dir] > gamma * Random(rand_gen)) { - v[boost_dir] = -v[boost_dir]; - } - pool.free_state(rand_gen); - v[boost_dir] = math::sqrt(ONE + SQR(boost_velocity)) * - (v[boost_dir] + boost_beta * gamma); + private: + random_number_pool_t pool; + + const real_t temperature; + const real_t boost_velocity; + const in boost_direction; + const bool zero_current; + }; + + template + struct TwoTemperatureMaxwellian : public EnergyDistribution { + using EnergyDistribution::metric; + + TwoTemperatureMaxwellian(const M& metric, + random_number_pool_t& pool, + const std::pair& temperatures, + const std::pair& species, + real_t boost_vel = ZERO, + in boost_direction = in::x1, + bool zero_current = true) + : EnergyDistribution { metric } + , pool { pool } + , temperature_1 { temperatures.first } + , temperature_2 { temperatures.second } + , sp_1 { species.first } + , sp_2 { species.second } + , boost_velocity { boost_vel } + , boost_direction { boost_direction } + , zero_current { zero_current } { + raise::ErrorIf( + (temperature_1 < ZERO) or (temperature_2 < ZERO), + "TwoTemperatureMaxwellian: Temperature must be non-negative", + HERE); + raise::ErrorIf((not cmp::AlmostZero(boost_vel, ZERO)) && + (M::CoordType != Coord::Cart), + "TwoTemperatureMaxwellian: Boosting is only supported in " + "Cartesian coordinates", + HERE); } Inline void operator()(const coord_t& x_Code, vec_t& v, - unsigned short sp = 0) const override { - if (cmp::AlmostZero(temperature)) { - v[0] = ZERO; - v[1] = ZERO; - v[2] = ZERO; - } else { - JS(v, temperature); - } + spidx_t sp = 0) const override { + SampleFromMaxwellian( + v, + (sp == sp_1) ? 
temperature_1 : temperature_2, + boost_velocity, + boost_direction, + not zero_current and sp == sp_1, + pool); if constexpr (S == SimEngine::GRPIC) { // convert from the tetrad basis to covariant vec_t v_Hat; @@ -241,28 +325,153 @@ namespace arch { v_Hat[2] = v[2]; metric.template transform(x_Code, v_Hat, v); } - if constexpr (M::CoordType == Coord::Cart) { - // boost only when using cartesian coordinates - if (not cmp::AlmostZero(boost_velocity)) { - boost(v); - if (not zero_current and sp % 2 == 0) { - v[0] = -v[0]; - v[1] = -v[1]; - v[2] = -v[2]; - } - } - } } private: random_number_pool_t pool; - const real_t temperature; - const real_t boost_velocity; - const in boost_direction; - const bool zero_current; + const real_t temperature_1, temperature_2; + const spidx_t sp_1, sp_2; + const real_t boost_velocity; + const in boost_direction; + const bool zero_current; }; + namespace experimental { + + template + struct Maxwellian : public EnergyDistribution { + using EnergyDistribution::metric; + + Maxwellian(const M& metric, + random_number_pool_t& pool, + real_t temperature, + const std::vector& drift_four_vel = { ZERO, ZERO, ZERO }) + : EnergyDistribution { metric } + , pool { pool } + , temperature { temperature } { + raise::ErrorIf(drift_four_vel.size() != 3, + "Maxwellian: Drift velocity must be a 3D vector", + HERE); + raise::ErrorIf(temperature < ZERO, + "Maxwellian: Temperature must be non-negative", + HERE); + if constexpr (M::CoordType == Coord::Cart) { + drift_4vel = NORM(drift_four_vel[0], drift_four_vel[1], drift_four_vel[2]); + if (cmp::AlmostZero_host(drift_4vel)) { + drift_dir = 0; + } else { + drift_3vel = drift_4vel / math::sqrt(ONE + SQR(drift_4vel)); + drift_dir_x1 = drift_four_vel[0] / drift_4vel; + drift_dir_x2 = drift_four_vel[1] / drift_4vel; + drift_dir_x3 = drift_four_vel[2] / drift_4vel; + + // assume drift is in an arbitrary direction + drift_dir = 4; + // check whether drift is in one of principal directions + for (auto d { 0u }; d < 
3u; ++d) { + const auto dprev = (d + 2) % 3; + const auto dnext = (d + 1) % 3; + if (cmp::AlmostZero_host(drift_four_vel[dprev]) and + cmp::AlmostZero_host(drift_four_vel[dnext])) { + drift_dir = SIGN(drift_four_vel[d]) * (d + 1); + break; + } + } + } + raise::ErrorIf(drift_dir > 3 and drift_dir != 4, + "Maxwellian: Incorrect drift direction", + HERE); + raise::ErrorIf( + drift_dir != 0 and (M::CoordType != Coord::Cart), + "Maxwellian: Boosting is only supported in Cartesian coordinates", + HERE); + } + } + + Inline void operator()(const coord_t& x_Code, + vec_t& v, + spidx_t sp = 0) const override { + if (cmp::AlmostZero(temperature)) { + v[0] = ZERO; + v[1] = ZERO; + v[2] = ZERO; + } else { + JuttnerSinge(v, temperature, pool); + } + // @note: boost only when using cartesian coordinates + if constexpr (M::CoordType == Coord::Cart) { + if (drift_dir != 0) { + // Boost an isotropic Maxwellian with a drift velocity using + // flipping method https://arxiv.org/pdf/1504.03910.pdf + // 1. apply drift in X1 direction + const auto gamma { U2GAMMA(v[0], v[1], v[2]) }; + auto rand_gen = pool.get_state(); + if (-drift_3vel * v[0] > gamma * Random(rand_gen)) { + v[0] = -v[0]; + } + pool.free_state(rand_gen); + v[0] = math::sqrt(ONE + SQR(drift_4vel)) * (v[0] + drift_3vel * gamma); + // 2. rotate to desired orientation + if (drift_dir == -1) { + v[0] = -v[0]; + } else if (drift_dir == 2 || drift_dir == -2) { + const auto tmp = v[1]; + v[1] = drift_dir > 0 ? v[0] : -v[0]; + v[0] = tmp; + } else if (drift_dir == 3 || drift_dir == -3) { + const auto tmp = v[2]; + v[2] = drift_dir > 0 ? 
v[0] : -v[0]; + v[0] = tmp; + } else if (drift_dir == 4) { + vec_t v_old; + v_old[0] = v[0]; + v_old[1] = v[1]; + v_old[2] = v[2]; + + v[0] = v_old[0] * drift_dir_x1 - v_old[1] * drift_dir_x2 - + v_old[2] * drift_dir_x3; + v[1] = (v_old[0] * drift_dir_x2 * (drift_dir_x1 + ONE) + + v_old[1] * + (SQR(drift_dir_x1) + drift_dir_x1 + SQR(drift_dir_x3)) - + v_old[2] * drift_dir_x2 * drift_dir_x3) / + (drift_dir_x1 + ONE); + v[2] = (v_old[0] * drift_dir_x3 * (drift_dir_x1 + ONE) - + v_old[1] * drift_dir_x2 * drift_dir_x3 - + v_old[2] * (-drift_dir_x1 + SQR(drift_dir_x3) - ONE)) / + (drift_dir_x1 + ONE); + } + } + } else if constexpr (S == SimEngine::GRPIC) { + // convert from the tetrad basis to covariant + vec_t v_Hat; + v_Hat[0] = v[0]; + v_Hat[1] = v[1]; + v_Hat[2] = v[2]; + metric.template transform(x_Code, v_Hat, v); + } + } + + private: + random_number_pool_t pool; + + const real_t temperature; + + real_t drift_3vel { ZERO }, drift_4vel { ZERO }; + // components of the unit vector in the direction of the drift + real_t drift_dir_x1 { ZERO }, drift_dir_x2 { ZERO }, drift_dir_x3 { ZERO }; + + // values of boost_dir: + // 4 -> arbitrary direction + // 0 -> no drift + // +/- 1 -> +/- x1 + // +/- 2 -> +/- x2 + // +/- 3 -> +/- x3 + short drift_dir { 0 }; + }; + + } // namespace experimental + } // namespace arch #endif // ARCHETYPES_ENERGY_DIST_HPP diff --git a/src/archetypes/particle_injector.h b/src/archetypes/particle_injector.h index 62b9249c3..d15036cf5 100644 --- a/src/archetypes/particle_injector.h +++ b/src/archetypes/particle_injector.h @@ -28,19 +28,97 @@ #include "framework/domain/metadomain.h" #include "kernels/injectors.hpp" +#include "kernels/particle_moments.hpp" +#include "kernels/utils.hpp" #include +#if defined(MPI_ENABLED) + #include +#endif + #include +#include #include #include namespace arch { using namespace ntt; - using spidx_t = unsigned short; + + template + struct BaseInjector { + virtual auto DeduceRegion(const Domain& domain, + const 
boundaries_t& box) const + -> std::tuple, array_t> { + if (not domain.mesh.Intersects(box)) { + return { false, array_t {}, array_t {} }; + } + coord_t xCorner_min_Ph { ZERO }; + coord_t xCorner_max_Ph { ZERO }; + coord_t xCorner_min_Cd { ZERO }; + coord_t xCorner_max_Cd { ZERO }; + + for (auto d { 0u }; d < M::Dim; ++d) { + const auto local_xi_min = domain.mesh.extent(static_cast(d)).first; + const auto local_xi_max = domain.mesh.extent(static_cast(d)).second; + const auto extent_min = std::min(std::max(local_xi_min, box[d].first), + local_xi_max); + const auto extent_max = std::max(std::min(local_xi_max, box[d].second), + local_xi_min); + xCorner_min_Ph[d] = extent_min; + xCorner_max_Ph[d] = extent_max; + } + domain.mesh.metric.template convert(xCorner_min_Ph, + xCorner_min_Cd); + domain.mesh.metric.template convert(xCorner_max_Ph, + xCorner_max_Cd); + + array_t xi_min { "xi_min", M::Dim }, xi_max { "xi_max", M::Dim }; + + auto xi_min_h = Kokkos::create_mirror_view(xi_min); + auto xi_max_h = Kokkos::create_mirror_view(xi_max); + for (auto d { 0u }; d < M::Dim; ++d) { + xi_min_h(d) = xCorner_min_Cd[d]; + xi_max_h(d) = xCorner_max_Cd[d]; + } + Kokkos::deep_copy(xi_min, xi_min_h); + Kokkos::deep_copy(xi_max, xi_max_h); + + return { true, xi_min, xi_max }; + } + + virtual auto ComputeNumInject(const SimulationParams& params, + const Domain& domain, + real_t number_density, + const boundaries_t& box) const + -> std::tuple, array_t> { + const auto result = DeduceRegion(domain, box); + if (not std::get<0>(result)) { + return { false, (npart_t)0, array_t {}, array_t {} }; + } + const auto xi_min = std::get<1>(result); + const auto xi_max = std::get<2>(result); + auto xi_min_h = Kokkos::create_mirror_view(xi_min); + auto xi_max_h = Kokkos::create_mirror_view(xi_max); + Kokkos::deep_copy(xi_min_h, xi_min); + Kokkos::deep_copy(xi_max_h, xi_max); + + long double num_cells { 1.0 }; + for (auto d { 0u }; d < M::Dim; ++d) { + num_cells *= static_cast(xi_max_h(d)) - + 
static_cast(xi_min_h(d)); + } + + const auto ppc0 = params.template get("particles.ppc0"); + const auto nparticles = static_cast( + (long double)(ppc0 * number_density * 0.5) * num_cells); + + return { true, nparticles, xi_min, xi_max }; + } + }; template class ED> - struct UniformInjector { + struct UniformInjector : BaseInjector { using energy_dist_t = ED; static_assert(M::is_metric, "M must be a metric class"); static_assert(energy_dist_t::is_energy_dist, @@ -60,12 +138,126 @@ namespace arch { ~UniformInjector() = default; }; + template class ED> + struct KeepConstantInjector : UniformInjector { + using energy_dist_t = ED; + using UniformInjector::D; + using UniformInjector::C; + + const idx_t density_buff_idx; + boundaries_t probe_box; + + KeepConstantInjector(const energy_dist_t& energy_dist, + const std::pair& species, + idx_t density_buff_idx, + boundaries_t box = {}) + : UniformInjector { energy_dist, species } + , density_buff_idx { density_buff_idx } { + for (auto d { 0u }; d < M::Dim; ++d) { + if (d < box.size()) { + probe_box.push_back({ box[d].first, box[d].second }); + } else { + probe_box.push_back(Range::All); + } + } + } + + ~KeepConstantInjector() = default; + + auto ComputeAvgDensity(const SimulationParams& params, + Domain& domain) const -> real_t { + const auto result = this->DeduceRegion(domain, probe_box); + const auto should_probe = std::get<0>(result); + if (not should_probe) { + return ZERO; + } + const auto xi_min_arr = std::get<1>(result); + const auto xi_max_arr = std::get<2>(result); + + tuple_t i_min { 0 }; + tuple_t i_max { 0 }; + + auto xi_min_h = Kokkos::create_mirror_view(xi_min_arr); + auto xi_max_h = Kokkos::create_mirror_view(xi_max_arr); + Kokkos::deep_copy(xi_min_h, xi_min_arr); + Kokkos::deep_copy(xi_max_h, xi_max_arr); + + ncells_t num_cells = 1u; + for (auto d { 0u }; d < M::Dim; ++d) { + i_min[d] = std::floor(xi_min_h(d)) + N_GHOSTS; + i_max[d] = std::ceil(xi_max_h(d)) + N_GHOSTS; + num_cells *= (i_max[d] - i_min[d]); + } 
+ + real_t dens { ZERO }; + if (should_probe) { + Kokkos::parallel_reduce( + "AvgDensity", + CreateRangePolicy(i_min, i_max), + kernel::ComputeSum_kernel(domain.fields.buff, density_buff_idx), + dens); + } +#if defined(MPI_ENABLED) + real_t tot_dens { ZERO }; + ncells_t tot_num_cells { 0 }; + MPI_Allreduce(&dens, &tot_dens, 1, mpi::get_type(), MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&num_cells, + &tot_num_cells, + 1, + mpi::get_type(), + MPI_SUM, + MPI_COMM_WORLD); + dens = tot_dens; + num_cells = tot_num_cells; +#endif + if (num_cells > 0) { + return dens / (real_t)(num_cells); + } else { + return ZERO; + } + } + + auto ComputeNumInject(const SimulationParams& params, + Domain& domain, + real_t number_density, + const boundaries_t& box) const + -> std::tuple, array_t> override { + const auto computed_avg_density = ComputeAvgDensity(params, domain); + + const auto result = this->DeduceRegion(domain, box); + if (not std::get<0>(result)) { + return { false, (npart_t)0, array_t {}, array_t {} }; + } + + const auto xi_min = std::get<1>(result); + const auto xi_max = std::get<2>(result); + auto xi_min_h = Kokkos::create_mirror_view(xi_min); + auto xi_max_h = Kokkos::create_mirror_view(xi_max); + Kokkos::deep_copy(xi_min_h, xi_min); + Kokkos::deep_copy(xi_max_h, xi_max); + + long double num_cells { 1.0 }; + for (auto d { 0u }; d < M::Dim; ++d) { + num_cells *= static_cast(xi_max_h(d)) - + static_cast(xi_min_h(d)); + } + + const auto ppc0 = params.template get("particles.ppc0"); + npart_t nparticles { 0u }; + if (number_density > computed_avg_density) { + nparticles = static_cast( + (long double)(ppc0 * (number_density - computed_avg_density) * 0.5) * + num_cells); + } + + return { nparticles != 0u, nparticles, xi_min, xi_max }; + } + }; + template - class ED, - template - class SD> + template class ED, + template class SD> struct NonUniformInjector { using energy_dist_t = ED; using spatial_dist_t = SD; @@ -107,7 +299,7 @@ namespace arch { if constexpr ((O == in::x1) or 
(O == in::x2 and (M::Dim == Dim::_2D or M::Dim == Dim::_3D)) or (O == in::x3 and M::Dim == Dim::_3D)) { - const auto xi = x_Ph[static_cast(O)]; + const auto xi = x_Ph[static_cast(O)]; if constexpr (P) { // + direction if (xi < xsurf - ds or xi >= xsurf) { @@ -184,7 +376,7 @@ namespace arch { if constexpr ((O == in::x1) or (O == in::x2 and (M::Dim == Dim::_2D or M::Dim == Dim::_3D)) or (O == in::x3 and M::Dim == Dim::_3D)) { - const auto xi = x_Ph[static_cast(O)]; + const auto xi = x_Ph[static_cast(O)]; // + direction if (xi < xdrift or xi >= xinj) { return ZERO; @@ -238,16 +430,18 @@ namespace arch { * @param injector Uniform injector object * @param number_density Total number density (in units of n0) * @param use_weights Use weights + * @param box Region to inject the particles in global coords * @tparam S Simulation engine type * @tparam M Metric type * @tparam I Injector type */ template - inline void InjectUniform(const SimulationParams& params, - Domain& domain, - const I& injector, - real_t number_density, - bool use_weights = false) { + inline void InjectUniform(const SimulationParams& params, + Domain& domain, + const I& injector, + real_t number_density, + bool use_weights = false, + const boundaries_t& box = {}) { static_assert(M::is_metric, "M must be a metric class"); static_assert(I::is_uniform_injector, "I must be a uniform injector class"); raise::ErrorIf((M::CoordType != Coord::Cart) && (not use_weights), @@ -267,17 +461,24 @@ namespace arch { } { - auto ppc0 = params.template get("particles.ppc0"); - array_t ni { "ni", M::Dim }; - auto ni_h = Kokkos::create_mirror_view(ni); - ncells_t ncells = 1; - for (auto d = 0; d < M::Dim; ++d) { - ni_h(d) = domain.mesh.n_active()[d]; - ncells *= domain.mesh.n_active()[d]; + boundaries_t nonempty_box; + for (auto d { 0u }; d < M::Dim; ++d) { + if (d < box.size()) { + nonempty_box.push_back({ box[d].first, box[d].second }); + } else { + nonempty_box.push_back(Range::All); + } } - Kokkos::deep_copy(ni, ni_h); - 
const auto nparticles = static_cast( - (long double)(ppc0 * number_density * 0.5) * (long double)(ncells)); + const auto result = injector.ComputeNumInject(params, + domain, + number_density, + nonempty_box); + if (not std::get<0>(result)) { + return; + } + const auto nparticles = std::get<1>(result); + const auto xi_min = std::get<2>(result); + const auto xi_max = std::get<3>(result); Kokkos::parallel_for( "InjectUniform", @@ -290,7 +491,8 @@ namespace arch { domain.species[injector.species.first - 1].npart(), domain.species[injector.species.second - 1].npart(), domain.mesh.metric, - ni, + xi_min, + xi_max, injector.energy_dist, ONE / params.template get("scales.V0"), domain.random_pool)); @@ -301,6 +503,122 @@ namespace arch { } } + namespace experimental { + + template class ED1, + template class ED2> + struct UniformInjector : BaseInjector { + using energy_dist_1_t = ED1; + using energy_dist_2_t = ED2; + static_assert(M::is_metric, "M must be a metric class"); + static_assert(energy_dist_1_t::is_energy_dist, + "ED1 must be an energy distribution class"); + static_assert(energy_dist_2_t::is_energy_dist, + "ED2 must be an energy distribution class"); + static constexpr bool is_uniform_injector { true }; + static constexpr Dimension D { M::Dim }; + static constexpr Coord C { M::CoordType }; + + const energy_dist_1_t energy_dist_1; + const energy_dist_2_t energy_dist_2; + const std::pair species; + + UniformInjector(const energy_dist_1_t& energy_dist_1, + const energy_dist_2_t& energy_dist_2, + const std::pair& species) + : energy_dist_1 { energy_dist_1 } + , energy_dist_2 { energy_dist_2 } + , species { species } {} + + ~UniformInjector() = default; + }; + + /** + * @brief Injects uniform number density of particles everywhere in the domain + * @param domain Domain object + * @param injector Uniform injector object + * @param number_density Total number density (in units of n0) + * @param use_weights Use weights + * @param box Region to inject the particles in 
global coords + * @tparam S Simulation engine type + * @tparam M Metric type + * @tparam I Injector type + */ + template + inline void InjectUniform(const SimulationParams& params, + Domain& domain, + const I& injector, + real_t number_density, + bool use_weights = false, + const boundaries_t& box = {}) { + static_assert(M::is_metric, "M must be a metric class"); + static_assert(I::is_uniform_injector, "I must be a uniform injector class"); + raise::ErrorIf((M::CoordType != Coord::Cart) && (not use_weights), + "Weights must be used for non-Cartesian coordinates", + HERE); + raise::ErrorIf((M::CoordType == Coord::Cart) && use_weights, + "Weights should not be used for Cartesian coordinates", + HERE); + raise::ErrorIf( + params.template get("particles.use_weights") != use_weights, + "Weights must be enabled from the input file to use them in " + "the injector", + HERE); + if (domain.species[injector.species.first - 1].charge() + + domain.species[injector.species.second - 1].charge() != + 0.0f) { + raise::Warning("Total charge of the injected species is non-zero", HERE); + } + + { + boundaries_t nonempty_box; + for (auto d { 0u }; d < M::Dim; ++d) { + if (d < box.size()) { + nonempty_box.push_back({ box[d].first, box[d].second }); + } else { + nonempty_box.push_back(Range::All); + } + } + const auto result = injector.ComputeNumInject(params, + domain, + number_density, + nonempty_box); + if (not std::get<0>(result)) { + return; + } + const auto nparticles = std::get<1>(result); + const auto xi_min = std::get<2>(result); + const auto xi_max = std::get<3>(result); + + Kokkos::parallel_for( + "InjectUniform", + nparticles, + kernel::experimental:: + UniformInjector_kernel( + injector.species.first, + injector.species.second, + domain.species[injector.species.first - 1], + domain.species[injector.species.second - 1], + domain.species[injector.species.first - 1].npart(), + domain.species[injector.species.second - 1].npart(), + domain.mesh.metric, + xi_min, + xi_max, + 
injector.energy_dist_1, + injector.energy_dist_2, + ONE / params.template get("scales.V0"), + domain.random_pool)); + domain.species[injector.species.first - 1].set_npart( + domain.species[injector.species.first - 1].npart() + nparticles); + domain.species[injector.species.second - 1].set_npart( + domain.species[injector.species.second - 1].npart() + nparticles); + } + } + + } // namespace experimental + /** * @brief Injects particles from a globally-defined map * @note very inefficient, should only be used for debug purposes @@ -341,12 +659,12 @@ namespace arch { * @param box Region to inject the particles in */ template - inline void InjectNonUniform(const SimulationParams& params, - Domain& domain, - const I& injector, - real_t number_density, - bool use_weights = false, - boundaries_t box = {}) { + inline void InjectNonUniform(const SimulationParams& params, + Domain& domain, + const I& injector, + real_t number_density, + bool use_weights = false, + const boundaries_t& box = {}) { static_assert(M::is_metric, "M must be a metric class"); static_assert(I::is_nonuniform_injector, "I must be a nonuniform injector class"); diff --git a/src/archetypes/spatial_dist.h b/src/archetypes/spatial_dist.h index ad9404ea3..225c66eb5 100644 --- a/src/archetypes/spatial_dist.h +++ b/src/archetypes/spatial_dist.h @@ -4,7 +4,6 @@ * @implements * - arch::SpatialDistribution<> * - arch::Uniform<> : arch::SpatialDistribution<> - * - arch::Piston<> : arch::SpatialDistribution<> * - arch::Replenish<> : arch::SpatialDistribution<> * @namespace * - arch:: @@ -50,42 +49,18 @@ namespace arch { } }; - template - struct Piston : public arch::SpatialDistribution { - Piston(const M& metric, real_t xmin, real_t xmax, in piston_direction = in::x1) - : arch::SpatialDistribution { metric } - , xmin { xmin } - , xmax { xmax } - , piston_direction { piston_direction } {} - - Inline auto operator()(const coord_t& x_Ph) const -> real_t override { - // dimentsion to fill - const auto fill_dim = 
static_cast(piston_direction); - - if (x_Ph[fill_dim] < xmin || x_Ph[fill_dim] > xmax) { - return ZERO; - } else { - return ONE; - } - } - - private: - real_t xmin, xmax; - in piston_direction; - }; - template struct Replenish : public SpatialDistribution { using SpatialDistribution::metric; const ndfield_t density; - const unsigned short idx; + const idx_t idx; const T target_density; const real_t target_max_density; Replenish(const M& metric, const ndfield_t& density, - unsigned short idx, + idx_t idx, const T& target_density, real_t target_max_density) : SpatialDistribution { metric } diff --git a/src/archetypes/tests/energy_dist.cpp b/src/archetypes/tests/energy_dist.cpp index bad1d0eb9..0d3fc8023 100644 --- a/src/archetypes/tests/energy_dist.cpp +++ b/src/archetypes/tests/energy_dist.cpp @@ -27,7 +27,7 @@ struct Caller { Inline void operator()(index_t) const { vec_t vp { ZERO }; coord_t xp { ZERO }; - for (unsigned short d = 0; d < D; ++d) { + for (dim_t d { 0u }; d < D; ++d) { xp[d] = 5.0; } dist(xp, vp); @@ -54,13 +54,13 @@ void testEnergyDist(const std::vector& res, if constexpr (M::Dim == Dim::_2D) { extent = { ext[0], - {ZERO, constant::PI} + { ZERO, constant::PI } }; } else if constexpr (M::Dim == Dim::_3D) { extent = { ext[0], - {ZERO, constant::PI}, - {ZERO, constant::TWO_PI} + { ZERO, constant::PI }, + { ZERO, constant::TWO_PI } }; } } diff --git a/src/archetypes/tests/powerlaw.cpp b/src/archetypes/tests/powerlaw.cpp index dfcb6b247..58df1f4cf 100644 --- a/src/archetypes/tests/powerlaw.cpp +++ b/src/archetypes/tests/powerlaw.cpp @@ -31,7 +31,7 @@ struct Caller { Inline void operator()(index_t) const { vec_t vp { ZERO }; coord_t xp { ZERO }; - for (unsigned short d = 0; d < D; ++d) { + for (dim_t d { 0u }; d < D; ++d) { xp[d] = 2.0; } dist(xp, vp); @@ -73,13 +73,13 @@ void testEnergyDist(const std::vector& res, if constexpr (M::Dim == Dim::_2D) { extent = { ext[0], - {ZERO, constant::PI} + { ZERO, constant::PI } }; } else if constexpr (M::Dim == 
Dim::_3D) { extent = { ext[0], - {ZERO, constant::PI}, - {ZERO, constant::TWO_PI} + { ZERO, constant::PI }, + { ZERO, constant::TWO_PI } }; } } diff --git a/src/archetypes/tests/spatial_dist.cpp b/src/archetypes/tests/spatial_dist.cpp index 5ab64a156..232ab1eb7 100644 --- a/src/archetypes/tests/spatial_dist.cpp +++ b/src/archetypes/tests/spatial_dist.cpp @@ -80,7 +80,7 @@ struct RadialDist : public SpatialDistribution { coord_t x_Sph { ZERO }; metric.template convert(x_Code, x_Sph); auto r { ZERO }; - for (unsigned short d = 0; d < M::Dim; ++d) { + for (dim_t d { 0u }; d < M::Dim; ++d) { r += SQR(x_Sph[d]); } return math::sqrt(r); @@ -91,14 +91,14 @@ auto main(int argc, char* argv[]) -> int { Kokkos::initialize(argc, argv); try { Minkowski m1 { - { 10, 10}, - {{ -10.0, 55.0 }, { -10.0, 55.0 }} + { 10, 10 }, + { { -10.0, 55.0 }, { -10.0, 55.0 } } }; RadialDist> r1 { m1 }; Minkowski m2 { - { 10, 10, 30}, - {{ -1.0, 1.0 }, { -1.0, 1.0 }, { -3.0, 3.0 }} + { 10, 10, 30 }, + { { -1.0, 1.0 }, { -1.0, 1.0 }, { -3.0, 3.0 } } }; RadialDist> r2 { m2 }; diff --git a/src/checkpoint/reader.cpp b/src/checkpoint/reader.cpp index 6e32cb3b9..24aa0f079 100644 --- a/src/checkpoint/reader.cpp +++ b/src/checkpoint/reader.cpp @@ -42,7 +42,7 @@ namespace checkpoint { auto ReadParticleCount(adios2::IO& io, adios2::Engine& reader, - unsigned short s, + spidx_t s, std::size_t local_dom, std::size_t ndomains) -> std::pair { logger::Checkpoint(fmt::format("Reading particle count for: %d", s + 1), HERE); @@ -85,7 +85,7 @@ namespace checkpoint { void ReadParticleData(adios2::IO& io, adios2::Engine& reader, const std::string& quantity, - unsigned short s, + spidx_t s, array_t& array, npart_t count, npart_t offset) { @@ -110,7 +110,7 @@ namespace checkpoint { void ReadParticlePayloads(adios2::IO& io, adios2::Engine& reader, - unsigned short s, + spidx_t s, array_t& array, std::size_t nplds, npart_t count, @@ -164,28 +164,28 @@ namespace checkpoint { template void ReadParticleData(adios2::IO&, 
adios2::Engine&, const std::string&, - unsigned short, + spidx_t, array_t&, npart_t, npart_t); template void ReadParticleData(adios2::IO&, adios2::Engine&, const std::string&, - unsigned short, + spidx_t, array_t&, npart_t, npart_t); template void ReadParticleData(adios2::IO&, adios2::Engine&, const std::string&, - unsigned short, + spidx_t, array_t&, npart_t, npart_t); template void ReadParticleData(adios2::IO&, adios2::Engine&, const std::string&, - unsigned short, + spidx_t, array_t&, npart_t, npart_t); diff --git a/src/checkpoint/reader.h b/src/checkpoint/reader.h index 883a1d125..7939ba82b 100644 --- a/src/checkpoint/reader.h +++ b/src/checkpoint/reader.h @@ -32,7 +32,7 @@ namespace checkpoint { auto ReadParticleCount(adios2::IO&, adios2::Engine&, - unsigned short, + spidx_t, std::size_t, std::size_t) -> std::pair; @@ -40,14 +40,14 @@ namespace checkpoint { void ReadParticleData(adios2::IO&, adios2::Engine&, const std::string&, - unsigned short, + spidx_t, array_t&, npart_t, npart_t); void ReadParticlePayloads(adios2::IO&, adios2::Engine&, - unsigned short, + spidx_t, array_t&, std::size_t, npart_t, diff --git a/src/checkpoint/writer.cpp b/src/checkpoint/writer.cpp index fe77d3b56..b8571c246 100644 --- a/src/checkpoint/writer.cpp +++ b/src/checkpoint/writer.cpp @@ -200,7 +200,7 @@ namespace checkpoint { m_writer.Put(var, &data); } - void Writer::saveAttrs(const ntt::SimulationParams& params, long double time) { + void Writer::saveAttrs(const ntt::SimulationParams& params, simtime_t time) { CallOnce([&]() { std::ofstream metadata; if (m_written.empty()) { diff --git a/src/checkpoint/writer.h b/src/checkpoint/writer.h index 992c54c96..91c9d7a41 100644 --- a/src/checkpoint/writer.h +++ b/src/checkpoint/writer.h @@ -54,7 +54,7 @@ namespace checkpoint { void beginSaving(timestep_t, simtime_t); void endSaving(); - void saveAttrs(const ntt::SimulationParams&, long double); + void saveAttrs(const ntt::SimulationParams&, simtime_t); template void 
savePerDomainVariable(const std::string&, std::size_t, std::size_t, T); diff --git a/src/engines/engine_init.cpp b/src/engines/engine_init.cpp index 7ce242bc6..833a7771f 100644 --- a/src/engines/engine_init.cpp +++ b/src/engines/engine_init.cpp @@ -21,6 +21,7 @@ namespace ntt { template void Engine::init() { if constexpr (pgen_is_ok) { + m_metadomain.InitStatsWriter(m_params, is_resuming); #if defined(OUTPUT_ENABLED) m_metadomain.InitWriter(&m_adios, m_params, is_resuming); m_metadomain.InitCheckpointWriter(&m_adios, m_params); diff --git a/src/engines/engine_printer.cpp b/src/engines/engine_printer.cpp index eb8ff402d..9d1f74a74 100644 --- a/src/engines/engine_printer.cpp +++ b/src/engines/engine_printer.cpp @@ -17,6 +17,8 @@ #if defined(CUDA_ENABLED) #include +#elif defined(HIP_ENABLED) + #include #endif #if defined(OUTPUT_ENABLED) @@ -177,8 +179,16 @@ namespace ntt { const auto minor { cuda_v % 1000 / 10 }; const auto patch { cuda_v % 10 }; const auto cuda_version = fmt::format("%d.%d.%d", major, minor, patch); -#else // not CUDA_ENABLED - const std::string cuda_version = "OFF"; +#elif defined(HIP_ENABLED) + int hip_v; + auto status = hipDriverGetVersion(&hip_v); + raise::ErrorIf(status != hipSuccess, + "hipDriverGetVersion failed with error code %d", + HERE); + const auto major { hip_v / 10000000 }; + const auto minor { (hip_v % 10000000) / 100000 }; + const auto patch { hip_v % 100000 }; + const auto hip_version = fmt::format("%d.%d.%d", major, minor, patch); #endif const auto kokkos_version = fmt::format("%d.%d.%d", @@ -204,16 +214,89 @@ namespace ntt { report += "\n\n"; add_header(report, { entity_version }, { color::BRIGHT_GREEN }); report += "\n"; + + /* + * Backend + */ add_category(report, 4, "Backend"); add_param(report, 4, "Build hash", "%s", hash.c_str()); add_param(report, 4, "CXX", "%s [%s]", ccx.c_str(), cpp_standard.c_str()); +#if defined(CUDA_ENABLED) add_param(report, 4, "CUDA", "%s", cuda_version.c_str()); +#elif defined(HIP_VERSION) + 
add_param(report, 4, "HIP", "%s", hip_version.c_str()); +#endif add_param(report, 4, "MPI", "%s", mpi_version.c_str()); +#if defined(MPI_ENABLED) && defined(DEVICE_ENABLED) + #if defined(GPU_AWARE_MPI) + const std::string gpu_aware_mpi = "ON"; + #else + const std::string gpu_aware_mpi = "OFF"; + #endif + add_param(report, 4, "GPU-aware MPI", "%s", gpu_aware_mpi.c_str()); +#endif add_param(report, 4, "Kokkos", "%s", kokkos_version.c_str()); add_param(report, 4, "ADIOS2", "%s", adios2_version.c_str()); add_param(report, 4, "Precision", "%s", precision); add_param(report, 4, "Debug", "%s", dbg.c_str()); report += "\n"; + + /* + * Compilation flags + */ + add_category(report, 4, "Compilation flags"); +#if defined(SINGLE_PRECISION) + add_param(report, 4, "SINGLE_PRECISION", "%s", "ON"); +#else + add_param(report, 4, "SINGLE_PRECISION", "%s", "OFF"); +#endif + +#if defined(OUTPUT_ENABLED) + add_param(report, 4, "OUTPUT_ENABLED", "%s", "ON"); +#else + add_param(report, 4, "OUTPUT_ENABLED", "%s", "OFF"); +#endif + +#if defined(DEBUG) + add_param(report, 4, "DEBUG", "%s", "ON"); +#else + add_param(report, 4, "DEBUG", "%s", "OFF"); +#endif + +#if defined(CUDA_ENABLED) + add_param(report, 4, "CUDA_ENABLED", "%s", "ON"); +#else + add_param(report, 4, "CUDA_ENABLED", "%s", "OFF"); +#endif + +#if defined(HIP_ENABLED) + add_param(report, 4, "HIP_ENABLED", "%s", "ON"); +#else + add_param(report, 4, "HIP_ENABLED", "%s", "OFF"); +#endif + +#if defined(DEVICE_ENABLED) + add_param(report, 4, "DEVICE_ENABLED", "%s", "ON"); +#else + add_param(report, 4, "DEVICE_ENABLED", "%s", "OFF"); +#endif + +#if defined(MPI_ENABLED) + add_param(report, 4, "MPI_ENABLED", "%s", "ON"); +#else + add_param(report, 4, "MPI_ENABLED", "%s", "OFF"); +#endif + +#if defined(GPU_AWARE_MPI) + add_param(report, 4, "GPU_AWARE_MPI", "%s", "ON"); +#else + add_param(report, 4, "GPU_AWARE_MPI", "%s", "OFF"); +#endif + report += "\n"; + + /* + * Simulation configs + */ add_category(report, 4, "Configuration"); 
add_param(report, 4, @@ -224,7 +307,7 @@ namespace ntt { add_param(report, 4, "Engine", "%s", SimEngine(S).to_string()); add_param(report, 4, "Metric", "%s", Metric(M::MetricType).to_string()); add_param(report, 4, "Timestep [dt]", "%.3e", dt); - add_param(report, 4, "Runtime", "%.3Le [%d steps]", runtime, max_steps); + add_param(report, 4, "Runtime", "%.3e [%d steps]", runtime, max_steps); report += "\n"; add_category(report, 4, "Global domain"); add_param(report, diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index 2d4b0d5ed..472acae46 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -84,6 +84,11 @@ namespace ntt { } else { print_output = m_metadomain.Write(m_params, step, step - 1, time, time - dt); } + print_output &= m_metadomain.WriteStats(m_params, + step, + step - 1, + time, + time - dt); timers.stop("Output"); timers.start("Checkpoint"); diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 62cd23ad9..0cdedb9b1 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -79,7 +79,7 @@ namespace ntt { "algorithms.toggles.fieldsolver"); const auto deposit_enabled = m_params.template get( "algorithms.toggles.deposit"); - const auto clear_interval = m_params.template get( + const auto clear_interval = m_params.template get( "particles.clear_interval"); if (step == 0) { @@ -279,6 +279,9 @@ namespace ntt { } } for (auto& species : domain.species) { + if ((species.pusher() == PrtlPusher::NONE) or (species.npart() == 0)) { + continue; + } species.set_unsorted(); logger::Checkpoint( fmt::format("Launching particle pusher kernel for %d [%s] : %lu", @@ -286,9 +289,6 @@ namespace ntt { species.label().c_str(), species.npart()), HERE); - if (species.npart() == 0) { - continue; - } const auto q_ovr_m = species.mass() > ZERO ? 
species.charge() / species.mass() : ZERO; @@ -481,6 +481,10 @@ namespace ntt { auto scatter_cur = Kokkos::Experimental::create_scatter_view( domain.fields.cur); for (auto& species : domain.species) { + if ((species.pusher() == PrtlPusher::NONE) or (species.npart() == 0) or + cmp::AlmostZero_host(species.charge())) { + continue; + } logger::Checkpoint( fmt::format("Launching currents deposit kernel for %d [%s] : %lu %f", species.index(), @@ -488,9 +492,6 @@ namespace ntt { species.npart(), (double)species.charge()), HERE); - if (species.npart() == 0 || cmp::AlmostZero(species.charge())) { - continue; - } Kokkos::parallel_for("CurrentsDeposit", species.rangeActiveParticles(), kernel::DepositCurrents_kernel( @@ -522,22 +523,41 @@ namespace ntt { void CurrentsAmpere(domain_t& domain) { logger::Checkpoint("Launching Ampere kernel for adding currents", HERE); - const auto q0 = m_params.template get("scales.q0"); - const auto n0 = m_params.template get("scales.n0"); - const auto B0 = m_params.template get("scales.B0"); - const auto coeff = -dt * q0 * n0 / B0; + const auto q0 = m_params.template get("scales.q0"); + const auto n0 = m_params.template get("scales.n0"); + const auto B0 = m_params.template get("scales.B0"); if constexpr (M::CoordType == Coord::Cart) { // minkowski case - const auto V0 = m_params.template get("scales.V0"); - - Kokkos::parallel_for( - "Ampere", - domain.mesh.rangeActiveCells(), - kernel::mink::CurrentsAmpere_kernel(domain.fields.em, - domain.fields.cur, - coeff / V0, - ONE / n0)); + const auto V0 = m_params.template get("scales.V0"); + const auto ppc0 = m_params.template get("particles.ppc0"); + const auto coeff = -dt * q0 / (B0 * V0); + if constexpr ( + traits::has_member::value) { + const std::vector xmin { domain.mesh.extent(in::x1).first, + domain.mesh.extent(in::x2).first, + domain.mesh.extent(in::x3).first }; + const auto ext_current = m_pgen.ext_current; + const auto dx = domain.mesh.metric.template sqrt_h_<1, 1>({}); + // clang-format off 
+ Kokkos::parallel_for( + "Ampere", + domain.mesh.rangeActiveCells(), + kernel::mink::CurrentsAmpere_kernel( + domain.fields.em, domain.fields.cur, + coeff, ppc0, ext_current, xmin, dx)); + // clang-format on + } else { + Kokkos::parallel_for( + "Ampere", + domain.mesh.rangeActiveCells(), + kernel::mink::CurrentsAmpere_kernel(domain.fields.em, + domain.fields.cur, + coeff, + ppc0)); + } } else { + // non-minkowski + const auto coeff = -dt * q0 * n0 / B0; auto range = range_with_axis_BCs(domain); const auto ni2 = domain.mesh.n_active(in::x2); Kokkos::parallel_for( @@ -569,7 +589,7 @@ namespace ntt { size[2] = domain.mesh.n_active(in::x3); } // !TODO: this needs to be done more efficiently - for (unsigned short i = 0; i < nfilter; ++i) { + for (auto i { 0u }; i < nfilter; ++i) { Kokkos::deep_copy(domain.fields.buff, domain.fields.cur); Kokkos::parallel_for("CurrentsFilter", range, @@ -616,23 +636,27 @@ namespace ntt { /** * matching boundaries */ - const auto ds = m_params.template get("grid.boundaries.match.ds"); + const auto ds_array = m_params.template get>( + "grid.boundaries.match.ds"); const auto dim = direction.get_dim(); real_t xg_min, xg_max, xg_edge; auto sign = direction.get_sign(); + real_t ds; if (sign > 0) { // + direction + ds = ds_array[(short)dim].second; xg_max = m_metadomain.mesh().extent(dim).second; xg_min = xg_max - ds; xg_edge = xg_max; } else { // - direction + ds = ds_array[(short)dim].first; xg_min = m_metadomain.mesh().extent(dim).first; xg_max = xg_min + ds; xg_edge = xg_min; } boundaries_t box; boundaries_t incl_ghosts; - for (unsigned short d { 0 }; d < M::Dim; ++d) { - if (d == static_cast(dim)) { + for (dim_t d { 0 }; d < M::Dim; ++d) { + if (d == static_cast(dim)) { box.push_back({ xg_min, xg_max }); if (sign > 0) { incl_ghosts.push_back({ false, true }); @@ -655,50 +679,87 @@ namespace ntt { range_min[d] = intersect_range[d].first; range_max[d] = intersect_range[d].second; } - if constexpr (traits::has_member::value) { - auto 
match_fields = m_pgen.MatchFields(time); - if (dim == in::x1) { - Kokkos::parallel_for( - "MatchFields", - CreateRangePolicy(range_min, range_max), - kernel::bc::MatchBoundaries_kernel( - domain.fields.em, - match_fields, - domain.mesh.metric, - xg_edge, - ds, - tags)); - } else if (dim == in::x2) { - if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { - Kokkos::parallel_for( - "MatchFields", - CreateRangePolicy(range_min, range_max), - kernel::bc::MatchBoundaries_kernel( - domain.fields.em, - match_fields, - domain.mesh.metric, - xg_edge, - ds, - tags)); - } else { - raise::Error("Invalid dimension", HERE); + + if (dim == in::x1) { + if constexpr ( + traits::has_member::value) { + auto match_fields = m_pgen.MatchFields(time); + call_match_fields(domain.fields.em, + match_fields, + domain.mesh.metric, + xg_edge, + ds, + tags, + range_min, + range_max); + } else if constexpr ( + traits::has_member::value) { + auto match_fields = m_pgen.MatchFieldsInX1(time); + call_match_fields(domain.fields.em, + match_fields, + domain.mesh.metric, + xg_edge, + ds, + tags, + range_min, + range_max); + } + } else if (dim == in::x2) { + if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { + if constexpr ( + traits::has_member::value) { + auto match_fields = m_pgen.MatchFields(time); + call_match_fields(domain.fields.em, + match_fields, + domain.mesh.metric, + xg_edge, + ds, + tags, + range_min, + range_max); + } else if constexpr ( + traits::has_member::value) { + auto match_fields = m_pgen.MatchFieldsInX2(time); + call_match_fields(domain.fields.em, + match_fields, + domain.mesh.metric, + xg_edge, + ds, + tags, + range_min, + range_max); } - } else if (dim == in::x3) { - if constexpr (M::Dim == Dim::_3D) { - Kokkos::parallel_for( - "MatchFields", - CreateRangePolicy(range_min, range_max), - kernel::bc::MatchBoundaries_kernel( - domain.fields.em, - match_fields, - domain.mesh.metric, - xg_edge, - ds, - tags)); - } else { - raise::Error("Invalid dimension", HERE); + } else 
{ + raise::Error("Invalid dimension", HERE); + } + } else if (dim == in::x3) { + if constexpr (M::Dim == Dim::_3D) { + if constexpr ( + traits::has_member::value) { + auto match_fields = m_pgen.MatchFields(time); + call_match_fields(domain.fields.em, + match_fields, + domain.mesh.metric, + xg_edge, + ds, + tags, + range_min, + range_max); + } else if constexpr ( + traits::has_member::value) { + auto match_fields = m_pgen.MatchFieldsInX3(time); + call_match_fields(domain.fields.em, + match_fields, + domain.mesh.metric, + xg_edge, + ds, + tags, + range_min, + range_max); } } + } else { + raise::Error("Invalid dimension", HERE); } } @@ -767,7 +828,7 @@ namespace ntt { } std::vector xi_min, xi_max; const std::vector all_dirs { in::x1, in::x2, in::x3 }; - for (unsigned short d { 0 }; d < static_cast(M::Dim); ++d) { + for (dim_t d { 0u }; d < M::Dim; ++d) { const auto dd = all_dirs[d]; if (dim == dd) { if (sign > 0) { // + direction @@ -866,7 +927,7 @@ namespace ntt { const std::vector all_dirs { in::x1, in::x2, in::x3 }; - for (unsigned short d { 0 }; d < static_cast(M::Dim); ++d) { + for (auto d { 0u }; d < M::Dim; ++d) { const auto dd = all_dirs[d]; if (dim == dd) { xi_min.push_back(0); @@ -893,6 +954,12 @@ namespace ntt { } else { raise::Error("Invalid dimension", HERE); } + std::size_t i_edge; + if (sign > 0) { + i_edge = domain.mesh.i_max(dim); + } else { + i_edge = domain.mesh.i_min(dim); + } if (dim == in::x1) { if (sign > 0) { @@ -901,6 +968,7 @@ namespace ntt { range, kernel::bc::ConductorBoundaries_kernel( domain.fields.em, + i_edge, tags)); } else { Kokkos::parallel_for( @@ -908,39 +976,52 @@ namespace ntt { range, kernel::bc::ConductorBoundaries_kernel( domain.fields.em, + i_edge, tags)); } } else if (dim == in::x2) { - if (sign > 0) { - Kokkos::parallel_for( - "ConductorFields", - range, - kernel::bc::ConductorBoundaries_kernel( - domain.fields.em, - tags)); + if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { + if (sign > 0) { + 
Kokkos::parallel_for( + "ConductorFields", + range, + kernel::bc::ConductorBoundaries_kernel( + domain.fields.em, + i_edge, + tags)); + } else { + Kokkos::parallel_for( + "ConductorFields", + range, + kernel::bc::ConductorBoundaries_kernel( + domain.fields.em, + i_edge, + tags)); + } } else { - Kokkos::parallel_for( - "ConductorFields", - range, - kernel::bc::ConductorBoundaries_kernel( - domain.fields.em, - tags)); + raise::Error("Invalid dimension", HERE); } } else { - if (sign > 0) { - Kokkos::parallel_for( - "ConductorFields", - range, - kernel::bc::ConductorBoundaries_kernel( - domain.fields.em, - tags)); + if constexpr (M::Dim == Dim::_3D) { + if (sign > 0) { + Kokkos::parallel_for( + "ConductorFields", + range, + kernel::bc::ConductorBoundaries_kernel( + domain.fields.em, + i_edge, + tags)); + } else { + Kokkos::parallel_for( + "ConductorFields", + range, + kernel::bc::ConductorBoundaries_kernel( + domain.fields.em, + i_edge, + tags)); + } } else { - Kokkos::parallel_for( - "ConductorFields", - range, - kernel::bc::ConductorBoundaries_kernel( - domain.fields.em, - tags)); + raise::Error("Invalid dimension", HERE); } } } @@ -954,10 +1035,10 @@ namespace ntt { */ if constexpr (traits::has_member::value) { const auto [sign, dim, xg_min, xg_max] = get_atm_extent(direction); - const auto dd = static_cast(dim); + const auto dd = static_cast(dim); boundaries_t box; boundaries_t incl_ghosts; - for (unsigned short d { 0 }; d < M::Dim; ++d) { + for (auto d { 0u }; d < M::Dim; ++d) { if (d == dd) { box.push_back({ xg_min, xg_max }); if (sign > 0) { @@ -977,7 +1058,7 @@ namespace ntt { tuple_t range_min { 0 }; tuple_t range_max { 0 }; - for (unsigned short d { 0 }; d < M::Dim; ++d) { + for (auto d { 0u }; d < M::Dim; ++d) { range_min[d] = intersect_range[d].first; range_max[d] = intersect_range[d].second; } @@ -1104,9 +1185,8 @@ namespace ntt { "grid.boundaries.atmosphere.temperature"); const auto height = m_params.template get( "grid.boundaries.atmosphere.height"); - 
const auto species = - m_params.template get>( - "grid.boundaries.atmosphere.species"); + const auto species = m_params.template get>( + "grid.boundaries.atmosphere.species"); const auto nmax = m_params.template get( "grid.boundaries.atmosphere.density"); @@ -1139,7 +1219,7 @@ namespace ntt { } } else { for (const auto& sp : - std::vector({ species.first, species.second })) { + std::vector { species.first, species.second }) { auto& prtl_spec = domain.species[sp - 1]; if (prtl_spec.npart() == 0) { continue; @@ -1381,6 +1461,26 @@ namespace ntt { } return range; } + + template + void call_match_fields(ndfield_t& fields, + const T& match_fields, + const M& metric, + real_t xg_edge, + real_t ds, + BCTags tags, + tuple_t& range_min, + tuple_t& range_max) { + Kokkos::parallel_for( + "MatchFields", + CreateRangePolicy(range_min, range_max), + kernel::bc::MatchBoundaries_kernel(fields, + match_fields, + metric, + xg_edge, + ds, + tags)); + } }; } // namespace ntt diff --git a/src/framework/CMakeLists.txt b/src/framework/CMakeLists.txt index 4c407fb0c..b74d11bec 100644 --- a/src/framework/CMakeLists.txt +++ b/src/framework/CMakeLists.txt @@ -12,6 +12,7 @@ # * domain/checkpoint.cpp # * containers/particles.cpp # * containers/fields.cpp +# * domain/stats.cpp # * domain/output.cpp # # @includes: @@ -23,7 +24,7 @@ # * ntt_global [required] # * ntt_metrics [required] # * ntt_kernels [required] -# * ntt_output [optional] +# * ntt_output [required] # # @uses: # @@ -40,6 +41,7 @@ set(SOURCES ${SRC_DIR}/domain/grid.cpp ${SRC_DIR}/domain/metadomain.cpp ${SRC_DIR}/domain/communications.cpp + ${SRC_DIR}/domain/stats.cpp ${SRC_DIR}/containers/particles.cpp ${SRC_DIR}/containers/fields.cpp) if(${output}) @@ -48,9 +50,8 @@ if(${output}) endif() add_library(ntt_framework ${SOURCES}) -set(libs ntt_global ntt_metrics ntt_kernels) +set(libs ntt_global ntt_metrics ntt_kernels ntt_output) if(${output}) - list(APPEND libs ntt_output) list(APPEND libs ntt_checkpoint) endif() 
add_dependencies(ntt_framework ${libs}) diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index 2f59a004d..b69e48de4 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -21,7 +21,7 @@ namespace ntt { template - Particles::Particles(unsigned short index, + Particles::Particles(spidx_t index, const std::string& label, float m, float ch, diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index 5241822e2..8ff74be33 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -80,7 +80,7 @@ namespace ntt { * @param cooling The cooling mechanism assigned for the species * @param npld The number of payloads for the species */ - Particles(unsigned short index, + Particles(spidx_t index, const std::string& label, float m, float ch, diff --git a/src/framework/containers/species.h b/src/framework/containers/species.h index 6dd437819..ada0282e2 100644 --- a/src/framework/containers/species.h +++ b/src/framework/containers/species.h @@ -20,15 +20,15 @@ namespace ntt { class ParticleSpecies { protected: // Species index - const unsigned short m_index; + const spidx_t m_index; // Species label - const std::string m_label; + const std::string m_label; // Species mass in units of m0 - const float m_mass; + const float m_mass; // Species charge in units of q0 - const float m_charge; + const float m_charge; // Max number of allocated particles for the species - npart_t m_maxnpart; + npart_t m_maxnpart; // Pusher assigned for the species const PrtlPusher m_pusher; @@ -44,7 +44,7 @@ namespace ntt { public: ParticleSpecies() - : m_index { 0 } + : m_index { 0u } , m_label { "" } , m_mass { 0.0 } , m_charge { 0.0 } @@ -64,7 +64,7 @@ namespace ntt { * @param maxnpart The maximum number of allocated particles for the species. * @param pusher The pusher assigned for the species. 
*/ - ParticleSpecies(unsigned short index, + ParticleSpecies(spidx_t index, const std::string& label, float m, float ch, @@ -91,7 +91,7 @@ namespace ntt { ~ParticleSpecies() = default; [[nodiscard]] - auto index() const -> unsigned short { + auto index() const -> spidx_t { return m_index; } diff --git a/src/framework/domain/checkpoint.cpp b/src/framework/domain/checkpoint.cpp index 978a1ad10..656ff57d0 100644 --- a/src/framework/domain/checkpoint.cpp +++ b/src/framework/domain/checkpoint.cpp @@ -307,7 +307,7 @@ namespace ntt { range3, domain.fields.cur0); } - for (auto s { 0u }; s < (unsigned short)(domain.species.size()); ++s) { + for (auto s { 0u }; s < domain.species.size(); ++s) { const auto [loc_npart, offset_npart] = checkpoint::ReadParticleCount(io, reader, s, ldidx, ndomains()); raise::ErrorIf(loc_npart > domain.species[s].maxnpart(), diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index d9783bdb6..e0d0cb4b2 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -33,6 +33,214 @@ namespace comm { using namespace ntt; + namespace flds { + template + void send_recv(ndarray_t& send_arr, + ndarray_t& recv_arr, + int send_rank, + int recv_rank, + ncells_t nsend, + ncells_t nrecv) { +#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) + MPI_Sendrecv(send_arr.data(), + nsend, + mpi::get_type(), + send_rank, + 0, + recv_arr.data(), + nrecv, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); +#else + auto send_arr_h = Kokkos::create_mirror_view(send_arr); + auto recv_arr_h = Kokkos::create_mirror_view(recv_arr); + Kokkos::deep_copy(send_arr_h, send_arr); + MPI_Sendrecv(send_arr_h.data(), + nsend, + mpi::get_type(), + send_rank, + 0, + recv_arr_h.data(), + nrecv, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + Kokkos::deep_copy(recv_arr, recv_arr_h); +#endif + } + + template + void send(ndarray_t& send_arr, int send_rank, ncells_t nsend) { 
+#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) + MPI_Send(send_arr.data(), nsend, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); +#else + auto send_arr_h = Kokkos::create_mirror_view(send_arr); + Kokkos::deep_copy(send_arr_h, send_arr); + MPI_Send(send_arr_h.data(), + nsend, + mpi::get_type(), + send_rank, + 0, + MPI_COMM_WORLD); +#endif + } + + template + void recv(ndarray_t& recv_arr, int recv_rank, ncells_t nrecv) { +#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) + MPI_Recv(recv_arr.data(), + nrecv, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); +#else + auto recv_arr_h = Kokkos::create_mirror_view(recv_arr); + MPI_Recv(recv_arr_h.data(), + nrecv, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + Kokkos::deep_copy(recv_arr, recv_arr_h); +#endif + } + + template + void communicate(ndarray_t& send_arr, + ndarray_t& recv_arr, + int send_rank, + int recv_rank, + ncells_t nsend, + ncells_t nrecv) { + if (send_rank >= 0 and recv_rank >= 0 and nsend > 0 and nrecv > 0) { + send_recv(send_arr, recv_arr, send_rank, recv_rank, nsend, nrecv); + } else if (send_rank >= 0 and nsend > 0) { + send(send_arr, send_rank, nsend); + } else if (recv_rank >= 0 and nrecv > 0) { + recv(recv_arr, recv_rank, nrecv); + } + } + + } // namespace flds + + namespace prtls { + template + void send_recv(array_t& send_arr, + array_t& recv_arr, + int send_rank, + int recv_rank, + npart_t nsend, + npart_t nrecv, + npart_t offset) { +#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) + MPI_Sendrecv(send_arr.data(), + nsend, + mpi::get_type(), + send_rank, + 0, + recv_arr.data() + offset, + nrecv, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); +#else + const auto slice = std::make_pair(offset, offset + nrecv); + + auto send_arr_h = Kokkos::create_mirror_view(send_arr); + auto recv_arr_h = Kokkos::create_mirror_view( + Kokkos::subview(recv_arr, slice)); + Kokkos::deep_copy(send_arr_h, 
send_arr); + MPI_Sendrecv(send_arr_h.data(), + nsend, + mpi::get_type(), + send_rank, + 0, + recv_arr_h.data(), + nrecv, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + Kokkos::deep_copy(Kokkos::subview(recv_arr, slice), recv_arr_h); +#endif + } + + template + void send(array_t& send_arr, int send_rank, npart_t nsend) { +#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) + MPI_Send(send_arr.data(), nsend, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); +#else + auto send_arr_h = Kokkos::create_mirror_view(send_arr); + Kokkos::deep_copy(send_arr_h, send_arr); + MPI_Send(send_arr_h.data(), nsend, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); +#endif + } + + template + void recv(array_t& recv_arr, int recv_rank, npart_t nrecv, npart_t offset) { +#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) + MPI_Recv(recv_arr.data() + offset, + nrecv, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); +#else + const auto slice = std::make_pair(offset, offset + nrecv); + + auto recv_arr_h = Kokkos::create_mirror_view( + Kokkos::subview(recv_arr, slice)); + MPI_Recv(recv_arr_h.data(), + nrecv, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + Kokkos::deep_copy(Kokkos::subview(recv_arr, slice), recv_arr_h); +#endif + } + + template + void communicate(array_t& send_arr, + array_t& recv_arr, + int send_rank, + int recv_rank, + npart_t nsend, + npart_t nrecv, + npart_t offset) { + if (send_rank >= 0 && recv_rank >= 0) { + raise::ErrorIf( + nrecv + offset > recv_arr.extent(0), + "recv_arr is not large enough to hold the received particles", + HERE); + send_recv(send_arr, recv_arr, send_rank, recv_rank, nsend, nrecv, offset); + } else if (send_rank >= 0) { + send(send_arr, send_rank, nsend); + } else if (recv_rank >= 0) { + raise::ErrorIf( + nrecv + offset > recv_arr.extent(0), + "recv_arr is not large enough to hold the received particles", + HERE); + recv(recv_arr, recv_rank, nrecv, 
offset); + } else { + raise::Error("CommunicateParticles called with negative ranks", HERE); + } + } + } // namespace prtls + template inline void CommunicateField(unsigned int idx, ndfield_t& fld, @@ -131,7 +339,7 @@ namespace comm { } else { ncells_t nsend { comps.second - comps.first }, nrecv { comps.second - comps.first }; - ndarray_t(D) + 1> send_fld, recv_fld; + ndarray_t(D) + 1> send_fld, recv_fld; for (short d { 0 }; d < (short)D; ++d) { if (send_rank >= 0) { @@ -185,38 +393,12 @@ namespace comm { } } - if (send_rank >= 0 && recv_rank >= 0) { - MPI_Sendrecv(send_fld.data(), - nsend, - mpi::get_type(), - send_rank, - 0, - recv_fld.data(), - nrecv, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } else if (send_rank >= 0) { - MPI_Send(send_fld.data(), - nsend, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - - } else if (recv_rank >= 0) { - MPI_Recv(recv_fld.data(), - nrecv, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } else { - raise::Error("CommunicateField called with negative ranks", HERE); - } + flds::communicate(D) + 1>(send_fld, + recv_fld, + send_rank, + recv_rank, + nsend, + nrecv); if (recv_rank >= 0) { @@ -398,124 +580,35 @@ namespace comm { const auto recv_offset_prtldx = current_received * NPRTLDX; const auto recv_offset_pld = current_received * NPLDS; - if ((send_rank >= 0) and (recv_rank >= 0) and (npart_send_in > 0) and - (npart_recv_in > 0)) { - raise::ErrorIf(recv_offset_int + npart_recv_in * NINTS > - recv_buff_int.extent(0), - "incorrect # of recv particles", - HERE); - MPI_Sendrecv(send_buff_int.data(), - npart_send_in * NINTS, - mpi::get_type(), - send_rank, - 0, - recv_buff_int.data() + recv_offset_int, - npart_recv_in * NINTS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - MPI_Sendrecv(send_buff_real.data(), - npart_send_in * NREALS, - mpi::get_type(), - send_rank, - 0, - recv_buff_real.data() + recv_offset_real, - npart_recv_in * 
NREALS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - MPI_Sendrecv(send_buff_prtldx.data(), - npart_send_in * NPRTLDX, - mpi::get_type(), - send_rank, - 0, - recv_buff_prtldx.data() + recv_offset_prtldx, - npart_recv_in * NPRTLDX, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - if (NPLDS > 0) { - MPI_Sendrecv(send_buff_pld.data(), - npart_send_in * NPLDS, - mpi::get_type(), - send_rank, - 0, - recv_buff_pld.data() + recv_offset_pld, - npart_recv_in * NPLDS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } - } else if ((send_rank >= 0) and (npart_send_in > 0)) { - MPI_Send(send_buff_int.data(), - npart_send_in * NINTS, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - MPI_Send(send_buff_real.data(), - npart_send_in * NREALS, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - MPI_Send(send_buff_prtldx.data(), - npart_send_in * NPRTLDX, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - if (NPLDS > 0) { - MPI_Send(send_buff_pld.data(), - npart_send_in * NPLDS, - mpi::get_type(), - send_rank, - 0, - MPI_COMM_WORLD); - } - } else if ((recv_rank >= 0) and (npart_recv_in > 0)) { - raise::ErrorIf(recv_offset_int + npart_recv_in * NINTS > - recv_buff_int.extent(0), - "incorrect # of recv particles", - HERE); - MPI_Recv(recv_buff_int.data() + recv_offset_int, - npart_recv_in * NINTS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - MPI_Recv(recv_buff_real.data() + recv_offset_real, - npart_recv_in * NREALS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - MPI_Recv(recv_buff_prtldx.data() + recv_offset_prtldx, - npart_recv_in * NPRTLDX, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - if (NPLDS > 0) { - MPI_Recv(recv_buff_pld.data() + recv_offset_pld, - npart_recv_in * NPLDS, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } + 
prtls::communicate(send_buff_int, + recv_buff_int, + send_rank, + recv_rank, + npart_send_in * NINTS, + npart_recv_in * NINTS, + recv_offset_int); + prtls::communicate(send_buff_real, + recv_buff_real, + send_rank, + recv_rank, + npart_send_in * NREALS, + npart_recv_in * NREALS, + recv_offset_real); + prtls::communicate(send_buff_prtldx, + recv_buff_prtldx, + send_rank, + recv_rank, + npart_send_in * NPRTLDX, + npart_recv_in * NPRTLDX, + recv_offset_prtldx); + if (NPLDS > 0) { + prtls::communicate(send_buff_pld, + recv_buff_pld, + send_rank, + recv_rank, + npart_send_in * NPLDS, + npart_recv_in * NPLDS, + recv_offset_pld); } current_received += npart_recv_in; iteration++; diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index cf7e04974..841c9a7da 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -36,10 +36,10 @@ namespace ntt { using comm_params_t = std::pair>; template - auto GetSendRecvRanks(Metadomain* metadomain, - Domain& domain, - dir::direction_t direction) - -> std::pair { + auto GetSendRecvRanks( + Metadomain* metadomain, + Domain& domain, + dir::direction_t direction) -> std::pair { Domain* send_to_nghbr_ptr = nullptr; Domain* recv_from_nghbr_ptr = nullptr; // set pointers to the correct send/recv domains @@ -119,11 +119,11 @@ namespace ntt { } template - auto GetSendRecvParams(Metadomain* metadomain, - Domain& domain, - dir::direction_t direction, - bool synchronize) - -> std::pair { + auto GetSendRecvParams( + Metadomain* metadomain, + Domain& domain, + dir::direction_t direction, + bool synchronize) -> std::pair { const auto [send_indrank, recv_indrank] = GetSendRecvRanks(metadomain, domain, direction); const auto [send_ind, send_rank] = send_indrank; diff --git a/src/framework/domain/domain.h b/src/framework/domain/domain.h index 7966cdb54..c26f5995f 100644 --- a/src/framework/domain/domain.h +++ b/src/framework/domain/domain.h @@ -146,7 +146,8 @@ 
namespace ntt { } /* setters -------------------------------------------------------------- */ - auto set_neighbor_idx(const dir::direction_t& dir, unsigned int idx) -> void { + auto set_neighbor_idx(const dir::direction_t& dir, unsigned int idx) + -> void { m_neighbor_idx[dir] = idx; } @@ -164,8 +165,8 @@ namespace ntt { }; template - inline auto operator<<(std::ostream& os, - const Domain& domain) -> std::ostream& { + inline auto operator<<(std::ostream& os, const Domain& domain) + -> std::ostream& { os << "Domain #" << domain.index(); #if defined(MPI_ENABLED) os << " [MPI rank: " << domain.mpi_rank() << "]"; @@ -184,23 +185,16 @@ namespace ntt { } os << "\n"; os << std::setw(19) << std::left << " physical extent: "; - for (auto dim = 0; dim < M::Dim; ++dim) { + for (auto dim { 0u }; dim < M::Dim; ++dim) { os << std::setw(15) << std::left << fmt::format("{%.2f; %.2f}", - domain.mesh.extent(dim).first, - domain.mesh.extent(dim).second); + domain.mesh.extent(static_cast(dim)).first, + domain.mesh.extent(static_cast(dim)).second); } os << "\n neighbors:\n"; for (auto& direction : dir::Directions::all) { - auto neighbor = domain.neighbor_in(direction); - os << " " << direction; - if (neighbor != nullptr) { - os << " -> #" << neighbor->index() << "\n"; - } else { - os << " -> " - << "N/A" - << "\n"; - } + auto neighbor_idx = domain.neighbor_idx_in(direction); + os << " " << direction << " -> #" << neighbor_idx << "\n"; } os << " field boundaries:\n"; for (auto& direction : dir::Directions::orth) { diff --git a/src/framework/domain/grid.cpp b/src/framework/domain/grid.cpp index c022184b1..ddbfc38f0 100644 --- a/src/framework/domain/grid.cpp +++ b/src/framework/domain/grid.cpp @@ -51,7 +51,7 @@ namespace ntt { template auto Grid::rangeCells(const box_region_t& region) const -> range_t { tuple_t imin, imax; - for (unsigned short i = 0; i < (unsigned short)D; i++) { + for (auto i { 0u }; i < D; i++) { switch (region[i]) { case CellLayer::allLayer: imin[i] = 0; @@ -86,10 
+86,10 @@ namespace ntt { } template - auto Grid::rangeCellsOnHost( - const box_region_t& region) const -> range_h_t { + auto Grid::rangeCellsOnHost(const box_region_t& region) const + -> range_h_t { tuple_t imin, imax; - for (unsigned short i = 0; i < (unsigned short)D; i++) { + for (auto i { 0u }; i < D; i++) { switch (region[i]) { case CellLayer::allLayer: imin[i] = 0; @@ -163,10 +163,10 @@ namespace ntt { } template - auto Grid::rangeCells( - const tuple_t, D>& ranges) const -> range_t { + auto Grid::rangeCells(const tuple_t, D>& ranges) const + -> range_t { tuple_t imin, imax; - for (unsigned short i = 0; i < (unsigned short)D; i++) { + for (auto i { 0u }; i < D; i++) { raise::ErrorIf((ranges[i][0] < -(int)N_GHOSTS) || (ranges[i][1] > (int)N_GHOSTS), "Invalid cell layer picked", diff --git a/src/framework/domain/mesh.h b/src/framework/domain/mesh.h index 98fe68895..e4f2cba6d 100644 --- a/src/framework/domain/mesh.h +++ b/src/framework/domain/mesh.h @@ -74,7 +74,7 @@ namespace ntt { * @note pass Range::All to select the entire dimension */ [[nodiscard]] - auto Intersection(boundaries_t box) -> boundaries_t { + auto Intersection(boundaries_t box) const -> boundaries_t { raise::ErrorIf(box.size() != M::Dim, "Invalid box dimension", HERE); boundaries_t intersection; auto d = 0; @@ -109,7 +109,7 @@ namespace ntt { * @note pass Range::All to select the entire dimension */ [[nodiscard]] - auto Intersects(boundaries_t box) -> bool { + auto Intersects(boundaries_t box) const -> bool { raise::ErrorIf(box.size() != M::Dim, "Invalid box dimension", HERE); const auto intersection = Intersection(box); for (const auto& i : intersection) { @@ -131,8 +131,8 @@ namespace ntt { * @note indices are already shifted by N_GHOSTS (i.e. 
they start at N_GHOSTS not 0) */ [[nodiscard]] - auto ExtentToRange(boundaries_t box, - boundaries_t incl_ghosts) -> boundaries_t { + auto ExtentToRange(boundaries_t box, boundaries_t incl_ghosts) const + -> boundaries_t { raise::ErrorIf(box.size() != M::Dim, "Invalid box dimension", HERE); raise::ErrorIf(incl_ghosts.size() != M::Dim, "Invalid incl_ghosts dimension", diff --git a/src/framework/domain/metadomain.cpp b/src/framework/domain/metadomain.cpp index 8952d417d..4b9057e23 100644 --- a/src/framework/domain/metadomain.cpp +++ b/src/framework/domain/metadomain.cpp @@ -140,7 +140,7 @@ namespace ntt { boundaries_t l_extent; coord_t low_corner_Code { ZERO }, up_corner_Code { ZERO }; coord_t low_corner_Phys { ZERO }, up_corner_Phys { ZERO }; - for (unsigned short d { 0 }; d < (unsigned short)D; ++d) { + for (auto d { 0u }; d < D; d++) { low_corner_Code[d] = (real_t)l_offset_ncells[d]; up_corner_Code[d] = (real_t)(l_offset_ncells[d] + l_ncells[d]); } @@ -403,6 +403,142 @@ namespace ntt { #endif } + template + void Metadomain::setFldsBC(const bc_in& dir, const FldsBC& new_bcs) { + if (dir == bc_in::Mx1) { + if constexpr (M::Dim == Dim::_1D) { + g_mesh.set_flds_bc({ -1 }, new_bcs); + } else if constexpr (M::Dim == Dim::_2D) { + g_mesh.set_flds_bc({ -1, 0 }, new_bcs); + } else if constexpr (M::Dim == Dim::_3D) { + g_mesh.set_flds_bc({ -1, 0, 0 }, new_bcs); + } else { + raise::Error("Invalid dimension", HERE); + } + } else if (dir == bc_in::Px1) { + if constexpr (M::Dim == Dim::_1D) { + g_mesh.set_flds_bc({ +1 }, new_bcs); + } else if constexpr (M::Dim == Dim::_2D) { + g_mesh.set_flds_bc({ +1, 0 }, new_bcs); + } else if constexpr (M::Dim == Dim::_3D) { + g_mesh.set_flds_bc({ +1, 0, 0 }, new_bcs); + } else { + raise::Error("Invalid dimension", HERE); + } + } else if (dir == bc_in::Mx2) { // x2 boundaries: the sign goes in the second slot of the direction + if constexpr (M::Dim == Dim::_1D) { + raise::Error("Cannot set -x2 BCs for 1D", HERE); + } else if constexpr (M::Dim == Dim::_2D) { + g_mesh.set_flds_bc({ 0, -1 }, new_bcs); + } else
if constexpr (M::Dim == Dim::_3D) { + g_mesh.set_flds_bc({ 0, -1, 0 }, new_bcs); + } else { + raise::Error("Invalid dimension", HERE); + } + } else if (dir == bc_in::Px2) { + if constexpr (M::Dim == Dim::_1D) { + raise::Error("Cannot set +x2 BCs for 1D", HERE); + } else if constexpr (M::Dim == Dim::_2D) { + g_mesh.set_flds_bc({ 0, +1 }, new_bcs); + } else if constexpr (M::Dim == Dim::_3D) { + g_mesh.set_flds_bc({ 0, +1, 0 }, new_bcs); + } else { + raise::Error("Invalid dimension", HERE); + } + } else if (dir == bc_in::Mx3) { + if constexpr (M::Dim == Dim::_1D) { + raise::Error("Cannot set -x3 BCs for 1D", HERE); + } else if constexpr (M::Dim == Dim::_2D) { + raise::Error("Cannot set -x3 BCs for 2D", HERE); + } else if constexpr (M::Dim == Dim::_3D) { + g_mesh.set_flds_bc({ 0, 0, -1 }, new_bcs); + } else { + raise::Error("Invalid dimension", HERE); + } + } else if (dir == bc_in::Px3) { + if constexpr (M::Dim == Dim::_1D) { + raise::Error("Cannot set +x3 BCs for 1D", HERE); + } else if constexpr (M::Dim == Dim::_2D) { + raise::Error("Cannot set +x3 BCs for 2D", HERE); + } else if constexpr (M::Dim == Dim::_3D) { + g_mesh.set_flds_bc({ 0, 0, +1 }, new_bcs); + } else { + raise::Error("Invalid dimension", HERE); + } + } else { + raise::Error("Invalid direction", HERE); + } + redefineBoundaries(); + } + + template + void Metadomain::setPrtlBC(const bc_in& dir, const PrtlBC& new_bcs) { + if (dir == bc_in::Mx1) { + if constexpr (M::Dim == Dim::_1D) { + g_mesh.set_prtl_bc({ -1 }, new_bcs); + } else if constexpr (M::Dim == Dim::_2D) { + g_mesh.set_prtl_bc({ -1, 0 }, new_bcs); + } else if constexpr (M::Dim == Dim::_3D) { + g_mesh.set_prtl_bc({ -1, 0, 0 }, new_bcs); + } else { + raise::Error("Invalid dimension", HERE); + } + } else if (dir == bc_in::Px1) { + if constexpr (M::Dim == Dim::_1D) { + g_mesh.set_prtl_bc({ +1 }, new_bcs); + } else if constexpr (M::Dim == Dim::_2D) { + g_mesh.set_prtl_bc({ +1, 0 }, new_bcs); + } else if constexpr (M::Dim == Dim::_3D) { +
g_mesh.set_prtl_bc({ +1, 0, 0 }, new_bcs); + } else { + raise::Error("Invalid dimension", HERE); + } + } else if (dir == bc_in::Mx2) { // x2 boundaries: the sign goes in the second slot of the direction + if constexpr (M::Dim == Dim::_1D) { + raise::Error("Cannot set -x2 BCs for 1D", HERE); + } else if constexpr (M::Dim == Dim::_2D) { + g_mesh.set_prtl_bc({ 0, -1 }, new_bcs); + } else if constexpr (M::Dim == Dim::_3D) { + g_mesh.set_prtl_bc({ 0, -1, 0 }, new_bcs); + } else { + raise::Error("Invalid dimension", HERE); + } + } else if (dir == bc_in::Px2) { + if constexpr (M::Dim == Dim::_1D) { + raise::Error("Cannot set +x2 BCs for 1D", HERE); + } else if constexpr (M::Dim == Dim::_2D) { + g_mesh.set_prtl_bc({ 0, +1 }, new_bcs); + } else if constexpr (M::Dim == Dim::_3D) { + g_mesh.set_prtl_bc({ 0, +1, 0 }, new_bcs); + } else { + raise::Error("Invalid dimension", HERE); + } + } else if (dir == bc_in::Mx3) { + if constexpr (M::Dim == Dim::_1D) { + raise::Error("Cannot set -x3 BCs for 1D", HERE); + } else if constexpr (M::Dim == Dim::_2D) { + raise::Error("Cannot set -x3 BCs for 2D", HERE); + } else if constexpr (M::Dim == Dim::_3D) { + g_mesh.set_prtl_bc({ 0, 0, -1 }, new_bcs); + } else { + raise::Error("Invalid dimension", HERE); + } + } else if (dir == bc_in::Px3) { + if constexpr (M::Dim == Dim::_1D) { + raise::Error("Cannot set +x3 BCs for 1D", HERE); + } else if constexpr (M::Dim == Dim::_2D) { + raise::Error("Cannot set +x3 BCs for 2D", HERE); + } else if constexpr (M::Dim == Dim::_3D) { + g_mesh.set_prtl_bc({ 0, 0, +1 }, new_bcs); + } else { + raise::Error("Invalid dimension", HERE); + } + } else { + raise::Error("Invalid direction", HERE); + } + redefineBoundaries(); + } + template struct Metadomain>; template struct Metadomain>; template struct Metadomain>; diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 80f546664..1fb6ce007 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -25,6 +25,7 @@ #include "framework/domain/domain.h" #include
"framework/domain/mesh.h" #include "framework/parameters.h" +#include "output/stats.h" #if defined(MPI_ENABLED) #include @@ -116,7 +117,7 @@ namespace ntt { ~Metadomain() = default; #if defined(OUTPUT_ENABLED) - void InitWriter(adios2::ADIOS*, const SimulationParams&, bool is_resuming); + void InitWriter(adios2::ADIOS*, const SimulationParams&, bool); auto Write(const SimulationParams&, timestep_t, timestep_t, @@ -136,7 +137,13 @@ namespace ntt { void ContinueFromCheckpoint(adios2::ADIOS*, const SimulationParams&); #endif + void InitStatsWriter(const SimulationParams&, bool); + auto WriteStats(const SimulationParams&, timestep_t, timestep_t, simtime_t, simtime_t) + -> bool; + /* setters -------------------------------------------------------------- */ + void setFldsBC(const bc_in&, const FldsBC&); + void setPrtlBC(const bc_in&, const PrtlBC&); /* getters -------------------------------------------------------------- */ [[nodiscard]] @@ -242,6 +249,8 @@ namespace ntt { const std::map g_metric_params; const std::vector g_species_params; + stats::Writer g_stats_writer; + #if defined(OUTPUT_ENABLED) out::Writer g_writer; checkpoint::Writer g_checkpoint_writer; diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 071618860..6903c7194 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -18,6 +18,7 @@ #include "framework/domain/metadomain.h" #include "framework/parameters.h" +#include "kernels/divergences.hpp" #include "kernels/fields_to_phys.hpp" #include "kernels/particle_moments.hpp" #include "kernels/prtls_to_phys.hpp" @@ -70,6 +71,7 @@ namespace ntt { g_writer.defineMeshLayout(glob_shape_with_ghosts, off_ncells_with_ghosts, loc_shape_with_ghosts, + { local_domain->index(), ndomains() }, params.template get>( "output.fields.downsampling"), incl_ghosts, @@ -84,12 +86,12 @@ namespace ntt { custom_fields_to_write.begin(), custom_fields_to_write.end(), std::back_inserter(all_fields_to_write)); - const auto 
species_to_write = params.template get>( + const auto species_to_write = params.template get>( "output.particles.species"); g_writer.defineFieldOutputs(S, all_fields_to_write); g_writer.defineParticleOutputs(M::PrtlDim, species_to_write); // spectra write all particle species - std::vector spectra_species {}; + std::vector spectra_species {}; for (const auto& sp : species_params()) { spectra_species.push_back(sp.index()); } @@ -112,11 +114,11 @@ namespace ntt { void ComputeMoments(const SimulationParams& params, const Mesh& mesh, const std::vector>& prtl_species, - const std::vector& species, + const std::vector& species, const std::vector& components, ndfield_t& buffer, - unsigned short buff_idx) { - std::vector specs = species; + idx_t buff_idx) { + std::vector specs = species; if (specs.size() == 0) { // if no species specified, take all massive species for (auto& sp : prtl_species) { @@ -164,7 +166,7 @@ namespace ntt { ndfield_t& fld_to, const range_tuple_t& from, const range_tuple_t& to) { - for (unsigned short d = 0; d < D; ++d) { + for (auto d { 0u }; d < D; ++d) { raise::ErrorIf(fld_from.extent(d) != fld_to.extent(d), "Fields have different sizes " + std::to_string(fld_from.extent(d)) + @@ -213,7 +215,9 @@ namespace ntt { g_writer.shouldWrite("spectra", finished_step, finished_time); - if (not(write_fields or write_particles or write_spectra)) { + const auto extension = params.template get("output.format"); + if (not(write_fields or write_particles or write_spectra) and + extension != "disabled") { return false; } auto local_domain = subdomain_ptr(l_subdomain_indices()[0]); @@ -227,7 +231,18 @@ namespace ntt { const auto dwn = params.template get>( "output.fields.downsampling"); - for (unsigned short dim = 0; dim < M::Dim; ++dim) { + auto off_ncells_with_ghosts = local_domain->offset_ncells(); + auto loc_shape_with_ghosts = local_domain->mesh.n_active(); + { // compute positions/sizes of meshblocks in cells in all dimensions + const auto off_ndomains = 
local_domain->offset_ndomains(); + if (incl_ghosts) { + for (auto d { 0u }; d < M::Dim; ++d) { // valid indices are 0 .. M::Dim - 1 + off_ncells_with_ghosts[d] += 2 * N_GHOSTS * off_ndomains[d]; + loc_shape_with_ghosts[d] += 2 * N_GHOSTS; + } + } + } + for (auto dim { 0u }; dim < M::Dim; ++dim) { const auto l_size = local_domain->mesh.n_active()[dim]; const auto l_offset = local_domain->offset_ncells()[dim]; const auto g_size = mesh().n_active()[dim]; @@ -275,7 +290,11 @@ namespace ntt { xe(offset + i_dwn + 1) = x_Ph[dim]; } }); - g_writer.writeMesh(dim, xc, xe); + g_writer.writeMesh( + dim, + xc, + xe, + { off_ncells_with_ghosts[dim], loc_shape_with_ghosts[dim] }); } const auto output_asis = params.template get("output.debug.as_is"); // !TODO: this can probably be optimized to dump things at once @@ -289,7 +308,7 @@ namespace ntt { if (fld.is_moment()) { // output a particle distribution moment (single component) // this includes T, Rho, Charge, N, Nppc - const auto c = static_cast(addresses.back()); + const auto c = static_cast(addresses.back()); if (fld.id() == FldsID::T) { raise::ErrorIf(fld.comp.size() != 1, "Wrong # of components requested for T output", @@ -348,6 +367,16 @@ namespace ntt { } else { raise::Error("Wrong moment requested for output", HERE); } + } else if (fld.is_divergence()) { + // @TODO: is this correct for GR too? not em0?
+ const auto c = static_cast(addresses.back()); + Kokkos::parallel_for( + "ComputeDivergence", + local_domain->mesh.rangeActiveCells(), + kernel::ComputeDivergence_kernel(local_domain->mesh.metric, + local_domain->fields.em, + local_domain->fields.bckp, + c)); } else if (fld.is_custom()) { if (CustomFieldOutput) { CustomFieldOutput(fld.name().substr(1), @@ -373,7 +402,7 @@ namespace ntt { } if (fld.is_moment()) { for (auto i = 0; i < 3; ++i) { - const auto c = static_cast(addresses[i]); + const auto c = static_cast(addresses[i]); if (fld.id() == FldsID::T) { raise::ErrorIf(fld.comp[i].size() != 2, "Wrong # of components requested for moment", @@ -481,8 +510,8 @@ namespace ntt { if (not output_asis) { // copy fields from bckp(:, 0, 1, 2) -> bckp(:, 3, 4, 5) // converting to proper basis and properly interpolating - list_t comp_from = { 0, 1, 2 }; - list_t comp_to = { 3, 4, 5 }; + list_t comp_from = { 0, 1, 2 }; + list_t comp_to = { 3, 4, 5 }; DeepCopyFields(local_domain->fields.bckp, local_domain->fields.bckp, { 0, 3 }, @@ -505,7 +534,7 @@ namespace ntt { for (auto i = 0; i < 6; ++i) { names.push_back(fld.name(i)); addresses.push_back(i); - const auto c = static_cast(addresses.back()); + const auto c = static_cast(addresses.back()); raise::ErrorIf(fld.comp[i].size() != 2, "Wrong # of components requested for moment", HERE); diff --git a/src/framework/domain/stats.cpp b/src/framework/domain/stats.cpp new file mode 100644 index 000000000..60acd64a9 --- /dev/null +++ b/src/framework/domain/stats.cpp @@ -0,0 +1,204 @@ +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "utils/error.h" +#include "utils/log.h" +#include "utils/numeric.h" + +#include "metrics/kerr_schild.h" +#include "metrics/kerr_schild_0.h" +#include "metrics/minkowski.h" +#include "metrics/qkerr_schild.h" +#include "metrics/qspherical.h" +#include "metrics/spherical.h" + +#include "framework/containers/particles.h" +#include "framework/domain/domain.h" +#include 
"framework/domain/metadomain.h" +#include "framework/parameters.h" + +#include +#include +#include + +#include + +namespace ntt { + + template + void Metadomain::InitStatsWriter(const SimulationParams& params, + bool is_resuming) { + raise::ErrorIf( + l_subdomain_indices().size() != 1, + "StatsWriter for now is only supported for one subdomain per rank", + HERE); + auto local_domain = subdomain_ptr(l_subdomain_indices()[0]); + raise::ErrorIf(local_domain->is_placeholder(), + "local_domain is a placeholder", + HERE); + const auto filename = params.template get("simulation.name") + + "_stats.csv"; + const auto enable_stats = params.template get("output.stats.enable"); + if (enable_stats and (not is_resuming)) { + CallOnce( + [](auto& filename) { + if (std::filesystem::exists(filename)) { + std::filesystem::remove(filename); + } + }, + filename); + } + const auto stats_to_write = params.template get>( + "output.stats.quantities"); + g_stats_writer.init( + params.template get("output.stats.interval"), + params.template get("output.stats.interval_time")); + g_stats_writer.defineStatsFilename(filename); + g_stats_writer.defineStatsOutputs(stats_to_write); + + if (not std::filesystem::exists(filename)) { + g_stats_writer.writeHeader(); + } + } + + template + auto ComputeMoments(const SimulationParams& params, + const Mesh& mesh, + const std::vector>& prtl_species, + const std::vector& species, + const std::vector& components) -> T { + std::vector specs = species; + if (specs.size() == 0) { + // if no species specified, take all massive species + for (auto& sp : prtl_species) { + if (sp.mass() > 0) { + specs.push_back(sp.index()); + } + } + } + for (const auto& sp : specs) { + raise::ErrorIf((sp > prtl_species.size()) or (sp == 0), + "Invalid species index " + std::to_string(sp), + HERE); + } + // some parameters + const auto use_weights = params.template get("particles.use_weights"); + const auto inv_n0 = ONE / params.template get("scales.n0"); + + T buffer = 
static_cast(0); + for (const auto& sp : specs) { + auto& prtl_spec = prtl_species[sp - 1]; + // Kokkos::parallel_reduce( + // "ComputeMoments", + // prtl_spec.rangeActiveParticles(), + // // clang-format off + // kernel::ReducedParticleMoments_kernel(components, + // prtl_spec.i1, prtl_spec.i2, prtl_spec.i3, + // prtl_spec.dx1, prtl_spec.dx2, prtl_spec.dx3, + // prtl_spec.ux1, prtl_spec.ux2, prtl_spec.ux3, + // prtl_spec.phi, prtl_spec.weight, prtl_spec.tag, + // prtl_spec.mass(), prtl_spec.charge(), + // use_weights, mesh.metric, mesh.flds_bc(), inv_n0), + // // clang-format on + // buffer); + } + return buffer; + } + + template + auto ComputeFields(Domain* domain, + const std::vector& components) -> real_t { + auto buffer { ZERO }; + // Kokkos::parallel_reduce( + // "ComputeMoments", + // prtl_spec.rangeActiveParticles(), + // kernel::ReducedFields_kernel(components, + // domain->fields.em, + // domain->fields.cur, + // domain->mesh.metric), + // buffer); + return buffer; + } + + template + auto Metadomain::WriteStats(const SimulationParams& params, + timestep_t current_step, + timestep_t finished_step, + simtime_t current_time, + simtime_t finished_time) -> bool { + if (not(params.template get("output.stats.enable") and + g_stats_writer.shouldWrite(finished_step, finished_time))) { + return false; + } + auto local_domain = subdomain_ptr(l_subdomain_indices()[0]); + logger::Checkpoint("Writing stats", HERE); + g_stats_writer.write(current_step); + g_stats_writer.write(current_time); + for (const auto& stat : g_stats_writer.statsWriters()) { + if (stat.id() == StatsID::N) { + g_stats_writer.write( + ComputeMoments(params, + local_domain->mesh, + local_domain->species, + stat.species, + {})); + } else if (stat.id() == StatsID::Npart) { + g_stats_writer.write( + ComputeMoments(params, + local_domain->mesh, + local_domain->species, + stat.species, + {})); + } else if (stat.id() == StatsID::Rho) { + g_stats_writer.write( + ComputeMoments(params, + local_domain->mesh, 
+ local_domain->species, + stat.species, + {})); + } else if (stat.id() == StatsID::Charge) { + g_stats_writer.write( + ComputeMoments(params, + local_domain->mesh, + local_domain->species, + stat.species, + {})); + } else if (stat.id() == StatsID::T) { + for (const auto& comp : stat.comp) { + g_stats_writer.write( + ComputeMoments(params, + local_domain->mesh, + local_domain->species, + stat.species, + comp)); + } + } else if (stat.id() == StatsID::JdotE) { + g_stats_writer.write(ComputeFields(local_domain, {})); + } else if (stat.id() == StatsID::E2) { + g_stats_writer.write(ComputeFields(local_domain, {})); + } else if (stat.id() == StatsID::B2) { + g_stats_writer.write(ComputeFields(local_domain, {})); + } else if (stat.id() == StatsID::ExB) { + for (const auto& comp : stat.comp) { + g_stats_writer.write( + ComputeFields(local_domain, comp)); + } + } else { + raise::Error("Unrecognized stats ID " + stat.name(), HERE); + } + } + g_stats_writer.endWriting(); + return true; + } + + template struct Metadomain>; + template struct Metadomain>; + template struct Metadomain>; + template struct Metadomain>; + template struct Metadomain>; + template struct Metadomain>; + template struct Metadomain>; + template struct Metadomain>; + +} // namespace ntt diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index 556d9f547..168640e1f 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -31,14 +31,14 @@ namespace ntt { template - auto get_dx0_V0(const std::vector& resolution, - const boundaries_t& extent, - const std::map& params) - -> std::pair { + auto get_dx0_V0( + const std::vector& resolution, + const boundaries_t& extent, + const std::map& params) -> std::pair { const auto metric = M(resolution, extent, params); const auto dx0 = metric.dxMin(); coord_t x_corner { ZERO }; - for (unsigned short d { 0 }; d < (unsigned short)(M::Dim); ++d) { + for (auto d { 0u }; d < M::Dim; ++d) { x_corner[d] = HALF; } const auto V0 = 
metric.sqrt_det_h(x_corner); @@ -184,7 +184,7 @@ namespace ntt { toml::array {}); set("particles.nspec", species_tab.size()); - unsigned short idx = 1; + spidx_t idx = 1; for (const auto& sp : species_tab) { const auto label = toml::find_or(sp, "label", @@ -265,7 +265,7 @@ namespace ntt { } raise::ErrorIf(extent.size() != dim, "invalid inferred `grid.extent`", HERE); boundaries_t extent_pairwise; - for (unsigned short d = 0; d < (unsigned short)dim; ++d) { + for (auto d { 0u }; d < (dim_t)dim; ++d) { raise::ErrorIf(extent[d].size() != 2, fmt::format("invalid inferred `grid.extent[%d]`", d), HERE); @@ -353,7 +353,6 @@ namespace ntt { for (const auto& bc : bcs) { if (fmt::toLower(bc) == "match") { promiseToDefine("grid.boundaries.match.ds"); - promiseToDefine("grid.boundaries.match.coeff"); } if (fmt::toLower(bc) == "atmosphere") { raise::ErrorIf(atm_defined, @@ -458,15 +457,18 @@ namespace ntt { set("output.separate_files", toml::find_or(toml_data, "output", "separate_files", true)); + promiseToDefine("output.fields.enable"); promiseToDefine("output.fields.interval"); promiseToDefine("output.fields.interval_time"); - promiseToDefine("output.fields.enable"); + promiseToDefine("output.particles.enable"); promiseToDefine("output.particles.interval"); promiseToDefine("output.particles.interval_time"); - promiseToDefine("output.particles.enable"); + promiseToDefine("output.spectra.enable"); promiseToDefine("output.spectra.interval"); promiseToDefine("output.spectra.interval_time"); - promiseToDefine("output.spectra.enable"); + promiseToDefine("output.stats.enable"); + promiseToDefine("output.stats.interval"); + promiseToDefine("output.stats.interval_time"); const auto flds_out = toml::find_or(toml_data, "output", @@ -504,10 +506,10 @@ namespace ntt { set("output.fields.downsampling", field_dwn); // particles - auto all_specs = std::vector {}; + auto all_specs = std::vector {}; const auto nspec = get("particles.nspec"); for (auto i = 0u; i < nspec; ++i) { - 
all_specs.push_back(static_cast(i + 1)); + all_specs.push_back(static_cast(i + 1)); } const auto prtl_out = toml::find_or(toml_data, "output", @@ -540,8 +542,16 @@ namespace ntt { "n_bins", defaults::output::spec_nbins)); + // stats + set("output.stats.quantities", + toml::find_or(toml_data, + "output", + "stats", + "quantities", + defaults::output::stats_quantities)); + // intervals - for (const auto& type : { "fields", "particles", "spectra" }) { + for (const auto& type : { "fields", "particles", "spectra", "stats" }) { const auto q_int = toml::find_or(toml_data, "output", std::string(type), @@ -554,7 +564,7 @@ namespace ntt { -1.0); set("output." + std::string(type) + ".enable", toml::find_or(toml_data, "output", std::string(type), "enable", true)); - if (q_int == 0 && q_int_time == -1.0) { + if ((q_int == 0) and (q_int_time == -1.0)) { set("output." + std::string(type) + ".interval", get("output.interval")); set("output." + std::string(type) + ".interval_time", @@ -615,7 +625,7 @@ namespace ntt { raise::ErrorIf(prtl_bc.size() != (std::size_t)dim, "invalid `grid.boundaries.particles`", HERE); - for (unsigned short d = 0; d < (unsigned short)dim; ++d) { + for (auto d { 0u }; d < (dim_t)dim; ++d) { flds_bc_enum.push_back({}); prtl_bc_enum.push_back({}); const auto fbc = flds_bc[d]; @@ -717,7 +727,7 @@ namespace ntt { HERE); boundaries_t flds_bc_pairwise; boundaries_t prtl_bc_pairwise; - for (unsigned short d = 0; d < (unsigned short)dim; ++d) { + for (auto d { 0u }; d < (dim_t)dim; ++d) { raise::ErrorIf( flds_bc_enum[d].size() != 2, fmt::format("invalid inferred `grid.boundaries.fields[%d]`", d), @@ -738,30 +748,56 @@ namespace ntt { for (const auto& e : extent_pairwise) { min_extent = std::min(min_extent, e.second - e.first); } - set("grid.boundaries.match.ds", - toml::find_or(toml_data, - "grid", - "boundaries", - "match", - "ds", - min_extent * defaults::bc::match::ds_frac)); + const auto default_ds = min_extent * defaults::bc::match::ds_frac; + boundaries_t 
ds_array; + try { + auto ds = toml::find(toml_data, "grid", "boundaries", "match", "ds"); + for (auto d = 0u; d < dim; ++d) { + ds_array.push_back({ ds, ds }); + } + } catch (...) { + try { + const auto ds = toml::find>>( + toml_data, + "grid", + "boundaries", + "match", + "ds"); + raise::ErrorIf(ds.size() != dim, + "invalid # in `grid.boundaries.match.ds`", + HERE); + for (auto d = 0u; d < dim; ++d) { + if (ds[d].size() == 1) { + ds_array.push_back({ ds[d][0], ds[d][0] }); + } else if (ds[d].size() == 2) { + ds_array.push_back({ ds[d][0], ds[d][1] }); + } else if (ds[d].size() == 0) { + ds_array.push_back({}); + } else { + raise::Error("invalid `grid.boundaries.match.ds`", HERE); + } + } + } catch (...) { + for (auto d = 0u; d < dim; ++d) { + ds_array.push_back({ default_ds, default_ds }); + } + } + } + set("grid.boundaries.match.ds", ds_array); } else { auto r_extent = extent_pairwise[0].second - extent_pairwise[0].first; - set("grid.boundaries.match.ds", - toml::find_or(toml_data, - "grid", - "boundaries", - "match", - "ds", - r_extent * defaults::bc::match::ds_frac)); + const auto ds = toml::find_or( + toml_data, + "grid", + "boundaries", + "match", + "ds", + r_extent * defaults::bc::match::ds_frac); + boundaries_t ds_array { + { ds, ds } + }; + set("grid.boundaries.match.ds", ds_array); } - set("grid.boundaries.match.coeff", - toml::find_or(toml_data, - "grid", - "boundaries", - "match", - "coeff", - defaults::bc::match::coeff)); } if (isPromised("grid.boundaries.absorb.ds")) { @@ -807,7 +843,7 @@ namespace ntt { toml::find_or(toml_data, "grid", "boundaries", "atmosphere", "ds", ZERO)); set("grid.boundaries.atmosphere.height", atm_h); set("grid.boundaries.atmosphere.g", atm_T / atm_h); - const auto atm_species = toml::find>( + const auto atm_species = toml::find>( toml_data, "grid", "boundaries", diff --git a/src/framework/tests/comm_mpi.cpp b/src/framework/tests/comm_mpi.cpp index 5d2c8d4f0..487976f73 100644 --- a/src/framework/tests/comm_mpi.cpp +++ 
b/src/framework/tests/comm_mpi.cpp @@ -5,6 +5,11 @@ #include "arch/directions.h" #include "arch/kokkos_aliases.h" +#include "utils/error.h" +#include "utils/numeric.h" + +#include +#include #include #include @@ -13,49 +18,227 @@ using namespace ntt; auto main(int argc, char* argv[]) -> int { Kokkos::initialize(argc, argv); + MPI_Init(&argc, &argv); try { - const ncells_t nx1 = 15, nx2 = 15; - ndfield_t fld_b1 { "fld", nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }; - ndfield_t fld_b2 { "fld", nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }; + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + const ncells_t nx1 = 11, nx2 = 15; + ndfield_t fld { "fld", nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }; Kokkos::parallel_for( "Fill", CreateRangePolicy({ 0, 0 }, { nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }), Lambda(index_t i1, index_t i2) { - if ((i1 >= 2 * N_GHOSTS) and (i1 < nx1) and (i2 >= 2 * N_GHOSTS) and - (i2 < nx2)) { - fld_b1(i1, i2, 0) = 4.0; - fld_b1(i1, i2, 1) = 12.0; - fld_b1(i1, i2, 2) = 20.0; - fld_b2(i1, i2, 0) = 4.0; - fld_b2(i1, i2, 1) = 12.0; - fld_b2(i1, i2, 2) = 20.0; - } else if ( - ((i1 < 2 * N_GHOSTS or i1 >= nx1) and (i2 >= 2 * N_GHOSTS and i2 < nx2)) or - ((i2 < 2 * N_GHOSTS or i2 >= nx2) and (i1 >= 2 * N_GHOSTS and i1 < nx1))) { - fld_b1(i1, i2, 0) = 2.0; - fld_b1(i1, i2, 1) = 6.0; - fld_b1(i1, i2, 2) = 10.0; - fld_b2(i1, i2, 0) = 2.0; - fld_b2(i1, i2, 1) = 6.0; - fld_b2(i1, i2, 2) = 10.0; - } else { - fld_b1(i1, i2, 0) = 1.0; - fld_b1(i1, i2, 1) = 3.0; - fld_b1(i1, i2, 2) = 5.0; - fld_b2(i1, i2, 0) = 1.0; - fld_b2(i1, i2, 1) = 3.0; - fld_b2(i1, i2, 2) = 5.0; + if ((i1 >= N_GHOSTS) and (i1 < N_GHOSTS + nx1) and (i2 >= N_GHOSTS) and + (i2 < N_GHOSTS + nx2)) { + fld(i1, i2, 0) = static_cast(rank + 1) + 4.0; + fld(i1, i2, 1) = static_cast(rank + 1) + 12.0; + fld(i1, i2, 2) = static_cast(rank + 1) + 20.0; + } + }); + + { + // send right, recv left + const int send_idx = (rank + 1) % size; + const int recv_idx = (rank - 1 
+ size) % size; + const unsigned int send_rank = (unsigned int)send_idx; + const unsigned int recv_rank = (unsigned int)recv_idx; + + const std::vector send_slice { + { nx1, nx1 + N_GHOSTS }, + { N_GHOSTS, nx2 + N_GHOSTS } + }; + const std::vector recv_slice { + { 0, N_GHOSTS }, + { N_GHOSTS, nx2 + N_GHOSTS } + }; + const range_tuple_t comp_slice { 0, 3 }; + comm::CommunicateField((unsigned int)(rank), + fld, + fld, + send_idx, + recv_idx, + send_rank, + recv_rank, + send_slice, + recv_slice, + comp_slice, + false); + } + { + // recv right, send left + const int send_idx = (rank - 1 + size) % size; + const int recv_idx = (rank + 1) % size; + const unsigned int send_rank = (unsigned int)send_idx; + const unsigned int recv_rank = (unsigned int)recv_idx; + + const std::vector send_slice { + { N_GHOSTS, N_GHOSTS + 2 }, + { N_GHOSTS, nx2 + N_GHOSTS } + }; + const std::vector recv_slice { + { nx1 + N_GHOSTS, nx1 + 2 * N_GHOSTS }, + { N_GHOSTS, nx2 + N_GHOSTS } + }; + const range_tuple_t comp_slice { 0, 3 }; + comm::CommunicateField((unsigned int)(rank), + fld, + fld, + send_idx, + recv_idx, + send_rank, + recv_rank, + send_slice, + recv_slice, + comp_slice, + false); + } + + { + const auto left_expect = static_cast((rank - 1 + size) % size + 1); + const auto right_expect = static_cast((rank + 1) % size + 1); + + Kokkos::parallel_for( + "Check", + CreateRangePolicy({ N_GHOSTS }, { nx2 + N_GHOSTS }), + Lambda(index_t i2) { + for (auto i1 { 0u }; i1 < N_GHOSTS; ++i1) { + if (fld(i1, i2, 0) != left_expect + 4.0) { + raise::KernelError(HERE, "Left boundary not correct for #0"); + } + if (fld(i1, i2, 1) != left_expect + 12.0) { + raise::KernelError(HERE, "Left boundary not correct for #1"); + } + if (fld(i1, i2, 2) != left_expect + 20.0) { + raise::KernelError(HERE, "Left boundary not correct for #2"); + } + } + for (auto i1 { nx1 + N_GHOSTS }; i1 < nx1 + 2 * N_GHOSTS; ++i1) { + if (fld(i1, i2, 0) != right_expect + 4.0) { + raise::KernelError(HERE, "Right boundary not correct 
for #0"); + } + if (fld(i1, i2, 1) != right_expect + 12.0) { + raise::KernelError(HERE, "Right boundary not correct for #1"); + } + if (fld(i1, i2, 2) != right_expect + 20.0) { + raise::KernelError(HERE, "Right boundary not correct for #2"); + } + } + }); + } + + Kokkos::parallel_for( + "Carve", + CreateRangePolicy({ 0, 0 }, + { nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }), + Lambda(index_t i1, index_t i2) { + if (((i1 >= N_GHOSTS) and (i1 < 2 * N_GHOSTS)) or + ((i1 >= nx1) and (i1 < nx1 + N_GHOSTS))) { + fld(i1, i2, 0) = ZERO; + fld(i1, i2, 1) = ZERO; + fld(i1, i2, 2) = ZERO; } }); + + { + // send right, recv left + const int send_idx = (rank + 1) % size; + const int recv_idx = (rank - 1 + size) % size; + const unsigned int send_rank = (unsigned int)send_idx; + const unsigned int recv_rank = (unsigned int)recv_idx; + + const std::vector send_slice { + { nx1 + N_GHOSTS, nx1 + 2 * N_GHOSTS }, + { N_GHOSTS, nx2 + N_GHOSTS } + }; + const std::vector recv_slice { + { N_GHOSTS, 2 * N_GHOSTS }, + { N_GHOSTS, nx2 + N_GHOSTS } + }; + const range_tuple_t comp_slice { 0, 3 }; + comm::CommunicateField((unsigned int)(rank), + fld, + fld, + send_idx, + recv_idx, + send_rank, + recv_rank, + send_slice, + recv_slice, + comp_slice, + true); + } + { + // recv right, send left + const int send_idx = (rank - 1 + size) % size; + const int recv_idx = (rank + 1) % size; + const unsigned int send_rank = (unsigned int)send_idx; + const unsigned int recv_rank = (unsigned int)recv_idx; + + const std::vector send_slice { + { 0, N_GHOSTS }, + { N_GHOSTS, nx2 + N_GHOSTS } + }; + const std::vector recv_slice { + { nx1, nx1 + N_GHOSTS }, + { N_GHOSTS, nx2 + N_GHOSTS } + }; + const range_tuple_t comp_slice { 0, 3 }; + comm::CommunicateField((unsigned int)(rank), + fld, + fld, + send_idx, + recv_idx, + send_rank, + recv_rank, + send_slice, + recv_slice, + comp_slice, + true); + } + + { + const auto expect = static_cast(rank + 1); + Kokkos::parallel_for( + "Check", + CreateRangePolicy({ N_GHOSTS }, { 
nx2 + N_GHOSTS }), + Lambda(index_t i2) { + for (auto i1 { N_GHOSTS }; i1 < 2 * N_GHOSTS; ++i1) { + if (fld(i1, i2, 0) != expect + 4.0) { + raise::KernelError(HERE, "Left boundary not correct for #0"); + } + if (fld(i1, i2, 1) != expect + 12.0) { + raise::KernelError(HERE, "Left boundary not correct for #1"); + } + if (fld(i1, i2, 2) != expect + 20.0) { + raise::KernelError(HERE, "Left boundary not correct for #2"); + } + } + for (auto i1 { nx1 }; i1 < nx1 + N_GHOSTS; ++i1) { + if (fld(i1, i2, 0) != expect + 4.0) { + raise::KernelError(HERE, "Right boundary not correct for #0"); + } + if (fld(i1, i2, 1) != expect + 12.0) { + raise::KernelError(HERE, "Right boundary not correct for #1"); + } + if (fld(i1, i2, 2) != expect + 20.0) { + raise::KernelError(HERE, "Right boundary not correct for #2"); + } + } + }); + } } catch (std::exception& e) { std::cerr << "Exception: " << e.what() << std::endl; + MPI_Finalize(); Kokkos::finalize(); return 1; } + MPI_Finalize(); Kokkos::finalize(); return 0; } diff --git a/src/framework/tests/comm_nompi.cpp b/src/framework/tests/comm_nompi.cpp index 05d54d589..f9581c1e1 100644 --- a/src/framework/tests/comm_nompi.cpp +++ b/src/framework/tests/comm_nompi.cpp @@ -7,6 +7,8 @@ #include "arch/kokkos_aliases.h" #include "utils/numeric.h" +#include "framework/domain/comm_mpi.hpp" + #include #include @@ -45,12 +47,12 @@ auto main(int argc, char* argv[]) -> int { Kokkos::deep_copy(buff, ZERO); const auto send_slice = std::vector { - {nx1 + N_GHOSTS, nx1 + 2 * N_GHOSTS}, - {nx2 + N_GHOSTS, nx2 + 2 * N_GHOSTS} + { nx1 + N_GHOSTS, nx1 + 2 * N_GHOSTS }, + { nx2 + N_GHOSTS, nx2 + 2 * N_GHOSTS } }; const auto recv_slice = std::vector { - {N_GHOSTS, 2 * N_GHOSTS}, - {N_GHOSTS, 2 * N_GHOSTS} + { N_GHOSTS, 2 * N_GHOSTS }, + { N_GHOSTS, 2 * N_GHOSTS } }; const auto comp_slice = range_tuple_t(cur::jx1, cur::jx3 + 1); diff --git a/src/framework/tests/grid_mesh.cpp b/src/framework/tests/grid_mesh.cpp index 4dea275ce..952d9874d 100644 --- 
a/src/framework/tests/grid_mesh.cpp +++ b/src/framework/tests/grid_mesh.cpp @@ -21,27 +21,26 @@ auto main(int argc, char* argv[]) -> int { using namespace metric; const auto res = std::vector { 10, 10, 10 }; const auto ext = boundaries_t { - {-1.0, 1.0}, - {-1.0, 1.0}, - {-1.0, 1.0} + { -1.0, 1.0 }, + { -1.0, 1.0 }, + { -1.0, 1.0 } }; auto mesh = Mesh>(res, ext, {}); for (const auto& d : { in::x1, in::x2, in::x3 }) { raise::ErrorIf(mesh.i_min(d) != N_GHOSTS, "i_min != N_GHOSTS", HERE); - raise::ErrorIf(mesh.i_max(d) != res[(unsigned short)d] + N_GHOSTS, + raise::ErrorIf(mesh.i_max(d) != res[(dim_t)d] + N_GHOSTS, "i_max != res+N_GHOSTS", HERE); - raise::ErrorIf(mesh.n_active(d) != res[(unsigned short)d], - "n_active != res", - HERE); - raise::ErrorIf(mesh.n_all(d) != res[(unsigned short)d] + 2 * N_GHOSTS, + raise::ErrorIf(mesh.n_active(d) != res[(dim_t)d], "n_active != res", HERE); + raise::ErrorIf(mesh.n_all(d) != res[(dim_t)d] + 2 * N_GHOSTS, "n_all != res+2*N_GHOSTS", HERE); - raise::ErrorIf(mesh.extent(d) != ext[(unsigned short)d], "extent != ext", HERE); + raise::ErrorIf(mesh.extent(d) != ext[(dim_t)d], "extent != ext", HERE); } - raise::ErrorIf(not cmp::AlmostEqual(mesh.metric.dxMin(), (real_t)(0.2 / std::sqrt(3.0))), - "dxMin wrong", - HERE); + raise::ErrorIf( + not cmp::AlmostEqual(mesh.metric.dxMin(), (real_t)(0.2 / std::sqrt(3.0))), + "dxMin wrong", + HERE); } catch (const std::exception& e) { std::cerr << e.what() << std::endl; Kokkos::finalize(); diff --git a/src/framework/tests/parameters.cpp b/src/framework/tests/parameters.cpp index 7cd5ce46a..07b2c11b3 100644 --- a/src/framework/tests/parameters.cpp +++ b/src/framework/tests/parameters.cpp @@ -29,12 +29,11 @@ const auto mink_1d = u8R"( metric = "minkowski" [grid.boundaries] - fields = [["PERIODIC"]] + fields = [["MATCH", "MATCH"]] particles = [["ABSORB", "ABSORB"]] [grid.boundaries.match] - coeff = 10.0 - ds = 0.025 + ds = [[0.025, 0.1]] [scales] larmor0 = 0.1 @@ -104,9 +103,6 @@ const auto sph_2d = 
u8R"( fields = [["ATMOSPHERE", "MATCH"]] particles = [["ATMOSPHERE", "ABSORB"]] - [grid.boundaries.match] - coeff = 10.0 - [grid.boundaries.atmosphere] temperature = 0.1 density = 1.0 @@ -269,7 +265,7 @@ auto main(int argc, char* argv[]) -> int { (real_t)0.0078125, "scales.V0"); boundaries_t fbc = { - { FldsBC::PERIODIC, FldsBC::PERIODIC } + { FldsBC::MATCH, FldsBC::MATCH } }; assert_equal( params_mink_1d.get>("grid.boundaries.fields")[0].first, @@ -283,6 +279,14 @@ auto main(int argc, char* argv[]) -> int { params_mink_1d.get>("grid.boundaries.fields").size(), fbc.size(), "grid.boundaries.fields.size()"); + assert_equal( + params_mink_1d.get>("grid.boundaries.match.ds")[0].first, + (real_t)0.025, + "grid.boundaries.match.ds[0].first"); + assert_equal( + params_mink_1d.get>("grid.boundaries.match.ds")[0].second, + (real_t)0.1, + "grid.boundaries.match.ds[0].second"); const auto species = params_mink_1d.get>( "particles.species"); @@ -383,15 +387,10 @@ auto main(int argc, char* argv[]) -> int { // match coeffs assert_equal( - params_sph_2d.get("grid.boundaries.match.ds"), + params_sph_2d.get>("grid.boundaries.match.ds")[0].second, (real_t)(defaults::bc::match::ds_frac * 19.0), "grid.boundaries.match.ds"); - assert_equal( - params_sph_2d.get("grid.boundaries.match.coeff"), - (real_t)10.0, - "grid.boundaries.match.coeff"); - assert_equal(params_sph_2d.get("particles.use_weights"), true, "particles.use_weights"); @@ -539,15 +538,10 @@ auto main(int argc, char* argv[]) -> int { // match coeffs assert_equal( - params_qks_2d.get("grid.boundaries.match.ds"), + params_qks_2d.get>("grid.boundaries.match.ds")[0].second, (real_t)(defaults::bc::match::ds_frac * (100.0 - 0.8)), "grid.boundaries.match.ds"); - assert_equal( - params_qks_2d.get("grid.boundaries.match.coeff"), - defaults::bc::match::coeff, - "grid.boundaries.match.coeff"); - const auto species = params_qks_2d.get>( "particles.species"); assert_equal(species[0].label(), "e-", "species[0].label"); diff --git 
a/src/global/arch/directions.h b/src/global/arch/directions.h index 2ea009814..5f8281ed3 100644 --- a/src/global/arch/directions.h +++ b/src/global/arch/directions.h @@ -50,14 +50,14 @@ namespace dir { auto operator-() const -> direction_t { auto result = direction_t {}; - for (auto i = 0u; i < (unsigned short)D; ++i) { + for (auto i { 0u }; i < D; ++i) { result[i] = -(*this)[i]; } return result; } auto operator==(const direction_t& other) const -> bool { - for (auto i = 0u; i < (unsigned short)D; ++i) { + for (auto i { 0u }; i < D; ++i) { if ((*this)[i] != other[i]) { return false; } @@ -132,8 +132,8 @@ namespace dir { using dirs_t = std::vector>; template - inline auto operator<<(std::ostream& os, - const direction_t& dir) -> std::ostream& { + inline auto operator<<(std::ostream& os, const direction_t& dir) + -> std::ostream& { for (auto& d : dir) { os << std::setw(2) << std::left; if (d > 0) { diff --git a/src/global/arch/traits.h b/src/global/arch/traits.h index 65cc63cf8..6d6c51f73 100644 --- a/src/global/arch/traits.h +++ b/src/global/arch/traits.h @@ -10,6 +10,7 @@ * - traits::run_t, traits::to_string_t * - traits::pgen::init_flds_t * - traits::pgen::ext_force_t + * - traits::pgen::ext_current_t * - traits::pgen::atm_fields_t * - traits::pgen::match_fields_const_t * - traits::pgen::match_fields_t @@ -97,12 +98,24 @@ namespace traits { template using ext_force_t = decltype(&T::ext_force); + template + using ext_current_t = decltype(&T::ext_current); + template using atm_fields_t = decltype(&T::AtmFields); template using match_fields_t = decltype(&T::MatchFields); + template + using match_fields_in_x1_t = decltype(&T::MatchFieldsInX1); + + template + using match_fields_in_x2_t = decltype(&T::MatchFieldsInX2); + + template + using match_fields_in_x3_t = decltype(&T::MatchFieldsInX3); + template using match_fields_const_t = decltype(&T::MatchFieldsConst); diff --git a/src/global/defaults.h b/src/global/defaults.h index d17647ccd..d673a503c 100644 --- 
a/src/global/defaults.h +++ b/src/global/defaults.h @@ -43,7 +43,6 @@ namespace ntt::defaults { namespace bc { namespace match { const real_t ds_frac = 0.01; - const real_t coeff = 1.0; } // namespace match namespace absorb { @@ -52,14 +51,19 @@ namespace ntt::defaults { } // namespace bc namespace output { - const std::string format = "hdf5"; - const timestep_t interval = 100; - const unsigned short mom_smooth = 0; - const npart_t prtl_stride = 100; - const real_t spec_emin = 1e-3; - const real_t spec_emax = 1e3; - const bool spec_log = true; - const std::size_t spec_nbins = 200; + const std::string format = "hdf5"; + const timestep_t interval = 100; + const unsigned short mom_smooth = 0; + const npart_t prtl_stride = 100; + const real_t spec_emin = 1e-3; + const real_t spec_emax = 1e3; + const bool spec_log = true; + const std::size_t spec_nbins = 200; + const std::vector stats_quantities = { "B^2", + "E^2", + "ExB", + "Rho", + "T00" }; } // namespace output namespace checkpoint { diff --git a/src/global/enums.h b/src/global/enums.h index d80297d8d..7e06d4a8d 100644 --- a/src/global/enums.h +++ b/src/global/enums.h @@ -307,6 +307,32 @@ namespace ntt { static constexpr std::size_t total = sizeof(variants) / sizeof(variants[0]); }; + struct StatsID : public enums_hidden::BaseEnum { + static constexpr const char* label = "out_stats"; + + enum type : uint8_t { + INVALID = 0, + B2 = 1, + E2 = 2, + ExB = 3, + JdotE = 4, + T = 5, + Rho = 6, + Charge = 7, + N = 8, + Npart = 9, + }; + + constexpr StatsID(uint8_t c) : enums_hidden::BaseEnum { c } {} + + static constexpr type variants[] = { B2, E2, ExB, JdotE, T, + Rho, Charge, N, Npart }; + static constexpr const char* lookup[] = { "b^2", "e^2", "exb", + "j.e", "t", "rho", + "charge", "n", "npart" }; + static constexpr std::size_t total = sizeof(variants) / sizeof(variants[0]); + }; + } // namespace ntt #endif // GLOBAL_ENUMS_H diff --git a/src/global/global.h b/src/global/global.h index 577b13f1a..42114a1da 100644 --- 
a/src/global/global.h +++ b/src/global/global.h @@ -269,6 +269,7 @@ namespace WriteMode { Fields = 1 << 0, Particles = 1 << 1, Spectra = 1 << 2, + Stats = 1 << 3, }; } // namespace WriteMode @@ -346,11 +347,13 @@ using duration_t = double; using simtime_t = double; using timestep_t = std::size_t; using ncells_t = std::size_t; -using npart_t = unsigned long long int; +using npart_t = unsigned long int; // index/number using index_t = const std::size_t; using idx_t = unsigned short; +using spidx_t = unsigned short; +using dim_t = unsigned short; using range_tuple_t = std::pair; diff --git a/src/global/tests/enums.cpp b/src/global/tests/enums.cpp index ebc074b72..1190417ef 100644 --- a/src/global/tests/enums.cpp +++ b/src/global/tests/enums.cpp @@ -71,6 +71,9 @@ auto main() -> int { "h", "j", "a", "t", "rho", "charge", "n", "nppc", "v", "custom" }; + enum_str_t all_out_stats = { "b^2", "e^2", "exb", "j.e", "t", + "rho", "charge", "n", "npart" }; + checkEnum(all_coords); checkEnum(all_metrics); checkEnum(all_simulation_engines); @@ -79,6 +82,7 @@ auto main() -> int { checkEnum(all_particle_pushers); checkEnum(all_coolings); checkEnum(all_out_flds); + checkEnum(all_out_stats); return 0; } diff --git a/src/global/utils/diag.cpp b/src/global/utils/diag.cpp index 9a35e30c9..6764c773f 100644 --- a/src/global/utils/diag.cpp +++ b/src/global/utils/diag.cpp @@ -120,16 +120,16 @@ namespace diag { ss << fmt::alignedTable( { "Step:", fmt::format("%lu", step), fmt::format("[of %lu]", tot_steps) }, { c_reset, c_bgreen, c_bblack }, - { 0, -6, -15 }, - { ' ', ' ', ' ' }, + { 0, -6, -32 }, + { ' ', ' ', '.' }, c_bblack, c_reset); ss << fmt::alignedTable( { "Time:", fmt::format("%.4f", time), fmt::format("[Δt = %.4f]", dt) }, { c_reset, c_bgreen, c_bblack }, - { 0, -6, -15 }, - { ' ', ' ', ' ' }, + { 0, -6, -32 }, + { ' ', ' ', '.' 
}, c_bblack, c_reset); }); diff --git a/src/global/utils/numeric.h b/src/global/utils/numeric.h index 719256d1d..cc1191b62 100644 --- a/src/global/utils/numeric.h +++ b/src/global/utils/numeric.h @@ -36,6 +36,7 @@ inline constexpr float TWO = 2.0f; inline constexpr float THREE = 3.0f; inline constexpr float FOUR = 4.0f; inline constexpr float FIVE = 5.0f; +inline constexpr float TWELVE = 12.0f; inline constexpr float ZERO = 0.0f; inline constexpr float HALF = 0.5f; inline constexpr float INV_2 = 0.5f; @@ -50,6 +51,7 @@ inline constexpr double TWO = 2.0; inline constexpr double THREE = 3.0; inline constexpr double FOUR = 4.0; inline constexpr double FIVE = 5.0; +inline constexpr double TWELVE = 12.0; inline constexpr double ZERO = 0.0; inline constexpr double HALF = 0.5; inline constexpr double INV_2 = 0.5; diff --git a/src/kernels/ampere_mink.hpp b/src/kernels/ampere_mink.hpp index 16ed1655a..45773ead3 100644 --- a/src/kernels/ampere_mink.hpp +++ b/src/kernels/ampere_mink.hpp @@ -15,10 +15,15 @@ #include "arch/kokkos_aliases.h" #include "utils/error.h" +#include "utils/numeric.h" namespace kernel::mink { using namespace ntt; + struct NoCurrent_t { + NoCurrent_t() {} + }; + /** * @brief Algorithm for the Ampere's law: `dE/dt = curl B` in Minkowski space. * @tparam D Dimension. @@ -88,33 +93,59 @@ namespace kernel::mink { * @brief `coeff` includes metric coefficient. * @tparam D Dimension. 
*/ - template + template class CurrentsAmpere_kernel { - ndfield_t E; - ndfield_t J; - // coeff = -dt * q0 * n0 / (B0 * V0) - const real_t coeff; - const real_t inv_n0; + static constexpr auto ExtCurrent = not std::is_same::value; + ndfield_t E; + ndfield_t J; + // coeff = -dt * q0 / (B0 * V0) + const real_t coeff; + const real_t ppc0; + const C ext_current; + real_t x1min { ZERO }; + real_t x2min { ZERO }; + real_t x3min { ZERO }; + real_t dx; public: + CurrentsAmpere_kernel(const ndfield_t& E, + const ndfield_t J, + real_t coeff, + real_t ppc0, + const C& ext_current, + const std::vector xmin, + real_t dx) + : E { E } + , J { J } + , coeff { coeff } + , ppc0 { ppc0 } + , ext_current { ext_current } + , x1min { xmin.size() > 0 ? xmin[0] : ZERO } + , x2min { xmin.size() > 1 ? xmin[1] : ZERO } + , x3min { xmin.size() > 2 ? xmin[2] : ZERO } + , dx { dx } {} + CurrentsAmpere_kernel(const ndfield_t& E, const ndfield_t J, real_t coeff, real_t inv_n0) - : E { E } - , J { J } - , coeff { coeff } - , inv_n0 { inv_n0 } {} + : CurrentsAmpere_kernel { E, J, coeff, inv_n0, NoCurrent_t {}, {}, ZERO } {} Inline void operator()(index_t i1) const { if constexpr (D == Dim::_1D) { - J(i1, cur::jx1) *= inv_n0; - J(i1, cur::jx2) *= inv_n0; - J(i1, cur::jx3) *= inv_n0; - + if constexpr (ExtCurrent) { + const auto i1_ = COORD(i1); + J(i1, cur::jx1) += ppc0 * ext_current.jx1({ (i1_ + HALF) * dx + x1min }); + J(i1, cur::jx2) += ppc0 * ext_current.jx2({ i1_ * dx + x1min }); + J(i1, cur::jx3) += ppc0 * ext_current.jx3({ i1_ * dx + x1min }); + } E(i1, em::ex1) += J(i1, cur::jx1) * coeff; E(i1, em::ex2) += J(i1, cur::jx2) * coeff; E(i1, em::ex3) += J(i1, cur::jx3) * coeff; + + J(i1, cur::jx1) /= ppc0; + J(i1, cur::jx2) /= ppc0; + J(i1, cur::jx3) /= ppc0; } else { raise::KernelError( HERE, @@ -124,14 +155,24 @@ namespace kernel::mink { Inline void operator()(index_t i1, index_t i2) const { if constexpr (D == Dim::_2D) { - J(i1, i2, cur::jx1) *= inv_n0; - J(i1, i2, cur::jx2) *= inv_n0; - J(i1, 
i2, cur::jx3) *= inv_n0; - + if constexpr (ExtCurrent) { + const auto i1_ = COORD(i1); + const auto i2_ = COORD(i2); + J(i1, i2, cur::jx1) += ppc0 * ext_current.jx1({ (i1_ + HALF) * dx + x1min, + i2_ * dx + x2min }); + J(i1, i2, cur::jx2) += ppc0 * + ext_current.jx2({ i1_ * dx + x1min, + (i2_ + HALF) * dx + x2min }); + J(i1, i2, cur::jx3) += ppc0 * ext_current.jx3({ i1_ * dx + x1min, + i2_ * dx + x2min }); + } E(i1, i2, em::ex1) += J(i1, i2, cur::jx1) * coeff; E(i1, i2, em::ex2) += J(i1, i2, cur::jx2) * coeff; E(i1, i2, em::ex3) += J(i1, i2, cur::jx3) * coeff; + J(i1, i2, cur::jx1) /= ppc0; + J(i1, i2, cur::jx2) /= ppc0; + J(i1, i2, cur::jx3) /= ppc0; } else { raise::KernelError( HERE, @@ -141,13 +182,30 @@ namespace kernel::mink { Inline void operator()(index_t i1, index_t i2, index_t i3) const { if constexpr (D == Dim::_3D) { - J(i1, i2, i3, cur::jx1) *= inv_n0; - J(i1, i2, i3, cur::jx2) *= inv_n0; - J(i1, i2, i3, cur::jx3) *= inv_n0; - + if constexpr (ExtCurrent) { + const auto i1_ = COORD(i1); + const auto i2_ = COORD(i2); + const auto i3_ = COORD(i3); + J(i1, i2, i3, cur::jx1) += ppc0 * + ext_current.jx1({ (i1_ + HALF) * dx + x1min, + i2_ * dx + x2min, + i3_ * dx + x3min }); + J(i1, i2, i3, cur::jx2) += ppc0 * + ext_current.jx2({ i1_ * dx + x1min, + (i2_ + HALF) * dx + x2min, + i3_ * dx + x3min }); + J(i1, i2, i3, cur::jx3) += ppc0 * ext_current.jx3( + { i1_ * dx + x1min, + i2_ * dx + x2min, + (i3_ + HALF) * dx + x3min }); + } E(i1, i2, i3, em::ex1) += J(i1, i2, i3, cur::jx1) * coeff; E(i1, i2, i3, em::ex2) += J(i1, i2, i3, cur::jx2) * coeff; E(i1, i2, i3, em::ex3) += J(i1, i2, i3, cur::jx3) * coeff; + + J(i1, i2, i3, cur::jx1) /= ppc0; + J(i1, i2, i3, cur::jx2) /= ppc0; + J(i1, i2, i3, cur::jx3) /= ppc0; } else { raise::KernelError( HERE, diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index ca9a94878..98d00a9b0 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -67,8 +67,8 @@ namespace 
kernel { const array_t& weight, const array_t& tag, const M& metric, - const real_t& charge, - const real_t& dt) + real_t charge, + real_t dt) : J { scatter_cur } , i1 { i1 } , i2 { i2 } @@ -100,45 +100,70 @@ namespace kernel { if (tag(p) == ParticleTag::dead) { return; } - // _f = final, _i = initial - tuple_t Ip_f, Ip_i; - coord_t xp_f, xp_i, xp_r; + // recover particle velocity to deposit in unsimulated direction vec_t vp { ZERO }; + { + coord_t xp { ZERO }; + if constexpr (D == Dim::_1D) { + xp[0] = i_di_to_Xi(i1(p), dx1(p)); + } else if constexpr (D == Dim::_2D) { + if constexpr (M::PrtlDim == Dim::_3D) { + xp[0] = i_di_to_Xi(i1(p), dx1(p)); + xp[1] = i_di_to_Xi(i2(p), dx2(p)); + xp[2] = phi(p); + } else { + xp[0] = i_di_to_Xi(i1(p), dx1(p)); + xp[1] = i_di_to_Xi(i2(p), dx2(p)); + } + } else { + xp[0] = i_di_to_Xi(i1(p), dx1(p)); + xp[1] = i_di_to_Xi(i2(p), dx2(p)); + xp[2] = i_di_to_Xi(i3(p), dx3(p)); + } + auto inv_energy { ZERO }; + if constexpr (S == SimEngine::SRPIC) { + metric.template transform_xyz(xp, + { ux1(p), ux2(p), ux3(p) }, + vp); + inv_energy = ONE / math::sqrt(ONE + NORM_SQR(ux1(p), ux2(p), ux3(p))); + } else { + metric.template transform(xp, + { ux1(p), ux2(p), ux3(p) }, + vp); + inv_energy = ONE / math::sqrt(ONE + ux1(p) * vp[0] + ux2(p) * vp[1] + + ux3(p) * vp[2]); + } + if (Kokkos::isnan(vp[2]) || Kokkos::isinf(vp[2])) { + vp[2] = ZERO; + } + vp[0] *= inv_energy; + vp[1] *= inv_energy; + vp[2] *= inv_energy; + } - // get [i, di]_init and [i, di]_final (per dimension) - getDepositInterval(p, Ip_f, Ip_i, xp_f, xp_i, xp_r); - // recover particle velocity to deposit in unsimulated direction - getPrtl3Vel(p, vp); const real_t coeff { weight(p) * charge }; - depositCurrentsFromParticle(coeff, vp, Ip_f, Ip_i, xp_f, xp_i, xp_r); - } - /** - * @brief Deposit currents from a single particle. - * @param[in] coeff Particle weight x charge. - * @param[in] vp Particle 3-velocity. - * @param[in] Ip_f Final position of the particle (cell index). 
- * @param[in] Ip_i Initial position of the particle (cell index). - * @param[in] xp_f Final position. - * @param[in] xp_i Previous step position. - * @param[in] xp_r Intermediate point used in zig-zag deposit. - */ - Inline auto depositCurrentsFromParticle(const real_t& coeff, - const vec_t& vp, - const tuple_t& Ip_f, - const tuple_t& Ip_i, - const coord_t& xp_f, - const coord_t& xp_i, - const coord_t& xp_r) const -> void { - const real_t Wx1_1 { HALF * (xp_i[0] + xp_r[0]) - - static_cast(Ip_i[0]) }; - const real_t Wx1_2 { HALF * (xp_f[0] + xp_r[0]) - - static_cast(Ip_f[0]) }; - const real_t Fx1_1 { (xp_r[0] - xp_i[0]) * coeff * inv_dt }; - const real_t Fx1_2 { (xp_f[0] - xp_r[0]) * coeff * inv_dt }; + const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * + (dx1(p) + dx1_prev(p)) * static_cast(INV_2) }; + + const real_t Wx1_1 { INV_2 * (dxp_r_1 + dx1_prev(p) + + static_cast(i1(p) > i1_prev(p))) }; + const real_t Wx1_2 { INV_2 * (dx1(p) + dxp_r_1 + + static_cast( + static_cast(i1(p) > i1_prev(p)) + + i1_prev(p) - i1(p))) }; + const real_t Fx1_1 { (static_cast(i1(p) > i1_prev(p)) + dxp_r_1 - + dx1_prev(p)) * + coeff * inv_dt }; + const real_t Fx1_2 { (static_cast( + i1(p) - i1_prev(p) - + static_cast(i1(p) > i1_prev(p))) + + dx1(p) - dxp_r_1) * + coeff * inv_dt }; auto J_acc = J.access(); + // tuple_t dxp_r; if constexpr (D == Dim::_1D) { const real_t Fx2_1 { HALF * vp[1] * coeff }; const real_t Fx2_2 { HALF * vp[1] * coeff }; @@ -146,265 +171,210 @@ namespace kernel { const real_t Fx3_1 { HALF * vp[2] * coeff }; const real_t Fx3_2 { HALF * vp[2] * coeff }; - J_acc(Ip_i[0] + N_GHOSTS, cur::jx1) += Fx1_1; - J_acc(Ip_f[0] + N_GHOSTS, cur::jx1) += Fx1_2; + J_acc(i1_prev(p) + N_GHOSTS, cur::jx1) += Fx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx1) += Fx1_2; - J_acc(Ip_i[0] + N_GHOSTS, cur::jx2) += Fx2_1 * (ONE - Wx1_1); - J_acc(Ip_i[0] + N_GHOSTS + 1, cur::jx2) += Fx2_1 * Wx1_1; - J_acc(Ip_f[0] + N_GHOSTS, cur::jx2) += Fx2_2 * (ONE - Wx1_2); - J_acc(Ip_f[0] + N_GHOSTS + 1, 
cur::jx2) += Fx2_2 * Wx1_2; + J_acc(i1_prev(p) + N_GHOSTS, cur::jx2) += Fx2_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx2) += Fx2_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx2) += Fx2_2 * (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, cur::jx2) += Fx2_2 * Wx1_2; - J_acc(Ip_i[0] + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1); - J_acc(Ip_i[0] + N_GHOSTS + 1, cur::jx3) += Fx3_1 * Wx1_1; - J_acc(Ip_f[0] + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2); - J_acc(Ip_f[0] + N_GHOSTS + 1, cur::jx3) += Fx3_2 * Wx1_2; + J_acc(i1_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * Wx1_2; } else if constexpr (D == Dim::_2D || D == Dim::_3D) { - const real_t Wx2_1 { HALF * (xp_i[1] + xp_r[1]) - - static_cast(Ip_i[1]) }; - const real_t Wx2_2 { HALF * (xp_f[1] + xp_r[1]) - - static_cast(Ip_f[1]) }; - const real_t Fx2_1 { (xp_r[1] - xp_i[1]) * coeff * inv_dt }; - const real_t Fx2_2 { (xp_f[1] - xp_r[1]) * coeff * inv_dt }; + const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * + (dx2(p) + dx2_prev(p)) * + static_cast(INV_2) }; + + const real_t Wx2_1 { INV_2 * (dxp_r_2 + dx2_prev(p) + + static_cast(i2(p) > i2_prev(p))) }; + const real_t Wx2_2 { INV_2 * (dx2(p) + dxp_r_2 + + static_cast( + static_cast(i2(p) > i2_prev(p)) + + i2_prev(p) - i2(p))) }; + const real_t Fx2_1 { (static_cast(i2(p) > i2_prev(p)) + + dxp_r_2 - dx2_prev(p)) * + coeff * inv_dt }; + const real_t Fx2_2 { (static_cast( + i2(p) - i2_prev(p) - + static_cast(i2(p) > i2_prev(p))) + + dx2(p) - dxp_r_2) * + coeff * inv_dt }; if constexpr (D == Dim::_2D) { const real_t Fx3_1 { HALF * vp[2] * coeff }; const real_t Fx3_2 { HALF * vp[2] * coeff }; - J_acc(Ip_i[0] + N_GHOSTS, Ip_i[1] + N_GHOSTS, cur::jx1) += Fx1_1 * - (ONE - Wx2_1); - J_acc(Ip_i[0] + N_GHOSTS, Ip_i[1] + N_GHOSTS + 1, cur::jx1) += Fx1_1 * - Wx2_1; - J_acc(Ip_f[0] + 
N_GHOSTS, Ip_f[1] + N_GHOSTS, cur::jx1) += Fx1_2 * - (ONE - Wx2_2); - J_acc(Ip_f[0] + N_GHOSTS, Ip_f[1] + N_GHOSTS + 1, cur::jx1) += Fx1_2 * - Wx2_2; - - J_acc(Ip_i[0] + N_GHOSTS, Ip_i[1] + N_GHOSTS, cur::jx2) += Fx2_1 * - (ONE - Wx1_1); - J_acc(Ip_i[0] + N_GHOSTS + 1, Ip_i[1] + N_GHOSTS, cur::jx2) += Fx2_1 * - Wx1_1; - J_acc(Ip_f[0] + N_GHOSTS, Ip_f[1] + N_GHOSTS, cur::jx2) += Fx2_2 * - (ONE - Wx1_2); - J_acc(Ip_f[0] + N_GHOSTS + 1, Ip_f[1] + N_GHOSTS, cur::jx2) += Fx2_2 * - Wx1_2; - - J_acc(Ip_i[0] + N_GHOSTS, - Ip_i[1] + N_GHOSTS, + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * Wx2_1; + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx1) += Fx1_2 * + (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS + 1, cur::jx1) += Fx1_2 * Wx2_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * + (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * Wx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); - J_acc(Ip_i[0] + N_GHOSTS + 1, - Ip_i[1] + N_GHOSTS, + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * Wx1_2 * (ONE - Wx2_1); - J_acc(Ip_i[0] + N_GHOSTS, - Ip_i[1] + N_GHOSTS + 1, + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; - J_acc(Ip_i[0] + N_GHOSTS + 1, - Ip_i[1] + N_GHOSTS + 1, + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; - J_acc(Ip_f[0] + N_GHOSTS, - Ip_f[1] + N_GHOSTS, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); - J_acc(Ip_f[0] + N_GHOSTS + 1, - Ip_f[1] + N_GHOSTS, + J_acc(i1(p) + N_GHOSTS, i2(p) 
+ N_GHOSTS, cur::jx3) += Fx3_2 * + (ONE - Wx1_2) * + (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); - J_acc(Ip_f[0] + N_GHOSTS, - Ip_f[1] + N_GHOSTS + 1, + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; - J_acc(Ip_f[0] + N_GHOSTS + 1, - Ip_f[1] + N_GHOSTS + 1, - cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; + J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * + Wx1_2 * + Wx2_2; } else { - const real_t Wx3_1 { HALF * (xp_i[2] + xp_r[2]) - - static_cast(Ip_i[2]) }; - const real_t Wx3_2 { HALF * (xp_f[2] + xp_r[2]) - - static_cast(Ip_f[2]) }; - const real_t Fx3_1 { (xp_r[2] - xp_i[2]) * coeff * inv_dt }; - const real_t Fx3_2 { (xp_f[2] - xp_r[2]) * coeff * inv_dt }; - - J_acc(Ip_i[0] + N_GHOSTS, - Ip_i[1] + N_GHOSTS, - Ip_i[2] + N_GHOSTS, + const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * + (dx3(p) + dx3_prev(p)) * + static_cast(INV_2) }; + const real_t Wx3_1 { INV_2 * (dxp_r_3 + dx3_prev(p) + + static_cast(i3(p) > i3_prev(p))) }; + const real_t Wx3_2 { INV_2 * (dx3(p) + dxp_r_3 + + static_cast( + static_cast(i3(p) > i3_prev(p)) + + i3_prev(p) - i3(p))) }; + const real_t Fx3_1 { (static_cast(i3(p) > i3_prev(p)) + + dxp_r_3 - dx3_prev(p)) * + coeff * inv_dt }; + const real_t Fx3_2 { (static_cast( + i3(p) - i3_prev(p) - + static_cast(i3(p) > i3_prev(p))) + + dx3(p) - dxp_r_3) * + coeff * inv_dt }; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, cur::jx1) += Fx1_1 * (ONE - Wx2_1) * (ONE - Wx3_1); - J_acc(Ip_i[0] + N_GHOSTS, - Ip_i[1] + N_GHOSTS + 1, - Ip_i[2] + N_GHOSTS, + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, cur::jx1) += Fx1_1 * Wx2_1 * (ONE - Wx3_1); - J_acc(Ip_i[0] + N_GHOSTS, - Ip_i[1] + N_GHOSTS, - Ip_i[2] + N_GHOSTS + 1, + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, cur::jx1) += Fx1_1 * (ONE - Wx2_1) * Wx3_1; - J_acc(Ip_i[0] 
+ N_GHOSTS, - Ip_i[1] + N_GHOSTS + 1, - Ip_i[2] + N_GHOSTS + 1, + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS + 1, cur::jx1) += Fx1_1 * Wx2_1 * Wx3_1; - J_acc(Ip_f[0] + N_GHOSTS, - Ip_f[1] + N_GHOSTS, - Ip_f[2] + N_GHOSTS, + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, cur::jx1) += Fx1_2 * (ONE - Wx2_2) * (ONE - Wx3_2); - J_acc(Ip_f[0] + N_GHOSTS, - Ip_f[1] + N_GHOSTS + 1, - Ip_f[2] + N_GHOSTS, + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, cur::jx1) += Fx1_2 * Wx2_2 * (ONE - Wx3_2); - J_acc(Ip_f[0] + N_GHOSTS, - Ip_f[1] + N_GHOSTS, - Ip_f[2] + N_GHOSTS + 1, + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, cur::jx1) += Fx1_2 * (ONE - Wx2_2) * Wx3_2; - J_acc(Ip_f[0] + N_GHOSTS, - Ip_f[1] + N_GHOSTS + 1, - Ip_f[2] + N_GHOSTS + 1, + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS + 1, cur::jx1) += Fx1_2 * Wx2_2 * Wx3_2; - J_acc(Ip_i[0] + N_GHOSTS, - Ip_i[1] + N_GHOSTS, - Ip_i[2] + N_GHOSTS, + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, cur::jx2) += Fx2_1 * (ONE - Wx1_1) * (ONE - Wx3_1); - J_acc(Ip_i[0] + N_GHOSTS + 1, - Ip_i[1] + N_GHOSTS, - Ip_i[2] + N_GHOSTS, + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, cur::jx2) += Fx2_1 * Wx1_1 * (ONE - Wx3_1); - J_acc(Ip_i[0] + N_GHOSTS, - Ip_i[1] + N_GHOSTS, - Ip_i[2] + N_GHOSTS + 1, + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, cur::jx2) += Fx2_1 * (ONE - Wx1_1) * Wx3_1; - J_acc(Ip_i[0] + N_GHOSTS + 1, - Ip_i[1] + N_GHOSTS, - Ip_i[2] + N_GHOSTS + 1, + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, cur::jx2) += Fx2_1 * Wx1_1 * Wx3_1; - J_acc(Ip_f[0] + N_GHOSTS, - Ip_f[1] + N_GHOSTS, - Ip_f[2] + N_GHOSTS, + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, cur::jx2) += Fx2_2 * (ONE - Wx1_2) * (ONE - Wx3_2); - J_acc(Ip_f[0] + N_GHOSTS + 1, - 
Ip_f[1] + N_GHOSTS, - Ip_f[2] + N_GHOSTS, + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, cur::jx2) += Fx2_2 * Wx1_2 * (ONE - Wx3_2); - J_acc(Ip_f[0] + N_GHOSTS, - Ip_f[1] + N_GHOSTS, - Ip_f[2] + N_GHOSTS + 1, + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, cur::jx2) += Fx2_2 * (ONE - Wx1_2) * Wx3_2; - J_acc(Ip_f[0] + N_GHOSTS + 1, - Ip_f[1] + N_GHOSTS, - Ip_f[2] + N_GHOSTS + 1, + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, cur::jx2) += Fx2_2 * Wx1_2 * Wx3_2; - J_acc(Ip_i[0] + N_GHOSTS, - Ip_i[1] + N_GHOSTS, - Ip_i[2] + N_GHOSTS, + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); - J_acc(Ip_i[0] + N_GHOSTS + 1, - Ip_i[1] + N_GHOSTS, - Ip_i[2] + N_GHOSTS, + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * Wx1_1 * (ONE - Wx2_1); - J_acc(Ip_i[0] + N_GHOSTS, - Ip_i[1] + N_GHOSTS + 1, - Ip_i[2] + N_GHOSTS, + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; - J_acc(Ip_i[0] + N_GHOSTS + 1, - Ip_i[1] + N_GHOSTS + 1, - Ip_i[2] + N_GHOSTS, + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; - J_acc(Ip_f[0] + N_GHOSTS, - Ip_f[1] + N_GHOSTS, - Ip_f[2] + N_GHOSTS, + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); - J_acc(Ip_f[0] + N_GHOSTS + 1, - Ip_f[1] + N_GHOSTS, - Ip_f[2] + N_GHOSTS, + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); - J_acc(Ip_f[0] + N_GHOSTS, - Ip_f[1] + N_GHOSTS + 1, - Ip_f[2] + N_GHOSTS, + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; - J_acc(Ip_f[0] + N_GHOSTS + 1, - Ip_f[1] + N_GHOSTS + 1, - Ip_f[2] + 
N_GHOSTS, + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; } } } - - /** - * @brief Get particle position in `coord_t` form. - * @param[in] p Index of particle. - * @param[out] Ip_f Final position of the particle (cell index). - * @param[out] Ip_i Initial position of the particle (cell index). - * @param[out] xp_f Final position. - * @param[out] xp_i Previous step position. - * @param[out] xp_r Intermediate point used in zig-zag deposit. - */ - Inline auto getDepositInterval(index_t& p, - tuple_t& Ip_f, - tuple_t& Ip_i, - coord_t& xp_f, - coord_t& xp_i, - coord_t& xp_r) const -> void { - Ip_f[0] = i1(p); - Ip_i[0] = i1_prev(p); - xp_f[0] = i_di_to_Xi(Ip_f[0], dx1(p)); - xp_i[0] = i_di_to_Xi(Ip_i[0], dx1_prev(p)); - if constexpr (D == Dim::_2D || D == Dim::_3D) { - Ip_f[1] = i2(p); - Ip_i[1] = i2_prev(p); - xp_f[1] = i_di_to_Xi(Ip_f[1], dx2(p)); - xp_i[1] = i_di_to_Xi(Ip_i[1], dx2_prev(p)); - } - if constexpr (D == Dim::_3D) { - Ip_f[2] = i3(p); - Ip_i[2] = i3_prev(p); - xp_f[2] = i_di_to_Xi(Ip_f[2], dx3(p)); - xp_i[2] = i_di_to_Xi(Ip_i[2], dx3_prev(p)); - } - for (auto i = 0u; i < D; ++i) { - xp_r[i] = math::fmin(static_cast(IMIN(Ip_i[i], Ip_f[i]) + 1), - math::fmax(static_cast(IMAX(Ip_i[i], Ip_f[i])), - HALF * (xp_i[i] + xp_f[i]))); - } - } - - // Getters - Inline void getPrtlPos(index_t& p, coord_t& xp) const { - if constexpr (D == Dim::_1D) { - xp[0] = i_di_to_Xi(i1(p), dx1(p)); - } else if constexpr (D == Dim::_2D) { - if constexpr (M::PrtlDim == Dim::_3D) { - xp[0] = i_di_to_Xi(i1(p), dx1(p)); - xp[1] = i_di_to_Xi(i2(p), dx2(p)); - xp[2] = phi(p); - } else { - xp[0] = i_di_to_Xi(i1(p), dx1(p)); - xp[1] = i_di_to_Xi(i2(p), dx2(p)); - } - } else { - xp[0] = i_di_to_Xi(i1(p), dx1(p)); - xp[1] = i_di_to_Xi(i2(p), dx2(p)); - xp[2] = i_di_to_Xi(i3(p), dx3(p)); - } - } - - Inline void getPrtl3Vel(index_t& p, vec_t& vp) const { - coord_t xp { ZERO }; - getPrtlPos(p, xp); - auto inv_energy { ZERO }; - if 
constexpr (S == SimEngine::SRPIC) { - metric.template transform_xyz(xp, - { ux1(p), ux2(p), ux3(p) }, - vp); - inv_energy = ONE / math::sqrt(ONE + NORM_SQR(ux1(p), ux2(p), ux3(p))); - } else { - metric.template transform(xp, { ux1(p), ux2(p), ux3(p) }, vp); - inv_energy = ONE / math::sqrt(ONE + ux1(p) * vp[0] + ux2(p) * vp[1] + - ux3(p) * vp[2]); - } - if (Kokkos::isnan(vp[2]) || Kokkos::isinf(vp[2])) { - vp[2] = ZERO; - } - vp[0] *= inv_energy; - vp[1] *= inv_energy; - vp[2] *= inv_energy; - } }; } // namespace kernel diff --git a/src/kernels/divergences.hpp b/src/kernels/divergences.hpp new file mode 100644 index 000000000..c60be564b --- /dev/null +++ b/src/kernels/divergences.hpp @@ -0,0 +1,123 @@ +/** + * @file kernels/divergences.hpp + * @brief Compute covariant divergences of fields + * @implements + * - kernel::ComputeDivergence_kernel<> + * @namespaces: + * - kernel:: + */ + +#ifndef KERNELS_DIVERGENCES_HPP +#define KERNELS_DIVERGENCES_HPP + +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "utils/error.h" + +namespace kernel { + using namespace ntt; + + // @TODO: take care of boundaries + template + class ComputeDivergence_kernel { + const M metric; + + const ndfield_t fields; + ndfield_t buff; + const idx_t buff_idx; + + public: + ComputeDivergence_kernel(const M& metric, + const ndfield_t& fields, + ndfield_t& buff, + idx_t buff_idx) + : metric { metric } + , fields { fields } + , buff { buff } + , buff_idx { buff_idx } { + raise::ErrorIf(buff_idx >= N, "Invalid component index", HERE); + } + + Inline void operator()(index_t i1) const { + if constexpr (M::Dim == Dim::_1D) { + if constexpr (M::CoordType == Coord::Cart) { + buff(i1, buff_idx) = fields(i1, em::ex1) - fields(i1 - 1, em::ex1); + } else { + const auto i1_ = COORD(i1); + buff(i1, buff_idx) = (fields(i1, em::ex1) * + metric.sqrt_det_h({ i1_ + HALF }) - + fields(i1 - 1, em::ex1) * + metric.sqrt_det_h({ i1_ - HALF })) / + metric.sqrt_det_h({ i1_ }); + } + } else { + 
raise::KernelError( + HERE, + "1D implementation of ComputeDivergence_kernel called for non-1D"); + } + } + + Inline void operator()(index_t i1, index_t i2) const { + if constexpr (M::Dim == Dim::_2D) { + if constexpr (M::CoordType == Coord::Cart) { + buff(i1, i2, buff_idx) = fields(i1, i2, em::ex1) - + fields(i1 - 1, i2, em::ex1) + + fields(i1, i2, em::ex2) - + fields(i1, i2 - 1, em::ex2); + } else { + const auto i1_ = COORD(i1); + const auto i2_ = COORD(i2); + buff(i1, i2, buff_idx) = + (fields(i1, i2, em::ex1) * metric.sqrt_det_h({ i1_ + HALF, i2_ }) - + fields(i1 - 1, i2, em::ex1) * metric.sqrt_det_h({ i1_ - HALF, i2_ }) + + fields(i1, i2, em::ex2) * metric.sqrt_det_h({ i1_, i2_ + HALF }) - + fields(i1, i2 - 1, em::ex2) * metric.sqrt_det_h({ i1_, i2_ - HALF })) / + metric.sqrt_det_h({ i1_ + HALF, i2_ + HALF }); + } + } else { + raise::KernelError( + HERE, + "2D implementation of ComputeDivergence_kernel called for non-2D"); + } + } + + Inline void operator()(index_t i1, index_t i2, index_t i3) const { + if constexpr (M::Dim == Dim::_3D) { + if constexpr (M::CoordType == Coord::Cart) { + buff(i1, i2, i3, buff_idx) = fields(i1, i2, i3, em::ex1) - + fields(i1 - 1, i2, i3, em::ex1) + + fields(i1, i2, i3, em::ex2) - + fields(i1, i2 - 1, i3, em::ex2) + + fields(i1, i2, i3, em::ex3) - + fields(i1, i2, i3 - 1, em::ex3); + } else { + const auto i1_ = COORD(i1); + const auto i2_ = COORD(i2); + const auto i3_ = COORD(i3); + buff(i1, i2, i3, buff_idx) = + (fields(i1, i2, i3, em::ex1) * + metric.sqrt_det_h({ i1_ + HALF, i2_, i3_ }) - + fields(i1 - 1, i2, i3, em::ex1) * + metric.sqrt_det_h({ i1_ - HALF, i2_, i3_ }) + + fields(i1, i2, i3, em::ex2) * + metric.sqrt_det_h({ i1_, i2_ + HALF, i3_ }) - + fields(i1, i2 - 1, i3, em::ex2) * + metric.sqrt_det_h({ i1_, i2_ - HALF, i3_ }) + + fields(i1, i2, i3, em::ex3) * + metric.sqrt_det_h({ i1_, i2_, i3_ + HALF }) - + fields(i1, i2, i3 - 1, em::ex3) * + metric.sqrt_det_h({ i1_, i2_, i3_ - HALF })) / + metric.sqrt_det_h({ i1_ + HALF, 
i2_ + HALF, i3_ + HALF }); + } + } else { + raise::KernelError( + HERE, + "3D implementation of ComputeDivergence_kernel called for non-3D"); + } + } + }; + +} // namespace kernel + +#endif // KERNELS_DIVERGENCES_HPP diff --git a/src/kernels/fields_bcs.hpp b/src/kernels/fields_bcs.hpp index 782200e29..0a5dc6168 100644 --- a/src/kernels/fields_bcs.hpp +++ b/src/kernels/fields_bcs.hpp @@ -36,8 +36,7 @@ namespace kernel::bc { template struct MatchBoundaries_kernel { static_assert(M::is_metric, "M must be a metric class"); - static_assert(static_cast(o) < - static_cast(M::Dim), + static_assert(static_cast(o) < static_cast(M::Dim), "Invalid component index"); static constexpr idx_t i = static_cast(o) + 1u; static constexpr bool defines_dx1 = traits::has_method::value; @@ -488,7 +487,7 @@ namespace kernel::bc { template struct ConductorBoundaries_kernel { - static_assert(static_cast(o) < static_cast(D), + static_assert(static_cast(o) < static_cast(D), "Invalid component index"); ndfield_t Fld; @@ -850,8 +849,7 @@ namespace kernel::bc { defines_bx2 or defines_bx3, "none of the components of E or B are specified in PGEN"); static_assert(M::is_metric, "M must be a metric class"); - static_assert(static_cast(O) < - static_cast(M::Dim), + static_assert(static_cast(O) < static_cast(M::Dim), "Invalid Orientation"); ndfield_t Fld; diff --git a/src/kernels/injectors.hpp b/src/kernels/injectors.hpp index 09bc7a180..8406fc4eb 100644 --- a/src/kernels/injectors.hpp +++ b/src/kernels/injectors.hpp @@ -24,7 +24,6 @@ namespace kernel { using namespace ntt; - using spidx_t = unsigned short; template struct UniformInjector_kernel { @@ -49,7 +48,7 @@ namespace kernel { npart_t offset1, offset2; const M metric; - const array_t ni; + const array_t xi_min, xi_max; const ED energy_dist; const real_t inv_V0; random_number_pool_t random_pool; @@ -61,7 +60,8 @@ namespace kernel { npart_t offset1, npart_t offset2, const M& metric, - const array_t& ni, + const array_t& xi_min, + const array_t& 
xi_max, const ED& energy_dist, real_t inv_V0, random_number_pool_t& random_pool) @@ -94,7 +94,8 @@ namespace kernel { , offset1 { offset1 } , offset2 { offset2 } , metric { metric } - , ni { ni } + , xi_min { xi_min } + , xi_max { xi_max } , energy_dist { energy_dist } , inv_V0 { inv_V0 } , random_pool { random_pool } {} @@ -104,12 +105,12 @@ namespace kernel { vec_t v1 { ZERO }, v2 { ZERO }; { // generate a random coordinate auto rand_gen = random_pool.get_state(); - x_Cd[0] = Random(rand_gen) * ni(0); + x_Cd[0] = xi_min(0) + Random(rand_gen) * (xi_max(0) - xi_min(0)); if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { - x_Cd[1] = Random(rand_gen) * ni(1); + x_Cd[1] = xi_min(1) + Random(rand_gen) * (xi_max(1) - xi_min(1)); } if constexpr (M::Dim == Dim::_3D) { - x_Cd[2] = Random(rand_gen) * ni(2); + x_Cd[2] = xi_min(2) + Random(rand_gen) * (xi_max(2) - xi_min(2)); } random_pool.free_state(rand_gen); } @@ -185,6 +186,179 @@ namespace kernel { } }; // struct UniformInjector_kernel + namespace experimental { + + template + struct UniformInjector_kernel { + static_assert(ED1::is_energy_dist, + "ED1 must be an energy distribution class"); + static_assert(ED2::is_energy_dist, + "ED2 must be an energy distribution class"); + static_assert(M::is_metric, "M must be a metric class"); + + const spidx_t spidx1, spidx2; + + array_t i1s_1, i2s_1, i3s_1; + array_t dx1s_1, dx2s_1, dx3s_1; + array_t ux1s_1, ux2s_1, ux3s_1; + array_t phis_1; + array_t weights_1; + array_t tags_1; + + array_t i1s_2, i2s_2, i3s_2; + array_t dx1s_2, dx2s_2, dx3s_2; + array_t ux1s_2, ux2s_2, ux3s_2; + array_t phis_2; + array_t weights_2; + array_t tags_2; + + npart_t offset1, offset2; + const M metric; + const array_t xi_min, xi_max; + const ED1 energy_dist_1; + const ED2 energy_dist_2; + const real_t inv_V0; + random_number_pool_t random_pool; + + UniformInjector_kernel(spidx_t spidx1, + spidx_t spidx2, + Particles& species1, + Particles& species2, + npart_t offset1, + npart_t offset2, + const 
M& metric, + const array_t& xi_min, + const array_t& xi_max, + const ED1& energy_dist_1, + const ED2& energy_dist_2, + real_t inv_V0, + random_number_pool_t& random_pool) + : spidx1 { spidx1 } + , spidx2 { spidx2 } + , i1s_1 { species1.i1 } + , i2s_1 { species1.i2 } + , i3s_1 { species1.i3 } + , dx1s_1 { species1.dx1 } + , dx2s_1 { species1.dx2 } + , dx3s_1 { species1.dx3 } + , ux1s_1 { species1.ux1 } + , ux2s_1 { species1.ux2 } + , ux3s_1 { species1.ux3 } + , phis_1 { species1.phi } + , weights_1 { species1.weight } + , tags_1 { species1.tag } + , i1s_2 { species2.i1 } + , i2s_2 { species2.i2 } + , i3s_2 { species2.i3 } + , dx1s_2 { species2.dx1 } + , dx2s_2 { species2.dx2 } + , dx3s_2 { species2.dx3 } + , ux1s_2 { species2.ux1 } + , ux2s_2 { species2.ux2 } + , ux3s_2 { species2.ux3 } + , phis_2 { species2.phi } + , weights_2 { species2.weight } + , tags_2 { species2.tag } + , offset1 { offset1 } + , offset2 { offset2 } + , metric { metric } + , xi_min { xi_min } + , xi_max { xi_max } + , energy_dist_1 { energy_dist_1 } + , energy_dist_2 { energy_dist_2 } + , inv_V0 { inv_V0 } + , random_pool { random_pool } {} + + Inline void operator()(index_t p) const { + coord_t x_Cd { ZERO }; + vec_t v1 { ZERO }, v2 { ZERO }; + { // generate a random coordinate + auto rand_gen = random_pool.get_state(); + x_Cd[0] = xi_min(0) + Random(rand_gen) * (xi_max(0) - xi_min(0)); + if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { + x_Cd[1] = xi_min(1) + + Random(rand_gen) * (xi_max(1) - xi_min(1)); + } + if constexpr (M::Dim == Dim::_3D) { + x_Cd[2] = xi_min(2) + + Random(rand_gen) * (xi_max(2) - xi_min(2)); + } + random_pool.free_state(rand_gen); + } + { // generate the velocity + coord_t x_Ph { ZERO }; + metric.template convert(x_Cd, x_Ph); + if constexpr (M::CoordType == Coord::Cart) { + vec_t v_Ph { ZERO }; + energy_dist_1(x_Ph, v_Ph, spidx1); + metric.template transform_xyz(x_Ph, v_Ph, v1); + energy_dist_2(x_Ph, v_Ph, spidx2); + metric.template transform_xyz(x_Ph, v_Ph, 
v2); + } else if constexpr (S == SimEngine::SRPIC) { + coord_t x_Ph_ { ZERO }; + x_Ph_[0] = x_Ph[0]; + x_Ph_[1] = x_Ph[1]; + x_Ph_[2] = ZERO; // phi = 0 + vec_t v_Ph { ZERO }; + energy_dist_1(x_Ph, v_Ph, spidx1); + metric.template transform_xyz(x_Ph_, v_Ph, v1); + energy_dist_2(x_Ph, v_Ph, spidx2); + metric.template transform_xyz(x_Ph_, v_Ph, v2); + } else if constexpr (S == SimEngine::GRPIC) { + vec_t v_Ph { ZERO }; + energy_dist_1(x_Ph, v_Ph, spidx1); + metric.template transform(x_Ph, v_Ph, v1); + energy_dist_2(x_Ph, v_Ph, spidx2); + metric.template transform(x_Ph, v_Ph, v2); + } else { + raise::KernelError(HERE, "Unknown simulation engine"); + } + } + // inject + i1s_1(p + offset1) = static_cast(x_Cd[0]); + dx1s_1(p + offset1) = static_cast( + x_Cd[0] - static_cast(i1s_1(p + offset1))); + i1s_2(p + offset2) = i1s_1(p + offset1); + dx1s_2(p + offset2) = dx1s_1(p + offset1); + if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { + i2s_1(p + offset1) = static_cast(x_Cd[1]); + dx2s_1(p + offset1) = static_cast( + x_Cd[1] - static_cast(i2s_1(p + offset1))); + i2s_2(p + offset2) = i2s_1(p + offset1); + dx2s_2(p + offset2) = dx2s_1(p + offset1); + if constexpr (S == SimEngine::SRPIC && M::CoordType != Coord::Cart) { + phis_1(p + offset1) = ZERO; + phis_2(p + offset2) = ZERO; + } + } + if constexpr (M::Dim == Dim::_3D) { + i3s_1(p + offset1) = static_cast(x_Cd[2]); + dx3s_1(p + offset1) = static_cast( + x_Cd[2] - static_cast(i3s_1(p + offset1))); + i3s_2(p + offset2) = i3s_1(p + offset1); + dx3s_2(p + offset2) = dx3s_1(p + offset1); + } + ux1s_1(p + offset1) = v1[0]; + ux2s_1(p + offset1) = v1[1]; + ux3s_1(p + offset1) = v1[2]; + ux1s_2(p + offset2) = v2[0]; + ux2s_2(p + offset2) = v2[1]; + ux3s_2(p + offset2) = v2[2]; + tags_1(p + offset1) = ParticleTag::alive; + tags_2(p + offset2) = ParticleTag::alive; + if constexpr (M::CoordType == Coord::Cart) { + weights_1(p + offset1) = ONE; + weights_2(p + offset2) = ONE; + } else { + const auto sqrt_det_h = 
metric.sqrt_det_h(x_Cd); + weights_1(p + offset1) = sqrt_det_h * inv_V0; + weights_2(p + offset2) = sqrt_det_h * inv_V0; + } + } + }; // struct UniformInjector_kernel + + } // namespace experimental + template struct GlobalInjector_kernel { static_assert(M::is_metric, "M must be a metric class"); diff --git a/src/kernels/particle_moments.hpp b/src/kernels/particle_moments.hpp index 18540a771..8be68f5d2 100644 --- a/src/kernels/particle_moments.hpp +++ b/src/kernels/particle_moments.hpp @@ -18,8 +18,6 @@ #include "utils/error.h" #include "utils/numeric.h" -#include - #include namespace kernel { @@ -49,7 +47,7 @@ namespace kernel { const unsigned short c1, c2; scatter_ndfield_t Buff; - const unsigned short buff_idx; + const idx_t buff_idx; const array_t i1, i2, i3; const array_t dx1, dx2, dx3; const array_t ux1, ux2, ux3; @@ -71,7 +69,7 @@ namespace kernel { public: ParticleMoments_kernel(const std::vector& components, const scatter_ndfield_t& scatter_buff, - unsigned short buff_idx, + idx_t buff_idx, const array_t& i1, const array_t& i2, const array_t& i3, diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 2e8a5f652..6bd4e1714 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -30,7 +30,9 @@ /* Local macros */ /* -------------------------------------------------------------------------- */ #define from_Xi_to_i(XI, I) \ - { I = static_cast((XI + 1)) - 1; } + { \ + I = static_cast((XI + 1)) - 1; \ + } #define from_Xi_to_i_di(XI, I, DI) \ { \ @@ -102,10 +104,10 @@ namespace kernel::sr { raise::ErrorIf(ExtForce, "External force not provided", HERE); } - Inline auto fx1(const unsigned short& sp, - const real_t& time, - bool ext_force, - const coord_t& x_Ph) const -> real_t { + Inline auto fx1(const spidx_t& sp, + const simtime_t& time, + bool ext_force, + const coord_t& x_Ph) const -> real_t { real_t f_x1 = ZERO; if constexpr (ExtForce) { if (ext_force) { @@ -128,10 +130,10 @@ namespace 
kernel::sr { return f_x1; } - Inline auto fx2(const unsigned short& sp, - const real_t& time, - bool ext_force, - const coord_t& x_Ph) const -> real_t { + Inline auto fx2(const spidx_t& sp, + const simtime_t& time, + bool ext_force, + const coord_t& x_Ph) const -> real_t { real_t f_x2 = ZERO; if constexpr (ExtForce) { if (ext_force) { @@ -154,10 +156,10 @@ namespace kernel::sr { return f_x2; } - Inline auto fx3(const unsigned short& sp, - const real_t& time, - bool ext_force, - const coord_t& x_Ph) const -> real_t { + Inline auto fx3(const spidx_t& sp, + const simtime_t& time, + bool ext_force, + const coord_t& x_Ph) const -> real_t { real_t f_x3 = ZERO; if constexpr (ExtForce) { if (ext_force) { @@ -198,7 +200,7 @@ namespace kernel::sr { const CoolingTags cooling; const randacc_ndfield_t EB; - const unsigned short sp; + const spidx_t sp; array_t i1, i2, i3; array_t i1_prev, i2_prev, i3_prev; array_t dx1, dx2, dx3; @@ -232,7 +234,7 @@ namespace kernel::sr { bool ext_force, CoolingTags cooling, const randacc_ndfield_t& EB, - unsigned short sp, + spidx_t sp, array_t& i1, array_t& i2, array_t& i3, @@ -336,7 +338,7 @@ namespace kernel::sr { bool ext_force, CoolingTags cooling, const ndfield_t& EB, - unsigned short sp, + spidx_t sp, array_t& i1, array_t& i2, array_t& i3, @@ -355,7 +357,7 @@ namespace kernel::sr { array_t& phi, array_t& tag, const M& metric, - real_t time, + simtime_t time, real_t coeff, real_t dt, int ni1, @@ -562,45 +564,85 @@ namespace kernel::sr { Inline void posUpd(bool massive, index_t& p, coord_t& xp) const { // get cartesian velocity - const real_t inv_energy { - massive ? 
ONE / math::sqrt(ONE + SQR(ux1(p)) + SQR(ux2(p)) + SQR(ux3(p))) - : ONE / math::sqrt(SQR(ux1(p)) + SQR(ux2(p)) + SQR(ux3(p))) - }; - vec_t vp_Cart { ux1(p) * inv_energy, - ux2(p) * inv_energy, - ux3(p) * inv_energy }; - // get cartesian position - coord_t xp_Cart { ZERO }; - metric.template convert_xyz(xp, xp_Cart); - // update cartesian position - for (auto d = 0u; d < M::PrtlDim; ++d) { - xp_Cart[d] += vp_Cart[d] * dt; - } - // transform back to code - metric.template convert_xyz(xp_Cart, xp); - - // update x1 - if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { - i1_prev(p) = i1(p); - dx1_prev(p) = dx1(p); - from_Xi_to_i_di(xp[0], i1(p), dx1(p)); - } + if constexpr (M::CoordType == Coord::Cart) { + // i+di push for Cartesian basis + const real_t dt_inv_energy { + massive + ? (dt / math::sqrt(ONE + SQR(ux1(p)) + SQR(ux2(p)) + SQR(ux3(p)))) + : (dt / math::sqrt(SQR(ux1(p)) + SQR(ux2(p)) + SQR(ux3(p)))) + }; + if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { + i1_prev(p) = i1(p); + dx1_prev(p) = dx1(p); + dx1(p) += metric.template transform<1, Idx::XYZ, Idx::U>(xp, ux1(p)) * + dt_inv_energy; + i1(p) += static_cast(dx1(p) >= ONE) - + static_cast(dx1(p) < ZERO); + dx1(p) -= (dx1(p) >= ONE); + dx1(p) += (dx1(p) < ZERO); + } + if constexpr (D == Dim::_2D || D == Dim::_3D) { + i2_prev(p) = i2(p); + dx2_prev(p) = dx2(p); + dx2(p) += metric.template transform<2, Idx::XYZ, Idx::U>(xp, ux2(p)) * + dt_inv_energy; + i2(p) += static_cast(dx2(p) >= ONE) - + static_cast(dx2(p) < ZERO); + dx2(p) -= (dx2(p) >= ONE); + dx2(p) += (dx2(p) < ZERO); + } + if constexpr (D == Dim::_3D) { + i3_prev(p) = i3(p); + dx3_prev(p) = dx3(p); + dx3(p) += metric.template transform<3, Idx::XYZ, Idx::U>(xp, ux3(p)) * + dt_inv_energy; + i3(p) += static_cast(dx3(p) >= ONE) - + static_cast(dx3(p) < ZERO); + dx3(p) -= (dx3(p) >= ONE); + dx3(p) += (dx3(p) < ZERO); + } + } else { + // full Cartesian coordinate push in non-Cartesian basis + const real_t inv_energy { + massive ? 
ONE / math::sqrt(ONE + SQR(ux1(p)) + SQR(ux2(p)) + SQR(ux3(p))) + : ONE / math::sqrt(SQR(ux1(p)) + SQR(ux2(p)) + SQR(ux3(p))) + }; + vec_t vp_Cart { ux1(p) * inv_energy, + ux2(p) * inv_energy, + ux3(p) * inv_energy }; + // get cartesian position + coord_t xp_Cart { ZERO }; + metric.template convert_xyz(xp, xp_Cart); + // update cartesian position + for (auto d = 0u; d < M::PrtlDim; ++d) { + xp_Cart[d] += vp_Cart[d] * dt; + } + // transform back to code + metric.template convert_xyz(xp_Cart, xp); + + // update x1 + if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { + i1_prev(p) = i1(p); + dx1_prev(p) = dx1(p); + from_Xi_to_i_di(xp[0], i1(p), dx1(p)); + } - // update x2 & phi - if constexpr (D == Dim::_2D || D == Dim::_3D) { - i2_prev(p) = i2(p); - dx2_prev(p) = dx2(p); - from_Xi_to_i_di(xp[1], i2(p), dx2(p)); - if constexpr (D == Dim::_2D && M::PrtlDim == Dim::_3D) { - phi(p) = xp[2]; + // update x2 & phi + if constexpr (D == Dim::_2D || D == Dim::_3D) { + i2_prev(p) = i2(p); + dx2_prev(p) = dx2(p); + from_Xi_to_i_di(xp[1], i2(p), dx2(p)); + if constexpr (D == Dim::_2D && M::PrtlDim == Dim::_3D) { + phi(p) = xp[2]; + } } - } - // update x3 - if constexpr (D == Dim::_3D) { - i3_prev(p) = i3(p); - dx3_prev(p) = dx3(p); - from_Xi_to_i_di(xp[2], i3(p), dx3(p)); + // update x3 + if constexpr (D == Dim::_3D) { + i3_prev(p) = i3(p); + dx3_prev(p) = dx3(p); + from_Xi_to_i_di(xp[2], i3(p), dx3(p)); + } } boundaryConditions(p, xp); } diff --git a/src/kernels/tests/CMakeLists.txt b/src/kernels/tests/CMakeLists.txt index 7579eb6d3..551f9012f 100644 --- a/src/kernels/tests/CMakeLists.txt +++ b/src/kernels/tests/CMakeLists.txt @@ -33,3 +33,5 @@ gen_test(prtls_to_phys) gen_test(gca_pusher) gen_test(prtl_bc) gen_test(flds_bc) +gen_test(pusher) +gen_test(ext_force) diff --git a/src/kernels/tests/deposit.cpp b/src/kernels/tests/deposit.cpp index ec364a313..e6967eb14 100644 --- a/src/kernels/tests/deposit.cpp +++ b/src/kernels/tests/deposit.cpp @@ -29,7 +29,8 @@ void 
errorIf(bool condition, const std::string& message) { inline static constexpr auto epsilon = std::numeric_limits::epsilon(); -Inline auto equal(real_t a, real_t b, const char* msg = "", real_t acc = ONE) -> bool { +Inline auto equal(real_t a, real_t b, const char* msg = "", real_t acc = ONE) + -> bool { const auto eps = epsilon * acc; if (not cmp::AlmostEqual(a, b, eps)) { printf("%.12e != %.12e %s\n", a, b, msg); @@ -176,7 +177,7 @@ auto main(int argc, char* argv[]) -> int { }, { { 0.0, 55.0 }, { 0.0, 55.0 } }, {}, - 30); + 500); testDeposit, SimEngine::SRPIC>( { @@ -185,7 +186,7 @@ auto main(int argc, char* argv[]) -> int { }, { { 1.0, 100.0 } }, {}, - 30); + 500); testDeposit, SimEngine::SRPIC>( { @@ -194,7 +195,7 @@ auto main(int argc, char* argv[]) -> int { }, { { 1.0, 100.0 } }, { { "r0", 0.0 }, { "h", 0.25 } }, - 30); + 500); testDeposit, SimEngine::GRPIC>( { @@ -203,7 +204,7 @@ auto main(int argc, char* argv[]) -> int { }, { { 1.0, 100.0 } }, { { "a", 0.9 } }, - 30); + 500); testDeposit, SimEngine::GRPIC>( { @@ -212,7 +213,7 @@ auto main(int argc, char* argv[]) -> int { }, { { 1.0, 100.0 } }, { { "r0", 0.0 }, { "h", 0.25 }, { "a", 0.9 } }, - 30); + 500); testDeposit, SimEngine::GRPIC>( { @@ -221,7 +222,7 @@ auto main(int argc, char* argv[]) -> int { }, { { 1.0, 100.0 } }, { { "a", 0.9 } }, - 30); + 500); } catch (std::exception& e) { std::cerr << e.what() << std::endl; diff --git a/src/kernels/tests/ext_force.cpp b/src/kernels/tests/ext_force.cpp new file mode 100644 index 000000000..7f760e939 --- /dev/null +++ b/src/kernels/tests/ext_force.cpp @@ -0,0 +1,287 @@ +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "utils/error.h" +#include "utils/numeric.h" +#include "utils/plog.h" + +#include "metrics/minkowski.h" + +#include "kernels/particle_pusher_sr.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +using namespace ntt; +using namespace metric; + 
+void check_value(unsigned int t, + real_t target, + real_t value, + real_t eps, + const std::string& msg) { + const auto msg_ = fmt::format("%s: %e != %e @ %u", msg.c_str(), target, value, t); + const auto diff = math::abs(target - value); + const auto sum = HALF * (math::abs(target) + math::abs(value)); + raise::ErrorIf(((sum > eps) and (diff / sum > eps)) or + ((sum <= eps) and (diff > eps / 10.0)), + msg_ + " " + fmt::format("%.12e, %.12e", diff, sum), + HERE); +} + +template +void put_value(array_t& arr, T v, index_t p) { + auto h = Kokkos::create_mirror_view(arr); + Kokkos::deep_copy(h, arr); + h(p) = v; + Kokkos::deep_copy(arr, h); +} + +struct Force { + const std::vector species { 1 }; + + Force(real_t force) : force { force } {} + + Inline auto fx1(const spidx_t&, const simtime_t&, const coord_t&) const + -> real_t { + return force * math::sin(ONE) * math::sin(ONE); + } + + Inline auto fx2(const spidx_t&, const simtime_t&, const coord_t&) const + -> real_t { + return force * math::sin(ONE) * math::cos(ONE); + } + + Inline auto fx3(const spidx_t&, const simtime_t&, const coord_t&) const + -> real_t { + return force * math::cos(ONE); + } + +private: + const real_t force; +}; + +template +void testPusher(const std::vector& res) { + static_assert(M::Dim == 3); + raise::ErrorIf(res.size() != M::Dim, "res.size() != M::Dim", HERE); + + M metric { + res, + { { 0.0, (real_t)(res[0]) }, { 0.0, (real_t)(res[1]) }, { 0.0, (real_t)(res[2]) } }, + {} + }; + + const int nx1 = res[0]; + const int nx2 = res[1]; + const int nx3 = res[2]; + + const auto range_ext = CreateRangePolicy( + { 0, 0, 0 }, + { res[0] + 2 * N_GHOSTS, res[1] + 2 * N_GHOSTS, res[2] + 2 * N_GHOSTS }); + + auto emfield = ndfield_t { "emfield", + res[0] + 2 * N_GHOSTS, + res[1] + 2 * N_GHOSTS, + res[2] + 2 * N_GHOSTS }; + + const real_t x1_0 = 1.15, x2_0 = 1.85, x3_0 = 1.25; + const real_t ux1_0 = 0.02, ux2_0 = -0.2, ux3_0 = 0.1; + // const real_t gamma_0 = math::sqrt(ONE + NORM_SQR(ux1_0, ux2_0, ux3_0)); 
+ const real_t omegaB0 = 1.0; + const real_t dt = 0.01; + const real_t f_mag = 0.01; + + Kokkos::parallel_for( + "init 3D", + range_ext, + Lambda(index_t i1, index_t i2, index_t i3) { + emfield(i1, i2, i3, em::ex1) = ZERO; + emfield(i1, i2, i3, em::ex2) = ZERO; + emfield(i1, i2, i3, em::ex3) = ZERO; + emfield(i1, i2, i3, em::bx1) = ZERO; + emfield(i1, i2, i3, em::bx2) = ZERO; + emfield(i1, i2, i3, em::bx3) = ZERO; + }); + + array_t i1 { "i1", 2 }; + array_t i2 { "i2", 2 }; + array_t i3 { "i3", 2 }; + array_t i1_prev { "i1_prev", 2 }; + array_t i2_prev { "i2_prev", 2 }; + array_t i3_prev { "i3_prev", 2 }; + array_t dx1 { "dx1", 2 }; + array_t dx2 { "dx2", 2 }; + array_t dx3 { "dx3", 2 }; + array_t dx1_prev { "dx1_prev", 2 }; + array_t dx2_prev { "dx2_prev", 2 }; + array_t dx3_prev { "dx3_prev", 2 }; + array_t ux1 { "ux1", 2 }; + array_t ux2 { "ux2", 2 }; + array_t ux3 { "ux3", 2 }; + array_t phi { "phi", 2 }; + array_t weight { "weight", 2 }; + array_t tag { "tag", 2 }; + + put_value(i1, (int)(x1_0), 0); + put_value(i2, (int)(x2_0), 0); + put_value(i3, (int)(x3_0), 0); + put_value(dx1, (prtldx_t)(x1_0 - (int)(x1_0)), 0); + put_value(dx2, (prtldx_t)(x2_0 - (int)(x2_0)), 0); + put_value(dx3, (prtldx_t)(x3_0 - (int)(x3_0)), 0); + put_value(ux1, ux1_0, 0); + put_value(ux2, ux2_0, 0); + put_value(ux3, ux3_0, 0); + put_value(tag, ParticleTag::alive, 0); + + put_value(i1, (int)(x1_0), 1); + put_value(i2, (int)(x2_0), 1); + put_value(i3, (int)(x3_0), 1); + put_value(dx1, (prtldx_t)(x1_0 - (int)(x1_0)), 1); + put_value(dx2, (prtldx_t)(x2_0 - (int)(x2_0)), 1); + put_value(dx3, (prtldx_t)(x3_0 - (int)(x3_0)), 1); + put_value(ux1, -ux1_0, 1); + put_value(ux2, -ux2_0, 1); + put_value(ux3, -ux3_0, 1); + put_value(tag, ParticleTag::alive, 1); + + // Particle boundaries + auto boundaries = boundaries_t {}; + boundaries = { + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC } + }; + + const spidx_t sp { 1u }; + 
+ const real_t coeff = HALF * dt * omegaB0; + + const real_t eps = std::is_same_v ? 1e-4 : 1e-6; + + const auto ext_force = Force { f_mag }; + const auto force = + kernel::sr::Force { ext_force }; + + static plog::RollingFileAppender file_appender( + "pusher_log.csv"); + plog::init(plog::verbose, &file_appender); + PLOGD << "t,i1,i2,i3,dx1,dx2,dx3,ux1,ux2,ux3"; + + for (auto t { 0u }; t < 100; ++t) { + const real_t time = t * dt; + + // clang-format off + Kokkos::parallel_for( + "pusher", + CreateRangePolicy({0}, {2}), + kernel::sr::Pusher_kernel, decltype(force)>(PrtlPusher::BORIS, + false, true, kernel::sr::Cooling::None, + emfield, + sp, + i1, i2, i3, + i1_prev, i2_prev, i3_prev, + dx1, dx2, dx3, + dx1_prev, dx2_prev, dx3_prev, + ux1, ux2, ux3, + phi, tag, + metric, force, + (simtime_t)time, coeff, dt, + nx1, nx2, nx3, + boundaries, + ZERO, ZERO, ZERO)); + + auto i1_prev_ = Kokkos::create_mirror_view(i1_prev); + auto i2_prev_ = Kokkos::create_mirror_view(i2_prev); + auto i3_prev_ = Kokkos::create_mirror_view(i3_prev); + auto i1_ = Kokkos::create_mirror_view(i1); + auto i2_ = Kokkos::create_mirror_view(i2); + auto i3_ = Kokkos::create_mirror_view(i3); + Kokkos::deep_copy(i1_prev_, i1_prev); + Kokkos::deep_copy(i2_prev_, i2_prev); + Kokkos::deep_copy(i3_prev_, i3_prev); + Kokkos::deep_copy(i1_, i1); + Kokkos::deep_copy(i2_, i2); + Kokkos::deep_copy(i3_, i3); + + auto dx1_prev_ = Kokkos::create_mirror_view(dx1_prev); + auto dx2_prev_ = Kokkos::create_mirror_view(dx2_prev); + auto dx3_prev_ = Kokkos::create_mirror_view(dx3_prev); + auto dx1_ = Kokkos::create_mirror_view(dx1); + auto dx2_ = Kokkos::create_mirror_view(dx2); + auto dx3_ = Kokkos::create_mirror_view(dx3); + auto ux1_ = Kokkos::create_mirror_view(ux1); + auto ux2_ = Kokkos::create_mirror_view(ux2); + auto ux3_ = Kokkos::create_mirror_view(ux3); + Kokkos::deep_copy(dx1_prev_, dx1_prev); + Kokkos::deep_copy(dx2_prev_, dx2_prev); + Kokkos::deep_copy(dx3_prev_, dx3_prev); + Kokkos::deep_copy(dx1_, dx1); + 
Kokkos::deep_copy(dx2_, dx2); + Kokkos::deep_copy(dx3_, dx3); + Kokkos::deep_copy(ux1_, ux1); + Kokkos::deep_copy(ux2_, ux2); + Kokkos::deep_copy(ux3_, ux3); + + PLOGD.printf("%e,%d,%d,%d,%e,%e,%e,%e,%e,%e", + time, + i1_(1), + i2_(1), + i3_(1), + dx1_( 1), + dx2_( 1), + dx3_( 1), + ux1_( 1), + ux2_( 1), + ux3_( 1)); + + { + const real_t ux1_expect = ux1_0 + (time + dt) * f_mag * std::sin(ONE) * std::sin(ONE); + const real_t ux2_expect = ux2_0 + (time + dt) * f_mag * std::sin(ONE) * std::cos(ONE); + const real_t ux3_expect = ux3_0 + (time + dt) * f_mag * std::cos(ONE); + + check_value(t, ux1_(0), ux1_expect, eps, "Particle #1 ux1"); + check_value(t, ux2_(0), ux2_expect, eps, "Particle #1 ux2"); + check_value(t, ux3_(0), ux3_expect, eps, "Particle #1 ux3"); + } + + { + const real_t ux1_expect = -ux1_0 + (time + dt) * f_mag * std::sin(ONE) * std::sin(ONE); + const real_t ux2_expect = -ux2_0 + (time + dt) * f_mag * std::sin(ONE) * std::cos(ONE); + const real_t ux3_expect = -ux3_0 + (time + dt) * f_mag * std::cos(ONE); + + check_value(t, ux1_(1), ux1_expect, eps, "Particle #2 ux1"); + check_value(t, ux2_(1), ux2_expect, eps, "Particle #2 ux2"); + check_value(t, ux3_(1), ux3_expect, eps, "Particle #2 ux3"); + } + + } +} + +auto main(int argc, char* argv[]) -> int { + Kokkos::initialize(argc, argv); + + try { + using namespace ntt; + + testPusher>({ 10, 10, 10 }); + + } catch (std::exception& e) { + std::cerr << e.what() << std::endl; + Kokkos::finalize(); + return 1; + } + Kokkos::finalize(); + return 0; +} diff --git a/src/kernels/tests/flds_bc.cpp b/src/kernels/tests/flds_bc.cpp index aba829e8b..c5675cdec 100644 --- a/src/kernels/tests/flds_bc.cpp +++ b/src/kernels/tests/flds_bc.cpp @@ -56,7 +56,7 @@ Inline auto equal(real_t a, real_t b, const char* msg, real_t acc) -> bool { template void testFldsBCs(const std::vector& res) { - errorIf(res.size() != (unsigned short)D, "res.size() != D"); + errorIf(res.size() != (dim_t)D, "res.size() != D"); boundaries_t sx; for 
(const auto& r : res) { sx.emplace_back(ZERO, r); diff --git a/src/kernels/tests/gca_pusher.cpp b/src/kernels/tests/gca_pusher.cpp index c96ce3d66..5630de414 100644 --- a/src/kernels/tests/gca_pusher.cpp +++ b/src/kernels/tests/gca_pusher.cpp @@ -2,6 +2,7 @@ #include "global.h" #include "arch/kokkos_aliases.h" +#include "utils/error.h" #include "utils/numeric.h" #include "metrics/minkowski.h" @@ -10,20 +11,35 @@ #include #include +#include +#include +#include +#include +#include #include -#include -#include #include #include using namespace ntt; using namespace metric; -void errorIf(bool condition, const std::string& message) { - if (condition) { - throw std::runtime_error(message); - } +void check_value(unsigned int t, + real_t target, + real_t value, + real_t eps, + const std::string& msg) { + const auto msg_ = fmt::format("%s: %.12e != %.12e @ %u", + msg.c_str(), + target, + value, + t); + const auto diff = math::abs(target - value); + const auto sum = HALF * (math::abs(target) + math::abs(value)); + raise::ErrorIf(((sum > eps) and (diff / sum > eps)) or + ((sum <= eps) and (diff > eps / 10.0)), + msg_ + " " + fmt::format("%.12e, %.12e", diff, sum), + HERE); } template @@ -35,24 +51,20 @@ void put_value(array_t& arr, T v, index_t p) { } template -void testGCAPusher(const std::vector& res, - const boundaries_t& ext, - const std::map& params = {}) { +void testPusher(const std::vector& res) { static_assert(M::Dim == 3); - errorIf(res.size() != M::Dim, "res.size() != M::Dim"); - - boundaries_t extent; - extent = ext; + raise::ErrorIf(res.size() != M::Dim, "res.size() != M::Dim", HERE); - M metric { res, extent, params }; + M metric { + res, + { { 0.0, (real_t)(res[0]) }, { 0.0, (real_t)(res[1]) }, { 0.0, (real_t)(res[2]) } }, + {} + }; const int nx1 = res[0]; const int nx2 = res[1]; const int nx3 = res[2]; - auto coeff = real_t { 1.0 }; - auto dt = real_t { 0.01 }; - const auto range_ext = CreateRangePolicy( { 0, 0, 0 }, { res[0] + 2 * N_GHOSTS, res[1] + 2 * 
N_GHOSTS, res[2] + 2 * N_GHOSTS }); @@ -62,16 +74,29 @@ void testGCAPusher(const std::vector& res, res[1] + 2 * N_GHOSTS, res[2] + 2 * N_GHOSTS }; + const real_t bx1 = 0.66, bx2 = 0.55, bx3 = 0.44; + const real_t x1_0 = 1.15, x2_0 = 1.85, x3_0 = 1.25; + const real_t ux1_0 = 1.0, ux2_0 = -2.0, ux3_0 = 0.1; + const real_t omegaB0 = 0.2; + const real_t dt = 0.01; + + const real_t b_mag = math::sqrt(NORM_SQR(bx1, bx2, bx3)); + const real_t upar_0 = DOT(ux1_0, ux2_0, ux3_0, bx1, bx2, bx3) / b_mag; + + const real_t ux1_expect = bx1 * upar_0 / (b_mag); + const real_t ux2_expect = bx2 * upar_0 / (b_mag); + const real_t ux3_expect = bx3 * upar_0 / (b_mag); + Kokkos::parallel_for( "init 3D", range_ext, Lambda(index_t i1, index_t i2, index_t i3) { - emfield(i1, i2, i3, em::ex1) = 0.0; - emfield(i1, i2, i3, em::ex2) = 0.0; - emfield(i1, i2, i3, em::ex3) = 0.0; - emfield(i1, i2, i3, em::bx1) = 0.22; - emfield(i1, i2, i3, em::bx2) = 0.44; - emfield(i1, i2, i3, em::bx3) = 0.66; + emfield(i1, i2, i3, em::ex1) = ZERO; + emfield(i1, i2, i3, em::ex2) = ZERO; + emfield(i1, i2, i3, em::ex3) = ZERO; + emfield(i1, i2, i3, em::bx1) = bx1; + emfield(i1, i2, i3, em::bx2) = bx2; + emfield(i1, i2, i3, em::bx3) = bx3; }); array_t i1 { "i1", 2 }; @@ -93,119 +118,77 @@ void testGCAPusher(const std::vector& res, array_t weight { "weight", 2 }; array_t tag { "tag", 2 }; - put_value(i1, 5, 0); - put_value(i2, 5, 0); - put_value(i3, 5, 0); - put_value(dx1, (prtldx_t)(0.15), 0); - put_value(dx2, (prtldx_t)(0.85), 0); - put_value(dx3, (prtldx_t)(0.25), 0); - put_value(ux1, (real_t)(1.0), 0); - put_value(ux2, (real_t)(-2.0), 0); - put_value(ux3, (real_t)(0.1), 0); + put_value(i1, (int)(x1_0), 0); + put_value(i2, (int)(x2_0), 0); + put_value(i3, (int)(x3_0), 0); + put_value(dx1, (prtldx_t)(x1_0 - (int)(x1_0)), 0); + put_value(dx2, (prtldx_t)(x2_0 - (int)(x2_0)), 0); + put_value(dx3, (prtldx_t)(x3_0 - (int)(x3_0)), 0); + put_value(ux1, ux1_0, 0); + put_value(ux2, ux2_0, 0); + put_value(ux3, ux3_0, 0); 
put_value(tag, ParticleTag::alive, 0); - put_value(i1, 5, 1); - put_value(i2, 5, 1); - put_value(i3, 5, 1); - put_value(dx1, (prtldx_t)(0.15), 1); - put_value(dx2, (prtldx_t)(0.85), 1); - put_value(dx3, (prtldx_t)(0.25), 1); - put_value(ux1, (real_t)(1.0), 1); - put_value(ux2, (real_t)(-2.0), 1); - put_value(ux3, (real_t)(0.1), 1); + put_value(i1, (int)(x1_0), 1); + put_value(i2, (int)(x2_0), 1); + put_value(i3, (int)(x3_0), 1); + put_value(dx1, (prtldx_t)(x1_0 - (int)(x1_0)), 1); + put_value(dx2, (prtldx_t)(x2_0 - (int)(x2_0)), 1); + put_value(dx3, (prtldx_t)(x3_0 - (int)(x3_0)), 1); + put_value(ux1, -ux1_0, 1); + put_value(ux2, -ux2_0, 1); + put_value(ux3, -ux3_0, 1); put_value(tag, ParticleTag::alive, 1); // Particle boundaries auto boundaries = boundaries_t {}; boundaries = { - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC} + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC } }; - // clang-format off - Kokkos::parallel_for( - "pusher", - 1, - kernel::sr::Pusher_kernel>(PrtlPusher::BORIS, - true, false, kernel::sr::Cooling::None, - emfield, - 1, - i1, i2, i3, - i1_prev, i2_prev, i3_prev, - dx1, dx2, dx3, - dx1_prev, dx2_prev, dx3_prev, - ux1, ux2, ux3, - phi, tag, - metric, - ZERO, coeff, dt, - nx1, nx2, nx3, - boundaries, - (real_t)100000.0, (real_t)1.0, ZERO)); - - Kokkos::parallel_for( - "pusher", - CreateRangePolicy({ 0 }, { 1 }), - kernel::sr::Pusher_kernel>(PrtlPusher::BORIS, - true, false, kernel::sr::Cooling::None, - emfield, - 1, - i1, i2, i3, - i1_prev, i2_prev, i3_prev, - dx1, dx2, dx3, - dx1_prev, dx2_prev, dx3_prev, - ux1, ux2, ux3, - phi, tag, - metric, - ZERO, -coeff, dt, - nx1, nx2, nx3, - boundaries, - (real_t)100000.0, (real_t)1.0, ZERO)); - // clang-format on - - auto i1_prev_ = Kokkos::create_mirror_view(i1_prev); - auto i2_prev_ = Kokkos::create_mirror_view(i2_prev); - auto i3_prev_ = 
Kokkos::create_mirror_view(i3_prev); - auto i1_ = Kokkos::create_mirror_view(i1); - auto i2_ = Kokkos::create_mirror_view(i2); - auto i3_ = Kokkos::create_mirror_view(i3); - Kokkos::deep_copy(i1_prev_, i1_prev); - Kokkos::deep_copy(i2_prev_, i2_prev); - Kokkos::deep_copy(i3_prev_, i3_prev); - Kokkos::deep_copy(i1_, i1); - Kokkos::deep_copy(i2_, i2); - Kokkos::deep_copy(i3_, i3); - - auto dx1_prev_ = Kokkos::create_mirror_view(dx1_prev); - auto dx2_prev_ = Kokkos::create_mirror_view(dx2_prev); - auto dx3_prev_ = Kokkos::create_mirror_view(dx3_prev); - auto dx1_ = Kokkos::create_mirror_view(dx1); - auto dx2_ = Kokkos::create_mirror_view(dx2); - auto dx3_ = Kokkos::create_mirror_view(dx3); - Kokkos::deep_copy(dx1_prev_, dx1_prev); - Kokkos::deep_copy(dx2_prev_, dx2_prev); - Kokkos::deep_copy(dx3_prev_, dx3_prev); - Kokkos::deep_copy(dx1_, dx1); - Kokkos::deep_copy(dx2_, dx2); - Kokkos::deep_copy(dx3_, dx3); - - auto disx = i1_[0] + dx1_[0] - i1_prev_[0] - dx1_prev_[0]; - auto disy = i2_[0] + dx2_[0] - i2_prev_[0] - dx2_prev_[0]; - auto disz = i3_[0] + dx3_[0] - i3_prev_[0] - dx3_prev_[0]; - - auto disdotB = (disx * 0.22 + disy * 0.44 + disz * 0.66) / - (0.823165 * math::sqrt(SQR(disx) + SQR(disy) + SQR(disz))); - - printf("%.12e \n", (1 - math::abs(disdotB))); - - disx = i1_[1] + dx1_[1] - i1_prev_[1] - dx1_prev_[1]; - disy = i2_[1] + dx2_[1] - i2_prev_[1] - dx2_prev_[1]; - disz = i3_[1] + dx3_[1] - i3_prev_[1] - dx3_prev_[1]; - - disdotB = (disx * 0.22 + disy * 0.44 + disz * 0.66) / - (0.823165 * math::sqrt(SQR(disx) + SQR(disy) + SQR(disz))); - - printf("%.12e \n", (1 - math::abs(disdotB))); + const spidx_t sp { 1u }; + + const real_t coeff = HALF * dt * omegaB0; + + const real_t eps = std::is_same_v ? 
1e-3 : 1e-6; + + for (auto t { 0u }; t < 2000; ++t) { + // clang-format off + Kokkos::parallel_for( + "pusher", + CreateRangePolicy({0}, {2}), + kernel::sr::Pusher_kernel>(PrtlPusher::BORIS, + true, false, kernel::sr::Cooling::None, + emfield, + sp, + i1, i2, i3, + i1_prev, i2_prev, i3_prev, + dx1, dx2, dx3, + dx1_prev, dx2_prev, dx3_prev, + ux1, ux2, ux3, + phi, tag, + metric, + ZERO, coeff, dt, + nx1, nx2, nx3, + boundaries, + (real_t)10000.0, ONE, ZERO)); + + auto ux1_ = Kokkos::create_mirror_view(ux1); + auto ux2_ = Kokkos::create_mirror_view(ux2); + auto ux3_ = Kokkos::create_mirror_view(ux3); + Kokkos::deep_copy(ux1_, ux1); + Kokkos::deep_copy(ux2_, ux2); + Kokkos::deep_copy(ux3_, ux3); + + check_value(t, ux1_(0), ux1_expect, eps, "Particle #1 ux1"); + check_value(t, ux2_(0), ux2_expect, eps, "Particle #1 ux2"); + check_value(t, ux3_(0), ux3_expect, eps, "Particle #1 ux3"); + check_value(t, ux1_(1), -ux1_expect, eps, "Particle #2 ux1"); + check_value(t, ux2_(1), -ux2_expect, eps, "Particle #2 ux2"); + check_value(t, ux3_(1), -ux3_expect, eps, "Particle #2 ux3"); + } } auto main(int argc, char* argv[]) -> int { @@ -214,14 +197,7 @@ auto main(int argc, char* argv[]) -> int { try { using namespace ntt; - testGCAPusher>( - { - 10, - 10, - 10 - }, - { { 0.0, 10.0 }, { 0.0, 10.0 }, { 0.0, 10.0 } }, - {}); + testPusher>({ 10, 10, 10 }); } catch (std::exception& e) { std::cerr << e.what() << std::endl; diff --git a/src/kernels/tests/pusher.cpp b/src/kernels/tests/pusher.cpp new file mode 100644 index 000000000..8496b592d --- /dev/null +++ b/src/kernels/tests/pusher.cpp @@ -0,0 +1,274 @@ +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "utils/error.h" +#include "utils/numeric.h" + +#include "metrics/minkowski.h" + +#include "kernels/particle_pusher_sr.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +using namespace ntt; +using namespace metric; + +void 
check_value(unsigned int t, + real_t target, + real_t value, + real_t eps, + const std::string& msg) { + const auto msg_ = fmt::format("%s: %.12e != %.12e @ %u", + msg.c_str(), + target, + value, + t); + const auto diff = math::abs(target - value); + const auto sum = HALF * (math::abs(target) + math::abs(value)); + raise::ErrorIf(((sum > eps) and (diff / sum > eps)) or + ((sum <= eps) and (diff > eps / 10.0)), + msg_ + " " + fmt::format("%.12e, %.12e", diff, sum), + HERE); +} + +template +void put_value(array_t& arr, T v, index_t p) { + auto h = Kokkos::create_mirror_view(arr); + Kokkos::deep_copy(h, arr); + h(p) = v; + Kokkos::deep_copy(arr, h); +} + +template +void testPusher(const std::vector& res) { + static_assert(M::Dim == 3); + raise::ErrorIf(res.size() != M::Dim, "res.size() != M::Dim", HERE); + + M metric { + res, + { { 0.0, (real_t)(res[0]) }, { 0.0, (real_t)(res[1]) }, { 0.0, (real_t)(res[2]) } }, + {} + }; + + const int nx1 = res[0]; + const int nx2 = res[1]; + const int nx3 = res[2]; + + const auto range_ext = CreateRangePolicy( + { 0, 0, 0 }, + { res[0] + 2 * N_GHOSTS, res[1] + 2 * N_GHOSTS, res[2] + 2 * N_GHOSTS }); + + auto emfield = ndfield_t { "emfield", + res[0] + 2 * N_GHOSTS, + res[1] + 2 * N_GHOSTS, + res[2] + 2 * N_GHOSTS }; + + const real_t bx1 = 0.66, bx2 = 0.55, bx3 = 0.44; + const real_t b_mag = math::sqrt(NORM_SQR(bx1, bx2, bx3)); + const real_t x1_0 = 1.15, x2_0 = 1.85, x3_0 = 1.25; + const real_t ux1_0 = 1.0, ux2_0 = -2.0, ux3_0 = 0.1; + const real_t gamma_0 = math::sqrt(ONE + NORM_SQR(ux1_0, ux2_0, ux3_0)); + const real_t omegaB0 = 0.2; + const real_t dt = 0.01; + + Kokkos::parallel_for( + "init 3D", + range_ext, + Lambda(index_t i1, index_t i2, index_t i3) { + emfield(i1, i2, i3, em::ex1) = ZERO; + emfield(i1, i2, i3, em::ex2) = ZERO; + emfield(i1, i2, i3, em::ex3) = ZERO; + emfield(i1, i2, i3, em::bx1) = bx1; + emfield(i1, i2, i3, em::bx2) = bx2; + emfield(i1, i2, i3, em::bx3) = bx3; + }); + + array_t i1 { "i1", 2 }; + array_t i2 { 
"i2", 2 }; + array_t i3 { "i3", 2 }; + array_t i1_prev { "i1_prev", 2 }; + array_t i2_prev { "i2_prev", 2 }; + array_t i3_prev { "i3_prev", 2 }; + array_t dx1 { "dx1", 2 }; + array_t dx2 { "dx2", 2 }; + array_t dx3 { "dx3", 2 }; + array_t dx1_prev { "dx1_prev", 2 }; + array_t dx2_prev { "dx2_prev", 2 }; + array_t dx3_prev { "dx3_prev", 2 }; + array_t ux1 { "ux1", 2 }; + array_t ux2 { "ux2", 2 }; + array_t ux3 { "ux3", 2 }; + array_t phi { "phi", 2 }; + array_t weight { "weight", 2 }; + array_t tag { "tag", 2 }; + + put_value(i1, (int)(x1_0), 0); + put_value(i2, (int)(x2_0), 0); + put_value(i3, (int)(x3_0), 0); + put_value(dx1, (prtldx_t)(x1_0 - (int)(x1_0)), 0); + put_value(dx2, (prtldx_t)(x2_0 - (int)(x2_0)), 0); + put_value(dx3, (prtldx_t)(x3_0 - (int)(x3_0)), 0); + put_value(ux1, ux1_0, 0); + put_value(ux2, ux2_0, 0); + put_value(ux3, ux3_0, 0); + put_value(tag, ParticleTag::alive, 0); + + put_value(i1, (int)(x1_0), 1); + put_value(i2, (int)(x2_0), 1); + put_value(i3, (int)(x3_0), 1); + put_value(dx1, (prtldx_t)(x1_0 - (int)(x1_0)), 1); + put_value(dx2, (prtldx_t)(x2_0 - (int)(x2_0)), 1); + put_value(dx3, (prtldx_t)(x3_0 - (int)(x3_0)), 1); + put_value(ux1, ux1_0, 1); + put_value(ux2, ux2_0, 1); + put_value(ux3, ux3_0, 1); + put_value(tag, ParticleTag::alive, 1); + + // Particle boundaries + auto boundaries = boundaries_t {}; + boundaries = { + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC }, + { PrtlBC::PERIODIC, PrtlBC::PERIODIC } + }; + + const spidx_t sp { 1u }; + + const real_t coeff = HALF * dt * omegaB0; + + const auto u0_dot_b = (ux1_0 * bx1 + ux2_0 * bx2 + ux3_0 * bx3) / b_mag; + const auto u0_cross_b_x1 = (ux2_0 * bx3 - ux3_0 * bx2) / b_mag; + const auto u0_cross_b_x2 = (ux3_0 * bx1 - ux1_0 * bx3) / b_mag; + const auto u0_cross_b_x3 = (ux1_0 * bx2 - ux2_0 * bx1) / b_mag; + + const real_t eps = std::is_same_v ? 
1e-2 : 1e-3; + + for (auto t { 0u }; t < 2000; ++t) { + const real_t time = t * dt; + + // clang-format off + Kokkos::parallel_for( + "pusher", + CreateRangePolicy({0}, {1}), + kernel::sr::Pusher_kernel>(PrtlPusher::BORIS, + false, false, kernel::sr::Cooling::None, + emfield, + sp, + i1, i2, i3, + i1_prev, i2_prev, i3_prev, + dx1, dx2, dx3, + dx1_prev, dx2_prev, dx3_prev, + ux1, ux2, ux3, + phi, tag, + metric, + ZERO, coeff, dt, + nx1, nx2, nx3, + boundaries, + ZERO, ZERO, ZERO)); + + Kokkos::parallel_for( + "pusher", + CreateRangePolicy({1}, {2}), + kernel::sr::Pusher_kernel>(PrtlPusher::VAY, + false, false, kernel::sr::Cooling::None, + emfield, + sp, + i1, i2, i3, + i1_prev, i2_prev, i3_prev, + dx1, dx2, dx3, + dx1_prev, dx2_prev, dx3_prev, + ux1, ux2, ux3, + phi, tag, + metric, + ZERO, coeff, dt, + nx1, nx2, nx3, + boundaries, + ZERO, ZERO, ZERO)); + + auto i1_prev_ = Kokkos::create_mirror_view(i1_prev); + auto i2_prev_ = Kokkos::create_mirror_view(i2_prev); + auto i3_prev_ = Kokkos::create_mirror_view(i3_prev); + auto i1_ = Kokkos::create_mirror_view(i1); + auto i2_ = Kokkos::create_mirror_view(i2); + auto i3_ = Kokkos::create_mirror_view(i3); + Kokkos::deep_copy(i1_prev_, i1_prev); + Kokkos::deep_copy(i2_prev_, i2_prev); + Kokkos::deep_copy(i3_prev_, i3_prev); + Kokkos::deep_copy(i1_, i1); + Kokkos::deep_copy(i2_, i2); + Kokkos::deep_copy(i3_, i3); + + auto dx1_prev_ = Kokkos::create_mirror_view(dx1_prev); + auto dx2_prev_ = Kokkos::create_mirror_view(dx2_prev); + auto dx3_prev_ = Kokkos::create_mirror_view(dx3_prev); + auto dx1_ = Kokkos::create_mirror_view(dx1); + auto dx2_ = Kokkos::create_mirror_view(dx2); + auto dx3_ = Kokkos::create_mirror_view(dx3); + auto ux1_ = Kokkos::create_mirror_view(ux1); + auto ux2_ = Kokkos::create_mirror_view(ux2); + auto ux3_ = Kokkos::create_mirror_view(ux3); + Kokkos::deep_copy(dx1_prev_, dx1_prev); + Kokkos::deep_copy(dx2_prev_, dx2_prev); + Kokkos::deep_copy(dx3_prev_, dx3_prev); + Kokkos::deep_copy(dx1_, dx1); + 
Kokkos::deep_copy(dx2_, dx2); + Kokkos::deep_copy(dx3_, dx3); + Kokkos::deep_copy(ux1_, ux1); + Kokkos::deep_copy(ux2_, ux2); + Kokkos::deep_copy(ux3_, ux3); + + const real_t gamma1 = math::sqrt(ONE + NORM_SQR(ux1_(0), ux2_(0), ux3_(0))); + const real_t gamma2 = math::sqrt(ONE + NORM_SQR(ux1_(1), ux2_(1), ux3_(1))); + + check_value(t, gamma1, gamma_0, eps, "Particle #1 Lorentz factor"); + check_value(t, gamma2, gamma_0, eps, "Particle #2 Lorentz factor"); + + const real_t arg = (b_mag * omegaB0 * (time + dt)) / gamma_0; + const real_t ux1_expect = (bx1 / b_mag) * u0_dot_b + + (-(bx1 / b_mag) * u0_dot_b + ux1_0) * math::cos(arg) + + u0_cross_b_x1 * math::sin(arg); + const real_t ux2_expect = (bx2 / b_mag) * u0_dot_b + + (-(bx2 / b_mag) * u0_dot_b + ux2_0) * math::cos(arg) + + u0_cross_b_x2 * math::sin(arg); + const real_t ux3_expect = (bx3 / b_mag) * u0_dot_b + + (-(bx3 / b_mag) * u0_dot_b + ux3_0) * math::cos(arg) + + u0_cross_b_x3 * math::sin(arg); + + check_value(t, ux1_(0), ux1_expect, eps, "Particle #1 ux1"); + check_value(t, ux2_(0), ux2_expect, eps, "Particle #1 ux2"); + check_value(t, ux3_(0), ux3_expect, eps, "Particle #1 ux3"); + + check_value(t, ux1_(1), ux1_expect, eps, "Particle #2 ux1"); + check_value(t, ux2_(1), ux2_expect, eps, "Particle #2 ux2"); + check_value(t, ux3_(1), ux3_expect, eps, "Particle #2 ux3"); + + } +} + +auto main(int argc, char* argv[]) -> int { + Kokkos::initialize(argc, argv); + + try { + using namespace ntt; + + testPusher>({ 10, 10, 10 }); + + } catch (std::exception& e) { + std::cerr << e.what() << std::endl; + Kokkos::finalize(); + return 1; + } + Kokkos::finalize(); + return 0; +} diff --git a/src/kernels/utils.hpp b/src/kernels/utils.hpp new file mode 100644 index 000000000..628ed267f --- /dev/null +++ b/src/kernels/utils.hpp @@ -0,0 +1,66 @@ +/** + * @file kernels/utils.hpp + * @brief Commonly used generic kernels + * @implements + * - kernel::ComputeSum_kernel<> + * - kernel::ComputeDivergence_kernel<> + * @namespaces: + * 
- kernel:: + */ + +#ifndef KERNELS_UTILS_HPP +#define KERNELS_UTILS_HPP + +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "utils/error.h" + +namespace kernel { + + template + class ComputeSum_kernel { + const ndfield_t buff; + const idx_t buff_idx; + + public: + ComputeSum_kernel(const ndfield_t& buff, idx_t buff_idx) + : buff { buff } + , buff_idx { buff_idx } { + raise::ErrorIf(buff_idx >= N, "Invalid component index", HERE); + } + + Inline void operator()(index_t i1, real_t& lsum) const { + if constexpr (D == Dim::_1D) { + lsum += buff(i1, buff_idx); + } else { + raise::KernelError( + HERE, + "1D implementation of ComputeSum_kernel called for non-1D"); + } + } + + Inline void operator()(index_t i1, index_t i2, real_t& lsum) const { + if (D == Dim::_2D) { + lsum += buff(i1, i2, buff_idx); + } else { + raise::KernelError( + HERE, + "2D implementation of ComputeSum_kernel called for non-2D"); + } + } + + Inline void operator()(index_t i1, index_t i2, index_t i3, real_t& lsum) const { + if (D == Dim::_3D) { + lsum += buff(i1, i2, i3, buff_idx); + } else { + raise::KernelError( + HERE, + "3D implementation of ComputeSum_kernel called for non-3D"); + } + } + }; + +} // namespace kernel + +#endif // KERNELS_UTILS_HPP diff --git a/src/metrics/tests/coord_trans.cpp b/src/metrics/tests/coord_trans.cpp index f3779a852..67dcdda53 100644 --- a/src/metrics/tests/coord_trans.cpp +++ b/src/metrics/tests/coord_trans.cpp @@ -31,7 +31,7 @@ Inline auto equal(const coord_t& a, const char* msg, real_t acc = ONE) -> bool { const auto eps = epsilon * acc; - for (unsigned short d = 0; d < D; ++d) { + for (auto d { 0u }; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { printf("%d : %.12e != %.12e %s\n", d, a[d], b[d], msg); return false; @@ -44,7 +44,7 @@ template Inline void unravel(std::size_t idx, tuple_t& ijk, const tuple_t& res) { - for (unsigned short d = 0; d < D; ++d) { + for (auto d { 0u }; d < D; ++d) { ijk[d] = idx % res[d]; idx /= res[d]; } @@ 
-82,7 +82,7 @@ void testMetric(const std::vector& res, coord_t x_Code_2 { ZERO }; coord_t x_Phys_1 { ZERO }; coord_t x_Sph_1 { ZERO }; - for (unsigned short d = 0; d < M::Dim; ++d) { + for (auto d { 0u }; d < M::Dim; ++d) { x_Code_1[d] = (real_t)(idx[d]) + HALF; } metric.template convert(x_Code_1, x_Phys_1); @@ -127,24 +127,24 @@ auto main(int argc, char* argv[]) -> int { const auto res2d = std::vector { 64, 32 }; const auto res3d = std::vector { 64, 32, 16 }; const auto ext1dcart = boundaries_t { - {10.0, 20.0} + { 10.0, 20.0 } }; const auto ext2dcart = boundaries_t { - {0.0, 20.0}, - {0.0, 10.0} + { 0.0, 20.0 }, + { 0.0, 10.0 } }; const auto ext3dcart = boundaries_t { - {-2.0, 2.0}, - {-1.0, 1.0}, - {-0.5, 0.5} + { -2.0, 2.0 }, + { -1.0, 1.0 }, + { -0.5, 0.5 } }; const auto extsph = boundaries_t { - {1.0, 10.0}, - {0.0, constant::PI} + { 1.0, 10.0 }, + { 0.0, constant::PI } }; const auto params = std::map { - {"r0", -ONE}, - { "h", (real_t)0.25} + { "r0", -ONE }, + { "h", (real_t)0.25 } }; testMetric>({ 128 }, ext1dcart); @@ -155,30 +155,30 @@ auto main(int argc, char* argv[]) -> int { const auto resks = std::vector { 64, 54 }; const auto extsks = boundaries_t { - {0.8, 50.0}, - {0.0, constant::PI} + { 0.8, 50.0 }, + { 0.0, constant::PI } }; const auto paramsks = std::map { - {"a", (real_t)0.95} + { "a", (real_t)0.95 } }; testMetric>(resks, extsks, 150, paramsks); const auto resqks = std::vector { 64, 42 }; const auto extqks = boundaries_t { - {0.8, 10.0}, - {0.0, constant::PI} + { 0.8, 10.0 }, + { 0.0, constant::PI } }; const auto paramsqks = std::map { - {"r0", -TWO}, - { "h", ZERO}, - { "a", (real_t)0.8} + { "r0", -TWO }, + { "h", ZERO }, + { "a", (real_t)0.8 } }; testMetric>(resqks, extqks, 500, paramsqks); const auto resks0 = std::vector { 64, 54 }; const auto extks0 = boundaries_t { - {0.5, 20.0}, - {0.0, constant::PI} + { 0.5, 20.0 }, + { 0.0, constant::PI } }; testMetric>(resks0, extks0, 150); diff --git a/src/metrics/tests/ks-qks.cpp 
b/src/metrics/tests/ks-qks.cpp index 167f564ee..fce9004d1 100644 --- a/src/metrics/tests/ks-qks.cpp +++ b/src/metrics/tests/ks-qks.cpp @@ -25,7 +25,7 @@ Inline auto equal(const vec_t& a, const char* msg, real_t acc = ONE) -> bool { const auto eps = epsilon * acc; - for (unsigned short d = 0; d < D; ++d) { + for (auto d { 0u }; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { printf("%s: %.12e : %.12e\n", msg, a[d], b[d]); return false; @@ -38,7 +38,7 @@ template Inline void unravel(std::size_t idx, tuple_t& ijk, const tuple_t& res) { - for (unsigned short d = 0; d < D; ++d) { + for (auto d { 0u }; d < D; ++d) { ijk[d] = idx % res[d]; idx /= res[d]; } @@ -75,7 +75,7 @@ void testMetric(const std::vector& res, coord_t x_Code { ZERO }; coord_t x_Phys { ZERO }; - for (unsigned short d = 0; d < M::Dim; ++d) { + for (auto d { 0u }; d < M::Dim; ++d) { x_Code[d] = (real_t)(idx[d]) + HALF; } diff --git a/src/metrics/tests/minkowski.cpp b/src/metrics/tests/minkowski.cpp index 1ef27b4fa..6073b810d 100644 --- a/src/metrics/tests/minkowski.cpp +++ b/src/metrics/tests/minkowski.cpp @@ -21,7 +21,7 @@ inline static constexpr auto epsilon = std::numeric_limits::epsilon(); template Inline auto equal(const coord_t& a, const coord_t& b, real_t acc = ONE) -> bool { - for (unsigned short d = 0; d < D; ++d) { + for (auto d { 0u }; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], epsilon * acc)) { printf("%d : %.12f != %.12f\n", d, a[d], b[d]); return false; diff --git a/src/metrics/tests/sph-qsph.cpp b/src/metrics/tests/sph-qsph.cpp index 230a763e1..2ad802c20 100644 --- a/src/metrics/tests/sph-qsph.cpp +++ b/src/metrics/tests/sph-qsph.cpp @@ -25,7 +25,7 @@ Inline auto equal(const vec_t& a, const char* msg, real_t acc = ONE) -> bool { const auto eps = epsilon * acc; - for (unsigned short d = 0; d < D; ++d) { + for (auto d { 0u }; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { printf("%d : %.12e != %.12e %s\n", d, a[d], b[d], msg); return false; @@ -38,7 +38,7 @@ 
template Inline void unravel(std::size_t idx, tuple_t& ijk, const tuple_t& res) { - for (unsigned short d = 0; d < D; ++d) { + for (auto d { 0u }; d < D; ++d) { ijk[d] = idx % res[d]; idx /= res[d]; } @@ -74,7 +74,7 @@ void testMetric(const std::vector& res, coord_t x_Code { ZERO }; coord_t x_Phys { ZERO }; - for (unsigned short d = 0; d < M::Dim; ++d) { + for (auto d { 0u }; d < M::Dim; ++d) { x_Code[d] = (real_t)(idx[d]) + HALF; } diff --git a/src/metrics/tests/sr-cart-sph.cpp b/src/metrics/tests/sr-cart-sph.cpp index 42aa5d639..b3e4e163f 100644 --- a/src/metrics/tests/sr-cart-sph.cpp +++ b/src/metrics/tests/sr-cart-sph.cpp @@ -28,7 +28,7 @@ Inline auto equal(const coord_t& a, const char* msg, real_t acc = ONE) -> bool { const auto eps = epsilon * acc; - for (unsigned short d = 0; d < D; ++d) { + for (auto d { 0u }; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { printf("%d : %.12e != %.12e %s\n", d, a[d], b[d], msg); return false; @@ -41,7 +41,7 @@ template Inline void unravel(std::size_t idx, tuple_t& ijk, const tuple_t& res) { - for (unsigned short d = 0; d < D; ++d) { + for (auto d { 0u }; d < D; ++d) { ijk[d] = idx % res[d]; idx /= res[d]; } @@ -81,7 +81,7 @@ void testMetric(const std::vector& res, coord_t x_Code_2 { ZERO }; coord_t x_Cart { ZERO }; - for (unsigned short d = 0; d < M::Dim; ++d) { + for (auto d { 0u }; d < M::Dim; ++d) { x_Code_1[d] = (real_t)(idx[d]) + HALF; } metric.template convert_xyz(x_Code_1, x_Cart); @@ -95,7 +95,7 @@ void testMetric(const std::vector& res, coord_t x_Code_r1 { ZERO }; coord_t x_Code_r2 { ZERO }; coord_t x_Sph { ZERO }; - for (unsigned short d = 0; d < M::Dim; ++d) { + for (auto d { 0u }; d < M::Dim; ++d) { x_Code_r1[d] = x_Code_1[d]; } metric.template convert(x_Code_r1, x_Sph); diff --git a/src/metrics/tests/vec_trans.cpp b/src/metrics/tests/vec_trans.cpp index 31015115c..af7c08813 100644 --- a/src/metrics/tests/vec_trans.cpp +++ b/src/metrics/tests/vec_trans.cpp @@ -31,7 +31,7 @@ Inline auto equal(const 
vec_t& a, const char* msg, real_t acc = ONE) -> bool { const auto eps = epsilon * acc; - for (unsigned short d = 0; d < D; ++d) { + for (auto d { 0u }; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { printf("%d : %.12e != %.12e %s\n", d, a[d], b[d], msg); return false; @@ -44,7 +44,7 @@ template Inline void unravel(std::size_t idx, tuple_t& ijk, const tuple_t& res) { - for (unsigned short d = 0; d < D; ++d) { + for (auto d { 0u }; d < D; ++d) { ijk[d] = idx % res[d]; idx /= res[d]; } @@ -79,7 +79,7 @@ void testMetric(const std::vector& res, tuple_t idx; unravel(n, idx, res_tup); coord_t x_Code { ZERO }; - for (unsigned short d = 0; d < M::Dim; ++d) { + for (auto d { 0u }; d < M::Dim; ++d) { x_Code[d] = (real_t)(idx[d]) + HALF; } vec_t v_Hat_1 { ZERO }; @@ -94,7 +94,7 @@ void testMetric(const std::vector& res, vec_t v_PhysCov_2 { ZERO }; // init - for (unsigned short d = 0; d < Dim::_3D; ++d) { + for (auto d { 0u }; d < 3u; ++d) { v_Hat_1[d] += ONE; v_PhysCntrv_1[d] += ONE; v_PhysCov_1[d] += ONE; @@ -102,12 +102,12 @@ void testMetric(const std::vector& res, // hat <-> cntrv metric.template transform(x_Code, v_Hat_1, v_Cntrv_1); - for (unsigned short d = 0; d < Dim::_3D; ++d) { + for (auto d { 0u }; d < 3u; ++d) { vec_t e_d { ZERO }; vec_t v_Cntrv_temp { ZERO }; e_d[d] = ONE; metric.template transform(x_Code, e_d, v_Cntrv_temp); - for (unsigned short d = 0; d < Dim::_3D; ++d) { + for (auto d { 0u }; d < 3u; ++d) { v_Cntrv_2[d] += v_Cntrv_temp[d]; } } @@ -123,15 +123,15 @@ void testMetric(const std::vector& res, v_Cov_2, "cntrv->cov is equal to hat->cov", acc); - for (unsigned short d = 0; d < Dim::_3D; ++d) { + for (auto d { 0u }; d < 3u; ++d) { v_Cov_2[d] = ZERO; } - for (unsigned short d = 0; d < Dim::_3D; ++d) { + for (auto d { 0u }; d < 3u; ++d) { vec_t e_d { ZERO }; vec_t v_Cov_temp { ZERO }; e_d[d] = ONE; metric.template transform(x_Code, e_d, v_Cov_temp); - for (unsigned short d = 0; d < Dim::_3D; ++d) { + for (auto d { 0u }; d < 3u; ++d) { 
v_Cov_2[d] += v_Cov_temp[d]; } } @@ -179,24 +179,24 @@ auto main(int argc, char* argv[]) -> int { const auto res2d = std::vector { 64, 32 }; const auto res3d = std::vector { 64, 32, 16 }; const auto ext1dcart = boundaries_t { - {10.0, 20.0} + { 10.0, 20.0 } }; const auto ext2dcart = boundaries_t { - {0.0, 20.0}, - {0.0, 10.0} + { 0.0, 20.0 }, + { 0.0, 10.0 } }; const auto ext3dcart = boundaries_t { - {-2.0, 2.0}, - {-1.0, 1.0}, - {-0.5, 0.5} + { -2.0, 2.0 }, + { -1.0, 1.0 }, + { -0.5, 0.5 } }; const auto extsph = boundaries_t { - {1.0, 10.0}, - {0.0, constant::PI} + { 1.0, 10.0 }, + { 0.0, constant::PI } }; const auto params = std::map { - {"r0", -ONE}, - { "h", (real_t)0.25} + { "r0", -ONE }, + { "h", (real_t)0.25 } }; // testMetric>({ 128 }, ext1dcart); @@ -219,13 +219,13 @@ auto main(int argc, char* argv[]) -> int { // const auto resqks = std::vector { 64, 42 }; const auto extqks = boundaries_t { - {0.8, 10.0}, - {0.0, constant::PI} + { 0.8, 10.0 }, + { 0.0, constant::PI } }; const auto paramsqks = std::map { - {"r0", -TWO}, - { "h", ZERO}, - { "a", (real_t)0.8} + { "r0", -TWO }, + { "h", ZERO }, + { "a", (real_t)0.8 } }; testMetric>(resqks, extqks, 500, paramsqks); // diff --git a/src/output/CMakeLists.txt b/src/output/CMakeLists.txt index 8a2ea0f16..1b132fb60 100644 --- a/src/output/CMakeLists.txt +++ b/src/output/CMakeLists.txt @@ -6,6 +6,7 @@ # # * writer.cpp # * fields.cpp +# * stats.cpp # * utils/interpret_prompt.cpp # # @includes: @@ -19,13 +20,17 @@ # @uses: # # * kokkos [required] -# * ADIOS2 [required] +# * ADIOS2 [optional] # * mpi [optional] # ------------------------------ set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SOURCES ${SRC_DIR}/writer.cpp ${SRC_DIR}/fields.cpp + +set(SOURCES ${SRC_DIR}/stats.cpp ${SRC_DIR}/fields.cpp ${SRC_DIR}/utils/interpret_prompt.cpp) +if(${output}) + list(APPEND SOURCES ${SRC_DIR}/writer.cpp) +endif() add_library(ntt_output ${SOURCES}) set(libs ntt_global) diff --git a/src/output/fields.cpp b/src/output/fields.cpp 
index 25267bdee..e6b86296f 100644 --- a/src/output/fields.cpp +++ b/src/output/fields.cpp @@ -23,7 +23,10 @@ namespace out { // determine the field ID const auto pos = name.find("_"); auto name_raw = (pos == std::string::npos) ? name : name.substr(0, pos); - name_raw = name_raw.substr(0, name_raw.find_first_of("0123ijxyzt")); + if ((fmt::toLower(name_raw) != "dive") and + (fmt::toLower(name_raw) != "divd")) { + name_raw = name_raw.substr(0, name_raw.find_first_of("0123ijxyzt")); + } if (FldsID::contains(fmt::toLower(name_raw).c_str())) { m_id = FldsID::pick(fmt::toLower(name_raw).c_str()); } else { @@ -59,7 +62,7 @@ namespace out { comp = {}; } // data preparation flags - if (not is_moment() && not is_custom()) { + if (not(is_moment() or is_custom() or is_divergence())) { if (S == SimEngine::SRPIC) { prepare_flag = PrepareOutput::ConvertToHat; } else { diff --git a/src/output/fields.h b/src/output/fields.h index cdfda9272..bc1271084 100644 --- a/src/output/fields.h +++ b/src/output/fields.h @@ -34,7 +34,7 @@ namespace out { PrepareOutputFlags interp_flag { PrepareOutput::None }; std::vector> comp {}; - std::vector species {}; + std::vector species {}; OutputField(const SimEngine& S, const std::string&); @@ -105,6 +105,10 @@ namespace out { } tmp.pop_back(); } + if (tmp == "dive" || tmp == "divd") { + // capitalize E/D + tmp[3] = std::toupper(tmp[3]); + } // capitalize the first letter tmp[0] = std::toupper(tmp[0]); } @@ -138,6 +142,10 @@ namespace out { } tmp.pop_back(); } + if (tmp == "dive" || tmp == "divd") { + // capitalize E/D + tmp[3] = std::toupper(tmp[3]); + } // capitalize the first letter tmp[0] = std::toupper(tmp[0]); return "f" + tmp; diff --git a/src/output/particles.h b/src/output/particles.h index fb05fec7d..0936e66f9 100644 --- a/src/output/particles.h +++ b/src/output/particles.h @@ -8,20 +8,22 @@ #ifndef OUTPUT_PARTICLES_H #define OUTPUT_PARTICLES_H +#include "global.h" + #include namespace out { class OutputSpecies { - const unsigned short m_sp; 
+ const spidx_t m_sp; public: - OutputSpecies(unsigned short sp) : m_sp { sp } {} + OutputSpecies(spidx_t sp) : m_sp { sp } {} ~OutputSpecies() = default; [[nodiscard]] - auto species() const -> unsigned short { + auto species() const -> spidx_t { return m_sp; } diff --git a/src/output/spectra.h b/src/output/spectra.h index 119495cd3..c3e5d13d7 100644 --- a/src/output/spectra.h +++ b/src/output/spectra.h @@ -8,20 +8,22 @@ #ifndef OUTPUT_SPECTRA_H #define OUTPUT_SPECTRA_H +#include "global.h" + #include namespace out { class OutputSpectra { - const unsigned short m_sp; + const spidx_t m_sp; public: - OutputSpectra(unsigned short sp) : m_sp { sp } {} + OutputSpectra(spidx_t sp) : m_sp { sp } {} ~OutputSpectra() = default; [[nodiscard]] - auto species() const -> unsigned short { + auto species() const -> spidx_t { return m_sp; } diff --git a/src/output/stats.cpp b/src/output/stats.cpp new file mode 100644 index 000000000..ba3e85c40 --- /dev/null +++ b/src/output/stats.cpp @@ -0,0 +1,101 @@ +#include "output/stats.h" + +#include "enums.h" +#include "global.h" + +#include "arch/mpi_aliases.h" +#include "utils/error.h" +#include "utils/formatting.h" + +#include "output/utils/interpret_prompt.h" + +#include + +#include +#include + +using namespace ntt; +using namespace out; + +namespace stats { + + OutputStats::OutputStats(const std::string& name) : m_name { name } { + // determine the stats ID + const auto pos = name.find("_"); + auto name_raw = (pos == std::string::npos) ? 
name : name.substr(0, pos); + if ((name_raw[0] != 'E') and (name_raw[0] != 'B') and (name_raw[0] != 'J')) { + name_raw = name_raw.substr(0, name_raw.find_first_of("0123ijxyzt")); + } + if (StatsID::contains(fmt::toLower(name_raw).c_str())) { + m_id = StatsID::pick(fmt::toLower(name_raw).c_str()); + } else { + raise::Error("Unrecognized stats ID " + fmt::toLower(name_raw), HERE); + } + // determine the species and components to output + if (is_moment()) { + species = InterpretSpecies(name); + } else { + species = {}; + } + if (is_vector()) { + // always write all the ExB and V components + comp = { { 1 }, { 2 }, { 3 } }; + } else if (id() == StatsID::T) { + // energy-momentum tensor + comp = InterpretComponents({ name.substr(1, 1), name.substr(2, 1) }); + } else { + // scalar (e.g., Rho, E^2, etc.) + comp = {}; + } + } + + void Writer::init(timestep_t interval, simtime_t interval_time) { + m_tracker = tools::Tracker("stats", interval, interval_time); + } + + auto Writer::shouldWrite(timestep_t step, simtime_t time) -> bool { + return m_tracker.shouldWrite(step, time); + } + + void Writer::defineStatsFilename(const std::string& filename) { + m_fname = filename; + } + + void Writer::defineStatsOutputs(const std::vector& stats_to_write) { + for (const auto& stat : stats_to_write) { + m_stat_writers.emplace_back(stat); + } + } + + void Writer::writeHeader() { + CallOnce( + [](auto& fname, auto& stat_writers) { + std::fstream StatsOut(fname, std::fstream::out | std::fstream::app); + StatsOut << "step,time,"; + for (const auto& stat : stat_writers) { + if (stat.is_vector()) { + for (auto i { 0u }; i < stat.comp.size(); ++i) { + StatsOut << stat.name(i) << ","; + } + } else { + StatsOut << stat.name() << ","; + } + } + StatsOut << std::endl; + StatsOut.close(); + }, + m_fname, + m_stat_writers); + } + + void Writer::endWriting() { + CallOnce( + [](auto& fname) { + std::fstream StatsOut(fname, std::fstream::out | std::fstream::app); + StatsOut << std::endl; + 
StatsOut.close(); + }, + m_fname); + } + +} // namespace stats diff --git a/src/output/stats.h b/src/output/stats.h new file mode 100644 index 000000000..c81b9b7b3 --- /dev/null +++ b/src/output/stats.h @@ -0,0 +1,170 @@ +/** + * @file output/stats.h + * @brief Class defining the metadata necessary to prepare the stats for output + * @implements + * - out::OutputStats + * @cpp: + * - stats.cpp + * @namespaces: + * - out:: + */ + +#ifndef OUTPUT_STATS_H +#define OUTPUT_STATS_H + +#include "enums.h" +#include "global.h" + +#include "utils/error.h" +#include "utils/tools.h" + +#include +#include +#include +#include + +using namespace ntt; + +namespace stats { + + class OutputStats { + const std::string m_name; + StatsID m_id { StatsID::INVALID }; + + public: + std::vector> comp {}; + std::vector species {}; + + OutputStats(const std::string&); + + ~OutputStats() = default; + + [[nodiscard]] + auto is_moment() const -> bool { + return (id() == StatsID::T || id() == StatsID::Rho || id() == StatsID::Npart || + id() == StatsID::N || id() == StatsID::Charge); + } + + [[nodiscard]] + auto is_vector() const -> bool { + return id() == StatsID::ExB; + } + + [[nodiscard]] + inline auto name() const -> std::string { + // generate the name + std::string tmp = std::string(id().to_string()); + if (tmp == "exb") { + tmp = "ExB"; + } else if (tmp == "j.e") { + tmp = "J.E"; + } else { + // capitalize the first letter + tmp[0] = std::toupper(tmp[0]); + } + if (id() == StatsID::T) { + tmp += m_name.substr(1, 2); + } else if (is_vector()) { + tmp += "i"; + } + if (species.size() > 0) { + tmp += "_"; + for (auto& s : species) { + tmp += std::to_string(s); + tmp += "_"; + } + tmp.pop_back(); + } + return tmp; + } + + [[nodiscard]] + inline auto name(std::size_t ci) const -> std::string { + raise::ErrorIf( + comp.size() == 0, + "OutputField::name(ci) called but no components were available", + HERE); + raise::ErrorIf( + ci >= comp.size(), + "OutputField::name(ci) called with an invalid 
component index", + HERE); + raise::ErrorIf( + comp[ci].size() == 0, + "OutputField::name(ci) called but no components were available", + HERE); + // generate the name + auto tmp = std::string(id().to_string()); + // capitalize the first letter + if (tmp == "exb") { + tmp = "ExB"; + } else { + // capitalize the first letter + tmp[0] = std::toupper(tmp[0]); + } + for (auto& c : comp[ci]) { + tmp += std::to_string(c); + } + if (species.size() > 0) { + tmp += "_"; + for (auto& s : species) { + tmp += std::to_string(s); + tmp += "_"; + } + tmp.pop_back(); + } + return tmp; + } + + [[nodiscard]] + auto id() const -> StatsID { + return m_id; + } + }; + + class Writer { + std::string m_fname; + std::vector m_stat_writers; + tools::Tracker m_tracker; + + public: + Writer() {} + + ~Writer() = default; + + Writer(Writer&&) = default; + + void init(timestep_t, simtime_t); + void defineStatsFilename(const std::string&); + void defineStatsOutputs(const std::vector&); + + void writeHeader(); + + [[nodiscard]] + auto shouldWrite(timestep_t, simtime_t) -> bool; + + template + inline void write(const T& value) const { +#if defined(MPI_ENABLED) + // @TODO: reduce +#endif + CallOnce( + [](auto& fname, auto& value) { + std::fstream StatsOut(fname, std::fstream::out | std::fstream::app); + StatsOut << value << ","; + StatsOut.close(); + }, + m_fname, + value); + } + + void endWriting(); + + [[nodiscard]] + auto statsWriters() const -> const std::vector& { + return m_stat_writers; + } + }; + +} // namespace stats + +#endif // OUTPUT_STATS_H diff --git a/src/output/tests/CMakeLists.txt b/src/output/tests/CMakeLists.txt index 835bb532f..f6f460ae9 100644 --- a/src/output/tests/CMakeLists.txt +++ b/src/output/tests/CMakeLists.txt @@ -29,9 +29,13 @@ function(gen_test title is_parallel) endif() endfunction() -if(NOT ${mpi}) - gen_test(fields false) - gen_test(writer-nompi false) -else() - gen_test(writer-mpi true) +gen_test(stats false) + +if(${output}) + if(NOT ${mpi}) + gen_test(fields 
false) + gen_test(writer-nompi false) + else() + gen_test(writer-mpi true) + endif() endif() diff --git a/src/output/tests/fields.cpp b/src/output/tests/fields.cpp index e09bed142..de86af2f4 100644 --- a/src/output/tests/fields.cpp +++ b/src/output/tests/fields.cpp @@ -45,11 +45,30 @@ auto main() -> int { raise::ErrorIf(rho.interp_flag != PrepareOutput::None, "Rho should not have any interp flags", HERE); - raise::ErrorIf(not(rho.species == std::vector { 1, 3 }), + raise::ErrorIf(not(rho.species == std::vector { 1, 3 }), "Rho should have species 1 and 3", HERE); } + { + const auto dive = OutputField(SimEngine::SRPIC, "divE"); + raise::ErrorIf(dive.is_moment(), "divE should not be a moment", HERE); + raise::ErrorIf(dive.is_field(), "divE should not be a field", HERE); + raise::ErrorIf(not dive.is_divergence(), "divE should be a divergence", HERE); + raise::ErrorIf(dive.id() != FldsID::divE, + "divE should have ID FldsID::divE", + HERE); + raise::ErrorIf(dive.name() != "fDivE", "divE should have name `fDivE`", HERE); + raise::ErrorIf(dive.comp.size() != 0, "divE should have 0 components", HERE); + raise::ErrorIf(dive.species.size() != 0, "divE should have no species", HERE); + raise::ErrorIf(dive.prepare_flag != PrepareOutput::None, + "divE should not have any prepare flags", + HERE); + raise::ErrorIf(dive.interp_flag != PrepareOutput::None, + "divE should not have any interp flags", + HERE); + } + { const auto t = OutputField(SimEngine::GRPIC, "Tti_2_3"); raise::ErrorIf(not t.is_moment(), "T should be a moment", HERE); diff --git a/src/output/tests/stats.cpp b/src/output/tests/stats.cpp new file mode 100644 index 000000000..db6730a89 --- /dev/null +++ b/src/output/tests/stats.cpp @@ -0,0 +1,102 @@ +#include "output/stats.h" + +#include "enums.h" + +#include "utils/error.h" + +#include +#include +#include + +auto main() -> int { + using namespace stats; + using namespace ntt; + try { + { + const auto e = OutputStats("E^2"); + raise::ErrorIf(e.is_vector(), "E^2 should 
not be a vector quantity", HERE); + raise::ErrorIf(e.is_moment(), "E^2 should not be a moment", HERE); + raise::ErrorIf(e.id() != StatsID::E2, "E^2 should have ID StatsID::E2", HERE); + raise::ErrorIf(e.species.size() != 0, "E^2 should have no species", HERE); + raise::ErrorIf(e.comp.size() != 0, "E^2 should have no components", HERE); + raise::ErrorIf(e.name() != "E^2", "E^2 should have name `E^2`", HERE); + } + + { + const auto e = OutputStats("ExB"); + raise::ErrorIf(not e.is_vector(), "ExB should be a vector quantity", HERE); + raise::ErrorIf(e.is_moment(), "ExB should not be a moment", HERE); + raise::ErrorIf(e.id() != StatsID::ExB, "ExB should have ID StatsID::ExB", HERE); + raise::ErrorIf(e.species.size() != 0, "ExB should have no species", HERE); + raise::ErrorIf(e.comp.size() != 3, "ExB should have 3 components", HERE); + raise::ErrorIf(e.name() != "ExBi", "ExB should have name `ExBi`", HERE); + } + + { + const auto e = OutputStats("J.E"); + raise::ErrorIf(e.is_vector(), "J.E should not be a vector quantity", HERE); + raise::ErrorIf(e.is_moment(), "J.E should not be a moment", HERE); + raise::ErrorIf(e.id() != StatsID::JdotE, + "J.E should have ID StatsID::JdotE", + HERE); + raise::ErrorIf(e.species.size() != 0, "J.E should have no species", HERE); + raise::ErrorIf(e.comp.size() != 0, "J.E should have no components", HERE); + raise::ErrorIf(e.name() != "J.E", "J.E should have name `J.E`", HERE); + } + + { + const auto rho = OutputStats("Rho_1_3"); + raise::ErrorIf(not rho.is_moment(), "Rho should be a moment", HERE); + raise::ErrorIf(rho.id() != StatsID::Rho, + "Rho should have ID StatsID::Rho", + HERE); + raise::ErrorIf(rho.name() != "Rho_1_3", "Rho should have name `Rho_1_3`", HERE); + raise::ErrorIf(rho.comp.size() != 0, "Rho should have 0 components", HERE); + raise::ErrorIf(not(rho.species == std::vector { 1, 3 }), + "Rho should have species 1 and 3", + HERE); + } + + { + const auto t = OutputStats("Tti_2_3"); + raise::ErrorIf(not t.is_moment(), "T 
should be a moment", HERE); + raise::ErrorIf(t.is_vector(), "T should not be a vector quantity", HERE); + raise::ErrorIf(t.id() != StatsID::T, "T should have ID StatsID::T", HERE); + raise::ErrorIf(t.name() != "Tti_2_3", "T should have name `Tti_2_3`", HERE); + raise::ErrorIf(t.name(0) != "T01_2_3", "T should have name `T01_2_3`", HERE); + raise::ErrorIf(t.name(1) != "T02_2_3", "T should have name `T02_2_3`", HERE); + raise::ErrorIf(t.name(2) != "T03_2_3", "T should have name `T03_2_3`", HERE); + raise::ErrorIf(t.comp.size() != 3, "T should have 3 component", HERE); + raise::ErrorIf(t.comp[0].size() != 2, + "T.comp[0] should have 2 components", + HERE); + raise::ErrorIf(t.comp[1].size() != 2, + "T.comp[1] should have 2 components", + HERE); + raise::ErrorIf(t.comp[2].size() != 2, + "T.comp[2] should have 2 components", + HERE); + raise::ErrorIf(t.comp[0] != std::vector { 0, 1 }, + "T.comp[0] should be {0, 1}", + HERE); + raise::ErrorIf(t.comp[1] != std::vector { 0, 2 }, + "T.comp[1] should be {0, 2}", + HERE); + raise::ErrorIf(t.comp[2] != std::vector { 0, 3 }, + "T.comp[2] should be {0, 3}", + HERE); + raise::ErrorIf(t.species.size() != 2, "T should have 2 species", HERE); + raise::ErrorIf(t.species[0] != 2, "T should have specie 2", HERE); + raise::ErrorIf(t.species[1] != 3, "T should have specie 3", HERE); + } + + { + const auto t = OutputStats("Tij"); + raise::ErrorIf(t.comp.size() != 6, "T should have 6 component", HERE); + } + } catch (const std::exception& e) { + std::cerr << e.what() << std::endl; + return 1; + } + return 0; +} diff --git a/src/output/tests/writer-mpi.cpp b/src/output/tests/writer-mpi.cpp index af8a38ef1..0770b8f80 100644 --- a/src/output/tests/writer-mpi.cpp +++ b/src/output/tests/writer-mpi.cpp @@ -65,6 +65,7 @@ auto main(int argc, char* argv[]) -> int { writer.defineMeshLayout({ static_cast(mpi_size) * nx1 }, { static_cast(mpi_rank) * nx1 }, { nx1 }, + { mpi_rank, mpi_size }, { dwn1 }, false, Coord::Cart); diff --git 
a/src/output/tests/writer-nompi.cpp b/src/output/tests/writer-nompi.cpp index 593f37f92..70024d968 100644 --- a/src/output/tests/writer-nompi.cpp +++ b/src/output/tests/writer-nompi.cpp @@ -75,6 +75,7 @@ auto main(int argc, char* argv[]) -> int { writer.defineMeshLayout({ nx1, nx2, nx3 }, { 0, 0, 0 }, { nx1, nx2, nx3 }, + { 0, 1 }, { dwn1, dwn2, dwn3 }, false, Coord::Cart); diff --git a/src/output/utils/interpret_prompt.cpp b/src/output/utils/interpret_prompt.cpp index 7e6d92971..8506b29ff 100644 --- a/src/output/utils/interpret_prompt.cpp +++ b/src/output/utils/interpret_prompt.cpp @@ -10,12 +10,12 @@ namespace out { - auto InterpretSpecies(const std::string& in) -> std::vector { - std::vector species; + auto InterpretSpecies(const std::string& in) -> std::vector { + std::vector species; if (in.find("_") < in.size()) { auto species_str = fmt::splitString(in.substr(in.find("_") + 1), "_"); for (const auto& specie : species_str) { - species.push_back((unsigned short)(std::stoi(specie))); + species.push_back((spidx_t)(std::stoi(specie))); } } return species; diff --git a/src/output/utils/interpret_prompt.h b/src/output/utils/interpret_prompt.h index ebacaa980..488d81101 100644 --- a/src/output/utils/interpret_prompt.h +++ b/src/output/utils/interpret_prompt.h @@ -4,7 +4,7 @@ * Defines the function that interprets ... * ... the user-defined species, e.g. 
when computing moments * @implements - * - out::InterpretSpecies -> std::vector + * - out::InterpretSpecies -> std::vector * - out::InterpretComponents -> std::vector> * @cpp: * - interpret_prompt.cpp @@ -17,12 +17,14 @@ #ifndef OUTPUT_UTILS_INTERPRET_PROMPT_H #define OUTPUT_UTILS_INTERPRET_PROMPT_H +#include "global.h" + #include #include namespace out { - auto InterpretSpecies(const std::string&) -> std::vector; + auto InterpretSpecies(const std::string&) -> std::vector; auto InterpretComponents(const std::vector&) -> std::vector>; diff --git a/src/output/writer.cpp b/src/output/writer.cpp index 4350e8442..a259ba889 100644 --- a/src/output/writer.cpp +++ b/src/output/writer.cpp @@ -29,7 +29,7 @@ namespace out { const std::string& title, bool use_separate_files) { m_separate_files = use_separate_files; - m_engine = engine; + m_engine = fmt::toLower(engine); p_adios = ptr_adios; raise::ErrorIf(p_adios == nullptr, "ADIOS pointer is null", HERE); @@ -48,9 +48,8 @@ namespace out { m_trackers.insert({ type, tools::Tracker(type, interval, interval_time) }); } - auto Writer::shouldWrite(const std::string& type, - timestep_t step, - simtime_t time) -> bool { + auto Writer::shouldWrite(const std::string& type, timestep_t step, simtime_t time) + -> bool { if (m_trackers.find(type) != m_trackers.end()) { return m_trackers.at(type).shouldWrite(step, time); } else { @@ -63,12 +62,14 @@ namespace out { m_mode = mode; } - void Writer::defineMeshLayout(const std::vector& glob_shape, - const std::vector& loc_corner, - const std::vector& loc_shape, - const std::vector& dwn, - bool incl_ghosts, - Coord coords) { + void Writer::defineMeshLayout( + const std::vector& glob_shape, + const std::vector& loc_corner, + const std::vector& loc_shape, + const std::pair& domain_idx, + const std::vector& dwn, + bool incl_ghosts, + Coord coords) { m_flds_ghosts = incl_ghosts; m_dwn = dwn; @@ -98,24 +99,24 @@ namespace out { for (auto i { 0u }; i < m_flds_g_shape.size(); ++i) { // cell-centers - 
adios2::Dims g_shape = { m_flds_g_shape_dwn[i] }; - adios2::Dims l_corner = { m_flds_l_corner_dwn[i] }; - adios2::Dims l_shape = { m_flds_l_shape_dwn[i] }; m_io.DefineVariable("X" + std::to_string(i + 1), - g_shape, - l_corner, - l_shape, + { m_flds_g_shape_dwn[i] }, + { m_flds_l_corner_dwn[i] }, + { m_flds_l_shape_dwn[i] }, adios2::ConstantDims); // cell-edges - const auto is_last = (m_flds_l_corner[i] + m_flds_l_shape[i] == + const auto is_last = (m_flds_l_corner[i] + m_flds_l_shape[i] == m_flds_g_shape[i]); - adios2::Dims g_shape1 = { m_flds_g_shape_dwn[i] + 1 }; - adios2::Dims l_shape1 = { m_flds_l_shape_dwn[i] + (is_last ? 1 : 0) }; m_io.DefineVariable("X" + std::to_string(i + 1) + "e", - g_shape1, - l_corner, - l_shape1, + { m_flds_g_shape_dwn[i] + 1 }, + { m_flds_l_corner_dwn[i] }, + { m_flds_l_shape_dwn[i] + (is_last ? 1 : 0) }, adios2::ConstantDims); + m_io.DefineVariable("N" + std::to_string(i + 1) + "l", + { 2 * domain_idx.second }, + { 2 * domain_idx.first }, + { 2 }, + adios2::ConstantDims); } if constexpr (std::is_same::array_layout, @@ -161,8 +162,8 @@ namespace out { } } - void Writer::defineParticleOutputs(Dimension dim, - const std::vector& specs) { + void Writer::defineParticleOutputs(Dimension dim, + const std::vector& specs) { m_prtl_writers.clear(); for (const auto& s : specs) { m_prtl_writers.emplace_back(s); @@ -187,7 +188,7 @@ namespace out { } } - void Writer::defineSpectraOutputs(const std::vector& specs) { + void Writer::defineSpectraOutputs(const std::vector& specs) { m_spectra_writers.clear(); for (const auto& s : specs) { m_spectra_writers.emplace_back(s); @@ -401,9 +402,10 @@ namespace out { m_writer.Put(var, e_bins_h); } - void Writer::writeMesh(unsigned short dim, - const array_t& xc, - const array_t& xe) { + void Writer::writeMesh(unsigned short dim, + const array_t& xc, + const array_t& xe, + const std::vector& loc_off_sz) { auto varc = m_io.InquireVariable("X" + std::to_string(dim + 1)); auto vare = m_io.InquireVariable("X" + 
std::to_string(dim + 1) + "e"); auto xc_h = Kokkos::create_mirror_view(xc); @@ -412,6 +414,9 @@ namespace out { Kokkos::deep_copy(xe_h, xe); m_writer.Put(varc, xc_h); m_writer.Put(vare, xe_h); + auto vard = m_io.InquireVariable( + "N" + std::to_string(dim + 1) + "l"); + m_writer.Put(vard, loc_off_sz.data()); } void Writer::beginWriting(WriteModeTags write_mode, @@ -425,7 +430,7 @@ namespace out { m_active_mode = write_mode; try { std::string filename; - const std::string ext = m_engine == "hdf5" ? "h5" : "bp"; + const std::string ext = (m_engine == "hdf5") ? "h5" : "bp"; if (m_separate_files) { std::string mode_str; if (m_active_mode == WriteMode::Fields) { diff --git a/src/output/writer.h b/src/output/writer.h index 5484aa6d7..2e8d20de5 100644 --- a/src/output/writer.h +++ b/src/output/writer.h @@ -83,18 +83,22 @@ namespace out { void writeAttrs(const prm::Parameters&); - void defineMeshLayout(const std::vector&, - const std::vector&, - const std::vector&, + void defineMeshLayout(const std::vector&, + const std::vector&, + const std::vector&, + const std::pair&, const std::vector&, bool, Coord); void defineFieldOutputs(const SimEngine&, const std::vector&); - void defineParticleOutputs(Dimension, const std::vector&); - void defineSpectraOutputs(const std::vector&); + void defineParticleOutputs(Dimension, const std::vector&); + void defineSpectraOutputs(const std::vector&); - void writeMesh(unsigned short, const array_t&, const array_t&); + void writeMesh(unsigned short, + const array_t&, + const array_t&, + const std::vector&); template void writeField(const std::vector&,