From e58be3fedfdd1508fced336a0e4154f5b3478b26 Mon Sep 17 00:00:00 2001 From: StaticObserver Date: Fri, 21 Jun 2024 11:32:43 +0800 Subject: [PATCH 01/53] fix bug in metadomain.cpp; link library stdc++fs --- src/CMakeLists.txt | 3 ++- src/framework/domain/metadomain.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 162d48df9..c15a90795 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,4 +37,5 @@ add_subdirectory(${SRC_DIR}/../setups ${CMAKE_CURRENT_BINARY_DIR}/setups) set(libs ntt_global ntt_framework ntt_metrics ntt_engines ntt_pgen) add_dependencies(${ENTITY} ${libs}) -target_link_libraries(${ENTITY} PUBLIC ${libs}) \ No newline at end of file +target_link_libraries(${ENTITY} PUBLIC ${libs}) +target_link_libraries(${ENTITY} PUBLIC stdc++fs) \ No newline at end of file diff --git a/src/framework/domain/metadomain.cpp b/src/framework/domain/metadomain.cpp index 86a9420fd..584bfa551 100644 --- a/src/framework/domain/metadomain.cpp +++ b/src/framework/domain/metadomain.cpp @@ -375,7 +375,7 @@ namespace ntt { template void Metadomain::metricCompatibilityCheck() const { const auto dx_min = g_mesh.metric.dxMin(); - auto dx_min_from_domains = INFINITY; + auto dx_min_from_domains = std::numeric_limits::infinity(); for (unsigned int idx { 0 }; idx < g_ndomains; ++idx) { const auto& current_domain = g_subdomains[idx]; const auto current_dx_min = current_domain.mesh.metric.dxMin(); From 107a9683f13a48bd0e0174d5b76d8162d663fb94 Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 24 Jun 2024 15:38:16 -0400 Subject: [PATCH 02/53] 1.1.0 tag bump --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b380f115d..0e68751f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ set(PROJECT_NAME entity) project( ${PROJECT_NAME} - VERSION 1.0.0 + VERSION 1.1.0 LANGUAGES CXX C) add_compile_options("-D ENTITY_VERSION=\"${PROJECT_VERSION}\"") execute_process(COMMAND From e71fa797d32c7a5117c67abaec9c253e6dd663a2 Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 24 Jun 2024 15:39:43 -0400 Subject: [PATCH 03/53] workflows disabled --- .github/workflows/actions.yml | 35 ---------------- .github/workflows/compilation.yml | 69 ------------------------------- 2 files changed, 104 deletions(-) delete mode 100644 .github/workflows/actions.yml delete mode 100644 .github/workflows/compilation.yml diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml deleted file mode 100644 index e436c9acf..000000000 --- a/.github/workflows/actions.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: Unit tests - -on: - pull_request: - branches: - - master - -jobs: - tests: - strategy: - matrix: - cuda: ["ON", "OFF"] - precision: ["double", "single"] - runs-on: - - self-hosted - steps: - - name: Checkout - uses: actions/checkout@v3.3.0 - - name: Load CUDA Entity - if: ${{ matrix.cuda == 'ON' }} - run: | - module load entity/cuda/volta70/skx - - name: Load Entity without CUDA - if: ${{ matrix.cuda == 'OFF' }} - run: | - module load entity/skx - - name: Compile - run: | - cmake -B build -D TESTS=ON -D precision=${{ matrix.precision }} - cd build - make -j $(exec nproc) - - name: Run tests - run: | - cd build/tests - ctest diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml deleted file mode 100644 index 373488c1c..000000000 --- a/.github/workflows/compilation.yml +++ /dev/null @@ -1,69 +0,0 @@ -name: Compilation - -on: - pull_request: - branches: - - 'release/**' - -jobs: - build-pic: - strategy: - matrix: - include: - - pgen: "weibel" - metric: "minkowski" - cuda: "ON" - - pgen: "weibel" - metric: "minkowski" - cuda: "OFF" - - pgen: "magnetosphere" - metric: "qspherical" - cuda: "ON" - - pgen: "magnetosphere" - metric: "qspherical" - cuda: "OFF" - - pgen: "magnetosphere" - metric: "spherical" - cuda: "ON" - - pgen: "magnetosphere" - metric: "spherical" - cuda: "OFF" - - runs-on: - - self-hosted - steps: - - name: Checkout - uses: actions/checkout@v3.3.0 - - name: Compile - run: | - cmake -B build -D pgen=${{ matrix.pgen }} -D metric=${{ matrix.metric }} -D Kokkos_ENABLE_CUDA=${{ matrix.cuda }} -D output=ON - cd build - make -j - - build-grpic: - strategy: - matrix: - include: - - pgen: "wald" - metric: "kerr_schild" - cuda: "ON" - - pgen: "wald" - metric: "kerr_schild" - cuda: "OFF" - - pgen: "wald" - metric: "qkerr_schild" - cuda: "ON" - - pgen: "wald" - metric: "qkerr_schild" - cuda: "OFF" - - runs-on: - - self-hosted - steps: - - name: Checkout - uses: actions/checkout@v3.3.0 - - name: Compile - run: | - cmake -B build -D engine=grpic -D pgen=${{ matrix.pgen }} -D metric=${{ matrix.metric }} -D Kokkos_ENABLE_CUDA=${{ matrix.cuda }} -D output=ON - cd build - make -j From 75133f8f4fc8ef7e8cb45b673d3c5c7f1b8f4f81 Mon Sep 17 00:00:00 2001 From: hayk Date: Tue, 25 Jun 2024 01:45:19 -0400 Subject: [PATCH 04/53] containers made better --- dev/Dockerfile.common | 81 ++++++++++++++++++++++++++++ dev/Dockerfile.cuda | 76 +++----------------------- dev/Dockerfile.rocm | 26 +++++++++ dev/welcome.cuda | 8 +++ dev/welcome.rocm | 121 ++++++++++++++++++++++++++++++++++++++++++ docker-compose.yml | 43 +++++++++++---- 6 files changed, 277 insertions(+), 78 deletions(-) create mode 100644 dev/Dockerfile.common create mode 100755 dev/Dockerfile.rocm create mode 100644 dev/welcome.rocm diff --git a/dev/Dockerfile.common b/dev/Dockerfile.common new file mode 100644 index 000000000..88e963b98 --- /dev/null +++ b/dev/Dockerfile.common @@ -0,0 +1,81 @@ +ARG DEBIAN_FRONTEND=noninteractive +ENV DISPLAY=host.docker.internal:0.0 + +# upgrade +RUN apt-get update && apt-get upgrade -y + +# cmake & build tools +RUN apt-get remove -y --purge cmake && \ + apt-get install -y wget build-essential && \ + wget "https://github.com/Kitware/CMake/releases/download/v3.29.6/cmake-3.29.6-linux-x86_64.tar.gz" -P /opt && \ + tar xvf /opt/cmake-3.29.6-linux-x86_64.tar.gz -C /opt && \ + rm /opt/cmake-3.29.6-linux-x86_64.tar.gz +ENV PATH=/opt/cmake-3.29.6-linux-x86_64/bin:$PATH + +# adios2 +RUN apt-get update && apt-get install -y git libhdf5-dev && \ + git clone https://github.com/ornladios/ADIOS2.git /opt/adios2-src && \ + cd /opt/adios2-src && \ + cmake -B build \ + -D CMAKE_CXX_STANDARD=17 \ + -D CMAKE_CXX_EXTENSIONS=OFF \ + -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ + -D BUILD_SHARED_LIBS=ON \ + -D ADIOS2_USE_HDF5=ON \ + -D ADIOS2_USE_Python=OFF \ + -D ADIOS2_USE_Fortran=OFF \ + -D ADIOS2_USE_ZeroMQ=OFF \ + -D BUILD_TESTING=OFF \ + -D ADIOS2_BUILD_EXAMPLES=OFF \ + -D ADIOS2_USE_MPI=OFF \ + -D ADIOS2_HAVE_HDF5_VOL=OFF \ + -D CMAKE_INSTALL_PREFIX=/opt/adios2 && \ + cmake --build build -j && \ + cmake --install build && \ + rm -rf /opt/adios2-src +ENV ADIOS2_DIR=/opt/adios2 +ENV HDF5_ROOT=/usr + +# other packages + python +RUN apt-get update && \ + apt-get install -y bc gpg curl ssh vim emacs bat fzf ffmpeg hdf5-tools software-properties-common && \ + add-apt-repository ppa:deadsnakes/ppa && \ + apt-get update && apt-get install -y python3.12-dev python3.12-venv + +# eza +RUN mkdir -p /etc/apt/keyrings && \ + wget -qO- https://raw.githubusercontent.com/eza-community/eza/main/deb.asc | gpg --dearmor -o /etc/apt/keyrings/gierens.gpg && \ + echo "deb [signed-by=/etc/apt/keyrings/gierens.gpg] http://deb.gierens.de stable main" | tee /etc/apt/sources.list.d/gierens.list && \ + chmod 644 /etc/apt/keyrings/gierens.gpg /etc/apt/sources.list.d/gierens.list && \ + apt-get update && apt-get install -y eza + +# cleanup +RUN apt-get clean && \ + apt-get autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/cache/* && \ + rm -rf /var/lib/log/* && \ + rm -rf /var/lib/apt/lists/* + +# python +RUN ln -s $(which python3.12) /bin/python && \ + python -m venv /opt/venv && \ + /opt/venv/bin/pip install --upgrade pip && \ + /opt/venv/bin/pip install black numpy myplotlib nt2py jupyterlab ipykernel +ENV VIRTUAL_ENV=/opt/venv + +# user environment +ARG HOME=/root +WORKDIR $HOME +RUN echo "alias ls='eza -a --sort=type'" >> $HOME/.bashrc && \ + echo "alias ll='eza -a --long --header --sort=type --time-style=long-iso'" >> $HOME/.bashrc && \ + echo "alias cat='batcat -pp'" >> $HOME/.bashrc + +RUN curl -sS https://starship.rs/install.sh | sh -s -- --yes && \ + echo eval "\$(starship init bash)" >> $HOME/.bashrc && \ + mkdir -p $HOME/.config && \ + starship preset pure-preset -o $HOME/.config/starship.toml && \ + sed -i 's/$python\\//g' $HOME/.config/starship.toml +ENV STARSHIP_CONFIG=$HOME/.config/starship.toml + +ENV PATH=/opt/adios2/bin:/opt/venv/bin:$PATH diff --git a/dev/Dockerfile.cuda b/dev/Dockerfile.cuda index 6c2dce664..dad5b0faf 100755 --- a/dev/Dockerfile.cuda +++ b/dev/Dockerfile.cuda @@ -1,77 +1,15 @@ +# syntax = devthefuture/dockerfile-x + FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 -ENV CUDA_HOME=/usr/local/cuda MAINTAINER "@haykh" -ARG DEBIAN_FRONTEND=noninteractive -ENV DISPLAY=host.docker.internal:0.0 - -# basic packages -RUN apt-get update -RUN apt-get install -y bc zsh git wget curl ssh build-essential cmake vim emacs bat fzf -RUN apt-get install -y ffmpeg libhdf5-dev python3 python3-pip python3-venv - -# eza -RUN apt install -y gpg -RUN mkdir -p /etc/apt/keyrings -RUN wget -qO- https://raw.githubusercontent.com/eza-community/eza/main/deb.asc | gpg --dearmor -o /etc/apt/keyrings/gierens.gpg -RUN echo "deb [signed-by=/etc/apt/keyrings/gierens.gpg] http://deb.gierens.de stable main" | tee /etc/apt/sources.list.d/gierens.list -RUN chmod 644 /etc/apt/keyrings/gierens.gpg /etc/apt/sources.list.d/gierens.list -RUN apt update -RUN apt install -y eza - -# cleanup -RUN apt-get clean && \ - apt-get autoclean && \ - apt-get autoremove -y && \ - rm -rf /var/lib/cache/* && \ - rm -rf /var/lib/log/* - -# adios2 -ENV HDF5_ROOT=/usr -RUN git clone https://github.com/ornladios/ADIOS2.git /opt/adios2-src -RUN cd /opt/adios2-src && \ - cmake -B build \ - -D CMAKE_CXX_STANDARD=17 \ - -D CMAKE_CXX_EXTENSIONS=OFF \ - -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ - -D BUILD_SHARED_LIBS=ON \ - -D ADIOS2_USE_HDF5=ON \ - -D ADIOS2_USE_Python=OFF \ - -D ADIOS2_USE_Fortran=OFF \ - -D ADIOS2_USE_ZeroMQ=OFF \ - -D BUILD_TESTING=OFF \ - -D ADIOS2_BUILD_EXAMPLES=OFF \ - -D ADIOS2_USE_MPI=OFF \ - -D ADIOS2_HAVE_HDF5_VOL=OFF \ - -D CMAKE_INSTALL_PREFIX=/opt/adios2 && \ - cmake --build build && \ - cmake --install build -ENV ADIOS2_DIR=/opt/adios2 - -# python -RUN ln -s /bin/python3 /bin/python -RUN python3 -m venv /opt/venv && \ - /opt/venv/bin/pip install --upgrade pip && \ - /opt/venv/bin/pip install black numpy myplotlib nt2py jupyterlab ipykernel -ENV VIRTUAL_ENV=/opt/venv - -# user environment -ENV USER=ntt -WORKDIR /home -ENV PATH=/opt/adios2/bin:/usr/local/cuda/bin:/opt/venv/bin:$PATH -RUN echo "alias ls='eza -a --sort=type'" >> /root/.bashrc -RUN echo "alias ll='eza -a --long --header --sort=type --time-style=long-iso'" >> /root/.bashrc -RUN echo "alias cat='batcat -pp'" >> /root/.bashrc +ENV CUDA_HOME=/usr/local/cuda +ENV PATH=/usr/local/cuda/bin:$PATH -RUN curl -sS https://starship.rs/install.sh | sh -s -- --yes -RUN echo eval "\$(starship init bash)" >> /root/.bashrc -RUN mkdir -p /home/.config -RUN starship preset pure-preset -o /home/.config/starship.toml -RUN sed -i 's/$python\\//g' /home/.config/starship.toml -ENV STARSHIP_CONFIG=/home/.config/starship.toml +INCLUDE Dockerfile.common # welcome message -COPY welcome.cuda /home/.welcome.cuda -RUN echo '/usr/bin/bash ./.welcome.cuda' > /etc/profile.d/welcome.sh +COPY welcome.cuda /root/.welcome.cuda +RUN echo '/usr/bin/bash /root/.welcome.cuda' | tee -a /etc/profile.d/welcome.sh ENTRYPOINT ["/usr/bin/bash", "-l"] diff --git a/dev/Dockerfile.rocm b/dev/Dockerfile.rocm new file mode 100755 index 000000000..739f2d856 --- /dev/null +++ b/dev/Dockerfile.rocm @@ -0,0 +1,26 @@ +# syntax = devthefuture/dockerfile-x + +FROM rocm/rocm-terminal:latest +MAINTAINER "@haykh" + +USER root +ENV PATH=/opt/rocm/bin:$PATH + +INCLUDE Dockerfile.common + +# additional ROCm packages +RUN git clone -b release/rocm-rel-6.1.1.1 https://github.com/ROCm/rocThrust.git /opt/rocthrust-src && \ + git clone -b release/rocm-rel-6.1.00.36 https://github.com/ROCm/rocPRIM.git /opt/rocprim-src && \ + cd /opt/rocthrust-src && ./install --install && \ + cd /opt/rocprim-src && ./install --install && \ + rm -rf /opt/rocthrust-src /opt/rocprim-src + +ENV CMAKE_PREFIX_PATH=/opt/rocm +ENV CC=hipcc +ENV CXX=hipcc + +# welcome message +COPY welcome.rocm /root/.welcome.rocm +RUN echo '/usr/bin/bash /root/.welcome.rocm' | tee -a /etc/profile.d/welcome.sh + +ENTRYPOINT ["/usr/bin/bash", "-l"] diff --git a/dev/welcome.cuda b/dev/welcome.cuda index 486c811b4..ff5b53203 100644 --- a/dev/welcome.cuda +++ b/dev/welcome.cuda @@ -47,6 +47,13 @@ else cmake_version=${error_msg} fi +hdf5_or_error=$(h5cc -showconfig | grep Version 2>&1) +if [ $? -eq 0 ]; then + hdf5_version=${FgYellow}$(h5cc -showconfig | grep Version | xargs)${Reset} +else + hdf5_version=${error_msg} +fi + adios2_or_error=$(adios2-config -v 2>&1) if [ $? -eq 0 ]; then adios2_version=${FgYellow}$(adios2-config -v | grep ADIOS)${Reset} @@ -76,6 +83,7 @@ ${Reset}${Underscore}Entity CUDA container${Reset}${FgBlue} \\/__/ - ${gcc_version} - ${nvcc_version} - ${cmake_version} + - ${hdf5_version} - ${adios2_version} - ${python_version} diff --git a/dev/welcome.rocm b/dev/welcome.rocm new file mode 100644 index 000000000..35e5e15d9 --- /dev/null +++ b/dev/welcome.rocm @@ -0,0 +1,121 @@ +#!/bin/sh +Reset="\x1b[0m" +Bright="\x1b[1m" +Dim="\x1b[2m" +Underscore="\x1b[4m" +Blink="\x1b[5m" +Reverse="\x1b[7m" +Hidden="\x1b[8m" + +FgBlack="\x1b[30m" +FgRed="\x1b[31m" +FgGreen="\x1b[32m" +FgYellow="\x1b[33m" +FgBlue="\x1b[34m" +FgMagenta="\x1b[35m" +FgCyan="\x1b[36m" +FgWhite="\x1b[37m" + +BgBlack="\x1b[40m" +BgRed="\x1b[41m" +BgGreen="\x1b[42m" +BgYellow="\x1b[43m" +BgBlue="\x1b[44m" +BgMagenta="\x1b[45m" +BgCyan="\x1b[46m" +BgWhite="\x1b[47m" + +error_msg=${FgRed}"NOT FOUND"${Reset} +gcc_or_error=$(gcc --version 2>&1) +if [ $? -eq 0 ]; then + gcc_version=${FgYellow}$(gcc --version | grep gcc)${Reset} +else + gcc_version=${error_msg} +fi + +rocm_or_error=$(hipcc --version | grep HIP 2>&1) +if [ $? -eq 0 ]; then + rocm_version=${FgYellow}$(hipcc --version | grep HIP)${Reset} +else + rocm_version=${error_msg} +fi + +cmake_or_error=$(cmake --version 2>&1) +if [ $? -eq 0 ]; then + cmake_version=${FgYellow}$(cmake --version | grep ver)${Reset} +else + cmake_version=${error_msg} +fi + +hdf5_or_error=$(h5cc -showconfig | grep Version 2>&1) +if [ $? -eq 0 ]; then + hdf5_version=${FgYellow}$(h5cc -showconfig | grep Version | xargs)${Reset} +else + hdf5_version=${error_msg} +fi + +adios2_or_error=$(adios2-config -v 2>&1) +if [ $? -eq 0 ]; then + adios2_version=${FgYellow}$(adios2-config -v | grep ADIOS)${Reset} +else + adios2_version=${error_msg} +fi + +python_or_error=$(python --version 2>&1) +if [ $? -eq 0 ]; then + python_version=${FgYellow}$(python --version)${Reset} +else + python_version=${error_msg} +fi + +str=$(cat <${Reset} + +## Testing the code + +* To build the tests, remove the \`build\` directory, and preconfigure with: + ${FgGreen}$ cmake -B build -D TESTS=ON -D output=ON -D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_***=ON${Reset} + +* Then run the tests with: + ${FgGreen}$ ctest --test-dir build${Reset} + + +${Red}Enjoy!${Reset} 🚀 +EOF +) + + +printf "$str\n" diff --git a/docker-compose.yml b/docker-compose.yml index 7e82bbebf..3de0516a3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,12 +1,37 @@ +x-common-config: &common-config + stdin_open: true + tty: true + ports: + - "8080:8080" + volumes: + - type: bind + source: . + target: /root/entity/ + +x-cuda-base: &cuda-base + image: morninbru/entity:cuda + +x-rocm-base: &rocm-base + image: morninbru/entity:rocm + services: + entity-cuda-compilers: + container_name: entity_cuda_compilers + <<: [*cuda-base, *common-config] entity-cuda: - image: morninbru/entity:cuda container_name: entity_cuda - stdin_open: true - tty: true - ports: - - "8080:8080" - volumes: - - type: bind - source: . - target: /home/entity/ + runtime: nvidia + environment: + - NVIDIA_VISIBLE_DEVICES=all + <<: [*cuda-base, *common-config] + entity-rocm-compilers: + container_name: entity_rocm_compilers + <<: [*common-config, *rocm-base] + entity-rocm: + container_name: entity_rocm + devices: + - "/dev/fdk" + - "/dev/dri" + security_opt: + - seccomp:unconfined + <<: [*common-config, *rocm-base] From 2b3b0b298b2e93d0d9a9deb697d0dee5e61ae336 Mon Sep 17 00:00:00 2001 From: hayk Date: Tue, 25 Jun 2024 11:23:42 -0400 Subject: [PATCH 05/53] branch-policy-added --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 2b5b159b1..047da4b2c 100644 --- a/README.md +++ b/README.md @@ -27,3 +27,7 @@ Our [detailed documentation](https://entity-toolkit.github.io/) includes everyth 🤷 __Arno Vanthieghem__ {[@vanthieg](https://github.com/vanthieg): framework, PIC} 😺 __Muni Zhou__ {[@munizhou](https://github.com/munizhou): PIC} + +## Branch policy + +Master branch contains the latest stable version of the code which has already been released. Development on the core is done on branches starting with `dev/`. User-specific modifications (i.e., new problem generators plus perhaps minor corrections in the core) are done on branches starting with `pgen/`. Before merging to the master branch, all the branches must first be merged to the latest release-candidate branch, which ends with `rc`, via a pull request. After which, when all the release goals are met, the `rc` branch is merged to the master and released as a new stable version. Stale branches will be archived with a tag starting with `archive/` (can still be accessed via the "Tags" tab) and removed. From ec3c868d9084b5173a708f211d17da047f24d298 Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 27 Jun 2024 15:10:47 -0400 Subject: [PATCH 06/53] added special check for 2D in sph/qsph --- README.md | 2 +- src/metrics/qspherical.h | 4 ++++ src/metrics/spherical.h | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 047da4b2c..41a5fe47e 100644 --- a/README.md +++ b/README.md @@ -30,4 +30,4 @@ Our [detailed documentation](https://entity-toolkit.github.io/) includes everyth ## Branch policy -Master branch contains the latest stable version of the code which has already been released. Development on the core is done on branches starting with `dev/`. User-specific modifications (i.e., new problem generators plus perhaps minor corrections in the core) are done on branches starting with `pgen/`. Before merging to the master branch, all the branches must first be merged to the latest release-candidate branch, which ends with `rc`, via a pull request. After which, when all the release goals are met, the `rc` branch is merged to the master and released as a new stable version. Stale branches will be archived with a tag starting with `archive/` (can still be accessed via the "Tags" tab) and removed. +Master branch contains the latest stable version of the code which has already been released. Development on the core is done on branches starting with `dev/`, while fixing bugs is done in branches that start with `bug/`. User-specific modifications (i.e., new problem generators plus perhaps minor corrections in the core) are done on branches starting with `pgen/`. Before merging to the master branch, all the branches must first be merged to the latest release-candidate branch, which ends with `rc`, via a pull request. After which, when all the release goals are met, the `rc` branch is merged to the master and released as a new stable version. Stale branches will be archived with a tag starting with `archive/` (can still be accessed via the "Tags" tab) and removed. diff --git a/src/metrics/qspherical.h b/src/metrics/qspherical.h index 3f370d03a..8062f7589 100644 --- a/src/metrics/qspherical.h +++ b/src/metrics/qspherical.h @@ -317,6 +317,8 @@ namespace metric { return v_in * math::exp(xi[0] * dchi + chi_min) * dchi; } else if constexpr (i == 2) { return v_in * (dtheta_deta(xi[1] * deta + eta_min) * deta); + } else if constexpr (D == Dim::_2D) { + return v_in; } else { return v_in * dphi; } @@ -327,6 +329,8 @@ namespace metric { return v_in * dchi_inv / (math::exp(xi[0] * dchi + chi_min)); } else if constexpr (i == 2) { return v_in * deta_inv / (dtheta_deta(xi[1] * deta + eta_min)); + } else if constexpr (D == Dim::_2D) { + return v_in; } else { return v_in * dphi_inv; } diff --git a/src/metrics/spherical.h b/src/metrics/spherical.h index 1e6f83dbc..f4bbe2eea 100644 --- a/src/metrics/spherical.h +++ b/src/metrics/spherical.h @@ -285,6 +285,8 @@ namespace metric { return v_in * dr; } else if constexpr (i == 2) { return v_in * dtheta; + } else if constexpr (D == Dim::_2D) { + return v_in; } else { return v_in * dphi; } @@ -295,6 +297,8 @@ namespace metric { return v_in * dr_inv; } else if constexpr (i == 2) { return v_in * dtheta_inv; + } else if constexpr (D == Dim::_2D) { + return v_in; } else { return v_in * dphi_inv; } From a98f023247bda5a90b24213b3ac791a07e9fb921 Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 27 Jun 2024 15:42:06 -0400 Subject: [PATCH 07/53] Update CMakeLists.txt putting stdc++fs only where necessary --- src/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c15a90795..38478ad11 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,5 +37,4 @@ add_subdirectory(${SRC_DIR}/../setups ${CMAKE_CURRENT_BINARY_DIR}/setups) set(libs ntt_global ntt_framework ntt_metrics ntt_engines ntt_pgen) add_dependencies(${ENTITY} ${libs}) -target_link_libraries(${ENTITY} PUBLIC ${libs}) -target_link_libraries(${ENTITY} PUBLIC stdc++fs) \ No newline at end of file +target_link_libraries(${ENTITY} ${libs}) From 1aa8c8c5394a5de86c6662c4a6d905ff7a7b9f6d Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 27 Jun 2024 15:43:39 -0400 Subject: [PATCH 08/53] Update CMakeLists.txt needs to be PUBLIC (fixing my own bug) --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 38478ad11..5d7f0abb4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,4 +37,4 @@ add_subdirectory(${SRC_DIR}/../setups ${CMAKE_CURRENT_BINARY_DIR}/setups) set(libs ntt_global ntt_framework ntt_metrics ntt_engines ntt_pgen) add_dependencies(${ENTITY} ${libs}) -target_link_libraries(${ENTITY} ${libs}) +target_link_libraries(${ENTITY} PUBLIC ${libs}) From 4bd5728dd32ae452a70e6186040963fa8e95966c Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 27 Jun 2024 15:59:39 -0400 Subject: [PATCH 09/53] added lib --- src/framework/domain/metadomain.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/framework/domain/metadomain.cpp b/src/framework/domain/metadomain.cpp index 584bfa551..b13475fd6 100644 --- a/src/framework/domain/metadomain.cpp +++ b/src/framework/domain/metadomain.cpp @@ -21,6 +21,7 @@ #include #endif +#include #include #include #include From 96345578c8c7f0462aeb44c8b47d8f641f11289d Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 27 Jun 2024 16:54:48 -0400 Subject: [PATCH 10/53] unit tests as gh actions + self-hosted runner tags --- .github/workflows/actions.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .github/workflows/actions.yml diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml new file mode 100644 index 000000000..49524cf0a --- /dev/null +++ b/.github/workflows/actions.yml @@ -0,0 +1,32 @@ +name: Unit tests + +on: + pull_request: + branches: + - '**rc' + - 'master' + +jobs: + tests: + strategy: + matrix: + gpu: ["amd", "nvidia"] + precision: ["double", "single"] + runs-on: [self-hosted, "${{ matrix.gpu }}-gpu"] + steps: + - name: Checkout + uses: actions/checkout@v3.3.0 + - name: Configure + run: | + if [ "${{ matrix.gpu }}" = "nvidia" ]; then + FLAGS="-D Kokkos_ENABLE_CUDA=ON -D Kokkos_ARCH_AMPERE86=ON" + elif [ "${{ matrix.gpu }}" = "amd" ]; then + FLAGS="-D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_AMD_GFX1100=ON" + fi + cmake -B build -D TESTS=ON -D output=ON -D precision=${{ matrix.precision }} $FLAGS + - name: Compile + run: | + cmake --build build -j $(exec nproc) + - name: Run tests + run: | + ctest --test-dir build --output-on-failure --verbose \ No newline at end of file From 58f9ff6b78a7d7a42ea32c0f68b600b8dcf45e28 Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 27 Jun 2024 17:57:48 -0400 Subject: [PATCH 11/53] dockerfile + readme + actions fastfail --- .github/workflows/actions.yml | 1 + dev/Dockerfile.cuda | 1 - dev/Dockerfile.rocm | 1 - dev/runners/Dockerfile.runner.nvidia | 72 ++++++++++++++++++++++++++++ dev/runners/README.md | 15 ++++++ 5 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 dev/runners/Dockerfile.runner.nvidia create mode 100644 dev/runners/README.md diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index 49524cf0a..c9a7a314b 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -9,6 +9,7 @@ on: jobs: tests: strategy: + fail-fast: false matrix: gpu: ["amd", "nvidia"] precision: ["double", "single"] diff --git a/dev/Dockerfile.cuda b/dev/Dockerfile.cuda index dad5b0faf..be0051150 100755 --- a/dev/Dockerfile.cuda +++ b/dev/Dockerfile.cuda @@ -1,7 +1,6 @@ # syntax = devthefuture/dockerfile-x FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 -MAINTAINER "@haykh" ENV CUDA_HOME=/usr/local/cuda ENV PATH=/usr/local/cuda/bin:$PATH diff --git a/dev/Dockerfile.rocm b/dev/Dockerfile.rocm index 739f2d856..8978b6994 100755 --- a/dev/Dockerfile.rocm +++ b/dev/Dockerfile.rocm @@ -1,7 +1,6 @@ # syntax = devthefuture/dockerfile-x FROM rocm/rocm-terminal:latest -MAINTAINER "@haykh" USER root ENV PATH=/opt/rocm/bin:$PATH diff --git a/dev/runners/Dockerfile.runner.nvidia b/dev/runners/Dockerfile.runner.nvidia new file mode 100644 index 000000000..d7925ec6a --- /dev/null +++ b/dev/runners/Dockerfile.runner.nvidia @@ -0,0 +1,72 @@ +FROM nvidia/cuda:12.5.0-devel-ubuntu22.04 + +ARG DEBIAN_FRONTEND=noninteractive +ENV DISPLAY=host.docker.internal:0.0 + +# upgrade +RUN apt-get update && apt-get upgrade -y + +# cmake & build tools +RUN apt-get remove -y --purge cmake && \ + apt-get install -y sudo wget curl build-essential && \ + wget "https://github.com/Kitware/CMake/releases/download/v3.29.6/cmake-3.29.6-linux-x86_64.tar.gz" -P /opt && \ + tar xvf /opt/cmake-3.29.6-linux-x86_64.tar.gz -C /opt && \ + rm /opt/cmake-3.29.6-linux-x86_64.tar.gz +ENV PATH=/opt/cmake-3.29.6-linux-x86_64/bin:$PATH + +# adios2 +RUN apt-get update && apt-get install -y git libhdf5-dev && \ + git clone https://github.com/ornladios/ADIOS2.git /opt/adios2-src && \ + cd /opt/adios2-src && \ + cmake -B build \ + -D CMAKE_CXX_STANDARD=17 \ + -D CMAKE_CXX_EXTENSIONS=OFF \ + -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ + -D BUILD_SHARED_LIBS=ON \ + -D ADIOS2_USE_HDF5=ON \ + -D ADIOS2_USE_Python=OFF \ + -D ADIOS2_USE_Fortran=OFF \ + -D ADIOS2_USE_ZeroMQ=OFF \ + -D BUILD_TESTING=OFF \ + -D ADIOS2_BUILD_EXAMPLES=OFF \ + -D ADIOS2_USE_MPI=OFF \ + -D ADIOS2_HAVE_HDF5_VOL=OFF \ + -D CMAKE_INSTALL_PREFIX=/opt/adios2 && \ + cmake --build build -j && \ + cmake --install build && \ + rm -rf /opt/adios2-src + +ENV CUDA_HOME=/usr/local/cuda +ENV HDF5_ROOT=/usr +ENV ADIOS2_DIR=/opt/adios2 +ENV PATH=/opt/adios2/bin:/usr/local/cuda/bin:$PATH + +# cleanup +RUN apt-get clean && \ + apt-get autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/cache/* && \ + rm -rf /var/lib/log/* && \ + rm -rf /var/lib/apt/lists/* + +ARG USER=runner +RUN useradd -ms /usr/bin/zsh $USER && \ + usermod -aG sudo $USER && \ + echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers + +USER $USER +ARG HOME=/home/$USER +WORKDIR $HOME + +# gh runner +ARG TOKEN +RUN mkdir actions-runner +WORKDIR $HOME/actions-runner + +RUN curl -o actions-runner-linux-x64-2.317.0.tar.gz \ + -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz && \ + tar xzf ./actions-runner-linux-x64-2.317.0.tar.gz && \ + sudo ./bin/installdependencies.sh && \ + ./config.sh --url https://github.com/entity-toolkit/entity --token $TOKEN --labels nvidia-gpu + +ENTRYPOINT ["./run.sh"] diff --git a/dev/runners/README.md b/dev/runners/README.md new file mode 100644 index 000000000..526068bea --- /dev/null +++ b/dev/runners/README.md @@ -0,0 +1,15 @@ +## Self-hosted runners + +GitHub allows to listen to repository changes and run the so-called "actions" (e.g., tests) if a particular event has been triggered (e.g., a pull request has been created). To test Entity on GPUs, we provide Docker runner images, with all the proper compilers already preinstalled, which can fetch the actions directly from GitHub and run them within the container environment. + +To do that, one needs to create an image with the corresponding `Dockerfile`, and then launch a Docker container which will run in the background, listening to commands and running any actions forwarded from the GitHub. + +### NVIDIA GPUs + +```sh +# 1. Create the image +docker build -t ghrunner:nvidia -f Dockerfile.runner.nvidia . +# 2. Run a container from the image with GPU support +# ... (see wiki for instructions on NVIDIA runtime) +docker run --runtime=nvidia --gpus=all -dt ghrunner:nvidia +``` From 6d4880bb8c4ad3956037a451ecaa7ac0964224ba Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 27 Jun 2024 19:12:22 -0400 Subject: [PATCH 12/53] tests for double prec --- src/framework/tests/grid_mesh.cpp | 2 +- src/framework/tests/parameters.cpp | 14 +--------- src/kernels/tests/deposit.cpp | 26 +++++++++--------- src/kernels/tests/particle_moments.cpp | 29 ++++++++++---------- src/kernels/tests/prtl_bc.cpp | 37 +++++++++++++------------- 5 files changed, 48 insertions(+), 60 deletions(-) diff --git a/src/framework/tests/grid_mesh.cpp b/src/framework/tests/grid_mesh.cpp index 17f734a19..4dea275ce 100644 --- a/src/framework/tests/grid_mesh.cpp +++ b/src/framework/tests/grid_mesh.cpp @@ -39,7 +39,7 @@ auto main(int argc, char* argv[]) -> int { HERE); raise::ErrorIf(mesh.extent(d) != ext[(unsigned short)d], "extent != ext", HERE); } - raise::ErrorIf(not cmp::AlmostEqual(mesh.metric.dxMin(), (real_t)0.1154700538), + raise::ErrorIf(not cmp::AlmostEqual(mesh.metric.dxMin(), (real_t)(0.2 / std::sqrt(3.0))), "dxMin wrong", HERE); } catch (const std::exception& e) { diff --git a/src/framework/tests/parameters.cpp b/src/framework/tests/parameters.cpp index 7b353a542..8f02d1750 100644 --- a/src/framework/tests/parameters.cpp +++ b/src/framework/tests/parameters.cpp @@ -275,10 +275,6 @@ auto main(int argc, char* argv[]) -> int { fbc.size(), "grid.boundaries.fields.size()"); - assert_equal(params_mink_1d.get("particles.nspec"), - (std::size_t)2, - "particles.nspec"); - const auto species = params_mink_1d.get>( "particles.species"); assert_equal(species[0].label(), "e-", "species[0].label"); @@ -380,10 +376,6 @@ auto main(int argc, char* argv[]) -> int { true, "particles.use_weights"); - assert_equal(params_sph_2d.get("particles.nspec"), - (std::size_t)3, - "particles.nspec"); - assert_equal(params_sph_2d.get("algorithms.gca.e_ovr_b_max"), (real_t)0.95, "algorithms.gca.e_ovr_b_max"); @@ -460,7 +452,7 @@ auto main(int argc, char* argv[]) -> int { (real_t)(0.99), "grid.metric.ks_a"); assert_equal(params_qks_2d.get("grid.metric.ks_rh"), - (real_t)(1.1410673598), + (real_t)((1.0 + std::sqrt(1 - 0.99 * 0.99))), "grid.metric.ks_rh"); const auto expect = std::map { @@ -532,10 +524,6 @@ auto main(int argc, char* argv[]) -> int { defaults::bc::absorb::coeff, "grid.boundaries.absorb.coeff"); - assert_equal(params_qks_2d.get("particles.nspec"), - (std::size_t)2, - "particles.nspec"); - const auto species = params_qks_2d.get>( "particles.species"); assert_equal(species[0].label(), "e-", "species[0].label"); diff --git a/src/kernels/tests/deposit.cpp b/src/kernels/tests/deposit.cpp index 26d2e71eb..02292c643 100644 --- a/src/kernels/tests/deposit.cpp +++ b/src/kernels/tests/deposit.cpp @@ -85,17 +85,21 @@ void testDeposit(const std::vector& res, auto J_scat = Kokkos::Experimental::create_scatter_view(J); - const real_t xi = 0.53, xf = 0.47; - const real_t yi = 0.34, yf = 0.52; + const int i0 = 4, j0 = 4; + + const prtldx_t dxi = 0.53, dxf = 0.47; + const prtldx_t dyi = 0.34, dyf = 0.52; + const real_t xi = (real_t)i0 + (real_t)dxi, xf = (real_t)i0 + (real_t)dxf; + const real_t yi = (real_t)j0 + (real_t)dyi, yf = (real_t)j0 + (real_t)dyf; const real_t xr = 0.5 * (xi + xf); const real_t yr = 0.5 * (yi + yf); - const real_t Wx1 = 0.5 * (xi + xr); - const real_t Wx2 = 0.5 * (xf + xr); + const real_t Wx1 = 0.5 * (xi + xr) - (real_t)i0; + const real_t Wx2 = 0.5 * (xf + xr) - (real_t)i0; - const real_t Wy1 = 0.5 * (yi + yr); - const real_t Wy2 = 0.5 * (yf + yr); + const real_t Wy1 = 0.5 * (yi + yr) - (real_t)j0; + const real_t Wy2 = 0.5 * (yf + yr) - (real_t)j0; const real_t Fx1 = (xr - xi); const real_t Fx2 = (xf - xr); @@ -109,16 +113,14 @@ void testDeposit(const std::vector& res, const real_t Jy1 = Fy1 * (1 - Wx1) + Fy2 * (1 - Wx2); const real_t Jy2 = Fy1 * Wx1 + Fy2 * Wx2; - const int i0 = 4, j0 = 4; - put_value(i1, i0, 0); put_value(i2, j0, 0); put_value(i1_prev, i0, 0); put_value(i2_prev, j0, 0); - put_value(dx1, xf, 0); - put_value(dx2, yf, 0); - put_value(dx1_prev, xi, 0); - put_value(dx2_prev, yi, 0); + put_value(dx1, dxf, 0); + put_value(dx2, dyf, 0); + put_value(dx1_prev, dxi, 0); + put_value(dx2_prev, dyi, 0); put_value(weight, 1.0, 0); put_value(tag, ParticleTag::alive, 0); diff --git a/src/kernels/tests/particle_moments.cpp b/src/kernels/tests/particle_moments.cpp index 29b1c3598..25ed7d4d9 100644 --- a/src/kernels/tests/particle_moments.cpp +++ b/src/kernels/tests/particle_moments.cpp @@ -225,24 +225,22 @@ void testParticleMoments(const std::vector& res, } } } - const real_t gamma_1 = math::sqrt( - ONE + v1[0] * v1[0] + v1[1] * v1[1] + v1[2] * v1[2]); - const real_t gamma_2 = math::sqrt( - ONE + v2[0] * v2[0] + v2[1] * v2[1] + v2[2] * v2[2]); + const real_t gammaSQR_1 = ONE + v1[0] * v1[0] + v1[1] * v1[1] + v1[2] * v1[2]; + const real_t gammaSQR_2 = ONE + v2[0] * v2[0] + v2[1] * v2[1] + v2[2] * v2[2]; - const real_t gamma_1_expect = math::sqrt(1.0 + 1.0 + 4.0 + 9.0); - const real_t gamma_2_expect = math::sqrt(1.0 + 9.0 + 4.0 + 1.0); + const real_t gammaSQR_1_expect = 15.0; + const real_t gammaSQR_2_expect = 15.0; - errorIf(not cmp::AlmostEqual(gamma_1, gamma_1_expect, epsilon * acc), - fmt::format("wrong gamma_1 %.8e %.8e for %dD %s", - gamma_1, - gamma_1_expect, + errorIf(not cmp::AlmostEqual_host(gammaSQR_1, gammaSQR_1_expect, epsilon * acc), + fmt::format("wrong gamma_1 %.12e %.12e for %dD %s", + gammaSQR_1, + gammaSQR_1_expect, metric.Dim, metric.Label)); - errorIf(not cmp::AlmostEqual(gamma_2, gamma_2_expect, epsilon * acc), - fmt::format("wrong gamma_2 %.8e %.8e for %dD %s", - gamma_2, - gamma_2_expect, + errorIf(not cmp::AlmostEqual_host(gammaSQR_2, gammaSQR_2_expect, epsilon * acc), + fmt::format("wrong gamma_2 %.12e %.12e for %dD %s", + gammaSQR_2, + gammaSQR_2_expect, metric.Dim, metric.Label)); } @@ -260,7 +258,8 @@ auto main(int argc, char* argv[]) -> int { 10 }, { { 0.0, 10.0 }, { 0.0, 10.0 } }, - {}); + {}, + 10); testParticleMoments>( { diff --git a/src/kernels/tests/prtl_bc.cpp b/src/kernels/tests/prtl_bc.cpp index a8817c36f..fa1103043 100644 --- a/src/kernels/tests/prtl_bc.cpp +++ b/src/kernels/tests/prtl_bc.cpp @@ -266,7 +266,6 @@ void testPeriodicBC(const std::vector& res, metric.template convert_xyz(xCd_1, xPh_1); metric.template convert_xyz(xCd_2, xPh_2); - // printf("t = %f\n", time); if constexpr (M::Dim == Dim::_1D or M::Dim == Dim::_2D or M::Dim == Dim::_3D) { xi_1 += dt * ux_1 / gamma_1; xi_2 += dt * ux_2 / gamma_2; @@ -282,12 +281,12 @@ void testPeriodicBC(const std::vector& res, if (xi_2 < extent[0].first) { xi_2 += sx; } - // printf(" x_1 = %.6e | %.6e\n", xPh_1[0], xi_1); - // printf(" x_2 = %.6e | %.6e\n", xPh_2[0], xi_2); - errorIf( - not equal(xPh_1[0], xi_1, fmt::format("xPh_1[0] != xi_1 @ t = %f", time))); - errorIf( - not equal(xPh_2[0], xi_2, fmt::format("xPh_2[0] != xi_2 @ t = %f", time))); + errorIf(not equal(xPh_1[0] / sx, + xi_1 / sx, + fmt::format("xPh_1[0] != xi_1 @ t = %f", time))); + errorIf(not equal(xPh_2[0] / sx, + xi_2 / sx, + fmt::format("xPh_2[0] != xi_2 @ t = %f", time))); } if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { yi_1 += dt * uy_1 / gamma_1; @@ -304,12 +303,12 @@ void testPeriodicBC(const std::vector& res, if (yi_2 < extent[1].first) { yi_2 += sy; } - // printf(" y_1 = %.6e | %.6e\n", xPh_1[1], yi_1); - // printf(" y_2 = %.6e | %.6e\n", xPh_2[1], yi_2); - errorIf( - not equal(xPh_1[1], yi_1, fmt::format("xPh_1[1] != yi_1 @ t = %f", time))); - errorIf( - not equal(xPh_2[1], yi_2, fmt::format("xPh_2[1] != yi_2 @ t = %f", time))); + errorIf(not equal(xPh_1[1] / sy, + yi_1 / sy, + fmt::format("xPh_1[1] != yi_1 @ t = %f", time))); + errorIf(not equal(xPh_2[1] / sy, + yi_2 / sy, + fmt::format("xPh_2[1] != yi_2 @ t = %f", time))); } if constexpr (M::Dim == Dim::_3D) { zi_1 += dt * uz_1 / gamma_1; @@ -326,12 +325,12 @@ void testPeriodicBC(const std::vector& res, if (zi_2 < extent[2].first) { zi_2 += sz; } - // printf(" z_1 = %.6e | %.6e\n", xPh_1[2], zi_1); - // printf(" z_2 = %.6e | %.6e\n", xPh_2[2], zi_2); - errorIf( - not equal(xPh_1[2], zi_1, fmt::format("xPh_1[2] != zi_1 @ t = %f", time))); - errorIf( - not equal(xPh_2[2], zi_2, fmt::format("xPh_2[2] != zi_2 @ t = %f", time))); + errorIf(not equal(xPh_1[2] / sz, + zi_1 / sz, + fmt::format("xPh_1[2] != zi_1 @ t = %f", time))); + errorIf(not equal(xPh_2[2] / sz, + zi_2 / sz, + fmt::format("xPh_2[2] != zi_2 @ t = %f", time))); } time += dt; } From 9cf718f0e73f28d37f0f98ab6cf90f8d6128dea7 Mon Sep 17 00:00:00 2001 From: hayk Date: Fri, 28 Jun 2024 21:57:55 -0400 Subject: [PATCH 13/53] new pgen for bc test --- setups/tests/srpic_bc/pgen.hpp | 172 ++++++++++++++++++++++++++++ setups/tests/srpic_bc/srpic_bc.toml | 68 +++++++++++ 2 files changed, 240 insertions(+) create mode 100644 setups/tests/srpic_bc/pgen.hpp create mode 100644 setups/tests/srpic_bc/srpic_bc.toml diff --git a/setups/tests/srpic_bc/pgen.hpp b/setups/tests/srpic_bc/pgen.hpp new file mode 100644 index 000000000..3a1ee25f5 --- /dev/null +++ b/setups/tests/srpic_bc/pgen.hpp @@ -0,0 +1,172 @@ +#ifndef PROBLEM_GENERATOR_H +#define PROBLEM_GENERATOR_H + +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "arch/traits.h" +#include "utils/numeric.h" + +#include "archetypes/energy_dist.h" +#include "archetypes/particle_injector.h" +#include "archetypes/problem_generator.h" +#include "archetypes/spatial_dist.h" +#include "framework/domain/domain.h" +#include "framework/domain/metadomain.h" + +namespace user { + using namespace ntt; + + template + struct GaussianBlob : public arch::SpatialDistribution { + GaussianBlob(const M& metric, const coord_t& c, real_t s) + : arch::SpatialDistribution { metric } + , spreadSqr { SQR(s) } { + for (auto d = 0u; d < M::Dim; ++d) { + center[d] = c[d]; + } + } + + Inline auto operator()(const coord_t& x_Ph) const -> real_t override { + // assuming spherical + auto rSqr = SQR(x_Ph[0]) + SQR(center[0]) - + TWO * x_Ph[0] * center[0] * math::cos(x_Ph[1] - center[1]); + return std::exp(-rSqr / (2 * spreadSqr)); + } + + private: + tuple_t center; + const real_t spreadSqr; + }; + + template + struct PGen : public arch::ProblemGenerator { + + // compatibility traits for the problem generator + static constexpr auto engines = traits::compatible_with::value; + static constexpr auto metrics = + traits::compatible_with::value; + static constexpr auto dimensions = traits::compatible_with::value; + + // for easy access to variables in the child class + using arch::ProblemGenerator::D; + using arch::ProblemGenerator::C; + using arch::ProblemGenerator::params; + + const real_t r0, theta0, dr; + + inline PGen(const SimulationParams& p, const Metadomain&) + : arch::ProblemGenerator { p } + , r0 { p.template get("setup.r0") } + , theta0 { p.template get("setup.theta0") } + , dr { p.template get("setup.dr") } {} + + inline void InitPrtls(Domain& domain) { + const auto injector = arch::NonUniformInjector( + arch::ColdDist(domain.mesh.metric), + GaussianBlob(domain.mesh.metric, { r0, theta0 }, dr), + { 1, 2 }); + arch::InjectNonUniform(params, + domain, + injector, + 1.0, + true); + const auto& metric = domain.mesh.metric; + for (auto& species : domain.species) { + auto i1 = species.i1; + auto i2 = species.i2; + auto dx1 = species.dx1; + auto dx2 = species.dx2; + auto phi = species.phi; + auto pld1 = species.pld[0]; + auto pld2 = species.pld[1]; + auto pld3 = species.pld[2]; + Kokkos::parallel_for( + "SavePositions", + species.npart(), + Lambda(index_t p) { + const coord_t x_Code { + static_cast(i1(p)) + static_cast(dx1(p)), + static_cast(i2(p)) + static_cast(dx2(p)), + phi(p) + }; + coord_t x_XYZ { ZERO }; + + metric.template convert_xyz(x_Code, x_XYZ); + pld1(p) = x_XYZ[0]; + pld2(p) = x_XYZ[1]; + pld3(p) = x_XYZ[2]; + }); + } + } + + void CustomPostStep(std::size_t, long double time, Domain& domain) { + const auto& metric = domain.mesh.metric; + auto& random_pool = domain.random_pool; + const auto time_dbl = static_cast(time); + for (auto s = 0u; s < domain.species.size(); ++s) { + auto& species = domain.species[s]; + auto ux1 = species.ux1; + auto ux2 = species.ux2; + auto ux3 = species.ux3; + auto i1 = species.i1; + auto i2 = species.i2; + auto dx1 = species.dx1; + auto dx2 = species.dx2; + auto phi = species.phi; + auto pld1 = species.pld[0]; + auto pld2 = species.pld[1]; + auto pld3 = species.pld[2]; + Kokkos::parallel_for( + "SavePositions", + species.npart(), + ClassLambda(index_t p) { + const coord_t x_Code { + static_cast(i1(p)) + static_cast(dx1(p)), + static_cast(i2(p)) + static_cast(dx2(p)), + phi(p) + }; + coord_t x_XYZ { ZERO }; + + metric.template convert_xyz(x_Code, x_XYZ); + auto rand_gen = random_pool.get_state(); + if (s == 0) { + const auto dist = math::sqrt(SQR(x_XYZ[0] - pld1(p)) + + SQR(x_XYZ[1] - pld2(p)) + + SQR(x_XYZ[2] - pld3(p))); + if (time_dbl < 20.0) { + ux1(p) = 0.0; + ux2(p) = 0.0; + ux3(p) = -0.9; + } else if (time_dbl < 25.0) { + ux1(p) = 0.9; + ux2(p) = 0.0; + ux3(p) = 0.0; + } else if (time_dbl < 45.0) { + ux1(p) = 0.0; + ux2(p) = 0.0; + ux3(p) = 0.9; + } else if (time_dbl < 50.0) { + ux1(p) = 0.9; + ux2(p) = 0.0; + ux3(p) = 0.0; + } else { + ux1(p) = 5.0 * (Random(rand_gen) - HALF); + ux2(p) = 5.0 * (Random(rand_gen) - HALF); + ux3(p) = 5.0 * (Random(rand_gen) - HALF); + } + } + // add a slight spread + ux1(p) += 0.002 * (Random(rand_gen) - HALF); + ux2(p) += 0.002 * (Random(rand_gen) - HALF); + ux3(p) += 0.002 * (Random(rand_gen) - HALF); + random_pool.free_state(rand_gen); + }); + } + } + }; + +} // namespace user + +#endif \ No newline at end of file diff --git a/setups/tests/srpic_bc/srpic_bc.toml b/setups/tests/srpic_bc/srpic_bc.toml new file mode 100644 index 000000000..7b4aa3b84 --- /dev/null +++ b/setups/tests/srpic_bc/srpic_bc.toml @@ -0,0 +1,68 @@ +[simulation] + name = "srpic_bc" + engine = "srpic" + runtime = 100.0 + +[grid] + resolution = [512, 512] + extent = [[1.0, 30.0]] + + [grid.metric] + metric = "qspherical" + + [grid.boundaries] + fields = [["CONDUCTOR", "CONDUCTOR"]] + particles = [["REFLECT", "REFLECT"]] + +[scales] + larmor0 = 1e-4 + skindepth0 = 0.01 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 64.0 + use_weights = true + sort_interval = 100 + + [[particles.species]] + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e4 + pusher = "Boris" + n_payloads = 3 + + [[particles.species]] + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e4 + pusher = "Boris" + n_payloads = 3 + +[setup] + r0 = 10.0 + theta0 = 0.3 + dr = 0.2 + +[output] + format = "hdf5" + + [output.fields] + interval_time = 1.0 + quantities = ["N_1", "N_2", "E", "B", "T00"] + + [output.particles] + enable = false + + [output.spectra] + enable = false + +[diagnostics] + interval = 5 + colored_stdout = true From 0d4db39c1d8eca744931d69de9d721fb6dc5121a Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 1 Jul 2024 05:27:48 +0000 Subject: [PATCH 14/53] linked stdc++fs --- src/global/CMakeLists.txt | 3 ++- src/output/tests/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/global/CMakeLists.txt b/src/global/CMakeLists.txt index 1038bc4b7..d00689283 100644 --- a/src/global/CMakeLists.txt +++ b/src/global/CMakeLists.txt @@ -22,4 +22,5 @@ add_library(ntt_global ${SOURCES}) target_include_directories(ntt_global PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} INTERFACE ${CMAKE_CURRENT_SOURCE_DIR} -) \ No newline at end of file +) +target_link_libraries(ntt_global PRIVATE stdc++fs) \ No newline at end of file diff --git a/src/output/tests/CMakeLists.txt b/src/output/tests/CMakeLists.txt index 926a3a89e..d33cc6c54 100644 --- a/src/output/tests/CMakeLists.txt +++ b/src/output/tests/CMakeLists.txt @@ -17,7 +17,7 @@ function(gen_test title) set (libs ntt_output ntt_global ntt_metrics ntt_framework) add_dependencies(${exec} ${libs}) - target_link_libraries(${exec} PRIVATE ${libs}) + target_link_libraries(${exec} PRIVATE ${libs} stdc++fs) add_test(NAME "OUTPUT::${title}" COMMAND "${exec}") endfunction() From 74ee866e8a72dd75c672da114de56a6edc3aa70f Mon Sep 17 00:00:00 2001 From: hayk Date: Fri, 5 Jul 2024 04:35:56 -0400 Subject: [PATCH 15/53] minor bug in srpic::currentampere range --- src/engines/srpic.hpp | 25 ++++++++++++++++++------- src/kernels/ampere_sr.hpp | 8 ++++---- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index ea01f591b..e5fd78929 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -538,11 +538,22 @@ namespace ntt { coeff / V0, ONE / n0)); } else { - range_t range {}; + auto range = domain.mesh.rangeActiveCells(); if constexpr (M::Dim == Dim::_2D) { - range = CreateRangePolicy( - { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, - { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); + if (domain.mesh.flds_bc_in({ 0, +1 }) == FldsBC::AXIS) { + range = CreateRangePolicy( + { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, + { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); + } + } else if constexpr (M::Dim == Dim::_3D) { + if (domain.mesh.flds_bc_in({ 0, +1, 0 }) == FldsBC::AXIS) { + range = CreateRangePolicy({ domain.mesh.i_min(in::x1), + domain.mesh.i_min(in::x2), + domain.mesh.i_min(in::x3) }, + { domain.mesh.i_max(in::x1), + domain.mesh.i_max(in::x2) + 1, + domain.mesh.i_max(in::x3) }); + } } const auto ni2 = domain.mesh.n_active(in::x2); Kokkos::parallel_for( @@ -572,13 +583,13 @@ namespace ntt { * . . . . . */ if constexpr (M::Dim == Dim::_2D) { - if (domain.mesh.flds_bc_in({ +1, 0 }) == FldsBC::AXIS) { + if (domain.mesh.flds_bc_in({ 0, +1 }) == FldsBC::AXIS) { range = CreateRangePolicy( { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); } } else if constexpr (M::Dim == Dim::_3D) { - if (domain.mesh.flds_bc_in({ +1, 0, 0 }) == FldsBC::AXIS) { + if (domain.mesh.flds_bc_in({ 0, +1, 0 }) == FldsBC::AXIS) { range = CreateRangePolicy({ domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2), domain.mesh.i_min(in::x3) }, @@ -600,7 +611,7 @@ namespace ntt { if constexpr (M::Dim == Dim::_3D) { size[2] = domain.mesh.n_active(in::x3); } - // !TODO: this needs to be done more efficiently + // @TODO: this needs to be done more efficiently for (unsigned short i = 0; i < nfilter; ++i) { Kokkos::deep_copy(domain.fields.buff, domain.fields.cur); Kokkos::parallel_for("CurrentsFilter", diff --git a/src/kernels/ampere_sr.hpp b/src/kernels/ampere_sr.hpp index 6dde42ff0..e4faec6ce 100644 --- a/src/kernels/ampere_sr.hpp +++ b/src/kernels/ampere_sr.hpp @@ -122,7 +122,8 @@ namespace kernel::sr { */ template class CurrentsAmpere_kernel { - static constexpr auto D = M::Dim; + static constexpr auto D = M::Dim; + static constexpr std::size_t i2min = N_GHOSTS; ndfield_t E; ndfield_t J; @@ -161,9 +162,8 @@ namespace kernel::sr { Inline void operator()(index_t i1, index_t i2) const { if constexpr (D == Dim::_2D) { - constexpr std::size_t i2min { N_GHOSTS }; - const real_t i1_ { COORD(i1) }; - const real_t i2_ { COORD(i2) }; + const real_t i1_ { COORD(i1) }; + const real_t i2_ { COORD(i2) }; // convert the "curly" current, to contravariant, normalized to // `J0=n0*q0` then add "curly" current with the right coefficient From 657505386dcb4f64680d7e584c73e4a72a30cf71 Mon Sep 17 00:00:00 2001 From: hayk Date: Sat, 6 Jul 2024 21:17:22 -0400 Subject: [PATCH 16/53] pgen-specific field output --- input.example.toml | 4 ++++ src/engines/engine_run.cpp | 15 +++++++++++- src/framework/domain/metadomain.h | 8 ++++++- src/framework/domain/output.cpp | 40 +++++++++++++++++++++++-------- src/framework/parameters.cpp | 8 ++++++- src/global/arch/traits.h | 4 ++++ src/global/enums.h | 15 ++++++------ src/global/tests/enums.cpp | 5 ++-- src/output/fields.cpp | 10 +++++--- src/output/fields.h | 5 ++++ 10 files changed, 89 insertions(+), 25 deletions(-) diff --git a/input.example.toml b/input.example.toml index 2423ffff0..88589495c 100644 --- a/input.example.toml +++ b/input.example.toml @@ -336,6 +336,10 @@ # @note: For T, in cartesian can also use "x" "y" "z" instead of "1" "2" "3" # @note: By default, we accumulate moments from all massive species, one can specify only specific species: e.g., Ttt_1_2, Rho_1, Rho_3_4 quantities = "" + # Custom (user-defined) field quantities: + # @type: array of strings + # @default: [] + custom = "" # @NOT_IMPLEMENTED: Stride for the output of fields: # @type: unsigned short: > 1 # @default: 1 diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index f69b4301e..771287c0d 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -61,7 +61,20 @@ namespace ntt { auto print_output = false; #if defined(OUTPUT_ENABLED) timers.start("Output"); - print_output = m_metadomain.Write(m_params, step, time); + if constexpr ( + traits::has_method::value) { + auto lambda_custom_field_output = [&m_pgen](const std::string& name, + ndfield_t& buff, + std::size_t idx) { + m_pgen.CustomFieldOutput(name, buff, idx); + }; + print_output = m_metadomain.Write(m_params, + step, + time, + lambda_custom_field_output); + } else { + print_output = m_metadomain.Write(m_params, step, time); + } timers.stop("Output"); #endif diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 74e5da2ce..a44fb3c09 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -33,6 +33,7 @@ #include "output/writer.h" #endif +#include #include #include #include @@ -111,7 +112,12 @@ namespace ntt { #if defined(OUTPUT_ENABLED) void InitWriter(const SimulationParams&); - auto Write(const SimulationParams&, std::size_t, long double) -> bool; + auto Write( + const SimulationParams&, + std::size_t, + long double, + std::function&, std::size_t)> = {}) + -> bool; #endif Metadomain(const Metadomain&) = delete; diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 77c41ee25..243ceb614 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -25,6 +25,8 @@ #include #include +#include +#include #include namespace ntt { @@ -61,9 +63,17 @@ namespace ntt { M::CoordType); const auto fields_to_write = params.template get>( "output.fields.quantities"); + const auto custom_fields_to_write = params.template get>( + "output.fields.custom"); + const std::vector all_fields_to_write; + std::merge(fields_to_write.begin(), + fields_to_write.end(), + custom_fields_to_write.begin(), + custom_fields_to_write.end(), + std::back_inserter(all_fields_to_write)); const auto species_to_write = params.template get>( "output.particles.species"); - g_writer.defineFieldOutputs(S, fields_to_write); + g_writer.defineFieldOutputs(S, all_fields_to_write); g_writer.defineParticleOutputs(M::PrtlDim, species_to_write); // spectra write all particle species std::vector spectra_species {}; @@ -153,9 +163,12 @@ namespace ntt { } template - auto Metadomain::Write(const SimulationParams& params, - std::size_t step, - long double time) -> bool { + auto Metadomain::Write( + const SimulationParams& params, + std::size_t step, + long double time, + std::function&, std::size_t)> CustomOutput) + -> bool { raise::ErrorIf( local_subdomain_indices().size() != 1, "Output for now is only supported for one subdomain per rank", @@ -278,14 +291,21 @@ namespace ntt { } else { raise::Error("Wrong moment requested for output", HERE); } - SynchronizeFields(*local_domain, - Comm::Bckp, - { addresses.back(), addresses.back() + 1 }); + } else if (fld.is_custom()) { + if (CustomOutput) { + CustomOutput(fld.name(), local_domain->fields.bckp, addresses.back()); + } else { + raise::Error("Custom output requested but no function provided", + HERE); + } } else { - raise::Error( - "Wrong # of components requested for non-moment output", - HERE); + raise::Error("Wrong # of components requested for " + "non-moment/non-custom output", + HERE); } + SynchronizeFields(*local_domain, + Comm::Bckp, + { addresses.back(), addresses.back() + 1 }); } else if (fld.comp.size() == 3) { // vector for (auto i = 0; i < 3; ++i) { names.push_back(fld.name(i)); diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index 19f9a4683..6e581a6b7 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -383,15 +383,21 @@ namespace ntt { promiseToDefine("output.spectra.interval_time"); promiseToDefine("output.spectra.enable"); - const auto flds_out = toml::find_or(raw_data, + const auto flds_out = toml::find_or(raw_data, "output", "fields", "quantities", std::vector {}); + const auto custom_flds_out = toml::find_or(raw_data, + "output", + "fields", + "custom", + std::vector {}); if (flds_out.size() == 0) { raise::Warning("No fields output specified", HERE); } set("output.fields.quantities", flds_out); + set("output.fields.custom", custom_flds_out); set("output.fields.mom_smooth", toml::find_or(raw_data, "output", diff --git a/src/global/arch/traits.h b/src/global/arch/traits.h index ea93a8571..e915bdf1a 100644 --- a/src/global/arch/traits.h +++ b/src/global/arch/traits.h @@ -13,6 +13,7 @@ * - traits::pgen::field_driver_t * - traits::pgen::init_prtls_t * - traits::pgen::custom_fields_t + * - traits::pgen::custom_field_output_t * - traits::pgen::custom_poststep_t * - traits::check_compatibility<> * - traits::compatibility<> @@ -100,6 +101,9 @@ namespace traits { template using custom_poststep_t = decltype(&T::CustomPostStep); + + template + using custom_field_output_t = decltype(&T::CustomFieldOutput); } // namespace pgen // for pgen extforce diff --git a/src/global/enums.h b/src/global/enums.h index 2dc492dac..57822dec4 100644 --- a/src/global/enums.h +++ b/src/global/enums.h @@ -13,7 +13,7 @@ * - enum ntt::PrtlPusher // boris, vay, photon, none * - enum ntt::Cooling // synchrotron, none * - enum ntt::FldsID // e, dive, d, divd, b, h, j, - * a, t, rho, charge, n, nppc + * a, t, rho, charge, n, nppc, custom * @namespaces: * - ntt:: * @note Enums of the same type can be compared with each other and with strings @@ -288,16 +288,17 @@ namespace ntt { Charge = 11, N = 12, Nppc = 13, + Custom = 14, }; constexpr FldsID(uint8_t c) : enums_hidden::BaseEnum { c } {} - static constexpr type variants[] = { E, divE, D, divD, B, H, J, - A, T, Rho, Charge, N, Nppc }; - static constexpr const char* lookup[] = { "e", "dive", "d", "divd", - "b", "h", "j", "a", - "t", "rho", "charge", "n", - "nppc" }; + static constexpr type variants[] = { E, divE, D, divD, B, H, J, + A, T, Rho, Charge, N, Nppc, Custom }; + static constexpr const char* lookup[] = { "e", "dive", "d", "divd", + "b", "h", "j", "a", + "t", "rho", "charge", "n", + "nppc", "custom" }; static constexpr std::size_t total = sizeof(variants) / sizeof(variants[0]); }; diff --git a/src/global/tests/enums.cpp b/src/global/tests/enums.cpp index 81e63d57a..1fc57398f 100644 --- a/src/global/tests/enums.cpp +++ b/src/global/tests/enums.cpp @@ -66,8 +66,9 @@ auto main() -> int { enum_str_t all_particle_pushers = { "boris", "vay", "photon", "none" }; enum_str_t all_coolings = { "synchrotron", "none" }; - enum_str_t all_out_flds = { "e", "dive", "d", "divd", "b", "h", "j", - "a", "t", "rho", "charge", "n", "nppc" }; + enum_str_t all_out_flds = { "e", "dive", "d", "divd", "b", + "h", "j", "a", "t", "rho", + "charge", "n", "nppc", "custom" }; checkEnum(all_coords); checkEnum(all_metrics); diff --git a/src/output/fields.cpp b/src/output/fields.cpp index e6a1f4e12..5f9b1bd47 100644 --- a/src/output/fields.cpp +++ b/src/output/fields.cpp @@ -24,7 +24,11 @@ namespace out { const auto pos = name.find("_"); auto name_raw = (pos == std::string::npos) ? name : name.substr(0, pos); name_raw = name_raw.substr(0, name_raw.find_first_of("0123ijxyzt")); - m_id = FldsID::pick(fmt::toLower(name_raw).c_str()); + if (FldsID::contains(fmt::toLower(name_raw).c_str())) { + m_id = FldsID::pick(fmt::toLower(name_raw).c_str()); + } else { + m_id = FldsID::Custom; + } // determine the species and components to output if (is_moment()) { species = InterpretSpecies(name); @@ -41,11 +45,11 @@ namespace out { // energy-momentum tensor comp = InterpretComponents({ name.substr(1, 1), name.substr(2, 1) }); } else { - // scalar (Rho, divE, etc.) + // scalar (Rho, divE, Custom, etc.) comp = {}; } // data preparation flags - if (not is_moment()) { + if (not is_moment() && not is_custom()) { if (S == SimEngine::SRPIC) { prepare_flag = PrepareOutput::ConvertToHat; } else { diff --git a/src/output/fields.h b/src/output/fields.h index d1399e981..dcc2deb0a 100644 --- a/src/output/fields.h +++ b/src/output/fields.h @@ -77,6 +77,11 @@ namespace out { return (id() == FldsID::A); } + [[nodiscard]] + auto is_custom() const -> bool { + return (id() == FldsID::Custom); + } + [[nodiscard]] inline auto name() const -> std::string { // generate the name From 1d106cca3be2c07cb6676109c50e0ac97bbc0382 Mon Sep 17 00:00:00 2001 From: hayk Date: Sat, 6 Jul 2024 21:33:37 -0400 Subject: [PATCH 17/53] (minor) rm const --- src/framework/domain/output.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 243ceb614..290580a85 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -65,7 +65,7 @@ namespace ntt { "output.fields.quantities"); const auto custom_fields_to_write = params.template get>( "output.fields.custom"); - const std::vector all_fields_to_write; + std::vector all_fields_to_write; std::merge(fields_to_write.begin(), fields_to_write.end(), custom_fields_to_write.begin(), From eb88241750e274c09b732b257787f8e127def949 Mon Sep 17 00:00:00 2001 From: hayk Date: Sat, 6 Jul 2024 21:38:58 -0400 Subject: [PATCH 18/53] (minor) naming --- src/engines/engine_run.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index 771287c0d..0809b0968 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -62,7 +62,7 @@ namespace ntt { #if defined(OUTPUT_ENABLED) timers.start("Output"); if constexpr ( - traits::has_method::value) { + traits::has_method::value) { auto lambda_custom_field_output = [&m_pgen](const std::string& name, ndfield_t& buff, std::size_t idx) { From 9cddc371238a7461a44473fa5952319ef8bb91aa Mon Sep 17 00:00:00 2001 From: hayk Date: Sat, 6 Jul 2024 21:41:16 -0400 Subject: [PATCH 19/53] (minor) address in lambda --- src/engines/engine_run.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index 0809b0968..1734dddc5 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -63,9 +63,9 @@ namespace ntt { timers.start("Output"); if constexpr ( traits::has_method::value) { - auto lambda_custom_field_output = [&m_pgen](const std::string& name, - ndfield_t& buff, - std::size_t idx) { + auto lambda_custom_field_output = [&](const std::string& name, + ndfield_t& buff, + std::size_t idx) { m_pgen.CustomFieldOutput(name, buff, idx); }; print_output = m_metadomain.Write(m_params, From e9c3baa51f4eda6b6c7ac8fc2152327057090d19 Mon Sep 17 00:00:00 2001 From: hayk Date: Sat, 6 Jul 2024 23:02:02 -0400 Subject: [PATCH 20/53] added range as arg --- src/engines/engine_run.cpp | 9 +++++---- src/framework/domain/metadomain.h | 14 ++++++++------ src/framework/domain/output.cpp | 12 ++++++++---- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index 1734dddc5..1694db15c 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -63,10 +63,11 @@ namespace ntt { timers.start("Output"); if constexpr ( traits::has_method::value) { - auto lambda_custom_field_output = [&](const std::string& name, - ndfield_t& buff, - std::size_t idx) { - m_pgen.CustomFieldOutput(name, buff, idx); + auto lambda_custom_field_output = [&](const std::string& name, + ndfield_t& buff, + std::size_t idx, + const range_t& range) { + m_pgen.CustomFieldOutput(name, buff, idx, range); }; print_output = m_metadomain.Write(m_params, step, diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index a44fb3c09..375c476c0 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -18,6 +18,7 @@ #include "enums.h" #include "global.h" +#include "arch/kokkos_aliases.h" #include "utils/timer.h" #include "framework/containers/species.h" @@ -112,12 +113,13 @@ namespace ntt { #if defined(OUTPUT_ENABLED) void InitWriter(const SimulationParams&); - auto Write( - const SimulationParams&, - std::size_t, - long double, - std::function&, std::size_t)> = {}) - -> bool; + auto Write(const SimulationParams&, + std::size_t, + long double, + std::function&, + std::size_t, + const range_t&)> = {}) -> bool; #endif Metadomain(const Metadomain&) = delete; diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 290580a85..2f6d75cb2 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -167,8 +167,9 @@ namespace ntt { const SimulationParams& params, std::size_t step, long double time, - std::function&, std::size_t)> CustomOutput) - -> bool { + std::function< + void(const std::string&, ndfield_t&, std::size_t, const range_t&)> + CustomFieldOutput) -> bool { raise::ErrorIf( local_subdomain_indices().size() != 1, "Output for now is only supported for one subdomain per rank", @@ -292,8 +293,11 @@ namespace ntt { raise::Error("Wrong moment requested for output", HERE); } } else if (fld.is_custom()) { - if (CustomOutput) { - CustomOutput(fld.name(), local_domain->fields.bckp, addresses.back()); + if (CustomFieldOutput) { + CustomFieldOutput(fld.name(), + local_domain->fields.bckp, + addresses.back(), + local_domain->mesh.rangeActiveCells()); } else { raise::Error("Custom output requested but no function provided", HERE); From 25b40096939e424e30994a49344f83421fe92b7a Mon Sep 17 00:00:00 2001 From: hayk Date: Sun, 7 Jul 2024 04:18:56 -0400 Subject: [PATCH 21/53] example added + simple bugfixes --- setups/tests/customout/customout.toml | 50 ++++++++++++++++++ setups/tests/customout/pgen.hpp | 73 +++++++++++++++++++++++++++ src/framework/domain/output.cpp | 2 +- src/output/fields.cpp | 3 +- src/output/fields.h | 35 +++++++------ 5 files changed, 146 insertions(+), 17 deletions(-) create mode 100644 setups/tests/customout/customout.toml create mode 100644 setups/tests/customout/pgen.hpp diff --git a/setups/tests/customout/customout.toml b/setups/tests/customout/customout.toml new file mode 100644 index 000000000..e9c7671d6 --- /dev/null +++ b/setups/tests/customout/customout.toml @@ -0,0 +1,50 @@ +[simulation] + name = "customout" + engine = "srpic" + runtime = 10.0 + +[grid] + resolution = [256, 256] + extent = [[-1.0, 1.0], [-1.0, 1.0]] + + [grid.metric] + metric = "minkowski" + + [grid.boundaries] + fields = [["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"]] + +[scales] + larmor0 = 0.01 + skindepth0 = 0.01 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 20.0 + + [[particles.species]] + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e7 + pusher = "Boris" + + [[particles.species]] + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e7 + pusher = "Boris" + +[output] + format = "hdf5" + interval_time = 0.02 + + [output.fields] + quantities = ["E", "B", "J"] + custom = ["mybuff"] diff --git a/setups/tests/customout/pgen.hpp b/setups/tests/customout/pgen.hpp new file mode 100644 index 000000000..5d1ff2a6a --- /dev/null +++ b/setups/tests/customout/pgen.hpp @@ -0,0 +1,73 @@ +#ifndef PROBLEM_GENERATOR_H +#define PROBLEM_GENERATOR_H + +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "arch/traits.h" + +#include "archetypes/problem_generator.h" +#include "framework/domain/metadomain.h" + +namespace user { + using namespace ntt; + + template + struct PGen : public arch::ProblemGenerator { + // compatibility traits for the problem generator + static constexpr auto engines { traits::compatible_with::value }; + static constexpr auto metrics { traits::compatible_with::value }; + static constexpr auto dimensions { traits::compatible_with::value }; + + // for easy access to variables in the child class + using arch::ProblemGenerator::D; + using arch::ProblemGenerator::C; + using arch::ProblemGenerator::params; + + array_t cbuff; + + inline PGen(const SimulationParams& p, const Metadomain&) + : arch::ProblemGenerator(p) {} + + inline PGen() {} + + void CustomPostStep(std::size_t step, long double, Domain& domain) { + if (step == 0) { + // allocate the array at time = 0 + cbuff = array_t("cbuff", + domain.mesh.n_all(in::x1), + domain.mesh.n_all(in::x2)); + } + // fill with zeros + Kokkos::deep_copy(cbuff, ZERO); + // populate the array atomically (here it's not strictly necessary) + auto cbuff_sc = Kokkos::Experimental::create_scatter_view(cbuff); + Kokkos::parallel_for( + "FillCbuff", + domain.mesh.rangeActiveCells(), + Lambda(index_t i1, index_t i2) { + auto cbuff_acc = cbuff_sc.access(); + cbuff_acc(i1, i2) += static_cast(i1 + i2); + }); + Kokkos::Experimental::contribute(cbuff, cbuff_sc); + } + + void CustomFieldOutput(const std::string& name, + ndfield_t buffer, + std::size_t index, + const range_t) { + printf("CustomFieldOutput: %s\n", name.c_str()); + if (name == "mybuff") { + // copy the custom buffer to the buffer output + Kokkos::deep_copy(Kokkos::subview(buffer, Kokkos::ALL, Kokkos::ALL, index), + cbuff); + } else { + raise::Error("Custom output not provided", HERE); + } + } + }; + +} // namespace user + +#endif diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 2f6d75cb2..272cf1134 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -294,7 +294,7 @@ namespace ntt { } } else if (fld.is_custom()) { if (CustomFieldOutput) { - CustomFieldOutput(fld.name(), + CustomFieldOutput(fld.name().substr(1), local_domain->fields.bckp, addresses.back(), local_domain->mesh.rangeActiveCells()); diff --git a/src/output/fields.cpp b/src/output/fields.cpp index 5f9b1bd47..aa5a752d4 100644 --- a/src/output/fields.cpp +++ b/src/output/fields.cpp @@ -62,7 +62,8 @@ namespace out { interp_flag = PrepareOutput::InterpToCellCenterFromEdges; } else if (is_field() || is_gr_aux_field()) { interp_flag = PrepareOutput::InterpToCellCenterFromFaces; - } else if (not(is_moment() || is_vpotential() || is_divergence())) { + } else if ( + not(is_moment() || is_vpotential() || is_divergence() || is_custom())) { raise::Error("Unrecognized field type for output", HERE); } } diff --git a/src/output/fields.h b/src/output/fields.h index dcc2deb0a..a520a246d 100644 --- a/src/output/fields.h +++ b/src/output/fields.h @@ -85,24 +85,29 @@ namespace out { [[nodiscard]] inline auto name() const -> std::string { // generate the name - auto tmp = std::string(id().to_string()); - if (id() == FldsID::T) { - tmp += m_name.substr(1, 2); - } else if (id() == FldsID::A) { - tmp += "3"; - } else if (is_field()) { - tmp += "i"; - } - if (species.size() > 0) { - tmp += "_"; - for (auto& s : species) { - tmp += std::to_string(s); + std::string tmp; + if (id() == FldsID::Custom) { + tmp = m_name; + } else { + tmp = std::string(id().to_string()); + if (id() == FldsID::T) { + tmp += m_name.substr(1, 2); + } else if (id() == FldsID::A) { + tmp += "3"; + } else if (is_field()) { + tmp += "i"; + } + if (species.size() > 0) { tmp += "_"; + for (auto& s : species) { + tmp += std::to_string(s); + tmp += "_"; + } + tmp.pop_back(); } - tmp.pop_back(); + // capitalize the first letter + tmp[0] = std::toupper(tmp[0]); } - // capitalize the first letter - tmp[0] = std::toupper(tmp[0]); return "f" + tmp; } From 959cf5e5c6867852c5bd19e8973806b98e81d791 Mon Sep 17 00:00:00 2001 From: hayk Date: Fri, 28 Jun 2024 21:57:55 -0400 Subject: [PATCH 22/53] new pgen for bc test --- setups/tests/srpic_bc/pgen.hpp | 172 ++++++++++++++++++++++++++++ setups/tests/srpic_bc/srpic_bc.toml | 68 +++++++++++ 2 files changed, 240 insertions(+) create mode 100644 setups/tests/srpic_bc/pgen.hpp create mode 100644 setups/tests/srpic_bc/srpic_bc.toml diff --git a/setups/tests/srpic_bc/pgen.hpp b/setups/tests/srpic_bc/pgen.hpp new file mode 100644 index 000000000..3a1ee25f5 --- /dev/null +++ b/setups/tests/srpic_bc/pgen.hpp @@ -0,0 +1,172 @@ +#ifndef PROBLEM_GENERATOR_H +#define PROBLEM_GENERATOR_H + +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "arch/traits.h" +#include "utils/numeric.h" + +#include "archetypes/energy_dist.h" +#include "archetypes/particle_injector.h" +#include "archetypes/problem_generator.h" +#include "archetypes/spatial_dist.h" +#include "framework/domain/domain.h" +#include "framework/domain/metadomain.h" + +namespace user { + using namespace ntt; + + template + struct GaussianBlob : public arch::SpatialDistribution { + GaussianBlob(const M& metric, const coord_t& c, real_t s) + : arch::SpatialDistribution { metric } + , spreadSqr { SQR(s) } { + for (auto d = 0u; d < M::Dim; ++d) { + center[d] = c[d]; + } + } + + Inline auto operator()(const coord_t& x_Ph) const -> real_t override { + // assuming spherical + auto rSqr = SQR(x_Ph[0]) + SQR(center[0]) - + TWO * x_Ph[0] * center[0] * math::cos(x_Ph[1] - center[1]); + return std::exp(-rSqr / (2 * spreadSqr)); + } + + private: + tuple_t center; + const real_t spreadSqr; + }; + + template + struct PGen : public arch::ProblemGenerator { + + // compatibility traits for the problem generator + static constexpr auto engines = traits::compatible_with::value; + static constexpr auto metrics = + traits::compatible_with::value; + static constexpr auto dimensions = traits::compatible_with::value; + + // for easy access to variables in the child class + using arch::ProblemGenerator::D; + using arch::ProblemGenerator::C; + using arch::ProblemGenerator::params; + + const real_t r0, theta0, dr; + + inline PGen(const SimulationParams& p, const Metadomain&) + : arch::ProblemGenerator { p } + , r0 { p.template get("setup.r0") } + , theta0 { p.template get("setup.theta0") } + , dr { p.template get("setup.dr") } {} + + inline void InitPrtls(Domain& domain) { + const auto injector = arch::NonUniformInjector( + arch::ColdDist(domain.mesh.metric), + GaussianBlob(domain.mesh.metric, { r0, theta0 }, dr), + { 1, 2 }); + arch::InjectNonUniform(params, + domain, + injector, + 1.0, + true); + const auto& metric = domain.mesh.metric; + for (auto& species : domain.species) { + auto i1 = species.i1; + auto i2 = species.i2; + auto dx1 = species.dx1; + auto dx2 = species.dx2; + auto phi = species.phi; + auto pld1 = species.pld[0]; + auto pld2 = species.pld[1]; + auto pld3 = species.pld[2]; + Kokkos::parallel_for( + "SavePositions", + species.npart(), + Lambda(index_t p) { + const coord_t x_Code { + static_cast(i1(p)) + static_cast(dx1(p)), + static_cast(i2(p)) + static_cast(dx2(p)), + phi(p) + }; + coord_t x_XYZ { ZERO }; + + metric.template convert_xyz(x_Code, x_XYZ); + pld1(p) = x_XYZ[0]; + pld2(p) = x_XYZ[1]; + pld3(p) = x_XYZ[2]; + }); + } + } + + void CustomPostStep(std::size_t, long double time, Domain& domain) { + const auto& metric = domain.mesh.metric; + auto& random_pool = domain.random_pool; + const auto time_dbl = static_cast(time); + for (auto s = 0u; s < domain.species.size(); ++s) { + auto& species = domain.species[s]; + auto ux1 = species.ux1; + auto ux2 = species.ux2; + auto ux3 = species.ux3; + auto i1 = species.i1; + auto i2 = species.i2; + auto dx1 = species.dx1; + auto dx2 = species.dx2; + auto phi = species.phi; + auto pld1 = species.pld[0]; + auto pld2 = species.pld[1]; + auto pld3 = species.pld[2]; + Kokkos::parallel_for( + "SavePositions", + species.npart(), + ClassLambda(index_t p) { + const coord_t x_Code { + static_cast(i1(p)) + static_cast(dx1(p)), + static_cast(i2(p)) + static_cast(dx2(p)), + phi(p) + }; + coord_t x_XYZ { ZERO }; + + metric.template convert_xyz(x_Code, x_XYZ); + auto rand_gen = random_pool.get_state(); + if (s == 0) { + const auto dist = math::sqrt(SQR(x_XYZ[0] - pld1(p)) + + SQR(x_XYZ[1] - pld2(p)) + + SQR(x_XYZ[2] - pld3(p))); + if (time_dbl < 20.0) { + ux1(p) = 0.0; + ux2(p) = 0.0; + ux3(p) = -0.9; + } else if (time_dbl < 25.0) { + ux1(p) = 0.9; + ux2(p) = 0.0; + ux3(p) = 0.0; + } else if (time_dbl < 45.0) { + ux1(p) = 0.0; + ux2(p) = 0.0; + ux3(p) = 0.9; + } else if (time_dbl < 50.0) { + ux1(p) = 0.9; + ux2(p) = 0.0; + ux3(p) = 0.0; + } else { + ux1(p) = 5.0 * (Random(rand_gen) - HALF); + ux2(p) = 5.0 * (Random(rand_gen) - HALF); + ux3(p) = 5.0 * (Random(rand_gen) - HALF); + } + } + // add a slight spread + ux1(p) += 0.002 * (Random(rand_gen) - HALF); + ux2(p) += 0.002 * (Random(rand_gen) - HALF); + ux3(p) += 0.002 * (Random(rand_gen) - HALF); + random_pool.free_state(rand_gen); + }); + } + } + }; + +} // namespace user + +#endif \ No newline at end of file diff --git a/setups/tests/srpic_bc/srpic_bc.toml b/setups/tests/srpic_bc/srpic_bc.toml new file mode 100644 index 000000000..7b4aa3b84 --- /dev/null +++ b/setups/tests/srpic_bc/srpic_bc.toml @@ -0,0 +1,68 @@ +[simulation] + name = "srpic_bc" + engine = "srpic" + runtime = 100.0 + +[grid] + resolution = [512, 512] + extent = [[1.0, 30.0]] + + [grid.metric] + metric = "qspherical" + + [grid.boundaries] + fields = [["CONDUCTOR", "CONDUCTOR"]] + particles = [["REFLECT", "REFLECT"]] + +[scales] + larmor0 = 1e-4 + skindepth0 = 0.01 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 64.0 + use_weights = true + sort_interval = 100 + + [[particles.species]] + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e4 + pusher = "Boris" + n_payloads = 3 + + [[particles.species]] + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e4 + pusher = "Boris" + n_payloads = 3 + +[setup] + r0 = 10.0 + theta0 = 0.3 + dr = 0.2 + +[output] + format = "hdf5" + + [output.fields] + interval_time = 1.0 + quantities = ["N_1", "N_2", "E", "B", "T00"] + + [output.particles] + enable = false + + [output.spectra] + enable = false + +[diagnostics] + interval = 5 + colored_stdout = true From 13c164decf7497d9e7e4723a6c6d1c30e02777f4 Mon Sep 17 00:00:00 2001 From: hayk Date: Fri, 5 Jul 2024 04:35:56 -0400 Subject: [PATCH 23/53] minor bug in srpic::currentampere range --- src/engines/srpic.hpp | 25 ++++++++++++++++++------- src/kernels/ampere_sr.hpp | 8 ++++---- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index ea01f591b..e5fd78929 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -538,11 +538,22 @@ namespace ntt { coeff / V0, ONE / n0)); } else { - range_t range {}; + auto range = domain.mesh.rangeActiveCells(); if constexpr (M::Dim == Dim::_2D) { - range = CreateRangePolicy( - { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, - { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); + if (domain.mesh.flds_bc_in({ 0, +1 }) == FldsBC::AXIS) { + range = CreateRangePolicy( + { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, + { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); + } + } else if constexpr (M::Dim == Dim::_3D) { + if (domain.mesh.flds_bc_in({ 0, +1, 0 }) == FldsBC::AXIS) { + range = CreateRangePolicy({ domain.mesh.i_min(in::x1), + domain.mesh.i_min(in::x2), + domain.mesh.i_min(in::x3) }, + { domain.mesh.i_max(in::x1), + domain.mesh.i_max(in::x2) + 1, + domain.mesh.i_max(in::x3) }); + } } const auto ni2 = domain.mesh.n_active(in::x2); Kokkos::parallel_for( @@ -572,13 +583,13 @@ namespace ntt { * . . . . . */ if constexpr (M::Dim == Dim::_2D) { - if (domain.mesh.flds_bc_in({ +1, 0 }) == FldsBC::AXIS) { + if (domain.mesh.flds_bc_in({ 0, +1 }) == FldsBC::AXIS) { range = CreateRangePolicy( { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); } } else if constexpr (M::Dim == Dim::_3D) { - if (domain.mesh.flds_bc_in({ +1, 0, 0 }) == FldsBC::AXIS) { + if (domain.mesh.flds_bc_in({ 0, +1, 0 }) == FldsBC::AXIS) { range = CreateRangePolicy({ domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2), domain.mesh.i_min(in::x3) }, @@ -600,7 +611,7 @@ namespace ntt { if constexpr (M::Dim == Dim::_3D) { size[2] = domain.mesh.n_active(in::x3); } - // !TODO: this needs to be done more efficiently + // @TODO: this needs to be done more efficiently for (unsigned short i = 0; i < nfilter; ++i) { Kokkos::deep_copy(domain.fields.buff, domain.fields.cur); Kokkos::parallel_for("CurrentsFilter", diff --git a/src/kernels/ampere_sr.hpp b/src/kernels/ampere_sr.hpp index 6dde42ff0..e4faec6ce 100644 --- a/src/kernels/ampere_sr.hpp +++ b/src/kernels/ampere_sr.hpp @@ -122,7 +122,8 @@ namespace kernel::sr { */ template class CurrentsAmpere_kernel { - static constexpr auto D = M::Dim; + static constexpr auto D = M::Dim; + static constexpr std::size_t i2min = N_GHOSTS; ndfield_t E; ndfield_t J; @@ -161,9 +162,8 @@ namespace kernel::sr { Inline void operator()(index_t i1, index_t i2) const { if constexpr (D == Dim::_2D) { - constexpr std::size_t i2min { N_GHOSTS }; - const real_t i1_ { COORD(i1) }; - const real_t i2_ { COORD(i2) }; + const real_t i1_ { COORD(i1) }; + const real_t i2_ { COORD(i2) }; // convert the "curly" current, to contravariant, normalized to // `J0=n0*q0` then add "curly" current with the right coefficient From f308a4d9f2cace30699e64c76cc23ae613a9880c Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:44:59 -0400 Subject: [PATCH 24/53] const in range policy --- src/framework/domain/grid.cpp | 33 +++++++++++++++++---------------- src/framework/domain/grid.h | 14 +++++++------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/src/framework/domain/grid.cpp b/src/framework/domain/grid.cpp index d189eaf5d..9302386e1 100644 --- a/src/framework/domain/grid.cpp +++ b/src/framework/domain/grid.cpp @@ -9,19 +9,19 @@ namespace ntt { template <> - auto Grid::rangeAllCells() -> range_t { + auto Grid::rangeAllCells() const -> range_t { box_region_t region { CellLayer::allLayer }; return rangeCells(region); } template <> - auto Grid::rangeAllCells() -> range_t { + auto Grid::rangeAllCells() const -> range_t { box_region_t region { CellLayer::allLayer, CellLayer::allLayer }; return rangeCells(region); } template <> - auto Grid::rangeAllCells() -> range_t { + auto Grid::rangeAllCells() const -> range_t { box_region_t region { CellLayer::allLayer, CellLayer::allLayer, CellLayer::allLayer }; @@ -29,19 +29,19 @@ namespace ntt { } template <> - auto Grid::rangeActiveCells() -> range_t { + auto Grid::rangeActiveCells() const -> range_t { box_region_t region { CellLayer::activeLayer }; return rangeCells(region); } template <> - auto Grid::rangeActiveCells() -> range_t { + auto Grid::rangeActiveCells() const -> range_t { box_region_t region { CellLayer::activeLayer, CellLayer::activeLayer }; return rangeCells(region); } template <> - auto Grid::rangeActiveCells() -> range_t { + auto Grid::rangeActiveCells() const -> range_t { box_region_t region { CellLayer::activeLayer, CellLayer::activeLayer, CellLayer::activeLayer }; @@ -49,7 +49,7 @@ namespace ntt { } template - auto Grid::rangeCells(const box_region_t& region) -> range_t { + auto Grid::rangeCells(const box_region_t& region) const -> range_t { tuple_t imin, imax; for (unsigned short i = 0; i < (unsigned short)D; i++) { switch (region[i]) { @@ -87,7 +87,8 @@ namespace ntt { // !TODO: too ugly, implement a better solution (combine with device) template - auto Grid::rangeCellsOnHost(const box_region_t& region) -> range_h_t { + auto Grid::rangeCellsOnHost(const box_region_t& region) const + -> range_h_t { tuple_t imin, imax; for (unsigned short i = 0; i < (unsigned short)D; i++) { switch (region[i]) { @@ -123,19 +124,19 @@ namespace ntt { } template <> - auto Grid::rangeAllCellsOnHost() -> range_h_t { + auto Grid::rangeAllCellsOnHost() const -> range_h_t { box_region_t region { CellLayer::allLayer }; return rangeCellsOnHost(region); } template <> - auto Grid::rangeAllCellsOnHost() -> range_h_t { + auto Grid::rangeAllCellsOnHost() const -> range_h_t { box_region_t region { CellLayer::allLayer, CellLayer::allLayer }; return rangeCellsOnHost(region); } template <> - auto Grid::rangeAllCellsOnHost() -> range_h_t { + auto Grid::rangeAllCellsOnHost() const -> range_h_t { box_region_t region { CellLayer::allLayer, CellLayer::allLayer, CellLayer::allLayer }; @@ -143,19 +144,19 @@ namespace ntt { } template <> - auto Grid::rangeActiveCellsOnHost() -> range_h_t { + auto Grid::rangeActiveCellsOnHost() const -> range_h_t { box_region_t region { CellLayer::activeLayer }; return rangeCellsOnHost(region); } template <> - auto Grid::rangeActiveCellsOnHost() -> range_h_t { + auto Grid::rangeActiveCellsOnHost() const -> range_h_t { box_region_t region { CellLayer::activeLayer, CellLayer::activeLayer }; return rangeCellsOnHost(region); } template <> - auto Grid::rangeActiveCellsOnHost() -> range_h_t { + auto Grid::rangeActiveCellsOnHost() const -> range_h_t { box_region_t region { CellLayer::activeLayer, CellLayer::activeLayer, CellLayer::activeLayer }; @@ -163,7 +164,7 @@ namespace ntt { } template - auto Grid::rangeCells(const tuple_t, D>& ranges) + auto Grid::rangeCells(const tuple_t, D>& ranges) const -> range_t { tuple_t imin, imax; for (unsigned short i = 0; i < (unsigned short)D; i++) { @@ -182,4 +183,4 @@ namespace ntt { template struct Grid; template struct Grid; -} // namespace ntt \ No newline at end of file +} // namespace ntt diff --git a/src/framework/domain/grid.h b/src/framework/domain/grid.h index 176263189..97a939117 100644 --- a/src/framework/domain/grid.h +++ b/src/framework/domain/grid.h @@ -159,19 +159,19 @@ namespace ntt { * @brief Loop over all active cells (disregard ghost cells) * @returns Kokkos range policy with proper min/max indices and dimension */ - auto rangeActiveCells() -> range_t; + auto rangeActiveCells() const -> range_t; /** * @brief Loop over all cells * @returns Kokkos range policy with proper min/max indices and dimension */ - auto rangeAllCells() -> range_t; + auto rangeAllCells() const -> range_t; /** * @brief Pick a particular region of cells * @param box_region_t region of cells to pick: tuple of cellLayer objects * @returns Kokkos range policy with proper min/max indices and dimension */ - auto rangeCells(const box_region_t&) -> range_t; + auto rangeCells(const box_region_t&) const -> range_t; /** * @brief Pick a particular region of cells * @overload @@ -180,7 +180,7 @@ namespace ntt { * @example {{0, 0}, {0, 0}, {0, 0}} corresponds to allActiveLayer in all 3 dimensions * @returns Kokkos range policy with proper min/max indices and dimension */ - auto rangeCells(const tuple_t, D>&) -> range_t; + auto rangeCells(const tuple_t, D>&) const -> range_t; /* Ranges in the host execution space ----------------------------------- */ /** @@ -188,20 +188,20 @@ namespace ntt { * @returns Kokkos range policy in the host space with proper min/max * indices and dimension. */ - auto rangeActiveCellsOnHost() -> range_h_t; + auto rangeActiveCellsOnHost() const -> range_h_t; /** * @brief Loop over all cells * @returns Kokkos range policy in the host space with proper min/max * indices and dimension. */ - auto rangeAllCellsOnHost() -> range_h_t; + auto rangeAllCellsOnHost() const -> range_h_t; /** * @brief Pick a particular region of cells * @param box_region_t region of cells to pick: tuple of cellLayer objects * @returns Kokkos range policy in the host space with proper min/max * indices and dimension. */ - auto rangeCellsOnHost(const box_region_t&) -> range_h_t; + auto rangeCellsOnHost(const box_region_t&) const -> range_h_t; protected: std::vector m_resolution; From 09eceb78825c7f65ab628f83a1fbc2cae72576e5 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:45:48 -0400 Subject: [PATCH 25/53] header added --- src/archetypes/particle_injector.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/archetypes/particle_injector.h b/src/archetypes/particle_injector.h index dd1aa957c..4e75003bb 100644 --- a/src/archetypes/particle_injector.h +++ b/src/archetypes/particle_injector.h @@ -1,13 +1,17 @@ /** * @file archetypes/particle_injector.h - * @brief Particle injector routines - * ... + * @brief Particle injector routines and classes + * @implements + * - arch::UniformInjector<> + * - arch::NonUniformInjector<> + * - arch::AtmosphereInjector<> + * - arch::InjectUniform<> -> void + * - arch::InjectGlobally<> -> void + * - arch::InjectNonUniform<> -> void + * @namespaces: + * - arch:: */ -/* -------------------------------------------------------------------------- */ -/* This header file is still under construction */ -/* -------------------------------------------------------------------------- */ - #ifndef ARCHETYPES_PARTICLE_INJECTOR_H #define ARCHETYPES_PARTICLE_INJECTOR_H From 3b32aaec7ad41850fa9886b38856aa8db1c14b41 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:46:20 -0400 Subject: [PATCH 26/53] precision as print --- src/engines/engine_printer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/engines/engine_printer.cpp b/src/engines/engine_printer.cpp index d94d00376..90dec3326 100644 --- a/src/engines/engine_printer.cpp +++ b/src/engines/engine_printer.cpp @@ -137,6 +137,7 @@ namespace ntt { const auto hash = std::string(ENTITY_GIT_HASH); const auto pgen = std::string(PGEN); const auto nspec = metadomain.species_params().size(); + const auto precision = (sizeof(real_t) == 4) ? "single" : "double"; #if defined(__clang__) const std::string ccx = "Clang/LLVM " __clang_version__; @@ -219,6 +220,7 @@ namespace ntt { add_param(report, 4, "HDF5", "%s", hdf5_version.c_str()); add_param(report, 4, "Kokkos", "%s", kokkos_version.c_str()); add_param(report, 4, "ADIOS2", "%s", adios2_version.c_str()); + add_param(report, 4, "Precision", "%s", precision); add_param(report, 4, "Debug", "%s", dbg.c_str()); report += "\n"; add_category(report, 4, "Configuration"); From 7bcd17a8026774b59dd63c84b948d819d57b1951 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:47:45 -0400 Subject: [PATCH 27/53] ranges for AXIS bc separate function --- src/engines/srpic.hpp | 84 ++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 53 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index e5fd78929..482de8ef1 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -208,6 +208,7 @@ namespace ntt { m_params.template get( "algorithms.timestep.correction") * dt; + auto range = range_with_axis_BCs(domain); if constexpr (M::CoordType == Coord::Cart) { // minkowski case const auto dx = math::sqrt(domain.mesh.metric.template h_<1, 1>({})); @@ -222,15 +223,9 @@ namespace ntt { Kokkos::parallel_for( "Ampere", - domain.mesh.rangeActiveCells(), + range, kernel::mink::Ampere_kernel(domain.fields.em, coeff1, coeff2)); } else { - range_t range {}; - if constexpr (M::Dim == Dim::_2D) { - range = CreateRangePolicy( - { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, - { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); - } const auto ni2 = domain.mesh.n_active(in::x2); Kokkos::parallel_for("Ampere", range, @@ -538,24 +533,8 @@ namespace ntt { coeff / V0, ONE / n0)); } else { - auto range = domain.mesh.rangeActiveCells(); - if constexpr (M::Dim == Dim::_2D) { - if (domain.mesh.flds_bc_in({ 0, +1 }) == FldsBC::AXIS) { - range = CreateRangePolicy( - { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, - { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); - } - } else if constexpr (M::Dim == Dim::_3D) { - if (domain.mesh.flds_bc_in({ 0, +1, 0 }) == FldsBC::AXIS) { - range = CreateRangePolicy({ domain.mesh.i_min(in::x1), - domain.mesh.i_min(in::x2), - domain.mesh.i_min(in::x3) }, - { domain.mesh.i_max(in::x1), - domain.mesh.i_max(in::x2) + 1, - domain.mesh.i_max(in::x3) }); - } - } - const auto ni2 = domain.mesh.n_active(in::x2); + auto range = range_with_axis_BCs(domain); + const auto ni2 = domain.mesh.n_active(in::x2); Kokkos::parallel_for( "Ampere", range, @@ -571,34 +550,7 @@ namespace ntt { void CurrentsFilter(domain_t& domain) { logger::Checkpoint("Launching currents filtering kernels", HERE); - range_t range = domain.mesh.rangeActiveCells(); - if constexpr (M::CoordType != Coord::Cart) { - /** - * @brief taking one extra cell in the x2 direction if AXIS BCs - * . . . . . - * . ^= =^ . - * . |* *\*. - * . |* *\*. - * . ^- -^ . - * . . . . . - */ - if constexpr (M::Dim == Dim::_2D) { - if (domain.mesh.flds_bc_in({ 0, +1 }) == FldsBC::AXIS) { - range = CreateRangePolicy( - { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, - { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); - } - } else if constexpr (M::Dim == Dim::_3D) { - if (domain.mesh.flds_bc_in({ 0, +1, 0 }) == FldsBC::AXIS) { - range = CreateRangePolicy({ domain.mesh.i_min(in::x1), - domain.mesh.i_min(in::x2), - domain.mesh.i_min(in::x3) }, - { domain.mesh.i_max(in::x1), - domain.mesh.i_max(in::x2) + 1, - domain.mesh.i_max(in::x3) }); - } - } - } + auto range = range_with_axis_BCs(domain); const auto nfilter = m_params.template get( "algorithms.current_filters"); tuple_t size; @@ -1239,6 +1191,32 @@ namespace ntt { } return { sign, dim, xg_min, xg_max }; } + + auto range_with_axis_BCs(const domain_t& domain) -> range_t { + auto range = domain.mesh.rangeActiveCells(); + if constexpr (M::CoordType != Coord::Cart) { + /** + * @brief taking one extra cell in the x2 direction if AXIS BCs + */ + if constexpr (M::Dim == Dim::_2D) { + if (domain.mesh.flds_bc_in({ 0, +1 }) == FldsBC::AXIS) { + range = CreateRangePolicy( + { domain.mesh.i_min(in::x1), domain.mesh.i_min(in::x2) }, + { domain.mesh.i_max(in::x1), domain.mesh.i_max(in::x2) + 1 }); + } + } else if constexpr (M::Dim == Dim::_3D) { + if (domain.mesh.flds_bc_in({ 0, +1, 0 }) == FldsBC::AXIS) { + range = CreateRangePolicy({ domain.mesh.i_min(in::x1), + domain.mesh.i_min(in::x2), + domain.mesh.i_min(in::x3) }, + { domain.mesh.i_max(in::x1), + domain.mesh.i_max(in::x2) + 1, + domain.mesh.i_max(in::x3) }); + } + } + } + return range; + } }; } // namespace ntt From 2d2594db2925354a3503cae12eb186f11802f466 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:47:59 -0400 Subject: [PATCH 28/53] rm temp pgen --- setups/tests/srpic_bc/pgen.hpp | 172 ---------------------------- setups/tests/srpic_bc/srpic_bc.toml | 68 ----------- 2 files changed, 240 deletions(-) delete mode 100644 setups/tests/srpic_bc/pgen.hpp delete mode 100644 setups/tests/srpic_bc/srpic_bc.toml diff --git a/setups/tests/srpic_bc/pgen.hpp b/setups/tests/srpic_bc/pgen.hpp deleted file mode 100644 index 3a1ee25f5..000000000 --- a/setups/tests/srpic_bc/pgen.hpp +++ /dev/null @@ -1,172 +0,0 @@ -#ifndef PROBLEM_GENERATOR_H -#define PROBLEM_GENERATOR_H - -#include "enums.h" -#include "global.h" - -#include "arch/kokkos_aliases.h" -#include "arch/traits.h" -#include "utils/numeric.h" - -#include "archetypes/energy_dist.h" -#include "archetypes/particle_injector.h" -#include "archetypes/problem_generator.h" -#include "archetypes/spatial_dist.h" -#include "framework/domain/domain.h" -#include "framework/domain/metadomain.h" - -namespace user { - using namespace ntt; - - template - struct GaussianBlob : public arch::SpatialDistribution { - GaussianBlob(const M& metric, const coord_t& c, real_t s) - : arch::SpatialDistribution { metric } - , spreadSqr { SQR(s) } { - for (auto d = 0u; d < M::Dim; ++d) { - center[d] = c[d]; - } - } - - Inline auto operator()(const coord_t& x_Ph) const -> real_t override { - // assuming spherical - auto rSqr = SQR(x_Ph[0]) + SQR(center[0]) - - TWO * x_Ph[0] * center[0] * math::cos(x_Ph[1] - center[1]); - return std::exp(-rSqr / (2 * spreadSqr)); - } - - private: - tuple_t center; - const real_t spreadSqr; - }; - - template - struct PGen : public arch::ProblemGenerator { - - // compatibility traits for the problem generator - static constexpr auto engines = traits::compatible_with::value; - static constexpr auto metrics = - traits::compatible_with::value; - static constexpr auto dimensions = traits::compatible_with::value; - - // for easy access to variables in the child class - using arch::ProblemGenerator::D; - using arch::ProblemGenerator::C; - using arch::ProblemGenerator::params; - - const real_t r0, theta0, dr; - - inline PGen(const SimulationParams& p, const Metadomain&) - : arch::ProblemGenerator { p } - , r0 { p.template get("setup.r0") } - , theta0 { p.template get("setup.theta0") } - , dr { p.template get("setup.dr") } {} - - inline void InitPrtls(Domain& domain) { - const auto injector = arch::NonUniformInjector( - arch::ColdDist(domain.mesh.metric), - GaussianBlob(domain.mesh.metric, { r0, theta0 }, dr), - { 1, 2 }); - arch::InjectNonUniform(params, - domain, - injector, - 1.0, - true); - const auto& metric = domain.mesh.metric; - for (auto& species : domain.species) { - auto i1 = species.i1; - auto i2 = species.i2; - auto dx1 = species.dx1; - auto dx2 = species.dx2; - auto phi = species.phi; - auto pld1 = species.pld[0]; - auto pld2 = species.pld[1]; - auto pld3 = species.pld[2]; - Kokkos::parallel_for( - "SavePositions", - species.npart(), - Lambda(index_t p) { - const coord_t x_Code { - static_cast(i1(p)) + static_cast(dx1(p)), - static_cast(i2(p)) + static_cast(dx2(p)), - phi(p) - }; - coord_t x_XYZ { ZERO }; - - metric.template convert_xyz(x_Code, x_XYZ); - pld1(p) = x_XYZ[0]; - pld2(p) = x_XYZ[1]; - pld3(p) = x_XYZ[2]; - }); - } - } - - void CustomPostStep(std::size_t, long double time, Domain& domain) { - const auto& metric = domain.mesh.metric; - auto& random_pool = domain.random_pool; - const auto time_dbl = static_cast(time); - for (auto s = 0u; s < domain.species.size(); ++s) { - auto& species = domain.species[s]; - auto ux1 = species.ux1; - auto ux2 = species.ux2; - auto ux3 = species.ux3; - auto i1 = species.i1; - auto i2 = species.i2; - auto dx1 = species.dx1; - auto dx2 = species.dx2; - auto phi = species.phi; - auto pld1 = species.pld[0]; - auto pld2 = species.pld[1]; - auto pld3 = species.pld[2]; - Kokkos::parallel_for( - "SavePositions", - species.npart(), - ClassLambda(index_t p) { - const coord_t x_Code { - static_cast(i1(p)) + static_cast(dx1(p)), - static_cast(i2(p)) + static_cast(dx2(p)), - phi(p) - }; - coord_t x_XYZ { ZERO }; - - metric.template convert_xyz(x_Code, x_XYZ); - auto rand_gen = random_pool.get_state(); - if (s == 0) { - const auto dist = math::sqrt(SQR(x_XYZ[0] - pld1(p)) + - SQR(x_XYZ[1] - pld2(p)) + - SQR(x_XYZ[2] - pld3(p))); - if (time_dbl < 20.0) { - ux1(p) = 0.0; - ux2(p) = 0.0; - ux3(p) = -0.9; - } else if (time_dbl < 25.0) { - ux1(p) = 0.9; - ux2(p) = 0.0; - ux3(p) = 0.0; - } else if (time_dbl < 45.0) { - ux1(p) = 0.0; - ux2(p) = 0.0; - ux3(p) = 0.9; - } else if (time_dbl < 50.0) { - ux1(p) = 0.9; - ux2(p) = 0.0; - ux3(p) = 0.0; - } else { - ux1(p) = 5.0 * (Random(rand_gen) - HALF); - ux2(p) = 5.0 * (Random(rand_gen) - HALF); - ux3(p) = 5.0 * (Random(rand_gen) - HALF); - } - } - // add a slight spread - ux1(p) += 0.002 * (Random(rand_gen) - HALF); - ux2(p) += 0.002 * (Random(rand_gen) - HALF); - ux3(p) += 0.002 * (Random(rand_gen) - HALF); - random_pool.free_state(rand_gen); - }); - } - } - }; - -} // namespace user - -#endif \ No newline at end of file diff --git a/setups/tests/srpic_bc/srpic_bc.toml b/setups/tests/srpic_bc/srpic_bc.toml deleted file mode 100644 index 7b4aa3b84..000000000 --- a/setups/tests/srpic_bc/srpic_bc.toml +++ /dev/null @@ -1,68 +0,0 @@ -[simulation] - name = "srpic_bc" - engine = "srpic" - runtime = 100.0 - -[grid] - resolution = [512, 512] - extent = [[1.0, 30.0]] - - [grid.metric] - metric = "qspherical" - - [grid.boundaries] - fields = [["CONDUCTOR", "CONDUCTOR"]] - particles = [["REFLECT", "REFLECT"]] - -[scales] - larmor0 = 1e-4 - skindepth0 = 0.01 - -[algorithms] - current_filters = 4 - - [algorithms.timestep] - CFL = 0.5 - -[particles] - ppc0 = 64.0 - use_weights = true - sort_interval = 100 - - [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e4 - pusher = "Boris" - n_payloads = 3 - - [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e4 - pusher = "Boris" - n_payloads = 3 - -[setup] - r0 = 10.0 - theta0 = 0.3 - dr = 0.2 - -[output] - format = "hdf5" - - [output.fields] - interval_time = 1.0 - quantities = ["N_1", "N_2", "E", "B", "T00"] - - [output.particles] - enable = false - - [output.spectra] - enable = false - -[diagnostics] - interval = 5 - colored_stdout = true From 260bfd5e7a8bfbe9d943c2e60018a982f22a4e35 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:48:18 -0400 Subject: [PATCH 29/53] potential race condition fixed --- src/kernels/injectors.hpp | 48 +++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/kernels/injectors.hpp b/src/kernels/injectors.hpp index ae8440342..7ad945ad5 100644 --- a/src/kernels/injectors.hpp +++ b/src/kernels/injectors.hpp @@ -568,21 +568,21 @@ namespace kernel { if (ppc == 0) { return; } - const auto index = Kokkos::atomic_fetch_add(&idx(), ppc); - auto rand_gen = random_pool.get_state(); + auto rand_gen = random_pool.get_state(); for (auto p { 0u }; p < ppc; ++p) { - const auto dx1 = Random(rand_gen); - const auto dx2 = Random(rand_gen); + const auto index = Kokkos::atomic_fetch_add(&idx(), 1); + const auto dx1 = Random(rand_gen); + const auto dx2 = Random(rand_gen); - i1s_1(index + offset1 - p) = static_cast(i1) - N_GHOSTS; - dx1s_1(index + offset1 - p) = dx1; - i1s_2(index + offset2 - p) = static_cast(i1) - N_GHOSTS; - dx1s_2(index + offset2 - p) = dx1; + i1s_1(index + offset1) = static_cast(i1) - N_GHOSTS; + dx1s_1(index + offset1) = dx1; + i1s_2(index + offset2) = static_cast(i1) - N_GHOSTS; + dx1s_2(index + offset2) = dx1; - i2s_1(index + offset1 - p) = static_cast(i2) - N_GHOSTS; - dx2s_1(index + offset1 - p) = dx2; - i2s_2(index + offset2 - p) = static_cast(i2) - N_GHOSTS; - dx2s_2(index + offset2 - p) = dx2; + i2s_1(index + offset1) = static_cast(i2) - N_GHOSTS; + dx2s_1(index + offset1) = dx2; + i2s_2(index + offset2) = static_cast(i2) - N_GHOSTS; + dx2s_2(index + offset2) = dx2; vec_t v_T { ZERO }, v_Cd { ZERO }; energy_dist(x_Ph, v_T, spidx1); @@ -591,28 +591,28 @@ namespace kernel { } else if constexpr (S == SimEngine::GRPIC) { metric.template transform(x_Cd_, v_T, v_Cd); } - ux1s_1(index + offset1 - p) = v_Cd[0]; - ux2s_1(index + offset1 - p) = v_Cd[1]; - ux3s_1(index + offset1 - p) = v_Cd[2]; + ux1s_1(index + offset1) = v_Cd[0]; + ux2s_1(index + offset1) = v_Cd[1]; + ux3s_1(index + offset1) = v_Cd[2]; energy_dist(x_Ph, v_T, spidx2); if constexpr (S == SimEngine::SRPIC) { metric.template transform_xyz(x_Cd_, v_T, v_Cd); } else if constexpr (S == SimEngine::GRPIC) { metric.template transform(x_Cd_, v_T, v_Cd); } - ux1s_2(index + offset2 - p) = v_Cd[0]; - ux2s_2(index + offset2 - p) = v_Cd[1]; - ux3s_2(index + offset2 - p) = v_Cd[2]; + ux1s_2(index + offset2) = v_Cd[0]; + ux2s_2(index + offset2) = v_Cd[1]; + ux3s_2(index + offset2) = v_Cd[2]; - tags_1(index + offset1 - p) = ParticleTag::alive; - tags_2(index + offset2 - p) = ParticleTag::alive; + tags_1(index + offset1) = ParticleTag::alive; + tags_2(index + offset2) = ParticleTag::alive; if (M::CoordType == Coord::Cart) { - weights_1(index + offset1 - p) = ONE; - weights_2(index + offset2 - p) = ONE; + weights_1(index + offset1) = ONE; + weights_2(index + offset2) = ONE; } else { const auto wei = metric.sqrt_det_h({ i1_ + HALF, i2_ + HALF }) * inv_V0; - weights_1(index + offset1 - p) = wei; - weights_2(index + offset2 - p) = wei; + weights_1(index + offset1) = wei; + weights_2(index + offset2) = wei; } } random_pool.free_state(rand_gen); From 7d4d3052421fb7e752087f6c160ad7deccdd3dc0 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:51:48 -0400 Subject: [PATCH 30/53] culprit fixed for 1D and 3D too --- src/kernels/injectors.hpp | 98 +++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/src/kernels/injectors.hpp b/src/kernels/injectors.hpp index 7ad945ad5..9d3fd7d81 100644 --- a/src/kernels/injectors.hpp +++ b/src/kernels/injectors.hpp @@ -512,37 +512,37 @@ namespace kernel { if (ppc == 0) { return; } - const auto index = Kokkos::atomic_fetch_add(&idx(), ppc); - auto rand_gen = random_pool.get_state(); + auto rand_gen = random_pool.get_state(); for (auto p { 0u }; p < ppc; ++p) { - const auto dx1 = Random(rand_gen); + const auto index = Kokkos::atomic_fetch_add(&idx(), 1); + const auto dx1 = Random(rand_gen); - i1s_1(index + offset1 - p) = static_cast(i1) - N_GHOSTS; - dx1s_1(index + offset1 - p) = dx1; - i1s_2(index + offset2 - p) = static_cast(i1) - N_GHOSTS; - dx1s_2(index + offset2 - p) = dx1; + i1s_1(index + offset1) = static_cast(i1) - N_GHOSTS; + dx1s_1(index + offset1) = dx1; + i1s_2(index + offset2) = static_cast(i1) - N_GHOSTS; + dx1s_2(index + offset2) = dx1; vec_t v_T { ZERO }, v_XYZ { ZERO }; energy_dist(x_Ph, v_T, spidx1); metric.template transform_xyz(x_Cd, v_T, v_XYZ); - ux1s_1(index + offset1 - p) = v_XYZ[0]; - ux2s_1(index + offset1 - p) = v_XYZ[1]; - ux3s_1(index + offset1 - p) = v_XYZ[2]; + ux1s_1(index + offset1) = v_XYZ[0]; + ux2s_1(index + offset1) = v_XYZ[1]; + ux3s_1(index + offset1) = v_XYZ[2]; energy_dist(x_Ph, v_T, spidx2); metric.template transform_xyz(x_Cd, v_T, v_XYZ); - ux1s_2(index + offset2 - p) = v_XYZ[0]; - ux2s_2(index + offset2 - p) = v_XYZ[1]; - ux3s_2(index + offset2 - p) = v_XYZ[2]; + ux1s_2(index + offset2) = v_XYZ[0]; + ux2s_2(index + offset2) = v_XYZ[1]; + ux3s_2(index + offset2) = v_XYZ[2]; - tags_1(index + offset1 - p) = ParticleTag::alive; - tags_2(index + offset2 - p) = ParticleTag::alive; + tags_1(index + offset1) = ParticleTag::alive; + tags_2(index + offset2) = ParticleTag::alive; if (M::CoordType == Coord::Cart) { - weights_1(index + offset1 - p) = ONE; - weights_2(index + offset2 - p) = ONE; + weights_1(index + offset1) = ONE; + weights_2(index + offset2) = ONE; } else { - const auto wei = metric.sqrt_det_h({ i1_ + HALF }); - weights_1(index + offset1 - p) = wei; - weights_2(index + offset2 - p) = wei; + const auto wei = metric.sqrt_det_h({ i1_ + HALF }) * inv_V0; + weights_1(index + offset1) = wei; + weights_2(index + offset2) = wei; } } random_pool.free_state(rand_gen); @@ -635,27 +635,27 @@ namespace kernel { if (ppc == 0) { return; } - const auto index = Kokkos::atomic_fetch_add(&idx(), ppc); - auto rand_gen = random_pool.get_state(); + auto rand_gen = random_pool.get_state(); for (auto p { 0u }; p < ppc; ++p) { - const auto dx1 = Random(rand_gen); - const auto dx2 = Random(rand_gen); - const auto dx3 = Random(rand_gen); + const auto index = Kokkos::atomic_fetch_add(&idx(), 1); + const auto dx1 = Random(rand_gen); + const auto dx2 = Random(rand_gen); + const auto dx3 = Random(rand_gen); - i1s_1(index + offset1 - p) = static_cast(i1) - N_GHOSTS; - dx1s_1(index + offset1 - p) = dx1; - i1s_2(index + offset2 - p) = static_cast(i1) - N_GHOSTS; - dx1s_2(index + offset2 - p) = dx1; + i1s_1(index + offset1) = static_cast(i1) - N_GHOSTS; + dx1s_1(index + offset1) = dx1; + i1s_2(index + offset2) = static_cast(i1) - N_GHOSTS; + dx1s_2(index + offset2) = dx1; - i2s_1(index + offset1 - p) = static_cast(i2) - N_GHOSTS; - dx2s_1(index + offset1 - p) = dx2; - i2s_2(index + offset2 - p) = static_cast(i2) - N_GHOSTS; - dx2s_2(index + offset2 - p) = dx2; + i2s_1(index + offset1) = static_cast(i2) - N_GHOSTS; + dx2s_1(index + offset1) = dx2; + i2s_2(index + offset2) = static_cast(i2) - N_GHOSTS; + dx2s_2(index + offset2) = dx2; - i3s_1(index + offset1 - p) = static_cast(i3) - N_GHOSTS; - dx3s_1(index + offset1 - p) = dx3; - i3s_2(index + offset2 - p) = static_cast(i3) - N_GHOSTS; - dx3s_2(index + offset2 - p) = dx3; + i3s_1(index + offset1) = static_cast(i3) - N_GHOSTS; + dx3s_1(index + offset1) = dx3; + i3s_2(index + offset2) = static_cast(i3) - N_GHOSTS; + dx3s_2(index + offset2) = dx3; vec_t v_T { ZERO }, v_Cd { ZERO }; energy_dist(x_Ph, v_T, spidx1); @@ -664,30 +664,30 @@ namespace kernel { } else if constexpr (S == SimEngine::GRPIC) { metric.template transform(x_Cd, v_T, v_Cd); } - ux1s_1(index + offset1 - p) = v_Cd[0]; - ux2s_1(index + offset1 - p) = v_Cd[1]; - ux3s_1(index + offset1 - p) = v_Cd[2]; + ux1s_1(index + offset1) = v_Cd[0]; + ux2s_1(index + offset1) = v_Cd[1]; + ux3s_1(index + offset1) = v_Cd[2]; energy_dist(x_Ph, v_T, spidx2); if constexpr (S == SimEngine::SRPIC) { metric.template transform_xyz(x_Cd, v_T, v_Cd); } else if constexpr (S == SimEngine::GRPIC) { metric.template transform(x_Cd, v_T, v_Cd); } - ux1s_2(index + offset2 - p) = v_Cd[0]; - ux2s_2(index + offset2 - p) = v_Cd[1]; - ux3s_2(index + offset2 - p) = v_Cd[2]; + ux1s_2(index + offset2) = v_Cd[0]; + ux2s_2(index + offset2) = v_Cd[1]; + ux3s_2(index + offset2) = v_Cd[2]; - tags_1(index + offset1 - p) = ParticleTag::alive; - tags_2(index + offset2 - p) = ParticleTag::alive; + tags_1(index + offset1) = ParticleTag::alive; + tags_2(index + offset2) = ParticleTag::alive; if (M::CoordType == Coord::Cart) { - weights_1(index + offset1 - p) = ONE; - weights_2(index + offset2 - p) = ONE; + weights_1(index + offset1) = ONE; + weights_2(index + offset2) = ONE; } else { const auto wei = metric.sqrt_det_h( { i1_ + HALF, i2_ + HALF, i3_ + HALF }) * inv_V0; - weights_1(index + offset1 - p) = wei; - weights_2(index + offset2 - p) = wei; + weights_1(index + offset1) = wei; + weights_2(index + offset2) = wei; } } random_pool.free_state(rand_gen); From e8a753696d338d6abce960dc30170878b27a3d51 Mon Sep 17 00:00:00 2001 From: hayk Date: Wed, 17 Jul 2024 15:54:54 -0400 Subject: [PATCH 31/53] srpic_bc rm --- setups/tests/srpic_bc/pgen.hpp | 172 ---------------------------- setups/tests/srpic_bc/srpic_bc.toml | 68 ----------- 2 files changed, 240 deletions(-) delete mode 100644 setups/tests/srpic_bc/pgen.hpp delete mode 100644 setups/tests/srpic_bc/srpic_bc.toml diff --git a/setups/tests/srpic_bc/pgen.hpp b/setups/tests/srpic_bc/pgen.hpp deleted file mode 100644 index 3a1ee25f5..000000000 --- a/setups/tests/srpic_bc/pgen.hpp +++ /dev/null @@ -1,172 +0,0 @@ -#ifndef PROBLEM_GENERATOR_H -#define PROBLEM_GENERATOR_H - -#include "enums.h" -#include "global.h" - -#include "arch/kokkos_aliases.h" -#include "arch/traits.h" -#include "utils/numeric.h" - -#include "archetypes/energy_dist.h" -#include "archetypes/particle_injector.h" -#include "archetypes/problem_generator.h" -#include "archetypes/spatial_dist.h" -#include "framework/domain/domain.h" -#include "framework/domain/metadomain.h" - -namespace user { - using namespace ntt; - - template - struct GaussianBlob : public arch::SpatialDistribution { - GaussianBlob(const M& metric, const coord_t& c, real_t s) - : arch::SpatialDistribution { metric } - , spreadSqr { SQR(s) } { - for (auto d = 0u; d < M::Dim; ++d) { - center[d] = c[d]; - } - } - - Inline auto operator()(const coord_t& x_Ph) const -> real_t override { - // assuming spherical - auto rSqr = SQR(x_Ph[0]) + SQR(center[0]) - - TWO * x_Ph[0] * center[0] * math::cos(x_Ph[1] - center[1]); - return std::exp(-rSqr / (2 * spreadSqr)); - } - - private: - tuple_t center; - const real_t spreadSqr; - }; - - template - struct PGen : public arch::ProblemGenerator { - - // compatibility traits for the problem generator - static constexpr auto engines = traits::compatible_with::value; - static constexpr auto metrics = - traits::compatible_with::value; - static constexpr auto dimensions = traits::compatible_with::value; - - // for easy access to variables in the child class - using arch::ProblemGenerator::D; - using arch::ProblemGenerator::C; - using arch::ProblemGenerator::params; - - const real_t r0, theta0, dr; - - inline PGen(const SimulationParams& p, const Metadomain&) - : arch::ProblemGenerator { p } - , r0 { p.template get("setup.r0") } - , theta0 { p.template get("setup.theta0") } - , dr { p.template get("setup.dr") } {} - - inline void InitPrtls(Domain& domain) { - const auto injector = arch::NonUniformInjector( - arch::ColdDist(domain.mesh.metric), - GaussianBlob(domain.mesh.metric, { r0, theta0 }, dr), - { 1, 2 }); - arch::InjectNonUniform(params, - domain, - injector, - 1.0, - true); - const auto& metric = domain.mesh.metric; - for (auto& species : domain.species) { - auto i1 = species.i1; - auto i2 = species.i2; - auto dx1 = species.dx1; - auto dx2 = species.dx2; - auto phi = species.phi; - auto pld1 = species.pld[0]; - auto pld2 = species.pld[1]; - auto pld3 = species.pld[2]; - Kokkos::parallel_for( - "SavePositions", - species.npart(), - Lambda(index_t p) { - const coord_t x_Code { - static_cast(i1(p)) + static_cast(dx1(p)), - static_cast(i2(p)) + static_cast(dx2(p)), - phi(p) - }; - coord_t x_XYZ { ZERO }; - - metric.template convert_xyz(x_Code, x_XYZ); - pld1(p) = x_XYZ[0]; - pld2(p) = x_XYZ[1]; - pld3(p) = x_XYZ[2]; - }); - } - } - - void CustomPostStep(std::size_t, long double time, Domain& domain) { - const auto& metric = domain.mesh.metric; - auto& random_pool = domain.random_pool; - const auto time_dbl = static_cast(time); - for (auto s = 0u; s < domain.species.size(); ++s) { - auto& species = domain.species[s]; - auto ux1 = species.ux1; - auto ux2 = species.ux2; - auto ux3 = species.ux3; - auto i1 = species.i1; - auto i2 = species.i2; - auto dx1 = species.dx1; - auto dx2 = species.dx2; - auto phi = species.phi; - auto pld1 = species.pld[0]; - auto pld2 = species.pld[1]; - auto pld3 = species.pld[2]; - Kokkos::parallel_for( - "SavePositions", - species.npart(), - ClassLambda(index_t p) { - const coord_t x_Code { - static_cast(i1(p)) + static_cast(dx1(p)), - static_cast(i2(p)) + static_cast(dx2(p)), - phi(p) - }; - coord_t x_XYZ { ZERO }; - - metric.template convert_xyz(x_Code, x_XYZ); - auto rand_gen = random_pool.get_state(); - if (s == 0) { - const auto dist = math::sqrt(SQR(x_XYZ[0] - pld1(p)) + - SQR(x_XYZ[1] - pld2(p)) + - SQR(x_XYZ[2] - pld3(p))); - if (time_dbl < 20.0) { - ux1(p) = 0.0; - ux2(p) = 0.0; - ux3(p) = -0.9; - } else if (time_dbl < 25.0) { - ux1(p) = 0.9; - ux2(p) = 0.0; - ux3(p) = 0.0; - } else if (time_dbl < 45.0) { - ux1(p) = 0.0; - ux2(p) = 0.0; - ux3(p) = 0.9; - } else if (time_dbl < 50.0) { - ux1(p) = 0.9; - ux2(p) = 0.0; - ux3(p) = 0.0; - } else { - ux1(p) = 5.0 * (Random(rand_gen) - HALF); - ux2(p) = 5.0 * (Random(rand_gen) - HALF); - ux3(p) = 5.0 * (Random(rand_gen) - HALF); - } - } - // add a slight spread - ux1(p) += 0.002 * (Random(rand_gen) - HALF); - ux2(p) += 0.002 * (Random(rand_gen) - HALF); - ux3(p) += 0.002 * (Random(rand_gen) - HALF); - random_pool.free_state(rand_gen); - }); - } - } - }; - -} // namespace user - -#endif \ No newline at end of file diff --git a/setups/tests/srpic_bc/srpic_bc.toml b/setups/tests/srpic_bc/srpic_bc.toml deleted file mode 100644 index 7b4aa3b84..000000000 --- a/setups/tests/srpic_bc/srpic_bc.toml +++ /dev/null @@ -1,68 +0,0 @@ -[simulation] - name = "srpic_bc" - engine = "srpic" - runtime = 100.0 - -[grid] - resolution = [512, 512] - extent = [[1.0, 30.0]] - - [grid.metric] - metric = "qspherical" - - [grid.boundaries] - fields = [["CONDUCTOR", "CONDUCTOR"]] - particles = [["REFLECT", "REFLECT"]] - -[scales] - larmor0 = 1e-4 - skindepth0 = 0.01 - -[algorithms] - current_filters = 4 - - [algorithms.timestep] - CFL = 0.5 - -[particles] - ppc0 = 64.0 - use_weights = true - sort_interval = 100 - - [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e4 - pusher = "Boris" - n_payloads = 3 - - [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e4 - pusher = "Boris" - n_payloads = 3 - -[setup] - r0 = 10.0 - theta0 = 0.3 - dr = 0.2 - -[output] - format = "hdf5" - - [output.fields] - interval_time = 1.0 - quantities = ["N_1", "N_2", "E", "B", "T00"] - - [output.particles] - enable = false - - [output.spectra] - enable = false - -[diagnostics] - interval = 5 - colored_stdout = true From 6bcb4c22d4438070b5bd221a3f3d343701218abc Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 02:29:52 -0400 Subject: [PATCH 32/53] rm const in metric tests --- src/metrics/tests/coord_trans.cpp | 4 ++-- src/metrics/tests/ks-qks.cpp | 4 ++-- src/metrics/tests/minkowski.cpp | 4 ++-- src/metrics/tests/sph-qsph.cpp | 4 ++-- src/metrics/tests/sr-cart-sph.cpp | 4 ++-- src/metrics/tests/vec_trans.cpp | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/metrics/tests/coord_trans.cpp b/src/metrics/tests/coord_trans.cpp index 027b7677a..285621e16 100644 --- a/src/metrics/tests/coord_trans.cpp +++ b/src/metrics/tests/coord_trans.cpp @@ -29,7 +29,7 @@ template Inline auto equal(const coord_t& a, const coord_t& b, const char* msg, - const real_t acc = ONE) -> bool { + real_t acc = ONE) -> bool { const auto eps = epsilon * acc; for (unsigned short d = 0; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { @@ -189,4 +189,4 @@ auto main(int argc, char* argv[]) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/metrics/tests/ks-qks.cpp b/src/metrics/tests/ks-qks.cpp index 195465701..f7f20b94f 100644 --- a/src/metrics/tests/ks-qks.cpp +++ b/src/metrics/tests/ks-qks.cpp @@ -23,7 +23,7 @@ template Inline auto equal(const vec_t& a, const vec_t& b, const char* msg, - const real_t acc = ONE) -> bool { + real_t acc = ONE) -> bool { const auto eps = epsilon * acc; for (unsigned short d = 0; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { @@ -182,4 +182,4 @@ auto main(int argc, char* argv[]) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/metrics/tests/minkowski.cpp b/src/metrics/tests/minkowski.cpp index 8fce450e9..1ef27b4fa 100644 --- a/src/metrics/tests/minkowski.cpp +++ b/src/metrics/tests/minkowski.cpp @@ -19,7 +19,7 @@ void errorIf(bool condition, const std::string& message) { inline static constexpr auto epsilon = std::numeric_limits::epsilon(); template -Inline auto equal(const coord_t& a, const coord_t& b, const real_t acc = ONE) +Inline auto equal(const coord_t& a, const coord_t& b, real_t acc = ONE) -> bool { for (unsigned short d = 0; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], epsilon * acc)) { @@ -111,4 +111,4 @@ auto main(int argc, char* argv[]) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/metrics/tests/sph-qsph.cpp b/src/metrics/tests/sph-qsph.cpp index 13b1101a4..230a763e1 100644 --- a/src/metrics/tests/sph-qsph.cpp +++ b/src/metrics/tests/sph-qsph.cpp @@ -23,7 +23,7 @@ template Inline auto equal(const vec_t& a, const vec_t& b, const char* msg, - const real_t acc = ONE) -> bool { + real_t acc = ONE) -> bool { const auto eps = epsilon * acc; for (unsigned short d = 0; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { @@ -133,4 +133,4 @@ auto main(int argc, char* argv[]) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/metrics/tests/sr-cart-sph.cpp b/src/metrics/tests/sr-cart-sph.cpp index 126dea027..fd3f7351d 100644 --- a/src/metrics/tests/sr-cart-sph.cpp +++ b/src/metrics/tests/sr-cart-sph.cpp @@ -26,7 +26,7 @@ template Inline auto equal(const coord_t& a, const coord_t& b, const char* msg, - const real_t acc = ONE) -> bool { + real_t acc = ONE) -> bool { const auto eps = epsilon * acc; for (unsigned short d = 0; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { @@ -156,4 +156,4 @@ auto main(int argc, char* argv[]) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/metrics/tests/vec_trans.cpp b/src/metrics/tests/vec_trans.cpp index a249c1d41..fb041d9df 100644 --- a/src/metrics/tests/vec_trans.cpp +++ b/src/metrics/tests/vec_trans.cpp @@ -29,7 +29,7 @@ template Inline auto equal(const vec_t& a, const vec_t& b, const char* msg, - const real_t acc = ONE) -> bool { + real_t acc = ONE) -> bool { const auto eps = epsilon * acc; for (unsigned short d = 0; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { From c64c5a296097be3df4645a8aa68d764c581e04da Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 02:30:43 -0400 Subject: [PATCH 33/53] rm const in kernel tests + uint->int --- src/kernels/particle_moments.hpp | 4 ++-- src/kernels/tests/ampere_mink.cpp | 5 ++--- src/kernels/tests/deposit.cpp | 10 ++++------ src/kernels/tests/faraday_mink.cpp | 5 ++--- src/kernels/tests/prtl_bc.cpp | 3 +-- 5 files changed, 11 insertions(+), 16 deletions(-) diff --git a/src/kernels/particle_moments.hpp b/src/kernels/particle_moments.hpp index 5776fad38..8b668a036 100644 --- a/src/kernels/particle_moments.hpp +++ b/src/kernels/particle_moments.hpp @@ -57,7 +57,7 @@ namespace kernel { const float charge; const bool use_weights; const M metric; - const std::size_t ni2; + const int ni2; const real_t inv_n0; const unsigned short window; @@ -111,7 +111,7 @@ namespace kernel { , charge { charge } , use_weights { use_weights } , metric { metric } - , ni2 { ni2 } + , ni2 { static_cast(ni2) } , inv_n0 { inv_n0 } , window { window } , contrib { get_contrib(mass, charge) } diff --git a/src/kernels/tests/ampere_mink.cpp b/src/kernels/tests/ampere_mink.cpp index a7e2594a0..80af88fa5 100644 --- a/src/kernels/tests/ampere_mink.cpp +++ b/src/kernels/tests/ampere_mink.cpp @@ -23,8 +23,7 @@ void errorIf(bool condition, const std::string& message) { } } -Inline auto equal(const real_t& a, const real_t& b, const char* msg, const real_t acc) - -> bool { +Inline auto equal(real_t a, real_t b, const char* msg, real_t acc) -> bool { if (not(math::abs(a - b) < acc)) { printf("%.12e != %.12e [%.12e] %s\n", a, b, math::abs(a - b), msg); return false; @@ -368,4 +367,4 @@ auto main(int argc, char* argv[]) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/kernels/tests/deposit.cpp b/src/kernels/tests/deposit.cpp index 02292c643..9a8ae1cc6 100644 --- a/src/kernels/tests/deposit.cpp +++ b/src/kernels/tests/deposit.cpp @@ -29,10 +29,8 @@ void errorIf(bool condition, const std::string& message) { inline static constexpr auto epsilon = std::numeric_limits::epsilon(); -Inline auto equal(const real_t& a, - const real_t& b, - const char* msg = "", - const real_t acc = ONE) -> bool { +Inline auto equal(real_t a, real_t b, const char* msg = "", real_t acc = ONE) + -> bool { const auto eps = epsilon * acc; if (not cmp::AlmostEqual(a, b, eps)) { printf("%.12e != %.12e %s\n", a, b, msg); @@ -89,8 +87,8 @@ void testDeposit(const std::vector& res, const prtldx_t dxi = 0.53, dxf = 0.47; const prtldx_t dyi = 0.34, dyf = 0.52; - const real_t xi = (real_t)i0 + (real_t)dxi, xf = (real_t)i0 + (real_t)dxf; - const real_t yi = (real_t)j0 + (real_t)dyi, yf = (real_t)j0 + (real_t)dyf; + const real_t xi = (real_t)i0 + (real_t)dxi, xf = (real_t)i0 + (real_t)dxf; + const real_t yi = (real_t)j0 + (real_t)dyi, yf = (real_t)j0 + (real_t)dyf; const real_t xr = 0.5 * (xi + xf); const real_t yr = 0.5 * (yi + yf); diff --git a/src/kernels/tests/faraday_mink.cpp b/src/kernels/tests/faraday_mink.cpp index e11c8d1ff..74c2b9b1a 100644 --- a/src/kernels/tests/faraday_mink.cpp +++ b/src/kernels/tests/faraday_mink.cpp @@ -23,8 +23,7 @@ void errorIf(bool condition, const std::string& message) { } } -Inline auto equal(const real_t& a, const real_t& b, const char* msg, const real_t acc) - -> bool { +Inline auto equal(real_t a, real_t b, const char* msg, real_t acc) -> bool { if (not(math::abs(a - b) < acc)) { printf("%.12e != %.12e [%.12e] %s\n", a, b, math::abs(a - b), msg); return false; @@ -344,4 +343,4 @@ auto main(int argc, char* argv[]) -> int { } Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/src/kernels/tests/prtl_bc.cpp b/src/kernels/tests/prtl_bc.cpp index fa1103043..c8f9eae04 100644 --- a/src/kernels/tests/prtl_bc.cpp +++ b/src/kernels/tests/prtl_bc.cpp @@ -27,8 +27,7 @@ void errorIf(bool condition, const std::string& message = "") { } } -Inline auto equal(const real_t& a, const real_t& b, const std::string& msg) - -> bool { +Inline auto equal(real_t a, real_t b, const std::string& msg) -> bool { if (not(math::abs(a - b) < 1e-4)) { printf("%.12e != %.12e %s\n", a, b, msg.c_str()); return false; From 358e1eeccf5ef46f5a5b8369386dc47c5ad921eb Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 03:40:43 -0400 Subject: [PATCH 34/53] rocm tests --- src/metrics/tests/coord_trans.cpp | 2 +- src/metrics/tests/ks-qks.cpp | 52 ++++++++++++++++++++----------- src/metrics/tests/sr-cart-sph.cpp | 2 +- src/metrics/tests/vec_trans.cpp | 50 ++++++++++++++--------------- 4 files changed, 61 insertions(+), 45 deletions(-) diff --git a/src/metrics/tests/coord_trans.cpp b/src/metrics/tests/coord_trans.cpp index 285621e16..f3779a852 100644 --- a/src/metrics/tests/coord_trans.cpp +++ b/src/metrics/tests/coord_trans.cpp @@ -149,7 +149,7 @@ auto main(int argc, char* argv[]) -> int { testMetric>({ 128 }, ext1dcart); testMetric>(res2d, ext2dcart, 200); - testMetric>(res3d, ext3dcart, 200); + testMetric>(res3d, ext3dcart, 500); testMetric>(res2d, extsph, 10); testMetric>(res2d, extsph, 100, params); diff --git a/src/metrics/tests/ks-qks.cpp b/src/metrics/tests/ks-qks.cpp index f7f20b94f..bed051e16 100644 --- a/src/metrics/tests/ks-qks.cpp +++ b/src/metrics/tests/ks-qks.cpp @@ -27,7 +27,6 @@ Inline auto equal(const vec_t& a, const auto eps = epsilon * acc; for (unsigned short d = 0; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], eps)) { - printf("%d : %.12e != %.12e %s\n", d, a[d], b[d], msg); return false; } } @@ -64,13 +63,12 @@ void testMetric(const std::vector& res, npts *= res[d]; } - unsigned long all_wrongs = 0; - const auto rg = metric.rg(); - const auto a = metric.spin(); - Kokkos::parallel_reduce( + const auto rg = metric.rg(); + const auto a = metric.spin(); + Kokkos::parallel_for( "h_ij/hij", npts, - Lambda(index_t n, unsigned long& wrongs) { + Lambda(index_t n) { tuple_t idx; unravel(n, idx, res_tup); coord_t x_Code { ZERO }; @@ -138,17 +136,35 @@ void testMetric(const std::vector& res, vec_t hij_expect { h11_expect, h22_expect, h33_expect }; vec_t h_ij_expect { h_11_expect, h_22_expect, h_33_expect }; - wrongs += not equal(h_ij_predict, h_ij_expect, "h_ij", acc); - wrongs += not equal(h_13_predict, { h_13_expect }, "h_13", acc); - wrongs += not equal(hij_predict, hij_expect, "hij", acc); - wrongs += not equal(h13_predict, { h13_expect }, "h13", acc); - }, - all_wrongs); - - errorIf(all_wrongs != 0, - "wrong h_ij/hij for " + std::to_string(M::Dim) + "D " + - std::string(metric.Label) + " with " + std::to_string(all_wrongs) + - " errors"); + if (not equal(h_ij_predict, h_ij_expect, "h_ij", acc)) { + printf("h_ij: %.12e %.12e %.12e : %.12e %.12e %.12e\n", + h_ij_predict[0], + h_ij_predict[1], + h_ij_predict[2], + h_ij_expect[0], + h_ij_expect[1], + h_ij_expect[2]); + Kokkos::abort("h_ij"); + } + if (not equal(h_13_predict, { h_13_expect }, "h_13", acc)) { + printf("h_13: %.12e : %.12e\n", h_13_predict[0], h_13_expect); + Kokkos::abort("h_13"); + } + if (not equal(hij_predict, hij_expect, "hij", acc)) { + printf("hij: %.12e %.12e %.12e : %.12e %.12e %.12e\n", + hij_predict[0], + hij_predict[1], + hij_predict[2], + hij_expect[0], + hij_expect[1], + hij_expect[2]); + Kokkos::abort("hij"); + } + if (not equal(h13_predict, { h13_expect }, "h13", acc)) { + printf("h13: %.12e : %.12e\n", h13_predict[0], h13_expect); + Kokkos::abort("h13"); + } + }); } auto main(int argc, char* argv[]) -> int { @@ -168,7 +184,7 @@ auto main(int argc, char* argv[]) -> int { testMetric>( { - 64, + 32, 42 }, { { 0.8, 10.0 }, { 0.0, constant::PI } }, diff --git a/src/metrics/tests/sr-cart-sph.cpp b/src/metrics/tests/sr-cart-sph.cpp index fd3f7351d..ec2f6ddc0 100644 --- a/src/metrics/tests/sr-cart-sph.cpp +++ b/src/metrics/tests/sr-cart-sph.cpp @@ -145,7 +145,7 @@ auto main(int argc, char* argv[]) -> int { testMetric>({ 128 }, ext1dcart); testMetric>(res2d, ext2dcart, 200); - testMetric>(res3d, ext3dcart, 200); + testMetric>(res3d, ext3dcart, 500); testMetric>(res2d, extsph, 10); testMetric>(res2d, extsph, 200, params); diff --git a/src/metrics/tests/vec_trans.cpp b/src/metrics/tests/vec_trans.cpp index fb041d9df..31015115c 100644 --- a/src/metrics/tests/vec_trans.cpp +++ b/src/metrics/tests/vec_trans.cpp @@ -199,24 +199,24 @@ auto main(int argc, char* argv[]) -> int { { "h", (real_t)0.25} }; - testMetric>({ 128 }, ext1dcart); - testMetric>(res2d, ext2dcart, 200); - testMetric>(res3d, ext3dcart, 200); - testMetric>(res2d, extsph, 10); - testMetric>(res2d, extsph, 10, params); - - const auto resks = std::vector { 64, 54 }; - const auto extsgr = boundaries_t { - {0.8, 50.0}, - {0.0, constant::PI} - }; - const auto paramsks = std::map { - {"r0", -TWO}, - { "h", ZERO}, - { "a", (real_t)0.95} - }; - testMetric>(resks, extsgr, 150, paramsks); - + // testMetric>({ 128 }, ext1dcart); + // testMetric>(res2d, ext2dcart, 200); + // testMetric>(res3d, ext3dcart, 200); + // testMetric>(res2d, extsph, 10); + // testMetric>(res2d, extsph, 10, params); + + // const auto resks = std::vector { 64, 54 }; + // const auto extsgr = boundaries_t { + // {0.8, 50.0}, + // {0.0, constant::PI} + // }; + // const auto paramsks = std::map { + // {"r0", -TWO}, + // { "h", ZERO}, + // { "a", (real_t)0.95} + // }; + // testMetric>(resks, extsgr, 150, paramsks); + // const auto resqks = std::vector { 64, 42 }; const auto extqks = boundaries_t { {0.8, 10.0}, @@ -228,13 +228,13 @@ auto main(int argc, char* argv[]) -> int { { "a", (real_t)0.8} }; testMetric>(resqks, extqks, 500, paramsqks); - - const auto resks0 = std::vector { 64, 54 }; - const auto extks0 = boundaries_t { - {0.5, 20.0}, - {0.0, constant::PI} - }; - testMetric>(resks0, extks0, 10); + // + // const auto resks0 = std::vector { 64, 54 }; + // const auto extks0 = boundaries_t { + // {0.5, 20.0}, + // {0.0, constant::PI} + // }; + // testMetric>(resks0, extks0, 10); } catch (std::exception& e) { std::cerr << e.what() << std::endl; From ac5389b1158f514180f8e4fd1627d6b63e817986 Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 03:42:00 -0400 Subject: [PATCH 35/53] minor --- src/engines/srpic.hpp | 2 +- src/framework/domain/comm_mpi.hpp | 213 ------------------------------ 2 files changed, 1 insertion(+), 214 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 482de8ef1..bddc557c9 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -563,7 +563,7 @@ namespace ntt { if constexpr (M::Dim == Dim::_3D) { size[2] = domain.mesh.n_active(in::x3); } - // @TODO: this needs to be done more efficiently + // !TODO: this needs to be done more efficiently for (unsigned short i = 0; i < nfilter; ++i) { Kokkos::deep_copy(domain.fields.buff, domain.fields.cur); Kokkos::parallel_for("CurrentsFilter", diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index 2854599ef..63dd8271a 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -55,9 +55,6 @@ namespace comm { if ((send_idx == idx) and (recv_idx == idx)) { // trivial copy if sending to self and receiving from self - // raise::ErrorIf((recv_idx != idx) || (send_idx != idx), - // "Cannot send to self and receive from another domain", - // HERE); if (not additive) { // simply filling the ghost cells if constexpr (D == Dim::_1D) { @@ -446,214 +443,4 @@ namespace comm { } // namespace comm -/** - * attempt to receive directly into fields - */ -// template -// auto to_tuple_impl(const Array& a, std::index_sequence) { -// return std::make_tuple(a[I]...); -// } - -// template > auto to_tuple(const std::array& a) { -// return to_tuple_impl(a, Indices {}); -// } - -// template -// inline void CommunicateField(unsigned int idx, -// ndfield_t& fld, -// unsigned int send_idx, -// unsigned int recv_idx, -// int send_rank, -// int recv_rank, -// const std::vector& send_slice, -// const std::vector& recv_slice, -// const range_tuple_t& comps, -// bool additive) { -// raise::ErrorIf(send_rank < 0 && recv_rank < 0, -// "CommunicateField called with negative ranks", -// HERE); - -// int rank; -// MPI_Comm_rank(MPI_COMM_WORLD, &rank); - -// raise::ErrorIf( -// (send_rank == rank && send_idx != idx) || -// (recv_rank == rank && recv_idx != idx), -// "Multiple-domain single-rank communication not yet implemented", -// HERE); - -// // trivial copy if sending to self and receiving from self -// if ((send_idx == idx) || (recv_idx == idx)) { -// raise::ErrorIf((recv_idx != idx) || (send_idx != idx), -// "Cannot send to self and receive from another domain", -// HERE); -// // sending/recv to/from self -// if constexpr (D == Dim::_1D) { -// Kokkos::deep_copy(Kokkos::subview(fld, recv_slice[0], comps), -// Kokkos::subview(fld, send_slice[0], comps)); -// } else if constexpr (D == Dim::_2D) { -// Kokkos::deep_copy( -// Kokkos::subview(fld, recv_slice[0], recv_slice[1], comps), -// Kokkos::subview(fld, send_slice[0], send_slice[1], comps)); -// } else if constexpr (D == Dim::_3D) { -// Kokkos::deep_copy( -// Kokkos::subview(fld, recv_slice[0], recv_slice[1], recv_slice[2], comps), -// Kokkos::subview(fld, send_slice[0], send_slice[1], send_slice[2], comps)); -// } -// return; -// } - -// std::size_t nsend { comps.second - comps.first }, -// nrecv { comps.second - comps.first }; -// // ndarray_t(D) + 1> send_fld, recv_fld; - -// for (short d { 0 }; d < (short)D; ++d) { -// if (send_rank >= 0) { -// nsend *= (send_slice[d].second - send_slice[d].first); -// } -// if (recv_rank >= 0) { -// nrecv *= (recv_slice[d].second - recv_slice[d].first); -// } -// } - -// std::vector> send_init_list; -// std::vector> recv_init_list; - -// if (send_rank >= 0) { -// for (unsigned short d = 0; d < D; ++d) { -// send_init_list.push_back({ send_slice[d].first, send_slice[d].second }); -// recv_init_list.push_back({ recv_slice[d].first, recv_slice[d].second }); -// } -// send_init_list.push_back({ comps.first, comps.second }); -// recv_init_list.push_back({ comps.first, comps.second }); -// } - -// std::array, D + 1> send_array; -// std::copy(send_init_list.begin(), send_init_list.end(), send_array.begin()); - -// std::array, D + 1> recv_array; -// std::copy(recv_init_list.begin(), recv_init_list.end(), recv_array.begin()); - -// if (send_rank >= 0 && recv_rank >= 0) { -// auto send_subview = std::apply( -// [&](auto... args) { -// return Kokkos::subview(fld, args...); -// }, -// to_tuple(send_array)); -// auto recv_subview = std::apply( -// [&](auto... args) { -// return Kokkos::subview(fld, args...); -// }, -// to_tuple(recv_array)); - -// MPI_Sendrecv(send_subview.data(), -// nsend, -// mpi::get_type(), -// send_rank, -// 0, -// recv_subview.data(), -// nrecv, -// mpi::get_type(), -// recv_rank, -// 0, -// MPI_COMM_WORLD, -// MPI_STATUS_IGNORE); -// } else if (send_rank >= 0) { -// auto send_subview = std::apply( -// [&](auto... args) { -// return Kokkos::subview(fld, args...); -// }, -// to_tuple(send_array)); - -// MPI_Send(send_subview.data(), -// nsend, -// mpi::get_type(), -// send_rank, -// 0, -// MPI_COMM_WORLD); -// } else if (recv_rank >= 0) { -// auto recv_subview = std::apply( -// [&](auto... args) { -// return Kokkos::subview(fld, args...); -// }, -// to_tuple(recv_array)); - -// MPI_Recv(recv_subview.data(), -// nrecv, -// mpi::get_type(), -// recv_rank, -// 0, -// MPI_COMM_WORLD, -// MPI_STATUS_IGNORE); -// } else { -// raise::Error("CommunicateField called with negative ranks", HERE); -// } -// if (recv_rank >= 0) { -// // // !TODO: perhaps directly recv to the fld? -// // if (not additive) { -// // if constexpr (D == Dim::_1D) { -// // Kokkos::deep_copy(Kokkos::subview(fld, recv_slice[0], comps), recv_fld); -// // } else if constexpr (D == Dim::_2D) { -// // Kokkos::deep_copy( -// // Kokkos::subview(fld, recv_slice[0], recv_slice[1], comps), -// // recv_fld); -// // } else if constexpr (D == Dim::_3D) { -// // Kokkos::deep_copy( -// // Kokkos::subview(fld, recv_slice[0], recv_slice[1], recv_slice[2], comps), -// // recv_fld); -// // } -// // } else { -// // if constexpr (D == Dim::_1D) { -// // const auto offset_x1 = recv_slice[0].first; -// // const auto offset_c = comps.first; -// // Kokkos::parallel_for( -// // "CommunicateField-extract", -// // Kokkos::MDRangePolicy, AccelExeSpace>( -// // { recv_slice[0].first, comps.first }, -// // { recv_slice[0].second, comps.second }), -// // Lambda(index_t i1, index_t ci) { -// // fld(i1, ci) += recv_fld(i1 - offset_x1, ci - offset_c); -// // }); -// // } else if constexpr (D == Dim::_2D) { -// // const auto offset_x1 = recv_slice[0].first; -// // const auto offset_x2 = recv_slice[1].first; -// // const auto offset_c = comps.first; -// // Kokkos::parallel_for( -// // "CommunicateField-extract", -// // Kokkos::MDRangePolicy, AccelExeSpace>( -// // { recv_slice[0].first, recv_slice[1].first, comps.first }, -// // { recv_slice[0].second, recv_slice[1].second, comps.second }), -// // Lambda(index_t i1, index_t i2, index_t ci) { -// // fld(i1, i2, ci) += recv_fld(i1 - offset_x1, -// // i2 - offset_x2, -// // ci - offset_c); -// // }); -// // } else if constexpr (D == Dim::_3D) { -// // const auto offset_x1 = recv_slice[0].first; -// // const auto offset_x2 = recv_slice[1].first; -// // const auto offset_x3 = recv_slice[2].first; -// // const auto offset_c = comps.first; -// // Kokkos::parallel_for( -// // "CommunicateField-extract", -// // Kokkos::MDRangePolicy, AccelExeSpace>( -// // { recv_slice[0].first, -// // recv_slice[1].first, -// // recv_slice[2].first, -// // comps.first }, -// // { recv_slice[0].second, -// // recv_slice[1].second, -// // recv_slice[2].second, -// // comps.second }), -// // Lambda(index_t i1, index_t i2, index_t i3, index_t ci) { -// // fld(i1, i2, i3, ci) += recv_fld(i1 - offset_x1, -// // i2 - offset_x2, -// // i3 - offset_x3, -// // ci - offset_c); -// // }); -// // } -// // } -// } -// } - #endif // FRAMEWORK_DOMAIN_COMM_MPI_HPP From fabeaa98e20ff281f693758670d44989cc05ca1c Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 03:43:23 -0400 Subject: [PATCH 36/53] extern + docker --- docker-compose.yml | 2 +- extern/Kokkos | 2 +- extern/adios2 | 2 +- extern/toml11 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 3de0516a3..cb0093f4b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,7 +30,7 @@ services: entity-rocm: container_name: entity_rocm devices: - - "/dev/fdk" + - "/dev/kfd" - "/dev/dri" security_opt: - seccomp:unconfined diff --git a/extern/Kokkos b/extern/Kokkos index 486cc745c..eb11070f6 160000 --- a/extern/Kokkos +++ b/extern/Kokkos @@ -1 +1 @@ -Subproject commit 486cc745cb9a287f3915061455105a3ee588c616 +Subproject commit eb11070f67565b2e660659f5207f0363bdf3b882 diff --git a/extern/adios2 b/extern/adios2 index c52d755bb..b8761e2af 160000 --- a/extern/adios2 +++ b/extern/adios2 @@ -1 +1 @@ -Subproject commit c52d755bb2ef1730a4a93b9243e2414e5a2d8456 +Subproject commit b8761e2afab2cd05b89d09b2ee4da1cd7a834225 diff --git a/extern/toml11 b/extern/toml11 index 85faca9cb..12c0f379f 160000 --- a/extern/toml11 +++ b/extern/toml11 @@ -1 +1 @@ -Subproject commit 85faca9cbe8d76324ff38c1801be44c63e12d5be +Subproject commit 12c0f379f2e865b4ce984758d5ae004f9de07d69 From 4ff47edefafe0880d693ae487480773fa29dfc2b Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 04:00:33 -0400 Subject: [PATCH 37/53] action upd --- .github/workflows/actions.yml | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index c9a7a314b..c639ca298 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -11,23 +11,28 @@ jobs: strategy: fail-fast: false matrix: - gpu: ["amd", "nvidia"] - precision: ["double", "single"] - runs-on: [self-hosted, "${{ matrix.gpu }}-gpu"] + device: [amd-gpu, nvidia-gpu] + precision: [double, single] + exclude: + - device: amd-gpu + precision: double + # my AMD GPUs doesn't support fp64 atomics : ( + runs-on: [self-hosted, "${{ matrix.device }}"] steps: - name: Checkout uses: actions/checkout@v3.3.0 - name: Configure run: | - if [ "${{ matrix.gpu }}" = "nvidia" ]; then + if [ "${{ matrix.device }}" = "nvidia-gpu" ]; then FLAGS="-D Kokkos_ENABLE_CUDA=ON -D Kokkos_ARCH_AMPERE86=ON" - elif [ "${{ matrix.gpu }}" = "amd" ]; then + elif [ "${{ matrix.device }}" = "amd-gpu" ]; then FLAGS="-D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_AMD_GFX1100=ON" + export HSA_OVERRIDE_GFX_VERSION=11.0.0 HIP_VISIBLE_DEVICES=0 ROCR_VISIBLE_DEVICES=0 fi cmake -B build -D TESTS=ON -D output=ON -D precision=${{ matrix.precision }} $FLAGS - name: Compile run: | - cmake --build build -j $(exec nproc) + cmake --build build - name: Run tests run: | - ctest --test-dir build --output-on-failure --verbose \ No newline at end of file + ctest --test-dir build --output-on-failure --verbose From 81a9663b77829870bae1073fbda0ba3515a76973 Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 04:44:45 -0400 Subject: [PATCH 38/53] amd runner --- .gitignore | 1 + dev/runners/Dockerfile.runner.rocm | 85 ++++++++++++++++++++++++++++++ dev/runners/README.md | 20 +++++-- 3 files changed, 102 insertions(+), 4 deletions(-) create mode 100644 dev/runners/Dockerfile.runner.rocm diff --git a/.gitignore b/.gitignore index 76f69e364..20bfe33a3 100644 --- a/.gitignore +++ b/.gitignore @@ -53,3 +53,4 @@ Testing/ .schema.json *_old/ +action-token diff --git a/dev/runners/Dockerfile.runner.rocm b/dev/runners/Dockerfile.runner.rocm new file mode 100644 index 000000000..97059c9b9 --- /dev/null +++ b/dev/runners/Dockerfile.runner.rocm @@ -0,0 +1,85 @@ +FROM rocm/rocm-terminal:latest + +USER root +ENV PATH=/opt/rocm/bin:$PATH + +ARG DEBIAN_FRONTEND=noninteractive +ENV DISPLAY=host.docker.internal:0.0 + +# upgrade +RUN apt-get update && apt-get upgrade -y + +# cmake & build tools +RUN apt-get remove -y --purge cmake && \ + apt-get install -y sudo wget curl build-essential && \ + wget "https://github.com/Kitware/CMake/releases/download/v3.29.6/cmake-3.29.6-linux-x86_64.tar.gz" -P /opt && \ + tar xvf /opt/cmake-3.29.6-linux-x86_64.tar.gz -C /opt && \ + rm /opt/cmake-3.29.6-linux-x86_64.tar.gz +ENV PATH=/opt/cmake-3.29.6-linux-x86_64/bin:$PATH + +# adios2 +RUN apt-get update && apt-get install -y git libhdf5-dev && \ + git clone https://github.com/ornladios/ADIOS2.git /opt/adios2-src && \ + cd /opt/adios2-src && \ + cmake -B build \ + -D CMAKE_CXX_STANDARD=17 \ + -D CMAKE_CXX_EXTENSIONS=OFF \ + -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \ + -D BUILD_SHARED_LIBS=ON \ + -D ADIOS2_USE_HDF5=ON \ + -D ADIOS2_USE_Python=OFF \ + -D ADIOS2_USE_Fortran=OFF \ + -D ADIOS2_USE_ZeroMQ=OFF \ + -D BUILD_TESTING=OFF \ + -D ADIOS2_BUILD_EXAMPLES=OFF \ + -D ADIOS2_USE_MPI=OFF \ + -D ADIOS2_HAVE_HDF5_VOL=OFF \ + -D CMAKE_INSTALL_PREFIX=/opt/adios2 && \ + cmake --build build -j && \ + cmake --install build && \ + rm -rf /opt/adios2-src + +ENV HDF5_ROOT=/usr +ENV ADIOS2_DIR=/opt/adios2 +ENV PATH=/opt/adios2/bin:$PATH + +# additional ROCm packages +RUN git clone -b release/rocm-rel-6.1.1.1 https://github.com/ROCm/rocThrust.git /opt/rocthrust-src && \ + git clone -b release/rocm-rel-6.1.00.36 https://github.com/ROCm/rocPRIM.git /opt/rocprim-src && \ + cd /opt/rocthrust-src && ./install --install && \ + cd /opt/rocprim-src && ./install --install && \ + rm -rf /opt/rocthrust-src /opt/rocprim-src + +ENV CMAKE_PREFIX_PATH=/opt/rocm +ENV CC=hipcc +ENV CXX=hipcc + +# cleanup +RUN apt-get clean && \ + apt-get autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/cache/* && \ + rm -rf /var/lib/log/* && \ + rm -rf /var/lib/apt/lists/* + +ARG USER=runner +RUN useradd -ms /usr/bin/zsh $USER && \ + usermod -aG sudo $USER && \ + echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers + +USER $USER +ARG HOME=/home/$USER +WORKDIR $HOME + +# gh runner +RUN mkdir actions-runner +WORKDIR $HOME/actions-runner + +RUN --mount=type=secret,id=ghtoken \ + curl -o actions-runner-linux-x64-2.317.0.tar.gz \ + -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz && \ + tar xzf ./actions-runner-linux-x64-2.317.0.tar.gz && \ + sudo ./bin/installdependencies.sh && \ + ./config.sh --url https://github.com/entity-toolkit/entity --token "$(sudo cat /run/secrets/ghtoken)" --labels amd-gpu + +ENTRYPOINT ["./run.sh"] diff --git a/dev/runners/README.md b/dev/runners/README.md index 526068bea..2b9a5b4f6 100644 --- a/dev/runners/README.md +++ b/dev/runners/README.md @@ -4,12 +4,24 @@ GitHub allows to listen to repository changes and run the so-called "actions" (e To do that, one needs to create an image with the corresponding `Dockerfile`, and then launch a Docker container which will run in the background, listening to commands and running any actions forwarded from the GitHub. +First, you will need to obtain a runner token from the Entity GitHub repo, by going to Settings -> Actions -> Runners -> New self-hosted runner. Copy the token, and save it to a file: + +```sh +echo ******TOKEN****** > action-token +``` + +Images differ slightly from the type of runner. + ### NVIDIA GPUs ```sh -# 1. Create the image -docker build -t ghrunner:nvidia -f Dockerfile.runner.nvidia . -# 2. Run a container from the image with GPU support -# ... (see wiki for instructions on NVIDIA runtime) +docker build -t ghrunner:nvidia -f Dockerfile.runner.nvidia --secret id=ghtoken,src=./action-token . docker run --runtime=nvidia --gpus=all -dt ghrunner:nvidia ``` + +### AMD GPUs + +```sh +docker build -t ghrunner:amd -f Dockerfile.runner.rocm --secret id=ghtoken,src=./action-token . +docker run --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video -dt ghrunner:amd +``` From a86b33634f7a497d651fee89bf7579d18ed1a3ac Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 05:26:59 -0400 Subject: [PATCH 39/53] actions upd --- .github/workflows/actions.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index c639ca298..253e96ed1 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -27,12 +27,11 @@ jobs: FLAGS="-D Kokkos_ENABLE_CUDA=ON -D Kokkos_ARCH_AMPERE86=ON" elif [ "${{ matrix.device }}" = "amd-gpu" ]; then FLAGS="-D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_AMD_GFX1100=ON" - export HSA_OVERRIDE_GFX_VERSION=11.0.0 HIP_VISIBLE_DEVICES=0 ROCR_VISIBLE_DEVICES=0 fi cmake -B build -D TESTS=ON -D output=ON -D precision=${{ matrix.precision }} $FLAGS - name: Compile run: | - cmake --build build + cmake --build build -j $(nproc) - name: Run tests run: | ctest --test-dir build --output-on-failure --verbose From e478ccd9e4fff69b6f7b780d69ca4b963ebe2d79 Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 16:53:58 -0400 Subject: [PATCH 40/53] actions updated --- .github/workflows/actions.yml | 9 +++++++- ...e.runner.nvidia => Dockerfile.runner.cuda} | 6 +++--- dev/runners/Dockerfile.runner.rocm | 17 ++++++++------- dev/runners/README.md | 16 +++++--------- dev/runners/start.sh | 21 +++++++++++++++++++ 5 files changed, 47 insertions(+), 22 deletions(-) rename dev/runners/{Dockerfile.runner.nvidia => Dockerfile.runner.cuda} (93%) create mode 100644 dev/runners/start.sh diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index 253e96ed1..68a0a7f4a 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -24,7 +24,14 @@ jobs: - name: Configure run: | if [ "${{ matrix.device }}" = "nvidia-gpu" ]; then - FLAGS="-D Kokkos_ENABLE_CUDA=ON -D Kokkos_ARCH_AMPERE86=ON" + FLAGS="-D Kokkos_ENABLE_CUDA=ON" + if [[ ! -z $(nvidia-smi | grep "V100" ]]; then + FLAGS+=" -D Kokkos_ARCH_VOLTA70=ON" + elif [[ ! -z $(nvidia-smi | grep "A100" ]]; then + FLAGS+=" -D Kokkos_ARCH_AMPERE80=ON" + else + FLAGS+=" -D Kokkos_ARCH_AMPERE86=ON" + fi elif [ "${{ matrix.device }}" = "amd-gpu" ]; then FLAGS="-D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_AMD_GFX1100=ON" fi diff --git a/dev/runners/Dockerfile.runner.nvidia b/dev/runners/Dockerfile.runner.cuda similarity index 93% rename from dev/runners/Dockerfile.runner.nvidia rename to dev/runners/Dockerfile.runner.cuda index d7925ec6a..4ff132990 100644 --- a/dev/runners/Dockerfile.runner.nvidia +++ b/dev/runners/Dockerfile.runner.cuda @@ -59,14 +59,14 @@ ARG HOME=/home/$USER WORKDIR $HOME # gh runner -ARG TOKEN RUN mkdir actions-runner WORKDIR $HOME/actions-runner -RUN curl -o actions-runner-linux-x64-2.317.0.tar.gz \ +RUN --mount=type=secret,id=ghtoken \ + curl -o actions-runner-linux-x64-2.317.0.tar.gz \ -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz && \ tar xzf ./actions-runner-linux-x64-2.317.0.tar.gz && \ sudo ./bin/installdependencies.sh && \ - ./config.sh --url https://github.com/entity-toolkit/entity --token $TOKEN --labels nvidia-gpu + ./config.sh --url https://github.com/entity-toolkit/entity --token "$(sudo cat /run/secrets/ghtoken)" --labels nvidia-gpu ENTRYPOINT ["./run.sh"] diff --git a/dev/runners/Dockerfile.runner.rocm b/dev/runners/Dockerfile.runner.rocm index 97059c9b9..63579dcd5 100644 --- a/dev/runners/Dockerfile.runner.rocm +++ b/dev/runners/Dockerfile.runner.rocm @@ -72,14 +72,17 @@ ARG HOME=/home/$USER WORKDIR $HOME # gh runner +ARG RUNNER_VERSION=2.317.0 RUN mkdir actions-runner WORKDIR $HOME/actions-runner -RUN --mount=type=secret,id=ghtoken \ - curl -o actions-runner-linux-x64-2.317.0.tar.gz \ - -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz && \ - tar xzf ./actions-runner-linux-x64-2.317.0.tar.gz && \ - sudo ./bin/installdependencies.sh && \ - ./config.sh --url https://github.com/entity-toolkit/entity --token "$(sudo cat /run/secrets/ghtoken)" --labels amd-gpu +RUN curl -o actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz \ + -L https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz && \ + tar xzf ./actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz && \ + sudo ./bin/installdependencies.sh -ENTRYPOINT ["./run.sh"] +ADD start.sh start.sh +RUN sudo chown $USER:$USER start.sh && \ + sudo chmod +x start.sh + +ENTRYPOINT ["./start.sh"] diff --git a/dev/runners/README.md b/dev/runners/README.md index 2b9a5b4f6..08d0cd176 100644 --- a/dev/runners/README.md +++ b/dev/runners/README.md @@ -4,24 +4,18 @@ GitHub allows to listen to repository changes and run the so-called "actions" (e To do that, one needs to create an image with the corresponding `Dockerfile`, and then launch a Docker container which will run in the background, listening to commands and running any actions forwarded from the GitHub. -First, you will need to obtain a runner token from the Entity GitHub repo, by going to Settings -> Actions -> Runners -> New self-hosted runner. Copy the token, and save it to a file: - -```sh -echo ******TOKEN****** > action-token -``` - -Images differ slightly from the type of runner. +First, you will need to obtain a runner token from the Entity GitHub repo, by going to Settings -> Actions -> Runners -> New self-hosted runner. Copy the token to use it later. The images differ slightly from the type of runner. ### NVIDIA GPUs ```sh -docker build -t ghrunner:nvidia -f Dockerfile.runner.nvidia --secret id=ghtoken,src=./action-token . -docker run --runtime=nvidia --gpus=all -dt ghrunner:nvidia +docker build -t ghrunner:nvidia -f Dockerfile.runner.cuda . +docker run -e TOKEN= -e LABEL=nvidia-gpu --runtime=nvidia --gpus=all -dt ghrunner:nvidia ``` ### AMD GPUs ```sh -docker build -t ghrunner:amd -f Dockerfile.runner.rocm --secret id=ghtoken,src=./action-token . -docker run --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video -dt ghrunner:amd +docker build -t ghrunner:amd -f Dockerfile.runner.rocm . +docker run -e TOKEN= -e LABEL=amd-gpu --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video -dt ghrunner:amd ``` diff --git a/dev/runners/start.sh b/dev/runners/start.sh new file mode 100644 index 000000000..c44e26f16 --- /dev/null +++ b/dev/runners/start.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +./config.sh --unattended --url https://github.com/entity-toolkit/entity --token ${TOKEN} --labels ${LABEL} + +if [[ ${LABEL} == "amd-gpu" ]]; then + echo "AMD GPU runner detected" + export HSA_OVERRIDE_GFX_VERSION=11.0.0 HIP_VISIBLE_DEVICES=0 ROCR_VISIBLE_DEVICES=0 +else + echo "Non-AMD runner" +fi + +cleanup() { + echo "Removing runner..." + ./config.sh remove --unattended --token ${TOKEN} +} + +trap 'cleanup; exit 130' INT +trap 'cleanup; exit 143' TERM + +./run.sh & +wait $! From 1b22bf3292f7ebde54fdd74af3bfd13359d61b2e Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 16:55:10 -0400 Subject: [PATCH 41/53] actions updated --- .github/workflows/actions.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index 68a0a7f4a..04bc34050 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -25,9 +25,9 @@ jobs: run: | if [ "${{ matrix.device }}" = "nvidia-gpu" ]; then FLAGS="-D Kokkos_ENABLE_CUDA=ON" - if [[ ! -z $(nvidia-smi | grep "V100" ]]; then + if [[ ! -z $(nvidia-smi | grep "V100") ]]; then FLAGS+=" -D Kokkos_ARCH_VOLTA70=ON" - elif [[ ! -z $(nvidia-smi | grep "A100" ]]; then + elif [[ ! -z $(nvidia-smi | grep "A100") ]]; then FLAGS+=" -D Kokkos_ARCH_AMPERE80=ON" else FLAGS+=" -D Kokkos_ARCH_AMPERE86=ON" From 46817b84bc12a83f336480b242445c103dd33f7c Mon Sep 17 00:00:00 2001 From: hayk Date: Sun, 28 Jul 2024 17:06:30 -0400 Subject: [PATCH 42/53] mv shock to regular directory --- setups/{wip => srpic}/shock/pgen.hpp | 24 ------------------------ setups/srpic/shock/shock.py | 0 setups/{wip => srpic}/shock/shock.toml | 12 +++++++----- 3 files changed, 7 insertions(+), 29 deletions(-) rename setups/{wip => srpic}/shock/pgen.hpp (65%) create mode 100644 setups/srpic/shock/shock.py rename setups/{wip => srpic}/shock/shock.toml (84%) diff --git a/setups/wip/shock/pgen.hpp b/setups/srpic/shock/pgen.hpp similarity index 65% rename from setups/wip/shock/pgen.hpp rename to setups/srpic/shock/pgen.hpp index 4b1cd392e..f07b99878 100644 --- a/setups/wip/shock/pgen.hpp +++ b/setups/srpic/shock/pgen.hpp @@ -14,23 +14,6 @@ namespace user { using namespace ntt; - template - struct DriftDist : public arch::EnergyDistribution { - DriftDist(const M& metric, real_t ux) - : arch::EnergyDistribution { metric } - , ux { ux } {} - - Inline void operator()(const coord_t&, - vec_t& v, - unsigned short) const override { - v[0] = -ux; - v[1] = 0.1 * ux; - } - - private: - const real_t ux; - }; - template struct PGen : public arch::ProblemGenerator { // compatibility traits for the problem generator @@ -68,13 +51,6 @@ namespace user { local_domain, injector, 1.0); - // const auto energy_dist = DriftDist(local_domain.mesh.metric, drift_ux); - // const auto injector = arch::UniformInjector(energy_dist, - // { 1, 2 }); - // arch::InjectUniform>(params, - // local_domain, - // injector, - // 1.0); } }; diff --git a/setups/srpic/shock/shock.py b/setups/srpic/shock/shock.py new file mode 100644 index 000000000..e69de29bb diff --git a/setups/wip/shock/shock.toml b/setups/srpic/shock/shock.toml similarity index 84% rename from setups/wip/shock/shock.toml rename to setups/srpic/shock/shock.toml index c67ee181a..150face38 100644 --- a/setups/wip/shock/shock.toml +++ b/setups/srpic/shock/shock.toml @@ -16,7 +16,7 @@ [scales] larmor0 = 1e-2 - skindepth0 = 1e-3 + skindepth0 = 1e-2 [algorithms] current_filters = 4 @@ -40,10 +40,12 @@ maxnpart = 1e7 [setup] - drift_ux = -0.1 - temperature = 0.01 + drift_ux = 0.1 + temperature = 1e-4 [output] - fields = ["N_1", "N_2", "E", "B", "J"] - format = "hdf5" interval_time = 0.1 + format = "hdf5" + + [output.fields] + quantities = ["N_1", "N_2", "E", "B", "J"] From 82aeccd184cf57962bb16c5eafc075719906894e Mon Sep 17 00:00:00 2001 From: hayk Date: Sun, 28 Jul 2024 17:10:06 -0400 Subject: [PATCH 43/53] shock upd --- setups/srpic/shock/shock.py | 75 +++++++++++++++++++++++++++++++++++ setups/srpic/shock/shock.toml | 26 ++++++------ 2 files changed, 88 insertions(+), 13 deletions(-) diff --git a/setups/srpic/shock/shock.py b/setups/srpic/shock/shock.py index e69de29bb..64224c728 100644 --- a/setups/srpic/shock/shock.py +++ b/setups/srpic/shock/shock.py @@ -0,0 +1,75 @@ +import nt2.read as nt2r +import matplotlib.pyplot as plt +import matplotlib as mpl + +data = nt2r.Data("shock-03.h5") + + +def frame(ti, f): + quantities = [ + { + "name": "density", + "compute": lambda f: f.N_2 + f.N_1, + "cmap": "inferno", + "norm": mpl.colors.Normalize(0, 5), + }, + { + "name": r"$E_x$", + "compute": lambda f: f.Ex, + "cmap": "RdBu_r", + "norm": mpl.colors.Normalize(-0.05, 0.05), + }, + { + "name": r"$E_y$", + "compute": lambda f: f.Ey, + "cmap": "RdBu_r", + "norm": mpl.colors.Normalize(-0.05, 0.05), + }, + { + "name": r"$E_z$", + "compute": lambda f: f.Ez, + "cmap": "RdBu_r", + "norm": mpl.colors.Normalize(-0.05, 0.05), + }, + { + "name": r"$B_x$", + "compute": lambda f: f.Bx, + "cmap": "BrBG", + "norm": mpl.colors.Normalize(-0.05, 0.05), + }, + { + "name": r"$B_y$", + "compute": lambda f: f.By, + "cmap": "BrBG", + "norm": mpl.colors.Normalize(-0.05, 0.05), + }, + { + "name": r"$B_z$", + "compute": lambda f: f.Bz, + "cmap": "BrBG", + "norm": mpl.colors.Normalize(-0.05, 0.05), + }, + ] + fig = plt.figure(figsize=(12, 5.5), dpi=300) + gs = fig.add_gridspec(len(quantities), 1, hspace=0.02) + axs = [fig.add_subplot(gs[i]) for i in range(len(quantities))] + + for ax, q in zip(axs, quantities): + q["compute"](f).coarsen(x=2, y=2).mean().plot( + ax=ax, + cmap=q["cmap"], + norm=q["norm"], + cbar_kwargs={"label": q["name"], "shrink": 0.8, "aspect": 10, "pad": 0.005}, + ) + for i, ax in enumerate(axs): + ax.set(aspect=1) + if i != 0: + ax.set(title=None) + if i != len(axs) - 1: + ax.set( + xticks=[], + xticklabels=[], + xlabel=None, + title=ax.get_title().split(",")[0], + ) + return fig diff --git a/setups/srpic/shock/shock.toml b/setups/srpic/shock/shock.toml index 150face38..f48edb2d6 100644 --- a/setups/srpic/shock/shock.toml +++ b/setups/srpic/shock/shock.toml @@ -1,7 +1,7 @@ [simulation] name = "shock" engine = "srpic" - runtime = 100.0 + runtime = 50.0 [grid] resolution = [2048, 128] @@ -19,33 +19,33 @@ skindepth0 = 1e-2 [algorithms] - current_filters = 4 + current_filters = 8 [algorithms.timestep] CFL = 0.5 [particles] - ppc0 = 4.0 + ppc0 = 16.0 [[particles.species]] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e7 + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e8 [[particles.species]] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 1e7 + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e8 [setup] drift_ux = 0.1 - temperature = 1e-4 + temperature = 1e-3 [output] interval_time = 0.1 format = "hdf5" [output.fields] - quantities = ["N_1", "N_2", "E", "B", "J"] + quantities = ["N_1", "N_2", "E", "B", "T0i_1", "T0i_2", "J"] From 8d97b6b0e56d606b8f00b21f56a3736190946599 Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 18:48:32 -0400 Subject: [PATCH 44/53] passing domain to customout --- dev/welcome.rocm | 11 ++++++----- setups/tests/customout/customout.toml | 2 +- setups/tests/customout/pgen.hpp | 15 ++++++++++++++- src/engines/engine_run.cpp | 12 +++++++----- src/framework/domain/metadomain.h | 2 +- src/framework/domain/output.cpp | 5 +++-- 6 files changed, 32 insertions(+), 15 deletions(-) diff --git a/dev/welcome.rocm b/dev/welcome.rocm index 35e5e15d9..780375684 100644 --- a/dev/welcome.rocm +++ b/dev/welcome.rocm @@ -68,7 +68,8 @@ else python_version=${error_msg} fi -str=$(cat <${Reset} ## Testing the code * To build the tests, remove the \`build\` directory, and preconfigure with: - ${FgGreen}$ cmake -B build -D TESTS=ON -D output=ON -D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_***=ON${Reset} + ${FgGreen}$ cmake -B build -D TESTS=ON -D output=ON -D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_AMD_***=ON${Reset} * Then run the tests with: ${FgGreen}$ ctest --test-dir build${Reset} @@ -117,5 +119,4 @@ ${Red}Enjoy!${Reset} 🚀 EOF ) - printf "$str\n" diff --git a/setups/tests/customout/customout.toml b/setups/tests/customout/customout.toml index e9c7671d6..497b96dc2 100644 --- a/setups/tests/customout/customout.toml +++ b/setups/tests/customout/customout.toml @@ -47,4 +47,4 @@ [output.fields] quantities = ["E", "B", "J"] - custom = ["mybuff"] + custom = ["mybuff", "EdotB+1"] diff --git a/setups/tests/customout/pgen.hpp b/setups/tests/customout/pgen.hpp index 5d1ff2a6a..22c8f6564 100644 --- a/setups/tests/customout/pgen.hpp +++ b/setups/tests/customout/pgen.hpp @@ -56,12 +56,25 @@ namespace user { void CustomFieldOutput(const std::string& name, ndfield_t buffer, std::size_t index, - const range_t) { + const Domain& domain) { printf("CustomFieldOutput: %s\n", name.c_str()); + // examples for 2D if (name == "mybuff") { // copy the custom buffer to the buffer output Kokkos::deep_copy(Kokkos::subview(buffer, Kokkos::ALL, Kokkos::ALL, index), cbuff); + } else if (name == "EdotB+1") { + // calculate the custom buffer from EM fields + const auto& EM = domain.fields.em; + Kokkos::parallel_for( + "EdotB+1", + domain.mesh.rangeActiveCells(), + Lambda(index_t i1, index_t i2) { + buffer(i1, i2, index) = EM(i1, i2, em::ex1) * EM(i1, i2, em::bx1) + + EM(i1, i2, em::ex2) * EM(i1, i2, em::bx2) + + EM(i1, i2, em::ex3) * EM(i1, i2, em::bx3) + + ONE; + }); } else { raise::Error("Custom output not provided", HERE); } diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index 1694db15c..4485e0e40 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -9,6 +9,8 @@ #include "metrics/qspherical.h" #include "metrics/spherical.h" +#include "framework/domain/domain.h" + #include "engines/engine.hpp" namespace ntt { @@ -63,11 +65,11 @@ namespace ntt { timers.start("Output"); if constexpr ( traits::has_method::value) { - auto lambda_custom_field_output = [&](const std::string& name, - ndfield_t& buff, - std::size_t idx, - const range_t& range) { - m_pgen.CustomFieldOutput(name, buff, idx, range); + auto lambda_custom_field_output = [&](const std::string& name, + ndfield_t& buff, + std::size_t idx, + const Domain& dom) { + m_pgen.CustomFieldOutput(name, buff, idx, dom); }; print_output = m_metadomain.Write(m_params, step, diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 375c476c0..fb81fcfca 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -119,7 +119,7 @@ namespace ntt { std::function&, std::size_t, - const range_t&)> = {}) -> bool; + const Domain&)> = {}) -> bool; #endif Metadomain(const Metadomain&) = delete; diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 272cf1134..cabb37093 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -14,6 +14,7 @@ #include "metrics/spherical.h" #include "framework/containers/particles.h" +#include "framework/domain/domain.h" #include "framework/domain/metadomain.h" #include "framework/parameters.h" @@ -168,7 +169,7 @@ namespace ntt { std::size_t step, long double time, std::function< - void(const std::string&, ndfield_t&, std::size_t, const range_t&)> + void(const std::string&, ndfield_t&, std::size_t, const Domain&)> CustomFieldOutput) -> bool { raise::ErrorIf( local_subdomain_indices().size() != 1, @@ -297,7 +298,7 @@ namespace ntt { CustomFieldOutput(fld.name().substr(1), local_domain->fields.bckp, addresses.back(), - local_domain->mesh.rangeActiveCells()); + *local_domain); } else { raise::Error("Custom output requested but no function provided", HERE); From 3cc100605c98d9ce02630af6d98ca31066a3e0bc Mon Sep 17 00:00:00 2001 From: hayk Date: Sat, 6 Jul 2024 21:17:22 -0400 Subject: [PATCH 45/53] pgen-specific field output --- input.example.toml | 4 ++++ src/engines/engine_run.cpp | 15 +++++++++++- src/framework/domain/metadomain.h | 8 ++++++- src/framework/domain/output.cpp | 40 +++++++++++++++++++++++-------- src/framework/parameters.cpp | 8 ++++++- src/global/arch/traits.h | 4 ++++ src/global/enums.h | 15 ++++++------ src/global/tests/enums.cpp | 5 ++-- src/output/fields.cpp | 10 +++++--- src/output/fields.h | 5 ++++ 10 files changed, 89 insertions(+), 25 deletions(-) diff --git a/input.example.toml b/input.example.toml index 2423ffff0..88589495c 100644 --- a/input.example.toml +++ b/input.example.toml @@ -336,6 +336,10 @@ # @note: For T, in cartesian can also use "x" "y" "z" instead of "1" "2" "3" # @note: By default, we accumulate moments from all massive species, one can specify only specific species: e.g., Ttt_1_2, Rho_1, Rho_3_4 quantities = "" + # Custom (user-defined) field quantities: + # @type: array of strings + # @default: [] + custom = "" # @NOT_IMPLEMENTED: Stride for the output of fields: # @type: unsigned short: > 1 # @default: 1 diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index f69b4301e..771287c0d 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -61,7 +61,20 @@ namespace ntt { auto print_output = false; #if defined(OUTPUT_ENABLED) timers.start("Output"); - print_output = m_metadomain.Write(m_params, step, time); + if constexpr ( + traits::has_method::value) { + auto lambda_custom_field_output = [&m_pgen](const std::string& name, + ndfield_t& buff, + std::size_t idx) { + m_pgen.CustomFieldOutput(name, buff, idx); + }; + print_output = m_metadomain.Write(m_params, + step, + time, + lambda_custom_field_output); + } else { + print_output = m_metadomain.Write(m_params, step, time); + } timers.stop("Output"); #endif diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 74e5da2ce..a44fb3c09 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -33,6 +33,7 @@ #include "output/writer.h" #endif +#include #include #include #include @@ -111,7 +112,12 @@ namespace ntt { #if defined(OUTPUT_ENABLED) void InitWriter(const SimulationParams&); - auto Write(const SimulationParams&, std::size_t, long double) -> bool; + auto Write( + const SimulationParams&, + std::size_t, + long double, + std::function&, std::size_t)> = {}) + -> bool; #endif Metadomain(const Metadomain&) = delete; diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 77c41ee25..243ceb614 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -25,6 +25,8 @@ #include #include +#include +#include #include namespace ntt { @@ -61,9 +63,17 @@ namespace ntt { M::CoordType); const auto fields_to_write = params.template get>( "output.fields.quantities"); + const auto custom_fields_to_write = params.template get>( + "output.fields.custom"); + const std::vector all_fields_to_write; + std::merge(fields_to_write.begin(), + fields_to_write.end(), + custom_fields_to_write.begin(), + custom_fields_to_write.end(), + std::back_inserter(all_fields_to_write)); const auto species_to_write = params.template get>( "output.particles.species"); - g_writer.defineFieldOutputs(S, fields_to_write); + g_writer.defineFieldOutputs(S, all_fields_to_write); g_writer.defineParticleOutputs(M::PrtlDim, species_to_write); // spectra write all particle species std::vector spectra_species {}; @@ -153,9 +163,12 @@ namespace ntt { } template - auto Metadomain::Write(const SimulationParams& params, - std::size_t step, - long double time) -> bool { + auto Metadomain::Write( + const SimulationParams& params, + std::size_t step, + long double time, + std::function&, std::size_t)> CustomOutput) + -> bool { raise::ErrorIf( local_subdomain_indices().size() != 1, "Output for now is only supported for one subdomain per rank", @@ -278,14 +291,21 @@ namespace ntt { } else { raise::Error("Wrong moment requested for output", HERE); } - SynchronizeFields(*local_domain, - Comm::Bckp, - { addresses.back(), addresses.back() + 1 }); + } else if (fld.is_custom()) { + if (CustomOutput) { + CustomOutput(fld.name(), local_domain->fields.bckp, addresses.back()); + } else { + raise::Error("Custom output requested but no function provided", + HERE); + } } else { - raise::Error( - "Wrong # of components requested for non-moment output", - HERE); + raise::Error("Wrong # of components requested for " + "non-moment/non-custom output", + HERE); } + SynchronizeFields(*local_domain, + Comm::Bckp, + { addresses.back(), addresses.back() + 1 }); } else if (fld.comp.size() == 3) { // vector for (auto i = 0; i < 3; ++i) { names.push_back(fld.name(i)); diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index 19f9a4683..6e581a6b7 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -383,15 +383,21 @@ namespace ntt { promiseToDefine("output.spectra.interval_time"); promiseToDefine("output.spectra.enable"); - const auto flds_out = toml::find_or(raw_data, + const auto flds_out = toml::find_or(raw_data, "output", "fields", "quantities", std::vector {}); + const auto custom_flds_out = toml::find_or(raw_data, + "output", + "fields", + "custom", + std::vector {}); if (flds_out.size() == 0) { raise::Warning("No fields output specified", HERE); } set("output.fields.quantities", flds_out); + set("output.fields.custom", custom_flds_out); set("output.fields.mom_smooth", toml::find_or(raw_data, "output", diff --git a/src/global/arch/traits.h b/src/global/arch/traits.h index ea93a8571..e915bdf1a 100644 --- a/src/global/arch/traits.h +++ b/src/global/arch/traits.h @@ -13,6 +13,7 @@ * - traits::pgen::field_driver_t * - traits::pgen::init_prtls_t * - traits::pgen::custom_fields_t + * - traits::pgen::custom_field_output_t * - traits::pgen::custom_poststep_t * - traits::check_compatibility<> * - traits::compatibility<> @@ -100,6 +101,9 @@ namespace traits { template using custom_poststep_t = decltype(&T::CustomPostStep); + + template + using custom_field_output_t = decltype(&T::CustomFieldOutput); } // namespace pgen // for pgen extforce diff --git a/src/global/enums.h b/src/global/enums.h index 2dc492dac..57822dec4 100644 --- a/src/global/enums.h +++ b/src/global/enums.h @@ -13,7 +13,7 @@ * - enum ntt::PrtlPusher // boris, vay, photon, none * - enum ntt::Cooling // synchrotron, none * - enum ntt::FldsID // e, dive, d, divd, b, h, j, - * a, t, rho, charge, n, nppc + * a, t, rho, charge, n, nppc, custom * @namespaces: * - ntt:: * @note Enums of the same type can be compared with each other and with strings @@ -288,16 +288,17 @@ namespace ntt { Charge = 11, N = 12, Nppc = 13, + Custom = 14, }; constexpr FldsID(uint8_t c) : enums_hidden::BaseEnum { c } {} - static constexpr type variants[] = { E, divE, D, divD, B, H, J, - A, T, Rho, Charge, N, Nppc }; - static constexpr const char* lookup[] = { "e", "dive", "d", "divd", - "b", "h", "j", "a", - "t", "rho", "charge", "n", - "nppc" }; + static constexpr type variants[] = { E, divE, D, divD, B, H, J, + A, T, Rho, Charge, N, Nppc, Custom }; + static constexpr const char* lookup[] = { "e", "dive", "d", "divd", + "b", "h", "j", "a", + "t", "rho", "charge", "n", + "nppc", "custom" }; static constexpr std::size_t total = sizeof(variants) / sizeof(variants[0]); }; diff --git a/src/global/tests/enums.cpp b/src/global/tests/enums.cpp index 81e63d57a..1fc57398f 100644 --- a/src/global/tests/enums.cpp +++ b/src/global/tests/enums.cpp @@ -66,8 +66,9 @@ auto main() -> int { enum_str_t all_particle_pushers = { "boris", "vay", "photon", "none" }; enum_str_t all_coolings = { "synchrotron", "none" }; - enum_str_t all_out_flds = { "e", "dive", "d", "divd", "b", "h", "j", - "a", "t", "rho", "charge", "n", "nppc" }; + enum_str_t all_out_flds = { "e", "dive", "d", "divd", "b", + "h", "j", "a", "t", "rho", + "charge", "n", "nppc", "custom" }; checkEnum(all_coords); checkEnum(all_metrics); diff --git a/src/output/fields.cpp b/src/output/fields.cpp index e6a1f4e12..5f9b1bd47 100644 --- a/src/output/fields.cpp +++ b/src/output/fields.cpp @@ -24,7 +24,11 @@ namespace out { const auto pos = name.find("_"); auto name_raw = (pos == std::string::npos) ? name : name.substr(0, pos); name_raw = name_raw.substr(0, name_raw.find_first_of("0123ijxyzt")); - m_id = FldsID::pick(fmt::toLower(name_raw).c_str()); + if (FldsID::contains(fmt::toLower(name_raw).c_str())) { + m_id = FldsID::pick(fmt::toLower(name_raw).c_str()); + } else { + m_id = FldsID::Custom; + } // determine the species and components to output if (is_moment()) { species = InterpretSpecies(name); @@ -41,11 +45,11 @@ namespace out { // energy-momentum tensor comp = InterpretComponents({ name.substr(1, 1), name.substr(2, 1) }); } else { - // scalar (Rho, divE, etc.) + // scalar (Rho, divE, Custom, etc.) comp = {}; } // data preparation flags - if (not is_moment()) { + if (not is_moment() && not is_custom()) { if (S == SimEngine::SRPIC) { prepare_flag = PrepareOutput::ConvertToHat; } else { diff --git a/src/output/fields.h b/src/output/fields.h index d1399e981..dcc2deb0a 100644 --- a/src/output/fields.h +++ b/src/output/fields.h @@ -77,6 +77,11 @@ namespace out { return (id() == FldsID::A); } + [[nodiscard]] + auto is_custom() const -> bool { + return (id() == FldsID::Custom); + } + [[nodiscard]] inline auto name() const -> std::string { // generate the name From d343cf5b023505a09561c9a8312f826e4e45b7dd Mon Sep 17 00:00:00 2001 From: hayk Date: Sat, 6 Jul 2024 21:33:37 -0400 Subject: [PATCH 46/53] (minor) rm const --- src/framework/domain/output.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 243ceb614..290580a85 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -65,7 +65,7 @@ namespace ntt { "output.fields.quantities"); const auto custom_fields_to_write = params.template get>( "output.fields.custom"); - const std::vector all_fields_to_write; + std::vector all_fields_to_write; std::merge(fields_to_write.begin(), fields_to_write.end(), custom_fields_to_write.begin(), From 9f38e3a262041b5126c1e90a06a11c53133af400 Mon Sep 17 00:00:00 2001 From: hayk Date: Sat, 6 Jul 2024 21:38:58 -0400 Subject: [PATCH 47/53] (minor) naming --- src/engines/engine_run.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index 771287c0d..0809b0968 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -62,7 +62,7 @@ namespace ntt { #if defined(OUTPUT_ENABLED) timers.start("Output"); if constexpr ( - traits::has_method::value) { + traits::has_method::value) { auto lambda_custom_field_output = [&m_pgen](const std::string& name, ndfield_t& buff, std::size_t idx) { From 140420257a86715f3aa7fa52821f1b060b6d1212 Mon Sep 17 00:00:00 2001 From: hayk Date: Sat, 6 Jul 2024 21:41:16 -0400 Subject: [PATCH 48/53] (minor) address in lambda --- src/engines/engine_run.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index 0809b0968..1734dddc5 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -63,9 +63,9 @@ namespace ntt { timers.start("Output"); if constexpr ( traits::has_method::value) { - auto lambda_custom_field_output = [&m_pgen](const std::string& name, - ndfield_t& buff, - std::size_t idx) { + auto lambda_custom_field_output = [&](const std::string& name, + ndfield_t& buff, + std::size_t idx) { m_pgen.CustomFieldOutput(name, buff, idx); }; print_output = m_metadomain.Write(m_params, From 4e8a9cd3fb5d3792d13fe6890a941fff7e3ebe6e Mon Sep 17 00:00:00 2001 From: hayk Date: Sat, 6 Jul 2024 23:02:02 -0400 Subject: [PATCH 49/53] added range as arg --- src/engines/engine_run.cpp | 9 +++++---- src/framework/domain/metadomain.h | 14 ++++++++------ src/framework/domain/output.cpp | 12 ++++++++---- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index 1734dddc5..1694db15c 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -63,10 +63,11 @@ namespace ntt { timers.start("Output"); if constexpr ( traits::has_method::value) { - auto lambda_custom_field_output = [&](const std::string& name, - ndfield_t& buff, - std::size_t idx) { - m_pgen.CustomFieldOutput(name, buff, idx); + auto lambda_custom_field_output = [&](const std::string& name, + ndfield_t& buff, + std::size_t idx, + const range_t& range) { + m_pgen.CustomFieldOutput(name, buff, idx, range); }; print_output = m_metadomain.Write(m_params, step, diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index a44fb3c09..375c476c0 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -18,6 +18,7 @@ #include "enums.h" #include "global.h" +#include "arch/kokkos_aliases.h" #include "utils/timer.h" #include "framework/containers/species.h" @@ -112,12 +113,13 @@ namespace ntt { #if defined(OUTPUT_ENABLED) void InitWriter(const SimulationParams&); - auto Write( - const SimulationParams&, - std::size_t, - long double, - std::function&, std::size_t)> = {}) - -> bool; + auto Write(const SimulationParams&, + std::size_t, + long double, + std::function&, + std::size_t, + const range_t&)> = {}) -> bool; #endif Metadomain(const Metadomain&) = delete; diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 290580a85..2f6d75cb2 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -167,8 +167,9 @@ namespace ntt { const SimulationParams& params, std::size_t step, long double time, - std::function&, std::size_t)> CustomOutput) - -> bool { + std::function< + void(const std::string&, ndfield_t&, std::size_t, const range_t&)> + CustomFieldOutput) -> bool { raise::ErrorIf( local_subdomain_indices().size() != 1, "Output for now is only supported for one subdomain per rank", @@ -292,8 +293,11 @@ namespace ntt { raise::Error("Wrong moment requested for output", HERE); } } else if (fld.is_custom()) { - if (CustomOutput) { - CustomOutput(fld.name(), local_domain->fields.bckp, addresses.back()); + if (CustomFieldOutput) { + CustomFieldOutput(fld.name(), + local_domain->fields.bckp, + addresses.back(), + local_domain->mesh.rangeActiveCells()); } else { raise::Error("Custom output requested but no function provided", HERE); From ae3683c413f34c0ef5928fd119b75d925ad8c4b4 Mon Sep 17 00:00:00 2001 From: hayk Date: Sun, 7 Jul 2024 04:18:56 -0400 Subject: [PATCH 50/53] example added + simple bugfixes --- setups/tests/customout/customout.toml | 50 ++++++++++++++++++ setups/tests/customout/pgen.hpp | 73 +++++++++++++++++++++++++++ src/framework/domain/output.cpp | 2 +- src/output/fields.cpp | 3 +- src/output/fields.h | 35 +++++++------ 5 files changed, 146 insertions(+), 17 deletions(-) create mode 100644 setups/tests/customout/customout.toml create mode 100644 setups/tests/customout/pgen.hpp diff --git a/setups/tests/customout/customout.toml b/setups/tests/customout/customout.toml new file mode 100644 index 000000000..e9c7671d6 --- /dev/null +++ b/setups/tests/customout/customout.toml @@ -0,0 +1,50 @@ +[simulation] + name = "customout" + engine = "srpic" + runtime = 10.0 + +[grid] + resolution = [256, 256] + extent = [[-1.0, 1.0], [-1.0, 1.0]] + + [grid.metric] + metric = "minkowski" + + [grid.boundaries] + fields = [["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"]] + +[scales] + larmor0 = 0.01 + skindepth0 = 0.01 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 20.0 + + [[particles.species]] + label = "e-" + mass = 1.0 + charge = -1.0 + maxnpart = 1e7 + pusher = "Boris" + + [[particles.species]] + label = "e+" + mass = 1.0 + charge = 1.0 + maxnpart = 1e7 + pusher = "Boris" + +[output] + format = "hdf5" + interval_time = 0.02 + + [output.fields] + quantities = ["E", "B", "J"] + custom = ["mybuff"] diff --git a/setups/tests/customout/pgen.hpp b/setups/tests/customout/pgen.hpp new file mode 100644 index 000000000..5d1ff2a6a --- /dev/null +++ b/setups/tests/customout/pgen.hpp @@ -0,0 +1,73 @@ +#ifndef PROBLEM_GENERATOR_H +#define PROBLEM_GENERATOR_H + +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "arch/traits.h" + +#include "archetypes/problem_generator.h" +#include "framework/domain/metadomain.h" + +namespace user { + using namespace ntt; + + template + struct PGen : public arch::ProblemGenerator { + // compatibility traits for the problem generator + static constexpr auto engines { traits::compatible_with::value }; + static constexpr auto metrics { traits::compatible_with::value }; + static constexpr auto dimensions { traits::compatible_with::value }; + + // for easy access to variables in the child class + using arch::ProblemGenerator::D; + using arch::ProblemGenerator::C; + using arch::ProblemGenerator::params; + + array_t cbuff; + + inline PGen(const SimulationParams& p, const Metadomain&) + : arch::ProblemGenerator(p) {} + + inline PGen() {} + + void CustomPostStep(std::size_t step, long double, Domain& domain) { + if (step == 0) { + // allocate the array at time = 0 + cbuff = array_t("cbuff", + domain.mesh.n_all(in::x1), + domain.mesh.n_all(in::x2)); + } + // fill with zeros + Kokkos::deep_copy(cbuff, ZERO); + // populate the array atomically (here it's not strictly necessary) + auto cbuff_sc = Kokkos::Experimental::create_scatter_view(cbuff); + Kokkos::parallel_for( + "FillCbuff", + domain.mesh.rangeActiveCells(), + Lambda(index_t i1, index_t i2) { + auto cbuff_acc = cbuff_sc.access(); + cbuff_acc(i1, i2) += static_cast(i1 + i2); + }); + Kokkos::Experimental::contribute(cbuff, cbuff_sc); + } + + void CustomFieldOutput(const std::string& name, + ndfield_t buffer, + std::size_t index, + const range_t) { + printf("CustomFieldOutput: %s\n", name.c_str()); + if (name == "mybuff") { + // copy the custom buffer to the buffer output + Kokkos::deep_copy(Kokkos::subview(buffer, Kokkos::ALL, Kokkos::ALL, index), + cbuff); + } else { + raise::Error("Custom output not provided", HERE); + } + } + }; + +} // namespace user + +#endif diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 2f6d75cb2..272cf1134 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -294,7 +294,7 @@ namespace ntt { } } else if (fld.is_custom()) { if (CustomFieldOutput) { - CustomFieldOutput(fld.name(), + CustomFieldOutput(fld.name().substr(1), local_domain->fields.bckp, addresses.back(), local_domain->mesh.rangeActiveCells()); diff --git a/src/output/fields.cpp b/src/output/fields.cpp index 5f9b1bd47..aa5a752d4 100644 --- a/src/output/fields.cpp +++ b/src/output/fields.cpp @@ -62,7 +62,8 @@ namespace out { interp_flag = PrepareOutput::InterpToCellCenterFromEdges; } else if (is_field() || is_gr_aux_field()) { interp_flag = PrepareOutput::InterpToCellCenterFromFaces; - } else if (not(is_moment() || is_vpotential() || is_divergence())) { + } else if ( + not(is_moment() || is_vpotential() || is_divergence() || is_custom())) { raise::Error("Unrecognized field type for output", HERE); } } diff --git a/src/output/fields.h b/src/output/fields.h index dcc2deb0a..a520a246d 100644 --- a/src/output/fields.h +++ b/src/output/fields.h @@ -85,24 +85,29 @@ namespace out { [[nodiscard]] inline auto name() const -> std::string { // generate the name - auto tmp = std::string(id().to_string()); - if (id() == FldsID::T) { - tmp += m_name.substr(1, 2); - } else if (id() == FldsID::A) { - tmp += "3"; - } else if (is_field()) { - tmp += "i"; - } - if (species.size() > 0) { - tmp += "_"; - for (auto& s : species) { - tmp += std::to_string(s); + std::string tmp; + if (id() == FldsID::Custom) { + tmp = m_name; + } else { + tmp = std::string(id().to_string()); + if (id() == FldsID::T) { + tmp += m_name.substr(1, 2); + } else if (id() == FldsID::A) { + tmp += "3"; + } else if (is_field()) { + tmp += "i"; + } + if (species.size() > 0) { tmp += "_"; + for (auto& s : species) { + tmp += std::to_string(s); + tmp += "_"; + } + tmp.pop_back(); } - tmp.pop_back(); + // capitalize the first letter + tmp[0] = std::toupper(tmp[0]); } - // capitalize the first letter - tmp[0] = std::toupper(tmp[0]); return "f" + tmp; } From 2a1cdd0f6c118e9ad72513ee1ca0abdaf4569215 Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 18:48:32 -0400 Subject: [PATCH 51/53] passing domain to customout --- dev/welcome.rocm | 11 ++++++----- setups/tests/customout/customout.toml | 2 +- setups/tests/customout/pgen.hpp | 15 ++++++++++++++- src/engines/engine_run.cpp | 12 +++++++----- src/framework/domain/metadomain.h | 2 +- src/framework/domain/output.cpp | 5 +++-- 6 files changed, 32 insertions(+), 15 deletions(-) diff --git a/dev/welcome.rocm b/dev/welcome.rocm index 35e5e15d9..780375684 100644 --- a/dev/welcome.rocm +++ b/dev/welcome.rocm @@ -68,7 +68,8 @@ else python_version=${error_msg} fi -str=$(cat <${Reset} ## Testing the code * To build the tests, remove the \`build\` directory, and preconfigure with: - ${FgGreen}$ cmake -B build -D TESTS=ON -D output=ON -D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_***=ON${Reset} + ${FgGreen}$ cmake -B build -D TESTS=ON -D output=ON -D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_AMD_***=ON${Reset} * Then run the tests with: ${FgGreen}$ ctest --test-dir build${Reset} @@ -117,5 +119,4 @@ ${Red}Enjoy!${Reset} 🚀 EOF ) - printf "$str\n" diff --git a/setups/tests/customout/customout.toml b/setups/tests/customout/customout.toml index e9c7671d6..497b96dc2 100644 --- a/setups/tests/customout/customout.toml +++ b/setups/tests/customout/customout.toml @@ -47,4 +47,4 @@ [output.fields] quantities = ["E", "B", "J"] - custom = ["mybuff"] + custom = ["mybuff", "EdotB+1"] diff --git a/setups/tests/customout/pgen.hpp b/setups/tests/customout/pgen.hpp index 5d1ff2a6a..22c8f6564 100644 --- a/setups/tests/customout/pgen.hpp +++ b/setups/tests/customout/pgen.hpp @@ -56,12 +56,25 @@ namespace user { void CustomFieldOutput(const std::string& name, ndfield_t buffer, std::size_t index, - const range_t) { + const Domain& domain) { printf("CustomFieldOutput: %s\n", name.c_str()); + // examples for 2D if (name == "mybuff") { // copy the custom buffer to the buffer output Kokkos::deep_copy(Kokkos::subview(buffer, Kokkos::ALL, Kokkos::ALL, index), cbuff); + } else if (name == "EdotB+1") { + // calculate the custom buffer from EM fields + const auto& EM = domain.fields.em; + Kokkos::parallel_for( + "EdotB+1", + domain.mesh.rangeActiveCells(), + Lambda(index_t i1, index_t i2) { + buffer(i1, i2, index) = EM(i1, i2, em::ex1) * EM(i1, i2, em::bx1) + + EM(i1, i2, em::ex2) * EM(i1, i2, em::bx2) + + EM(i1, i2, em::ex3) * EM(i1, i2, em::bx3) + + ONE; + }); } else { raise::Error("Custom output not provided", HERE); } diff --git a/src/engines/engine_run.cpp b/src/engines/engine_run.cpp index 1694db15c..4485e0e40 100644 --- a/src/engines/engine_run.cpp +++ b/src/engines/engine_run.cpp @@ -9,6 +9,8 @@ #include "metrics/qspherical.h" #include "metrics/spherical.h" +#include "framework/domain/domain.h" + #include "engines/engine.hpp" namespace ntt { @@ -63,11 +65,11 @@ namespace ntt { timers.start("Output"); if constexpr ( traits::has_method::value) { - auto lambda_custom_field_output = [&](const std::string& name, - ndfield_t& buff, - std::size_t idx, - const range_t& range) { - m_pgen.CustomFieldOutput(name, buff, idx, range); + auto lambda_custom_field_output = [&](const std::string& name, + ndfield_t& buff, + std::size_t idx, + const Domain& dom) { + m_pgen.CustomFieldOutput(name, buff, idx, dom); }; print_output = m_metadomain.Write(m_params, step, diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 375c476c0..fb81fcfca 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -119,7 +119,7 @@ namespace ntt { std::function&, std::size_t, - const range_t&)> = {}) -> bool; + const Domain&)> = {}) -> bool; #endif Metadomain(const Metadomain&) = delete; diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 272cf1134..cabb37093 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -14,6 +14,7 @@ #include "metrics/spherical.h" #include "framework/containers/particles.h" +#include "framework/domain/domain.h" #include "framework/domain/metadomain.h" #include "framework/parameters.h" @@ -168,7 +169,7 @@ namespace ntt { std::size_t step, long double time, std::function< - void(const std::string&, ndfield_t&, std::size_t, const range_t&)> + void(const std::string&, ndfield_t&, std::size_t, const Domain&)> CustomFieldOutput) -> bool { raise::ErrorIf( local_subdomain_indices().size() != 1, @@ -297,7 +298,7 @@ namespace ntt { CustomFieldOutput(fld.name().substr(1), local_domain->fields.bckp, addresses.back(), - local_domain->mesh.rangeActiveCells()); + *local_domain); } else { raise::Error("Custom output requested but no function provided", HERE); From 8e0e7c543dca3675f2f1615e281a9124d2601ba3 Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 28 Jul 2024 20:15:35 -0400 Subject: [PATCH 52/53] added report on arch & rocm+cuda versions --- cmake/report.cmake | 50 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/cmake/report.cmake b/cmake/report.cmake index e15074943..fe914baa8 100644 --- a/cmake/report.cmake +++ b/cmake/report.cmake @@ -150,6 +150,16 @@ PrintChoices("CUDA" 0 42 ) +PrintChoices("HIP" + "Kokkos_ENABLE_HIP" + "${ON_OFF_VALUES}" + ${Kokkos_ENABLE_HIP} + "OFF" + "${Green}" + HIP_REPORT + 0 + 42 +) PrintChoices("OpenMP" "Kokkos_ENABLE_OPENMP" "${ON_OFF_VALUES}" @@ -183,6 +193,28 @@ PrintChoices("C compiler" 42 ) +get_cmake_property(_variableNames VARIABLES) +foreach (_variableName ${_variableNames}) + string(REGEX MATCH "Kokkos_ARCH_*" _isMatched ${_variableName}) + if(_isMatched) + get_property(isSet CACHE ${_variableName} PROPERTY VALUE) + if(isSet STREQUAL "ON") + string(REGEX REPLACE "Kokkos_ARCH_" "" ARCH ${_variableName}) + break() + endif() + endif() +endforeach() +PrintChoices("Architecture" + "Kokkos_ARCH_*" + "${ARCH}" + "${ARCH}" + "N/A" + "${ColorReset}" + ARCH_REPORT + 0 + 42 +) + if(${Kokkos_ENABLE_CUDA}) if("${CMAKE_CUDA_COMPILER}" STREQUAL "") execute_process(COMMAND which nvcc OUTPUT_VARIABLE CUDACOMP) @@ -194,14 +226,13 @@ if(${Kokkos_ENABLE_CUDA}) message(STATUS "CUDA compiler: ${CUDACOMP}") execute_process(COMMAND bash -c "${CUDACOMP} --version | grep release | sed -e 's/.*release //' -e 's/,.*//'" - OUTPUT_VARIABLE CUDACOMP_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) PrintChoices("CUDA compiler" "CMAKE_CUDA_COMPILER" - "${CUDACOMP} v${CUDACOMP_VERSION}" - "${CUDACOMP} v${CUDACOMP_VERSION}" + "${CUDACOMP}" + "${CUDACOMP}" "N/A" "${ColorReset}" CUDA_COMPILER_REPORT @@ -210,6 +241,12 @@ if(${Kokkos_ENABLE_CUDA}) ) endif() +if (${Kokkos_ENABLE_HIP}) + execute_process(COMMAND bash -c "hipcc --version | grep HIP | cut -d ':' -f 2 | tr -d ' '" + OUTPUT_VARIABLE ROCM_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() + set(DOT_SYMBOL "${ColorReset}.") set(DOTTED_LINE_SYMBOL "${ColorReset}. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ") @@ -242,7 +279,9 @@ message(" ${OUTPUT_REPORT}") message("${DASHED_LINE_SYMBOL} Compile configurations") +message(" ${ARCH_REPORT}") message(" ${CUDA_REPORT}") +message(" ${HIP_REPORT}") message(" ${OPENMP_REPORT}") message(" ${C_COMPILER_REPORT}") @@ -259,6 +298,11 @@ message(" ${DEBUG_REPORT}") message("${DASHED_LINE_SYMBOL}\nDependencies") +if (NOT "${CUDACOMP_VERSION}" STREQUAL "") + message(" - CUDA:\tv${CUDACOMP_VERSION}") +elseif(NOT "${ROCM_VERSION}" STREQUAL "") + message(" - ROCm:\tv${ROCM_VERSION}") +endif() message(" - Kokkos:\tv${Kokkos_VERSION}") if(${output}) message(" - ADIOS2:\tv${adios2_VERSION}") From 52a4fd509def28c62f99955dd84429c9352eb996 Mon Sep 17 00:00:00 2001 From: jmahlmann Date: Sun, 28 Jul 2024 23:15:22 -0400 Subject: [PATCH 53/53] Update magnetar setup. --- setups/srpic/magnetar/magnetar.py | 8 +- setups/srpic/magnetar/magnetar.toml | 28 +-- setups/srpic/magnetar/pgen.hpp | 329 +++++++++++++++------------- 3 files changed, 190 insertions(+), 175 deletions(-) diff --git a/setups/srpic/magnetar/magnetar.py b/setups/srpic/magnetar/magnetar.py index b9b72d73d..0bbf790e5 100644 --- a/setups/srpic/magnetar/magnetar.py +++ b/setups/srpic/magnetar/magnetar.py @@ -1,9 +1,9 @@ import nt2.read as nt2r import matplotlib as mpl -data = nt2r.Data("magnetosphere.h5") +data = nt2r.Data("magnetar.h5") def plot (ti, data): - (data.N_2 - data.N_1).isel(t=ti).polar.pcolor( - norm=mpl.colors.SymLogNorm(vmin=-5, vmax=5, linthresh=1e-2, linscale=1), - cmap="RdBu_r") \ No newline at end of file + (data.Bph*(data.r*np.sin(data.th))).isel(t=ti).polar.pcolor( + norm=mpl.colors.Normalize(vmin=-0.075, vmax=0.075), + cmap="PuOr") \ No newline at end of file diff --git a/setups/srpic/magnetar/magnetar.toml b/setups/srpic/magnetar/magnetar.toml index 76bb3af8e..2a2260af5 100644 --- a/setups/srpic/magnetar/magnetar.toml +++ b/setups/srpic/magnetar/magnetar.toml @@ -1,10 +1,10 @@ [simulation] name = "magnetar" engine = "srpic" - runtime = 200.0 + runtime = 50.0 [grid] - resolution = [4096,2048] + resolution = [2048,1024] extent = [[1.0, 400.0]] [grid.metric] @@ -25,7 +25,7 @@ ds = 0.5 [scales] - larmor0 = 5e-6 + larmor0 = 1e-5 skindepth0 = 0.01 [algorithms] @@ -47,54 +47,56 @@ label = "e-" mass = 1.0 charge = -1.0 - maxnpart = 1e8 + maxnpart = 5e7 pusher = "Boris,GCA" [[particles.species]] label = "e+" mass = 1.0 charge = 1.0 - maxnpart = 1e8 + maxnpart = 5e7 pusher = "Boris,GCA" - [[particles.species]] + [[particles.species]] label = "e-" mass = 1.0 charge = -1.0 - maxnpart = 1e4 + maxnpart = 5e7 pusher = "Boris,GCA" [[particles.species]] label = "e+" mass = 1.0 charge = 1.0 - maxnpart = 1e4 + maxnpart = 5e7 pusher = "Boris,GCA" - [[particles.species]] + [[particles.species]] label = "e-" mass = 1.0 charge = -1.0 - maxnpart = 2e8 + maxnpart = 5e7 pusher = "Boris,GCA" [[particles.species]] label = "e+" mass = 1.0 charge = 1.0 - maxnpart = 2e8 + maxnpart = 5e7 pusher = "Boris,GCA" [setup] Bsurf = 1.0 omega = 0.0125 + pp_thres = 10.0 + gamma_pairs = 1.75 [output] format = "hdf5" [output.fields] interval_time = 0.5 - quantities = ["N_1", "N_2", "N_5", "N_6", "B", "E", "J"] + quantities = ["N_1", "N_2", "N_3", "N_4", "N_5", "N_6", "B", "E", "J"] [output.particles] enable = false @@ -103,4 +105,4 @@ enable = false [diagnostics] - interval = 100 + interval = 1 diff --git a/setups/srpic/magnetar/pgen.hpp b/setups/srpic/magnetar/pgen.hpp index 24dcb9d3a..cacbb7c9a 100644 --- a/setups/srpic/magnetar/pgen.hpp +++ b/setups/srpic/magnetar/pgen.hpp @@ -53,7 +53,7 @@ namespace user { } Inline auto ex2(const coord_t& x_Ph) const -> real_t { - auto sigma = (x_Ph[1] - 0.5 * constant::PI) / + auto sigma = (x_Ph[1] - HALF * constant::PI) / (static_cast(0.2) * constant::PI); return -Omega * bx1(x_Ph) * x_Ph[0] * math::sin(x_Ph[1]) * sigma * math::exp((ONE - SQR(SQR(sigma))) * INV_4); @@ -83,52 +83,22 @@ namespace user { const Metadomain& global_domain; - const real_t Bsurf, Rstar, Omega; + const real_t Bsurf, Rstar, Omega, gamma_pairs, pp_thres; InitFields init_flds; - + inline PGen(const SimulationParams& p, const Metadomain& m) : arch::ProblemGenerator(p) , global_domain { m } , Bsurf { p.template get("setup.Bsurf", ONE) } , Rstar { m.mesh().extent(in::x1).first } , Omega { p.template get("setup.omega") } - , init_flds { Bsurf, Rstar } {} + , pp_thres { p.template get("setup.pp_thres") } + , gamma_pairs { p.template get("setup.gamma_pairs") } + , init_flds { Bsurf, Rstar } { + } inline PGen() {} - // inline void InitPrtls(Domain& local_domain) { - // Kokkos::deep_copy(local_domain.fields.bckp, ZERO); - // // @HACK - // const auto x_surf = 1.1f; - // const auto ds = params.template get( - // "grid.boundaries.atmosphere.ds"); - // const auto temp = params.template get( - // "grid.boundaries.atmosphere.temperature"); - // const auto height = params.template get( - // "grid.boundaries.atmosphere.height"); - // const auto species = params.template get>( - // "grid.boundaries.atmosphere.species"); - // const auto nmax = params.template get( - // "grid.boundaries.atmosphere.density"); - // const auto atm_injector = - // arch::AtmosphereInjector { - // local_domain.mesh.metric, - // local_domain.fields.bckp, - // nmax, - // height, - // x_surf, - // ds, - // temp, - // local_domain.random_pool, - // species - // }; - // arch::InjectNonUniform(params, - // local_domain, - // atm_injector, - // nmax, - // true); - // } - auto FieldDriver(real_t time) const -> DriveFields { const real_t omega_t = Omega * @@ -139,127 +109,170 @@ namespace user { return DriveFields { time, Bsurf, Rstar, omega_t }; } - // void CustomPostStep(std::size_t time, long double, Domain& domain) { - - // const auto pp_thres = 1*10.0; - // const auto gamma_pairs = 1*0.5 * 3.5; - - // auto& species_e = domain.species[4]; - // auto& species_p = domain.species[5]; - // auto metric = domain.mesh.metric; - - // for (std::size_t s { 0 }; s < 6; ++s) { - // if ((s == 1) || (s == 2) || (s == 3)) { - // continue; - // } - - // array_t elec_ind("elec_ind"); - // array_t pos_ind("pos_ind"); - // auto offset_e = species_e.npart(); - // auto offset_p = species_p.npart(); - - // auto ux1_e = species_e.ux1; - // auto ux2_e = species_e.ux2; - // auto ux3_e = species_e.ux3; - // auto i1_e = species_e.i1; - // auto i2_e = species_e.i2; - // auto dx1_e = species_e.dx1; - // auto dx2_e = species_e.dx2; - // auto phi_e = species_e.phi; - // auto weight_e = species_e.weight; - // auto tag_e = species_e.tag; - - // auto ux1_p = species_p.ux1; - // auto ux2_p = species_p.ux2; - // auto ux3_p = species_p.ux3; - // auto i1_p = species_p.i1; - // auto i2_p = species_p.i2; - // auto dx1_p = species_p.dx1; - // auto dx2_p = species_p.dx2; - // auto phi_p = species_p.phi; - // auto weight_p = species_p.weight; - // auto tag_p = species_p.tag; - - // auto& species = domain.species[s]; - // auto ux1 = species.ux1; - // auto ux2 = species.ux2; - // auto ux3 = species.ux3; - // auto i1 = species.i1; - // auto i2 = species.i2; - // auto dx1 = species.dx1; - // auto dx2 = species.dx2; - // auto phi = species.phi; - // auto weight = species.weight; - // auto tag = species.tag; - - // Kokkos::parallel_for( - // "InjectPairs", species.rangeActiveParticles(), Lambda(index_t p) { - // if (tag(p) == ParticleTag::dead) { - // return; - // } - - // auto px = ux1(p); - // auto py = ux2(p); - // auto pz = ux3(p); - // auto gamma = math::sqrt(ONE + SQR(px) + SQR(py) + SQR(pz)); - - // const coord_t xCd{ - // static_cast(i1(p)) + dx1(p), - // static_cast(i2(p)) + dx2(p)}; - - // coord_t xPh { ZERO }; - // metric.template convert(xCd, xPh); - - // if ((gamma > pp_thres) && (math::sin(xPh[1]) > 0.1)) { - - // auto new_gamma = gamma - 2.0 * gamma_pairs; - // auto new_fac = math::sqrt(SQR(new_gamma) - 1.0) / math::sqrt(SQR(gamma) - 1.0); - // auto pair_fac = math::sqrt(SQR(gamma_pairs) - 1.0) / math::sqrt(SQR(gamma) - 1.0); - - // auto elec_p = Kokkos::atomic_fetch_add(&elec_ind(), 1); - // auto pos_p = Kokkos::atomic_fetch_add(&pos_ind(), 1); - - // i1_e(elec_p + offset_e) = i1(p); - // dx1_e(elec_p + offset_e) = dx1(p); - // i2_e(elec_p + offset_e) = i2(p); - // dx2_e(elec_p + offset_e) = dx2(p); - // phi_e(elec_p + offset_e) = phi(p); - // ux1_e(elec_p + offset_e) = px * pair_fac; - // ux2_e(elec_p + offset_e) = py * pair_fac; - // ux3_e(elec_p + offset_e) = pz * pair_fac; - // weight_e(elec_p + offset_e) = weight(p); - // tag_e(elec_p + offset_e) = ParticleTag::alive; - - // i1_p(pos_p + offset_p) = i1(p); - // dx1_p(pos_p + offset_p) = dx1(p); - // i2_p(pos_p + offset_p) = i2(p); - // dx2_p(pos_p + offset_p) = dx2(p); - // phi_p(pos_p + offset_p) = phi(p); - // ux1_p(pos_p + offset_p) = px * pair_fac; - // ux2_p(pos_p + offset_p) = py * pair_fac; - // ux3_p(pos_p + offset_p) = pz * pair_fac; - // weight_p(pos_p + offset_p) = weight(p); - // tag_p(pos_p + offset_p) = ParticleTag::alive; - - // ux1(p) *= new_fac; - // ux2(p) *= new_fac; - // ux3(p) *= new_fac; - - // } - - // }); - - // auto elec_ind_h = Kokkos::create_mirror(elec_ind); - // Kokkos::deep_copy(elec_ind_h, elec_ind); - // species_e.set_npart(offset_e + elec_ind_h()); - - // auto pos_ind_h = Kokkos::create_mirror(pos_ind); - // Kokkos::deep_copy(pos_ind_h, pos_ind); - // species_p.set_npart(offset_p + pos_ind_h()); - - // } - - // } + void CustomPostStep(std::size_t , long double, Domain& domain) { + + // Ad-hoc PP kernel + { + + auto& species2_e = domain.species[2]; + auto& species2_p = domain.species[3]; + auto& species3_e = domain.species[4]; + auto& species3_p = domain.species[5]; + auto metric = domain.mesh.metric; + auto pp_thres_ = this->pp_thres; + auto gamma_pairs_ = this->gamma_pairs; + + for (std::size_t s { 0 }; s < 6; ++s) { + if (s == 1) { + continue; + } + + array_t elec_ind("elec_ind"); + array_t pos_ind("pos_ind"); + + auto offset_e = species3_e.npart(); + auto offset_p = species3_p.npart(); + + auto ux1_e = species3_e.ux1; + auto ux2_e = species3_e.ux2; + auto ux3_e = species3_e.ux3; + auto i1_e = species3_e.i1; + auto i2_e = species3_e.i2; + auto dx1_e = species3_e.dx1; + auto dx2_e = species3_e.dx2; + auto phi_e = species3_e.phi; + auto weight_e = species3_e.weight; + auto tag_e = species3_e.tag; + + auto ux1_p = species3_p.ux1; + auto ux2_p = species3_p.ux2; + auto ux3_p = species3_p.ux3; + auto i1_p = species3_p.i1; + auto i2_p = species3_p.i2; + auto dx1_p = species3_p.dx1; + auto dx2_p = species3_p.dx2; + auto phi_p = species3_p.phi; + auto weight_p = species3_p.weight; + auto tag_p = species3_p.tag; + + if (s == 0) { + + offset_e = species2_e.npart(); + offset_p = species2_p.npart(); + + ux1_e = species2_e.ux1; + ux2_e = species2_e.ux2; + ux3_e = species2_e.ux3; + i1_e = species2_e.i1; + i2_e = species2_e.i2; + dx1_e = species2_e.dx1; + dx2_e = species2_e.dx2; + phi_e = species2_e.phi; + weight_e = species2_e.weight; + tag_e = species2_e.tag; + + ux1_p = species2_p.ux1; + ux2_p = species2_p.ux2; + ux3_p = species2_p.ux3; + i1_p = species2_p.i1; + i2_p = species2_p.i2; + dx1_p = species2_p.dx1; + dx2_p = species2_p.dx2; + phi_p = species2_p.phi; + weight_p = species2_p.weight; + tag_p = species2_p.tag; + + } + + auto& species = domain.species[s]; + auto ux1 = species.ux1; + auto ux2 = species.ux2; + auto ux3 = species.ux3; + auto i1 = species.i1; + auto i2 = species.i2; + auto dx1 = species.dx1; + auto dx2 = species.dx2; + auto phi = species.phi; + auto weight = species.weight; + auto tag = species.tag; + + Kokkos::parallel_for( + "InjectPairs", species.rangeActiveParticles(), Lambda(index_t p) { + if (tag(p) == ParticleTag::dead) { + return; + } + + auto px = ux1(p); + auto py = ux2(p); + auto pz = ux3(p); + auto gamma = math::sqrt(ONE + SQR(px) + SQR(py) + SQR(pz)); + + const coord_t xCd{ + static_cast(i1(p)) + dx1(p), + static_cast(i2(p)) + dx2(p)}; + + coord_t xPh { ZERO }; + metric.template convert(xCd, xPh); + + if ((gamma > pp_thres_) && (math::sin(xPh[1]) > 0.1)) { + + auto new_gamma = gamma - 2.0 * gamma_pairs_; + auto new_fac = math::sqrt(SQR(new_gamma) - 1.0) / math::sqrt(SQR(gamma) - 1.0); + auto pair_fac = math::sqrt(SQR(gamma_pairs_) - 1.0) / math::sqrt(SQR(gamma) - 1.0); + + auto elec_p = Kokkos::atomic_fetch_add(&elec_ind(), 1); + auto pos_p = Kokkos::atomic_fetch_add(&pos_ind(), 1); + + i1_e(elec_p + offset_e) = i1(p); + dx1_e(elec_p + offset_e) = dx1(p); + i2_e(elec_p + offset_e) = i2(p); + dx2_e(elec_p + offset_e) = dx2(p); + phi_e(elec_p + offset_e) = phi(p); + ux1_e(elec_p + offset_e) = px * pair_fac; + ux2_e(elec_p + offset_e) = py * pair_fac; + ux3_e(elec_p + offset_e) = pz * pair_fac; + weight_e(elec_p + offset_e) = weight(p); + tag_e(elec_p + offset_e) = ParticleTag::alive; + + i1_p(pos_p + offset_p) = i1(p); + dx1_p(pos_p + offset_p) = dx1(p); + i2_p(pos_p + offset_p) = i2(p); + dx2_p(pos_p + offset_p) = dx2(p); + phi_p(pos_p + offset_p) = phi(p); + ux1_p(pos_p + offset_p) = px * pair_fac; + ux2_p(pos_p + offset_p) = py * pair_fac; + ux3_p(pos_p + offset_p) = pz * pair_fac; + weight_p(pos_p + offset_p) = weight(p); + tag_p(pos_p + offset_p) = ParticleTag::alive; + + ux1(p) *= new_fac; + ux2(p) *= new_fac; + ux3(p) *= new_fac; + } + + }); + + auto elec_ind_h = Kokkos::create_mirror(elec_ind); + Kokkos::deep_copy(elec_ind_h, elec_ind); + if (s == 0) { + species2_e.set_npart(offset_e + elec_ind_h()); + } else { + species3_e.set_npart(offset_e + elec_ind_h()); + } + + auto pos_ind_h = Kokkos::create_mirror(pos_ind); + Kokkos::deep_copy(pos_ind_h, pos_ind); + if (s == 0) { + species2_p.set_npart(offset_p + pos_ind_h()); + } else { + species3_p.set_npart(offset_p + pos_ind_h()); + } + + + } + } // Ad-hoc PP kernel + } + }; } // namespace user