diff --git a/dev/nix/kokkos.nix b/dev/nix/kokkos.nix index e12b9d57..2f6ee6b9 100644 --- a/dev/nix/kokkos.nix +++ b/dev/nix/kokkos.nix @@ -10,6 +10,7 @@ let pversion = "4.6.01"; compilerPkgs = { "HIP" = with pkgs.rocmPackages; [ + llvm.rocm-merged-llvm rocm-core clr rocthrust @@ -18,11 +19,13 @@ let rocm-smi ]; "CUDA" = with pkgs.cudaPackages; [ + llvmPackages_18.clang-tools cudatoolkit cuda_cudart pkgs.gcc13 ]; "NONE" = [ + pkgs.llvmPackages_18.clang-tools pkgs.gcc13 ]; }; @@ -36,9 +39,7 @@ let "HIP" = [ "-D Kokkos_ENABLE_HIP=ON" "-D Kokkos_ARCH_${getArch { }}=ON" - # remove leading AMD_ "-D AMDGPU_TARGETS=${builtins.replaceStrings [ "amd_" ] [ "" ] (pkgs.lib.toLower (getArch { }))}" - "-D CMAKE_C_COMPILER=hipcc" "-D CMAKE_CXX_COMPILER=hipcc" ]; "CUDA" = [ diff --git a/dev/nix/shell.nix b/dev/nix/shell.nix index ae5451ec..33ae5709 100644 --- a/dev/nix/shell.nix +++ b/dev/nix/shell.nix @@ -42,9 +42,6 @@ pkgs.mkShell { zlib cmake - llvmPackages_18.clang-tools - libgcc - adios2Pkg kokkosPkg diff --git a/pgens/wald/pgen.hpp b/pgens/wald/pgen.hpp index b2c2aeb0..71ee905e 100644 --- a/pgens/wald/pgen.hpp +++ b/pgens/wald/pgen.hpp @@ -7,6 +7,7 @@ #include "arch/kokkos_aliases.h" #include "arch/traits.h" #include "utils/comparators.h" +#include "utils/error.h" #include "utils/formatting.h" #include "utils/log.h" #include "utils/numeric.h" @@ -17,14 +18,31 @@ #include "framework/domain/domain.h" #include "framework/domain/metadomain.h" +#include #include +enum InitFieldGeometry { + Wald, + Vertical, +}; + namespace user { using namespace ntt; template struct InitFields { - InitFields(M metric_) : metric { metric_ } {} + InitFields(M metric_, const std::string& init_field_geometry) + : metric { metric_ } { + if (init_field_geometry == "wald") { + field_geometry = InitFieldGeometry::Wald; + } else if (init_field_geometry == "vertical") { + field_geometry = InitFieldGeometry::Vertical; + } else { + raise::Error(fmt::format("Unrecognized field geometry: %s", + init_field_geometry.c_str()), + HERE); + } + } Inline auto A_3(const coord_t& x_Cd) const -> real_t { return HALF * (metric.template h_<3, 3>(x_Cd) + @@ -83,102 +101,132 @@ namespace user { Inline auto bx3( const coord_t& x_Ph) const -> real_t { // at ( i + HALF , j + HALF ) - coord_t xi { ZERO }, x0m { ZERO }, x0p { ZERO }; - metric.template convert(x_Ph, xi); - - x0m[0] = xi[0]; - x0m[1] = xi[1] - HALF; - x0p[0] = xi[0]; - x0p[1] = xi[1] + HALF; - - real_t inv_sqrt_detH_iPjP { ONE / metric.sqrt_det_h({ xi[0], xi[1] }) }; - return -(A_1(x0p) - A_1(x0m)) * inv_sqrt_detH_iPjP; + if (field_geometry == InitFieldGeometry::Wald) { + coord_t xi { ZERO }, x0m { ZERO }, x0p { ZERO }; + metric.template convert(x_Ph, xi); + + x0m[0] = xi[0]; + x0m[1] = xi[1] - HALF; + x0p[0] = xi[0]; + x0p[1] = xi[1] + HALF; + + real_t inv_sqrt_detH_iPjP { ONE / metric.sqrt_det_h({ xi[0], xi[1] }) }; + return -(A_1(x0p) - A_1(x0m)) * inv_sqrt_detH_iPjP; + } else if (field_geometry == InitFieldGeometry::Vertical) { + return ZERO; + } else { + raise::KernelError(HERE, "Unrecognized field geometry"); + return ZERO; + } } Inline auto dx1(const coord_t& x_Ph) const -> real_t { // at ( i + HALF , j ) - coord_t xi { ZERO }, x0m { ZERO }, x0p { ZERO }; - metric.template convert(x_Ph, xi); - - real_t alpha_iPj { metric.alpha({ xi[0], xi[1] }) }; - real_t inv_sqrt_detH_ij { ONE / metric.sqrt_det_h({ xi[0] - HALF, xi[1] }) }; - real_t sqrt_detH_ij { metric.sqrt_det_h({ xi[0] - HALF, xi[1] }) }; - real_t beta_ij { metric.beta1({ xi[0] - HALF, xi[1] }) }; - real_t alpha_ij { metric.alpha({ xi[0] - HALF, xi[1] }) }; - - // D1 at ( i + HALF , j ) - x0m[0] = xi[0] - HALF; - x0m[1] = xi[1]; - x0p[0] = xi[0] + HALF; - x0p[1] = xi[1]; - real_t E1d { (A_0(x0p) - A_0(x0m)) }; - real_t D1d { E1d / alpha_iPj }; - - // D3 at ( i , j ) - x0m[0] = xi[0] - HALF - HALF; - x0m[1] = xi[1]; - x0p[0] = xi[0] - HALF + HALF; - x0p[1] = xi[1]; - real_t D3d { (A_3(x0p) - A_3(x0m)) * beta_ij / alpha_ij }; - - real_t D1u { metric.template h<1, 1>({ xi[0], xi[1] }) * D1d + - metric.template h<1, 3>({ xi[0], xi[1] }) * D3d }; - - return D1u; + if (field_geometry == InitFieldGeometry::Wald) { + coord_t xi { ZERO }, x0m { ZERO }, x0p { ZERO }; + metric.template convert(x_Ph, xi); + + real_t alpha_iPj { metric.alpha({ xi[0], xi[1] }) }; + real_t inv_sqrt_detH_ij { ONE / metric.sqrt_det_h({ xi[0] - HALF, xi[1] }) }; + real_t sqrt_detH_ij { metric.sqrt_det_h({ xi[0] - HALF, xi[1] }) }; + real_t beta_ij { metric.beta1({ xi[0] - HALF, xi[1] }) }; + real_t alpha_ij { metric.alpha({ xi[0] - HALF, xi[1] }) }; + + // D1 at ( i + HALF , j ) + x0m[0] = xi[0] - HALF; + x0m[1] = xi[1]; + x0p[0] = xi[0] + HALF; + x0p[1] = xi[1]; + real_t E1d { (A_0(x0p) - A_0(x0m)) }; + real_t D1d { E1d / alpha_iPj }; + + // D3 at ( i , j ) + x0m[0] = xi[0] - HALF - HALF; + x0m[1] = xi[1]; + x0p[0] = xi[0] - HALF + HALF; + x0p[1] = xi[1]; + real_t D3d { (A_3(x0p) - A_3(x0m)) * beta_ij / alpha_ij }; + + real_t D1u { metric.template h<1, 1>({ xi[0], xi[1] }) * D1d + + metric.template h<1, 3>({ xi[0], xi[1] }) * D3d }; + + return D1u; + } else if (field_geometry == InitFieldGeometry::Vertical) { + return ZERO; + } else { + raise::KernelError(HERE, "Unrecognized field geometry"); + return ZERO; + } } Inline auto dx2(const coord_t& x_Ph) const -> real_t { // at ( i , j + HALF ) - coord_t xi { ZERO }, x0m { ZERO }, x0p { ZERO }; - metric.template convert(x_Ph, xi); - x0m[0] = xi[0]; - x0m[1] = xi[1] - HALF; - x0p[0] = xi[0]; - x0p[1] = xi[1] + HALF; - real_t inv_sqrt_detH_ijP { ONE / metric.sqrt_det_h({ xi[0], xi[1] }) }; - real_t sqrt_detH_ijP { metric.sqrt_det_h({ xi[0], xi[1] }) }; - real_t alpha_ijP { metric.alpha({ xi[0], xi[1] }) }; - real_t beta_ijP { metric.beta1({ xi[0], xi[1] }) }; - - real_t E2d { (A_0(x0p) - A_0(x0m)) }; - real_t D2d { E2d / alpha_ijP - (A_1(x0p) - A_1(x0m)) * beta_ijP / alpha_ijP }; - real_t D2u { metric.template h<2, 2>({ xi[0], xi[1] }) * D2d }; - - return D2u; + if (field_geometry == InitFieldGeometry::Wald) { + coord_t xi { ZERO }, x0m { ZERO }, x0p { ZERO }; + metric.template convert(x_Ph, xi); + x0m[0] = xi[0]; + x0m[1] = xi[1] - HALF; + x0p[0] = xi[0]; + x0p[1] = xi[1] + HALF; + real_t inv_sqrt_detH_ijP { ONE / metric.sqrt_det_h({ xi[0], xi[1] }) }; + real_t sqrt_detH_ijP { metric.sqrt_det_h({ xi[0], xi[1] }) }; + real_t alpha_ijP { metric.alpha({ xi[0], xi[1] }) }; + real_t beta_ijP { metric.beta1({ xi[0], xi[1] }) }; + + real_t E2d { (A_0(x0p) - A_0(x0m)) }; + real_t D2d { E2d / alpha_ijP - + (A_1(x0p) - A_1(x0m)) * beta_ijP / alpha_ijP }; + real_t D2u { metric.template h<2, 2>({ xi[0], xi[1] }) * D2d }; + + return D2u; + } else if (field_geometry == InitFieldGeometry::Vertical) { + return ZERO; + } else { + raise::KernelError(HERE, "Unrecognized field geometry"); + return ZERO; + } } Inline auto dx3(const coord_t& x_Ph) const -> real_t { // at ( i , j ) - coord_t xi { ZERO }, x0m { ZERO }, x0p { ZERO }; - metric.template convert(x_Ph, xi); - real_t inv_sqrt_detH_ij { ONE / metric.sqrt_det_h({ xi[0], xi[1] }) }; - real_t sqrt_detH_ij { metric.sqrt_det_h({ xi[0], xi[1] }) }; - real_t beta_ij { metric.beta1({ xi[0], xi[1] }) }; - real_t alpha_ij { metric.alpha({ xi[0], xi[1] }) }; - real_t alpha_iPj { metric.alpha({ xi[0] + HALF, xi[1] }) }; - - // D3 at ( i , j ) - x0m[0] = xi[0] - HALF; - x0m[1] = xi[1]; - x0p[0] = xi[0] + HALF; - x0p[1] = xi[1]; - real_t D3d { (A_3(x0p) - A_3(x0m)) * beta_ij / alpha_ij }; - - // D1 at ( i + HALF , j ) - x0m[0] = xi[0] + HALF - HALF; - x0m[1] = xi[1]; - x0p[0] = xi[0] + HALF + HALF; - x0p[1] = xi[1]; - real_t E1d { (A_0(x0p) - A_0(x0m)) }; - real_t D1d { E1d / alpha_iPj }; - - if (cmp::AlmostZero(x_Ph[1])) { - return metric.template h<1, 3>({ xi[0], xi[1] }) * D1d; + if (field_geometry == InitFieldGeometry::Wald) { + coord_t xi { ZERO }, x0m { ZERO }, x0p { ZERO }; + metric.template convert(x_Ph, xi); + real_t inv_sqrt_detH_ij { ONE / metric.sqrt_det_h({ xi[0], xi[1] }) }; + real_t sqrt_detH_ij { metric.sqrt_det_h({ xi[0], xi[1] }) }; + real_t beta_ij { metric.beta1({ xi[0], xi[1] }) }; + real_t alpha_ij { metric.alpha({ xi[0], xi[1] }) }; + real_t alpha_iPj { metric.alpha({ xi[0] + HALF, xi[1] }) }; + + // D3 at ( i , j ) + x0m[0] = xi[0] - HALF; + x0m[1] = xi[1]; + x0p[0] = xi[0] + HALF; + x0p[1] = xi[1]; + real_t D3d { (A_3(x0p) - A_3(x0m)) * beta_ij / alpha_ij }; + + // D1 at ( i + HALF , j ) + x0m[0] = xi[0] + HALF - HALF; + x0m[1] = xi[1]; + x0p[0] = xi[0] + HALF + HALF; + x0p[1] = xi[1]; + real_t E1d { (A_0(x0p) - A_0(x0m)) }; + real_t D1d { E1d / alpha_iPj }; + + if (cmp::AlmostZero(x_Ph[1])) { + return metric.template h<1, 3>({ xi[0], xi[1] }) * D1d; + } else { + return metric.template h<3, 3>({ xi[0], xi[1] }) * D3d + + metric.template h<1, 3>({ xi[0], xi[1] }) * D1d; + } + } else if (field_geometry == InitFieldGeometry::Vertical) { + return ZERO; } else { - return metric.template h<3, 3>({ xi[0], xi[1] }) * D3d + - metric.template h<1, 3>({ xi[0], xi[1] }) * D1d; + raise::KernelError(HERE, "Unrecognized field geometry"); + return ZERO; } } private: - const M metric; + const M metric; + InitFieldGeometry field_geometry; }; template @@ -201,7 +249,8 @@ namespace user { inline PGen(const SimulationParams& p, const Metadomain& m) : arch::ProblemGenerator { p } , global_domain { m } - , init_flds { m.mesh().metric } {} + , init_flds { m.mesh().metric, + p.template get("setup.init_field", "wald") } {} }; } // namespace user diff --git a/pgens/wald/wald.toml b/pgens/wald/wald.toml index 1104f7a0..2b05fbac 100644 --- a/pgens/wald/wald.toml +++ b/pgens/wald/wald.toml @@ -4,11 +4,11 @@ runtime = 100.0 [grid] - resolution = [128, 128] + resolution = [512, 512] extent = [[1.0, 10.0]] [grid.metric] - metric = "kerr_schild" + metric = "qkerr_schild" qsph_r0 = 0.0 qsph_h = 0.0 ks_a = 0.95 @@ -38,10 +38,10 @@ ppc0 = 2.0 [setup] + init_field = "wald" # or "vertical" [output] - format = "hdf5" - separate_files = false + format = "hdf5" [output.fields] interval_time = 1.0 diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index efc639a8..7ddacffb 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -261,6 +261,9 @@ namespace ntt { #if defined(OUTPUT_ENABLED) out::Writer g_writer; checkpoint::Writer g_checkpoint_writer; + #if defined(MPI_ENABLED) + void CommunicateVectorPotential(unsigned short); + #endif #endif #if defined(MPI_ENABLED) diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index ea04b10a..960b2c71 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -192,26 +192,56 @@ namespace ntt { unsigned short buff_idx, const Mesh mesh) { if constexpr (M::Dim == Dim::_2D) { - const auto i2_min = mesh.i_min(in::x2); - // !TODO: this is quite slow + const auto metric = mesh.metric; Kokkos::parallel_for( "ComputeVectorPotential", mesh.rangeActiveCells(), - Lambda(index_t i, index_t j) { - const real_t i_ { static_cast(static_cast(i) - (N_GHOSTS)) }; - const auto k_min = (i2_min - (N_GHOSTS)) + 1; - const auto k_max = (j - (N_GHOSTS)); - real_t A3 = ZERO; + Lambda(index_t i1, index_t i2) { + const real_t i1_ { COORD(i1) }; + const ncells_t k_min = 0; + const ncells_t k_max = (i2 - (N_GHOSTS)); + real_t A3 = ZERO; for (auto k { k_min }; k <= k_max; ++k) { real_t k_ = static_cast(k); - real_t sqrt_detH_ij1 { mesh.metric.sqrt_det_h({ i_, k_ - HALF }) }; - real_t sqrt_detH_ij2 { mesh.metric.sqrt_det_h({ i_, k_ + HALF }) }; + real_t sqrt_detH_ij1 { metric.sqrt_det_h({ i1_, k_ - HALF }) }; + real_t sqrt_detH_ij2 { metric.sqrt_det_h({ i1_, k_ + HALF }) }; auto k1 { k + N_GHOSTS }; - A3 += HALF * (sqrt_detH_ij1 * EM(i, k1 - 1, em::bx1) + - sqrt_detH_ij2 * EM(i, k1, em::bx1)); + A3 += HALF * (sqrt_detH_ij1 * EM(i1, k + N_GHOSTS - 1, em::bx1) + + sqrt_detH_ij2 * EM(i1, k + N_GHOSTS, em::bx1)); } - buffer(i, j, buff_idx) = A3; + buffer(i1, i2, buff_idx) = A3; }); + + // @TODO: Implementation with team policies works on AMD, but not on NVIDIA GPUs + // + // using TeamPolicy = Kokkos::TeamPolicy; + // const auto nx1 = mesh.n_active(in::x1); + // const auto nx2 = mesh.n_active(in::x2); + // + // TeamPolicy policy(nx1, Kokkos::AUTO); + // + // Kokkos::parallel_for( + // "ComputeVectorPotential", + // policy, + // Lambda(const TeamPolicy::member_type& team_member) { + // index_t i1 = team_member.league_rank(); + // Kokkos::parallel_scan( + // Kokkos::TeamThreadRange(team_member, nx2), + // [=](index_t i2, real_t& update, const bool final_pass) { + // const auto i1_ { static_cast(i1) }; + // const auto i2_ { static_cast(i2) }; + // const real_t sqrt_detH_ijM { metric.sqrt_det_h({ i1_, i2_ - HALF }) }; + // const real_t sqrt_detH_ijP { metric.sqrt_det_h({ i1_, i2_ + HALF }) }; + // const auto input_val = + // HALF * + // (sqrt_detH_ijM * EM(i1 + N_GHOSTS, i2 + N_GHOSTS - 1, em::bx1) + + // sqrt_detH_ijP * EM(i1 + N_GHOSTS, i2 + N_GHOSTS, em::bx1)); + // if (final_pass) { + // buffer(i1 + N_GHOSTS, i2 + N_GHOSTS, buff_idx) = update; + // } + // update += input_val; + // }); + // }); } else { raise::KernelError( HERE, @@ -219,6 +249,73 @@ namespace ntt { } } +#if defined(MPI_ENABLED) && defined(OUTPUT_ENABLED) + template + void ExtractVectorPotential(ndfield_t& buffer, + array_t& aphi_r, + unsigned short buff_idx, + const Mesh mesh) { + Kokkos::parallel_for( + "AddVectorPotential", + mesh.rangeActiveCells(), + Lambda(index_t i1, index_t i2) { + buffer(i1, i2, buff_idx) += aphi_r(i1 - N_GHOSTS); + }); + } + + template + void Metadomain::CommunicateVectorPotential(unsigned short buff_idx) { + if constexpr (M::Dim == Dim::_2D) { + auto local_domain = subdomain_ptr(l_subdomain_indices()[0]); + const auto nx1 = local_domain->mesh.n_active(in::x1); + const auto nx2 = local_domain->mesh.n_active(in::x2); + + auto& buffer = local_domain->fields.bckp; + + const auto nranks_x1 = ndomains_per_dim()[0]; + const auto nranks_x2 = ndomains_per_dim()[1]; + + for (auto nr2 { 1u }; nr2 < nranks_x2; ++nr2) { + const auto rank_send_pre = (nr2 - 1u) * nranks_x1; + const auto rank_recv_pre = nr2 * nranks_x1; + for (auto nr1 { 0u }; nr1 < nranks_x1; ++nr1) { + const auto rank_send = rank_send_pre + nr1; + const auto rank_recv = rank_recv_pre + nr1; + if (local_domain->mpi_rank() == rank_send) { + array_t aphi_r { "Aphi_r", nx1 }; + Kokkos::deep_copy( + aphi_r, + Kokkos::subview(buffer, + std::make_pair(N_GHOSTS, N_GHOSTS + nx1), + N_GHOSTS + nx2 - 1, + buff_idx)); + MPI_Send(aphi_r.data(), + nx1, + mpi::get_type(), + rank_recv, + 0, + MPI_COMM_WORLD); + } else if (local_domain->mpi_rank() == rank_recv) { + array_t aphi_r { "Aphi_r", nx1 }; + MPI_Recv(aphi_r.data(), + nx1, + mpi::get_type(), + rank_send, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + ExtractVectorPotential(buffer, aphi_r, buff_idx, local_domain->mesh); + } + } + } + } else { + raise::Error("CommunicateVectorPotential: comm vector potential only " + "possible for 2D", + HERE); + } + } +#endif + template auto Metadomain::Write( const SimulationParams& params, @@ -426,12 +523,15 @@ namespace ntt { HERE); } } else if (fld.is_vpotential()) { - if (S == SimEngine::GRPIC && M::Dim == Dim::_2D) { + if constexpr (S == SimEngine::GRPIC && M::Dim == Dim::_2D) { const auto c = static_cast(addresses.back()); ComputeVectorPotential(local_domain->fields.bckp, local_domain->fields.em, c, local_domain->mesh); +#if defined(MPI_ENABLED) + CommunicateVectorPotential(c); +#endif } else { raise::Error( "Vector potential can only be computed for GRPIC in 2D", @@ -785,4 +885,15 @@ namespace ntt { #undef METADOMAIN_OUTPUT +#if defined(MPI_ENABLED) + #define COMMVECTORPOTENTIAL(S, M) \ + template void Metadomain::CommunicateVectorPotential(unsigned short); + + COMMVECTORPOTENTIAL(SimEngine::GRPIC, metric::KerrSchild) + COMMVECTORPOTENTIAL(SimEngine::GRPIC, metric::QKerrSchild) + COMMVECTORPOTENTIAL(SimEngine::GRPIC, metric::KerrSchild0) + + #undef COMMVECTORPOTENTIAL +#endif + } // namespace ntt