diff --git a/docs/source/backends/hdf5.rst b/docs/source/backends/hdf5.rst index 0f839648c8..1d1866d874 100644 --- a/docs/source/backends/hdf5.rst +++ b/docs/source/backends/hdf5.rst @@ -22,7 +22,7 @@ Rudimentary support for HDF5 VFDs (`virtual file driver `_ for further details. +Refer to the page on :ref:`JSON/TOML configuration ` for further details. Backend-Specific Controls @@ -54,6 +54,10 @@ Although we choose the default to be non-collective (independent) for ease of us For independent parallel I/O, potentially prefer using a modern version of the MPICH implementation (especially, use ROMIO instead of OpenMPI's ompio implementation). Please refer to the `HDF5 manual, function H5Pset_dxpl_mpio `_ for more details. +.. tip:: + + Instead of using an environment variable, independent/collective data transfer can also be configured at the API level via :ref:`JSON/TOML `. + ``OPENPMD_HDF5_ALIGNMENT``: this sets the alignment in Bytes for writes via the ``H5Pset_alignment`` function. According to the `HDF5 documentation `_: *For MPI IO and other parallel systems, choose an alignment which is a multiple of the disk block size.* diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst index fae114a01c..dca9b046e3 100644 --- a/docs/source/details/backendconfig.rst +++ b/docs/source/details/backendconfig.rst @@ -211,6 +211,13 @@ Explanation of the single keys: * ``hdf5.vfd.stripe_size``: Must be an integer * ``hdf5.vfd.stripe_count``: Must be an integer +Flush calls, e.g. ``Series::flush()`` can be configured via JSON/TOML as well. +The parameters eligible for being passed to flush calls may be configured globally as well, i.e. in the constructor of ``Series``, to provide default settings used for the entire Series. + +* ``hdf5.independent_stores``: A boolean that sets the ``H5FD_MPIO_INDEPENDENT`` dataset transfer property if true, otherwise ``H5FD_MPIO_COLLECTIVE``. + Only available when using HDF5 in combination with MPI. + See the `HDF5 subpage `_ for further information on independent vs. collective flushing. + .. _backendconfig-other: Other backends diff --git a/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp b/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp index 20309e79fc..e4efc06ea6 100644 --- a/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp +++ b/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp @@ -20,6 +20,7 @@ */ #pragma once +#include "openPMD/IO/AbstractIOHandler.hpp" #include "openPMD/config.hpp" #if openPMD_HAVE_HDF5 #include "openPMD/IO/AbstractIOHandlerImpl.hpp" @@ -37,6 +38,9 @@ namespace openPMD #if openPMD_HAVE_HDF5 class HDF5IOHandlerImpl : public AbstractIOHandlerImpl { + friend class HDF5IOHandler; + friend class ParallelHDF5IOHandler; + public: HDF5IOHandlerImpl( AbstractIOHandler *, @@ -109,6 +113,8 @@ class HDF5IOHandlerImpl : public AbstractIOHandlerImpl hid_t m_H5T_LONG_DOUBLE_80_LE; hid_t m_H5T_CLONG_DOUBLE_80_LE; + std::future flush(internal::ParsedFlushParams &); + protected: #if openPMD_HAVE_MPI /* @@ -119,7 +125,8 @@ class HDF5IOHandlerImpl : public AbstractIOHandlerImpl #endif json::TracingJSON m_config; - std::optional m_buffered_dataset_config; + nlohmann::json m_global_dataset_config; + nlohmann::json m_global_flush_config; private: struct File diff --git a/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp b/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp index e1190b3d71..3b214b64cb 100644 --- a/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp +++ b/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp @@ -43,6 +43,8 @@ class ParallelHDF5IOHandlerImpl : public HDF5IOHandlerImpl MPI_Comm m_mpiComm; MPI_Info m_mpiInfo; + + std::future flush(internal::ParsedFlushParams &); }; // ParallelHDF5IOHandlerImpl #else class ParallelHDF5IOHandlerImpl diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 75166f91a3..32a9e80d74 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -19,6 +19,9 @@ * If not, see . */ #include "openPMD/IO/HDF5/HDF5IOHandler.hpp" +#include "openPMD/IO/AbstractIOHandler.hpp" +#include "openPMD/IO/AbstractIOHandlerImpl.hpp" +#include "openPMD/IO/FlushParametersInternal.hpp" #include "openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp" #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" @@ -39,6 +42,7 @@ #include "openPMD/auxiliary/TypeTraits.hpp" #include "openPMD/backend/Attribute.hpp" +#include #include #endif @@ -146,11 +150,29 @@ HDF5IOHandlerImpl::HDF5IOHandlerImpl( { constexpr char const *const init_json_shadow_str = R"( + { + "dataset": { + "chunks": null + }, + "independent_stores": null + })"; + constexpr char const *const dataset_cfg_mask = R"( { "dataset": { "chunks": null } })"; + constexpr char const *const flush_cfg_mask = R"( + { + "independent_stores": null + })"; + m_global_dataset_config = m_config.json(); + json::filterByTemplate( + m_global_dataset_config, + nlohmann::json::parse(dataset_cfg_mask)); + m_global_flush_config = m_config.json(); + json::filterByTemplate( + m_global_flush_config, nlohmann::json::parse(flush_cfg_mask)); auto init_json_shadow = nlohmann::json::parse(init_json_shadow_str); json::merge(m_config.getShadow(), init_json_shadow); } @@ -480,34 +502,18 @@ void HDF5IOHandlerImpl::createDataset( } json::TracingJSON config = [&]() { - if (!m_buffered_dataset_config.has_value()) - { - // we are only interested in these values from the global config - constexpr char const *const mask_for_global_conf = R"( - { - "dataset": { - "chunks": null - } - })"; - m_buffered_dataset_config = m_config.json(); - json::filterByTemplate( - *m_buffered_dataset_config, - nlohmann::json::parse(mask_for_global_conf)); - } - auto const &buffered_config = *m_buffered_dataset_config; auto parsed_config = json::parseOptions( parameters.options, /* considerFiles = */ false); if (auto hdf5_config_it = parsed_config.config.find("hdf5"); hdf5_config_it != parsed_config.config.end()) { - auto copy = buffered_config; + auto copy = m_global_dataset_config; json::merge(copy, hdf5_config_it.value()); - copy = nlohmann::json{{"hdf5", std::move(copy)}}; - parsed_config.config = std::move(copy); + hdf5_config_it.value() = std::move(copy); } else { - parsed_config.config["hdf5"] = buffered_config; + parsed_config.config["hdf5"] = m_global_dataset_config; } return parsed_config; }(); @@ -2934,6 +2940,37 @@ HDF5IOHandlerImpl::getFile(Writable *writable) res.id = it2->second; return std::make_optional(std::move(res)); } + +std::future HDF5IOHandlerImpl::flush(internal::ParsedFlushParams ¶ms) +{ + auto res = AbstractIOHandlerImpl::flush(); + + if (params.backendConfig.json().contains("hdf5")) + { + auto hdf5_config = params.backendConfig["hdf5"]; + + if (auto shadow = hdf5_config.invertShadow(); shadow.size() > 0) + { + switch (hdf5_config.originallySpecifiedAs) + { + case json::SupportedLanguages::JSON: + std::cerr << "Warning: parts of the backend configuration for " + "HDF5 remain unused:\n" + << shadow << std::endl; + break; + case json::SupportedLanguages::TOML: { + auto asToml = json::jsonToToml(shadow); + std::cerr << "Warning: parts of the backend configuration for " + "HDF5 remain unused:\n" + << json::format_toml(asToml) << std::endl; + break; + } + } + } + } + + return res; +} #endif #if openPMD_HAVE_HDF5 @@ -2945,9 +2982,9 @@ HDF5IOHandler::HDF5IOHandler( HDF5IOHandler::~HDF5IOHandler() = default; -std::future HDF5IOHandler::flush(internal::ParsedFlushParams &) +std::future HDF5IOHandler::flush(internal::ParsedFlushParams ¶ms) { - return m_impl->flush(); + return m_impl->flush(params); } #else diff --git a/src/IO/HDF5/ParallelHDF5IOHandler.cpp b/src/IO/HDF5/ParallelHDF5IOHandler.cpp index 5d1968193a..00d5741457 100644 --- a/src/IO/HDF5/ParallelHDF5IOHandler.cpp +++ b/src/IO/HDF5/ParallelHDF5IOHandler.cpp @@ -20,6 +20,8 @@ */ #include "openPMD/IO/HDF5/ParallelHDF5IOHandler.hpp" #include "openPMD/Error.hpp" +#include "openPMD/IO/FlushParametersInternal.hpp" +#include "openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp" #include "openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp" #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" @@ -63,9 +65,21 @@ ParallelHDF5IOHandler::ParallelHDF5IOHandler( ParallelHDF5IOHandler::~ParallelHDF5IOHandler() = default; -std::future ParallelHDF5IOHandler::flush(internal::ParsedFlushParams &) +std::future +ParallelHDF5IOHandler::flush(internal::ParsedFlushParams ¶ms) { - return m_impl->flush(); + if (auto hdf5_config_it = params.backendConfig.json().find("hdf5"); + hdf5_config_it != params.backendConfig.json().end()) + { + auto copied_global_cfg = m_impl->m_global_flush_config; + json::merge(copied_global_cfg, hdf5_config_it.value()); + hdf5_config_it.value() = std::move(copied_global_cfg); + } + else + { + params.backendConfig["hdf5"].json() = m_impl->m_global_flush_config; + } + return m_impl->flush(params); } ParallelHDF5IOHandlerImpl::ParallelHDF5IOHandlerImpl( @@ -121,14 +135,14 @@ ParallelHDF5IOHandlerImpl::ParallelHDF5IOHandlerImpl( } H5FD_mpio_xfer_t xfer_mode = H5FD_MPIO_COLLECTIVE; - auto const hdf5_collective = + auto const hdf5_independent = auxiliary::getEnvString("OPENPMD_HDF5_INDEPENDENT", "ON"); - if (hdf5_collective == "ON") + if (hdf5_independent == "ON") xfer_mode = H5FD_MPIO_INDEPENDENT; else { VERIFY( - hdf5_collective == "OFF", + hdf5_independent == "OFF", "[HDF5] Internal error: OPENPMD_HDF5_INDEPENDENT property must be " "either ON or OFF"); } @@ -318,26 +332,26 @@ ParallelHDF5IOHandlerImpl::ParallelHDF5IOHandlerImpl( {"hdf5", "vfd", "type"}, "Unknown value: '" + user_specified_type + "'."); } + } - // unused params - auto shadow = m_config.invertShadow(); - if (shadow.size() > 0) + // unused params + auto shadow = m_config.invertShadow(); + if (shadow.size() > 0) + { + switch (m_config.originallySpecifiedAs) { - switch (m_config.originallySpecifiedAs) - { - case json::SupportedLanguages::JSON: - std::cerr << "Warning: parts of the backend configuration for " - "HDF5 remain unused:\n" - << shadow << std::endl; - break; - case json::SupportedLanguages::TOML: { - auto asToml = json::jsonToToml(shadow); - std::cerr << "Warning: parts of the backend configuration for " - "HDF5 remain unused:\n" - << json::format_toml(asToml) << std::endl; - break; - } - } + case json::SupportedLanguages::JSON: + std::cerr << "Warning: parts of the backend configuration for " + "HDF5 remain unused:\n" + << shadow << std::endl; + break; + case json::SupportedLanguages::TOML: { + auto asToml = json::jsonToToml(shadow); + std::cerr << "Warning: parts of the backend configuration for " + "HDF5 remain unused:\n" + << json::format_toml(asToml) << std::endl; + break; + } } } } @@ -355,6 +369,55 @@ ParallelHDF5IOHandlerImpl::~ParallelHDF5IOHandlerImpl() m_openFileIDs.erase(file); } } + +std::future +ParallelHDF5IOHandlerImpl::flush(internal::ParsedFlushParams ¶ms) +{ + std::optional old_value; + if (params.backendConfig.json().contains("hdf5")) + { + auto hdf5_config = params.backendConfig["hdf5"]; + + if (hdf5_config.json().contains("independent_stores")) + { + auto independent_stores_json = hdf5_config["independent_stores"]; + if (!independent_stores_json.json().is_boolean()) + { + throw error::BackendConfigSchema( + {"hdf5", "independent_stores"}, "Requires boolean value."); + } + bool independent_stores = + independent_stores_json.json().get(); + old_value = std::make_optional(); + herr_t status = + H5Pget_dxpl_mpio(m_datasetTransferProperty, &*old_value); + VERIFY( + status >= 0, + "[HDF5] Internal error: Failed to query the global data " + "transfer mode before flushing."); + H5FD_mpio_xfer_t new_value = independent_stores + ? H5FD_MPIO_INDEPENDENT + : H5FD_MPIO_COLLECTIVE; + status = H5Pset_dxpl_mpio(m_datasetTransferProperty, new_value); + VERIFY( + status >= 0, + "[HDF5] Internal error: Failed to set the local data " + "transfer mode before flushing."); + } + } + auto res = HDF5IOHandlerImpl::flush(params); + + if (old_value.has_value()) + { + herr_t status = H5Pset_dxpl_mpio(m_datasetTransferProperty, *old_value); + VERIFY( + status >= 0, + "[HDF5] Internal error: Failed to reset the global data " + "transfer mode after flushing."); + } + + return res; +} #else #if openPMD_HAVE_MPI diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index e803e1a0f7..5819ba899d 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -302,8 +302,11 @@ TEST_CASE("hdf5_write_test", "[parallel][hdf5]") MPI_Comm_rank(MPI_COMM_WORLD, &mpi_r); auto mpi_size = static_cast(mpi_s); auto mpi_rank = static_cast(mpi_r); - Series o = - Series("../samples/parallel_write.h5", Access::CREATE, MPI_COMM_WORLD); + Series o = Series( + "../samples/parallel_write.h5", + Access::CREATE, + MPI_COMM_WORLD, + "hdf5.independent_stores = false"); o.setAuthor("Parallel HDF5"); ParticleSpecies &e = o.iterations[1].particles["e"]; @@ -322,6 +325,8 @@ TEST_CASE("hdf5_write_test", "[parallel][hdf5]") "hdf5.dataset.chunks = [1]")); e["position"]["x"].storeChunk(position_local, {mpi_rank}, {1}); + o.flush("hdf5.independent_stores = true"); + std::vector positionOffset_global(mpi_size); uint64_t posOff{0}; std::generate( @@ -344,7 +349,7 @@ TEST_CASE("hdf5_write_test", "[parallel][hdf5]") e["positionOffset"]["y"].storeChunk( std::make_unique(3.141592654), {0}, {1}); - o.flush(); + o.flush("hdf5.independent_stores = false"); } TEST_CASE("hdf5_write_test_zero_extent", "[parallel][hdf5]")