From ca21c119029cddf6cb09f45239e6982830561af7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 2 Jul 2024 17:31:27 +0200 Subject: [PATCH 01/56] Preparation work: Refactoring --- src/IO/HDF5/HDF5IOHandler.cpp | 174 +++++++++++++++++++--------------- 1 file changed, 99 insertions(+), 75 deletions(-) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index f09fd5ba4c..03b7ca93a9 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -76,6 +76,26 @@ namespace openPMD } while (0) #endif +constexpr char const *const init_json_shadow_str = &R"( +{ + "dataset": { + "chunks": null + }, + "independent_stores": null +})"[1]; +constexpr char const *dataset_cfg_mask = &R"( +{ + "dataset": { + "chunks": null, + "permanent_filters": null + } +} +)"[1]; +constexpr char const *const flush_cfg_mask = &R"( +{ + "independent_stores": null +})"[1]; + HDF5IOHandlerImpl::HDF5IOHandlerImpl( AbstractIOHandler *handler, bool do_warn_unused_params) : AbstractIOHandlerImpl(handler) @@ -151,23 +171,6 @@ HDF5IOHandlerImpl::HDF5IOHandlerImpl( m_config = config["hdf5"]; { - constexpr char const *const init_json_shadow_str = R"( - { - "dataset": { - "chunks": null - }, - "independent_stores": null - })"; - constexpr char const *const dataset_cfg_mask = R"( - { - "dataset": { - "chunks": null - } - })"; - constexpr char const *const flush_cfg_mask = R"( - { - "independent_stores": null - })"; m_global_dataset_config = m_config.json(); json::filterByTemplate( m_global_dataset_config, @@ -466,74 +469,28 @@ void HDF5IOHandlerImpl::createPath( "creation"); } -void HDF5IOHandlerImpl::createDataset( - Writable *writable, Parameter const ¶meters) +namespace { - if (access::readOnly(m_handler->m_backendAccess)) - throw std::runtime_error( - "[HDF5] Creating a dataset in a file opened as read only is not " - "possible."); - - if (parameters.joinedDimension.has_value()) + using chunking_t = std::vector; + struct DatasetParams { - 
error::throwOperationUnsupportedInBackend( - "HDF5", "Joined Arrays currently only supported in ADIOS2"); - } + std::optional chunking; + bool resizable = false; + }; - if (!writable->written) + auto parse_dataset_config( + json::TracingJSON &config, + std::vector const &dims, + Datatype const d) -> DatasetParams { - /* Sanitize name */ - std::string name = parameters.name; - if (auxiliary::starts_with(name, '/')) - name = auxiliary::replace_first(name, "/", ""); - if (auxiliary::ends_with(name, '/')) - name = auxiliary::replace_last(name, "/", ""); - - std::vector dims; - std::uint64_t num_elements = 1u; - for (auto const &val : parameters.extent) - { - dims.push_back(static_cast(val)); - num_elements *= val; - } - - Datatype d = parameters.dtype; - if (d == Datatype::UNDEFINED) - { - // TODO handle unknown dtype - std::cerr << "[HDF5] Datatype::UNDEFINED caught during dataset " - "creation (serial HDF5)" - << std::endl; - d = Datatype::BOOL; - } - - json::TracingJSON config = [&]() { - auto parsed_config = - parameters.compileJSONConfig( - writable, *m_handler->jsonMatcher, "hdf5"); - if (auto hdf5_config_it = parsed_config.config.find("hdf5"); - hdf5_config_it != parsed_config.config.end()) - { - auto copy = m_global_dataset_config; - json::merge_internal( - copy, hdf5_config_it.value(), /* do_prune = */ true); - hdf5_config_it.value() = std::move(copy); - } - else - { - parsed_config.config["hdf5"] = m_global_dataset_config; - } - return parsed_config; - }(); + DatasetParams res; // general - bool is_resizable_dataset = false; if (config.json().contains("resizable")) { - is_resizable_dataset = config["resizable"].json().get(); + res.resizable = config["resizable"].json().get(); } - using chunking_t = std::vector; using compute_chunking_t = std::variant; @@ -616,6 +573,73 @@ void HDF5IOHandlerImpl::createDataset( }}, std::move(compute_chunking)); + return res; + } +} // namespace + +void HDF5IOHandlerImpl::createDataset( + Writable *writable, Parameter const 
¶meters) +{ + if (access::readOnly(m_handler->m_backendAccess)) + throw std::runtime_error( + "[HDF5] Creating a dataset in a file opened as read only is not " + "possible."); + + if (parameters.joinedDimension.has_value()) + { + error::throwOperationUnsupportedInBackend( + "HDF5", "Joined Arrays currently only supported in ADIOS2"); + } + + if (!writable->written) + { + /* Sanitize name */ + std::string name = parameters.name; + if (auxiliary::starts_with(name, '/')) + name = auxiliary::replace_first(name, "/", ""); + if (auxiliary::ends_with(name, '/')) + name = auxiliary::replace_last(name, "/", ""); + + std::vector dims; + std::uint64_t num_elements = 1u; + for (auto const &val : parameters.extent) + { + dims.push_back(static_cast(val)); + num_elements *= val; + } + + Datatype d = parameters.dtype; + if (d == Datatype::UNDEFINED) + { + // TODO handle unknown dtype + std::cerr << "[HDF5] Datatype::UNDEFINED caught during dataset " + "creation (serial HDF5)" + << std::endl; + d = Datatype::BOOL; + } + + json::TracingJSON config = [&]() { + auto parsed_config = + parameters.compileJSONConfig( + writable, *m_handler->jsonMatcher, "hdf5"); + if (auto hdf5_config_it = parsed_config.config.find("hdf5"); + hdf5_config_it != parsed_config.config.end()) + { + auto copy = m_global_dataset_config; + json::merge_internal( + copy, hdf5_config_it.value(), /* do_prune = */ true); + hdf5_config_it.value() = std::move(copy); + } + else + { + parsed_config.config["hdf5"] = m_global_dataset_config; + } + return parsed_config; + }(); + + auto [chunking, is_resizable_dataset] = + parse_dataset_config(config, dims, d); + parameters.warnUnusedParameters( config, "hdf5", From 6706b6655a2976a7fdbb16e19ba25adc0a58b4bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 2 Jul 2024 18:55:10 +0200 Subject: [PATCH 02/56] Basic compression/filtering in HDF5 --- examples/7_extended_write_serial.cpp | 12 ++- src/IO/HDF5/HDF5IOHandler.cpp | 120 +++++++++++++++++++++++++-- 
2 files changed, 124 insertions(+), 8 deletions(-) diff --git a/examples/7_extended_write_serial.cpp b/examples/7_extended_write_serial.cpp index e9dd61afec..6fb3b7a21e 100644 --- a/examples/7_extended_write_serial.cpp +++ b/examples/7_extended_write_serial.cpp @@ -146,7 +146,17 @@ int main() d = io::Dataset(dtype, mpiDims); electrons["positionOffset"]["x"].resetDataset(d); - auto dset = io::Dataset(io::determineDatatype(), {2}); + auto dset = io::Dataset( + io::determineDatatype(), + {2}, + R"( + hdf5.dataset.chunks = "auto" + + hdf5.dataset.permanent_filters = [ + {type = "zlib", aggression = 5}, + 2 + ] + )"); electrons.particlePatches["numParticles"].resetDataset(dset); electrons.particlePatches["numParticlesOffset"].resetDataset(dset); diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 03b7ca93a9..f70b7d5b03 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -27,6 +27,7 @@ #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/auxiliary/Variant.hpp" +#include #include #include #include @@ -474,8 +475,19 @@ namespace using chunking_t = std::vector; struct DatasetParams { + struct Zlib + { + unsigned aggression = 1; + }; + using filter_t = std::variant< + // generic + H5Z_filter_t, + // H5Pset_deflate + Zlib>; + std::optional chunking; bool resizable = false; + std::vector filters; }; auto parse_dataset_config( @@ -510,6 +522,19 @@ namespace } }; + auto filter_error = []() { + return error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters"}, + "Must be either a scalar filter or a vector of filters, " + "where a filter is either an integer ID for the filter or " + "a JSON object identifying a builtin filter."); + }; + auto builtin_filter_error = []() { + return error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters"}, + R"(A builtin filter is a JSON object with mandatory string type key "type". 
The only supported filter is currently "zlib", which optionally takes an unsigned integer type key "aggression" (default value 1).)"); + }; + compute_chunking_t compute_chunking = auxiliary::getEnvString("OPENPMD_HDF5_CHUNKS", "auto"); @@ -547,8 +572,75 @@ namespace throw_chunking_error(); } } + + if (datasetConfig.json().contains("permanent_filters")) + { + auto parse_filter = + [&filter_error, &builtin_filter_error]( + auto &filter_config, + auto &&json_accessor) -> DatasetParams::filter_t { + if (json_accessor(filter_config).is_number_integer()) + { + return json_accessor(filter_config) + .template get(); + } + else if (json_accessor(filter_config).is_object()) + { + if (!json_accessor(filter_config).contains("type")) + { + throw builtin_filter_error(); + } + if (auto const &type_config = + json::asLowerCaseStringDynamic( + json_accessor(filter_config["type"])); + !type_config.has_value() || *type_config != "zlib") + { + throw builtin_filter_error(); + } + + DatasetParams::Zlib zlib; + if (json_accessor(filter_config).contains("aggression")) + { + auto const &aggression_config = + json_accessor(filter_config["aggression"]); + if (!aggression_config.is_number_integer()) + { + throw builtin_filter_error(); + } + zlib.aggression = + aggression_config.template get(); + } + return zlib; + } + else + { + throw filter_error(); + } + }; + auto permanent_filters = datasetConfig["permanent_filters"]; + if (permanent_filters.json().is_array()) + { + permanent_filters.declareFullyRead(); + res.filters.reserve(permanent_filters.json().size()); + for (auto const &entry : permanent_filters.json()) + { + res.filters.push_back(parse_filter( + entry, [](auto const &j) -> nlohmann::json const & { + return j; + })); + } + } + else + { + res.filters = {parse_filter( + permanent_filters, + [](auto &&j) -> nlohmann::json const & { + return j.json(); + })}; + } + } } - std::optional chunking = std::visit( + res.chunking = std::visit( auxiliary::overloaded{ [&](chunking_t 
&&explicitly_specified) -> std::optional { @@ -637,7 +729,7 @@ void HDF5IOHandlerImpl::createDataset( return parsed_config; }(); - auto [chunking, is_resizable_dataset] = + auto [chunking, is_resizable_dataset, filters] = parse_dataset_config(config, dims, d); parameters.warnUnusedParameters( @@ -762,11 +854,25 @@ void HDF5IOHandlerImpl::createDataset( } } - std::string const &compression = ""; // @todo read from JSON - if (!compression.empty()) - std::cerr - << "[HDF5] Compression not yet implemented in HDF5 backend." - << std::endl; + for (auto const &filter : filters) + { + herr_t status = std::visit( + auxiliary::overloaded{ + [&](H5Z_filter_t filter_id) { + return H5Pset_filter( + datasetCreationProperty, filter_id, 0, 0, nullptr); + }, + [&](DatasetParams::Zlib const &zlib) { + return H5Pset_deflate( + datasetCreationProperty, zlib.aggression); + }}, + filter); + VERIFY( + status == 0, + "[HDF5] Internal error: Failed to set filter during dataset " + "creation"); + } + /* { std::vector< std::string > args = auxiliary::split(compression, From 8280eb5c77117d7aff1fce4a234f03815b8f00b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 3 Jul 2024 11:18:41 +0200 Subject: [PATCH 03/56] Configure generic filters via JSON object --- examples/7_extended_write_serial.cpp | 2 +- src/IO/HDF5/HDF5IOHandler.cpp | 205 +++++++++++++++++++-------- 2 files changed, 148 insertions(+), 59 deletions(-) diff --git a/examples/7_extended_write_serial.cpp b/examples/7_extended_write_serial.cpp index 6fb3b7a21e..30d707dfa4 100644 --- a/examples/7_extended_write_serial.cpp +++ b/examples/7_extended_write_serial.cpp @@ -154,7 +154,7 @@ int main() hdf5.dataset.permanent_filters = [ {type = "zlib", aggression = 5}, - 2 + {id = "shuffle"} ] )"); electrons.particlePatches["numParticles"].resetDataset(dset); diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index f70b7d5b03..35b1411f54 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ 
b/src/IO/HDF5/HDF5IOHandler.cpp @@ -475,13 +475,19 @@ namespace using chunking_t = std::vector; struct DatasetParams { + struct ByID + { + H5Z_filter_t id = 0; + unsigned int flags = 0; + std::vector c_values; + }; struct Zlib { unsigned aggression = 1; }; using filter_t = std::variant< // generic - H5Z_filter_t, + ByID, // H5Pset_deflate Zlib>; @@ -490,6 +496,140 @@ namespace std::vector filters; }; + template + auto parse_filter(JSON &filter_config, Accessor &&json_accessor) + -> DatasetParams::filter_t + { + auto filter_error = []() { + return error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters"}, + "Must be either a JSON object or a vector of JSON objects."); + }; + if (!json_accessor(filter_config).is_object()) + { + throw filter_error(); + } + + enum class filter_type + { + ByID, + Zlib + }; + + filter_type type = [&]() -> filter_type { + if (json_accessor(filter_config).contains("type")) + { + auto res = json::asLowerCaseStringDynamic( + json_accessor(filter_config["type"])); + if (!res.has_value()) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "type"}, + "Must be of type string."); + } + using pair_t = std::pair; + std::array filter_types{ + pair_t{"by_id", filter_type::ByID}, + pair_t{"zlib", filter_type::Zlib}}; + for (auto const &[key, res_type] : filter_types) + { + if (*res == key) + { + return res_type; + } + } + std::stringstream error; + error << "Must be one of:"; + for (auto const &pair : filter_types) + { + error << " '" << pair.first << "'"; + } + error << "."; + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "type"}, + error.str()); + } + else + { + return filter_type::ByID; + } + }(); + + switch (type) + { + case filter_type::ByID: { + DatasetParams::ByID byID; + if (!json_accessor(filter_config).contains("id")) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "id"}, + "Required key for selecting a filter by ID."); + } + 
byID.id = [&]() -> H5Z_filter_t { + auto const &id_config = json_accessor(filter_config["id"]); + using pair_t = std::pair; + std::array filter_types{ + pair_t{"deflate", H5Z_FILTER_DEFLATE}, + pair_t{"shuffle", H5Z_FILTER_SHUFFLE}, + pair_t{"fletcher32", H5Z_FILTER_FLETCHER32}, + pair_t{"szip", H5Z_FILTER_SZIP}, + pair_t{"nbit", H5Z_FILTER_NBIT}, + pair_t{"scaleoffset", H5Z_FILTER_SCALEOFFSET}}; + auto id_error = [&]() { + std::stringstream error; + error + << "Must be either of unsigned integer type or one of:"; + for (auto const &pair : filter_types) + { + error << " '" << pair.first << "'"; + } + error << "."; + return error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "id"}, + error.str()); + }; + if (id_config.is_number_integer()) + { + return id_config.template get(); + } + auto maybe_string = json::asLowerCaseStringDynamic(id_config); + if (!maybe_string.has_value()) + { + throw id_error(); + } + for (auto const &[key, res_type] : filter_types) + { + if (*maybe_string == key) + { + return res_type; + } + } + throw id_error(); + }(); + return byID; + } + break; + case filter_type::Zlib: { + DatasetParams::Zlib zlib; + if (json_accessor(filter_config).contains("aggression")) + { + auto const &aggression_config = + json_accessor(filter_config["aggression"]); + if (!aggression_config.is_number_integer()) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "aggression"}, + "Must be of unsigned integer type."); + } + zlib.aggression = aggression_config.template get(); + } + return zlib; + } + break; + } + throw std::runtime_error("Unreachable!"); + } + auto parse_dataset_config( json::TracingJSON &config, std::vector const &dims, @@ -522,19 +662,6 @@ namespace } }; - auto filter_error = []() { - return error::BackendConfigSchema( - {"hdf5", "dataset", "permanent_filters"}, - "Must be either a scalar filter or a vector of filters, " - "where a filter is either an integer ID for the filter or " - "a JSON object 
identifying a builtin filter."); - }; - auto builtin_filter_error = []() { - return error::BackendConfigSchema( - {"hdf5", "dataset", "permanent_filters"}, - R"(A builtin filter is a JSON object with mandatory string type key "type". The only supported filter is currently "zlib", which optionally takes an unsigned integer type key "aggression" (default value 1).)"); - }; - compute_chunking_t compute_chunking = auxiliary::getEnvString("OPENPMD_HDF5_CHUNKS", "auto"); @@ -575,48 +702,6 @@ namespace if (datasetConfig.json().contains("permanent_filters")) { - auto parse_filter = - [&filter_error, &builtin_filter_error]( - auto &filter_config, - auto &&json_accessor) -> DatasetParams::filter_t { - if (json_accessor(filter_config).is_number_integer()) - { - return json_accessor(filter_config) - .template get(); - } - else if (json_accessor(filter_config).is_object()) - { - if (!json_accessor(filter_config).contains("type")) - { - throw builtin_filter_error(); - } - if (auto const &type_config = - json::asLowerCaseStringDynamic( - json_accessor(filter_config["type"])); - !type_config.has_value() || *type_config != "zlib") - { - throw builtin_filter_error(); - } - - DatasetParams::Zlib zlib; - if (json_accessor(filter_config).contains("aggression")) - { - auto const &aggression_config = - json_accessor(filter_config["aggression"]); - if (!aggression_config.is_number_integer()) - { - throw builtin_filter_error(); - } - zlib.aggression = - aggression_config.template get(); - } - return zlib; - } - else - { - throw filter_error(); - } - }; auto permanent_filters = datasetConfig["permanent_filters"]; if (permanent_filters.json().is_array()) { @@ -858,9 +943,13 @@ void HDF5IOHandlerImpl::createDataset( { herr_t status = std::visit( auxiliary::overloaded{ - [&](H5Z_filter_t filter_id) { + [&](DatasetParams::ByID const &by_id) { return H5Pset_filter( - datasetCreationProperty, filter_id, 0, 0, nullptr); + datasetCreationProperty, + by_id.id, + by_id.flags, + by_id.c_values.size(), 
+ by_id.c_values.data()); }, [&](DatasetParams::Zlib const &zlib) { return H5Pset_deflate( From aecdd8848b8170e2c745c243cf88f4fc75991693 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 3 Jul 2024 11:40:46 +0200 Subject: [PATCH 04/56] Full support for the set_filter API --- examples/7_extended_write_serial.cpp | 11 ++++- src/IO/HDF5/HDF5IOHandler.cpp | 60 ++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/examples/7_extended_write_serial.cpp b/examples/7_extended_write_serial.cpp index 30d707dfa4..ccebe17dd4 100644 --- a/examples/7_extended_write_serial.cpp +++ b/examples/7_extended_write_serial.cpp @@ -120,6 +120,15 @@ int main() } ] } + }, + "hdf5": { + "dataset": { + "chunks": "auto", + "permanent_filters": { + "id": "fletcher32", + "flags": "optional" + } + } } })END"; d.options = datasetConfig; @@ -154,7 +163,7 @@ int main() hdf5.dataset.permanent_filters = [ {type = "zlib", aggression = 5}, - {id = "shuffle"} + {id = "shuffle", "flags" = "MANDATORY"} ] )"); electrons.particlePatches["numParticles"].resetDataset(dset); diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 35b1411f54..1f82b0cb8a 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -28,6 +28,7 @@ #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/auxiliary/Variant.hpp" #include +#include #include #include #include @@ -606,6 +607,65 @@ namespace } throw id_error(); }(); + byID.flags = [&]() -> unsigned int { + if (!json_accessor(filter_config).contains("flags")) + { + return 0; + } + auto const &flag_config = json_accessor(filter_config["flags"]); + using pair_t = std::pair; + std::array filter_types{ + pair_t{"optional", H5Z_FLAG_OPTIONAL}, + pair_t{"mandatory", H5Z_FLAG_MANDATORY}}; + auto flag_error = [&]() { + std::stringstream error; + error + << "Must be either of unsigned integer type or one of:"; + for (auto const &pair : filter_types) + { + error << " '" 
<< pair.first << "'"; + } + error << "."; + return error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "flags"}, + error.str()); + }; + if (flag_config.is_number_integer()) + { + return flag_config.template get(); + } + auto maybe_string = json::asLowerCaseStringDynamic(flag_config); + if (!maybe_string.has_value()) + { + throw flag_error(); + } + for (auto const &[key, res_type] : filter_types) + { + if (*maybe_string == key) + { + return res_type; + } + } + throw flag_error(); + }(); + if (json_accessor(filter_config).contains("c_values")) + { + auto const &c_values_config = + json_accessor(filter_config["c_values"]); + try + { + + byID.c_values = + c_values_config + .template get>(); + } + catch (nlohmann::json::type_error const &) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "c_values"}, + "Must be an array of unsigned integers."); + } + } return byID; } break; From 2154e8f9cc98b67e0183fd641f7007df23d05b70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 3 Jul 2024 12:16:33 +0200 Subject: [PATCH 05/56] Fix: captured structured bindings are a C++20 extension --- src/IO/HDF5/HDF5IOHandler.cpp | 36 ++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 1f82b0cb8a..e844d7b5dd 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -961,25 +961,27 @@ void HDF5IOHandlerImpl::createDataset( { if (chunking->size() != parameters.extent.size()) { - std::string chunking_printed = [&]() { - if (chunking->empty()) - { - return std::string("[]"); - } - else - { - std::stringstream s; - auto it = chunking->begin(); - auto end = chunking->end(); - s << '[' << *it++; - for (; it != end; ++it) + // captured structured bindings are a C++20 extension + std::string chunking_printed = + [&, &captured_chunking = chunking]() { + if (captured_chunking->empty()) { - s 
<< ", " << *it; + return std::string("[]"); } - s << ']'; - return s.str(); - } - }(); + else + { + std::stringstream s; + auto it = captured_chunking->begin(); + auto end = captured_chunking->end(); + s << '[' << *it++; + for (; it != end; ++it) + { + s << ", " << *it; + } + s << ']'; + return s.str(); + } + }(); std::cerr << "[HDF5] Chunking for dataset '" << name << "' was specified as " << chunking_printed << ", but dataset has dimensionality " From e9561280c7f219afd3b3d3d8d9b6542a91ac03be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 3 Jul 2024 12:26:00 +0200 Subject: [PATCH 06/56] Refactoring to satisfy the Github bot --- src/IO/HDF5/HDF5IOHandler.cpp | 263 +++++++++++++++++----------------- 1 file changed, 135 insertions(+), 128 deletions(-) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index e844d7b5dd..7312c37b10 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -497,6 +497,137 @@ namespace std::vector filters; }; + template + auto parse_filter_by_id(JSON &filter_config, Accessor &&json_accessor) + -> DatasetParams::ByID + { + DatasetParams::ByID byID; + if (!json_accessor(filter_config).contains("id")) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "id"}, + "Required key for selecting a filter by ID."); + } + byID.id = [&]() -> H5Z_filter_t { + auto const &id_config = json_accessor(filter_config["id"]); + using pair_t = std::pair; + std::array filter_types{ + pair_t{"deflate", H5Z_FILTER_DEFLATE}, + pair_t{"shuffle", H5Z_FILTER_SHUFFLE}, + pair_t{"fletcher32", H5Z_FILTER_FLETCHER32}, + pair_t{"szip", H5Z_FILTER_SZIP}, + pair_t{"nbit", H5Z_FILTER_NBIT}, + pair_t{"scaleoffset", H5Z_FILTER_SCALEOFFSET}}; + auto id_error = [&]() { + std::stringstream error; + error << "Must be either of unsigned integer type or one of:"; + for (auto const &pair : filter_types) + { + error << " '" << pair.first << "'"; + } + error << "."; + return 
error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "id"}, + error.str()); + }; + if (id_config.is_number_integer()) + { + return id_config.template get(); + } + auto maybe_string = json::asLowerCaseStringDynamic(id_config); + if (!maybe_string.has_value()) + { + throw id_error(); + } + for (auto const &[key, res_type] : filter_types) + { + if (*maybe_string == key) + { + return res_type; + } + } + throw id_error(); + }(); + byID.flags = [&]() -> unsigned int { + if (!json_accessor(filter_config).contains("flags")) + { + return 0; + } + auto const &flag_config = json_accessor(filter_config["flags"]); + using pair_t = std::pair; + std::array filter_types{ + pair_t{"optional", H5Z_FLAG_OPTIONAL}, + pair_t{"mandatory", H5Z_FLAG_MANDATORY}}; + auto flag_error = [&]() { + std::stringstream error; + error << "Must be either of unsigned integer type or one of:"; + for (auto const &pair : filter_types) + { + error << " '" << pair.first << "'"; + } + error << "."; + return error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "flags"}, + error.str()); + }; + if (flag_config.is_number_integer()) + { + return flag_config.template get(); + } + auto maybe_string = json::asLowerCaseStringDynamic(flag_config); + if (!maybe_string.has_value()) + { + throw flag_error(); + } + for (auto const &[key, res_type] : filter_types) + { + if (*maybe_string == key) + { + return res_type; + } + } + throw flag_error(); + }(); + if (json_accessor(filter_config).contains("c_values")) + { + auto const &c_values_config = + json_accessor(filter_config["c_values"]); + try + { + + byID.c_values = + c_values_config.template get>(); + } + catch (nlohmann::json::type_error const &) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "c_values"}, + "Must be an array of unsigned integers."); + } + } + return byID; + } + + template + auto parse_filter_zlib(JSON &filter_config, Accessor &&json_accessor) + -> DatasetParams::Zlib + { + 
DatasetParams::Zlib zlib; + if (json_accessor(filter_config).contains("aggression")) + { + auto const &aggression_config = + json_accessor(filter_config["aggression"]); + if (!aggression_config.is_number_integer()) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "aggression"}, + "Must be of unsigned integer type."); + } + zlib.aggression = aggression_config.template get(); + } + return zlib; + } + template auto parse_filter(JSON &filter_config, Accessor &&json_accessor) -> DatasetParams::filter_t @@ -558,134 +689,10 @@ namespace switch (type) { - case filter_type::ByID: { - DatasetParams::ByID byID; - if (!json_accessor(filter_config).contains("id")) - { - throw error::BackendConfigSchema( - {"hdf5", "dataset", "permanent_filters", "id"}, - "Required key for selecting a filter by ID."); - } - byID.id = [&]() -> H5Z_filter_t { - auto const &id_config = json_accessor(filter_config["id"]); - using pair_t = std::pair; - std::array filter_types{ - pair_t{"deflate", H5Z_FILTER_DEFLATE}, - pair_t{"shuffle", H5Z_FILTER_SHUFFLE}, - pair_t{"fletcher32", H5Z_FILTER_FLETCHER32}, - pair_t{"szip", H5Z_FILTER_SZIP}, - pair_t{"nbit", H5Z_FILTER_NBIT}, - pair_t{"scaleoffset", H5Z_FILTER_SCALEOFFSET}}; - auto id_error = [&]() { - std::stringstream error; - error - << "Must be either of unsigned integer type or one of:"; - for (auto const &pair : filter_types) - { - error << " '" << pair.first << "'"; - } - error << "."; - return error::BackendConfigSchema( - {"hdf5", "dataset", "permanent_filters", "id"}, - error.str()); - }; - if (id_config.is_number_integer()) - { - return id_config.template get(); - } - auto maybe_string = json::asLowerCaseStringDynamic(id_config); - if (!maybe_string.has_value()) - { - throw id_error(); - } - for (auto const &[key, res_type] : filter_types) - { - if (*maybe_string == key) - { - return res_type; - } - } - throw id_error(); - }(); - byID.flags = [&]() -> unsigned int { - if 
(!json_accessor(filter_config).contains("flags")) - { - return 0; - } - auto const &flag_config = json_accessor(filter_config["flags"]); - using pair_t = std::pair; - std::array filter_types{ - pair_t{"optional", H5Z_FLAG_OPTIONAL}, - pair_t{"mandatory", H5Z_FLAG_MANDATORY}}; - auto flag_error = [&]() { - std::stringstream error; - error - << "Must be either of unsigned integer type or one of:"; - for (auto const &pair : filter_types) - { - error << " '" << pair.first << "'"; - } - error << "."; - return error::BackendConfigSchema( - {"hdf5", "dataset", "permanent_filters", "flags"}, - error.str()); - }; - if (flag_config.is_number_integer()) - { - return flag_config.template get(); - } - auto maybe_string = json::asLowerCaseStringDynamic(flag_config); - if (!maybe_string.has_value()) - { - throw flag_error(); - } - for (auto const &[key, res_type] : filter_types) - { - if (*maybe_string == key) - { - return res_type; - } - } - throw flag_error(); - }(); - if (json_accessor(filter_config).contains("c_values")) - { - auto const &c_values_config = - json_accessor(filter_config["c_values"]); - try - { - - byID.c_values = - c_values_config - .template get>(); - } - catch (nlohmann::json::type_error const &) - { - throw error::BackendConfigSchema( - {"hdf5", "dataset", "permanent_filters", "c_values"}, - "Must be an array of unsigned integers."); - } - } - return byID; - } - break; - case filter_type::Zlib: { - DatasetParams::Zlib zlib; - if (json_accessor(filter_config).contains("aggression")) - { - auto const &aggression_config = - json_accessor(filter_config["aggression"]); - if (!aggression_config.is_number_integer()) - { - throw error::BackendConfigSchema( - {"hdf5", "dataset", "permanent_filters", "aggression"}, - "Must be of unsigned integer type."); - } - zlib.aggression = aggression_config.template get(); - } - return zlib; - } - break; + case filter_type::ByID: + return parse_filter_by_id(filter_config, json_accessor); + case filter_type::Zlib: + return 
parse_filter_zlib(filter_config, json_accessor); } throw std::runtime_error("Unreachable!"); } From 48d1fea29e1711fa2b5c78723f859ba44c7ce314 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 3 Jul 2024 12:29:06 +0200 Subject: [PATCH 07/56] Fix includes --- src/IO/HDF5/HDF5IOHandler.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 7312c37b10..757c018b41 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -27,8 +27,6 @@ #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/auxiliary/Variant.hpp" -#include -#include #include #include #include From 2c28a9babff0d5fe978565cd1a536e9ff64292c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 3 Jul 2024 14:07:30 +0200 Subject: [PATCH 08/56] Switch to JSON config for NVidia compiler's benefit --- examples/7_extended_write_serial.cpp | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/examples/7_extended_write_serial.cpp b/examples/7_extended_write_serial.cpp index ccebe17dd4..42b689d4fe 100644 --- a/examples/7_extended_write_serial.cpp +++ b/examples/7_extended_write_serial.cpp @@ -159,13 +159,23 @@ int main() io::determineDatatype(), {2}, R"( - hdf5.dataset.chunks = "auto" - - hdf5.dataset.permanent_filters = [ - {type = "zlib", aggression = 5}, - {id = "shuffle", "flags" = "MANDATORY"} - ] - )"); + { + "hdf5": { + "dataset": { + "chunks": "auto", + "permanent_filters": [ + { + "aggression": 5, + "type": "zlib" + }, + { + "flags": "MANDATORY", + "id": "shuffle" + } + ] + } + } + })"); electrons.particlePatches["numParticles"].resetDataset(dset); electrons.particlePatches["numParticlesOffset"].resetDataset(dset); From cc763882615fdc54aa72661afb9af249fb190470 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 9 Dec 2024 12:15:46 +0100 Subject: [PATCH 09/56] 
Verbose CI debugging lets goo --- test/ParallelIOTest.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index 84d746aeb7..4d55e7be62 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -1898,8 +1898,12 @@ void append_mode( TEST_CASE("append_mode", "[serial]") { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); for (auto const &t : testedFileExtensions()) { + std::cout << "RANK " << rank << " ABOUT TO TEST '" << t << "'" + << std::endl; std::string jsonConfigOld = R"END( { "adios2": From ac30fd7a60e879f40fab5cb9c67b8da8f35eef1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 9 Dec 2024 12:41:12 +0100 Subject: [PATCH 10/56] Revert "Verbose CI debugging lets goo" This reverts commit abefc3addc863b01218180a6fd59adad7d872177. --- test/ParallelIOTest.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index 4d55e7be62..84d746aeb7 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -1898,12 +1898,8 @@ void append_mode( TEST_CASE("append_mode", "[serial]") { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); for (auto const &t : testedFileExtensions()) { - std::cout << "RANK " << rank << " ABOUT TO TEST '" << t << "'" - << std::endl; std::string jsonConfigOld = R"END( { "adios2": From 773bf4af4433c39d10fe7b9e8534fb5a86c49e08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 23 Jul 2025 16:11:32 +0200 Subject: [PATCH 11/56] Use Blosc2 filter not yet integrated into CI --- examples/7_extended_write_serial.cpp | 75 +++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/examples/7_extended_write_serial.cpp b/examples/7_extended_write_serial.cpp index 42b689d4fe..3dfd7808d7 100644 --- a/examples/7_extended_write_serial.cpp +++ b/examples/7_extended_write_serial.cpp @@ -1,12 +1,40 @@ #include -#include +#if __has_include() +#include +#define 
OPENPMD_USE_BLOSC2_FILTER 1 +#else +#define OPENPMD_USE_BLOSC2_FILTER 0 +#endif + #include +#include +#include int main() { namespace io = openPMD; +#if OPENPMD_USE_BLOSC2_FILTER + /* + * This registers the Blosc2 plugin from + * https://github.com/Blosc/HDF5-Blosc2 as a demonstration on how to + * activate and configure dynamic HDF5 filter plugins through openPMD. + */ + + char *version, *date; + int r = register_blosc2(&version, &date); + if (r < 1) + { + throw std::runtime_error("Unable to register Blosc2 plugin with HDF5."); + } + else + { + std::cout << "Blosc2 plugin registered in version " << version + << " and date " << date << "." << std::endl; + } +#endif + { auto f = io::Series( "working/directory/2D_simData.h5", @@ -91,7 +119,7 @@ int main() } io::Mesh mesh = cur_it.meshes["lowRez_2D_field"]; - mesh.setAxisLabels({"x", "y"}); + mesh.setAxisLabels({"x", "y", "z"}); // data is assumed to reside behind a pointer as a contiguous // column-major array shared data ownership during IO is indicated with @@ -134,6 +162,49 @@ int main() d.options = datasetConfig; mesh["x"].resetDataset(d); +#if OPENPMD_USE_BLOSC2_FILTER + /* + * FILTER_BLOSC2 resolves to 32026, the permanent plugin ID registered + * with the HDF Group. Plugin-specific options are given via c_values, + * refer to the specific plugin's documentation. For the Blosc2 plugin, + * parameters 0, 1, 2 and 3 are reserved. Parameter 4 is the compression + * level, parameter 5 is a boolean for activating shuffling and + * parameter 6 denotes the compression method. 
+ */ + d.options = R"END( +{ + "adios2": { + "dataset": { + "operators": [ + { + "type": "zlib", + "parameters": { + "clevel": 9 + } + } + ] + } + }, + "hdf5": { + "dataset": { + "chunks": "auto", + "permanent_filters": { + "id": )END" + + std::to_string(FILTER_BLOSC2) + R"END(, + "flags": "mandatory", + "c_values": [0, 0, 0, 0, 4, 1, )END" + + std::to_string(BLOSC_ZSTD) + R"END(] + } + } + } +})END"; +#endif + d.extent = {500, 500}; + mesh["z"].resetDataset(d); + auto span = mesh["z"].storeChunk({0, 0}, {500, 500}); + auto span_data = span.currentBuffer(); + std::iota(span_data.begin(), span_data.end(), 41.); + io::ParticleSpecies electrons = cur_it.particles["electrons"]; io::Extent mpiDims{4}; From 853ea34bdb1c6cebea553e2c0758be3f43e3cbfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 24 Jul 2025 14:32:10 +0200 Subject: [PATCH 12/56] Add compression example --- CMakeLists.txt | 1 + examples/15_compression.cpp | 276 +++++++++++++++++++++++++++ examples/7_extended_write_serial.cpp | 10 +- src/IO/HDF5/HDF5IOHandler.cpp | 44 ++--- 4 files changed, 294 insertions(+), 37 deletions(-) create mode 100644 examples/15_compression.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 11d09e498e..cebd3a6bdf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -718,6 +718,7 @@ set(openPMD_EXAMPLE_NAMES 12_span_write 13_write_dynamic_configuration 14_toml_template + 15_compression ) set(openPMD_PYTHON_EXAMPLE_NAMES 2_read_serial diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp new file mode 100644 index 0000000000..74cf48f60e --- /dev/null +++ b/examples/15_compression.cpp @@ -0,0 +1,276 @@ +/* Copyright 2025 Franz Poeschel + * + * This file is part of openPMD-api. 
+ * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see . + */ + +#include + +#if openPMD_HAVE_HDF5 && __has_include() +#include +#define OPENPMD_USE_BLOSC2_FILTER 1 +#else +#define OPENPMD_USE_BLOSC2_FILTER 0 +#endif + +#include +#include +#include + +void init_blosc_for_hdf5() +{ +#if OPENPMD_USE_BLOSC2_FILTER + /* + * This registers the Blosc2 plugin from + * https://github.com/Blosc/HDF5-Blosc2 as a demonstration on how to + * activate and configure dynamic HDF5 filter plugins through openPMD. + */ + + char *version, *date; + int r = register_blosc2(&version, &date); + if (r < 1) + { + throw std::runtime_error("Unable to register Blosc2 plugin with HDF5."); + } + else + { + std::cout << "Blosc2 plugin registered in version '" << version + << "' and date '" << date << "'." << std::endl; + } +#endif +} + +void write(std::string const &filename, std::string const &config) +{ + using namespace openPMD; + std::cout << "Config for '" << filename << "' as JSON:\n" + << json::merge(config, "{}") << "\n\n"; + Series series( + "../samples/compression/" + filename, Access::CREATE_LINEAR, config); + + for (size_t i = 0; i < 10; ++i) + { + auto &current_iteration = series.snapshots()[i]; + + // First, write an E mesh.
+ auto &E = current_iteration.meshes["E"]; + E.setAxisLabels({"x", "y"}); + for (auto const &dim : {"x", "y"}) + { + auto &component = E[dim]; + component.resetDataset({Datatype::FLOAT, {10, 10}}); + auto buffer_view = + component.storeChunk({0, 0}, {10, 10}).currentBuffer(); + // Now fill the prepared buffer with some nonsense data. + std::iota(buffer_view.begin(), buffer_view.end(), i * 100); + } + + // Now, write some e particles. + auto &e = current_iteration.particles["e"]; + for (auto const &dim : {"x", "y"}) + { + // Do not bother with a positionOffset + auto &position_offset = e["positionOffset"][dim]; + position_offset.makeConstant(0); + + auto &position = e["position"][dim]; + position.resetDataset({Datatype::FLOAT, {100}}); + auto buffer_view = + position.storeChunk({0}, {100}).currentBuffer(); + // Now fill the prepared buffer with some nonsense data. + std::iota(buffer_view.begin(), buffer_view.end(), i * 100); + } + } +} + +int main() +{ + init_blosc_for_hdf5(); + + // Backend specific configuration can be given in either JSON or TOML. + // We will stick with TOML in this example, since it allows inline comments + // and remains more legible for larger configurations. + // If you are interested in the configurations as JSON, run the example and + // their JSON equivalents will be printed to stdout. + +#if openPMD_HAVE_ADIOS2 + // We start with two examples for ADIOS2. + std::string const simple_adios2_config = R"( + + # Backend can either be inferred from the filename ending, or specified + # explicitly. In the latter case, the filename ending can be given as + # a wildcard %E, openPMD will then pick a default ending. + backend = "adios2" + + # ADIOS2 supports adding multiple operators to a variable, hence we + # specify a list of operators here (using TOML's double bracket syntax). + # How much sense this makes depends on the specific operators in use. 
+ + [[adios2.dataset.operators]] + type = "bzip2" + parameters.clevel = 9 # The available parameters depend + # on the operator. + # Here, we specify zlib's compression level. + )"; + write("adios2_with_zlib.%E", simple_adios2_config); + + // The compression can also be specified per-dataset. + // For more details, also check: + // https://openpmd-api.readthedocs.io/en/latest/details/backendconfig.html#dataset-specific-configuration + + // This example will demonstrate the use of pattern matching. + // adios2.dataset is now a list of dataset configurations. The specific + // configuration to be used for a dataset will be determined by matching + // the dataset name against the patterns specified by the 'select' keys. + std::string const extended_adios2_config = R"( + backend = "adios2" + + [[adios2.dataset]] + # This uses egrep-type regular expressions. + select = "meshes/.*" + # Now, specify the operators list again. Let's use Blosc for this. + [[adios2.dataset.cfg.operators]] + type = "blosc" + parameters.doshuffle = "BLOSC_BITSHUFFLE" + parameters.clevel = 1 + + # Now, configure the particles. + [[adios2.dataset]] + # The match can either be against the path within the containing + # Iteration (e.g. 'meshes/E/x', as above) or (as in this example), + # against the full path (e.g. '/data/0/particles/e/position/x'). + # In this example, completely deactivate compression specifically for + # 'particles/e/position/x'. All other particle datasets will + # fall back to the default configuration specified below. + # Be careful when specifying compression per-Iteration. While this + # syntax fundamentally allows doing that, compressions once specified + # on an ADIOS2 variable will not be removed again. + # Since variable-encoding reuses ADIOS2 variables from previous + # Iterations, the compression configuration of the first Iteration will + # leak into all subsequent Iterations. 
+ select = "/data/[0-9]*/particles/e/position/x" + cfg.operators = [] + + # Now, the default configuration. + # In general, the dataset configurations are matched top-down, going for + # the first matching configuration. So, a default configuration could + # theoretically be specified by emplacing a catch-all pattern + # (regex: ".*") as the last option. + # However, we also define an explicit shorthand for specifying default + # configurations: Just omit the 'select' key. This special syntax is + # understood as the default configuration no matter where in the list it + # is emplaced, and it allows the backends to initialize the default + # configuration globally, instead of applying it selectively to each + # dataset that matches a catch-all pattern. + [[adios2.dataset]] + [[adios2.dataset.cfg.operators]] + type = "bzip2" + parameters.clevel = 2 + )"; + write( + "adios2_with_dataset_specific_configurations.%E", + extended_adios2_config); +#endif // openPMD_HAVE_ADIOS2 + +#if openPMD_HAVE_HDF5 + // Now, let's continue with HDF5. + // HDF5 supports compression via so-called filters. These can be permanent + // (applied to an entire dataset) and transient (applied to individual I/O + // operations). The openPMD-api currently supports permanent filters. Refer + // also to https://web.ics.purdue.edu/~aai/HDF5/html/Filters.html. + + // Filters are additionally distinguished by how tightly they integrate with + // HDF5. The most tightly-integrated filter is Zlib, which has its own API + // calls and hence also a special JSON/TOML configuration in openPMD: + + std::string const hdf5_zlib_config = R"( + backend = "hdf5" + + [hdf5.dataset.permanent_filters] + type = "zlib" # mandatory parameter + aggression = 5 # optional, defaults to 1 + )"; + write("hdf5_zlib.%E", hdf5_zlib_config); + + // All other filters have a common API and are identified by global IDs + // registered with the HDF Group. More details can be found in the + // H5Zpublic.h header. 
That header predefines a small number of filter IDs. + // These are directly supported by the openPMD-api: deflate, shuffle, + // fletcher32, szip, nbit, scaleoffset. + + std::string const hdf5_predefined_filter_ids = R"( + backend = "hdf5" + + [hdf5.dataset.permanent_filters] + id = "fletcher32" # mandatory parameter + # A filter can be applied as mandatory (execution should abort if the + # filter cannot be applied) or as optional (execution should ignore when + # the filter cannot be applied). + flags = "mandatory" # optional parameter + type = "by_id" # optional parameter for filters identified by ID, + # mandatory only for zlib (see above) + )"; + write("hdf5_predefined_filter_id.%E", hdf5_predefined_filter_ids); + + // Just like ADIOS2 with their operations, also HDF5 supports adding + // multiple filters into a filter pipeline. The permanent_filters key can + // hence also be given as a list. + + std::string const hdf5_filter_pipeline = R"( + backend = "hdf5" + + # pipeline consisting of two filters + + [[hdf5.dataset.permanent_filters]] + type = "zlib" + aggression = 5 + + [[hdf5.dataset.permanent_filters]] + id = "shuffle" + flags = "mandatory" + )"; + write("hdf5_filter_pipeline.%E", hdf5_filter_pipeline); + + // For non-predefined IDs, the ID must be given as a number. This example + // uses the Blosc2 filter available from + // https://github.com/Blosc/HDF5-Blosc2, with the permanent plugin ID 32026 + // (defined in blosc2_filter.h as FILTER_BLOSC2). Generic filters referenced + // by ID can be configured via the cd_values field. This field is an array + // of unsigned integers and plugin-specific interpretation. For the Blosc2 + // plugin, indexes 0, 1, 2 and 3 are reserved. index 4 is the compression + // level, index 5 is a boolean for activating shuffling and index 6 denotes + // the compression method. 
+#if OPENPMD_USE_BLOSC2_FILTER + std::stringstream hdf5_blosc_filter; + hdf5_blosc_filter << R"( + backend = "hdf5" + + [hdf5.dataset] + chunks = "auto" + + [hdf5.dataset.permanent_filters] + id = )" << FILTER_BLOSC2 + << R"( + flags = "mandatory" + cd_values = [0, 0, 0, 0, 4, 1, )" + << BLOSC_ZSTD << R"(] + )"; + write("hdf5_blosc_filter.%E", hdf5_blosc_filter.str()); +#endif // OPENPMD_USE_BLOSC2_FILTER +#endif // openPMD_HAVE_HDF5 +} diff --git a/examples/7_extended_write_serial.cpp b/examples/7_extended_write_serial.cpp index 3dfd7808d7..59e6fed19a 100644 --- a/examples/7_extended_write_serial.cpp +++ b/examples/7_extended_write_serial.cpp @@ -165,11 +165,11 @@ int main() #if OPENPMD_USE_BLOSC2_FILTER /* * FILTER_BLOSC2 resolves to 32026, the permanent plugin ID registered - * with the HDF Group. Plugin-specific options are given via c_values, + * with the HDF Group. Plugin-specific options are given via cd_values, * refer to the specific plugin's documentation. For the Blosc2 plugin, - * parameters 0, 1, 2 and 3 are reserved. Parameter 4 is the compression - * level, parameter 5 is a boolean for activating shuffling and - * parameter 6 denotes the compression method. + * indexes 0, 1, 2 and 3 are reserved. Index 4 is the compression + * level, index 5 is a boolean for activating shuffling and + * index 6 denotes the compression method. 
*/ d.options = R"END( { @@ -192,7 +192,7 @@ int main() "id": )END" + std::to_string(FILTER_BLOSC2) + R"END(, "flags": "mandatory", - "c_values": [0, 0, 0, 0, 4, 1, )END" + + "cd_values": [0, 0, 0, 0, 4, 1, )END" + std::to_string(BLOSC_ZSTD) + R"END(] } } diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 757c018b41..c723544a50 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -79,7 +79,8 @@ namespace openPMD constexpr char const *const init_json_shadow_str = &R"( { "dataset": { - "chunks": null + "chunks": null, + "permanent_filters": null }, "independent_stores": null })"[1]; @@ -181,6 +182,7 @@ HDF5IOHandlerImpl::HDF5IOHandlerImpl( auto init_json_shadow = nlohmann::json::parse(init_json_shadow_str); json::merge_internal( m_config.getShadow(), init_json_shadow, /* do_prune = */ false); + m_config["dataset"]["permanent_filters"].declareFullyRead(); } // unused params @@ -478,7 +480,7 @@ namespace { H5Z_filter_t id = 0; unsigned int flags = 0; - std::vector c_values; + std::vector cd_values; }; struct Zlib { @@ -586,20 +588,20 @@ namespace } throw flag_error(); }(); - if (json_accessor(filter_config).contains("c_values")) + if (json_accessor(filter_config).contains("cd_values")) { - auto const &c_values_config = - json_accessor(filter_config["c_values"]); + auto const &cd_values_config = + json_accessor(filter_config["cd_values"]); try { - byID.c_values = - c_values_config.template get>(); + byID.cd_values = + cd_values_config.template get>(); } catch (nlohmann::json::type_error const &) { throw error::BackendConfigSchema( - {"hdf5", "dataset", "permanent_filters", "c_values"}, + {"hdf5", "dataset", "permanent_filters", "cd_values"}, "Must be an array of unsigned integers."); } } @@ -1015,8 +1017,8 @@ void HDF5IOHandlerImpl::createDataset( datasetCreationProperty, by_id.id, by_id.flags, - by_id.c_values.size(), - by_id.c_values.data()); + by_id.cd_values.size(), + by_id.cd_values.data()); }, 
[&](DatasetParams::Zlib const &zlib) { return H5Pset_deflate( @@ -1029,28 +1031,6 @@ void HDF5IOHandlerImpl::createDataset( "creation"); } - /* - { - std::vector< std::string > args = auxiliary::split(compression, - ":"); std::string const& format = args[0]; if( (format == "zlib" || - format == "gzip" || format == "deflate") - && args.size() == 2 ) - { - status = H5Pset_deflate(datasetCreationProperty, - std::stoi(args[1])); VERIFY(status == 0, "[HDF5] Internal error: Failed - to set deflate compression during dataset creation"); } else if( format - == "szip" || format == "nbit" || format == "scaleoffset" ) std::cerr << - "[HDF5] Compression format " << format - << " not yet implemented. Data will not be - compressed!" - << std::endl; - else - std::cerr << "[HDF5] Compression format " << format - << " unknown. Data will not be compressed!" - << std::endl; - } - */ - GetH5DataType getH5DataType({ {typeid(bool).name(), m_H5T_BOOL_ENUM}, {typeid(std::complex).name(), m_H5T_CFLOAT}, From 2beab51fe0f054024f43da45621093890c06a2e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 29 Jul 2025 18:16:18 +0200 Subject: [PATCH 13/56] Add HDF5-Blosc2 to some Linux workflow --- .../dependencies/install_hdf5_blosc2 | 19 +++++++++++++++++++ .github/workflows/linux.yml | 4 ++++ 2 files changed, 23 insertions(+) create mode 100755 .github/workflows/dependencies/install_hdf5_blosc2 diff --git a/.github/workflows/dependencies/install_hdf5_blosc2 b/.github/workflows/dependencies/install_hdf5_blosc2 new file mode 100755 index 0000000000..398c7a32a1 --- /dev/null +++ b/.github/workflows/dependencies/install_hdf5_blosc2 @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +set -eu -o pipefail +cd /opt + +version="v2.0.0" +patch_url="https://github.com/franzpoeschel/HDF5-Blosc2/commit/55b1feea7bf18a539dfbe4413a920bc9570aa0c6.diff" +patch_path="$(pwd)/hdf5_blosc2_cmake_fixes.diff" + +git config user.email "tooling@tools.com" +git config user.name "Tooling" + +curl -sLo 
"$patch_path" "$patch_url" +git clone -b "$version" https://github.com/Blosc/HDF5-Blosc2 +cd HDF5-Blosc2 +git am "$patch_path" + +cmake . -B build -DCMAKE_INSTALL_PREFIX="/usr/local" +cmake --build build --parallel 4 --target install diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 37b5331ddf..a84b0e134c 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -241,6 +241,10 @@ jobs: sudo ln -s "$(which cmake)" /usr/bin/cmake eval $(spack env activate --sh .github/ci/spack-envs/gcc12_py36_ompi_h5_ad2/) spack install + ./.github/workflows/dependencies/install_hdf5_blosc2 + # the HDF5 Blosc2 plugin exports no configuration files + # for build systems, so we will need to link it manually + export LDFLAGS="-lblosc2_filter $LDFLAGS" share/openPMD/download_samples.sh build cmake -S . -B build \ From 0157cf74bd66fc5b8afe455f21c91a024d3cb995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 29 Jul 2025 21:31:03 +0200 Subject: [PATCH 14/56] Update .github/workflows/dependencies/install_hdf5_blosc2 --- .github/workflows/dependencies/install_hdf5_blosc2 | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/dependencies/install_hdf5_blosc2 b/.github/workflows/dependencies/install_hdf5_blosc2 index 398c7a32a1..d8e12e4699 100755 --- a/.github/workflows/dependencies/install_hdf5_blosc2 +++ b/.github/workflows/dependencies/install_hdf5_blosc2 @@ -1,6 +1,7 @@ #!/usr/bin/env bash set -eu -o pipefail +set -x cd /opt version="v2.0.0" From d13708f248fc3ea4d5260b4dbe045625994b0847 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 29 Jul 2025 19:13:28 +0200 Subject: [PATCH 15/56] Add Python example --- CMakeLists.txt | 1 + examples/15_compression.py | 245 +++++++++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+) create mode 100644 examples/15_compression.py diff --git a/CMakeLists.txt b/CMakeLists.txt index cebd3a6bdf..3811ca7ab3 100644 --- a/CMakeLists.txt +++ 
b/CMakeLists.txt @@ -735,6 +735,7 @@ set(openPMD_PYTHON_EXAMPLE_NAMES 11_particle_dataframe 12_span_write 13_write_dynamic_configuration + 15_compression ) if(openPMD_USE_INVASIVE_TESTS) diff --git a/examples/15_compression.py b/examples/15_compression.py new file mode 100644 index 0000000000..91cba9eaaa --- /dev/null +++ b/examples/15_compression.py @@ -0,0 +1,245 @@ +# Copyright 2025 Franz Poeschel +# +# This file is part of openPMD-api. +# +# openPMD-api is free software: you can redistribute it and/or modify +# it under the terms of of either the GNU General Public License or +# the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# openPMD-api is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License and the GNU Lesser General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# and the GNU Lesser General Public License along with openPMD-api. +# If not, see . +# + +import numpy as np +import openpmd_api as opmd + +try: + import hdf5plugin + + HAS_HDF5_PLUGIN = True +except ImportError: + HAS_HDF5_PLUGIN = False + + +def write(filename, config): + + series = opmd.Series( + f"../samples/compression_python/{filename}", + opmd.Access.create_linear, + config, + ) + + for i in range(10): + current_iteration = series.snapshots()[i] + + # First, write an E mesh. + E = current_iteration.meshes["E"] + E.axis_labels = ["x", "y"] + for dim in ["x", "y"]: + component = E[dim] + component.reset_dataset(opmd.Dataset(np.dtype("float"), [10, 10])) + component[:, :] = np.reshape( + np.arange(i * 100, (i + 1) * 100), [10, 10] + ) + + # Now, write some e particles. 
+ e = current_iteration.particles["e"] + for dim in ["x", "y"]: + # Do not bother with a positionOffset + position_offset = e["positionOffset"][dim] + position_offset.make_constant(0) + + position = e["position"][dim] + position.reset_dataset(opmd.Dataset(np.dtype("float"), [100])) + buffer_view = position.store_chunk([0], [100]).current_buffer() + position[:] = np.arange(i * 100, (i + 1) * 100) + + +def main(): + + # We start with two examples for ADIOS2. + if "adios2" in opmd.variants and opmd.variants["adios2"]: + simple_adios2_config = { + # Backend can either be inferred from the filename ending, or + # specified explicitly. In the latter case, the filename ending can + # be given as a wildcard %E, openPMD will then pick a + # default ending. + "backend": "adios2", + "adios2": { + "dataset": { + # ADIOS2 supports adding multiple operators to a variable, + # hence we specify a list of operators here. + # How much sense this makes depends on the specific + # operators in use. + "operators": [ + { + "type": "bzip2", + "parameters": { + # The available parameters depend + # on the operator. + # Here, we specify bzip2's compression level. + "clevel": 9 + }, + } + ] + } + }, + } + write("adios2_with_bzip2.%E", simple_adios2_config) + + extended_adios2_config = { + "backend": "adios2", + "adios2": { + "dataset": [ + { + # This uses egrep-type regular expressions. + "select": "meshes/.*", + # Inside the cfg key, specify the actual config to + # be forwarded to the ADIOS2 dataset. + # So, specify the operators list again. + # Let's use Blosc for this. + "cfg": { + "operators": [ + { + "parameters": { + "clevel": 1, + "doshuffle": "BLOSC_BITSHUFFLE", + }, + "type": "blosc", + } + ] + }, + }, + # Now, configure the particles. + { + # The match can either be against the path within the + # containing Iteration (e.g. 'meshes/E/x', as above) + # or (as in this example), against the full path + # (e.g. '/data/0/particles/e/position/x'). 
In this + # example, completely deactivate compression + # specifically for 'particles/e/position/x'. + # All other particle datasets will fall back to + # the default configuration specified below. + # Be careful when specifying compression per-Iteration. + # While this syntax fundamentally allows doing that, + # compressions once specified on an ADIOS2 variable + # will not be removed again. Since variable-encoding + # reuses ADIOS2 variables from previous Iterations, + # the compression configuration of the first Iteration + # will leak into all subsequent Iterations. + "select": "/data/[0-9]*/particles/e/position/x", + "cfg": {"operators": []}, + }, + # Now, the default configuration. In general, the dataset + # configurations are matched top-down, going for + # the first matching configuration. So, a default + # configuration could theoretically be specified + # by emplacing a catch-all pattern (regex: ".*") as the + # last option. However, we also define an explicit + # shorthand for specifying default configurations: + # Just omit the 'select' key. This special syntax + # is understood as the default configuration no matter + # where in the list it is emplaced, and it allows + # the backends to initialize the default configuration + # globally, instead of applying it selectively + # to each dataset that matches a catch-all pattern. + { + "cfg": { + "operators": [ + {"parameters": {"clevel": 2}, "type": "bzip2"} + ] + } + }, + ] + }, + } + write( + "adios2_with_dataset_specific_configurations.%E", + extended_adios2_config, + ) + + # Now, let's continue with HDF5. + # HDF5 supports compression via so-called filters. These can be permanent + # (applied to an entire dataset) and transient (applied to individual I/O + # operations). The openPMD-api currently supports permanent filters. Refer + # also to https://web.ics.purdue.edu/~aai/HDF5/html/Filters.html. + + # Filters are additionally distinguished by how tightly they integrate with + # HDF5.
The most tightly-integrated filter is Zlib, which has its own API + # calls and hence also a special JSON/TOML configuration in openPMD: + if "hdf5" in opmd.variants and opmd.variants["hdf5"]: + hdf5_zlib_config = { + "backend": "hdf5", + "hdf5": { + "dataset": { + "permanent_filters": { + "type": "zlib", # mandatory parameter + "aggression": 5, # optional, defaults to 1 + } + } + }, + } + write("hdf5_zlib.%E", hdf5_zlib_config) + + # All other filters have a common API and are identified by global IDs + # registered with the HDF Group. More details can be found in the + # H5Zpublic.h header. That header predefines a small number + # of filter IDs. + # These are directly supported by the openPMD-api: deflate, shuffle, + # fletcher32, szip, nbit, scaleoffset. + hdf5_predefined_filter_ids = { + "backend": "hdf5", + "hdf5": { + "dataset": { + "permanent_filters": { + # mandatory parameter + "id": "fletcher32", + # optional parameter + "flags": "mandatory", + # optional parameter for filters identified by ID, + # mandatory only for zlib (see above) + "type": "by_id", + } + } + }, + } + write("hdf5_predefined_filter_id.%E", hdf5_predefined_filter_ids) + + # For non-predefined IDs, the ID must be given as a number. This example + # uses the Blosc2 filter with the permanent plugin ID 32026, + # (defined in hdf5plugin.FILTERS["blosc2"]), available as part of Python's + # hdf5plugin package. Generic filters referenced by ID can be configured + # via the cd_values field. This field is an array of unsigned integers and + # plugin-specific interpretation. For the Blosc2 plugin, indexes 0, 1, 2 + # and 3 are reserved. index 4 is the compression level, index 5 is a + # boolean for activating shuffling and index 6 denotes + # the compression method. 
+ + if "hdf5" in opmd.variants and opmd.variants["hdf5"] and HAS_HDF5_PLUGIN: + hdf5_blosc2_filter = { + "backend": "hdf5", + "hdf5": { + "dataset": { + "chunks": "auto", + "permanent_filters": { + "cd_values": [0, 0, 0, 0, 4, 1, 5], + "flags": "mandatory", + "id": hdf5plugin.FILTERS["blosc2"], + }, + } + }, + } + + write("hdf5_blosc_filter.%E", hdf5_blosc2_filter) + + +main() From 9ab5eff7e3559e441e64fc7e371c983b7ea2474a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 30 Jul 2025 11:04:22 +0200 Subject: [PATCH 16/56] Some documentation fixes --- examples/15_compression.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp index 74cf48f60e..73d10a537e 100644 --- a/examples/15_compression.cpp +++ b/examples/15_compression.cpp @@ -125,9 +125,9 @@ int main() type = "bzip2" parameters.clevel = 9 # The available parameters depend # on the operator. - # Here, we specify zlib's compression level. + # Here, we specify bzip2's compression level. )"; - write("adios2_with_zlib.%E", simple_adios2_config); + write("adios2_with_bzip2.%E", simple_adios2_config); // The compression can also be specified per-dataset. // For more details, also check: @@ -143,7 +143,9 @@ int main() [[adios2.dataset]] # This uses egrep-type regular expressions. select = "meshes/.*" - # Now, specify the operators list again. Let's use Blosc for this. + # Inside the cfg key, specify the actual config to be forwarded to the + # ADIOS2 dataset. + # So, specify the operators list again. Let's use Blosc for this. 
[[adios2.dataset.cfg.operators]] type = "blosc" parameters.doshuffle = "BLOSC_BITSHUFFLE" From f0726b4c1182b77b7aed9becff7d82e540a29728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 30 Jul 2025 11:05:14 +0200 Subject: [PATCH 17/56] Fix install_hdf5_blosc2 script --- .github/workflows/dependencies/install_hdf5_blosc2 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dependencies/install_hdf5_blosc2 b/.github/workflows/dependencies/install_hdf5_blosc2 index d8e12e4699..4f8e11ad65 100755 --- a/.github/workflows/dependencies/install_hdf5_blosc2 +++ b/.github/workflows/dependencies/install_hdf5_blosc2 @@ -2,18 +2,19 @@ set -eu -o pipefail set -x + cd /opt version="v2.0.0" patch_url="https://github.com/franzpoeschel/HDF5-Blosc2/commit/55b1feea7bf18a539dfbe4413a920bc9570aa0c6.diff" patch_path="$(pwd)/hdf5_blosc2_cmake_fixes.diff" -git config user.email "tooling@tools.com" -git config user.name "Tooling" curl -sLo "$patch_path" "$patch_url" git clone -b "$version" https://github.com/Blosc/HDF5-Blosc2 cd HDF5-Blosc2 +git config user.email "tooling@tools.com" +git config user.name "Tooling" git am "$patch_path" cmake . -B build -DCMAKE_INSTALL_PREFIX="/usr/local" From d1863e5a713373eb96ed1af15feda2e5467976ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 30 Jul 2025 11:31:48 +0200 Subject: [PATCH 18/56] Complete examples --- examples/15_compression.cpp | 25 +++++++++++++++- examples/15_compression.py | 57 +++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp index 73d10a537e..77e580f7c6 100644 --- a/examples/15_compression.cpp +++ b/examples/15_compression.cpp @@ -136,7 +136,9 @@ int main() // This example will demonstrate the use of pattern matching. // adios2.dataset is now a list of dataset configurations. 
The specific // configuration to be used for a dataset will be determined by matching - // the dataset name against the patterns specified by the 'select' keys. + // the dataset name against the patterns specified by the 'select' key. + // The actual configuration to be forwarded to the backend is stored under + // the 'cfg' key. std::string const extended_adios2_config = R"( backend = "adios2" @@ -248,6 +250,27 @@ int main() )"; write("hdf5_filter_pipeline.%E", hdf5_filter_pipeline); + // Dataset-specific backend configuration works independently from the + // chosen backend and can hence also be used in HDF5. We will apply both a + // zlib and a fletcher32 filter, one to the meshes and one to the particles. + std::string const extended_hdf5_config = R"( + backend = "hdf5" + + [[hdf5.dataset]] + select = "meshes/.*" + [hdf5.dataset.cfg.permanent_filters] + type = "zlib" + aggression = 5 + + # Now, configure the particles. + [[hdf5.dataset]] + select = "particles/.*" + [hdf5.dataset.cfg.permanent_filters] + id = "fletcher32" + flags = "mandatory" + )"; + write("hdf5_with_dataset_specific_configurations.%E", extended_hdf5_config); + // For non-predefined IDs, the ID must be given as a number. This example // uses the Blosc2 filter available from // https://github.com/Blosc/HDF5-Blosc2, with the permanent plugin ID 32026 diff --git a/examples/15_compression.py b/examples/15_compression.py index 91cba9eaaa..415296dd38 100644 --- a/examples/15_compression.py +++ b/examples/15_compression.py @@ -96,6 +96,16 @@ def main(): } write("adios2_with_bzip2.%E", simple_adios2_config) + # The compression can also be specified per-dataset. + # For more details, also check: + # https://openpmd-api.readthedocs.io/en/latest/details/backendconfig.html#dataset-specific-configuration + + # This example will demonstrate the use of pattern matching. + # adios2.dataset is now a list of dataset configurations. 
The specific + # configuration to be used for a dataset will be determined by matching + # the dataset name against the patterns specified by the 'select' key. + # The actual configuration to be forwarded to the backend is stored + # under the 'cfg' key. extended_adios2_config = { "backend": "adios2", "adios2": { @@ -214,6 +224,53 @@ def main(): } write("hdf5_predefined_filter_id.%E", hdf5_predefined_filter_ids) + # Just like ADIOS2 with their operations, also HDF5 supports adding + # multiple filters into a filter pipeline. The permanent_filters key + # can hence also be given as a list. + hdf5_filter_pipeline = { + "backend": "hdf5", + "hdf5": { + "dataset": { + "permanent_filters": [ + {"aggression": 5, "type": "zlib"}, + {"flags": "mandatory", "id": "shuffle"}, + ] + } + }, + } + write("hdf5_filter_pipeline.%E", hdf5_filter_pipeline) + + # Dataset-specific backend configuration works independently from the + # chosen backend and can hence also be used in HDF5. We will apply both + # zlib and a fletcher32 filter, one to the meshes and one + # to the particles. + extended_hdf5_config = { + "backend": "hdf5", + "hdf5": { + "dataset": [ + { + "select": "meshes/.*", + "cfg": { + "permanent_filters": { + "type": "zlib", + "aggression": 5, + } + }, + }, + { + "select": "particles/.*", + "cfg": { + "permanent_filters": { + "id": "fletcher32", + "flags": "mandatory", + } + }, + }, + ] + }, + } + write("hdf5_with_dataset_specific_configurations.%E", extended_hdf5_config) + # For non-predefined IDs, the ID must be given as a number. 
This example # uses the Blosc2 filter with the permanent plugin ID 32026, # (defined in hdf5plugin.FILTERS["blosc2"]), available as part of Python's From af60b172856be9352d63b4354b54e9ee6aa106c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 30 Jul 2025 12:03:17 +0200 Subject: [PATCH 19/56] ADIOS2 shorthand: dataset.operators may also be a single element --- examples/15_compression.cpp | 4 ++- examples/15_compression.py | 10 ++++-- src/IO/ADIOS/ADIOS2IOHandler.cpp | 60 ++++++++++++++++++++++++-------- 3 files changed, 55 insertions(+), 19 deletions(-) diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp index 77e580f7c6..57da44654a 100644 --- a/examples/15_compression.cpp +++ b/examples/15_compression.cpp @@ -120,6 +120,8 @@ int main() # ADIOS2 supports adding multiple operators to a variable, hence we # specify a list of operators here (using TOML's double bracket syntax). # How much sense this makes depends on the specific operators in use. + # If specifying only one operator, you can also replace the list by its + # only element as a shorthand (see next config example). [[adios2.dataset.operators]] type = "bzip2" @@ -182,7 +184,7 @@ int main() # configuration globally, instead of applying it selectively to each # dataset that matches a catch-all pattern. [[adios2.dataset]] - [[adios2.dataset.cfg.operators]] + [adios2.dataset.cfg.operators] type = "bzip2" parameters.clevel = 2 )"; diff --git a/examples/15_compression.py b/examples/15_compression.py index 415296dd38..7b7a9233e1 100644 --- a/examples/15_compression.py +++ b/examples/15_compression.py @@ -80,6 +80,9 @@ def main(): # hence we specify a list of operators here. # How much sense this makes depends on the specific # operators in use. + # If specifying only one operator, you can also replace the + # list by its only element as a shorthand + # (see next config example). 
"operators": [ { "type": "bzip2", @@ -164,9 +167,10 @@ def main(): # to each dataset that matches a catch-all pattern. { "cfg": { - "operators": [ - {"parameters": {"clevel": 2}, "type": "bzip2"} - ] + "operators": { + "parameters": {"clevel": 2}, + "type": "bzip2", + } } }, ] diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 06aa172ea5..9e56e98d68 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -314,6 +314,9 @@ void ADIOS2IOHandlerImpl::init( } } +namespace +{} + std::optional> ADIOS2IOHandlerImpl::getOperators(json::TracingJSON cfg) { @@ -328,18 +331,14 @@ ADIOS2IOHandlerImpl::getOperators(json::TracingJSON cfg) { return ret_t(); } - auto _operators = datasetConfig["operators"]; - nlohmann::json const &operators = _operators.json(); - for (auto operatorIterator = operators.begin(); - operatorIterator != operators.end(); - ++operatorIterator) - { - nlohmann::json const &op = operatorIterator.value(); - std::string const &type = op["type"]; + + auto parse_single_operator = [this](auto &op, auto &&json_accessor) + -> std::optional { + std::string const &type = *json_accessor(op["type"]); adios2::Params adiosParams; - if (op.contains("parameters")) + if (json_accessor(op)->contains("parameters")) { - nlohmann::json const ¶ms = op["parameters"]; + nlohmann::json const ¶ms = *json_accessor(op["parameters"]); for (auto paramIterator = params.begin(); paramIterator != params.end(); ++paramIterator) @@ -360,14 +359,45 @@ ADIOS2IOHandlerImpl::getOperators(json::TracingJSON cfg) } std::optional adiosOperator = getCompressionOperator(type); - if (adiosOperator) + if (!adiosOperator.has_value()) + { + return std::nullopt; + } + else + { + return ParameterizedOperator{ + std::move(*adiosOperator), std::move(adiosParams)}; + } + }; + + auto _operators = datasetConfig["operators"]; + nlohmann::json const &operators = _operators.json(); + if (operators.is_array()) + { + for (auto const &op : operators) + 
{ + auto parsed_operator = + parse_single_operator(op, [](auto &j) { return &j; }); + if (parsed_operator) + { + res.emplace_back(std::move(*parsed_operator)); + } + } + _operators.declareFullyRead(); + } + else + { + auto parsed_operator = parse_single_operator( + _operators, [](auto &&j) { return &j.json(); }); + if (parsed_operator) + { + res.emplace_back(std::move(*parsed_operator)); + } + if (operators.contains("parameters")) { - res.emplace_back( - ParameterizedOperator{ - adiosOperator.value(), std::move(adiosParams)}); + _operators["parameters"].declareFullyRead(); } } - _operators.declareFullyRead(); return std::make_optional(std::move(res)); } From 6c23bddfc9bd2f053b424fca4c87cdacbc6f98cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 30 Jul 2025 13:43:02 +0200 Subject: [PATCH 20/56] Fix indentation --- examples/15_compression.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/15_compression.py b/examples/15_compression.py index 7b7a9233e1..57152645d3 100644 --- a/examples/15_compression.py +++ b/examples/15_compression.py @@ -273,7 +273,10 @@ def main(): ] }, } - write("hdf5_with_dataset_specific_configurations.%E", extended_hdf5_config) + write( + "hdf5_with_dataset_specific_configurations.%E", + extended_hdf5_config, + ) # For non-predefined IDs, the ID must be given as a number. 
This example # uses the Blosc2 filter with the permanent plugin ID 32026, From 76adc5864a68200748b168dcf9624b7be4a911dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 30 Jul 2025 13:48:42 +0200 Subject: [PATCH 21/56] Fix patch URL --- .github/workflows/dependencies/install_hdf5_blosc2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dependencies/install_hdf5_blosc2 b/.github/workflows/dependencies/install_hdf5_blosc2 index 4f8e11ad65..e9100a0000 100755 --- a/.github/workflows/dependencies/install_hdf5_blosc2 +++ b/.github/workflows/dependencies/install_hdf5_blosc2 @@ -6,8 +6,8 @@ set -x cd /opt version="v2.0.0" -patch_url="https://github.com/franzpoeschel/HDF5-Blosc2/commit/55b1feea7bf18a539dfbe4413a920bc9570aa0c6.diff" -patch_path="$(pwd)/hdf5_blosc2_cmake_fixes.diff" +patch_url="https://github.com/franzpoeschel/HDF5-Blosc2/commit/55b1feea7bf18a539dfbe4413a920bc9570aa0c6.patch" +patch_path="$(pwd)/hdf5_blosc2_cmake_fixes.patch" curl -sLo "$patch_path" "$patch_url" From 69468b152cac37630d3508751d98ff4d4359af49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 30 Jul 2025 14:18:03 +0200 Subject: [PATCH 22/56] Update documentation and tests for ADIOS2 --- docs/source/details/backendconfig.rst | 4 +-- examples/13_write_dynamic_configuration.cpp | 13 +++++---- examples/13_write_dynamic_configuration.py | 5 ++-- test/SerialIOTest.cpp | 29 +++++++++------------ 4 files changed, 23 insertions(+), 28 deletions(-) diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst index 123b0a58e0..e23b9e3fde 100644 --- a/docs/source/details/backendconfig.rst +++ b/docs/source/details/backendconfig.rst @@ -185,8 +185,8 @@ Explanation of the single keys: Additionally, specifying ``"disk_override"``, ``"buffer_override"`` or ``"new_step_override"`` will take precedence over options specified without the ``_override`` suffix, allowing to invert the normal 
precedence order. This way, a data producing code can hardcode the preferred flush target per ``flush()`` call, but users can e.g. still entirely deactivate flushing to disk in the ``Series`` constructor by specifying ``preferred_flush_target = buffer_override``. This is useful when applying the asynchronous IO capabilities of the BP5 engine. -* ``adios2.dataset.operators``: This key contains a list of ADIOS2 `operators `_, used to enable compression or dataset transformations. - Each object in the list has two keys: +* ``adios2.dataset.operators``: This key contains either a single ADIOS2 `operator `_ or a list of operators, used to enable compression or dataset transformations. + Each operator is an object with two keys: * ``type`` supported ADIOS operator type, e.g. zfp, sz * ``parameters`` is an associative map of string parameters for the operator (e.g. compression levels) diff --git a/examples/13_write_dynamic_configuration.cpp b/examples/13_write_dynamic_configuration.cpp index 56d441d76d..3250cb162e 100644 --- a/examples/13_write_dynamic_configuration.cpp +++ b/examples/13_write_dynamic_configuration.cpp @@ -47,6 +47,7 @@ type = "bp4" # ADIOS2 allows adding several operators # Lists are given in TOML by using double brackets +# For specifying a single operator only, the list may be skipped. 
[[adios2.dataset.operators]] type = "zlib" @@ -192,14 +193,12 @@ CFG.CHUNKS = [10] "resizable": true, "adios2": { "dataset": { - "operators": [ - { - "type": "zlib", - "parameters": { - "clevel": 9 - } + "operators": { + "type": "zlib", + "parameters": { + "clevel": 9 } - ] + } } } })END"; diff --git a/examples/13_write_dynamic_configuration.py b/examples/13_write_dynamic_configuration.py index fa40e61985..0dc67a8e5c 100644 --- a/examples/13_write_dynamic_configuration.py +++ b/examples/13_write_dynamic_configuration.py @@ -31,6 +31,7 @@ # ADIOS2 allows adding several operators # Lists are given in TOML by using double brackets +# For specifying a single operator only, the list may be skipped. [[adios2.dataset.operators]] type = "zlib" @@ -106,12 +107,12 @@ def main(): } } config['adios2']['dataset'] = { - 'operators': [{ + 'operators': { 'type': 'zlib', 'parameters': { 'clevel': 9 } - }] + } } temperature = iteration.meshes["temperature"] diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 3a14187a93..64d41adadb 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -4991,7 +4991,8 @@ unused = "as well" BufferGrowthFactor = "2.0" Profile = "On" -[[adios2.dataset.operators]] +# single brackets, because an operator may also be given as a single object +[adios2.dataset.operators] type = "blosc" parameters.clevel = "1" parameters.doshuffle = "BLOSC_BITSHUFFLE" @@ -5022,7 +5023,7 @@ unused = "dataset parameter" [adios2.dataset] unused = "too" -[[adios2.dataset.operators]] +[adios2.dataset.operators] type = "blosc" [adios2.dataset.operators.parameters] clevel = 3 @@ -6225,11 +6226,9 @@ TEST_CASE("automatically_deactivate_span", "[serial][adios2]") { "adios2": { "dataset": { - "operators": [ - { - "type": "bzip2" - } - ] + "operators": { + "type": "bzip2" + } } } })END"; @@ -6276,11 +6275,9 @@ TEST_CASE("automatically_deactivate_span", "[serial][adios2]") { "adios2": { "dataset": { - "operators": [ - { - "type": "bzip2" - } - ] + "operators": { + 
"type": "bzip2" + } } } })END"; @@ -6341,11 +6338,9 @@ TEST_CASE("automatically_deactivate_span", "[serial][adios2]") { "adios2": { "dataset": { - "operators": [ - { - "type": "bzip2" - } - ] + "operators": { + "type": "bzip2" + } } } })END"; From ae3285732f11a6dfbe7be09459efcfd7e15b0cbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 30 Jul 2025 14:29:37 +0200 Subject: [PATCH 23/56] Deactivate tests for HDF5-Blosc2 --- .github/workflows/dependencies/install_hdf5_blosc2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dependencies/install_hdf5_blosc2 b/.github/workflows/dependencies/install_hdf5_blosc2 index e9100a0000..b125c0bdd7 100755 --- a/.github/workflows/dependencies/install_hdf5_blosc2 +++ b/.github/workflows/dependencies/install_hdf5_blosc2 @@ -17,5 +17,5 @@ git config user.email "tooling@tools.com" git config user.name "Tooling" git am "$patch_path" -cmake . -B build -DCMAKE_INSTALL_PREFIX="/usr/local" +cmake . -B build -DCMAKE_INSTALL_PREFIX="/usr/local" -DBUILD_TESTS=OFF cmake --build build --parallel 4 --target install From ab177c0c5898d581392f600f38dab70e13c4f917 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 30 Jul 2025 15:22:09 +0200 Subject: [PATCH 24/56] Add documentation --- docs/source/backends/hdf5.rst | 13 +++++++++++++ docs/source/details/backendconfig.rst | 18 ++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/docs/source/backends/hdf5.rst b/docs/source/backends/hdf5.rst index 1d1866d874..5b64893625 100644 --- a/docs/source/backends/hdf5.rst +++ b/docs/source/backends/hdf5.rst @@ -25,6 +25,19 @@ Virtual file drivers are configured via JSON/TOML. Refer to the page on :ref:`JSON/TOML configuration ` for further details. +Filters (compression) +********************* + +HDF5 supports so-called filters for transformations such as compression on datasets. 
+These can be permanent (applied to an entire dataset) or transient (applied to individual I/O operations). +The openPMD-api currently supports permanent filters. +Pipelines of multiple subsequent filters are supported. +Refer also to `this documentation `_. + +Filters are applied via :ref:`JSON/TOML configuration `, see there for detailed instructions on how to apply filters. +There are also extended examples on how to apply compression options to ADIOS2 and HDF5 in the examples: `Python `_ / `C++ `_. + + Backend-Specific Controls ------------------------- diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst index e23b9e3fde..9d5e1dcf2c 100644 --- a/docs/source/details/backendconfig.rst +++ b/docs/source/details/backendconfig.rst @@ -247,6 +247,24 @@ Explanation of the single keys: An explicit chunk size can be specified as a list of positive integers, e.g. ``hdf5.dataset.chunks = [10, 100]``. Note that this specification should only be used per-dataset, e.g. in ``resetDataset()``/``reset_dataset()``. Chunking generally improves performance and only needs to be disabled in corner-cases, e.g. when heavily relying on independent, parallel I/O that non-collectively declares data records. +* ``hdf5.dataset.permanent_filters``: Either a single HDF5 permanent filter specification or a list of HDF5 permanent filter specifications. + Each filter specification is a JSON/TOML object, but there are multiple options: + + * Zlib: The Zlib filter has a distinct API in HDF5 and the configuration for Zlib in openPMD is hence also different. It is activated by the mandatory key ``type = "zlib"`` and configured by the optional integer key ``aggression``. + Example: ``{"type": "zlib", "aggression": 5}``. + * Filters identified by their global ID `registered with the HDF group `_. + They are activated by the mandatory integer key ``id`` containing this global ID. 
+ All other keys are optional: + + * ``type = "by_id"`` may optionally be specified for clarity and consistency. + * The string key ``flags`` can take the values ``"mandatory"`` or ``"optional"``, indicating if HDF5 should abort execution if the filter cannot be applied for some reason. + * The key ``cd_values`` points to a list of nonnegative integers. + These are filter-specific configuration options. + Refer to the specific filter's documentation. + + Alternatively to an integer ID, the key ``id`` may also be of string type, identifying one of the six builtin filters of HDF5: ``"deflate", "shuffle", "fletcher32", "szip", "nbit", "scaleoffset"``. + + * ``hdf5.vfd.type`` selects the HDF5 virtual file driver. Currently available are: From dfc85e0f01257e94900b626e1484bcaa7ec7946b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 30 Jul 2025 15:22:35 +0200 Subject: [PATCH 25/56] Some more consistency in examples --- examples/15_compression.cpp | 2 +- examples/15_compression.py | 16 +++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp index 57da44654a..fe47fe1ae0 100644 --- a/examples/15_compression.cpp +++ b/examples/15_compression.cpp @@ -150,7 +150,7 @@ int main() # Inside the cfg key, specify the actual config to be forwarded to the # ADIOS2 dataset. # So, specify the operators list again. Let's use Blosc for this. - [[adios2.dataset.cfg.operators]] + [adios2.dataset.cfg.operators] type = "blosc" parameters.doshuffle = "BLOSC_BITSHUFFLE" parameters.clevel = 1 diff --git a/examples/15_compression.py b/examples/15_compression.py index 57152645d3..06ab7551d9 100644 --- a/examples/15_compression.py +++ b/examples/15_compression.py @@ -121,15 +121,13 @@ def main(): # So, specify the operators list again. # Let's use Blosc for this. 
"cfg": { - "operators": [ - { - "parameters": { - "clevel": 1, - "doshuffle": "BLOSC_BITSHUFFLE", - }, - "type": "blosc", - } - ] + "operators": { + "parameters": { + "clevel": 1, + "doshuffle": "BLOSC_BITSHUFFLE", + }, + "type": "blosc", + } }, }, # Now, configure the particles. From 2c0d8e474718852748742c9d4b839c2520c3149b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 30 Jul 2025 15:24:32 +0200 Subject: [PATCH 26/56] Install with sudo rights --- .github/workflows/dependencies/install_hdf5_blosc2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/dependencies/install_hdf5_blosc2 b/.github/workflows/dependencies/install_hdf5_blosc2 index b125c0bdd7..f4bd471efc 100755 --- a/.github/workflows/dependencies/install_hdf5_blosc2 +++ b/.github/workflows/dependencies/install_hdf5_blosc2 @@ -18,4 +18,5 @@ git config user.name "Tooling" git am "$patch_path" cmake . -B build -DCMAKE_INSTALL_PREFIX="/usr/local" -DBUILD_TESTS=OFF -cmake --build build --parallel 4 --target install +cmake --build build --parallel 4 +sudo cmake --build build --parallel 4 --target install From 57120343a908bac8704f071e5499af81c1abc2b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 30 Jul 2025 15:25:22 +0200 Subject: [PATCH 27/56] Erase unnecessary line from example --- examples/15_compression.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/15_compression.py b/examples/15_compression.py index 06ab7551d9..65aae3a995 100644 --- a/examples/15_compression.py +++ b/examples/15_compression.py @@ -60,7 +60,6 @@ def write(filename, config): position = e["position"][dim] position.reset_dataset(opmd.Dataset(np.dtype("float"), [100])) - buffer_view = position.store_chunk([0], [100]).current_buffer() position[:] = np.arange(i * 100, (i + 1) * 100) From 2d3e2d3e10ea84f519ed70ffb794b3efae265566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 30 Jul 2025 16:54:02 +0200 Subject: [PATCH 
28/56] Fix datatypes in Python example --- examples/15_compression.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/examples/15_compression.py b/examples/15_compression.py index 65aae3a995..afe1184e80 100644 --- a/examples/15_compression.py +++ b/examples/15_compression.py @@ -48,7 +48,8 @@ def write(filename, config): component = E[dim] component.reset_dataset(opmd.Dataset(np.dtype("float"), [10, 10])) component[:, :] = np.reshape( - np.arange(i * 100, (i + 1) * 100), [10, 10] + np.arange(i * 100, (i + 1) * 100, dtype=np.dtype("float")), + [10, 10], ) # Now, write some e particles. @@ -60,7 +61,9 @@ def write(filename, config): position = e["position"][dim] position.reset_dataset(opmd.Dataset(np.dtype("float"), [100])) - position[:] = np.arange(i * 100, (i + 1) * 100) + position[:] = np.arange( + i * 100, (i + 1) * 100, dtype=np.dtype("float") + ) def main(): From d10ed1cd33299903558b14178344abe1d7521355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 30 Jul 2025 16:58:41 +0200 Subject: [PATCH 29/56] Use CMake flag directly... --- .github/workflows/linux.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index a84b0e134c..8d72140e51 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -235,6 +235,7 @@ jobs: python3 -m pip install -U pandas python3 -m pip install -U dask python3 -m pip install -U pyarrow + python3 -m pip install hdf5plugin - name: Build env: {CC: gcc-12, CXX: g++-12, CXXFLAGS: -Werror} run: | @@ -244,7 +245,6 @@ jobs: ./.github/workflows/dependencies/install_hdf5_blosc2 # the HDF5 Blosc2 plugin exports no configuration files # for build systems, so we will need to link it manually - export LDFLAGS="-lblosc2_filter $LDFLAGS" share/openPMD/download_samples.sh build cmake -S . 
-B build \ @@ -252,7 +252,9 @@ jobs: -DopenPMD_USE_MPI=ON \ -DopenPMD_USE_HDF5=ON \ -DopenPMD_USE_ADIOS2=ON \ - -DopenPMD_USE_INVASIVE_TESTS=ON + -DopenPMD_USE_INVASIVE_TESTS=ON \ + -DCMAKE_EXE_LINKER_FLAGS="-lblosc2_filter" \ + -DCMAKE_VERBOSE_MAKEFILE=ON cmake --build build --parallel 4 ctest --test-dir build --output-on-failure From aaf467ddc790b7e89fa8237e1e56d27d6946bbec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 31 Jul 2025 10:56:04 +0200 Subject: [PATCH 30/56] Reset extended write example to dev Compression example is moved to 15_compression now --- examples/7_extended_write_serial.cpp | 106 +-------------------------- 1 file changed, 3 insertions(+), 103 deletions(-) diff --git a/examples/7_extended_write_serial.cpp b/examples/7_extended_write_serial.cpp index 59e6fed19a..e9dd61afec 100644 --- a/examples/7_extended_write_serial.cpp +++ b/examples/7_extended_write_serial.cpp @@ -1,40 +1,12 @@ #include -#if __has_include() -#include -#define OPENPMD_USE_BLOSC2_FILTER 1 -#else -#define OPENPMD_USE_BLOSC2_FILTER 0 -#endif - +#include #include -#include -#include int main() { namespace io = openPMD; -#if OPENPMD_USE_BLOSC2_FILTER - /* - * This registers the Blosc2 plugin from - * https://github.com/Blosc/HDF5-Blosc2 as a demonstration on how to - * activate and configure dynamic HDF5 filter plugins through openPMD. - */ - - char *version, *date; - int r = register_blosc2(&version, &date); - if (r < 1) - { - throw std::runtime_error("Unable to register Blosc2 plugin with HDF5."); - } - else - { - std::cout << "Blosc2 plugin registered in version " << version - << " and date " << date << "." 
<< std::endl; - } -#endif - { auto f = io::Series( "working/directory/2D_simData.h5", @@ -119,7 +91,7 @@ int main() } io::Mesh mesh = cur_it.meshes["lowRez_2D_field"]; - mesh.setAxisLabels({"x", "y", "z"}); + mesh.setAxisLabels({"x", "y"}); // data is assumed to reside behind a pointer as a contiguous // column-major array shared data ownership during IO is indicated with @@ -148,63 +120,11 @@ int main() } ] } - }, - "hdf5": { - "dataset": { - "chunks": "auto", - "permanent_filters": { - "id": "fletcher32", - "flags": "optional" - } - } } })END"; d.options = datasetConfig; mesh["x"].resetDataset(d); -#if OPENPMD_USE_BLOSC2_FILTER - /* - * FILTER_BLOSC2 resolves to 32026, the permanent plugin ID registered - * with the HDF Group. Plugin-specific options are given via cd_values, - * refer to the specific plugin's documentation. For the Blosc2 plugin, - * indexes 0, 1, 2 and 3 are reserved. Index 4 is the compression - * level, index 5 is a boolean for activating shuffling and - * index 6 denotes the compression method. 
- */ - d.options = R"END( -{ - "adios2": { - "dataset": { - "operators": [ - { - "type": "zlib", - "parameters": { - "clevel": 9 - } - } - ] - } - }, - "hdf5": { - "dataset": { - "chunks": "auto", - "permanent_filters": { - "id": )END" + - std::to_string(FILTER_BLOSC2) + R"END(, - "flags": "mandatory", - "cd_values": [0, 0, 0, 0, 4, 1, )END" + - std::to_string(BLOSC_ZSTD) + R"END(] - } - } - } -})END"; -#endif - d.extent = {500, 500}; - mesh["z"].resetDataset(d); - auto span = mesh["z"].storeChunk({0, 0}, {500, 500}); - auto span_data = span.currentBuffer(); - std::iota(span_data.begin(), span_data.end(), 41.); - io::ParticleSpecies electrons = cur_it.particles["electrons"]; io::Extent mpiDims{4}; @@ -226,27 +146,7 @@ int main() d = io::Dataset(dtype, mpiDims); electrons["positionOffset"]["x"].resetDataset(d); - auto dset = io::Dataset( - io::determineDatatype(), - {2}, - R"( - { - "hdf5": { - "dataset": { - "chunks": "auto", - "permanent_filters": [ - { - "aggression": 5, - "type": "zlib" - }, - { - "flags": "MANDATORY", - "id": "shuffle" - } - ] - } - } - })"); + auto dset = io::Dataset(io::determineDatatype(), {2}); electrons.particlePatches["numParticles"].resetDataset(dset); electrons.particlePatches["numParticlesOffset"].resetDataset(dset); From 3a9a1cbfe55126e69844def41e83ad6ab87e9da0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 31 Jul 2025 11:47:12 +0200 Subject: [PATCH 31/56] Do we need -L/usr/local/lib ?? 
--- .github/workflows/linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 8d72140e51..8d71885c3f 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -253,7 +253,7 @@ jobs: -DopenPMD_USE_HDF5=ON \ -DopenPMD_USE_ADIOS2=ON \ -DopenPMD_USE_INVASIVE_TESTS=ON \ - -DCMAKE_EXE_LINKER_FLAGS="-lblosc2_filter" \ + -DCMAKE_EXE_LINKER_FLAGS="-lblosc2_filter -L/usr/local/lib" \ -DCMAKE_VERBOSE_MAKEFILE=ON cmake --build build --parallel 4 ctest --test-dir build --output-on-failure From 621598ef1afdddfed0e6200785367dd80e99609d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 31 Jul 2025 16:23:48 +0200 Subject: [PATCH 32/56] Try if HDF5 finds the filter on its own... --- .../dependencies/install_hdf5_blosc2 | 2 +- .github/workflows/linux.yml | 1 - examples/15_compression.cpp | 25 ------------------- 3 files changed, 1 insertion(+), 27 deletions(-) diff --git a/.github/workflows/dependencies/install_hdf5_blosc2 b/.github/workflows/dependencies/install_hdf5_blosc2 index f4bd471efc..139add4b4e 100755 --- a/.github/workflows/dependencies/install_hdf5_blosc2 +++ b/.github/workflows/dependencies/install_hdf5_blosc2 @@ -17,6 +17,6 @@ git config user.email "tooling@tools.com" git config user.name "Tooling" git am "$patch_path" -cmake . -B build -DCMAKE_INSTALL_PREFIX="/usr/local" -DBUILD_TESTS=OFF +cmake . 
-B build -DBUILD_TESTS=OFF cmake --build build --parallel 4 sudo cmake --build build --parallel 4 --target install diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 8d71885c3f..2350ecfe42 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -253,7 +253,6 @@ jobs: -DopenPMD_USE_HDF5=ON \ -DopenPMD_USE_ADIOS2=ON \ -DopenPMD_USE_INVASIVE_TESTS=ON \ - -DCMAKE_EXE_LINKER_FLAGS="-lblosc2_filter -L/usr/local/lib" \ -DCMAKE_VERBOSE_MAKEFILE=ON cmake --build build --parallel 4 ctest --test-dir build --output-on-failure diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp index fe47fe1ae0..faf378fe15 100644 --- a/examples/15_compression.cpp +++ b/examples/15_compression.cpp @@ -32,29 +32,6 @@ #include #include -void init_blosc_for_hdf5() -{ -#if OPENPMD_USE_BLOSC2_FILTER - /* - * This registers the Blosc2 plugin from - * https://github.com/Blosc/HDF5-Blosc2 as a demonstration on how to - * activate and configure dynamic HDF5 filter plugins through openPMD. - */ - - char *version, *date; - int r = register_blosc2(&version, &date); - if (r < 1) - { - throw std::runtime_error("Unable to register Blosc2 plugin with HDF5."); - } - else - { - std::cout << "Blosc2 plugin registered in version '" << version - << "' and date '" << date << "'." << std::endl; - } -#endif -} - void write(std::string const &filename, std::string const &config) { using namespace openPMD; @@ -100,8 +77,6 @@ void write(std::string const &filename, std::string const &config) int main() { - init_blosc_for_hdf5(); - // Backend specific configuration can be given in either JSON or TOML. // We will stick with TOML in this example, since it allows inline comments // and remains more legible for larger configurations. 
From 18dcfcc82aeacbf13e4a5414d7f20691cb7dd00f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 31 Jul 2025 17:43:48 +0200 Subject: [PATCH 33/56] Ok that works, so cleanup --- .github/workflows/linux.yml | 2 -- examples/15_compression.cpp | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 2350ecfe42..393dbb8219 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -243,8 +243,6 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/gcc12_py36_ompi_h5_ad2/) spack install ./.github/workflows/dependencies/install_hdf5_blosc2 - # the HDF5 Blosc2 plugin exports no configuration files - # for build systems, so we will need to link it manually share/openPMD/download_samples.sh build cmake -S . -B build \ diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp index faf378fe15..03310e501d 100644 --- a/examples/15_compression.cpp +++ b/examples/15_compression.cpp @@ -32,6 +32,37 @@ #include #include +/* + * If installed into a folder known to HDF5, then HDF5 will find the filter on + * its own. In other contexts, it might become necessary to manually register + * the filter into HDF5. For this, link the application against + * libblosc2_filter.so and set the below define to true. + */ +#define OPENPMD_INIT_BLOSC2_FILTER_MANUALLY false + +void init_blosc_for_hdf5() +{ +#if OPENPMD_USE_BLOSC2_FILTER && OPENPMD_INIT_BLOSC2_FILTER_MANUALLY + /* + * This registers the Blosc2 plugin from + * https://github.com/Blosc/HDF5-Blosc2 as a demonstration on how to + * activate and configure dynamic HDF5 filter plugins through openPMD. 
+ */ + + char *version, *date; + int r = register_blosc2(&version, &date); + if (r < 1) + { + throw std::runtime_error("Unable to register Blosc2 plugin with HDF5."); + } + else + { + std::cout << "Blosc2 plugin registered in version '" << version + << "' and date '" << date << "'." << std::endl; + } +#endif +} + void write(std::string const &filename, std::string const &config) { using namespace openPMD; @@ -77,6 +108,8 @@ void write(std::string const &filename, std::string const &config) int main() { + init_blosc_for_hdf5(); + // Backend specific configuration can be given in either JSON or TOML. // We will stick with TOML in this example, since it allows inline comments // and remains more legible for larger configurations. From 0f39f62491c9aa0cb3188cd173448fae3f5c10fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 31 Jul 2025 17:54:22 +0200 Subject: [PATCH 34/56] Explicitly set chunks = "auto" --- examples/15_compression.cpp | 17 ++++++++++++++++- examples/15_compression.py | 5 +++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp index 03310e501d..62e8597c19 100644 --- a/examples/15_compression.cpp +++ b/examples/15_compression.cpp @@ -38,7 +38,7 @@ * the filter into HDF5. For this, link the application against * libblosc2_filter.so and set the below define to true. 
*/ -#define OPENPMD_INIT_BLOSC2_FILTER_MANUALLY false +#define OPENPMD_INIT_BLOSC2_FILTER_MANUALLY true void init_blosc_for_hdf5() { @@ -215,6 +215,9 @@ int main() std::string const hdf5_zlib_config = R"( backend = "hdf5" + [hdf5.dataset] + chunks = "auto" + [hdf5.dataset.permanent_filters] type = "zlib" # mandatory parameter aggression = 5 # optional, defaults to 1 @@ -230,6 +233,9 @@ int main() std::string const hdf5_predefined_filter_ids = R"( backend = "hdf5" + [hdf5.dataset] + chunks = "auto" + [hdf5.dataset.permanent_filters] id = "fletcher32" # mandatory parameter # A filter can be applied as mandatory (execution should abort if the @@ -248,6 +254,9 @@ int main() std::string const hdf5_filter_pipeline = R"( backend = "hdf5" + [hdf5.dataset] + chunks = "auto" + # pipeline consisting of two filters [[hdf5.dataset.permanent_filters]] @@ -268,6 +277,9 @@ int main() [[hdf5.dataset]] select = "meshes/.*" + + [hdf5.dataset.cfg] + chunks = "auto" [hdf5.dataset.cfg.permanent_filters] type = "zlib" aggression = 5 @@ -275,6 +287,9 @@ int main() # Now, configure the particles. 
[[hdf5.dataset]] select = "particles/.*" + + [hdf5.dataset.cfg] + chunks = "auto" [hdf5.dataset.cfg.permanent_filters] id = "fletcher32" flags = "mandatory" diff --git a/examples/15_compression.py b/examples/15_compression.py index afe1184e80..a0e7823d97 100644 --- a/examples/15_compression.py +++ b/examples/15_compression.py @@ -195,6 +195,7 @@ def main(): "backend": "hdf5", "hdf5": { "dataset": { + "chunks": "auto", "permanent_filters": { "type": "zlib", # mandatory parameter "aggression": 5, # optional, defaults to 1 @@ -214,6 +215,7 @@ def main(): "backend": "hdf5", "hdf5": { "dataset": { + "chunks": "auto", "permanent_filters": { # mandatory parameter "id": "fletcher32", @@ -235,6 +237,7 @@ def main(): "backend": "hdf5", "hdf5": { "dataset": { + "chunks": "auto", "permanent_filters": [ {"aggression": 5, "type": "zlib"}, {"flags": "mandatory", "id": "shuffle"}, @@ -255,6 +258,7 @@ def main(): { "select": "meshes/.*", "cfg": { + "chunks": "auto", "permanent_filters": { "type": "zlib", "aggression": 5, @@ -264,6 +268,7 @@ def main(): { "select": "particles/.*", "cfg": { + "chunks": "auto", "permanent_filters": { "id": "fletcher32", "flags": "mandatory", From c951c733678b258f2ddf010b1111983d7ae66442 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 31 Jul 2025 18:03:52 +0200 Subject: [PATCH 35/56] CI fixes --- src/IO/ADIOS/ADIOS2IOHandler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 9e56e98d68..884a4b6341 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -366,7 +366,7 @@ ADIOS2IOHandlerImpl::getOperators(json::TracingJSON cfg) else { return ParameterizedOperator{ - std::move(*adiosOperator), std::move(adiosParams)}; + *adiosOperator, std::move(adiosParams)}; } }; From 99e8c3ed1b969bf8fd62ec4c7c57986e95836353 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 31 Jul 2025 18:07:48 
+0200 Subject: [PATCH 36/56] Add HDF5-Blosc2 to further CI runs --- .github/workflows/linux.yml | 6 ++++++ .github/workflows/tooling.yml | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 393dbb8219..ad58e10646 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -67,6 +67,8 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/clang11_nopy_ompi_h5_ad2_libcpp/) spack install + ./.github/workflows/dependencies/install_hdf5_blosc2 + share/openPMD/download_samples.sh build cmake -S . -B build \ -DopenPMD_USE_PYTHON=OFF \ @@ -104,6 +106,8 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/clang11_nopy_ompi_h5_ad2/) spack install + ./.github/workflows/dependencies/install_hdf5_blosc2 + share/openPMD/download_samples.sh build cmake -S . -B build \ -DopenPMD_USE_PYTHON=OFF \ @@ -190,6 +194,8 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/gcc13_py312_mpich_h5_ad2/) spack install + ./.github/workflows/dependencies/install_hdf5_blosc2 + share/openPMD/download_samples.sh build cmake -S . -B build \ -DopenPMD_USE_PYTHON=OFF \ diff --git a/.github/workflows/tooling.yml b/.github/workflows/tooling.yml index 94afb20c89..dcc8601d68 100644 --- a/.github/workflows/tooling.yml +++ b/.github/workflows/tooling.yml @@ -29,6 +29,8 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/clangtidy_nopy_ompi_h5_ad2/) spack install + ./.github/workflows/dependencies/install_hdf5_blosc2 + share/openPMD/download_samples.sh build cmake -S . 
-B build \ -DCMAKE_CXX_CLANG_TIDY="$(which clang-tidy);-system-headers=0" \ @@ -58,6 +60,9 @@ jobs: sudo ln -s "$(which cmake)" /usr/bin/cmake eval $(spack env activate --sh .github/ci/spack-envs/clangtidy_nopy_ompi_h5_ad2/) spack install + + ./.github/workflows/dependencies/install_hdf5_blosc2 + SOURCEPATH="$(pwd)" share/openPMD/download_samples.sh build export LDFLAGS="${LDFLAGS} -fsanitize=address,undefined -shared-libsan" From 3465eab4ccdef7bf932750b309f3caf803d6537e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 31 Jul 2025 18:18:32 +0200 Subject: [PATCH 37/56] Add hdf5plugin to some Python runs --- .github/workflows/linux.yml | 9 ++++++--- .github/workflows/tooling.yml | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index ad58e10646..538b909750 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -97,7 +97,8 @@ jobs: - name: Install run: | sudo apt-get update - sudo apt-get install clang-11 gfortran libopenmpi-dev python3 + sudo apt-get install clang-11 gfortran libopenmpi-dev python3 python3-pip + python3 -m pip install -U hdf5plugin sudo .github/workflows/dependencies/install_spack - name: Build env: {CC: clang-11, CXX: clang++-11, CXXFLAGS: -Werror} @@ -176,7 +177,9 @@ jobs: run: | sudo apt-get update sudo apt-get remove openmpi* libopenmpi* *hdf5* || true - sudo apt-get install g++ gfortran python3 + sudo apt-get install g++ gfortran python3 python3-pip + + python3 -m pip install hdf5plugin sudo .github/workflows/dependencies/install_spack # Need to build this manually due to broken MPICH package in Ubuntu 24.04 @@ -241,7 +244,7 @@ jobs: python3 -m pip install -U pandas python3 -m pip install -U dask python3 -m pip install -U pyarrow - python3 -m pip install hdf5plugin + python3 -m pip install -U hdf5plugin - name: Build env: {CC: gcc-12, CXX: g++-12, CXXFLAGS: -Werror} run: | diff --git a/.github/workflows/tooling.yml 
b/.github/workflows/tooling.yml index dcc8601d68..9ed7c7ca54 100644 --- a/.github/workflows/tooling.yml +++ b/.github/workflows/tooling.yml @@ -51,7 +51,7 @@ jobs: - name: Install run: | sudo apt-get update - sudo apt-get install clang-19 libc++-dev libc++abi-dev python3 gfortran libopenmpi-dev python3-numpy + sudo apt-get install clang-19 libc++-dev libc++abi-dev python3 gfortran libopenmpi-dev python3-numpy python3-hdf5plugin SPACK_VER=1.0.1 sudo -E .github/workflows/dependencies/install_spack echo "SPACK VERSION: $(spack --version)" - name: Build From f86d5113c36b822ba0f586b39d54cd46193e94db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 31 Jul 2025 18:27:06 +0200 Subject: [PATCH 38/56] Skip patch in Clang runs --- .github/workflows/dependencies/install_hdf5_blosc2 | 11 +++++++---- .github/workflows/linux.yml | 4 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/dependencies/install_hdf5_blosc2 b/.github/workflows/dependencies/install_hdf5_blosc2 index 139add4b4e..3ffc8b81a1 100755 --- a/.github/workflows/dependencies/install_hdf5_blosc2 +++ b/.github/workflows/dependencies/install_hdf5_blosc2 @@ -10,12 +10,15 @@ patch_url="https://github.com/franzpoeschel/HDF5-Blosc2/commit/55b1feea7bf18a539 patch_path="$(pwd)/hdf5_blosc2_cmake_fixes.patch" -curl -sLo "$patch_path" "$patch_url" git clone -b "$version" https://github.com/Blosc/HDF5-Blosc2 cd HDF5-Blosc2 -git config user.email "tooling@tools.com" -git config user.name "Tooling" -git am "$patch_path" + +if [[ -z "$SKIP_HDF5_BLOSC2_PATCH" ]]; then + curl -sLo "$patch_path" "$patch_url" + git config user.email "tooling@tools.com" + git config user.name "Tooling" + git am "$patch_path" +fi cmake . 
-B build -DBUILD_TESTS=OFF cmake --build build --parallel 4 diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 538b909750..2517d393e1 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -67,7 +67,7 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/clang11_nopy_ompi_h5_ad2_libcpp/) spack install - ./.github/workflows/dependencies/install_hdf5_blosc2 + SKIP_HDF5_BLOSC2_PATCH=1 ./.github/workflows/dependencies/install_hdf5_blosc2 share/openPMD/download_samples.sh build cmake -S . -B build \ @@ -107,7 +107,7 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/clang11_nopy_ompi_h5_ad2/) spack install - ./.github/workflows/dependencies/install_hdf5_blosc2 + SKIP_HDF5_BLOSC2_PATCH=1 ./.github/workflows/dependencies/install_hdf5_blosc2 share/openPMD/download_samples.sh build cmake -S . -B build \ From 264b255674b5862184b63ea2cad07bdc4ae5bc84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 31 Jul 2025 18:49:30 +0200 Subject: [PATCH 39/56] Fix includes --- examples/15_compression.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp index 62e8597c19..32011613aa 100644 --- a/examples/15_compression.cpp +++ b/examples/15_compression.cpp @@ -21,9 +21,19 @@ #include +/* + * If installed into a folder known to HDF5, then HDF5 will find the filter on + * its own. In other contexts, it might become necessary to manually register + * the filter into HDF5. For this, link the application against + * libblosc2_filter.so and set the below define to true. 
+ */ +#define OPENPMD_INIT_BLOSC2_FILTER_MANUALLY true + #if openPMD_HAVE_HDF5 && __has_include() -#include #define OPENPMD_USE_BLOSC2_FILTER 1 +#if OPENPMD_INIT_BLOSC2_FILTER_MANUALLY +#include +#endif #else #define OPENPMD_USE_BLOSC2_FILTER 0 #endif @@ -32,14 +42,6 @@ #include #include -/* - * If installed into a folder known to HDF5, then HDF5 will find the filter on - * its own. In other contexts, it might become necessary to manually register - * the filter into HDF5. For this, link the application against - * libblosc2_filter.so and set the below define to true. - */ -#define OPENPMD_INIT_BLOSC2_FILTER_MANUALLY true - void init_blosc_for_hdf5() { #if OPENPMD_USE_BLOSC2_FILTER && OPENPMD_INIT_BLOSC2_FILTER_MANUALLY From 6118a3ad846643e6f41c3767620488209b118fd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 1 Aug 2025 10:31:09 +0200 Subject: [PATCH 40/56] Fixes --- .github/workflows/dependencies/install_hdf5_blosc2 | 2 +- examples/15_compression.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dependencies/install_hdf5_blosc2 b/.github/workflows/dependencies/install_hdf5_blosc2 index 3ffc8b81a1..863dc753dd 100755 --- a/.github/workflows/dependencies/install_hdf5_blosc2 +++ b/.github/workflows/dependencies/install_hdf5_blosc2 @@ -13,7 +13,7 @@ patch_path="$(pwd)/hdf5_blosc2_cmake_fixes.patch" git clone -b "$version" https://github.com/Blosc/HDF5-Blosc2 cd HDF5-Blosc2 -if [[ -z "$SKIP_HDF5_BLOSC2_PATCH" ]]; then +if [[ -z "${SKIP_HDF5_BLOSC2_PATCH:-}" ]]; then curl -sLo "$patch_path" "$patch_url" git config user.email "tooling@tools.com" git config user.name "Tooling" diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp index 32011613aa..5c8a0d7d06 100644 --- a/examples/15_compression.cpp +++ b/examples/15_compression.cpp @@ -27,7 +27,7 @@ * the filter into HDF5. For this, link the application against * libblosc2_filter.so and set the below define to true. 
*/ -#define OPENPMD_INIT_BLOSC2_FILTER_MANUALLY true +#define OPENPMD_INIT_BLOSC2_FILTER_MANUALLY false #if openPMD_HAVE_HDF5 && __has_include() #define OPENPMD_USE_BLOSC2_FILTER 1 From 624e430e99da7f4e95068a2e92c795cdfff6f215 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 1 Aug 2025 11:16:09 +0200 Subject: [PATCH 41/56] Further fixes --- .github/workflows/linux.yml | 2 +- examples/15_compression.cpp | 25 ++++++++++++------------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 2517d393e1..8a037eae2d 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -197,7 +197,7 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/gcc13_py312_mpich_h5_ad2/) spack install - ./.github/workflows/dependencies/install_hdf5_blosc2 + SKIP_HDF5_BLOSC2_PATCH=1 ./.github/workflows/dependencies/install_hdf5_blosc2 share/openPMD/download_samples.sh build cmake -S . -B build \ diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp index 5c8a0d7d06..c2c0cb93ec 100644 --- a/examples/15_compression.cpp +++ b/examples/15_compression.cpp @@ -301,28 +301,27 @@ int main() // For non-predefined IDs, the ID must be given as a number. This example // uses the Blosc2 filter available from // https://github.com/Blosc/HDF5-Blosc2, with the permanent plugin ID 32026 - // (defined in blosc2_filter.h as FILTER_BLOSC2). Generic filters referenced - // by ID can be configured via the cd_values field. This field is an array - // of unsigned integers and plugin-specific interpretation. For the Blosc2 - // plugin, indexes 0, 1, 2 and 3 are reserved. index 4 is the compression - // level, index 5 is a boolean for activating shuffling and index 6 denotes - // the compression method. + // (alternatively defined in blosc2_filter.h as FILTER_BLOSC2). Generic + // filters referenced by ID can be configured via the cd_values field. 
This + // field is an array of unsigned integers and plugin-specific + // interpretation. For the Blosc2 plugin, indexes 0, 1, 2 and 3 are + // reserved. index 4 is the compression level, index 5 is a boolean for + // activating shuffling and index 6 denotes the compression method. + // Compression method 5 is BLOSC_ZSTD, alternatively also defined in + // blosc2_filter.h. #if OPENPMD_USE_BLOSC2_FILTER - std::stringstream hdf5_blosc_filter; - hdf5_blosc_filter << R"( + std::string hdf5_blosc_filter = R"( backend = "hdf5" [hdf5.dataset] chunks = "auto" [hdf5.dataset.permanent_filters] - id = )" << FILTER_BLOSC2 - << R"( + id = 32026 flags = "mandatory" - cd_values = [0, 0, 0, 0, 4, 1, )" - << BLOSC_ZSTD << R"(] + cd_values = [0, 0, 0, 0, 4, 1, 5] )"; - write("hdf5_blosc_filter.%E", hdf5_blosc_filter.str()); + write("hdf5_blosc_filter.%E", hdf5_blosc_filter); #endif // OPENPMD_USE_BLOSC2_FILTER #endif // openPMD_HAVE_HDF5 } From 43001736a269cfc6dc7c2756a3b004756801c864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 1 Aug 2025 12:59:24 +0200 Subject: [PATCH 42/56] Remove blosc filter from some runs again This is too bothersome to set up and the runs that we have are enough. --- .github/workflows/linux.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 8a037eae2d..90b24d8194 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -67,8 +67,6 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/clang11_nopy_ompi_h5_ad2_libcpp/) spack install - SKIP_HDF5_BLOSC2_PATCH=1 ./.github/workflows/dependencies/install_hdf5_blosc2 - share/openPMD/download_samples.sh build cmake -S . 
-B build \ -DopenPMD_USE_PYTHON=OFF \ @@ -107,8 +105,6 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/clang11_nopy_ompi_h5_ad2/) spack install - SKIP_HDF5_BLOSC2_PATCH=1 ./.github/workflows/dependencies/install_hdf5_blosc2 - share/openPMD/download_samples.sh build cmake -S . -B build \ -DopenPMD_USE_PYTHON=OFF \ @@ -197,8 +193,6 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/gcc13_py312_mpich_h5_ad2/) spack install - SKIP_HDF5_BLOSC2_PATCH=1 ./.github/workflows/dependencies/install_hdf5_blosc2 - share/openPMD/download_samples.sh build cmake -S . -B build \ -DopenPMD_USE_PYTHON=OFF \ From d9c51b779e7ac80255739ff69e301c060115c253 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 8 Aug 2025 15:54:40 +0200 Subject: [PATCH 43/56] Add missing dataset definition --- examples/15_compression.cpp | 1 + examples/15_compression.py | 1 + 2 files changed, 2 insertions(+) diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp index c2c0cb93ec..23afc2b0f1 100644 --- a/examples/15_compression.cpp +++ b/examples/15_compression.cpp @@ -96,6 +96,7 @@ void write(std::string const &filename, std::string const &config) { // Do not bother with a positionOffset auto &position_offset = e["positionOffset"][dim]; + position_offset.resetDataset({Datatype::INT, {100}}); position_offset.makeConstant(0); auto &position = e["position"][dim]; diff --git a/examples/15_compression.py b/examples/15_compression.py index a0e7823d97..c7f8e0fe95 100644 --- a/examples/15_compression.py +++ b/examples/15_compression.py @@ -57,6 +57,7 @@ def write(filename, config): for dim in ["x", "y"]: # Do not bother with a positionOffset position_offset = e["positionOffset"][dim] + position_offset.reset_dataset(opmd.Dataset(np.dtype("int"), [100])) position_offset.make_constant(0) position = e["position"][dim] From dfd192457178f9f13959ab4ea442bea112c56665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 3 Sep 2025 12:01:18 
+0200 Subject: [PATCH 44/56] Pull the Blosc2 stuff down in the example file --- examples/15_compression.cpp | 96 ++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 45 deletions(-) diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp index 23afc2b0f1..1921714d4c 100644 --- a/examples/15_compression.cpp +++ b/examples/15_compression.cpp @@ -21,49 +21,10 @@ #include -/* - * If installed into a folder known to HDF5, then HDF5 will find the filter on - * its own. In other contexts, it might become necessary to manually register - * the filter into HDF5. For this, link the application against - * libblosc2_filter.so and set the below define to true. - */ -#define OPENPMD_INIT_BLOSC2_FILTER_MANUALLY false - -#if openPMD_HAVE_HDF5 && __has_include() -#define OPENPMD_USE_BLOSC2_FILTER 1 -#if OPENPMD_INIT_BLOSC2_FILTER_MANUALLY -#include -#endif -#else -#define OPENPMD_USE_BLOSC2_FILTER 0 -#endif - #include #include -#include -void init_blosc_for_hdf5() -{ -#if OPENPMD_USE_BLOSC2_FILTER && OPENPMD_INIT_BLOSC2_FILTER_MANUALLY - /* - * This registers the Blosc2 plugin from - * https://github.com/Blosc/HDF5-Blosc2 as a demonstration on how to - * activate and configure dynamic HDF5 filter plugins through openPMD. - */ - - char *version, *date; - int r = register_blosc2(&version, &date); - if (r < 1) - { - throw std::runtime_error("Unable to register Blosc2 plugin with HDF5."); - } - else - { - std::cout << "Blosc2 plugin registered in version '" << version - << "' and date '" << date << "'." << std::endl; - } -#endif -} +void run_blosc2_filter_for_hdf5_example(); void write(std::string const &filename, std::string const &config) { @@ -111,8 +72,6 @@ void write(std::string const &filename, std::string const &config) int main() { - init_blosc_for_hdf5(); - // Backend specific configuration can be given in either JSON or TOML. 
// We will stick with TOML in this example, since it allows inline comments // and remains more legible for larger configurations. @@ -299,6 +258,55 @@ int main() )"; write("hdf5_with_dataset_specific_configurations.%E", extended_hdf5_config); + run_blosc2_filter_for_hdf5_example(); +#endif // openPMD_HAVE_HDF5 +} + +/* This example runs the Blosc2 filter for HDF5 if it can find the filter's + * header somewhere in the system. This is a convention for this example, but + * the header is not needed in general for running the filter, as it contains + * only helpers and some defines. + */ +#define openPMD_USE_BLOSC2_FILTER (openPMD_HAVE_HDF5 && __has_include()) + +/* This below block is only necessary if the Blosc2 filter was installed to a + * nonstandard directory that the HDF5 library cannot find on its own. In this + * case, link the application against libblosc2_filter.so and set the below + * define to true. The blosc2_filter.h header provides a helper function to + * manually register the filter to the HDF5 library. + */ +#define OPENPMD_INIT_BLOSC2_FILTER_MANUALLY false +#if OPENPMD_INIT_BLOSC2_FILTER_MANUALLY +#include +void init_blosc_for_hdf5() +{ + /* + * This registers the Blosc2 plugin from + * https://github.com/Blosc/HDF5-Blosc2 as a demonstration on how to + * activate and configure dynamic HDF5 filter plugins through openPMD. + */ + + char *version, *date; + int r = register_blosc2(&version, &date); + if (r < 1) + { + throw std::runtime_error("Unable to register Blosc2 plugin with HDF5."); + } + else + { + std::cout << "Blosc2 plugin registered in version '" << version + << "' and date '" << date << "'." << std::endl; + } +} +#endif + +void run_blosc2_filter_for_hdf5_example() +{ +#if openPMD_HAVE_HDF5 && openPMD_USE_BLOSC2_FILTER +#if OPENPMD_INIT_BLOSC2_FILTER_MANUALLY + init_blosc_for_hdf5(); +#endif + // For non-predefined IDs, the ID must be given as a number. 
This example // uses the Blosc2 filter available from // https://github.com/Blosc/HDF5-Blosc2, with the permanent plugin ID 32026 @@ -310,7 +318,6 @@ int main() // activating shuffling and index 6 denotes the compression method. // Compression method 5 is BLOSC_ZSTD, alternatively also defined in // blosc2_filter.h. -#if OPENPMD_USE_BLOSC2_FILTER std::string hdf5_blosc_filter = R"( backend = "hdf5" @@ -323,6 +330,5 @@ int main() cd_values = [0, 0, 0, 0, 4, 1, 5] )"; write("hdf5_blosc_filter.%E", hdf5_blosc_filter); -#endif // OPENPMD_USE_BLOSC2_FILTER -#endif // openPMD_HAVE_HDF5 +#endif } From ab45489666a83626e2238b4a5eacd0f0bf1c1287 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 18 Sep 2025 12:28:31 +0200 Subject: [PATCH 45/56] Ditch self-compiled Blosc2 plugin, use hdf5plugin package --- .../dependencies/install_hdf5_blosc2 | 25 ----- .github/workflows/linux.yml | 4 +- .github/workflows/tooling.yml | 3 +- examples/15_compression.cpp | 100 +++++------------- 4 files changed, 34 insertions(+), 98 deletions(-) delete mode 100755 .github/workflows/dependencies/install_hdf5_blosc2 diff --git a/.github/workflows/dependencies/install_hdf5_blosc2 b/.github/workflows/dependencies/install_hdf5_blosc2 deleted file mode 100755 index 863dc753dd..0000000000 --- a/.github/workflows/dependencies/install_hdf5_blosc2 +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash - -set -eu -o pipefail -set -x - -cd /opt - -version="v2.0.0" -patch_url="https://github.com/franzpoeschel/HDF5-Blosc2/commit/55b1feea7bf18a539dfbe4413a920bc9570aa0c6.patch" -patch_path="$(pwd)/hdf5_blosc2_cmake_fixes.patch" - - -git clone -b "$version" https://github.com/Blosc/HDF5-Blosc2 -cd HDF5-Blosc2 - -if [[ -z "${SKIP_HDF5_BLOSC2_PATCH:-}" ]]; then - curl -sLo "$patch_path" "$patch_url" - git config user.email "tooling@tools.com" - git config user.name "Tooling" - git am "$patch_path" -fi - -cmake . 
-B build -DBUILD_TESTS=OFF -cmake --build build --parallel 4 -sudo cmake --build build --parallel 4 --target install diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 90b24d8194..bf4be7e6bb 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -245,7 +245,9 @@ jobs: sudo ln -s "$(which cmake)" /usr/bin/cmake eval $(spack env activate --sh .github/ci/spack-envs/gcc12_py36_ompi_h5_ad2/) spack install - ./.github/workflows/dependencies/install_hdf5_blosc2 + + # Use this to make the plugins available also from the C/C++ API + export HDF5_PLUGIN_PATH="$(python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);')/plugins" share/openPMD/download_samples.sh build cmake -S . -B build \ diff --git a/.github/workflows/tooling.yml b/.github/workflows/tooling.yml index 9ed7c7ca54..03bf06f945 100644 --- a/.github/workflows/tooling.yml +++ b/.github/workflows/tooling.yml @@ -61,7 +61,8 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/clangtidy_nopy_ompi_h5_ad2/) spack install - ./.github/workflows/dependencies/install_hdf5_blosc2 + # Use this to make the plugins available also from the C/C++ API + export HDF5_PLUGIN_PATH="$(python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);')/plugins" SOURCEPATH="$(pwd)" share/openPMD/download_samples.sh build diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp index 1921714d4c..2f73f480f6 100644 --- a/examples/15_compression.cpp +++ b/examples/15_compression.cpp @@ -258,77 +258,35 @@ int main() )"; write("hdf5_with_dataset_specific_configurations.%E", extended_hdf5_config); - run_blosc2_filter_for_hdf5_example(); -#endif // openPMD_HAVE_HDF5 -} - -/* This example runs the Blosc2 filter for HDF5 if it can find the filter's - * header somewhere in the system. This is a convention for this example, but - * the header is not needed in general for running the filter, as it contains - * only helpers and some defines. 
- */ -#define openPMD_USE_BLOSC2_FILTER (openPMD_HAVE_HDF5 && __has_include()) - -/* This below block is only necessary if the Blosc2 filter was installed to a - * nonstandard directory that the HDF5 library cannot find on its own. In this - * case, link the application against libblosc2_filter.so and set the below - * define to true. The blosc2_filter.h header provides a helper function to - * manually register the filter to the HDF5 library. - */ -#define OPENPMD_INIT_BLOSC2_FILTER_MANUALLY false -#if OPENPMD_INIT_BLOSC2_FILTER_MANUALLY -#include -void init_blosc_for_hdf5() -{ - /* - * This registers the Blosc2 plugin from - * https://github.com/Blosc/HDF5-Blosc2 as a demonstration on how to - * activate and configure dynamic HDF5 filter plugins through openPMD. - */ - - char *version, *date; - int r = register_blosc2(&version, &date); - if (r < 1) - { - throw std::runtime_error("Unable to register Blosc2 plugin with HDF5."); - } - else + // The following example runs the Blosc2 plugin which must be separately + // installed. One simple way is to install the Python package hdf5plugin + // which contains precompiled filters and then point HDF5_PLUGIN_PATH toward + // the plugins directory therein (containing libh5blosc2.so). This example + // assumes such a setup. + if (getenv("HDF5_PLUGIN_PATH")) { - std::cout << "Blosc2 plugin registered in version '" << version - << "' and date '" << date << "'." << std::endl; + // For non-predefined IDs, the ID must be given as a number. This + // example uses the Blosc2 filter available from + // https://pypi.org/project/hdf5plugin/, + // with the permanent plugin ID 32026. + // Generic filters referenced by ID can be configured via the cd_values + // field. This field is an array of unsigned integers and + // plugin-specific interpretation. For the Blosc2 plugin, indexes 0, 1, + // 2 and 3 are reserved. 
index 4 is the compression level, index 5 is a + // boolean for activating shuffling and index 6 denotes the compression + // method. Compression method 5 is BLOSC_ZSTD. + std::string hdf5_blosc_filter = R"( + backend = "hdf5" + + [hdf5.dataset] + chunks = "auto" + + [hdf5.dataset.permanent_filters] + id = 32026 + flags = "mandatory" + cd_values = [0, 0, 0, 0, 4, 1, 5] + )"; + write("hdf5_blosc_filter.%E", hdf5_blosc_filter); } -} -#endif - -void run_blosc2_filter_for_hdf5_example() -{ -#if openPMD_HAVE_HDF5 && openPMD_USE_BLOSC2_FILTER -#if OPENPMD_INIT_BLOSC2_FILTER_MANUALLY - init_blosc_for_hdf5(); -#endif - - // For non-predefined IDs, the ID must be given as a number. This example - // uses the Blosc2 filter available from - // https://github.com/Blosc/HDF5-Blosc2, with the permanent plugin ID 32026 - // (alternatively defined in blosc2_filter.h as FILTER_BLOSC2). Generic - // filters referenced by ID can be configured via the cd_values field. This - // field is an array of unsigned integers and plugin-specific - // interpretation. For the Blosc2 plugin, indexes 0, 1, 2 and 3 are - // reserved. index 4 is the compression level, index 5 is a boolean for - // activating shuffling and index 6 denotes the compression method. - // Compression method 5 is BLOSC_ZSTD, alternatively also defined in - // blosc2_filter.h. 
- std::string hdf5_blosc_filter = R"( - backend = "hdf5" - - [hdf5.dataset] - chunks = "auto" - - [hdf5.dataset.permanent_filters] - id = 32026 - flags = "mandatory" - cd_values = [0, 0, 0, 0, 4, 1, 5] - )"; - write("hdf5_blosc_filter.%E", hdf5_blosc_filter); -#endif +#endif // openPMD_HAVE_HDF5 } From a93e87e636f615af48079dd18045d92b23adf0a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 18 Sep 2025 13:58:47 +0200 Subject: [PATCH 46/56] CI fixes --- .github/workflows/linux.yml | 2 ++ .github/workflows/tooling.yml | 7 ++++++- src/IO/HDF5/HDF5IOHandler.cpp | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index bf4be7e6bb..0a7028d2dd 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -248,6 +248,8 @@ jobs: # Use this to make the plugins available also from the C/C++ API export HDF5_PLUGIN_PATH="$(python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);')/plugins" + echo "$HDF5_PLUGIN_PATH" + ls "$HDF5_PLUGIN_PATH" share/openPMD/download_samples.sh build cmake -S . -B build \ diff --git a/.github/workflows/tooling.yml b/.github/workflows/tooling.yml index 03bf06f945..0c7106e668 100644 --- a/.github/workflows/tooling.yml +++ b/.github/workflows/tooling.yml @@ -29,7 +29,10 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/clangtidy_nopy_ompi_h5_ad2/) spack install - ./.github/workflows/dependencies/install_hdf5_blosc2 + # Use this to make the plugins available also from the C/C++ API + export HDF5_PLUGIN_PATH="$(python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);')/plugins" + echo "$HDF5_PLUGIN_PATH" + ls "$HDF5_PLUGIN_PATH" share/openPMD/download_samples.sh build cmake -S . 
-B build \ @@ -63,6 +66,8 @@ jobs: # Use this to make the plugins available also from the C/C++ API export HDF5_PLUGIN_PATH="$(python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);')/plugins" + echo "$HDF5_PLUGIN_PATH" + ls "$HDF5_PLUGIN_PATH" SOURCEPATH="$(pwd)" share/openPMD/download_samples.sh build diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index c723544a50..f75fe7a527 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -642,7 +642,7 @@ namespace throw filter_error(); } - enum class filter_type + enum class filter_type : uint8_t { ByID, Zlib From 60db3b90200af8f5ced966cfd7c66c438467eb60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 23 Sep 2025 11:28:56 +0200 Subject: [PATCH 47/56] Try installing the deb package for h5pl... --- .../dependencies/install_hdf5_plugins | 11 +++++++++ .github/workflows/linux.yml | 24 +++++++++++++------ .github/workflows/tooling.yml | 22 ++++++++--------- 3 files changed, 39 insertions(+), 18 deletions(-) create mode 100755 .github/workflows/dependencies/install_hdf5_plugins diff --git a/.github/workflows/dependencies/install_hdf5_plugins b/.github/workflows/dependencies/install_hdf5_plugins new file mode 100755 index 0000000000..21b5eb1423 --- /dev/null +++ b/.github/workflows/dependencies/install_hdf5_plugins @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +version_major=1.14 +version_minor=6 +build_var=ubuntu-2404_gcc + +cd /opt +wget "https://github.com/HDFGroup/hdf5_plugins/releases/download/hdf5-${version_major}.${version_minor}/hdf5_plugins-${version_major}-${build_var}.deb" > &2 +sudo dpkg -i "hdf5_plugins-${version_major}-${build_var}.deb" > &2 +rm "hdf5_plugins-${version_major}-${build_var}.deb" +echo "/HDF_Group/HDF5/${version_major}.${version_minor}/lib/plugin/" diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 0a7028d2dd..a6c604c539 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml 
@@ -98,6 +98,11 @@ jobs: sudo apt-get install clang-11 gfortran libopenmpi-dev python3 python3-pip python3 -m pip install -U hdf5plugin sudo .github/workflows/dependencies/install_spack + + # Use this to make the HDF5 plugins available from the C/C++ API. + export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" + echo "$HDF5_PLUGIN_PATH" + ls "$HDF5_PLUGIN_PATH" - name: Build env: {CC: clang-11, CXX: clang++-11, CXXFLAGS: -Werror} run: | @@ -173,11 +178,15 @@ jobs: run: | sudo apt-get update sudo apt-get remove openmpi* libopenmpi* *hdf5* || true - sudo apt-get install g++ gfortran python3 python3-pip + sudo apt-get install g++ gfortran python3 - python3 -m pip install hdf5plugin sudo .github/workflows/dependencies/install_spack + # Use this to make the HDF5 plugins available from the C/C++ API. + export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" + echo "$HDF5_PLUGIN_PATH" + ls "$HDF5_PLUGIN_PATH" + # Need to build this manually due to broken MPICH package in Ubuntu 24.04 # https://bugs.launchpad.net/ubuntu/+source/mpich/+bug/2072338 sudo .github/workflows/dependencies/install_mpich @@ -232,6 +241,12 @@ jobs: sudo apt-get update sudo apt-get install g++-12 gfortran libopenmpi-dev python3 python3-setuptools sudo .github/workflows/dependencies/install_spack + + # Use this to make the HDF5 plugins available from the C/C++ API. 
+ export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" + echo "$HDF5_PLUGIN_PATH" + ls "$HDF5_PLUGIN_PATH" + python3 -m pip install -U pip python3 -m pip install -U numpy python3 -m pip install -U mpi4py @@ -246,11 +261,6 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/gcc12_py36_ompi_h5_ad2/) spack install - # Use this to make the plugins available also from the C/C++ API - export HDF5_PLUGIN_PATH="$(python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);')/plugins" - echo "$HDF5_PLUGIN_PATH" - ls "$HDF5_PLUGIN_PATH" - share/openPMD/download_samples.sh build cmake -S . -B build \ -DopenPMD_USE_PYTHON=ON \ diff --git a/.github/workflows/tooling.yml b/.github/workflows/tooling.yml index 0c7106e668..a3842762d8 100644 --- a/.github/workflows/tooling.yml +++ b/.github/workflows/tooling.yml @@ -22,6 +22,11 @@ jobs: sudo apt-get install clang clang-tidy gfortran libopenmpi-dev python-is-python3 SPACK_VER=1.0.1 sudo -E .github/workflows/dependencies/install_spack echo "SPACK VERSION: $(spack --version)" + + # Use this to make the HDF5 plugins available from the C/C++ API. + export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" + echo "$HDF5_PLUGIN_PATH" + ls "$HDF5_PLUGIN_PATH" - name: Build env: {CC: clang, CXX: clang++} run: | @@ -29,11 +34,6 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/clangtidy_nopy_ompi_h5_ad2/) spack install - # Use this to make the plugins available also from the C/C++ API - export HDF5_PLUGIN_PATH="$(python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);')/plugins" - echo "$HDF5_PLUGIN_PATH" - ls "$HDF5_PLUGIN_PATH" - share/openPMD/download_samples.sh build cmake -S . 
-B build \ -DCMAKE_CXX_CLANG_TIDY="$(which clang-tidy);-system-headers=0" \ @@ -54,9 +54,14 @@ jobs: - name: Install run: | sudo apt-get update - sudo apt-get install clang-19 libc++-dev libc++abi-dev python3 gfortran libopenmpi-dev python3-numpy python3-hdf5plugin + sudo apt-get install clang-19 libc++-dev libc++abi-dev python3 gfortran libopenmpi-dev python3-numpy SPACK_VER=1.0.1 sudo -E .github/workflows/dependencies/install_spack echo "SPACK VERSION: $(spack --version)" + + # Use this to make the HDF5 plugins available from the C/C++ API. + export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" + echo "$HDF5_PLUGIN_PATH" + ls "$HDF5_PLUGIN_PATH" - name: Build env: {CC: mpicc, CXX: mpic++, OMPI_CC: clang-19, OMPI_CXX: clang++-19, CXXFLAGS: -Werror, OPENPMD_HDF5_CHUNKS: none, OPENPMD_TEST_NFILES_MAX: 100} run: | @@ -64,11 +69,6 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/clangtidy_nopy_ompi_h5_ad2/) spack install - # Use this to make the plugins available also from the C/C++ API - export HDF5_PLUGIN_PATH="$(python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);')/plugins" - echo "$HDF5_PLUGIN_PATH" - ls "$HDF5_PLUGIN_PATH" - SOURCEPATH="$(pwd)" share/openPMD/download_samples.sh build export LDFLAGS="${LDFLAGS} -fsanitize=address,undefined -shared-libsan" From d3d9d4fc0930d6b58a23bffbc48895777fa5ad4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 23 Sep 2025 11:34:22 +0200 Subject: [PATCH 48/56] tmp: check if python example for hdf5+blosc2 runs --- examples/15_compression.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/15_compression.py b/examples/15_compression.py index c7f8e0fe95..cdfe288da7 100644 --- a/examples/15_compression.py +++ b/examples/15_compression.py @@ -295,6 +295,7 @@ def main(): # the compression method. 
if "hdf5" in opmd.variants and opmd.variants["hdf5"] and HAS_HDF5_PLUGIN: + raise RuntimeError("hiii") hdf5_blosc2_filter = { "backend": "hdf5", "hdf5": { From a0ed3a61f3f87fb638165d548c6d84305c128ace Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 23 Sep 2025 11:36:11 +0200 Subject: [PATCH 49/56] fixes --- .github/workflows/dependencies/install_hdf5_plugins | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dependencies/install_hdf5_plugins b/.github/workflows/dependencies/install_hdf5_plugins index 21b5eb1423..0c26265765 100755 --- a/.github/workflows/dependencies/install_hdf5_plugins +++ b/.github/workflows/dependencies/install_hdf5_plugins @@ -5,7 +5,7 @@ version_minor=6 build_var=ubuntu-2404_gcc cd /opt -wget "https://github.com/HDFGroup/hdf5_plugins/releases/download/hdf5-${version_major}.${version_minor}/hdf5_plugins-${version_major}-${build_var}.deb" > &2 -sudo dpkg -i "hdf5_plugins-${version_major}-${build_var}.deb" > &2 +wget "https://github.com/HDFGroup/hdf5_plugins/releases/download/hdf5-${version_major}.${version_minor}/hdf5_plugins-${version_major}-${build_var}.deb" >&2 +sudo dpkg -i "hdf5_plugins-${version_major}-${build_var}.deb" >&2 rm "hdf5_plugins-${version_major}-${build_var}.deb" echo "/HDF_Group/HDF5/${version_major}.${version_minor}/lib/plugin/" From eb989953f557424d0b3c58de48c19c8d16178ad1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 23 Sep 2025 15:14:31 +0200 Subject: [PATCH 50/56] Move hdf5plugin Python tests to other runs --- .github/workflows/linux.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index a6c604c539..416e73c054 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -96,7 +96,6 @@ jobs: run: | sudo apt-get update sudo apt-get install clang-11 gfortran libopenmpi-dev python3 python3-pip - python3 -m pip install -U hdf5plugin 
sudo .github/workflows/dependencies/install_spack # Use this to make the HDF5 plugins available from the C/C++ API. @@ -253,7 +252,6 @@ jobs: python3 -m pip install -U pandas python3 -m pip install -U dask python3 -m pip install -U pyarrow - python3 -m pip install -U hdf5plugin - name: Build env: {CC: gcc-12, CXX: g++-12, CXXFLAGS: -Werror} run: | @@ -281,6 +279,7 @@ jobs: run: | sudo apt-get update sudo apt-get install g++ libopenmpi-dev libhdf5-openmpi-dev python3 python3-numpy python3-mpi4py python3-pandas python3-h5py-mpi python3-pip + python3 -m pip install jsonschema==4.* referencing hdf5plugin # TODO ADIOS2 - name: Build env: {CXXFLAGS: -Werror, PKG_CONFIG_PATH: /usr/lib/x86_64-linux-gnu/pkgconfig} @@ -298,7 +297,6 @@ jobs: cmake --build build --parallel 4 ctest --test-dir build --output-on-failure - python3 -m pip install jsonschema==4.* referencing cd share/openPMD/json_schema PATH="../../../build/bin:$PATH" make -j 2 # We need to exclude the thetaMode example since that has a different @@ -325,7 +323,7 @@ jobs: run: | apk update apk add hdf5-dev - python3.10 -m pip install numpy h5py + python3.10 -m pip install numpy h5py hdf5plugin - name: Build env: {CXXFLAGS: -Werror} run: | From 6c6297c85c472a08b380b4d81bf71ea066374936 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 23 Sep 2025 15:57:32 +0200 Subject: [PATCH 51/56] Revert "tmp: check if python example for hdf5+blosc2 runs" This reverts commit b81437b1101cef89cf76c41e6940c6b56045001f. --- examples/15_compression.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/15_compression.py b/examples/15_compression.py index cdfe288da7..c7f8e0fe95 100644 --- a/examples/15_compression.py +++ b/examples/15_compression.py @@ -295,7 +295,6 @@ def main(): # the compression method. 
if "hdf5" in opmd.variants and opmd.variants["hdf5"] and HAS_HDF5_PLUGIN: - raise RuntimeError("hiii") hdf5_blosc2_filter = { "backend": "hdf5", "hdf5": { From e2dd692669d232d1ce1d4569dd6071f95a22f8ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 24 Sep 2025 19:36:29 +0200 Subject: [PATCH 52/56] .... --- .github/workflows/linux.yml | 21 ++++++++++++--------- examples/15_compression.cpp | 2 ++ examples/15_compression.py | 1 + 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 416e73c054..3bedd272ae 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -100,8 +100,6 @@ jobs: # Use this to make the HDF5 plugins available from the C/C++ API. export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" - echo "$HDF5_PLUGIN_PATH" - ls "$HDF5_PLUGIN_PATH" - name: Build env: {CC: clang-11, CXX: clang++-11, CXXFLAGS: -Werror} run: | @@ -177,14 +175,12 @@ jobs: run: | sudo apt-get update sudo apt-get remove openmpi* libopenmpi* *hdf5* || true - sudo apt-get install g++ gfortran python3 + sudo apt-get install g++ gfortran python3 python3-hdf5plugin sudo .github/workflows/dependencies/install_spack # Use this to make the HDF5 plugins available from the C/C++ API. 
export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" - echo "$HDF5_PLUGIN_PATH" - ls "$HDF5_PLUGIN_PATH" # Need to build this manually due to broken MPICH package in Ubuntu 24.04 # https://bugs.launchpad.net/ubuntu/+source/mpich/+bug/2072338 @@ -198,8 +194,12 @@ jobs: mpicxx --version perl --version python --version + echo "BEFORE ACTIVATING SPACK:" + python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);' eval $(spack env activate --sh .github/ci/spack-envs/gcc13_py312_mpich_h5_ad2/) spack install + echo "AFTER ACTIVATING SPACK:" + python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);' share/openPMD/download_samples.sh build cmake -S . -B build \ @@ -243,8 +243,6 @@ jobs: # Use this to make the HDF5 plugins available from the C/C++ API. export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" - echo "$HDF5_PLUGIN_PATH" - ls "$HDF5_PLUGIN_PATH" python3 -m pip install -U pip python3 -m pip install -U numpy @@ -252,12 +250,17 @@ jobs: python3 -m pip install -U pandas python3 -m pip install -U dask python3 -m pip install -U pyarrow + python3 -m pip install -U hdf5plugin - name: Build env: {CC: gcc-12, CXX: g++-12, CXXFLAGS: -Werror} run: | sudo ln -s "$(which cmake)" /usr/bin/cmake + echo "BEFORE ACTIVATING SPACK:" + python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);' eval $(spack env activate --sh .github/ci/spack-envs/gcc12_py36_ompi_h5_ad2/) spack install + echo "AFTER ACTIVATING SPACK:" + python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);' share/openPMD/download_samples.sh build cmake -S . 
-B build \ @@ -279,7 +282,7 @@ jobs: run: | sudo apt-get update sudo apt-get install g++ libopenmpi-dev libhdf5-openmpi-dev python3 python3-numpy python3-mpi4py python3-pandas python3-h5py-mpi python3-pip - python3 -m pip install jsonschema==4.* referencing hdf5plugin + python3 -m pip install jsonschema==4.* referencing # TODO ADIOS2 - name: Build env: {CXXFLAGS: -Werror, PKG_CONFIG_PATH: /usr/lib/x86_64-linux-gnu/pkgconfig} @@ -323,7 +326,7 @@ jobs: run: | apk update apk add hdf5-dev - python3.10 -m pip install numpy h5py hdf5plugin + python3.10 -m pip install numpy h5py - name: Build env: {CXXFLAGS: -Werror} run: | diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp index 2f73f480f6..5ab70acff1 100644 --- a/examples/15_compression.cpp +++ b/examples/15_compression.cpp @@ -23,6 +23,7 @@ #include #include +#include void run_blosc2_filter_for_hdf5_example(); @@ -287,6 +288,7 @@ int main() cd_values = [0, 0, 0, 0, 4, 1, 5] )"; write("hdf5_blosc_filter.%E", hdf5_blosc_filter); + throw std::runtime_error("ACTIVATED BLOSC2 FILTER"); } #endif // openPMD_HAVE_HDF5 } diff --git a/examples/15_compression.py b/examples/15_compression.py index c7f8e0fe95..d25f45e9da 100644 --- a/examples/15_compression.py +++ b/examples/15_compression.py @@ -310,6 +310,7 @@ def main(): } write("hdf5_blosc_filter.%E", hdf5_blosc2_filter) + raise RuntimeError("Selected Blosc2 filter") main() From 5f766ce61ea71f2ddc85d4ae30df91825fec7b18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 25 Sep 2025 10:45:51 +0200 Subject: [PATCH 53/56] ... 
--- .github/workflows/linux.yml | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 3bedd272ae..0aad544363 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -98,11 +98,11 @@ jobs: sudo apt-get install clang-11 gfortran libopenmpi-dev python3 python3-pip sudo .github/workflows/dependencies/install_spack - # Use this to make the HDF5 plugins available from the C/C++ API. - export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" - name: Build env: {CC: clang-11, CXX: clang++-11, CXXFLAGS: -Werror} run: | + # Use this to make the HDF5 plugins available from the C/C++ API. + export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" sudo ln -s "$(which cmake)" /usr/bin/cmake eval $(spack env activate --sh .github/ci/spack-envs/clang11_nopy_ompi_h5_ad2/) spack install @@ -179,8 +179,6 @@ jobs: sudo .github/workflows/dependencies/install_spack - # Use this to make the HDF5 plugins available from the C/C++ API. - export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" # Need to build this manually due to broken MPICH package in Ubuntu 24.04 # https://bugs.launchpad.net/ubuntu/+source/mpich/+bug/2072338 @@ -189,6 +187,8 @@ jobs: - name: Build env: {CC: gcc, CXX: g++, MPICH_CC: gcc, MPICH_CXX: g++, CXXFLAGS: -Werror} run: | + # Use this to make the HDF5 plugins available from the C/C++ API. + export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" cmake --version mpiexec --version mpicxx --version @@ -203,7 +203,7 @@ jobs: share/openPMD/download_samples.sh build cmake -S . 
-B build \ - -DopenPMD_USE_PYTHON=OFF \ + -DopenPMD_USE_PYTHON=ON \ -DopenPMD_USE_MPI=ON \ -DopenPMD_USE_HDF5=ON \ -DopenPMD_USE_ADIOS2=ON \ @@ -241,8 +241,7 @@ jobs: sudo apt-get install g++-12 gfortran libopenmpi-dev python3 python3-setuptools sudo .github/workflows/dependencies/install_spack - # Use this to make the HDF5 plugins available from the C/C++ API. - export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" + python3 -m pip install -U pip python3 -m pip install -U numpy @@ -250,17 +249,14 @@ jobs: python3 -m pip install -U pandas python3 -m pip install -U dask python3 -m pip install -U pyarrow - python3 -m pip install -U hdf5plugin - name: Build env: {CC: gcc-12, CXX: g++-12, CXXFLAGS: -Werror} run: | + # Use this to make the HDF5 plugins available from the C/C++ API. + export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" sudo ln -s "$(which cmake)" /usr/bin/cmake - echo "BEFORE ACTIVATING SPACK:" - python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);' eval $(spack env activate --sh .github/ci/spack-envs/gcc12_py36_ompi_h5_ad2/) spack install - echo "AFTER ACTIVATING SPACK:" - python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);' share/openPMD/download_samples.sh build cmake -S . 
-B build \ From 4a05428653be8a4270b8107c6b4b5b1e9426368e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 25 Sep 2025 15:10:42 +0200 Subject: [PATCH 54/56] Install hdf5plugin into venv --- .github/workflows/linux.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 0aad544363..353c7fc6e5 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -175,7 +175,7 @@ jobs: run: | sudo apt-get update sudo apt-get remove openmpi* libopenmpi* *hdf5* || true - sudo apt-get install g++ gfortran python3 python3-hdf5plugin + sudo apt-get install g++ gfortran python3 python3-venv sudo .github/workflows/dependencies/install_spack @@ -194,12 +194,12 @@ jobs: mpicxx --version perl --version python --version - echo "BEFORE ACTIVATING SPACK:" - python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);' eval $(spack env activate --sh .github/ci/spack-envs/gcc13_py312_mpich_h5_ad2/) spack install - echo "AFTER ACTIVATING SPACK:" - python -c 'import hdf5plugin; print(hdf5plugin.__path__[0]);' + + python -m venv venv + source venv/bin/activate + pip install mpi4py numpy hdf5plugin share/openPMD/download_samples.sh build cmake -S . 
-B build \ From 325ee1f618d2fccd9ffe08c6ec2ae49c3816b7be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 25 Sep 2025 17:00:08 +0200 Subject: [PATCH 55/56] Remove CI debugging --- examples/15_compression.cpp | 2 -- examples/15_compression.py | 1 - 2 files changed, 3 deletions(-) diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp index 5ab70acff1..2f73f480f6 100644 --- a/examples/15_compression.cpp +++ b/examples/15_compression.cpp @@ -23,7 +23,6 @@ #include #include -#include void run_blosc2_filter_for_hdf5_example(); @@ -288,7 +287,6 @@ int main() cd_values = [0, 0, 0, 0, 4, 1, 5] )"; write("hdf5_blosc_filter.%E", hdf5_blosc_filter); - throw std::runtime_error("ACTIVATED BLOSC2 FILTER"); } #endif // openPMD_HAVE_HDF5 } diff --git a/examples/15_compression.py b/examples/15_compression.py index d25f45e9da..c7f8e0fe95 100644 --- a/examples/15_compression.py +++ b/examples/15_compression.py @@ -310,7 +310,6 @@ def main(): } write("hdf5_blosc_filter.%E", hdf5_blosc2_filter) - raise RuntimeError("Selected Blosc2 filter") main() From deada72ed2c779e9a3f9992e21723582596fc32d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 26 Sep 2025 14:24:36 +0200 Subject: [PATCH 56/56] Cleanup --- .github/workflows/linux.yml | 5 +---- .github/workflows/tooling.yml | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 353c7fc6e5..73489847ad 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -95,7 +95,7 @@ jobs: - name: Install run: | sudo apt-get update - sudo apt-get install clang-11 gfortran libopenmpi-dev python3 python3-pip + sudo apt-get install clang-11 gfortran libopenmpi-dev python3 sudo .github/workflows/dependencies/install_spack - name: Build @@ -240,9 +240,6 @@ jobs: sudo apt-get update sudo apt-get install g++-12 gfortran libopenmpi-dev python3 python3-setuptools sudo 
.github/workflows/dependencies/install_spack - - - python3 -m pip install -U pip python3 -m pip install -U numpy python3 -m pip install -U mpi4py diff --git a/.github/workflows/tooling.yml b/.github/workflows/tooling.yml index a3842762d8..d37a449fe0 100644 --- a/.github/workflows/tooling.yml +++ b/.github/workflows/tooling.yml @@ -68,7 +68,6 @@ jobs: sudo ln -s "$(which cmake)" /usr/bin/cmake eval $(spack env activate --sh .github/ci/spack-envs/clangtidy_nopy_ompi_h5_ad2/) spack install - SOURCEPATH="$(pwd)" share/openPMD/download_samples.sh build export LDFLAGS="${LDFLAGS} -fsanitize=address,undefined -shared-libsan"