diff --git a/.github/workflows/dependencies/install_hdf5_plugins b/.github/workflows/dependencies/install_hdf5_plugins new file mode 100755 index 0000000000..0c26265765 --- /dev/null +++ b/.github/workflows/dependencies/install_hdf5_plugins @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +version_major=1.14 +version_minor=6 +build_var=ubuntu-2404_gcc + +cd /opt +wget "https://github.com/HDFGroup/hdf5_plugins/releases/download/hdf5-${version_major}.${version_minor}/hdf5_plugins-${version_major}-${build_var}.deb" >&2 +sudo dpkg -i "hdf5_plugins-${version_major}-${build_var}.deb" >&2 +rm "hdf5_plugins-${version_major}-${build_var}.deb" +echo "/HDF_Group/HDF5/${version_major}.${version_minor}/lib/plugin/" diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 37b5331ddf..73489847ad 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -97,9 +97,12 @@ jobs: sudo apt-get update sudo apt-get install clang-11 gfortran libopenmpi-dev python3 sudo .github/workflows/dependencies/install_spack + - name: Build env: {CC: clang-11, CXX: clang++-11, CXXFLAGS: -Werror} run: | + # Use this to make the HDF5 plugins available from the C/C++ API. 
+ export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" sudo ln -s "$(which cmake)" /usr/bin/cmake eval $(spack env activate --sh .github/ci/spack-envs/clang11_nopy_ompi_h5_ad2/) spack install @@ -172,9 +175,11 @@ jobs: run: | sudo apt-get update sudo apt-get remove openmpi* libopenmpi* *hdf5* || true - sudo apt-get install g++ gfortran python3 + sudo apt-get install g++ gfortran python3 python3-venv + sudo .github/workflows/dependencies/install_spack + # Need to build this manually due to broken MPICH package in Ubuntu 24.04 # https://bugs.launchpad.net/ubuntu/+source/mpich/+bug/2072338 sudo .github/workflows/dependencies/install_mpich @@ -182,6 +187,8 @@ jobs: - name: Build env: {CC: gcc, CXX: g++, MPICH_CC: gcc, MPICH_CXX: g++, CXXFLAGS: -Werror} run: | + # Use this to make the HDF5 plugins available from the C/C++ API. + export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" cmake --version mpiexec --version mpicxx --version @@ -190,9 +197,13 @@ jobs: eval $(spack env activate --sh .github/ci/spack-envs/gcc13_py312_mpich_h5_ad2/) spack install + python -m venv venv + source venv/bin/activate + pip install mpi4py numpy hdf5plugin + share/openPMD/download_samples.sh build cmake -S . -B build \ - -DopenPMD_USE_PYTHON=OFF \ + -DopenPMD_USE_PYTHON=ON \ -DopenPMD_USE_MPI=ON \ -DopenPMD_USE_HDF5=ON \ -DopenPMD_USE_ADIOS2=ON \ @@ -238,6 +249,8 @@ jobs: - name: Build env: {CC: gcc-12, CXX: g++-12, CXXFLAGS: -Werror} run: | + # Use this to make the HDF5 plugins available from the C/C++ API. 
+ export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" sudo ln -s "$(which cmake)" /usr/bin/cmake eval $(spack env activate --sh .github/ci/spack-envs/gcc12_py36_ompi_h5_ad2/) spack install @@ -248,7 +261,8 @@ jobs: -DopenPMD_USE_MPI=ON \ -DopenPMD_USE_HDF5=ON \ -DopenPMD_USE_ADIOS2=ON \ - -DopenPMD_USE_INVASIVE_TESTS=ON + -DopenPMD_USE_INVASIVE_TESTS=ON \ + -DCMAKE_VERBOSE_MAKEFILE=ON cmake --build build --parallel 4 ctest --test-dir build --output-on-failure @@ -261,6 +275,7 @@ jobs: run: | sudo apt-get update sudo apt-get install g++ libopenmpi-dev libhdf5-openmpi-dev python3 python3-numpy python3-mpi4py python3-pandas python3-h5py-mpi python3-pip + python3 -m pip install jsonschema==4.* referencing # TODO ADIOS2 - name: Build env: {CXXFLAGS: -Werror, PKG_CONFIG_PATH: /usr/lib/x86_64-linux-gnu/pkgconfig} @@ -278,7 +293,6 @@ jobs: cmake --build build --parallel 4 ctest --test-dir build --output-on-failure - python3 -m pip install jsonschema==4.* referencing cd share/openPMD/json_schema PATH="../../../build/bin:$PATH" make -j 2 # We need to exclude the thetaMode example since that has a different diff --git a/.github/workflows/tooling.yml b/.github/workflows/tooling.yml index 94afb20c89..d37a449fe0 100644 --- a/.github/workflows/tooling.yml +++ b/.github/workflows/tooling.yml @@ -22,6 +22,11 @@ jobs: sudo apt-get install clang clang-tidy gfortran libopenmpi-dev python-is-python3 SPACK_VER=1.0.1 sudo -E .github/workflows/dependencies/install_spack echo "SPACK VERSION: $(spack --version)" + + # Use this to make the HDF5 plugins available from the C/C++ API. 
+ export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" + echo "$HDF5_PLUGIN_PATH" + ls "$HDF5_PLUGIN_PATH" - name: Build env: {CC: clang, CXX: clang++} run: | @@ -52,6 +57,11 @@ jobs: sudo apt-get install clang-19 libc++-dev libc++abi-dev python3 gfortran libopenmpi-dev python3-numpy SPACK_VER=1.0.1 sudo -E .github/workflows/dependencies/install_spack echo "SPACK VERSION: $(spack --version)" + + # Use this to make the HDF5 plugins available from the C/C++ API. + export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)" + echo "$HDF5_PLUGIN_PATH" + ls "$HDF5_PLUGIN_PATH" - name: Build env: {CC: mpicc, CXX: mpic++, OMPI_CC: clang-19, OMPI_CXX: clang++-19, CXXFLAGS: -Werror, OPENPMD_HDF5_CHUNKS: none, OPENPMD_TEST_NFILES_MAX: 100} run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index 11d09e498e..3811ca7ab3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -718,6 +718,7 @@ set(openPMD_EXAMPLE_NAMES 12_span_write 13_write_dynamic_configuration 14_toml_template + 15_compression ) set(openPMD_PYTHON_EXAMPLE_NAMES 2_read_serial @@ -734,6 +735,7 @@ set(openPMD_PYTHON_EXAMPLE_NAMES 11_particle_dataframe 12_span_write 13_write_dynamic_configuration + 15_compression ) if(openPMD_USE_INVASIVE_TESTS) diff --git a/docs/source/backends/hdf5.rst b/docs/source/backends/hdf5.rst index 1d1866d874..5b64893625 100644 --- a/docs/source/backends/hdf5.rst +++ b/docs/source/backends/hdf5.rst @@ -25,6 +25,19 @@ Virtual file drivers are configured via JSON/TOML. Refer to the page on :ref:`JSON/TOML configuration ` for further details. +Filters (compression) +********************* + +HDF5 supports so-called filters for transformations such as compression on datasets. +These can be permanent (applied to an entire dataset) and transient (applied to individual I/O operations). +The openPMD-api currently supports permanent filters. +Pipelines of multiple subsequent filters are supported. +Refer also to `this documentation `_. 
+ +Filters are applied via :ref:`JSON/TOML configuration `, see there for detailed instructions on how to apply filters. +There are also extended examples on how to apply compression options to ADIOS2 and HDF5 in the examples: `Python `_ / `C++ `_. + + Backend-Specific Controls ------------------------- diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst index 123b0a58e0..9d5e1dcf2c 100644 --- a/docs/source/details/backendconfig.rst +++ b/docs/source/details/backendconfig.rst @@ -185,8 +185,8 @@ Explanation of the single keys: Additionally, specifying ``"disk_override"``, ``"buffer_override"`` or ``"new_step_override"`` will take precedence over options specified without the ``_override`` suffix, allowing to invert the normal precedence order. This way, a data producing code can hardcode the preferred flush target per ``flush()`` call, but users can e.g. still entirely deactivate flushing to disk in the ``Series`` constructor by specifying ``preferred_flush_target = buffer_override``. This is useful when applying the asynchronous IO capabilities of the BP5 engine. -* ``adios2.dataset.operators``: This key contains a list of ADIOS2 `operators `_, used to enable compression or dataset transformations. - Each object in the list has two keys: +* ``adios2.dataset.operators``: This key contains either a single ADIOS2 `operator `_ or a list of operators, used to enable compression or dataset transformations. + Each operator is an object with two keys: * ``type`` supported ADIOS operator type, e.g. zfp, sz * ``parameters`` is an associative map of string parameters for the operator (e.g. compression levels) @@ -247,6 +247,24 @@ Explanation of the single keys: An explicit chunk size can be specified as a list of positive integers, e.g. ``hdf5.dataset.chunks = [10, 100]``. Note that this specification should only be used per-dataset, e.g. in ``resetDataset()``/``reset_dataset()``. 
Chunking generally improves performance and only needs to be disabled in corner-cases, e.g. when heavily relying on independent, parallel I/O that non-collectively declares data records. +* ``hdf5.dataset.permanent_filters``: Either a single HDF5 permanent filter specification or a list of HDF5 permanent filter specifications. + Each filter specification is a JSON/TOML object, but there are multiple options: + + * Zlib: The Zlib filter has a distinct API in HDF5 and the configuration for Zlib in openPMD is hence also different. It is activated by the mandatory key ``type = "zlib"`` and configured by the optional integer key ``aggression``. + Example: ``{"type": "zlib", "aggression": 5}``. + * Filters identified by their global ID `registered with the HDF group `_. + They are activated by the mandatory integer key ``id`` containing this global ID. + All other keys are optional: + + * ``type = "by_id"`` may optionally be specified for clarity and consistency. + * The string key ``flags`` can take the values ``"mandatory"`` or ``"optional"``, indicating if HDF5 should abort execution if the filter cannot be applied for some reason. + * The key ``cd_values`` points to a list of nonnegative integers. + These are filter-specific configuration options. + Refer to the specific filter's documentation. + + Alternatively to an integer ID, the key ``id`` may also be of string type, identifying one of the six builtin filters of HDF5: ``"deflate", "shuffle", "fletcher32", "szip", "nbit", "scaleoffset"``. + + * ``hdf5.vfd.type`` selects the HDF5 virtual file driver. 
Currently available are: diff --git a/examples/13_write_dynamic_configuration.cpp b/examples/13_write_dynamic_configuration.cpp index 56d441d76d..3250cb162e 100644 --- a/examples/13_write_dynamic_configuration.cpp +++ b/examples/13_write_dynamic_configuration.cpp @@ -47,6 +47,7 @@ type = "bp4" # ADIOS2 allows adding several operators # Lists are given in TOML by using double brackets +# For specifying a single operator only, the list may be skipped. [[adios2.dataset.operators]] type = "zlib" @@ -192,14 +193,12 @@ CFG.CHUNKS = [10] "resizable": true, "adios2": { "dataset": { - "operators": [ - { - "type": "zlib", - "parameters": { - "clevel": 9 - } + "operators": { + "type": "zlib", + "parameters": { + "clevel": 9 } - ] + } } } })END"; diff --git a/examples/13_write_dynamic_configuration.py b/examples/13_write_dynamic_configuration.py index fa40e61985..0dc67a8e5c 100644 --- a/examples/13_write_dynamic_configuration.py +++ b/examples/13_write_dynamic_configuration.py @@ -31,6 +31,7 @@ # ADIOS2 allows adding several operators # Lists are given in TOML by using double brackets +# For specifying a single operator only, the list may be skipped. [[adios2.dataset.operators]] type = "zlib" @@ -106,12 +107,12 @@ def main(): } } config['adios2']['dataset'] = { - 'operators': [{ + 'operators': { 'type': 'zlib', 'parameters': { 'clevel': 9 } - }] + } } temperature = iteration.meshes["temperature"] diff --git a/examples/15_compression.cpp b/examples/15_compression.cpp new file mode 100644 index 0000000000..2f73f480f6 --- /dev/null +++ b/examples/15_compression.cpp @@ -0,0 +1,292 @@ +/* Copyright 2025 Franz Poeschel + * + * This file is part of openPMD-api. + * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see . + */ + +#include + +#include +#include + +void run_blosc2_filter_for_hdf5_example(); + +void write(std::string const &filename, std::string const &config) +{ + using namespace openPMD; + std::cout << "Config for '" << filename << "' as JSON:\n" + << json::merge(config, "{}") << "\n\n"; + Series series( + "../samples/compression/" + filename, Access::CREATE_LINEAR, config); + + for (size_t i = 0; i < 10; ++i) + { + auto &current_iteration = series.snapshots()[i]; + + // First, write an E mesh. + auto &E = current_iteration.meshes["E"]; + E.setAxisLabels({"x", "y"}); + for (auto const &dim : {"x", "y"}) + { + auto &component = E[dim]; + component.resetDataset({Datatype::FLOAT, {10, 10}}); + auto buffer_view = + component.storeChunk({0, 0}, {10, 10}).currentBuffer(); + // Now fill the prepared buffer with some nonsense data. 
+ std::iota(buffer_view.begin(), buffer_view.end(), i * 100); + } + } +} + +int main() +{ + // Backend specific configuration can be given in either JSON or TOML. + // We will stick with TOML in this example, since it allows inline comments + // and remains more legible for larger configurations. + // If you are interested in the configurations as JSON, run the example and + // their JSON equivalents will be printed to stdout. + +#if openPMD_HAVE_ADIOS2 + // We start with two examples for ADIOS2. + std::string const simple_adios2_config = R"( + + # Backend can either be inferred from the filename ending, or specified + # explicitly. In the latter case, the filename ending can be given as + # a wildcard %E, openPMD will then pick a default ending. + backend = "adios2" + + # ADIOS2 supports adding multiple operators to a variable, hence we + # specify a list of operators here (using TOML's double bracket syntax). + # How much sense this makes depends on the specific operators in use. + # If specifying only one operator, you can also replace the list by its + # only element as a shorthand (see next config example). + + [[adios2.dataset.operators]] + type = "bzip2" + parameters.clevel = 9 # The available parameters depend + # on the operator. + # Here, we specify bzip2's compression level. + )"; + write("adios2_with_bzip2.%E", simple_adios2_config); + + // The compression can also be specified per-dataset. + // For more details, also check: + // https://openpmd-api.readthedocs.io/en/latest/details/backendconfig.html#dataset-specific-configuration + + // This example will demonstrate the use of pattern matching. + // adios2.dataset is now a list of dataset configurations. The specific + // configuration to be used for a dataset will be determined by matching + // the dataset name against the patterns specified by the 'select' key. + // The actual configuration to be forwarded to the backend is stored under + // the 'cfg' key. 
+ std::string const extended_adios2_config = R"( + backend = "adios2" + + [[adios2.dataset]] + # This uses egrep-type regular expressions. + select = "meshes/.*" + # Inside the cfg key, specify the actual config to be forwarded to the + # ADIOS2 dataset. + # So, specify the operators list again. Let's use Blosc for this. + [adios2.dataset.cfg.operators] + type = "blosc" + parameters.doshuffle = "BLOSC_BITSHUFFLE" + parameters.clevel = 1 + + # Now, configure the particles. + [[adios2.dataset]] + # The match can either be against the path within the containing + # Iteration (e.g. 'meshes/E/x', as above) or (as in this example), + # against the full path (e.g. '/data/0/particles/e/position/x'). + # In this example, completely deactivate compression specifically for + # 'particles/e/position/x'. All other particle datasets will + # fall back to the default configuration specified below. + # Be careful when specifying compression per-Iteration. While this + # syntax fundamentally allows doing that, compressions once specified + # on an ADIOS2 variable will not be removed again. + # Since variable-encoding reuses ADIOS2 variables from previous + # Iterations, the compression configuration of the first Iteration will + # leak into all subsequent Iterations. + select = "/data/[0-9]*/particles/e/position/x" + cfg.operators = [] + + # Now, the default configuration. + # In general, the dataset configurations are matched top-down, going for + # the first matching configuration. So, a default configuration could + # theoretically be specified by emplacing a catch-all pattern + # (regex: ".*") as the last option. + # However, we also define an explicit shorthand for specifying default + # configurations: Just omit the 'select' key. 
This special syntax is + # understood as the default configuration no matter where in the list it + # is emplaced, and it allows the backends to initialize the default + # configuration globally, instead of applying it selectively to each + # dataset that matches a catch-all pattern. + [[adios2.dataset]] + [adios2.dataset.cfg.operators] + type = "bzip2" + parameters.clevel = 2 + )"; + write( + "adios2_with_dataset_specific_configurations.%E", + extended_adios2_config); +#endif // openPMD_HAVE_ADIOS2 + +#if openPMD_HAVE_HDF5 + // Now, let's continue with HDF5. + // HDF5 supports compression via so-called filters. These can be permanent + // (applied to an entire dataset) and transient (applied to individual I/O + // operations). The openPMD-api currently supports permanent filters. Refer + // also to https://web.ics.purdue.edu/~aai/HDF5/html/Filters.html. + + // Filters are additionally distinguished by how tightly they integrate with + // HDF5. The most tightly-integrated filter is Zlib, which has its own API + // calls and hence also a special JSON/TOML configuration in openPMD: + + std::string const hdf5_zlib_config = R"( + backend = "hdf5" + + [hdf5.dataset] + chunks = "auto" + + [hdf5.dataset.permanent_filters] + type = "zlib" # mandatory parameter + aggression = 5 # optional, defaults to 1 + )"; + write("hdf5_zlib.%E", hdf5_zlib_config); + + // All other filters have a common API and are identified by global IDs + // registered with the HDF Group. More details can be found in the + // H5Zpublic.h header. That header predefines a small number of filter IDs. + // These are directly supported by the openPMD-api: deflate, shuffle, + // fletcher32, szip, nbit, scaleoffset. 
+ + std::string const hdf5_predefined_filter_ids = R"( + backend = "hdf5" + + [hdf5.dataset] + chunks = "auto" + + [hdf5.dataset.permanent_filters] + id = "fletcher32" # mandatory parameter + # A filter can be applied as mandatory (execution should abort if the + # filter cannot be applied) or as optional (execution should ignore when + # the filter cannot be applied). + flags = "mandatory" # optional parameter + type = "by_id" # optional parameter for filters identified by ID, + # mandatory only for zlib (see above) + )"; + write("hdf5_predefined_filter_id.%E", hdf5_predefined_filter_ids); + + // Just like ADIOS2 with their operations, also HDF5 supports adding + // multiple filters into a filter pipeline. The permanent_filters key can + // hence also be given as a list. + + std::string const hdf5_filter_pipeline = R"( + backend = "hdf5" + + [hdf5.dataset] + chunks = "auto" + + # pipeline consisting of two filters + + [[hdf5.dataset.permanent_filters]] + type = "zlib" + aggression = 5 + + [[hdf5.dataset.permanent_filters]] + id = "shuffle" + flags = "mandatory" + )"; + write("hdf5_filter_pipeline.%E", hdf5_filter_pipeline); + + // Dataset-specific backend configuration works independently from the + // chosen backend and can hence also be used in HDF5. We will apply both a + // zlib and a fletcher32 filter, one to the meshes and one to the particles. + std::string const extended_hdf5_config = R"( + backend = "hdf5" + + [[hdf5.dataset]] + select = "meshes/.*" + + [hdf5.dataset.cfg] + chunks = "auto" + [hdf5.dataset.cfg.permanent_filters] + type = "zlib" + aggression = 5 + + # Now, configure the particles. + [[hdf5.dataset]] + select = "particles/.*" + + [hdf5.dataset.cfg] + chunks = "auto" + [hdf5.dataset.cfg.permanent_filters] + id = "fletcher32" + flags = "mandatory" + )"; + write("hdf5_with_dataset_specific_configurations.%E", extended_hdf5_config); + + // The following example runs the Blosc2 plugin which must be separately + // installed. 
One simple way is to install the Python package hdf5plugin + // which contains precompiled filters and then point HDF5_PLUGIN_PATH toward + // the plugins directory therein (containing libh5blosc2.so). This example + // assumes such a setup. + if (getenv("HDF5_PLUGIN_PATH")) + { + // For non-predefined IDs, the ID must be given as a number. This + // example uses the Blosc2 filter available from + // https://pypi.org/project/hdf5plugin/, + // with the permanent plugin ID 32026. + // Generic filters referenced by ID can be configured via the cd_values + // field. This field is an array of unsigned integers and + // plugin-specific interpretation. For the Blosc2 plugin, indexes 0, 1, + // 2 and 3 are reserved. index 4 is the compression level, index 5 is a + // boolean for activating shuffling and index 6 denotes the compression + // method. Compression method 5 is BLOSC_ZSTD. + std::string hdf5_blosc_filter = R"( + backend = "hdf5" + + [hdf5.dataset] + chunks = "auto" + + [hdf5.dataset.permanent_filters] + id = 32026 + flags = "mandatory" + cd_values = [0, 0, 0, 0, 4, 1, 5] + )"; + write("hdf5_blosc_filter.%E", hdf5_blosc_filter); + } +#endif // openPMD_HAVE_HDF5 +} diff --git a/examples/15_compression.py b/examples/15_compression.py new file mode 100644 index 0000000000..c7f8e0fe95 --- /dev/null +++ b/examples/15_compression.py @@ -0,0 +1,315 @@ +# Copyright 2025 Franz Poeschel +# +# This file is part of openPMD-api. +# +# openPMD-api is free software: you can redistribute it and/or modify +# it under the terms of of either the GNU General Public License or +# the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# openPMD-api is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License and the GNU Lesser General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# and the GNU Lesser General Public License along with openPMD-api. +# If not, see . +# + +import numpy as np +import openpmd_api as opmd + +try: + import hdf5plugin + + HAS_HDF5_PLUGIN = True +except ImportError: + HAS_HDF5_PLUGIN = False + + +def write(filename, config): + + series = opmd.Series( + f"../samples/compression_python/{filename}", + opmd.Access.create_linear, + config, + ) + + for i in range(10): + current_iteration = series.snapshots()[i] + + # First, write an E mesh. + E = current_iteration.meshes["E"] + E.axis_labels = ["x", "y"] + for dim in ["x", "y"]: + component = E[dim] + component.reset_dataset(opmd.Dataset(np.dtype("float"), [10, 10])) + component[:, :] = np.reshape( + np.arange(i * 100, (i + 1) * 100, dtype=np.dtype("float")), + [10, 10], + ) + + # Now, write some e particles. + e = current_iteration.particles["e"] + for dim in ["x", "y"]: + # Do not bother with a positionOffset + position_offset = e["positionOffset"][dim] + position_offset.reset_dataset(opmd.Dataset(np.dtype("int"), [100])) + position_offset.make_constant(0) + + position = e["position"][dim] + position.reset_dataset(opmd.Dataset(np.dtype("float"), [100])) + position[:] = np.arange( + i * 100, (i + 1) * 100, dtype=np.dtype("float") + ) + + +def main(): + + # We start with two examples for ADIOS2. + if "adios2" in opmd.variants and opmd.variants["adios2"]: + simple_adios2_config = { + # Backend can either be inferred from the filename ending, or + # specified explicitly. In the latter case, the filename ending can + # be given as a wildcard %E, openPMD will then pick a + # default ending. + "backend": "adios2", + "adios2": { + "dataset": { + # ADIOS2 supports adding multiple operators to a variable, + # hence we specify a list of operators here. 
+ # How much sense this makes depends on the specific + # operators in use. + # If specifying only one operator, you can also replace the + # list by its only element as a shorthand + # (see next config example). + "operators": [ + { + "type": "bzip2", + "parameters": { + # The available parameters depend + # on the operator. + # Here, we specify bzip2's compression level. + "clevel": 9 + }, + } + ] + } + }, + } + write("adios2_with_bzip2.%E", simple_adios2_config) + + # The compression can also be specified per-dataset. + # For more details, also check: + # https://openpmd-api.readthedocs.io/en/latest/details/backendconfig.html#dataset-specific-configuration + + # This example will demonstrate the use of pattern matching. + # adios2.dataset is now a list of dataset configurations. The specific + # configuration to be used for a dataset will be determined by matching + # the dataset name against the patterns specified by the 'select' key. + # The actual configuration to be forwarded to the backend is stored + # under the 'cfg' key. + extended_adios2_config = { + "backend": "adios2", + "adios2": { + "dataset": [ + { + # This uses egrep-type regular expressions. + "select": "meshes/.*", + # Inside the cfg key, specify the actual config to + # be forwarded to the ADIOS2 dataset. + # So, specify the operators list again. + # Let's use Blosc for this. + "cfg": { + "operators": { + "parameters": { + "clevel": 1, + "doshuffle": "BLOSC_BITSHUFFLE", + }, + "type": "blosc", + } + }, + }, + # Now, configure the particles. + { + # The match can either be against the path within the + # containing Iteration (e.g. 'meshes/E/x', as above) + # or (as in this example), against the full path + # (e.g. '/data/0/particles/e/position/x'). In this + # example, completely deactivate compression + # specifically for 'particles/e/position/x'. + # All other particle datasets will fall back to + # the default configuration specified below. 
+ # Be careful when specifying compression per-Iteration. + # While this syntax fundamentally allows doing that, + # compressions once specified on an ADIOS2 variable + # will not be removed again. Since variable-encoding + # reuses ADIOS2 variables from previous Iterations, + # the compression configuration of the first Iteration + # will leak into all subsequent Iterations. + "select": "/data/[0-9]*/particles/e/position/x", + "cfg": {"operators": []}, + }, + # Now, the default configuration. In general, the dataset + # configurations are matched top-down, going for + # the first matching configuration. So, a default + # configuration could theoretically be specified + # by emplacing a catch-all pattern (regex: ".*") as the + # last option. However, we also define an explicit + # shorthand for specifying default configurations: + # Just omit the 'select' key. This special syntax + # is understood as the default configuration no matter + # where in the list it is emplaced, and it allows + # the backends to initialize the default configuration + # globally, instead of applying it selectively + # to each dataset that matches a catch-all pattern. + { + "cfg": { + "operators": { + "parameters": {"clevel": 2}, + "type": "bzip2", + } + } + }, + ] + }, + } + write( + "adios2_with_dataset_specific_configurations.%E", + extended_adios2_config, + ) + + # Now, let's continue with HDF5. + # HDF5 supports compression via so-called filters. These can be permanent + # (applied to an entire dataset) and transient (applied to individual I/O + # operations). The openPMD-api currently supports permanent filters. Refer + # also to https://web.ics.purdue.edu/~aai/HDF5/html/Filters.html. + + # Filters are additionally distinguished by how tightly they integrate with + # HDF5. 
The most tightly-integrated filter is Zlib, which has its own API + # calls and hence also a special JSON/TOML configuration in openPMD: + if "hdf5" in opmd.variants and opmd.variants["hdf5"]: + hdf5_zlib_config = { + "backend": "hdf5", + "hdf5": { + "dataset": { + "chunks": "auto", + "permanent_filters": { + "type": "zlib", # mandatory parameter + "aggression": 5, # optional, defaults to 1 + } + } + }, + } + write("hdf5_zlib.%E", hdf5_zlib_config) + + # All other filters have a common API and are identified by global IDs + # registered with the HDF Group. More details can be found in the + # H5Zpublic.h header. That header predefines a small number + # of filter IDs. + # These are directly supported by the openPMD-api: deflate, shuffle, + # fletcher32, szip, nbit, scaleoffset. + hdf5_predefined_filter_ids = { + "backend": "hdf5", + "hdf5": { + "dataset": { + "chunks": "auto", + "permanent_filters": { + # mandatory parameter + "id": "fletcher32", + # optional parameter + "flags": "mandatory", + # optional parameter for filters identified by ID, + # mandatory only for zlib (see above) + "type": "by_id", + } + } + }, + } + write("hdf5_predefined_filter_id.%E", hdf5_predefined_filter_ids) + + # Just like ADIOS2 with their operations, also HDF5 supports adding + # multiple filters into a filter pipeline. The permanent_filters key + # can hence also be given as a list. + hdf5_filter_pipeline = { + "backend": "hdf5", + "hdf5": { + "dataset": { + "chunks": "auto", + "permanent_filters": [ + {"aggression": 5, "type": "zlib"}, + {"flags": "mandatory", "id": "shuffle"}, + ] + } + }, + } + write("hdf5_filter_pipeline.%E", hdf5_filter_pipeline) + + # Dataset-specific backend configuration works independently from the + # chosen backend and can hence also be used in HDF5. We will apply both + # zlib and a fletcher32 filter, one to the meshes and one + # to the particles. 
+ extended_hdf5_config = { + "backend": "hdf5", + "hdf5": { + "dataset": [ + { + "select": "meshes/.*", + "cfg": { + "chunks": "auto", + "permanent_filters": { + "type": "zlib", + "aggression": 5, + } + }, + }, + { + "select": "particles/.*", + "cfg": { + "chunks": "auto", + "permanent_filters": { + "id": "fletcher32", + "flags": "mandatory", + } + }, + }, + ] + }, + } + write( + "hdf5_with_dataset_specific_configurations.%E", + extended_hdf5_config, + ) + + # For non-predefined IDs, the ID must be given as a number. This example + # uses the Blosc2 filter with the permanent plugin ID 32026, + # (defined in hdf5plugin.FILTERS["blosc2"]), available as part of Python's + # hdf5plugin package. Generic filters referenced by ID can be configured + # via the cd_values field. This field is an array of unsigned integers and + # plugin-specific interpretation. For the Blosc2 plugin, indexes 0, 1, 2 + # and 3 are reserved. index 4 is the compression level, index 5 is a + # boolean for activating shuffling and index 6 denotes + # the compression method. 
+ + if "hdf5" in opmd.variants and opmd.variants["hdf5"] and HAS_HDF5_PLUGIN: + hdf5_blosc2_filter = { + "backend": "hdf5", + "hdf5": { + "dataset": { + "chunks": "auto", + "permanent_filters": { + "cd_values": [0, 0, 0, 0, 4, 1, 5], + "flags": "mandatory", + "id": hdf5plugin.FILTERS["blosc2"], + }, + } + }, + } + + write("hdf5_blosc_filter.%E", hdf5_blosc2_filter) + + +main() diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 06aa172ea5..884a4b6341 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -314,6 +314,9 @@ void ADIOS2IOHandlerImpl::init( } } +namespace +{} + std::optional> ADIOS2IOHandlerImpl::getOperators(json::TracingJSON cfg) { @@ -328,18 +331,14 @@ ADIOS2IOHandlerImpl::getOperators(json::TracingJSON cfg) { return ret_t(); } - auto _operators = datasetConfig["operators"]; - nlohmann::json const &operators = _operators.json(); - for (auto operatorIterator = operators.begin(); - operatorIterator != operators.end(); - ++operatorIterator) - { - nlohmann::json const &op = operatorIterator.value(); - std::string const &type = op["type"]; + + auto parse_single_operator = [this](auto &op, auto &&json_accessor) + -> std::optional { + std::string const &type = *json_accessor(op["type"]); adios2::Params adiosParams; - if (op.contains("parameters")) + if (json_accessor(op)->contains("parameters")) { - nlohmann::json const &params = op["parameters"]; + nlohmann::json const &params = *json_accessor(op["parameters"]); for (auto paramIterator = params.begin(); paramIterator != params.end(); ++paramIterator) @@ -360,14 +359,45 @@ ADIOS2IOHandlerImpl::getOperators(json::TracingJSON cfg) } std::optional adiosOperator = getCompressionOperator(type); - if (adiosOperator) + if (!adiosOperator.has_value()) + { + return std::nullopt; + } + else + { + return ParameterizedOperator{ + *adiosOperator, std::move(adiosParams)}; + } + }; + + auto _operators = datasetConfig["operators"]; + nlohmann::json const 
&operators = _operators.json(); + if (operators.is_array()) + { + for (auto const &op : operators) + { + auto parsed_operator = + parse_single_operator(op, [](auto &j) { return &j; }); + if (parsed_operator) + { + res.emplace_back(std::move(*parsed_operator)); + } + } + _operators.declareFullyRead(); + } + else + { + auto parsed_operator = parse_single_operator( + _operators, [](auto &&j) { return &j.json(); }); + if (parsed_operator) + { + res.emplace_back(std::move(*parsed_operator)); + } + if (operators.contains("parameters")) { - res.emplace_back( - ParameterizedOperator{ - adiosOperator.value(), std::move(adiosParams)}); + _operators["parameters"].declareFullyRead(); } } - _operators.declareFullyRead(); return std::make_optional(std::move(res)); } diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index f09fd5ba4c..f75fe7a527 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -76,6 +76,27 @@ namespace openPMD } while (0) #endif +constexpr char const *const init_json_shadow_str = &R"( +{ + "dataset": { + "chunks": null, + "permanent_filters": null + }, + "independent_stores": null +})"[1]; +constexpr char const *dataset_cfg_mask = &R"( +{ + "dataset": { + "chunks": null, + "permanent_filters": null + } +} +)"[1]; +constexpr char const *const flush_cfg_mask = &R"( +{ + "independent_stores": null +})"[1]; + HDF5IOHandlerImpl::HDF5IOHandlerImpl( AbstractIOHandler *handler, bool do_warn_unused_params) : AbstractIOHandlerImpl(handler) @@ -151,23 +172,6 @@ HDF5IOHandlerImpl::HDF5IOHandlerImpl( m_config = config["hdf5"]; { - constexpr char const *const init_json_shadow_str = R"( - { - "dataset": { - "chunks": null - }, - "independent_stores": null - })"; - constexpr char const *const dataset_cfg_mask = R"( - { - "dataset": { - "chunks": null - } - })"; - constexpr char const *const flush_cfg_mask = R"( - { - "independent_stores": null - })"; m_global_dataset_config = m_config.json(); json::filterByTemplate( 
m_global_dataset_config, @@ -178,6 +182,7 @@ HDF5IOHandlerImpl::HDF5IOHandlerImpl( auto init_json_shadow = nlohmann::json::parse(init_json_shadow_str); json::merge_internal( m_config.getShadow(), init_json_shadow, /* do_prune = */ false); + m_config["dataset"]["permanent_filters"].declareFullyRead(); } // unused params @@ -466,74 +471,245 @@ void HDF5IOHandlerImpl::createPath( "creation"); } -void HDF5IOHandlerImpl::createDataset( - Writable *writable, Parameter const ¶meters) +namespace { - if (access::readOnly(m_handler->m_backendAccess)) - throw std::runtime_error( - "[HDF5] Creating a dataset in a file opened as read only is not " - "possible."); - - if (parameters.joinedDimension.has_value()) + using chunking_t = std::vector; + struct DatasetParams { - error::throwOperationUnsupportedInBackend( - "HDF5", "Joined Arrays currently only supported in ADIOS2"); + struct ByID + { + H5Z_filter_t id = 0; + unsigned int flags = 0; + std::vector cd_values; + }; + struct Zlib + { + unsigned aggression = 1; + }; + using filter_t = std::variant< + // generic + ByID, + // H5Pset_deflate + Zlib>; + + std::optional chunking; + bool resizable = false; + std::vector filters; + }; + + template + auto parse_filter_by_id(JSON &filter_config, Accessor &&json_accessor) + -> DatasetParams::ByID + { + DatasetParams::ByID byID; + if (!json_accessor(filter_config).contains("id")) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "id"}, + "Required key for selecting a filter by ID."); + } + byID.id = [&]() -> H5Z_filter_t { + auto const &id_config = json_accessor(filter_config["id"]); + using pair_t = std::pair; + std::array filter_types{ + pair_t{"deflate", H5Z_FILTER_DEFLATE}, + pair_t{"shuffle", H5Z_FILTER_SHUFFLE}, + pair_t{"fletcher32", H5Z_FILTER_FLETCHER32}, + pair_t{"szip", H5Z_FILTER_SZIP}, + pair_t{"nbit", H5Z_FILTER_NBIT}, + pair_t{"scaleoffset", H5Z_FILTER_SCALEOFFSET}}; + auto id_error = [&]() { + std::stringstream error; + error << "Must be 
either of unsigned integer type or one of:"; + for (auto const &pair : filter_types) + { + error << " '" << pair.first << "'"; + } + error << "."; + return error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "id"}, + error.str()); + }; + if (id_config.is_number_integer()) + { + return id_config.template get(); + } + auto maybe_string = json::asLowerCaseStringDynamic(id_config); + if (!maybe_string.has_value()) + { + throw id_error(); + } + for (auto const &[key, res_type] : filter_types) + { + if (*maybe_string == key) + { + return res_type; + } + } + throw id_error(); + }(); + byID.flags = [&]() -> unsigned int { + if (!json_accessor(filter_config).contains("flags")) + { + return 0; + } + auto const &flag_config = json_accessor(filter_config["flags"]); + using pair_t = std::pair; + std::array filter_types{ + pair_t{"optional", H5Z_FLAG_OPTIONAL}, + pair_t{"mandatory", H5Z_FLAG_MANDATORY}}; + auto flag_error = [&]() { + std::stringstream error; + error << "Must be either of unsigned integer type or one of:"; + for (auto const &pair : filter_types) + { + error << " '" << pair.first << "'"; + } + error << "."; + return error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "flags"}, + error.str()); + }; + if (flag_config.is_number_integer()) + { + return flag_config.template get(); + } + auto maybe_string = json::asLowerCaseStringDynamic(flag_config); + if (!maybe_string.has_value()) + { + throw flag_error(); + } + for (auto const &[key, res_type] : filter_types) + { + if (*maybe_string == key) + { + return res_type; + } + } + throw flag_error(); + }(); + if (json_accessor(filter_config).contains("cd_values")) + { + auto const &cd_values_config = + json_accessor(filter_config["cd_values"]); + try + { + + byID.cd_values = + cd_values_config.template get>(); + } + catch (nlohmann::json::type_error const &) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "cd_values"}, + "Must be an array of unsigned 
integers."); + } + } + return byID; } - if (!writable->written) + template + auto parse_filter_zlib(JSON &filter_config, Accessor &&json_accessor) + -> DatasetParams::Zlib { - /* Sanitize name */ - std::string name = parameters.name; - if (auxiliary::starts_with(name, '/')) - name = auxiliary::replace_first(name, "/", ""); - if (auxiliary::ends_with(name, '/')) - name = auxiliary::replace_last(name, "/", ""); - - std::vector dims; - std::uint64_t num_elements = 1u; - for (auto const &val : parameters.extent) + DatasetParams::Zlib zlib; + if (json_accessor(filter_config).contains("aggression")) { - dims.push_back(static_cast(val)); - num_elements *= val; + auto const &aggression_config = + json_accessor(filter_config["aggression"]); + if (!aggression_config.is_number_integer()) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "aggression"}, + "Must be of unsigned integer type."); + } + zlib.aggression = aggression_config.template get(); } + return zlib; + } - Datatype d = parameters.dtype; - if (d == Datatype::UNDEFINED) + template + auto parse_filter(JSON &filter_config, Accessor &&json_accessor) + -> DatasetParams::filter_t + { + auto filter_error = []() { + return error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters"}, + "Must be either a JSON object or a vector of JSON objects."); + }; + if (!json_accessor(filter_config).is_object()) { - // TODO handle unknown dtype - std::cerr << "[HDF5] Datatype::UNDEFINED caught during dataset " - "creation (serial HDF5)" - << std::endl; - d = Datatype::BOOL; + throw filter_error(); } - json::TracingJSON config = [&]() { - auto parsed_config = - parameters.compileJSONConfig( - writable, *m_handler->jsonMatcher, "hdf5"); - if (auto hdf5_config_it = parsed_config.config.find("hdf5"); - hdf5_config_it != parsed_config.config.end()) + enum class filter_type : uint8_t + { + ByID, + Zlib + }; + + filter_type type = [&]() -> filter_type { + if 
(json_accessor(filter_config).contains("type")) { - auto copy = m_global_dataset_config; - json::merge_internal( - copy, hdf5_config_it.value(), /* do_prune = */ true); - hdf5_config_it.value() = std::move(copy); + auto res = json::asLowerCaseStringDynamic( + json_accessor(filter_config["type"])); + if (!res.has_value()) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "type"}, + "Must be of type string."); + } + using pair_t = std::pair; + std::array filter_types{ + pair_t{"by_id", filter_type::ByID}, + pair_t{"zlib", filter_type::Zlib}}; + for (auto const &[key, res_type] : filter_types) + { + if (*res == key) + { + return res_type; + } + } + std::stringstream error; + error << "Must be one of:"; + for (auto const &pair : filter_types) + { + error << " '" << pair.first << "'"; + } + error << "."; + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "type"}, + error.str()); } else { - parsed_config.config["hdf5"] = m_global_dataset_config; + return filter_type::ByID; } - return parsed_config; }(); + switch (type) + { + case filter_type::ByID: + return parse_filter_by_id(filter_config, json_accessor); + case filter_type::Zlib: + return parse_filter_zlib(filter_config, json_accessor); + } + throw std::runtime_error("Unreachable!"); + } + + auto parse_dataset_config( + json::TracingJSON &config, + std::vector const &dims, + Datatype const d) -> DatasetParams + { + DatasetParams res; + // general - bool is_resizable_dataset = false; if (config.json().contains("resizable")) { - is_resizable_dataset = config["resizable"].json().get(); + res.resizable = config["resizable"].json().get(); } - using chunking_t = std::vector; using compute_chunking_t = std::variant; @@ -590,8 +766,33 @@ void HDF5IOHandlerImpl::createDataset( throw_chunking_error(); } } + + if (datasetConfig.json().contains("permanent_filters")) + { + auto permanent_filters = datasetConfig["permanent_filters"]; + if 
(permanent_filters.json().is_array()) + { + permanent_filters.declareFullyRead(); + res.filters.reserve(permanent_filters.json().size()); + for (auto const &entry : permanent_filters.json()) + { + res.filters.push_back(parse_filter( + entry, [](auto const &j) -> nlohmann::json const & { + return j; + })); + } + } + else + { + res.filters = {parse_filter( + permanent_filters, + [](auto &&j) -> nlohmann::json const & { + return j.json(); + })}; + } + } } - std::optional chunking = std::visit( + res.chunking = std::visit( auxiliary::overloaded{ [&](chunking_t &&explicitly_specified) -> std::optional { @@ -616,6 +817,73 @@ void HDF5IOHandlerImpl::createDataset( }}, std::move(compute_chunking)); + return res; + } +} // namespace + +void HDF5IOHandlerImpl::createDataset( + Writable *writable, Parameter const ¶meters) +{ + if (access::readOnly(m_handler->m_backendAccess)) + throw std::runtime_error( + "[HDF5] Creating a dataset in a file opened as read only is not " + "possible."); + + if (parameters.joinedDimension.has_value()) + { + error::throwOperationUnsupportedInBackend( + "HDF5", "Joined Arrays currently only supported in ADIOS2"); + } + + if (!writable->written) + { + /* Sanitize name */ + std::string name = parameters.name; + if (auxiliary::starts_with(name, '/')) + name = auxiliary::replace_first(name, "/", ""); + if (auxiliary::ends_with(name, '/')) + name = auxiliary::replace_last(name, "/", ""); + + std::vector dims; + std::uint64_t num_elements = 1u; + for (auto const &val : parameters.extent) + { + dims.push_back(static_cast(val)); + num_elements *= val; + } + + Datatype d = parameters.dtype; + if (d == Datatype::UNDEFINED) + { + // TODO handle unknown dtype + std::cerr << "[HDF5] Datatype::UNDEFINED caught during dataset " + "creation (serial HDF5)" + << std::endl; + d = Datatype::BOOL; + } + + json::TracingJSON config = [&]() { + auto parsed_config = + parameters.compileJSONConfig( + writable, *m_handler->jsonMatcher, "hdf5"); + if (auto hdf5_config_it = 
parsed_config.config.find("hdf5"); + hdf5_config_it != parsed_config.config.end()) + { + auto copy = m_global_dataset_config; + json::merge_internal( + copy, hdf5_config_it.value(), /* do_prune = */ true); + hdf5_config_it.value() = std::move(copy); + } + else + { + parsed_config.config["hdf5"] = m_global_dataset_config; + } + return parsed_config; + }(); + + auto [chunking, is_resizable_dataset, filters] = + parse_dataset_config(config, dims, d); + parameters.warnUnusedParameters( config, "hdf5", @@ -700,25 +968,27 @@ void HDF5IOHandlerImpl::createDataset( { if (chunking->size() != parameters.extent.size()) { - std::string chunking_printed = [&]() { - if (chunking->empty()) - { - return std::string("[]"); - } - else - { - std::stringstream s; - auto it = chunking->begin(); - auto end = chunking->end(); - s << '[' << *it++; - for (; it != end; ++it) + // captured structured bindings are a C++20 extension + std::string chunking_printed = + [&, &captured_chunking = chunking]() { + if (captured_chunking->empty()) { - s << ", " << *it; + return std::string("[]"); } - s << ']'; - return s.str(); - } - }(); + else + { + std::stringstream s; + auto it = captured_chunking->begin(); + auto end = captured_chunking->end(); + s << '[' << *it++; + for (; it != end; ++it) + { + s << ", " << *it; + } + s << ']'; + return s.str(); + } + }(); std::cerr << "[HDF5] Chunking for dataset '" << name << "' was specified as " << chunking_printed << ", but dataset has dimensionality " @@ -738,32 +1008,28 @@ void HDF5IOHandlerImpl::createDataset( } } - std::string const &compression = ""; // @todo read from JSON - if (!compression.empty()) - std::cerr - << "[HDF5] Compression not yet implemented in HDF5 backend." 
- << std::endl; - /* - { - std::vector< std::string > args = auxiliary::split(compression, - ":"); std::string const& format = args[0]; if( (format == "zlib" || - format == "gzip" || format == "deflate") - && args.size() == 2 ) - { - status = H5Pset_deflate(datasetCreationProperty, - std::stoi(args[1])); VERIFY(status == 0, "[HDF5] Internal error: Failed - to set deflate compression during dataset creation"); } else if( format - == "szip" || format == "nbit" || format == "scaleoffset" ) std::cerr << - "[HDF5] Compression format " << format - << " not yet implemented. Data will not be - compressed!" - << std::endl; - else - std::cerr << "[HDF5] Compression format " << format - << " unknown. Data will not be compressed!" - << std::endl; + for (auto const &filter : filters) + { + herr_t status = std::visit( + auxiliary::overloaded{ + [&](DatasetParams::ByID const &by_id) { + return H5Pset_filter( + datasetCreationProperty, + by_id.id, + by_id.flags, + by_id.cd_values.size(), + by_id.cd_values.data()); + }, + [&](DatasetParams::Zlib const &zlib) { + return H5Pset_deflate( + datasetCreationProperty, zlib.aggression); + }}, + filter); + VERIFY( + status == 0, + "[HDF5] Internal error: Failed to set filter during dataset " + "creation"); } - */ GetH5DataType getH5DataType({ {typeid(bool).name(), m_H5T_BOOL_ENUM}, diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 3a14187a93..64d41adadb 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -4991,7 +4991,8 @@ unused = "as well" BufferGrowthFactor = "2.0" Profile = "On" -[[adios2.dataset.operators]] +# single brackets, because an operator may also be given as a single object +[adios2.dataset.operators] type = "blosc" parameters.clevel = "1" parameters.doshuffle = "BLOSC_BITSHUFFLE" @@ -5022,7 +5023,7 @@ unused = "dataset parameter" [adios2.dataset] unused = "too" -[[adios2.dataset.operators]] +[adios2.dataset.operators] type = "blosc" [adios2.dataset.operators.parameters] clevel = 3 @@ -6225,11 
+6226,9 @@ TEST_CASE("automatically_deactivate_span", "[serial][adios2]") { "adios2": { "dataset": { - "operators": [ - { - "type": "bzip2" - } - ] + "operators": { + "type": "bzip2" + } } } })END"; @@ -6276,11 +6275,9 @@ TEST_CASE("automatically_deactivate_span", "[serial][adios2]") { "adios2": { "dataset": { - "operators": [ - { - "type": "bzip2" - } - ] + "operators": { + "type": "bzip2" + } } } })END"; @@ -6341,11 +6338,9 @@ TEST_CASE("automatically_deactivate_span", "[serial][adios2]") { "adios2": { "dataset": { - "operators": [ - { - "type": "bzip2" - } - ] + "operators": { + "type": "bzip2" + } } } })END";