From 41d1fedb3c648943a78a03fb4629cdf160b4a448 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 21 Feb 2022 12:01:37 +0100 Subject: [PATCH 01/14] error::Internal --- include/openPMD/Error.hpp | 12 ++++++++++++ src/Error.cpp | 4 ++++ src/binding/python/Error.cpp | 1 + 3 files changed, 17 insertions(+) diff --git a/include/openPMD/Error.hpp b/include/openPMD/Error.hpp index 4fcda11eeb..e9267a07a3 100644 --- a/include/openPMD/Error.hpp +++ b/include/openPMD/Error.hpp @@ -69,5 +69,17 @@ namespace error BackendConfigSchema(std::vector, std::string what); }; + + /** + * @brief Internal errors that should not happen. Please report. + * + * Example: A nullpointer is observed somewhere. + */ + + class Internal : public Error + { + public: + Internal(std::string const &what); + }; } // namespace error } // namespace openPMD diff --git a/src/Error.cpp b/src/Error.cpp index aa6723e63f..eb333c502f 100644 --- a/src/Error.cpp +++ b/src/Error.cpp @@ -45,5 +45,9 @@ namespace error concatVector(errorLocation_in) + "': " + std::move(what)) , errorLocation(std::move(errorLocation_in)) {} + + Internal::Internal(std::string const &what) + : Error("Internal error: " + what + "\nThis is a bug. Please report.") + {} } // namespace error } // namespace openPMD diff --git a/src/binding/python/Error.cpp b/src/binding/python/Error.cpp index ea5712296b..3b81ebe923 100644 --- a/src/binding/python/Error.cpp +++ b/src/binding/python/Error.cpp @@ -14,6 +14,7 @@ void init_Error(py::module &m) m, "ErrorWrongAPIUsage", baseError); py::register_exception( m, "ErrorBackendConfigSchema", baseError); + py::register_exception(m, "ErrorInternal", baseError); #ifndef NDEBUG m.def("test_throw", [](std::string description) { From 0b0524d40b0a766347326d79c4391facb5fa3073 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 21 Feb 2022 12:01:38 +0100 Subject: [PATCH 02/14] Frontend implementation of appending Also use switch statement in many frontend places for the Access enum. With 4 variants, if statements become increasingly unreadable and it's good to have compiler warnings if a case is forgotten. Try to avoid default branches. Also refactor `autoDetectPadding()` into a separate function since it is now needed in two places. --- include/openPMD/IO/AbstractIOHandler.hpp | 18 +++ include/openPMD/IO/Access.hpp | 3 +- src/Iteration.cpp | 98 ++++++++------ src/Mesh.cpp | 11 +- src/ParticleSpecies.cpp | 11 +- src/Record.cpp | 11 +- src/RecordComponent.cpp | 12 +- src/Series.cpp | 161 ++++++++++++++++++----- src/backend/PatchRecordComponent.cpp | 11 +- 9 files changed, 247 insertions(+), 89 deletions(-) diff --git a/include/openPMD/IO/AbstractIOHandler.hpp b/include/openPMD/IO/AbstractIOHandler.hpp index a7239a3375..80616e62bc 100644 --- a/include/openPMD/IO/AbstractIOHandler.hpp +++ b/include/openPMD/IO/AbstractIOHandler.hpp @@ -121,6 +121,23 @@ namespace internal */ class AbstractIOHandler { + friend class Series; + +private: + void setIterationEncoding(IterationEncoding encoding) + { + /* + * In file-based iteration encoding, the APPEND mode is handled entirely + * by the frontend, the backend should just treat it as CREATE mode + */ + if (encoding == IterationEncoding::fileBased && + m_backendAccess == Access::APPEND) + { + // do we really want to have those as const members..? + *const_cast(&m_backendAccess) = Access::CREATE; + } + } + public: #if openPMD_HAVE_MPI AbstractIOHandler(std::string path, Access at, MPI_Comm) @@ -153,6 +170,7 @@ class AbstractIOHandler virtual std::string backendName() const = 0; std::string const directory; + // why do these need to be separate? Access const m_backendAccess; Access const m_frontendAccess; std::queue m_work; diff --git a/include/openPMD/IO/Access.hpp b/include/openPMD/IO/Access.hpp index 93ba662241..2b5d37f260 100644 --- a/include/openPMD/IO/Access.hpp +++ b/include/openPMD/IO/Access.hpp @@ -28,7 +28,8 @@ enum class Access { READ_ONLY, //!< open series as read-only, fails if series is not found READ_WRITE, //!< open existing series as writable - CREATE //!< create new series and truncate existing (files) + CREATE, //!< create new series and truncate existing (files) + APPEND //!< write new iterations to an existing series without reading }; // Access // deprecated name (used prior to 0.12.0) diff --git a/src/Iteration.cpp b/src/Iteration.cpp index 8991f0ebad..91a97ea030 100644 --- a/src/Iteration.cpp +++ b/src/Iteration.cpp @@ -298,15 +298,18 @@ void Iteration::flushVariableBased( void Iteration::flush(internal::FlushParams const &flushParams) { - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) { + case Access::READ_ONLY: { for (auto &m : meshes) m.second.flush(m.first, flushParams); for (auto &species : particles) species.second.flush(species.first, flushParams); + break; } - else - { + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { /* Find the root point [Series] of this file, * meshesPath and particlesPath are stored there */ Series s = retrieveSeries(); @@ -344,6 +347,8 @@ void Iteration::flush(internal::FlushParams const &flushParams) } flushAttributes(flushParams); + break; + } } } @@ -591,15 +596,26 @@ AdvanceStatus Iteration::beginStep(bool reread) (this->IOHandler()->m_frontendAccess == Access::READ_ONLY || this->IOHandler()->m_frontendAccess == Access::READ_WRITE)) { - bool previous = series.iterations.written(); - series.iterations.written() = false; - auto oldType = this->IOHandler()->m_frontendAccess; - auto newType = - const_cast(&this->IOHandler()->m_frontendAccess); - *newType = Access::READ_WRITE; - series.readGorVBased(false); - *newType = oldType; - series.iterations.written() = previous; + switch (IOHandler()->m_frontendAccess) + { + case Access::READ_ONLY: + case Access::READ_WRITE: { + bool previous = series.iterations.written(); + series.iterations.written() = false; + auto oldType = this->IOHandler()->m_frontendAccess; + auto newType = + const_cast(&this->IOHandler()->m_frontendAccess); + *newType = Access::READ_WRITE; + series.readGorVBased(false); + *newType = oldType; + series.iterations.written() = previous; + break; + } + case Access::CREATE: + case Access::APPEND: + // no re-reading necessary + break; + } } return status; @@ -696,42 +712,48 @@ void Iteration::linkHierarchy(Writable &w) void Iteration::runDeferredParseAccess() { - if (IOHandler()->m_frontendAccess == Access::CREATE) + switch (IOHandler()->m_frontendAccess) { - return; - } - - auto &it = get(); - if (!it.m_deferredParseAccess.has_value()) - { - return; - } - auto const &deferred = it.m_deferredParseAccess.value(); + case Access::READ_ONLY: + case Access::READ_WRITE: { + auto &it = get(); + if (!it.m_deferredParseAccess.has_value()) + { + return; + } + auto const &deferred = it.m_deferredParseAccess.value(); - auto oldAccess = IOHandler()->m_frontendAccess; - auto newAccess = const_cast(&IOHandler()->m_frontendAccess); - *newAccess = Access::READ_WRITE; - try - { - if (deferred.fileBased) + auto oldAccess = IOHandler()->m_frontendAccess; + auto newAccess = const_cast(&IOHandler()->m_frontendAccess); + *newAccess = Access::READ_WRITE; + try { - readFileBased(deferred.filename, deferred.path, deferred.beginStep); + if (deferred.fileBased) + { + readFileBased( + deferred.filename, deferred.path, deferred.beginStep); + } + else + { + readGorVBased(deferred.path, deferred.beginStep); + } } - else + catch (...) { - readGorVBased(deferred.path, deferred.beginStep); + // reset this thing + it.m_deferredParseAccess = std::optional(); + *newAccess = oldAccess; } - } - catch (...) - { // reset this thing it.m_deferredParseAccess = std::optional(); *newAccess = oldAccess; - throw; + break; + } + case Access::CREATE: + case Access::APPEND: + // no parsing in those modes + return; } - // reset this thing - it.m_deferredParseAccess = std::optional(); - *newAccess = oldAccess; } template float Iteration::time() const; diff --git a/src/Mesh.cpp b/src/Mesh.cpp index 761a9a304a..6f9b891635 100644 --- a/src/Mesh.cpp +++ b/src/Mesh.cpp @@ -216,13 +216,16 @@ template Mesh &Mesh::setTimeOffset(float); void Mesh::flush_impl( std::string const &name, internal::FlushParams const &flushParams) { - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) { + case Access::READ_ONLY: { for (auto &comp : *this) comp.second.flush(comp.first, flushParams); + break; } - else - { + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { if (!written()) { if (scalar()) @@ -260,6 +263,8 @@ void Mesh::flush_impl( } flushAttributes(flushParams); + break; + } } } diff --git a/src/ParticleSpecies.cpp b/src/ParticleSpecies.cpp index fdc4fa6f88..24ebadadec 100644 --- a/src/ParticleSpecies.cpp +++ b/src/ParticleSpecies.cpp @@ -141,15 +141,18 @@ namespace void ParticleSpecies::flush( std::string const &path, internal::FlushParams const &flushParams) { - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) { + case Access::READ_ONLY: { for (auto &record : *this) record.second.flush(record.first, flushParams); for (auto &patch : particlePatches) patch.second.flush(patch.first, flushParams); + break; } - else - { + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { auto it = find("position"); if (it != end()) it->second.setUnitDimension({{UnitDimension::L, 1}}); @@ -168,6 +171,8 @@ void ParticleSpecies::flush( for (auto &patch : particlePatches) patch.second.flush(patch.first, flushParams); } + break; + } } } diff --git a/src/Record.cpp b/src/Record.cpp index 90b0c61a91..bbd74ed0f5 100644 --- a/src/Record.cpp +++ b/src/Record.cpp @@ -46,13 +46,16 @@ Record &Record::setUnitDimension(std::map const &udim) void Record::flush_impl( std::string const &name, internal::FlushParams const &flushParams) { - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) { + case Access::READ_ONLY: { for (auto &comp : *this) comp.second.flush(comp.first, flushParams); + break; } - else - { + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { if (!written()) { if (scalar()) @@ -90,6 +93,8 @@ void Record::flush_impl( } flushAttributes(flushParams); + break; + } } } diff --git a/src/RecordComponent.cpp b/src/RecordComponent.cpp index c69c09e997..9e7a93e6ea 100644 --- a/src/RecordComponent.cpp +++ b/src/RecordComponent.cpp @@ -200,16 +200,18 @@ void RecordComponent::flush( rc.m_name = name; return; } - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) { + case Access::READ_ONLY: while (!rc.m_chunks.empty()) { IOHandler()->enqueue(rc.m_chunks.front()); rc.m_chunks.pop(); } - } - else - { + break; + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { /* * This catches when a user forgets to use resetDataset. */ @@ -275,6 +277,8 @@ void RecordComponent::flush( } flushAttributes(flushParams); + break; + } } } diff --git a/src/Series.cpp b/src/Series.cpp index 6adeefad55..36a7bd90d6 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -301,6 +301,7 @@ Series &Series::setIterationEncoding(IterationEncoding ie) setAttribute("iterationEncoding", std::string("variableBased")); break; } + IOHandler()->setIterationEncoding(ie); return *this; } @@ -469,6 +470,56 @@ bool Series::reparseExpansionPattern(std::string filenameWithExtension) return true; } +namespace +{ + /* + * Negative return values: + * -1: No padding detected, just keep the default from the file name + * -2: Contradicting paddings detected + */ + template + int autoDetectPadding( + std::function isPartOfSeries, + std::string const &directory, + MappingFunction &&mappingFunction) + { + bool isContained; + int padding; + uint64_t iterationIndex; + std::set paddings; + for (auto const &entry : auxiliary::list_directory(directory)) + { + std::tie(isContained, padding, iterationIndex) = + isPartOfSeries(entry); + if (isContained) + { + paddings.insert(padding); + // no std::forward as this is called repeatedly + mappingFunction(iterationIndex, entry); + } + } + if (paddings.size() == 1u) + return *paddings.begin(); + else if (paddings.empty()) + return -1; + else + return -2; + } + + int autoDetectPadding( + std::function isPartOfSeries, + std::string const &directory) + { + return autoDetectPadding( + std::move(isPartOfSeries), + directory, + [](uint64_t index, std::string const &filename) { + (void)index; + (void)filename; + }); + } +} // namespace + void Series::init( std::shared_ptr ioHandler, std::unique_ptr input) @@ -501,9 +552,10 @@ Given file pattern: ')END" << series.m_name << "'" << std::endl; } - if (IOHandler()->m_frontendAccess == Access::READ_ONLY || - IOHandler()->m_frontendAccess == Access::READ_WRITE) + switch (IOHandler()->m_frontendAccess) { + case Access::READ_ONLY: + case Access::READ_WRITE: { /* Allow creation of values in Containers and setting of Attributes * Would throw for Access::READ_ONLY */ auto oldType = IOHandler()->m_frontendAccess; @@ -528,11 +580,43 @@ Given file pattern: ')END" } *newType = oldType; + break; } - else - { + case Access::CREATE: { + initDefaults(input->iterationEncoding); + setIterationEncoding(input->iterationEncoding); + break; + } + case Access::APPEND: { initDefaults(input->iterationEncoding); setIterationEncoding(input->iterationEncoding); + if (input->iterationEncoding != IterationEncoding::fileBased) + { + break; + } + int padding = autoDetectPadding( + matcher( + series.m_filenamePrefix, + series.m_filenamePadding, + series.m_filenamePostfix, + series.m_format), + IOHandler()->directory); + switch (padding) + { + case -2: + throw std::runtime_error( + "Cannot write to a series with inconsistent iteration padding. " + "Please specify '%0T' or open as read-only."); + case -1: + std::cerr << "No matching iterations found: " << name() + << std::endl; + break; + default: + series.m_filenamePadding = padding; + break; + } + break; + } } series.m_lastFlushSuccessful = true; } @@ -609,7 +693,9 @@ void Series::flushFileBased( throw std::runtime_error( "fileBased output can not be written with no iterations."); - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) + { + case Access::READ_ONLY: for (auto it = begin; it != end; ++it) { // Phase 1 @@ -639,8 +725,10 @@ void Series::flushFileBased( IOHandler()->flush(flushParams); } } - else - { + break; + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { bool allDirty = dirty(); for (auto it = begin; it != end; ++it) { @@ -692,6 +780,8 @@ void Series::flushFileBased( dirty() = allDirty; } dirty() = false; + break; + } } } @@ -702,7 +792,9 @@ void Series::flushGorVBased( bool flushIOHandler) { auto &series = get(); - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) + { + case Access::READ_ONLY: for (auto it = begin; it != end; ++it) { // Phase 1 @@ -731,8 +823,10 @@ void Series::flushGorVBased( IOHandler()->flush(flushParams); } } - else - { + break; + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { if (!written()) { Parameter fCreate; @@ -788,6 +882,8 @@ void Series::flushGorVBased( { IOHandler()->flush(flushParams); } + break; + } } } @@ -827,23 +923,15 @@ void Series::readFileBased() series.m_filenamePadding, series.m_filenamePostfix, series.m_format); - bool isContained; - int padding; - uint64_t iterationIndex; - std::set paddings; - for (auto const &entry : auxiliary::list_directory(IOHandler()->directory)) - { - std::tie(isContained, padding, iterationIndex) = isPartOfSeries(entry); - if (isContained) - { - Iteration &i = series.iterations[iterationIndex]; - i.deferParseAccess( - {std::to_string(iterationIndex), iterationIndex, true, entry}); - // TODO skip if the padding is exact the number of chars in an - // iteration? - paddings.insert(padding); - } - } + + int padding = autoDetectPadding( + std::move(isPartOfSeries), + IOHandler()->directory, + // foreach found file with `filename` and `index`: + [&series](uint64_t index, std::string const &filename) { + Iteration &i = series.iterations[index]; + i.deferParseAccess({std::to_string(index), index, true, filename}); + }); if (series.iterations.empty()) { @@ -885,14 +973,15 @@ void Series::readFileBased() } } - if (paddings.size() == 1u) - series.m_filenamePadding = *paddings.begin(); + if (padding > 0) + series.m_filenamePadding = padding; - /* Frontend access type might change during Series::read() to allow - * parameter modification. Backend access type stays unchanged for the - * lifetime of a Series. */ - if (paddings.size() > 1u && - IOHandler()->m_backendAccess == Access::READ_WRITE) + /* Frontend access type might change during SeriesInterface::read() to allow + parameter modification. + * Backend access type stays unchanged for the lifetime of a Series. + autoDetectPadding() announces contradicting paddings with return status + -2. */ + if (padding == -2 && IOHandler()->m_backendAccess == Access::READ_WRITE) throw std::runtime_error( "Cannot write to a series with inconsistent iteration padding. " "Please specify '%0T' or open as read-only."); @@ -1639,6 +1728,10 @@ namespace internal Series impl{{this, [](auto const *) {}}}; impl.flush(); } + if (m_writeIterations.has_value()) + { + m_writeIterations = std::optional(); + } } catch (std::exception const &ex) { diff --git a/src/backend/PatchRecordComponent.cpp b/src/backend/PatchRecordComponent.cpp index ecc44625a2..a5178a9911 100644 --- a/src/backend/PatchRecordComponent.cpp +++ b/src/backend/PatchRecordComponent.cpp @@ -84,16 +84,19 @@ void PatchRecordComponent::flush( std::string const &name, internal::FlushParams const &flushParams) { auto &rc = get(); - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) { + case Access::READ_ONLY: { while (!rc.m_chunks.empty()) { IOHandler()->enqueue(rc.m_chunks.front()); rc.m_chunks.pop(); } + break; } - else - { + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { if (!written()) { Parameter dCreate; @@ -111,6 +114,8 @@ void PatchRecordComponent::flush( } flushAttributes(flushParams); + break; + } } } From d9067758ab8aaaecda17ad056bc931d227ffbf48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 21 Feb 2022 12:01:39 +0100 Subject: [PATCH 03/14] Backend implementation 1) Make backends aware of Append mode 2) In ADIOS1, fix use of namespaces, only use #include statements outside of namespaces --- src/IO/ADIOS/ADIOS2IOHandler.cpp | 23 ++++--------- src/IO/ADIOS/CommonADIOS1IOHandler.cpp | 10 ++++++ src/IO/HDF5/HDF5IOHandler.cpp | 46 ++++++++++++++++++++++---- src/IO/JSON/JSONIOHandlerImpl.cpp | 20 +++++++++-- 4 files changed, 74 insertions(+), 25 deletions(-) diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 3d55598f25..072c5cd285 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -315,9 +315,6 @@ void ADIOS2IOHandlerImpl::createFile( m_iterationEncoding = parameters.encoding; associateWithFile(writable, shared_name); this->m_dirty.emplace(shared_name); - getFileData(shared_name, IfFileNotOpen::OpenImplicitly).m_mode = - adios2::Mode::Write; // WORKAROUND - // ADIOS2 does not yet implement ReadWrite Mode writable->written = true; writable->abstractFilePosition = std::make_shared(); @@ -1074,21 +1071,16 @@ adios2::Mode ADIOS2IOHandlerImpl::adios2AccessMode(std::string const &fullPath) if (auxiliary::directory_exists(fullPath) || auxiliary::file_exists(fullPath)) { - std::cerr << "ADIOS2 does currently not yet implement ReadWrite " - "(Append) mode. " - << "Replacing with Read mode." << std::endl; return adios2::Mode::Read; } else { - std::cerr << "ADIOS2 does currently not yet implement ReadWrite " - "(Append) mode. " - << "Replacing with Write mode." << std::endl; return adios2::Mode::Write; } - default: - return adios2::Mode::Undefined; + case Access::APPEND: + return adios2::Mode::Append; } + throw std::runtime_error("Unreachable!"); } json::TracingJSON ADIOS2IOHandlerImpl::nullvalue = { @@ -2235,6 +2227,7 @@ namespace detail delayOpeningTheFirstStep = true; break; case adios2::Mode::Write: + case adios2::Mode::Append: /* * File engines, write mode: * Default for old layout is no steps. @@ -2442,6 +2435,7 @@ namespace detail { switch (m_mode) { + case adios2::Mode::Append: case adios2::Mode::Write: { // usesSteps attribute only written upon ::advance() // this makes sure that the attribute is only put in case @@ -2686,17 +2680,14 @@ namespace detail switch (ba.m_mode) { case adios2::Mode::Write: + case adios2::Mode::Append: eng.PerformPuts(); break; case adios2::Mode::Read: eng.PerformGets(); break; - case adios2::Mode::Append: - // TODO order? - eng.PerformGets(); - eng.PerformPuts(); - break; default: + throw error::Internal("[ADIOS2] Unexpected access mode."); break; } }, diff --git a/src/IO/ADIOS/CommonADIOS1IOHandler.cpp b/src/IO/ADIOS/CommonADIOS1IOHandler.cpp index e50207a74d..5e30ecabb3 100644 --- a/src/IO/ADIOS/CommonADIOS1IOHandler.cpp +++ b/src/IO/ADIOS/CommonADIOS1IOHandler.cpp @@ -20,6 +20,7 @@ */ #include "openPMD/IO/ADIOS/CommonADIOS1IOHandler.hpp" +#include "openPMD/Error.hpp" #if openPMD_HAVE_ADIOS1 @@ -406,6 +407,15 @@ void CommonADIOS1IOHandlerImpl::createFile( if (!auxiliary::ends_with(name, ".bp")) name += ".bp"; + if (m_handler->m_backendAccess == Access::APPEND && + auxiliary::file_exists(name)) + { + throw error::OperationUnsupportedInBackend( + "ADIOS1", + "Appending to existing file on disk (use Access::CREATE to " + "overwrite)"); + } + writable->written = true; writable->abstractFilePosition = std::make_shared("/"); diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index d0725395f7..86f79ea391 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -237,13 +237,40 @@ void HDF5IOHandlerImpl::createFile( std::string name = m_handler->directory + parameters.name; if (!auxiliary::ends_with(name, ".h5")) name += ".h5"; - unsigned flags; - if (m_handler->m_backendAccess == Access::CREATE) + unsigned flags{}; + switch (m_handler->m_backendAccess) + { + case Access::CREATE: flags = H5F_ACC_TRUNC; - else + break; + case Access::APPEND: + if (auxiliary::file_exists(name)) + { + flags = H5F_ACC_RDWR; + } + else + { + flags = H5F_ACC_TRUNC; + } + break; + case Access::READ_WRITE: flags = H5F_ACC_EXCL; - hid_t id = - H5Fcreate(name.c_str(), flags, H5P_DEFAULT, m_fileAccessProperty); + break; + case Access::READ_ONLY: + // condition has been checked above + throw std::runtime_error("Control flow error"); + } + + hid_t id{}; + if (flags == H5F_ACC_RDWR) + { + id = H5Fopen(name.c_str(), flags, m_fileAccessProperty); + } + else + { + id = H5Fcreate( + name.c_str(), flags, H5P_DEFAULT, m_fileAccessProperty); + } VERIFY(id >= 0, "[HDF5] Internal error: Failed to create HDF5 file"); writable->written = true; @@ -702,7 +729,14 @@ void HDF5IOHandlerImpl::openFile( Access at = m_handler->m_backendAccess; if (at == Access::READ_ONLY) flags = H5F_ACC_RDONLY; - else if (at == Access::READ_WRITE || at == Access::CREATE) + /* + * Within the HDF5 backend, APPEND and READ_WRITE mode are + * equivalent, but the openPMD frontend exposes no reading + * functionality in APPEND mode. + */ + else if ( + at == Access::READ_WRITE || at == Access::CREATE || + at == Access::APPEND) flags = H5F_ACC_RDWR; else throw std::runtime_error("[HDF5] Unknown file Access"); diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 61f8d239d5..6f8666fae5 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -117,7 +117,7 @@ void JSONIOHandlerImpl::createFile( file.invalidate(); } - std::string const dir(m_handler->directory); + std::string const &dir(m_handler->directory); if (!auxiliary::directory_exists(dir)) { auto success = auxiliary::create_directories(dir); @@ -126,8 +126,14 @@ void JSONIOHandlerImpl::createFile( associateWithFile(writable, shared_name); this->m_dirty.emplace(shared_name); - // make sure to overwrite! - this->m_jsonVals[shared_name] = std::make_shared(); + + if (m_handler->m_backendAccess != Access::APPEND) + { + // make sure to overwrite! + this->m_jsonVals[shared_name] = std::make_shared(); + } + // else: the JSON value is not available in m_jsonVals and will be + // read from the file later on before overwriting writable->written = true; writable->abstractFilePosition = std::make_shared(); @@ -910,6 +916,14 @@ JSONIOHandlerImpl::getFilehandle(File fileName, Access access) { case Access::CREATE: case Access::READ_WRITE: + case Access::APPEND: + /* + * Always truncate when writing, we alway write entire JSON + * datasets, never partial ones. + * Within the JSON backend, APPEND and READ_WRITE mode are + * equivalent, but the openPMD frontend exposes no reading + * functionality in APPEND mode. + */ fs->open(path, std::ios_base::out | std::ios_base::trunc); break; case Access::READ_ONLY: From 829da9c8d7bc26f52d58d03e5bbabe42d35cc9ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 21 Feb 2022 12:01:41 +0100 Subject: [PATCH 04/14] Documentation and testing --- docs/source/usage/workflow.rst | 33 ++++++ test/SerialIOTest.cpp | 185 ++++++++++++++++++++++++++++++++- 2 files changed, 216 insertions(+), 2 deletions(-) diff --git a/docs/source/usage/workflow.rst b/docs/source/usage/workflow.rst index febcebb41e..6c36e74ec0 100644 --- a/docs/source/usage/workflow.rst +++ b/docs/source/usage/workflow.rst @@ -1,5 +1,38 @@ .. _workflow: +Access modes +============ + +The openPMD-api distinguishes between a number of different access modes: + +* **Create mode**: Used for creating a new Series from scratch. + Any file possibly existing in the specified location will be overwritten. +* **Read-only mode**: Used for reading from an existing Series. + No modifications will be made. +* **Read/Write mode**: Creates a new Series if not existing, otherwise opens an existing Series for reading and writing. + New datasets and iterations will be inserted as needed. + Not fully supported by all backends: + * ADIOS1: Automatically coerced to *Create* mode if the file does not exist yet and to *Read-only* mode if it exists. + * ADIOS2: Automatically coerced to *Create* mode if the file does not exist yet and to *Read-only* mode if it exists. + Since this happens on a per-file level, this mode allows to read from existing iterations and write to new iterations at the same time in file-based iteration encoding. +* **Append mode**: Restricted mode for appending new iterations to an existing Series that is supported by all backends at least in file-based iteration encoding, and by all but ADIOS1 in other encodings. + The API is equivalent to that of the *Create* mode, meaning that no reading is supported whatsoever. + If the Series does not exist yet, this behaves equivalently to the *Create* mode. + Existing iterations will not be deleted, newly-written iterations will be inserted. + + **Warning:** If writing an iteration that already exists, the behavior is implementation-defined and depends on the chosen backend and iteration encoding: + + * The new iteration might fully replace the old one. + * The new iteration might be merged into the old one. + * (To be removed in a future update) The old and new iteration might coexist in the resulting dataset. + + We suggest to fully define iterations when using Append mode (i.e. as if using Create mode) to avoid implementation-specific behavior. + Appending to an openPMD Series is only supported on a per-iteration level. + + **Warning:** There is no reading involved in using Append mode. + It is a user's responsibility to ensure that the appended dataset and the appended-to dataset are compatible with each other. + The results of using incompatible backend configurations are undefined. + Workflow ======== diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index dc366fa773..3447b44e7c 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -1917,9 +1917,26 @@ inline void fileBased_write_test(const std::string &backend) REQUIRE(o.iterations[5].time() == 5.0); } - // extend existing series with new step and auto-detection of iteration - // padding + if (backend == "bp") { + // Append + filebased iteration encoding works for all backends + Series o = Series( + "../samples/subdir/serial_fileBased_write%T." + backend, + Access::APPEND); + // Append mode does not support reading anything that already exists + REQUIRE(o.iterations.size() == 0); + // write something to trigger opening of the file + o.iterations[6].particles["e"]["position"]["x"].resetDataset( + {Datatype::DOUBLE, {10}}); + o.iterations[6].particles["e"]["position"]["x"].makeConstant( + 1.0); + } + else + { + // @todo enable a workflow for ADIOS2 where only either reading from or + // writing to an iteration works + // extend existing series with new step and auto-detection of iteration + // padding Series o = Series( "../samples/subdir/serial_fileBased_write%T." + backend, Access::READ_WRITE); @@ -2009,6 +2026,7 @@ inline void fileBased_write_test(const std::string &backend) } // write with auto-detection and in-consistent padding from step 10 + if (backend != "bp") { REQUIRE_THROWS_WITH( Series( @@ -5715,3 +5733,166 @@ TEST_CASE("varying_zero_pattern", "[serial]") varying_pattern(t); } } + +void append_mode(std::string const &extension) +{ + std::string jsonConfig = R"END( +{ + "adios2": + { + "schema": 20210209, + "engine": + { + "usesteps" : true + } + } +})END"; + auto writeSomeIterations = [](WriteIterations &&writeIterations, + std::vector indices) { + for (auto index : indices) + { + auto it = writeIterations[index]; + auto dataset = it.meshes["E"]["x"]; + dataset.resetDataset({Datatype::INT, {1}}); + dataset.makeConstant(0); + // test that it works without closing too + it.close(); + } + }; + { + Series write( + "../samples/append." + extension, Access::CREATE, jsonConfig); + writeSomeIterations( + write.writeIterations(), std::vector{0, 1}); + } + { + Series write( + "../samples/append." + extension, Access::APPEND, jsonConfig); + if (write.backend() == "ADIOS1") + { + REQUIRE_THROWS_AS( + write.flush(), error::OperationUnsupportedInBackend); + // destructor will be noisy now + return; + } + + writeSomeIterations( + write.writeIterations(), std::vector{2, 3}); + write.flush(); + } + { + Series write( + "../samples/append." + extension, Access::APPEND, jsonConfig); + if (write.backend() == "ADIOS1") + { + REQUIRE_THROWS_AS( + write.flush(), error::OperationUnsupportedInBackend); + // destructor will be noisy now + return; + } + + writeSomeIterations( + write.writeIterations(), std::vector{4, 3}); + write.flush(); + } + { + Series read("../samples/append." + extension, Access::READ_ONLY); + REQUIRE(read.iterations.size() == 5); + /* + * Roadmap: for now, reading this should work by ignoring the last + * duplicate iteration. + * After merging https://github.com/openPMD/openPMD-api/pull/949, we + * should see both instances when reading. + * Final goal: Read only the last instance. + */ + helper::listSeries(read); + } +} + +TEST_CASE("append_mode", "[serial]") +{ + for (auto const &t : testedFileExtensions()) + { + append_mode(t); + } +} + +void append_mode_filebased(std::string const &extension) +{ + std::string jsonConfig = R"END( +{ + "adios2": + { + "schema": 20210209, + "engine": + { + "usesteps" : true + } + } +})END"; + auto writeSomeIterations = [](WriteIterations &&writeIterations, + std::vector indices) { + for (auto index : indices) + { + auto it = writeIterations[index]; + auto dataset = it.meshes["E"]["x"]; + dataset.resetDataset({Datatype::INT, {1}}); + dataset.makeConstant(0); + // test that it works without closing too + it.close(); + } + }; + if (auxiliary::directory_exists("../samples/append")) + { + auxiliary::remove_directory("../samples/append"); + } + { + Series write( + "../samples/append/append_%T." + extension, + Access::CREATE, + jsonConfig); + writeSomeIterations( + write.writeIterations(), std::vector{0, 1}); + } + { + Series write( + "../samples/append/append_%T." + extension, + Access::APPEND, + jsonConfig); + writeSomeIterations( + write.writeIterations(), std::vector{4, 5}); + write.flush(); + } + { + Series write( + "../samples/append/append_%T." + extension, + Access::APPEND, + jsonConfig); + writeSomeIterations( + write.writeIterations(), std::vector{2, 3}); + write.flush(); + } + { + Series write( + "../samples/append/append_%T." + extension, + Access::APPEND, + jsonConfig); + // overwrite a previous iteration + writeSomeIterations( + write.writeIterations(), std::vector{4, 123}); + write.flush(); + } + { + Series read( + "../samples/append/append_%T." + extension, Access::READ_ONLY); + REQUIRE(read.iterations.size() == 7); + } +} + +TEST_CASE("append_mode_filebased", "[serial]") +{ + for (auto const &t : testedFileExtensions()) + { + append_mode_filebased(t); + } +} From e865a031568b74eea2ab5fea56eda133a3fdac50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 21 Feb 2022 12:01:42 +0100 Subject: [PATCH 05/14] Extend tests: also use variable-based encoding --- test/SerialIOTest.cpp | 112 ++++++++++++++++++++++++++++++++---------- 1 file changed, 87 insertions(+), 25 deletions(-) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 3447b44e7c..fec6244f90 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -5734,40 +5734,48 @@ TEST_CASE("varying_zero_pattern", "[serial]") } } -void append_mode(std::string const &extension) +void append_mode( + std::string const &extension, + bool variableBased, + std::string jsonConfig = "{}") { - std::string jsonConfig = R"END( -{ - "adios2": - { - "schema": 20210209, - "engine": - { - "usesteps" : true - } - } -})END"; - auto writeSomeIterations = [](WriteIterations &&writeIterations, - std::vector indices) { + + std::string filename = (variableBased ? "../samples/append_variablebased." + : "../samples/append_groupbased.") + + extension; + std::vector data(10, 0); + auto writeSomeIterations = [&data]( + WriteIterations &&writeIterations, + std::vector indices) { for (auto index : indices) { auto it = writeIterations[index]; auto dataset = it.meshes["E"]["x"]; - dataset.resetDataset({Datatype::INT, {1}}); - dataset.makeConstant(0); + dataset.resetDataset({Datatype::INT, {10}}); + dataset.storeChunk(data, {0}, {10}); // test that it works without closing too it.close(); } }; { - Series write( - "../samples/append." + extension, Access::CREATE, jsonConfig); + Series write(filename, Access::CREATE, jsonConfig); + if (variableBased) + { + if (write.backend() != "ADIOS2") + { + return; + } + write.setIterationEncoding(IterationEncoding::variableBased); + } writeSomeIterations( write.writeIterations(), std::vector{0, 1}); } { - Series write( - "../samples/append." + extension, Access::APPEND, jsonConfig); + Series write(filename, Access::APPEND, jsonConfig); + if (variableBased) + { + write.setIterationEncoding(IterationEncoding::variableBased); + } if (write.backend() == "ADIOS1") { REQUIRE_THROWS_AS( @@ -5781,8 +5789,11 @@ void append_mode(std::string const &extension) write.flush(); } { - Series write( - "../samples/append." + extension, Access::APPEND, jsonConfig); + Series write(filename, Access::APPEND, jsonConfig); + if (variableBased) + { + write.setIterationEncoding(IterationEncoding::variableBased); + } if (write.backend() == "ADIOS1") { REQUIRE_THROWS_AS( @@ -5796,8 +5807,23 @@ void append_mode(std::string const &extension) write.flush(); } { - Series read("../samples/append." + extension, Access::READ_ONLY); - REQUIRE(read.iterations.size() == 5); + Series read(filename, Access::READ_ONLY); + if (variableBased) + { + // in variable-based encodings, iterations are not parsed ahead of + // time but as they go + unsigned counter = 0; + for (auto iteration : read.readIterations()) + { + REQUIRE(iteration.iterationIndex == counter); + ++counter; + } + REQUIRE(counter == 5); + } + else + { + REQUIRE(read.iterations.size() == 5); + } /* * Roadmap: for now, reading this should work by ignoring the last * duplicate iteration. @@ -5813,7 +5839,43 @@ TEST_CASE("append_mode", "[serial]") { for (auto const &t : testedFileExtensions()) { - append_mode(t); + if (t == "h5") + { + continue; + } + if (t == "bp") + { + std::string jsonConfigOld = R"END( +{ + "adios2": + { + "schema": 0, + "engine": + { + "usesteps" : true + } + } +})END"; + std::string jsonConfigNew = R"END( +{ + "adios2": + { + "schema": 20210209, + "engine": + { + "usesteps" : true + } + } +})END"; + append_mode(t, false, jsonConfigOld); + append_mode(t, false, jsonConfigNew); + append_mode(t, true, jsonConfigOld); + append_mode(t, true, jsonConfigNew); + } + else + { + append_mode(t, false); + } } } From de98254a1675e7d4a01b30eeb501adaffef5d8de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 21 Feb 2022 12:01:43 +0100 Subject: [PATCH 06/14] Allow overwriting old datasets in HDF5 --- src/IO/HDF5/HDF5IOHandler.cpp | 40 +++++++++++++++++++++++++++++++++++ test/SerialIOTest.cpp | 4 ---- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 86f79ea391..6d426f3a76 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -436,6 +436,46 @@ void HDF5IOHandlerImpl::createDataset( "[HDF5] Internal error: Failed to open HDF5 group during dataset " "creation"); + if (m_handler->m_backendAccess == Access::APPEND) + { + // The dataset might already exist in the file from a previous run + // We delete it, otherwise we could not create it again with + // possibly different parameters. + // This is inefficient, as only the link to the dataset will be + // removed, but not the actual dataset + // But such is life if overwriting an iteration in Append mode + H5G_info_t group_info; + herr_t status = H5Gget_info(node_id, &group_info); + VERIFY( + status == 0, + "[HDF5] Internal error: Failed to get HDF5 group info for " + + concrete_h5_file_position(writable) + + " during dataset creation"); + for (hsize_t i = 0; i < group_info.nlinks; ++i) + { + if (H5G_DATASET != H5Gget_objtype_by_idx(node_id, i)) + { + continue; + } + ssize_t name_length = + H5Gget_objname_by_idx(node_id, i, nullptr, 0); + std::vector charbuffer(name_length + 1); + H5Gget_objname_by_idx( + node_id, i, charbuffer.data(), name_length + 1); + if (std::strncmp( + name.c_str(), charbuffer.data(), name_length + 1) != 0) + { + continue; + } + status = H5Ldelete(node_id, name.c_str(), H5P_DEFAULT); + VERIFY( + status == 0, + "[HDF5] Internal error: Failed to delete old dataset from " + "group for overwriting."); + break; + } + } + Datatype d = parameters.dtype; if (d == Datatype::UNDEFINED) { diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index fec6244f90..025245b0d0 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -5839,10 +5839,6 @@ TEST_CASE("append_mode", "[serial]") { for (auto const &t : testedFileExtensions()) { - if (t == "h5") - { - continue; - } if (t == "bp") { std::string jsonConfigOld = R"END( From a26af8704310246f7371eeca9eee2faa9a8c93ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 21 Feb 2022 12:01:44 +0100 Subject: [PATCH 07/14] Test that RW-mode in group-based mode still works --- test/SerialIOTest.cpp | 94 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 025245b0d0..9d0bd3ab96 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -5954,3 +5954,97 @@ TEST_CASE("append_mode_filebased", "[serial]") append_mode_filebased(t); } } + +void groupbased_read_write(std::string const &ext) +{ + int data = 0; + Dataset ds(Datatype::INT, {1}); + std::string filename = "../samples/groupbased_read_write." + ext; + + { + Series write(filename, Access::CREATE); + auto E_x = write.iterations[0].meshes["E"]["x"]; + auto E_y = write.iterations[0].meshes["E"]["y"]; + E_x.resetDataset(ds); + E_y.resetDataset(ds); + E_x.storeChunk(shareRaw(&data), {0}, {1}); + E_y.storeChunk(shareRaw(&data), {0}, {1}); + + E_x.setAttribute("updated_in_run", 0); + E_y.setAttribute("updated_in_run", 0); + } + + { + Series write(filename, Access::READ_WRITE); + // create a new iteration + auto E_x = write.iterations[1].meshes["E"]["x"]; + E_x.resetDataset(ds); + + // overwrite old dataset + auto E_y = write.iterations[0].meshes["E"]["y"]; + + data = 1; + + E_x.storeChunk(shareRaw(&data), {0}, {1}); + E_y.storeChunk(shareRaw(&data), {0}, {1}); + + E_x.setAttribute("updated_in_run", 1); + E_y.setAttribute("updated_in_run", 1); + } + + { + Series read(filename, Access::READ_ONLY); + auto E_x_0_fromRun0 = read.iterations[0].meshes["E"]["x"]; + auto E_x_1_fromRun1 = read.iterations[1].meshes["E"]["x"]; + auto E_y_0_fromRun1 = read.iterations[0].meshes["E"]["y"]; + + REQUIRE(E_x_0_fromRun0.getAttribute("updated_in_run").get() == 0); + REQUIRE(E_x_1_fromRun1.getAttribute("updated_in_run").get() == 1); + REQUIRE(E_y_0_fromRun1.getAttribute("updated_in_run").get() == 1); + + auto chunk_E_x_0_fromRun0 = E_x_0_fromRun0.loadChunk({0}, {1}); + auto chunk_E_x_1_fromRun1 = E_x_1_fromRun1.loadChunk({0}, {1}); + auto chunk_E_y_0_fromRun1 = E_y_0_fromRun1.loadChunk({0}, {1}); + + read.flush(); + + REQUIRE(*chunk_E_x_0_fromRun0 == 0); + REQUIRE(*chunk_E_x_1_fromRun1 == 1); + REQUIRE(*chunk_E_y_0_fromRun1 == 1); + } + + // check that truncation works correctly + { + Series write(filename, Access::CREATE); + // create a new iteration + auto E_x = write.iterations[2].meshes["E"]["x"]; + E_x.resetDataset(ds); + + data = 2; + + E_x.storeChunk(shareRaw(&data), {0}, {1}); + E_x.setAttribute("updated_in_run", 2); + } + + { + Series read(filename, Access::READ_ONLY); + REQUIRE(read.iterations.size() == 1); + REQUIRE(read.iterations.count(2) == 1); + } +} + +TEST_CASE("groupbased_read_write", "[serial]") +{ + constexpr char const *supportsGroupbasedRW[] = {"h5", "json"}; + for (auto const &t : testedFileExtensions()) + { + for (auto const supported : supportsGroupbasedRW) + { + if (t == supported) + { + groupbased_read_write(t); + break; + } + } + } +} From 52176ca4cc6e9841cae5126c845fda881ec0895b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 21 Feb 2022 12:01:44 +0100 Subject: [PATCH 08/14] Document extended meaning of createFile task --- include/openPMD/IO/AbstractIOHandlerImpl.hpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/include/openPMD/IO/AbstractIOHandlerImpl.hpp b/include/openPMD/IO/AbstractIOHandlerImpl.hpp index e171115f95..518b9dac0a 100644 --- a/include/openPMD/IO/AbstractIOHandlerImpl.hpp +++ b/include/openPMD/IO/AbstractIOHandlerImpl.hpp @@ -266,14 +266,17 @@ class AbstractIOHandlerImpl * file. * * The operation should fail if m_handler->m_frontendAccess is - * Access::READ_ONLY. The new file should be located in - * m_handler->directory. The new file should have the filename - * parameters.name. The filename should include the correct corresponding - * filename extension. Any existing file should be overwritten if - * m_handler->m_frontendAccess is Access::CREATE. The Writables file - * position should correspond to the root group "/" of the hierarchy. The - * Writable should be marked written when the operation completes - * successfully. + * Access::READ_ONLY. If m_handler->m_frontendAccess is Access::APPEND, a + * possibly existing file should not be overwritten. Instead, written + * updates should then either occur in-place or in form of new IO steps. + * Support for reading is not necessary in Append mode. + * The new file should be located in m_handler->directory. + * The new file should have the filename parameters.name. + * The filename should include the correct corresponding filename extension. + * Any existing file should be overwritten if m_handler->m_frontendAccess is + * Access::CREATE. The Writables file position should correspond to the root + * group "/" of the hierarchy. The Writable should be marked written when + * the operation completes successfully. */ virtual void createFile(Writable *, Parameter const &) = 0; From 4ef966b52b25335d115ab996377f771b4f375872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 21 Feb 2022 12:01:45 +0100 Subject: [PATCH 09/14] Apply suggestions from code review Co-authored-by: Axel Huebl --- docs/source/usage/workflow.rst | 5 +++-- include/openPMD/Error.hpp | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/usage/workflow.rst b/docs/source/usage/workflow.rst index 6c36e74ec0..7cf0e232d7 100644 --- a/docs/source/usage/workflow.rst +++ b/docs/source/usage/workflow.rst @@ -12,15 +12,16 @@ The openPMD-api distinguishes between a number of different access modes: * **Read/Write mode**: Creates a new Series if not existing, otherwise opens an existing Series for reading and writing. New datasets and iterations will be inserted as needed. Not fully supported by all backends: + * ADIOS1: Automatically coerced to *Create* mode if the file does not exist yet and to *Read-only* mode if it exists. * ADIOS2: Automatically coerced to *Create* mode if the file does not exist yet and to *Read-only* mode if it exists. - Since this happens on a per-file level, this mode allows to read from existing iterations and write to new iterations at the same time in file-based iteration encoding. + Since this happens on a per-file level, this mode allows to read from existing iterations and write to new iterations at the same time in file-based iteration encoding. * **Append mode**: Restricted mode for appending new iterations to an existing Series that is supported by all backends at least in file-based iteration encoding, and by all but ADIOS1 in other encodings. The API is equivalent to that of the *Create* mode, meaning that no reading is supported whatsoever. If the Series does not exist yet, this behaves equivalently to the *Create* mode. Existing iterations will not be deleted, newly-written iterations will be inserted. - **Warning:** If writing an iteration that already exists, the behavior is implementation-defined and depends on the chosen backend and iteration encoding: + **Warning:** When writing an iteration that already exists, the behavior is implementation-defined and depends on the chosen backend and iteration encoding: * The new iteration might fully replace the old one. * The new iteration might be merged into the old one. diff --git a/include/openPMD/Error.hpp b/include/openPMD/Error.hpp index e9267a07a3..e172670bcc 100644 --- a/include/openPMD/Error.hpp +++ b/include/openPMD/Error.hpp @@ -75,7 +75,6 @@ namespace error * * Example: A nullpointer is observed somewhere. */ - class Internal : public Error { public: From e9caae7b39689cb0742c5ad9c518c899e28724a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 21 Feb 2022 12:01:46 +0100 Subject: [PATCH 10/14] Remove special test paths for ADIOS2 --- test/SerialIOTest.cpp | 34 +++++++--------------------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 9d0bd3ab96..721f591be7 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -1917,24 +1917,7 @@ inline void fileBased_write_test(const std::string &backend) REQUIRE(o.iterations[5].time() == 5.0); } - if (backend == "bp") { - // Append + filebased iteration encoding works for all backends - Series o = Series( - "../samples/subdir/serial_fileBased_write%T." + backend, - Access::APPEND); - // Append mode does not support reading anything that already exists - REQUIRE(o.iterations.size() == 0); - // write something to trigger opening of the file - o.iterations[6].particles["e"]["position"]["x"].resetDataset( - {Datatype::DOUBLE, {10}}); - o.iterations[6].particles["e"]["position"]["x"].makeConstant( - 1.0); - } - else - { - // @todo enable a workflow for ADIOS2 where only either reading from or - // writing to an iteration works // extend existing series with new step and auto-detection of iteration // padding Series o = Series( @@ -2026,16 +2009,13 @@ inline void fileBased_write_test(const std::string &backend) } // write with auto-detection and in-consistent padding from step 10 - if (backend != "bp") - { - REQUIRE_THROWS_WITH( - Series( - "../samples/subdir/serial_fileBased_write%T." + backend, - Access::READ_WRITE), - Catch::Equals( - "Cannot write to a series with inconsistent iteration padding. " - "Please specify '%0T' or open as read-only.")); - } + REQUIRE_THROWS_WITH( + Series( + "../samples/subdir/serial_fileBased_write%T." + backend, + Access::READ_WRITE), + Catch::Equals( + "Cannot write to a series with inconsistent iteration padding. " + "Please specify '%0T' or open as read-only.")); // read back with fixed padding { From 4f2130ba57ad8c490be01df0148abe40979dd3d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 21 Feb 2022 12:36:58 +0100 Subject: [PATCH 11/14] C++17 and other CI fixes --- test/SerialIOTest.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 721f591be7..155d4fc58d 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -5758,8 +5758,11 @@ void append_mode( } if (write.backend() == "ADIOS1") { - REQUIRE_THROWS_AS( - write.flush(), error::OperationUnsupportedInBackend); + REQUIRE_THROWS_WITH( + write.flush(), + Catch::Equals( + "Operation unsupported in ADIOS1: Appending to existing " + "file on disk (use Access::CREATE to overwrite)")); // destructor will be noisy now return; } @@ -5776,8 +5779,11 @@ void append_mode( } if (write.backend() == "ADIOS1") { - REQUIRE_THROWS_AS( - write.flush(), error::OperationUnsupportedInBackend); + REQUIRE_THROWS_WITH( + write.flush(), + Catch::Equals( + "Operation unsupported in ADIOS1: Appending to existing " + "file on disk (use Access::CREATE to overwrite)")); // destructor will be noisy now return; } @@ -5793,7 +5799,7 @@ void append_mode( // in variable-based encodings, iterations are not parsed ahead of // time but as they go unsigned counter = 0; - for (auto iteration : read.readIterations()) + for (auto const &iteration : read.readIterations()) { REQUIRE(iteration.iterationIndex == counter); ++counter; From 1c25b4f999e602c70ca9ddc4e6d39e935f4aeba4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 2 May 2022 14:01:46 +0200 Subject: [PATCH 12/14] Add missing throw --- src/Iteration.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Iteration.cpp b/src/Iteration.cpp index 91a97ea030..1cf77ec180 100644 --- a/src/Iteration.cpp +++ b/src/Iteration.cpp @@ -743,6 +743,7 @@ void Iteration::runDeferredParseAccess() // reset this thing it.m_deferredParseAccess = std::optional(); *newAccess = oldAccess; + throw; } // reset this thing it.m_deferredParseAccess = std::optional(); From 184f2a6abcb10f7068e55d68eadd6a82e2ae5492 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 6 May 2022 10:44:27 +0200 Subject: [PATCH 13/14] Apply suggestions from code review Co-authored-by: Axel Huebl --- src/Error.cpp | 5 ++++- src/IO/HDF5/HDF5IOHandler.cpp | 7 ++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/Error.cpp b/src/Error.cpp index eb333c502f..a0331948c8 100644 --- a/src/Error.cpp +++ b/src/Error.cpp @@ -47,7 +47,10 @@ namespace error {} Internal::Internal(std::string const &what) - : Error("Internal error: " + what + "\nThis is a bug. Please report.") + : Error( + "Internal error: " + what + + "\nThis is a bug. Please report at ' " + "https://github.com/openPMD/openPMD-api/issues'.") {} } // namespace error } // namespace openPMD diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 6d426f3a76..72a7219a78 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -258,7 +258,8 @@ void HDF5IOHandlerImpl::createFile( break; case Access::READ_ONLY: // condition has been checked above - throw std::runtime_error("Control flow error"); + throw std::runtime_error( + "[HDF5] Control flow error in createFile backend access mode."); } hid_t id{}; @@ -470,8 +471,8 @@ void HDF5IOHandlerImpl::createDataset( status = H5Ldelete(node_id, name.c_str(), H5P_DEFAULT); VERIFY( status == 0, - "[HDF5] Internal error: Failed to delete old dataset from " - "group for overwriting."); + "[HDF5] Internal error: Failed to delete old dataset '" + + name + "' from group for overwriting."); break; } } From 7afb99dbb71bde944a42969db99122732524222f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 9 May 2022 11:20:55 +0200 Subject: [PATCH 14/14] Use H5Lexists to be more efficient in lookup --- src/IO/HDF5/HDF5IOHandler.cpp | 44 ++++++++++++++--------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 72a7219a78..51a013fca8 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -442,39 +442,29 @@ void HDF5IOHandlerImpl::createDataset( // The dataset might already exist in the file from a previous run // We delete it, otherwise we could not create it again with // possibly different parameters. - // This is inefficient, as only the link to the dataset will be - // removed, but not the actual dataset - // But such is life if overwriting an iteration in Append mode - H5G_info_t group_info; - herr_t status = H5Gget_info(node_id, &group_info); - VERIFY( - status == 0, - "[HDF5] Internal error: Failed to get HDF5 group info for " + - concrete_h5_file_position(writable) + - " during dataset creation"); - for (hsize_t i = 0; i < group_info.nlinks; ++i) + if (htri_t link_id = H5Lexists(node_id, name.c_str(), H5P_DEFAULT); + link_id > 0) { - if (H5G_DATASET != H5Gget_objtype_by_idx(node_id, i)) - { - continue; - } - ssize_t name_length = - H5Gget_objname_by_idx(node_id, i, nullptr, 0); - std::vector charbuffer(name_length + 1); - H5Gget_objname_by_idx( - node_id, i, charbuffer.data(), name_length + 1); - if (std::strncmp( - name.c_str(), charbuffer.data(), name_length + 1) != 0) - { - continue; - } - status = H5Ldelete(node_id, name.c_str(), H5P_DEFAULT); + // This only unlinks, but does not delete the dataset + // Deleting the actual dataset physically is now up to HDF5: + // > when removing an object with H5Ldelete, the HDF5 library + // > should be able to detect and recycle the file space when no + // > other reference to the deleted object exists + // https://github.com/openPMD/openPMD-api/pull/1007#discussion_r867223316 + herr_t status = H5Ldelete(node_id, name.c_str(), H5P_DEFAULT); VERIFY( status == 0, "[HDF5] Internal error: Failed to delete old dataset '" + name + "' from group for overwriting."); - break; } + else if (link_id < 0) + { + throw std::runtime_error( + "[HDF5] Internal error: Failed to check for link existence " + "of '" + + name + "' inside group for overwriting."); + } + // else: link_id == 0: Link does not exist, nothing to do } Datatype d = parameters.dtype;