diff --git a/docs/source/usage/workflow.rst b/docs/source/usage/workflow.rst index febcebb41e..7cf0e232d7 100644 --- a/docs/source/usage/workflow.rst +++ b/docs/source/usage/workflow.rst @@ -1,5 +1,39 @@ .. _workflow: +Access modes +============ + +The openPMD-api distinguishes between a number of different access modes: + +* **Create mode**: Used for creating a new Series from scratch. + Any file possibly existing in the specified location will be overwritten. +* **Read-only mode**: Used for reading from an existing Series. + No modifications will be made. +* **Read/Write mode**: Creates a new Series if not existing, otherwise opens an existing Series for reading and writing. + New datasets and iterations will be inserted as needed. + Not fully supported by all backends: + + * ADIOS1: Automatically coerced to *Create* mode if the file does not exist yet and to *Read-only* mode if it exists. + * ADIOS2: Automatically coerced to *Create* mode if the file does not exist yet and to *Read-only* mode if it exists. + Since this happens on a per-file level, this mode allows to read from existing iterations and write to new iterations at the same time in file-based iteration encoding. +* **Append mode**: Restricted mode for appending new iterations to an existing Series that is supported by all backends at least in file-based iteration encoding, and by all but ADIOS1 in other encodings. + The API is equivalent to that of the *Create* mode, meaning that no reading is supported whatsoever. + If the Series does not exist yet, this behaves equivalently to the *Create* mode. + Existing iterations will not be deleted, newly-written iterations will be inserted. + + **Warning:** When writing an iteration that already exists, the behavior is implementation-defined and depends on the chosen backend and iteration encoding: + + * The new iteration might fully replace the old one. + * The new iteration might be merged into the old one. + * (To be removed in a future update) The old and new iteration might coexist in the resulting dataset. + + We suggest to fully define iterations when using Append mode (i.e. as if using Create mode) to avoid implementation-specific behavior. + Appending to an openPMD Series is only supported on a per-iteration level. + + **Warning:** There is no reading involved in using Append mode. + It is a user's responsibility to ensure that the appended dataset and the appended-to dataset are compatible with each other. + The results of using incompatible backend configurations are undefined. + Workflow ======== diff --git a/include/openPMD/Error.hpp b/include/openPMD/Error.hpp index 4fcda11eeb..e172670bcc 100644 --- a/include/openPMD/Error.hpp +++ b/include/openPMD/Error.hpp @@ -69,5 +69,16 @@ namespace error BackendConfigSchema(std::vector, std::string what); }; + + /** + * @brief Internal errors that should not happen. Please report. + * + * Example: A nullpointer is observed somewhere. + */ + class Internal : public Error + { + public: + Internal(std::string const &what); + }; } // namespace error } // namespace openPMD diff --git a/include/openPMD/IO/AbstractIOHandler.hpp b/include/openPMD/IO/AbstractIOHandler.hpp index a7239a3375..80616e62bc 100644 --- a/include/openPMD/IO/AbstractIOHandler.hpp +++ b/include/openPMD/IO/AbstractIOHandler.hpp @@ -121,6 +121,23 @@ namespace internal */ class AbstractIOHandler { + friend class Series; + +private: + void setIterationEncoding(IterationEncoding encoding) + { + /* + * In file-based iteration encoding, the APPEND mode is handled entirely + * by the frontend, the backend should just treat it as CREATE mode + */ + if (encoding == IterationEncoding::fileBased && + m_backendAccess == Access::APPEND) + { + // do we really want to have those as const members..? + *const_cast(&m_backendAccess) = Access::CREATE; + } + } + public: #if openPMD_HAVE_MPI AbstractIOHandler(std::string path, Access at, MPI_Comm) @@ -153,6 +170,7 @@ class AbstractIOHandler virtual std::string backendName() const = 0; std::string const directory; + // why do these need to be separate? Access const m_backendAccess; Access const m_frontendAccess; std::queue m_work; diff --git a/include/openPMD/IO/AbstractIOHandlerImpl.hpp b/include/openPMD/IO/AbstractIOHandlerImpl.hpp index e171115f95..518b9dac0a 100644 --- a/include/openPMD/IO/AbstractIOHandlerImpl.hpp +++ b/include/openPMD/IO/AbstractIOHandlerImpl.hpp @@ -266,14 +266,17 @@ class AbstractIOHandlerImpl * file. * * The operation should fail if m_handler->m_frontendAccess is - * Access::READ_ONLY. The new file should be located in - * m_handler->directory. The new file should have the filename - * parameters.name. The filename should include the correct corresponding - * filename extension. Any existing file should be overwritten if - * m_handler->m_frontendAccess is Access::CREATE. The Writables file - * position should correspond to the root group "/" of the hierarchy. The - * Writable should be marked written when the operation completes - * successfully. + * Access::READ_ONLY. If m_handler->m_frontendAccess is Access::APPEND, a + * possibly existing file should not be overwritten. Instead, written + * updates should then either occur in-place or in form of new IO steps. + * Support for reading is not necessary in Append mode. + * The new file should be located in m_handler->directory. + * The new file should have the filename parameters.name. + * The filename should include the correct corresponding filename extension. + * Any existing file should be overwritten if m_handler->m_frontendAccess is + * Access::CREATE. The Writables file position should correspond to the root + * group "/" of the hierarchy. The Writable should be marked written when + * the operation completes successfully. */ virtual void createFile(Writable *, Parameter const &) = 0; diff --git a/include/openPMD/IO/Access.hpp b/include/openPMD/IO/Access.hpp index 93ba662241..2b5d37f260 100644 --- a/include/openPMD/IO/Access.hpp +++ b/include/openPMD/IO/Access.hpp @@ -28,7 +28,8 @@ enum class Access { READ_ONLY, //!< open series as read-only, fails if series is not found READ_WRITE, //!< open existing series as writable - CREATE //!< create new series and truncate existing (files) + CREATE, //!< create new series and truncate existing (files) + APPEND //!< write new iterations to an existing series without reading }; // Access // deprecated name (used prior to 0.12.0) diff --git a/src/Error.cpp b/src/Error.cpp index aa6723e63f..a0331948c8 100644 --- a/src/Error.cpp +++ b/src/Error.cpp @@ -45,5 +45,12 @@ namespace error concatVector(errorLocation_in) + "': " + std::move(what)) , errorLocation(std::move(errorLocation_in)) {} + + Internal::Internal(std::string const &what) + : Error( + "Internal error: " + what + + "\nThis is a bug. Please report at ' " + "https://github.com/openPMD/openPMD-api/issues'.") + {} } // namespace error } // namespace openPMD diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 3d55598f25..072c5cd285 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -315,9 +315,6 @@ void ADIOS2IOHandlerImpl::createFile( m_iterationEncoding = parameters.encoding; associateWithFile(writable, shared_name); this->m_dirty.emplace(shared_name); - getFileData(shared_name, IfFileNotOpen::OpenImplicitly).m_mode = - adios2::Mode::Write; // WORKAROUND - // ADIOS2 does not yet implement ReadWrite Mode writable->written = true; writable->abstractFilePosition = std::make_shared(); @@ -1074,21 +1071,16 @@ adios2::Mode ADIOS2IOHandlerImpl::adios2AccessMode(std::string const &fullPath) if (auxiliary::directory_exists(fullPath) || auxiliary::file_exists(fullPath)) { - std::cerr << "ADIOS2 does currently not yet implement ReadWrite " - "(Append) mode. " - << "Replacing with Read mode." << std::endl; return adios2::Mode::Read; } else { - std::cerr << "ADIOS2 does currently not yet implement ReadWrite " - "(Append) mode. " - << "Replacing with Write mode." << std::endl; return adios2::Mode::Write; } - default: - return adios2::Mode::Undefined; + case Access::APPEND: + return adios2::Mode::Append; } + throw std::runtime_error("Unreachable!"); } json::TracingJSON ADIOS2IOHandlerImpl::nullvalue = { @@ -2235,6 +2227,7 @@ namespace detail delayOpeningTheFirstStep = true; break; case adios2::Mode::Write: + case adios2::Mode::Append: /* * File engines, write mode: * Default for old layout is no steps. @@ -2442,6 +2435,7 @@ namespace detail { switch (m_mode) { + case adios2::Mode::Append: case adios2::Mode::Write: { // usesSteps attribute only written upon ::advance() // this makes sure that the attribute is only put in case @@ -2686,17 +2680,14 @@ namespace detail switch (ba.m_mode) { case adios2::Mode::Write: + case adios2::Mode::Append: eng.PerformPuts(); break; case adios2::Mode::Read: eng.PerformGets(); break; - case adios2::Mode::Append: - // TODO order? - eng.PerformGets(); - eng.PerformPuts(); - break; default: + throw error::Internal("[ADIOS2] Unexpected access mode."); break; } }, diff --git a/src/IO/ADIOS/CommonADIOS1IOHandler.cpp b/src/IO/ADIOS/CommonADIOS1IOHandler.cpp index e50207a74d..5e30ecabb3 100644 --- a/src/IO/ADIOS/CommonADIOS1IOHandler.cpp +++ b/src/IO/ADIOS/CommonADIOS1IOHandler.cpp @@ -20,6 +20,7 @@ */ #include "openPMD/IO/ADIOS/CommonADIOS1IOHandler.hpp" +#include "openPMD/Error.hpp" #if openPMD_HAVE_ADIOS1 @@ -406,6 +407,15 @@ void CommonADIOS1IOHandlerImpl::createFile( if (!auxiliary::ends_with(name, ".bp")) name += ".bp"; + if (m_handler->m_backendAccess == Access::APPEND && + auxiliary::file_exists(name)) + { + throw error::OperationUnsupportedInBackend( + "ADIOS1", + "Appending to existing file on disk (use Access::CREATE to " + "overwrite)"); + } + writable->written = true; writable->abstractFilePosition = std::make_shared("/"); diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index d0725395f7..51a013fca8 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -237,13 +237,41 @@ void HDF5IOHandlerImpl::createFile( std::string name = m_handler->directory + parameters.name; if (!auxiliary::ends_with(name, ".h5")) name += ".h5"; - unsigned flags; - if (m_handler->m_backendAccess == Access::CREATE) + unsigned flags{}; + switch (m_handler->m_backendAccess) + { + case Access::CREATE: flags = H5F_ACC_TRUNC; - else + break; + case Access::APPEND: + if (auxiliary::file_exists(name)) + { + flags = H5F_ACC_RDWR; + } + else + { + flags = H5F_ACC_TRUNC; + } + break; + case Access::READ_WRITE: flags = H5F_ACC_EXCL; - hid_t id = - H5Fcreate(name.c_str(), flags, H5P_DEFAULT, m_fileAccessProperty); + break; + case Access::READ_ONLY: + // condition has been checked above + throw std::runtime_error( + "[HDF5] Control flow error in createFile backend access mode."); + } + + hid_t id{}; + if (flags == H5F_ACC_RDWR) + { + id = H5Fopen(name.c_str(), flags, m_fileAccessProperty); + } + else + { + id = H5Fcreate( + name.c_str(), flags, H5P_DEFAULT, m_fileAccessProperty); + } VERIFY(id >= 0, "[HDF5] Internal error: Failed to create HDF5 file"); writable->written = true; @@ -409,6 +437,36 @@ void HDF5IOHandlerImpl::createDataset( "[HDF5] Internal error: Failed to open HDF5 group during dataset " "creation"); + if (m_handler->m_backendAccess == Access::APPEND) + { + // The dataset might already exist in the file from a previous run + // We delete it, otherwise we could not create it again with + // possibly different parameters. + if (htri_t link_id = H5Lexists(node_id, name.c_str(), H5P_DEFAULT); + link_id > 0) + { + // This only unlinks, but does not delete the dataset + // Deleting the actual dataset physically is now up to HDF5: + // > when removing an object with H5Ldelete, the HDF5 library + // > should be able to detect and recycle the file space when no + // > other reference to the deleted object exists + // https://github.com/openPMD/openPMD-api/pull/1007#discussion_r867223316 + herr_t status = H5Ldelete(node_id, name.c_str(), H5P_DEFAULT); + VERIFY( + status == 0, + "[HDF5] Internal error: Failed to delete old dataset '" + + name + "' from group for overwriting."); + } + else if (link_id < 0) + { + throw std::runtime_error( + "[HDF5] Internal error: Failed to check for link existence " + "of '" + + name + "' inside group for overwriting."); + } + // else: link_id == 0: Link does not exist, nothing to do + } + Datatype d = parameters.dtype; if (d == Datatype::UNDEFINED) { @@ -702,7 +760,14 @@ void HDF5IOHandlerImpl::openFile( Access at = m_handler->m_backendAccess; if (at == Access::READ_ONLY) flags = H5F_ACC_RDONLY; - else if (at == Access::READ_WRITE || at == Access::CREATE) + /* + * Within the HDF5 backend, APPEND and READ_WRITE mode are + * equivalent, but the openPMD frontend exposes no reading + * functionality in APPEND mode. + */ + else if ( + at == Access::READ_WRITE || at == Access::CREATE || + at == Access::APPEND) flags = H5F_ACC_RDWR; else throw std::runtime_error("[HDF5] Unknown file Access"); diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 61f8d239d5..6f8666fae5 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -117,7 +117,7 @@ void JSONIOHandlerImpl::createFile( file.invalidate(); } - std::string const dir(m_handler->directory); + std::string const &dir(m_handler->directory); if (!auxiliary::directory_exists(dir)) { auto success = auxiliary::create_directories(dir); @@ -126,8 +126,14 @@ void JSONIOHandlerImpl::createFile( associateWithFile(writable, shared_name); this->m_dirty.emplace(shared_name); - // make sure to overwrite! - this->m_jsonVals[shared_name] = std::make_shared(); + + if (m_handler->m_backendAccess != Access::APPEND) + { + // make sure to overwrite! + this->m_jsonVals[shared_name] = std::make_shared(); + } + // else: the JSON value is not available in m_jsonVals and will be + // read from the file later on before overwriting writable->written = true; writable->abstractFilePosition = std::make_shared(); @@ -910,6 +916,14 @@ JSONIOHandlerImpl::getFilehandle(File fileName, Access access) { case Access::CREATE: case Access::READ_WRITE: + case Access::APPEND: + /* + * Always truncate when writing, we alway write entire JSON + * datasets, never partial ones. + * Within the JSON backend, APPEND and READ_WRITE mode are + * equivalent, but the openPMD frontend exposes no reading + * functionality in APPEND mode. + */ fs->open(path, std::ios_base::out | std::ios_base::trunc); break; case Access::READ_ONLY: diff --git a/src/Iteration.cpp b/src/Iteration.cpp index 8991f0ebad..1cf77ec180 100644 --- a/src/Iteration.cpp +++ b/src/Iteration.cpp @@ -298,15 +298,18 @@ void Iteration::flushVariableBased( void Iteration::flush(internal::FlushParams const &flushParams) { - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) { + case Access::READ_ONLY: { for (auto &m : meshes) m.second.flush(m.first, flushParams); for (auto &species : particles) species.second.flush(species.first, flushParams); + break; } - else - { + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { /* Find the root point [Series] of this file, * meshesPath and particlesPath are stored there */ Series s = retrieveSeries(); @@ -344,6 +347,8 @@ void Iteration::flush(internal::FlushParams const &flushParams) } flushAttributes(flushParams); + break; + } } } @@ -591,15 +596,26 @@ AdvanceStatus Iteration::beginStep(bool reread) (this->IOHandler()->m_frontendAccess == Access::READ_ONLY || this->IOHandler()->m_frontendAccess == Access::READ_WRITE)) { - bool previous = series.iterations.written(); - series.iterations.written() = false; - auto oldType = this->IOHandler()->m_frontendAccess; - auto newType = - const_cast(&this->IOHandler()->m_frontendAccess); - *newType = Access::READ_WRITE; - series.readGorVBased(false); - *newType = oldType; - series.iterations.written() = previous; + switch (IOHandler()->m_frontendAccess) + { + case Access::READ_ONLY: + case Access::READ_WRITE: { + bool previous = series.iterations.written(); + series.iterations.written() = false; + auto oldType = this->IOHandler()->m_frontendAccess; + auto newType = + const_cast(&this->IOHandler()->m_frontendAccess); + *newType = Access::READ_WRITE; + series.readGorVBased(false); + *newType = oldType; + series.iterations.written() = previous; + break; + } + case Access::CREATE: + case Access::APPEND: + // no re-reading necessary + break; + } } return status; @@ -696,42 +712,49 @@ void Iteration::linkHierarchy(Writable &w) void Iteration::runDeferredParseAccess() { - if (IOHandler()->m_frontendAccess == Access::CREATE) + switch (IOHandler()->m_frontendAccess) { - return; - } - - auto &it = get(); - if (!it.m_deferredParseAccess.has_value()) - { - return; - } - auto const &deferred = it.m_deferredParseAccess.value(); + case Access::READ_ONLY: + case Access::READ_WRITE: { + auto &it = get(); + if (!it.m_deferredParseAccess.has_value()) + { + return; + } + auto const &deferred = it.m_deferredParseAccess.value(); - auto oldAccess = IOHandler()->m_frontendAccess; - auto newAccess = const_cast(&IOHandler()->m_frontendAccess); - *newAccess = Access::READ_WRITE; - try - { - if (deferred.fileBased) + auto oldAccess = IOHandler()->m_frontendAccess; + auto newAccess = const_cast(&IOHandler()->m_frontendAccess); + *newAccess = Access::READ_WRITE; + try { - readFileBased(deferred.filename, deferred.path, deferred.beginStep); + if (deferred.fileBased) + { + readFileBased( + deferred.filename, deferred.path, deferred.beginStep); + } + else + { + readGorVBased(deferred.path, deferred.beginStep); + } } - else + catch (...) { - readGorVBased(deferred.path, deferred.beginStep); + // reset this thing + it.m_deferredParseAccess = std::optional(); + *newAccess = oldAccess; + throw; } - } - catch (...) - { // reset this thing it.m_deferredParseAccess = std::optional(); *newAccess = oldAccess; - throw; + break; + } + case Access::CREATE: + case Access::APPEND: + // no parsing in those modes + return; } - // reset this thing - it.m_deferredParseAccess = std::optional(); - *newAccess = oldAccess; } template float Iteration::time() const; diff --git a/src/Mesh.cpp b/src/Mesh.cpp index 761a9a304a..6f9b891635 100644 --- a/src/Mesh.cpp +++ b/src/Mesh.cpp @@ -216,13 +216,16 @@ template Mesh &Mesh::setTimeOffset(float); void Mesh::flush_impl( std::string const &name, internal::FlushParams const &flushParams) { - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) { + case Access::READ_ONLY: { for (auto &comp : *this) comp.second.flush(comp.first, flushParams); + break; } - else - { + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { if (!written()) { if (scalar()) @@ -260,6 +263,8 @@ void Mesh::flush_impl( } flushAttributes(flushParams); + break; + } } } diff --git a/src/ParticleSpecies.cpp b/src/ParticleSpecies.cpp index fdc4fa6f88..24ebadadec 100644 --- a/src/ParticleSpecies.cpp +++ b/src/ParticleSpecies.cpp @@ -141,15 +141,18 @@ namespace void ParticleSpecies::flush( std::string const &path, internal::FlushParams const &flushParams) { - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) { + case Access::READ_ONLY: { for (auto &record : *this) record.second.flush(record.first, flushParams); for (auto &patch : particlePatches) patch.second.flush(patch.first, flushParams); + break; } - else - { + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { auto it = find("position"); if (it != end()) it->second.setUnitDimension({{UnitDimension::L, 1}}); @@ -168,6 +171,8 @@ void ParticleSpecies::flush( for (auto &patch : particlePatches) patch.second.flush(patch.first, flushParams); } + break; + } } } diff --git a/src/Record.cpp b/src/Record.cpp index 90b0c61a91..bbd74ed0f5 100644 --- a/src/Record.cpp +++ b/src/Record.cpp @@ -46,13 +46,16 @@ Record &Record::setUnitDimension(std::map const &udim) void Record::flush_impl( std::string const &name, internal::FlushParams const &flushParams) { - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) { + case Access::READ_ONLY: { for (auto &comp : *this) comp.second.flush(comp.first, flushParams); + break; } - else - { + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { if (!written()) { if (scalar()) @@ -90,6 +93,8 @@ void Record::flush_impl( } flushAttributes(flushParams); + break; + } } } diff --git a/src/RecordComponent.cpp b/src/RecordComponent.cpp index c69c09e997..9e7a93e6ea 100644 --- a/src/RecordComponent.cpp +++ b/src/RecordComponent.cpp @@ -200,16 +200,18 @@ void RecordComponent::flush( rc.m_name = name; return; } - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) { + case Access::READ_ONLY: while (!rc.m_chunks.empty()) { IOHandler()->enqueue(rc.m_chunks.front()); rc.m_chunks.pop(); } - } - else - { + break; + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { /* * This catches when a user forgets to use resetDataset. */ @@ -275,6 +277,8 @@ void RecordComponent::flush( } flushAttributes(flushParams); + break; + } } } diff --git a/src/Series.cpp b/src/Series.cpp index 6adeefad55..36a7bd90d6 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -301,6 +301,7 @@ Series &Series::setIterationEncoding(IterationEncoding ie) setAttribute("iterationEncoding", std::string("variableBased")); break; } + IOHandler()->setIterationEncoding(ie); return *this; } @@ -469,6 +470,56 @@ bool Series::reparseExpansionPattern(std::string filenameWithExtension) return true; } +namespace +{ + /* + * Negative return values: + * -1: No padding detected, just keep the default from the file name + * -2: Contradicting paddings detected + */ + template + int autoDetectPadding( + std::function isPartOfSeries, + std::string const &directory, + MappingFunction &&mappingFunction) + { + bool isContained; + int padding; + uint64_t iterationIndex; + std::set paddings; + for (auto const &entry : auxiliary::list_directory(directory)) + { + std::tie(isContained, padding, iterationIndex) = + isPartOfSeries(entry); + if (isContained) + { + paddings.insert(padding); + // no std::forward as this is called repeatedly + mappingFunction(iterationIndex, entry); + } + } + if (paddings.size() == 1u) + return *paddings.begin(); + else if (paddings.empty()) + return -1; + else + return -2; + } + + int autoDetectPadding( + std::function isPartOfSeries, + std::string const &directory) + { + return autoDetectPadding( + std::move(isPartOfSeries), + directory, + [](uint64_t index, std::string const &filename) { + (void)index; + (void)filename; + }); + } +} // namespace + void Series::init( std::shared_ptr ioHandler, std::unique_ptr input) @@ -501,9 +552,10 @@ Given file pattern: ')END" << series.m_name << "'" << std::endl; } - if (IOHandler()->m_frontendAccess == Access::READ_ONLY || - IOHandler()->m_frontendAccess == Access::READ_WRITE) + switch (IOHandler()->m_frontendAccess) { + case Access::READ_ONLY: + case Access::READ_WRITE: { /* Allow creation of values in Containers and setting of Attributes * Would throw for Access::READ_ONLY */ auto oldType = IOHandler()->m_frontendAccess; @@ -528,11 +580,43 @@ Given file pattern: ')END" } *newType = oldType; + break; } - else - { + case Access::CREATE: { + initDefaults(input->iterationEncoding); + setIterationEncoding(input->iterationEncoding); + break; + } + case Access::APPEND: { initDefaults(input->iterationEncoding); setIterationEncoding(input->iterationEncoding); + if (input->iterationEncoding != IterationEncoding::fileBased) + { + break; + } + int padding = autoDetectPadding( + matcher( + series.m_filenamePrefix, + series.m_filenamePadding, + series.m_filenamePostfix, + series.m_format), + IOHandler()->directory); + switch (padding) + { + case -2: + throw std::runtime_error( + "Cannot write to a series with inconsistent iteration padding. " + "Please specify '%0T' or open as read-only."); + case -1: + std::cerr << "No matching iterations found: " << name() + << std::endl; + break; + default: + series.m_filenamePadding = padding; + break; + } + break; + } } series.m_lastFlushSuccessful = true; } @@ -609,7 +693,9 @@ void Series::flushFileBased( throw std::runtime_error( "fileBased output can not be written with no iterations."); - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) + { + case Access::READ_ONLY: for (auto it = begin; it != end; ++it) { // Phase 1 @@ -639,8 +725,10 @@ void Series::flushFileBased( IOHandler()->flush(flushParams); } } - else - { + break; + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { bool allDirty = dirty(); for (auto it = begin; it != end; ++it) { @@ -692,6 +780,8 @@ void Series::flushFileBased( dirty() = allDirty; } dirty() = false; + break; + } } } @@ -702,7 +792,9 @@ void Series::flushGorVBased( bool flushIOHandler) { auto &series = get(); - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) + { + case Access::READ_ONLY: for (auto it = begin; it != end; ++it) { // Phase 1 @@ -731,8 +823,10 @@ void Series::flushGorVBased( IOHandler()->flush(flushParams); } } - else - { + break; + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { if (!written()) { Parameter fCreate; @@ -788,6 +882,8 @@ void Series::flushGorVBased( { IOHandler()->flush(flushParams); } + break; + } } } @@ -827,23 +923,15 @@ void Series::readFileBased() series.m_filenamePadding, series.m_filenamePostfix, series.m_format); - bool isContained; - int padding; - uint64_t iterationIndex; - std::set paddings; - for (auto const &entry : auxiliary::list_directory(IOHandler()->directory)) - { - std::tie(isContained, padding, iterationIndex) = isPartOfSeries(entry); - if (isContained) - { - Iteration &i = series.iterations[iterationIndex]; - i.deferParseAccess( - {std::to_string(iterationIndex), iterationIndex, true, entry}); - // TODO skip if the padding is exact the number of chars in an - // iteration? - paddings.insert(padding); - } - } + + int padding = autoDetectPadding( + std::move(isPartOfSeries), + IOHandler()->directory, + // foreach found file with `filename` and `index`: + [&series](uint64_t index, std::string const &filename) { + Iteration &i = series.iterations[index]; + i.deferParseAccess({std::to_string(index), index, true, filename}); + }); if (series.iterations.empty()) { @@ -885,14 +973,15 @@ void Series::readFileBased() } } - if (paddings.size() == 1u) - series.m_filenamePadding = *paddings.begin(); + if (padding > 0) + series.m_filenamePadding = padding; - /* Frontend access type might change during Series::read() to allow - * parameter modification. Backend access type stays unchanged for the - * lifetime of a Series. */ - if (paddings.size() > 1u && - IOHandler()->m_backendAccess == Access::READ_WRITE) + /* Frontend access type might change during SeriesInterface::read() to allow + parameter modification. + * Backend access type stays unchanged for the lifetime of a Series. + autoDetectPadding() announces contradicting paddings with return status + -2. */ + if (padding == -2 && IOHandler()->m_backendAccess == Access::READ_WRITE) throw std::runtime_error( "Cannot write to a series with inconsistent iteration padding. " "Please specify '%0T' or open as read-only."); @@ -1639,6 +1728,10 @@ namespace internal Series impl{{this, [](auto const *) {}}}; impl.flush(); } + if (m_writeIterations.has_value()) + { + m_writeIterations = std::optional(); + } } catch (std::exception const &ex) { diff --git a/src/backend/PatchRecordComponent.cpp b/src/backend/PatchRecordComponent.cpp index ecc44625a2..a5178a9911 100644 --- a/src/backend/PatchRecordComponent.cpp +++ b/src/backend/PatchRecordComponent.cpp @@ -84,16 +84,19 @@ void PatchRecordComponent::flush( std::string const &name, internal::FlushParams const &flushParams) { auto &rc = get(); - if (IOHandler()->m_frontendAccess == Access::READ_ONLY) + switch (IOHandler()->m_frontendAccess) { + case Access::READ_ONLY: { while (!rc.m_chunks.empty()) { IOHandler()->enqueue(rc.m_chunks.front()); rc.m_chunks.pop(); } + break; } - else - { + case Access::READ_WRITE: + case Access::CREATE: + case Access::APPEND: { if (!written()) { Parameter dCreate; @@ -111,6 +114,8 @@ void PatchRecordComponent::flush( } flushAttributes(flushParams); + break; + } } } diff --git a/src/binding/python/Error.cpp b/src/binding/python/Error.cpp index ea5712296b..3b81ebe923 100644 --- a/src/binding/python/Error.cpp +++ b/src/binding/python/Error.cpp @@ -14,6 +14,7 @@ void init_Error(py::module &m) m, "ErrorWrongAPIUsage", baseError); py::register_exception( m, "ErrorBackendConfigSchema", baseError); + py::register_exception(m, "ErrorInternal", baseError); #ifndef NDEBUG m.def("test_throw", [](std::string description) { diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index dc366fa773..155d4fc58d 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -1917,9 +1917,9 @@ inline void fileBased_write_test(const std::string &backend) REQUIRE(o.iterations[5].time() == 5.0); } - // extend existing series with new step and auto-detection of iteration - // padding { + // extend existing series with new step and auto-detection of iteration + // padding Series o = Series( "../samples/subdir/serial_fileBased_write%T." + backend, Access::READ_WRITE); @@ -2009,15 +2009,13 @@ inline void fileBased_write_test(const std::string &backend) } // write with auto-detection and in-consistent padding from step 10 - { - REQUIRE_THROWS_WITH( - Series( - "../samples/subdir/serial_fileBased_write%T." + backend, - Access::READ_WRITE), - Catch::Equals( - "Cannot write to a series with inconsistent iteration padding. " - "Please specify '%0T' or open as read-only.")); - } + REQUIRE_THROWS_WITH( + Series( + "../samples/subdir/serial_fileBased_write%T." + backend, + Access::READ_WRITE), + Catch::Equals( + "Cannot write to a series with inconsistent iteration padding. " + "Please specify '%0T' or open as read-only.")); // read back with fixed padding { @@ -5715,3 +5713,324 @@ TEST_CASE("varying_zero_pattern", "[serial]") varying_pattern(t); } } + +void append_mode( + std::string const &extension, + bool variableBased, + std::string jsonConfig = "{}") +{ + + std::string filename = (variableBased ? "../samples/append_variablebased." + : "../samples/append_groupbased.") + + extension; + std::vector data(10, 0); + auto writeSomeIterations = [&data]( + WriteIterations &&writeIterations, + std::vector indices) { + for (auto index : indices) + { + auto it = writeIterations[index]; + auto dataset = it.meshes["E"]["x"]; + dataset.resetDataset({Datatype::INT, {10}}); + dataset.storeChunk(data, {0}, {10}); + // test that it works without closing too + it.close(); + } + }; + { + Series write(filename, Access::CREATE, jsonConfig); + if (variableBased) + { + if (write.backend() != "ADIOS2") + { + return; + } + write.setIterationEncoding(IterationEncoding::variableBased); + } + writeSomeIterations( + write.writeIterations(), std::vector{0, 1}); + } + { + Series write(filename, Access::APPEND, jsonConfig); + if (variableBased) + { + write.setIterationEncoding(IterationEncoding::variableBased); + } + if (write.backend() == "ADIOS1") + { + REQUIRE_THROWS_WITH( + write.flush(), + Catch::Equals( + "Operation unsupported in ADIOS1: Appending to existing " + "file on disk (use Access::CREATE to overwrite)")); + // destructor will be noisy now + return; + } + + writeSomeIterations( + write.writeIterations(), std::vector{2, 3}); + write.flush(); + } + { + Series write(filename, Access::APPEND, jsonConfig); + if (variableBased) + { + write.setIterationEncoding(IterationEncoding::variableBased); + } + if (write.backend() == "ADIOS1") + { + REQUIRE_THROWS_WITH( + write.flush(), + Catch::Equals( + "Operation unsupported in ADIOS1: Appending to existing " + "file on disk (use Access::CREATE to overwrite)")); + // destructor will be noisy now + return; + } + + writeSomeIterations( + write.writeIterations(), std::vector{4, 3}); + write.flush(); + } + { + Series read(filename, Access::READ_ONLY); + if (variableBased) + { + // in variable-based encodings, iterations are not parsed ahead of + // time but as they go + unsigned counter = 0; + for (auto const &iteration : read.readIterations()) + { + REQUIRE(iteration.iterationIndex == counter); + ++counter; + } + REQUIRE(counter == 5); + } + else + { + REQUIRE(read.iterations.size() == 5); + } + /* + * Roadmap: for now, reading this should work by ignoring the last + * duplicate iteration. + * After merging https://github.com/openPMD/openPMD-api/pull/949, we + * should see both instances when reading. + * Final goal: Read only the last instance. + */ + helper::listSeries(read); + } +} + +TEST_CASE("append_mode", "[serial]") +{ + for (auto const &t : testedFileExtensions()) + { + if (t == "bp") + { + std::string jsonConfigOld = R"END( +{ + "adios2": + { + "schema": 0, + "engine": + { + "usesteps" : true + } + } +})END"; + std::string jsonConfigNew = R"END( +{ + "adios2": + { + "schema": 20210209, + "engine": + { + "usesteps" : true + } + } +})END"; + append_mode(t, false, jsonConfigOld); + append_mode(t, false, jsonConfigNew); + append_mode(t, true, jsonConfigOld); + append_mode(t, true, jsonConfigNew); + } + else + { + append_mode(t, false); + } + } +} + +void append_mode_filebased(std::string const &extension) +{ + std::string jsonConfig = R"END( +{ + "adios2": + { + "schema": 20210209, + "engine": + { + "usesteps" : true + } + } +})END"; + auto writeSomeIterations = [](WriteIterations &&writeIterations, + std::vector indices) { + for (auto index : indices) + { + auto it = writeIterations[index]; + auto dataset = it.meshes["E"]["x"]; + dataset.resetDataset({Datatype::INT, {1}}); + dataset.makeConstant(0); + // test that it works without closing too + it.close(); + } + }; + if (auxiliary::directory_exists("../samples/append")) + { + auxiliary::remove_directory("../samples/append"); + } + { + Series write( + "../samples/append/append_%T." + extension, + Access::CREATE, + jsonConfig); + writeSomeIterations( + write.writeIterations(), std::vector{0, 1}); + } + { + Series write( + "../samples/append/append_%T." + extension, + Access::APPEND, + jsonConfig); + writeSomeIterations( + write.writeIterations(), std::vector{4, 5}); + write.flush(); + } + { + Series write( + "../samples/append/append_%T." + extension, + Access::APPEND, + jsonConfig); + writeSomeIterations( + write.writeIterations(), std::vector{2, 3}); + write.flush(); + } + { + Series write( + "../samples/append/append_%T." + extension, + Access::APPEND, + jsonConfig); + // overwrite a previous iteration + writeSomeIterations( + write.writeIterations(), std::vector{4, 123}); + write.flush(); + } + { + Series read( + "../samples/append/append_%T." + extension, Access::READ_ONLY); + REQUIRE(read.iterations.size() == 7); + } +} + +TEST_CASE("append_mode_filebased", "[serial]") +{ + for (auto const &t : testedFileExtensions()) + { + append_mode_filebased(t); + } +} + +void groupbased_read_write(std::string const &ext) +{ + int data = 0; + Dataset ds(Datatype::INT, {1}); + std::string filename = "../samples/groupbased_read_write." + ext; + + { + Series write(filename, Access::CREATE); + auto E_x = write.iterations[0].meshes["E"]["x"]; + auto E_y = write.iterations[0].meshes["E"]["y"]; + E_x.resetDataset(ds); + E_y.resetDataset(ds); + E_x.storeChunk(shareRaw(&data), {0}, {1}); + E_y.storeChunk(shareRaw(&data), {0}, {1}); + + E_x.setAttribute("updated_in_run", 0); + E_y.setAttribute("updated_in_run", 0); + } + + { + Series write(filename, Access::READ_WRITE); + // create a new iteration + auto E_x = write.iterations[1].meshes["E"]["x"]; + E_x.resetDataset(ds); + + // overwrite old dataset + auto E_y = write.iterations[0].meshes["E"]["y"]; + + data = 1; + + E_x.storeChunk(shareRaw(&data), {0}, {1}); + E_y.storeChunk(shareRaw(&data), {0}, {1}); + + E_x.setAttribute("updated_in_run", 1); + E_y.setAttribute("updated_in_run", 1); + } + + { + Series read(filename, Access::READ_ONLY); + auto E_x_0_fromRun0 = read.iterations[0].meshes["E"]["x"]; + auto E_x_1_fromRun1 = read.iterations[1].meshes["E"]["x"]; + auto E_y_0_fromRun1 = read.iterations[0].meshes["E"]["y"]; + + REQUIRE(E_x_0_fromRun0.getAttribute("updated_in_run").get() == 0); + REQUIRE(E_x_1_fromRun1.getAttribute("updated_in_run").get() == 1); + REQUIRE(E_y_0_fromRun1.getAttribute("updated_in_run").get() == 1); + + auto chunk_E_x_0_fromRun0 = E_x_0_fromRun0.loadChunk({0}, {1}); + auto chunk_E_x_1_fromRun1 = E_x_1_fromRun1.loadChunk({0}, {1}); + auto chunk_E_y_0_fromRun1 = E_y_0_fromRun1.loadChunk({0}, {1}); + + read.flush(); + + REQUIRE(*chunk_E_x_0_fromRun0 == 0); + REQUIRE(*chunk_E_x_1_fromRun1 == 1); + REQUIRE(*chunk_E_y_0_fromRun1 == 1); + } + + // check that truncation works correctly + { + Series write(filename, Access::CREATE); + // create a new iteration + auto E_x = write.iterations[2].meshes["E"]["x"]; + E_x.resetDataset(ds); + + data = 2; + + E_x.storeChunk(shareRaw(&data), {0}, {1}); + E_x.setAttribute("updated_in_run", 2); + } + + { + Series read(filename, Access::READ_ONLY); + REQUIRE(read.iterations.size() == 1); + REQUIRE(read.iterations.count(2) == 1); + } +} + +TEST_CASE("groupbased_read_write", "[serial]") +{ + constexpr char const *supportsGroupbasedRW[] = {"h5", "json"}; + for (auto const &t : testedFileExtensions()) + { + for (auto const supported : supportsGroupbasedRW) + { + if (t == supported) + { + groupbased_read_write(t); + break; + } + } + } +}