From 75f436faa85edaa2f86ec0528a810967f80bca35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 22 Mar 2021 09:44:24 +0100 Subject: [PATCH 01/11] Step-based iteration layout --- include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp | 6 +- include/openPMD/IO/AbstractIOHandlerImpl.hpp | 1 + include/openPMD/IO/IOTask.hpp | 9 +- include/openPMD/Iteration.hpp | 4 +- include/openPMD/IterationEncoding.hpp | 2 +- include/openPMD/Series.hpp | 13 +- include/openPMD/backend/Attributable.hpp | 2 +- src/IO/ADIOS/ADIOS1IOHandler.cpp | 7 +- src/IO/ADIOS/ADIOS2IOHandler.cpp | 36 ++++- src/IO/ADIOS/CommonADIOS1IOHandler.cpp | 11 +- src/IO/ADIOS/ParallelADIOS1IOHandler.cpp | 7 +- src/IO/HDF5/HDF5IOHandler.cpp | 24 +-- src/Iteration.cpp | 37 ++++- src/IterationEncoding.cpp | 3 + src/ReadIterations.cpp | 6 +- src/Series.cpp | 147 +++++++++++++++---- src/backend/Attributable.cpp | 26 +++- src/binding/python/IterationEncoding.cpp | 1 + test/SerialIOTest.cpp | 113 ++++++++++++++ 19 files changed, 379 insertions(+), 76 deletions(-) diff --git a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp index 2d5aba8394..0a85fb15c6 100644 --- a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp +++ b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp @@ -32,6 +32,7 @@ #include "openPMD/auxiliary/Option.hpp" #include "openPMD/backend/Writable.hpp" #include "openPMD/config.hpp" +#include "openPMD/IterationEncoding.hpp" #if openPMD_HAVE_ADIOS2 # include @@ -224,6 +225,7 @@ class ADIOS2IOHandlerImpl private: adios2::ADIOS m_ADIOS; + IterationEncoding m_iterationEncoding = IterationEncoding::groupBased; /** * The ADIOS2 engine type, to be passed to adios2::IO::SetEngine */ @@ -1367,7 +1369,7 @@ class ADIOS2IOHandler : public AbstractIOHandler { #if openPMD_HAVE_ADIOS2 -friend class ADIOS2IOHandlerImpl; + friend class ADIOS2IOHandlerImpl; private: ADIOS2IOHandlerImpl m_impl; @@ -1386,7 +1388,7 @@ friend class ADIOS2IOHandlerImpl; } catch( ... ) { - std::cerr << "[~ADIOS2IOHandler] An error occurred." << std::endl; + std::cerr << "[~ADIOS2IOHandler] An error occurred." << std::endl; } } diff --git a/include/openPMD/IO/AbstractIOHandlerImpl.hpp b/include/openPMD/IO/AbstractIOHandlerImpl.hpp index d66cc3997b..c584462d24 100644 --- a/include/openPMD/IO/AbstractIOHandlerImpl.hpp +++ b/include/openPMD/IO/AbstractIOHandlerImpl.hpp @@ -241,6 +241,7 @@ class AbstractIOHandlerImpl * * The operation should overwrite existing file positions, even when the Writable was already marked written. * The path parameters.path may contain multiple levels (e.g. first/second/third/). This path should be relative (i.e. it should not start with a slash "/"). + * The number of levels may be zero, i.e. parameters.path may be an empty string. * The Writables file position should correspond to the complete opened path (i.e. first/second/third/ should be assigned to the Writables file position). * The Writable should be marked written when the operation completes successfully. */ diff --git a/include/openPMD/IO/IOTask.hpp b/include/openPMD/IO/IOTask.hpp index 6d6087a178..4f49222a8e 100644 --- a/include/openPMD/IO/IOTask.hpp +++ b/include/openPMD/IO/IOTask.hpp @@ -25,6 +25,7 @@ #include "openPMD/backend/Attribute.hpp" #include "openPMD/ChunkInfo.hpp" #include "openPMD/Dataset.hpp" +#include "openPMD/IterationEncoding.hpp" #include #include @@ -108,7 +109,8 @@ template<> struct OPENPMDAPI_EXPORT Parameter< Operation::CREATE_FILE > : public AbstractParameter { Parameter() = default; - Parameter(Parameter const & p) : AbstractParameter(), name(p.name) {} + Parameter(Parameter const & p) : + AbstractParameter(), name(p.name), encoding(p.encoding) {} std::unique_ptr< AbstractParameter > clone() const override @@ -118,13 +120,15 @@ struct OPENPMDAPI_EXPORT Parameter< Operation::CREATE_FILE > : public AbstractPa } std::string name = ""; + IterationEncoding encoding = IterationEncoding::groupBased; }; template<> struct OPENPMDAPI_EXPORT Parameter< Operation::OPEN_FILE > : public AbstractParameter { Parameter() = default; - Parameter(Parameter const & p) : AbstractParameter(), name(p.name) {} + Parameter(Parameter const & p) : + AbstractParameter(), name(p.name), encoding(p.encoding) {} std::unique_ptr< AbstractParameter > clone() const override @@ -134,6 +138,7 @@ struct OPENPMDAPI_EXPORT Parameter< Operation::OPEN_FILE > : public AbstractPara } std::string name = ""; + IterationEncoding encoding = IterationEncoding::groupBased; }; template<> diff --git a/include/openPMD/Iteration.hpp b/include/openPMD/Iteration.hpp index 45fe494ef9..4411d89286 100644 --- a/include/openPMD/Iteration.hpp +++ b/include/openPMD/Iteration.hpp @@ -156,13 +156,15 @@ class Iteration : public LegacyAttributable struct DeferredParseAccess { - std::string index; + std::string path; + uint64_t iteration = 0; bool fileBased = false; std::string filename; }; void flushFileBased(std::string const&, uint64_t); void flushGroupBased(uint64_t); + void flushVariableBased(uint64_t); void flush(); void deferParseAccess( DeferredParseAccess ); /* diff --git a/include/openPMD/IterationEncoding.hpp b/include/openPMD/IterationEncoding.hpp index d0ad4d7f14..fa655aa348 100644 --- a/include/openPMD/IterationEncoding.hpp +++ b/include/openPMD/IterationEncoding.hpp @@ -31,7 +31,7 @@ namespace openPMD */ enum class IterationEncoding { - fileBased, groupBased + fileBased, groupBased, variableBased }; std::ostream& diff --git a/include/openPMD/Series.hpp b/include/openPMD/Series.hpp index 2703b15d4e..c9743d029f 100644 --- a/include/openPMD/Series.hpp +++ b/include/openPMD/Series.hpp @@ -342,13 +342,20 @@ class SeriesImpl : public AttributableImpl std::unique_ptr< ParsedInput > parseInput(std::string); void init(std::shared_ptr< AbstractIOHandler >, std::unique_ptr< ParsedInput >); - void initDefaults(); + void initDefaults( IterationEncoding ); std::future< void > flush_impl( iterations_iterator begin, iterations_iterator end, FlushLevel ); void flushFileBased( iterations_iterator begin, iterations_iterator end ); - void flushGroupBased( iterations_iterator begin, iterations_iterator end ); + /* + * Group-based and variable-based iteration layouts share a lot of logic + * (realistically, the variable-based iteration layout only throws out + * one layer in the hierarchy). + * As a convention, methods that deal with both layouts are called + * .*GorVBased, short for .*GroupOrVariableBased + */ + void flushGorVBased( iterations_iterator begin, iterations_iterator end ); void flushMeshesPath(); void flushParticlesPath(); void readFileBased( ); @@ -361,7 +368,7 @@ class SeriesImpl : public AttributableImpl * as of yet. Such a facility will be required upon implementing things such * as resizable datasets. */ - void readGroupBased( bool init = true ); + void readGorVBased( bool init = true ); void readBase(); std::string iterationFilename( uint64_t i ); void openIteration( uint64_t index, Iteration iteration ); diff --git a/include/openPMD/backend/Attributable.hpp b/include/openPMD/backend/Attributable.hpp index 0039b06484..ce3f08ec75 100644 --- a/include/openPMD/backend/Attributable.hpp +++ b/include/openPMD/backend/Attributable.hpp @@ -215,7 +215,7 @@ class AttributableImpl std::vector< std::string > myPath() const; void flushAttributes(); - void readAttributes(); + void readAttributes( bool reread = false ); /** Retrieve the value of a floating point Attribute of user-defined precision with ensured type-safety. * diff --git a/src/IO/ADIOS/ADIOS1IOHandler.cpp b/src/IO/ADIOS/ADIOS1IOHandler.cpp index f38c74e099..a2f90535ac 100644 --- a/src/IO/ADIOS/ADIOS1IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS1IOHandler.cpp @@ -95,6 +95,9 @@ ADIOS1IOHandlerImpl::flush() case O::CREATE_PATH: createPath(i.writable, deref_dynamic_cast< Parameter< O::CREATE_PATH > >(i.parameter.get())); break; + case O::OPEN_PATH: + openPath(i.writable, deref_dynamic_cast< Parameter< O::OPEN_PATH > >(i.parameter.get())); + break; case O::CREATE_DATASET: createDataset(i.writable, deref_dynamic_cast< Parameter< O::CREATE_DATASET > >(i.parameter.get())); break; @@ -129,9 +132,6 @@ ADIOS1IOHandlerImpl::flush() case O::EXTEND_DATASET: extendDataset(i.writable, deref_dynamic_cast< Parameter< O::EXTEND_DATASET > >(i.parameter.get())); break; - case O::OPEN_PATH: - openPath(i.writable, deref_dynamic_cast< Parameter< O::OPEN_PATH > >(i.parameter.get())); - break; case O::CLOSE_PATH: closePath(i.writable, deref_dynamic_cast< Parameter< O::CLOSE_PATH > >(i.parameter.get())); break; @@ -243,6 +243,7 @@ ADIOS1IOHandler::enqueue(IOTask const& i) { case Operation::CREATE_FILE: case Operation::CREATE_PATH: + case Operation::OPEN_PATH: case Operation::CREATE_DATASET: case Operation::OPEN_FILE: case Operation::WRITE_ATT: diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 211bb0636d..ea2292b001 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -289,6 +289,7 @@ void ADIOS2IOHandlerImpl::createFile( VERIFY( success, "[ADIOS2] Could not create directory." ); } + m_iterationEncoding = parameters.encoding; associateWithFile( writable, shared_name ); this->m_dirty.emplace( shared_name ); getFileData( shared_name ).m_mode = adios2::Mode::Write; // WORKAROUND @@ -470,6 +471,7 @@ ADIOS2IOHandlerImpl::openFile( writable->written = true; writable->abstractFilePosition = std::make_shared< ADIOS2FilePosition >( ); + m_iterationEncoding = parameters.encoding; // enforce opening the file // lazy opening is deathly in parallel situations getFileData( file ); @@ -512,7 +514,9 @@ void ADIOS2IOHandlerImpl::openPath( std::string prefix = filePositionToString( setAndGetFilePosition( writable->parent ) ); std::string suffix = auxiliary::removeSlashes( parameters.path ); - std::string infix = auxiliary::ends_with( prefix, '/' ) ? "" : "/"; + std::string infix = suffix.empty() || auxiliary::ends_with( prefix, '/' ) + ? "" + : "/"; /* ADIOS has no concept for explicitly creating paths. * They are implicitly created with the paths of variables/attributes. */ @@ -1525,9 +1529,27 @@ namespace detail adios2::Dims const & count, bool const constantDims ) { - adios2::Variable< T > var = - IO.DefineVariable< T >( name, shape, start, count, constantDims ); - if ( !var ) + /* + * Step/Variable-based iteration layout: + * The variable may already be defined from a previous step, + * so check if it's already here. + */ + adios2::Variable< T > var = IO.InquireVariable< T >( name ); + if( !var ) + { + var = IO.DefineVariable< T >( + name, shape, start, count, constantDims ); + } + else + { + var.SetShape( shape ); + if( count.size() > 0 ) + { + var.SetSelection( { start, count } ); + } + } + + if( !var ) { throw std::runtime_error( "[ADIOS2] Internal error: Could not create Variable '" + name + "'." ); @@ -2206,6 +2228,12 @@ namespace detail "bp4", "bp3", "hdf5", "file" }; + // step/variable-based iteration encoding requires the new schema + if( m_impl->m_iterationEncoding == IterationEncoding::variableBased ) + { + m_impl->m_schema = ADIOS2Schema::schema_2021_02_09; + } + // set engine type bool isStreaming = false; { diff --git a/src/IO/ADIOS/CommonADIOS1IOHandler.cpp b/src/IO/ADIOS/CommonADIOS1IOHandler.cpp index b486d14f40..92fef41993 100644 --- a/src/IO/ADIOS/CommonADIOS1IOHandler.cpp +++ b/src/IO/ADIOS/CommonADIOS1IOHandler.cpp @@ -677,10 +677,13 @@ CommonADIOS1IOHandlerImpl::openPath( { /* Sanitize path */ std::string path = parameters.path; - if( auxiliary::starts_with(path, '/') ) - path = auxiliary::replace_first(path, "/", ""); - if( !auxiliary::ends_with(path, '/') ) - path += '/'; + if( !path.empty() ) + { + if( auxiliary::starts_with(path, '/') ) + path = auxiliary::replace_first(path, "/", ""); + if( !auxiliary::ends_with(path, '/') ) + path += '/'; + } writable->written = true; writable->abstractFilePosition = std::make_shared< ADIOS1FilePosition >(path); diff --git a/src/IO/ADIOS/ParallelADIOS1IOHandler.cpp b/src/IO/ADIOS/ParallelADIOS1IOHandler.cpp index 443e492df0..f4633856ff 100644 --- a/src/IO/ADIOS/ParallelADIOS1IOHandler.cpp +++ b/src/IO/ADIOS/ParallelADIOS1IOHandler.cpp @@ -119,6 +119,9 @@ ParallelADIOS1IOHandlerImpl::flush() case O::CREATE_PATH: createPath(i.writable, deref_dynamic_cast< Parameter< O::CREATE_PATH > >(i.parameter.get())); break; + case O::OPEN_PATH: + openPath(i.writable, deref_dynamic_cast< Parameter< O::OPEN_PATH > >(i.parameter.get())); + break; case O::CREATE_DATASET: createDataset(i.writable, deref_dynamic_cast< Parameter< O::CREATE_DATASET > >(i.parameter.get())); break; @@ -151,9 +154,6 @@ ParallelADIOS1IOHandlerImpl::flush() case O::EXTEND_DATASET: extendDataset(i.writable, deref_dynamic_cast< Parameter< O::EXTEND_DATASET > >(i.parameter.get())); break; - case O::OPEN_PATH: - openPath(i.writable, deref_dynamic_cast< Parameter< O::OPEN_PATH > >(i.parameter.get())); - break; case O::CLOSE_PATH: closePath(i.writable, deref_dynamic_cast< Parameter< O::CLOSE_PATH > >(i.parameter.get())); break; @@ -265,6 +265,7 @@ ParallelADIOS1IOHandler::enqueue(IOTask const& i) { case Operation::CREATE_FILE: case Operation::CREATE_PATH: + case Operation::OPEN_PATH: case Operation::CREATE_DATASET: case Operation::OPEN_FILE: case Operation::WRITE_ATT: diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 9056648078..4ef7c08384 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -528,19 +528,23 @@ HDF5IOHandlerImpl::openPath( /* Sanitize path */ std::string path = parameters.path; - if( auxiliary::starts_with(path, '/') ) - path = auxiliary::replace_first(path, "/", ""); - if( !auxiliary::ends_with(path, '/') ) - path += '/'; + if( !path.empty() ) + { + if( auxiliary::starts_with(path, '/') ) + path = auxiliary::replace_first(path, "/", ""); + if( !auxiliary::ends_with(path, '/') ) + path += '/'; + path_id = H5Gopen(node_id, + path.c_str(), + H5P_DEFAULT); + VERIFY(path_id >= 0, "[HDF5] Internal error: Failed to open HDF5 group during path opening"); - path_id = H5Gopen(node_id, - path.c_str(), - H5P_DEFAULT); - VERIFY(path_id >= 0, "[HDF5] Internal error: Failed to open HDF5 group during path opening"); + herr_t status; + status = H5Gclose(path_id); + VERIFY(status == 0, "[HDF5] Internal error: Failed to close HDF5 group during path opening"); + } herr_t status; - status = H5Gclose(path_id); - VERIFY(status == 0, "[HDF5] Internal error: Failed to close HDF5 group during path opening"); status = H5Gclose(node_id); VERIFY(status == 0, "[HDF5] Internal error: Failed to close HDF5 group during path opening"); diff --git a/src/Iteration.cpp b/src/Iteration.cpp index be3c268f70..af279ddfe9 100644 --- a/src/Iteration.cpp +++ b/src/Iteration.cpp @@ -233,6 +233,7 @@ Iteration::flushFileBased(std::string const& filename, uint64_t i) { Parameter< Operation::OPEN_FILE > fOpen; fOpen.name = filename; + fOpen.encoding = IterationEncoding::fileBased; IOHandler()->enqueue(IOTask(s, fOpen)); flush(); @@ -261,6 +262,21 @@ Iteration::flushGroupBased(uint64_t i) flush(); } +void +Iteration::flushVariableBased( uint64_t i ) +{ + if( !written() ) + { + /* create iteration path */ + Parameter< Operation::OPEN_PATH > pOpen; + pOpen.path = ""; + IOHandler()->enqueue( IOTask( this, pOpen ) ); + this->setAttribute( "__step__", i ); + } + + flush(); +} + void Iteration::flush() { @@ -279,8 +295,10 @@ Iteration::flush() if( !meshes.empty() || s->containsAttribute("meshesPath") ) { if( !s->containsAttribute("meshesPath") ) + { s->setMeshesPath("meshes/"); - s->flushMeshesPath(); + s->flushMeshesPath(); + } meshes.flush(s->meshesPath()); for( auto& m : meshes ) m.second.flush(m.first); @@ -293,8 +311,10 @@ Iteration::flush() if( !particles.empty() || s->containsAttribute("particlesPath") ) { if( !s->containsAttribute("particlesPath") ) + { s->setParticlesPath("particles/"); - s->flushParticlesPath(); + s->flushParticlesPath(); + } particles.flush(s->particlesPath()); for( auto& species : particles ) species.second.flush(species.first); @@ -323,11 +343,11 @@ void Iteration::read() auto const & deferred = m_deferredParseAccess->get(); if( deferred.fileBased ) { - readFileBased( deferred.filename, deferred.index ); + readFileBased( deferred.filename, deferred.path ); } else { - readGroupBased( deferred.index ); + readGroupBased( deferred.path ); } // reset this thing *m_deferredParseAccess = auxiliary::Option< DeferredParseAccess >(); @@ -521,6 +541,7 @@ Iteration::beginStep() file = m_attributableData.get(); break; case IE::groupBased: + case IE::variableBased: file = &series; break; } @@ -532,7 +553,8 @@ Iteration::beginStep() } // re-read -> new datasets might be available - if( series.iterationEncoding() == IE::groupBased && + if( ( series.iterationEncoding() == IE::groupBased || + series.iterationEncoding() == IE::variableBased ) && ( this->IOHandler()->m_frontendAccess == Access::READ_ONLY || this->IOHandler()->m_frontendAccess == Access::READ_WRITE ) ) { @@ -542,7 +564,7 @@ Iteration::beginStep() auto newType = const_cast< Access * >( &this->IOHandler()->m_frontendAccess ); *newType = Access::READ_WRITE; - series.readGroupBased( false ); + series.readGorVBased( false ); *newType = oldType; series.iterations.written() = previous; } @@ -564,6 +586,7 @@ Iteration::endStep() file = m_attributableData.get(); break; case IE::groupBased: + case IE::variableBased: file = &series; break; } @@ -582,6 +605,7 @@ Iteration::getStepStatus() case IE::fileBased: return *this->m_stepStatus; case IE::groupBased: + case IE::variableBased: return s->m_stepStatus; default: throw std::runtime_error( "[Iteration] unreachable" ); @@ -599,6 +623,7 @@ Iteration::setStepStatus( StepStatus status ) *this->m_stepStatus = status; break; case IE::groupBased: + case IE::variableBased: s->m_stepStatus = status; break; default: diff --git a/src/IterationEncoding.cpp b/src/IterationEncoding.cpp index 9de80e62e8..e463233b42 100644 --- a/src/IterationEncoding.cpp +++ b/src/IterationEncoding.cpp @@ -34,6 +34,9 @@ openPMD::operator<<(std::ostream& os, openPMD::IterationEncoding const& ie) case openPMD::IterationEncoding::groupBased: os << "groupBased"; break; + case openPMD::IterationEncoding::variableBased: + os << "variableBased"; + break; } return os; } diff --git a/src/ReadIterations.cpp b/src/ReadIterations.cpp index ffd36881cf..14329abf5d 100644 --- a/src/ReadIterations.cpp +++ b/src/ReadIterations.cpp @@ -68,7 +68,8 @@ SeriesIterator::SeriesIterator( Series series ) openIteration(); status = it->second.beginStep(); break; - default: + case IterationEncoding::groupBased: + case IterationEncoding::variableBased: /* * In group-based iteration layout, we have definitely already had * access to the file until now. Better to begin a step right away, @@ -105,7 +106,8 @@ SeriesIterator & SeriesIterator::operator++() switch( series.iterationEncoding() ) { using IE = IterationEncoding; - case IE::groupBased: { + case IE::groupBased: + case IE::variableBased: { // since we are in group-based iteration layout, it does not // matter which iteration we begin a step upon AdvanceStatus status = currentIteration.beginStep(); diff --git a/src/Series.cpp b/src/Series.cpp index ec4a4d5edd..f2a2821227 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -287,6 +287,11 @@ SeriesImpl::setIterationEncoding(IterationEncoding ie) setIterationFormat(BASEPATH); setAttribute("iterationEncoding", std::string("groupBased")); break; + case IterationEncoding::variableBased: + setIterationFormat( + auxiliary::replace_first(basePath(), "/%T/", "")); + setAttribute("iterationEncoding", std::string("variableBased")); + break; } return *this; } @@ -300,13 +305,13 @@ SeriesImpl::iterationFormat() const SeriesImpl& SeriesImpl::setIterationFormat(std::string const& i) { - auto & series = get(); if( written() ) throw std::runtime_error("A files iterationFormat can not (yet) be changed after it has been written."); - if( series.m_iterationEncoding == IterationEncoding::groupBased ) + if( iterationEncoding() == IterationEncoding::groupBased || + iterationEncoding() == IterationEncoding::variableBased ) if( basePath() != i && (openPMD() == "1.0.1" || openPMD() == "1.0.0") ) - throw std::invalid_argument("iterationFormat must not differ from basePath " + basePath() + " for groupBased data"); + throw std::invalid_argument("iterationFormat must not differ from basePath " + basePath() + " for group- or variableBased data"); setAttribute("iterationFormat", i); return *this; @@ -384,11 +389,25 @@ SeriesImpl::parseInput(std::string filepath) input->format = determineFormat(input->name); + // first, check for file-based iteration layout std::regex pattern("(.*)%(0[[:digit:]]+)?T(.*)"); std::smatch regexMatch; std::regex_match(input->name, regexMatch, pattern); if( regexMatch.empty() ) - input->iterationEncoding = IterationEncoding::groupBased; + { + // now check for variable-based iteration layout + pattern = "(.*)%V(\\.[^.]+)"; + std::regex_match( input->name, regexMatch, pattern ); + if( regexMatch.empty() ) + { + input->iterationEncoding = IterationEncoding::groupBased; + } + else + { + input->iterationEncoding = IterationEncoding::variableBased; + input->name = regexMatch[1].str() + regexMatch[2].str(); + } + } else if( regexMatch.size() == 4 ) { input->iterationEncoding = IterationEncoding::fileBased; @@ -441,7 +460,7 @@ void SeriesImpl::init( if( input->iterationEncoding == IterationEncoding::fileBased ) readFileBased(); else - readGroupBased(); + readGorVBased(); if( series.iterations.empty() ) { @@ -449,7 +468,7 @@ void SeriesImpl::init( * allow setting attributes in that case */ written() = false; - initDefaults(); + initDefaults( input->iterationEncoding ); setIterationEncoding(input->iterationEncoding); written() = true; @@ -458,20 +477,30 @@ void SeriesImpl::init( *newType = oldType; } else { - initDefaults(); + initDefaults( input->iterationEncoding ); setIterationEncoding(input->iterationEncoding); } } void -SeriesImpl::initDefaults() +SeriesImpl::initDefaults( IterationEncoding ie ) { if( !containsAttribute("openPMD")) setOpenPMD( getStandard() ); if( !containsAttribute("openPMDextension")) setOpenPMDextension(0); if( !containsAttribute("basePath")) - setAttribute("basePath", std::string(BASEPATH)); + { + if( ie == IterationEncoding::variableBased ) + { + setAttribute( + "basePath", auxiliary::replace_first(BASEPATH, "/%T/", "")); + } + else + { + setAttribute("basePath", std::string(BASEPATH)); + } + } if( !containsAttribute("date")) setDate( auxiliary::getDateString() ); if( !containsAttribute("software")) @@ -494,7 +523,8 @@ SeriesImpl::flush_impl( flushFileBased( begin, end ); break; case IE::groupBased: - flushGroupBased( begin, end ); + case IE::variableBased: + flushGorVBased( begin, end ); break; } auto res = IOHandler()->flush(); @@ -652,7 +682,7 @@ SeriesImpl::flushFileBased( iterations_iterator begin, iterations_iterator end ) } void -SeriesImpl::flushGroupBased( iterations_iterator begin, iterations_iterator end ) +SeriesImpl::flushGorVBased( iterations_iterator begin, iterations_iterator end ) { auto & series = get(); if( IOHandler()->m_frontendAccess == Access::READ_ONLY ) @@ -692,6 +722,7 @@ SeriesImpl::flushGroupBased( iterations_iterator begin, iterations_iterator end { Parameter< Operation::CREATE_FILE > fCreate; fCreate.name = series.m_name; + fCreate.encoding = iterationEncoding(); IOHandler()->enqueue(IOTask(this, fCreate)); } @@ -729,7 +760,19 @@ SeriesImpl::flushGroupBased( iterations_iterator begin, iterations_iterator end { it->second.parent() = getWritable( &series.iterations ); } - it->second.flushGroupBased(it->first); + switch( iterationEncoding() ) + { + using IE = IterationEncoding; + case IE::groupBased: + it->second.flushGroupBased( it->first ); + break; + case IE::variableBased: + it->second.flushVariableBased( it->first ); + break; + default: + throw std::runtime_error( + "[Series] Internal control flow error" ); + } if( *it->second.m_closed == Iteration::CloseStatus::ClosedInFrontend ) { // the iteration has no dedicated file in group-based mode @@ -770,6 +813,7 @@ SeriesImpl::readFileBased( ) auto & series = get(); Parameter< Operation::OPEN_FILE > fOpen; Parameter< Operation::READ_ATT > aRead; + fOpen.encoding = iterationEncoding(); if( !auxiliary::directory_exists(IOHandler()->directory) ) throw no_such_file_error("Supplied directory is not valid: " + IOHandler()->directory); @@ -787,8 +831,11 @@ SeriesImpl::readFileBased( ) if( isContained ) { Iteration & i = series.iterations[ iterationIndex ]; - i.deferParseAccess( - { std::to_string( iterationIndex ), true, entry } ); + i.deferParseAccess( { + std::to_string( iterationIndex ), + iterationIndex, + true, + entry } ); // TODO skip if the padding is exact the number of chars in an iteration? paddings.insert(padding); } @@ -875,6 +922,15 @@ void SeriesImpl::readOneIterationFileBased( std::string const & filePath ) << "time series with fileBased iteration " "encoding. Loaded file is groupBased.\n"; } + else if( encoding == "variableBased" ) + { + // @todo should we throw? test this path + series.m_iterationEncoding = IterationEncoding::variableBased; + std::cerr << "Series constructor called with iteration " + "regex '%T' suggests loading a " + << "time series with fileBased iteration " + "encoding. Loaded file is variableBased.\n"; + } else throw std::runtime_error( "Unknown iterationEncoding: " + encoding ); @@ -911,11 +967,12 @@ void SeriesImpl::readOneIterationFileBased( std::string const & filePath ) } void -SeriesImpl::readGroupBased( bool do_init ) +SeriesImpl::readGorVBased( bool do_init ) { auto & series = get(); Parameter< Operation::OPEN_FILE > fOpen; fOpen.name = series.m_name; + fOpen.encoding = iterationEncoding(); IOHandler()->enqueue(IOTask(this, fOpen)); IOHandler()->flush(); @@ -933,6 +990,8 @@ SeriesImpl::readGroupBased( bool do_init ) std::string encoding = Attribute(*aRead.resource).get< std::string >(); if( encoding == "groupBased" ) series.m_iterationEncoding = IterationEncoding::groupBased; + else if( encoding == "variableBased" ) + series.m_iterationEncoding = IterationEncoding::variableBased; else if( encoding == "fileBased" ) { series.m_iterationEncoding = IterationEncoding::fileBased; @@ -973,27 +1032,30 @@ SeriesImpl::readGroupBased( bool do_init ) IOHandler()->enqueue(IOTask(&series.iterations, pOpen)); readAttributes(); - series.iterations.readAttributes(); - + /* + * __step__ changes over steps, so reread that. + */ + series.iterations.readAttributes( /* reread = */ true ); /* obtain all paths inside the basepath (i.e. all iterations) */ Parameter< Operation::LIST_PATHS > pList; IOHandler()->enqueue(IOTask(&series.iterations, pList)); IOHandler()->flush(); - for( auto const & it : *pList.paths ) + auto readSingleIteration = + [&series, &pOpen, this] + (uint64_t index, std::string path) { - uint64_t index = std::stoull( it ); if( series.iterations.contains( index ) ) { // maybe re-read auto & i = series.iterations.at( index ); if( i.closedByWriter() ) { - continue; + return; } if( *i.m_closed != Iteration::CloseStatus::ParseAccessDeferred ) { - pOpen.path = it; + pOpen.path = path; IOHandler()->enqueue( IOTask( &i, pOpen ) ); i.read(); } @@ -1001,8 +1063,8 @@ SeriesImpl::readGroupBased( bool do_init ) else { // parse for the first time, resp. delay the parsing process - Iteration & i = series.iterations[ std::stoull( it ) ]; - i.deferParseAccess( { it, false, "" } ); + Iteration & i = series.iterations[ index ]; + i.deferParseAccess( { path, index, false, "" } ); if( !series.m_parseLazily ) { i.runDeferredParseAccess(); @@ -1013,6 +1075,33 @@ SeriesImpl::readGroupBased( bool do_init ) *i.m_closed = Iteration::CloseStatus::ParseAccessDeferred; } } + }; + + switch( iterationEncoding() ) + { + case IterationEncoding::groupBased: + /* + * Sic! This happens when a file-based Series is opened in group-based mode. + */ + case IterationEncoding::fileBased: + for( auto const & it : *pList.paths ) + { + uint64_t index = std::stoull( it ); + readSingleIteration( index, it ); + } + break; + case IterationEncoding::variableBased: + { + uint64_t index = 0; + if( series.iterations.containsAttribute( "__step__" ) ) + { + index = series.iterations + .getAttribute( "__step__" ) + .get< uint64_t >(); + } + readSingleIteration( index, "" ); + break; + } } } @@ -1153,7 +1242,8 @@ SeriesImpl::advance( { using IE = IterationEncoding; case IE::groupBased: - flushGroupBased( begin, end ); + case IE::variableBased: + flushGorVBased( begin, end ); break; case IE::fileBased: flushFileBased( begin, end ); @@ -1229,8 +1319,10 @@ SeriesImpl::advance( // not closed on a per-iteration basis // We will treat it as such nonetheless *iteration.m_closed = Iteration::CloseStatus::ClosedInBackend; + break; } - break; + case IE::variableBased: // no action necessary + break; } } @@ -1258,6 +1350,7 @@ SeriesImpl::openIteration( uint64_t index, Iteration iteration ) auto & series = get(); // open the iteration's file again Parameter< Operation::OPEN_FILE > fOpen; + fOpen.encoding = iterationEncoding(); fOpen.name = iterationFilename( index ); IOHandler()->enqueue( IOTask( this, fOpen ) ); @@ -1266,7 +1359,9 @@ SeriesImpl::openIteration( uint64_t index, Iteration iteration ) pOpen.path = auxiliary::replace_first( basePath(), "%T/", "" ); IOHandler()->enqueue( IOTask( &series.iterations, pOpen ) ); /* open iteration path */ - pOpen.path = std::to_string( index ); + pOpen.path = iterationEncoding() == IterationEncoding::variableBased + ? "" + : std::to_string( index ); IOHandler()->enqueue( IOTask( &iteration, pOpen ) ); switch( *iteration.m_closed ) { diff --git a/src/backend/Attributable.cpp b/src/backend/Attributable.cpp index 9ce727711a..e71feb46ce 100644 --- a/src/backend/Attributable.cpp +++ b/src/backend/Attributable.cpp @@ -186,21 +186,31 @@ AttributableImpl::flushAttributes() } void -AttributableImpl::readAttributes() +AttributableImpl::readAttributes( bool reread ) { Parameter< Operation::LIST_ATTS > aList; IOHandler()->enqueue(IOTask(this, aList)); IOHandler()->flush(); std::vector< std::string > written_attributes = attributes(); - /* std::set_difference requires sorted ranges */ - std::sort(aList.attributes->begin(), aList.attributes->end()); - std::sort(written_attributes.begin(), written_attributes.end()); - std::set< std::string > tmpAttributes; - std::set_difference(aList.attributes->begin(), aList.attributes->end(), - written_attributes.begin(), written_attributes.end(), - std::inserter(tmpAttributes, tmpAttributes.begin())); + if( reread ) + { + tmpAttributes = std::set< std::string >( + aList.attributes->begin(), + aList.attributes->end() ); + } + else + { + /* std::set_difference requires sorted ranges */ + std::sort(aList.attributes->begin(), aList.attributes->end()); + std::sort(written_attributes.begin(), written_attributes.end()); + + std::set_difference( + aList.attributes->begin(), aList.attributes->end(), + written_attributes.begin(), written_attributes.end(), + std::inserter(tmpAttributes, tmpAttributes.begin())); + } using DT = Datatype; Parameter< Operation::READ_ATT > aRead; diff --git a/src/binding/python/IterationEncoding.cpp b/src/binding/python/IterationEncoding.cpp index e8254452e7..fc8a57abae 100644 --- a/src/binding/python/IterationEncoding.cpp +++ b/src/binding/python/IterationEncoding.cpp @@ -31,5 +31,6 @@ void init_IterationEncoding(py::module &m) { py::enum_(m, "Iteration_Encoding") .value("file_based", IterationEncoding::fileBased) .value("group_based", IterationEncoding::groupBased) + .value("step_based", IterationEncoding::variableBased) ; } diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 0c08d5371c..efe32d9a23 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -6,6 +6,7 @@ #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/auxiliary/StringManip.hpp" #include "openPMD/openPMD.hpp" #if openPMD_HAVE_ADIOS2 @@ -3454,6 +3455,118 @@ TEST_CASE( "serial_iterator", "[serial][adios2]" ) } } +void +variableBasedSingleIteration( std::string const & file ) +{ + constexpr Extent::value_type extent = 1000; + { + Series writeSeries( file, Access::CREATE ); + writeSeries.setIterationEncoding( IterationEncoding::variableBased ); + auto iterations = writeSeries.writeIterations(); + auto iteration = writeSeries.iterations[ 0 ]; + auto E_x = iteration.meshes[ "E" ][ "x" ]; + E_x.resetDataset( + openPMD::Dataset( openPMD::Datatype::INT, { 1000 } ) ); + std::vector< int > data( 1000, 0 ); + std::iota( data.begin(), data.end(), 0 ); + E_x.storeChunk( data, { 0 }, { 1000 } ); + writeSeries.flush(); + } + + { + Series readSeries( file, Access::READ_ONLY ); + + auto E_x = readSeries.iterations[ 0 ].meshes[ "E" ][ "x" ]; + REQUIRE( E_x.getDimensionality() == 1 ); + REQUIRE( E_x.getExtent()[ 0 ] == extent ); + auto chunk = E_x.loadChunk< int >( { 0 }, { extent } ); + readSeries.flush(); + for( size_t i = 0; i < extent; ++i ) + { + REQUIRE( chunk.get()[ i ] == int( i ) ); + } + } +} + +TEST_CASE( "variableBasedSingleIteration", "[serial][adios2]" ) +{ + for( auto const & t : testedFileExtensions() ) + { + variableBasedSingleIteration( "../samples/variableBasedSingleIteration." + t ); + } +} + +#if openPMD_HAVE_ADIOS2 +void +variableBasedSeries( std::string const & file ) +{ + constexpr Extent::value_type extent = 1000; + { + Series writeSeries( file, Access::CREATE ); + REQUIRE( + writeSeries.iterationEncoding() == IterationEncoding::variableBased ); + if( writeSeries.backend() == "ADIOS1" ) + { + return; + } + auto iterations = writeSeries.writeIterations(); + for( size_t i = 0; i < 10; ++i ) + { + auto iteration = iterations[ i ]; + auto E_x = iteration.meshes[ "E" ][ "x" ]; + E_x.resetDataset( { openPMD::Datatype::INT, { 1000 } } ); + std::vector< int > data( 1000, i ); + E_x.storeChunk( data, { 0 }, { 1000 } ); + + auto E_y = iteration.meshes[ "E" ][ "y" ]; + unsigned dimensionality = i % 3 + 1; + unsigned len = i + 1; + Extent changingExtent( dimensionality, len ); + E_y.resetDataset( { openPMD::Datatype::INT, changingExtent } ); + std::vector< int > changingData( + dimensionality * len, dimensionality ); + E_y.storeChunk( + changingData, Offset( dimensionality, 0 ), changingExtent ); + iteration.close(); + } + } + + REQUIRE( auxiliary::directory_exists( + auxiliary::replace_last( file, "%V", "" ) ) ); + + Series readSeries( + file, Access::READ_ONLY, "{\"defer_iteration_parsing\": true}" ); + + size_t last_iteration_index = 0; + for( auto iteration : readSeries.readIterations() ) + { + auto E_x = iteration.meshes[ "E" ][ "x" ]; + REQUIRE( E_x.getDimensionality() == 1 ); + REQUIRE( E_x.getExtent()[ 0 ] == extent ); + auto chunk = E_x.loadChunk< int >( { 0 }, { extent } ); + iteration.close(); + for( size_t i = 0; i < extent; ++i ) + { + REQUIRE( chunk.get()[ i ] == int( iteration.iterationIndex ) ); + } + + auto E_y = iteration.meshes[ "E" ][ "y" ]; + unsigned dimensionality = iteration.iterationIndex % 3 + 1; + unsigned len = iteration.iterationIndex + 1; + Extent changingExtent( dimensionality, len ); + REQUIRE( E_y.getExtent() == changingExtent ); + last_iteration_index = iteration.iterationIndex; + } + REQUIRE( last_iteration_index == 9 ); +} + +TEST_CASE( "variableBasedSeries", "[serial][adios2]" ) +{ + variableBasedSeries( "../samples/variableBasedSeries%V.bp" ); +} +#endif + +// @todo Upon switching to ADIOS2 2.7.0, test this the other way around also void iterate_nonstreaming_series( std::string const & file ) { From bec97cb06b393856df912d3d5217d45585cb041f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 22 Mar 2021 11:22:54 +0100 Subject: [PATCH 02/11] Add eager parsing test Datasets wth changing dimensions require re-parsing --- test/SerialIOTest.cpp | 73 +++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 20 deletions(-) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index efe32d9a23..fb4f8d2011 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -3524,7 +3524,7 @@ variableBasedSeries( std::string const & file ) Extent changingExtent( dimensionality, len ); E_y.resetDataset( { openPMD::Datatype::INT, changingExtent } ); std::vector< int > changingData( - dimensionality * len, dimensionality ); + std::pow( len, dimensionality ), dimensionality ); E_y.storeChunk( changingData, Offset( dimensionality, 0 ), changingExtent ); iteration.close(); @@ -3534,30 +3534,63 @@ variableBasedSeries( std::string const & file ) REQUIRE( auxiliary::directory_exists( auxiliary::replace_last( file, "%V", "" ) ) ); - Series readSeries( - file, Access::READ_ONLY, "{\"defer_iteration_parsing\": true}" ); - - size_t last_iteration_index = 0; - for( auto iteration : readSeries.readIterations() ) { - auto E_x = iteration.meshes[ "E" ][ "x" ]; - REQUIRE( E_x.getDimensionality() == 1 ); - REQUIRE( E_x.getExtent()[ 0 ] == extent ); - auto chunk = E_x.loadChunk< int >( { 0 }, { extent } ); - iteration.close(); - for( size_t i = 0; i < extent; ++i ) + Series readSeries( + file, + Access::READ_ONLY, + "{\"defer_iteration_parsing\": true}" ); + + size_t last_iteration_index = 0; + for( auto iteration : readSeries.readIterations() ) { - REQUIRE( chunk.get()[ i ] == int( iteration.iterationIndex ) ); + auto E_x = iteration.meshes[ "E" ][ "x" ]; + REQUIRE( E_x.getDimensionality() == 1 ); + REQUIRE( E_x.getExtent()[ 0 ] == extent ); + auto chunk = E_x.loadChunk< int >( { 0 }, { extent } ); + iteration.close(); + for( size_t i = 0; i < extent; ++i ) + { + REQUIRE( chunk.get()[ i ] == int( iteration.iterationIndex ) ); + } + + auto E_y = iteration.meshes[ "E" ][ "y" ]; + unsigned dimensionality = iteration.iterationIndex % 3 + 1; + unsigned len = iteration.iterationIndex + 1; + Extent changingExtent( dimensionality, len ); + REQUIRE( E_y.getExtent() == changingExtent ); + last_iteration_index = iteration.iterationIndex; } + REQUIRE( last_iteration_index == 9 ); + } - auto E_y = iteration.meshes[ "E" ][ "y" ]; - unsigned dimensionality = iteration.iterationIndex % 3 + 1; - unsigned len = iteration.iterationIndex + 1; - Extent changingExtent( dimensionality, len ); - REQUIRE( E_y.getExtent() == changingExtent ); - last_iteration_index = iteration.iterationIndex; + { + Series readSeries( + file, + Access::READ_ONLY, + "{\"defer_iteration_parsing\": false}" ); + + size_t last_iteration_index = 0; + for( auto iteration : readSeries.readIterations() ) + { + auto E_x = iteration.meshes[ "E" ][ "x" ]; + REQUIRE( E_x.getDimensionality() == 1 ); + REQUIRE( E_x.getExtent()[ 0 ] == extent ); + auto chunk = E_x.loadChunk< int >( { 0 }, { extent } ); + iteration.close(); + for( size_t i = 0; i < extent; ++i ) + { + REQUIRE( chunk.get()[ i ] == int( iteration.iterationIndex ) ); + } + + auto E_y = iteration.meshes[ "E" ][ "y" ]; + unsigned dimensionality = iteration.iterationIndex % 3 + 1; + unsigned len = iteration.iterationIndex + 1; + Extent changingExtent( dimensionality, len ); + REQUIRE( E_y.getExtent() == changingExtent ); + last_iteration_index = iteration.iterationIndex; + } + REQUIRE( last_iteration_index == 9 ); } - REQUIRE( last_iteration_index == 9 ); } TEST_CASE( "variableBasedSeries", "[serial][adios2]" ) From a2817dc4293bea4ed37b452a5984a8426c85d38a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 22 Mar 2021 11:58:18 +0100 Subject: [PATCH 03/11] Fully re-read an Iteration in stepping mode --- include/openPMD/Iteration.hpp | 2 +- include/openPMD/RecordComponent.hpp | 7 +++---- include/openPMD/backend/PatchRecordComponent.hpp | 1 - src/Iteration.cpp | 9 +++++++-- src/Mesh.cpp | 10 ---------- src/Record.cpp | 9 --------- src/RecordComponent.cpp | 6 ------ src/Series.cpp | 10 +++++----- src/backend/MeshRecordComponent.cpp | 6 ------ src/backend/PatchRecordComponent.cpp | 8 -------- 10 files changed, 16 insertions(+), 52 deletions(-) diff --git a/include/openPMD/Iteration.hpp b/include/openPMD/Iteration.hpp index 4411d89286..9f168c4ca2 100644 --- a/include/openPMD/Iteration.hpp +++ b/include/openPMD/Iteration.hpp @@ -181,7 +181,7 @@ class Iteration : public LegacyAttributable * logic for an iteration. * */ - void read(); + void read( std::string path, bool reread = false ); void readFileBased( std::string filePath, std::string const & groupPath ); void readGroupBased( std::string const & groupPath ); void read_impl( std::string const & groupPath ); diff --git a/include/openPMD/RecordComponent.hpp b/include/openPMD/RecordComponent.hpp index 0790225a8b..7ccebcd3e3 100644 --- a/include/openPMD/RecordComponent.hpp +++ b/include/openPMD/RecordComponent.hpp @@ -286,18 +286,17 @@ class RecordComponent : public BaseRecordComponent bool dirtyRecursive() const; protected: - /** - * Make sure to parse a RecordComponent only once. - */ - std::shared_ptr< bool > hasBeenRead = std::make_shared< bool >( false ); + /** * The same std::string that the parent class would pass as parameter to * RecordComponent::flush(). * This is stored only upon RecordComponent::flush() if * AbstractIOHandler::flushLevel is set to FlushLevel::SkeletonOnly * (for use by the Span-based overload of RecordComponent::storeChunk()). + * @todo Merge functionality with ownKeyInParent? */ std::shared_ptr< std::string > m_name = std::make_shared< std::string >(); + }; // RecordComponent } // namespace openPMD diff --git a/include/openPMD/backend/PatchRecordComponent.hpp b/include/openPMD/backend/PatchRecordComponent.hpp index 2cf89349b0..b0aed3a0d1 100644 --- a/include/openPMD/backend/PatchRecordComponent.hpp +++ b/include/openPMD/backend/PatchRecordComponent.hpp @@ -75,7 +75,6 @@ class PatchRecordComponent : public BaseRecordComponent void read(); std::shared_ptr< std::queue< IOTask > > m_chunks; - std::shared_ptr< bool > hasBeenRead = std::make_shared< bool >( false ); /** * @brief Check recursively whether this RecordComponent is dirty. diff --git a/src/Iteration.cpp b/src/Iteration.cpp index af279ddfe9..1f668985e8 100644 --- a/src/Iteration.cpp +++ b/src/Iteration.cpp @@ -334,8 +334,13 @@ void Iteration::deferParseAccess( DeferredParseAccess dr ) auxiliary::makeOption< DeferredParseAccess >( std::move( dr ) ); } -void Iteration::read() +void Iteration::read( std::string path, bool reread ) { + if( reread ) + { + read_impl( path ); + return; + } if( !m_deferredParseAccess->has_value() ) { return; @@ -679,7 +684,7 @@ void Iteration::runDeferredParseAccess() *newAccess = Access::READ_WRITE; try { - read(); + read( "", false ); } catch( ... ) { diff --git a/src/Mesh.cpp b/src/Mesh.cpp index 9646cf6c8e..2e236f1f0d 100644 --- a/src/Mesh.cpp +++ b/src/Mesh.cpp @@ -343,11 +343,6 @@ Mesh::read() for( auto const& component : *pList.paths ) { MeshRecordComponent& rc = (*this)[component]; - if ( *rc.hasBeenRead ) - { - dirty() = false; - continue; - } pOpen.path = component; IOHandler()->enqueue(IOTask(&rc, pOpen)); *rc.m_isConstant = true; @@ -362,11 +357,6 @@ Mesh::read() for( auto const& component : *dList.datasets ) { MeshRecordComponent & rc = ( *this )[ component ]; - if( *rc.hasBeenRead ) - { - dirty() = false; - continue; - } dOpen.name = component; IOHandler()->enqueue(IOTask(&rc, dOpen)); IOHandler()->flush(); diff --git a/src/Record.cpp b/src/Record.cpp index 7e4078d968..9a11f47272 100644 --- a/src/Record.cpp +++ b/src/Record.cpp @@ -110,11 +110,6 @@ Record::read() for( auto const& component : *pList.paths ) { RecordComponent& rc = (*this)[component]; - if ( *rc.hasBeenRead ) - { - dirty() = false; - continue; - } pOpen.path = component; IOHandler()->enqueue(IOTask(&rc, pOpen)); *rc.m_isConstant = true; @@ -129,10 +124,6 @@ Record::read() for( auto const& component : *dList.datasets ) { RecordComponent & rc = ( *this )[ component ]; - if( *rc.hasBeenRead ) - { - continue; - } dOpen.name = component; IOHandler()->enqueue(IOTask(&rc, dOpen)); IOHandler()->flush(); diff --git a/src/RecordComponent.cpp b/src/RecordComponent.cpp index 2e574e231e..b3348332da 100644 --- a/src/RecordComponent.cpp +++ b/src/RecordComponent.cpp @@ -256,13 +256,7 @@ RecordComponent::flush(std::string const& name) void RecordComponent::read() { - if ( *hasBeenRead ) - { - dirty() = false; - return; - } readBase(); - *hasBeenRead = true; } void diff --git a/src/Series.cpp b/src/Series.cpp index f2a2821227..af48e9efbe 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -1043,13 +1043,13 @@ SeriesImpl::readGorVBased( bool do_init ) auto readSingleIteration = [&series, &pOpen, this] - (uint64_t index, std::string path) + (uint64_t index, std::string path, bool guardClosed ) { if( series.iterations.contains( index ) ) { // maybe re-read auto & i = series.iterations.at( index ); - if( i.closedByWriter() ) + if( guardClosed && i.closedByWriter() ) { return; } @@ -1057,7 +1057,7 @@ SeriesImpl::readGorVBased( bool do_init ) { pOpen.path = path; IOHandler()->enqueue( IOTask( &i, pOpen ) ); - i.read(); + i.read( path, /* reread = */ true ); } } else @@ -1087,7 +1087,7 @@ SeriesImpl::readGorVBased( bool do_init ) for( auto const & it : *pList.paths ) { uint64_t index = std::stoull( it ); - readSingleIteration( index, it ); + readSingleIteration( index, it, true ); } break; case IterationEncoding::variableBased: @@ -1099,7 +1099,7 @@ SeriesImpl::readGorVBased( bool do_init ) .getAttribute( "__step__" ) .get< uint64_t >(); } - readSingleIteration( index, "" ); + readSingleIteration( index, "", false ); break; } } diff --git a/src/backend/MeshRecordComponent.cpp b/src/backend/MeshRecordComponent.cpp index 821fac89e5..0a3a72d597 100644 --- a/src/backend/MeshRecordComponent.cpp +++ b/src/backend/MeshRecordComponent.cpp @@ -32,11 +32,6 @@ MeshRecordComponent::MeshRecordComponent() void MeshRecordComponent::read() { - if ( *hasBeenRead ) - { - dirty() = false; - return; - } using DT = Datatype; Parameter< Operation::READ_ATT > aRead; @@ -60,7 +55,6 @@ MeshRecordComponent::read() throw std::runtime_error( "Unexpected Attribute datatype for 'position'"); readBase(); - *hasBeenRead = true; } template< typename T > diff --git a/src/backend/PatchRecordComponent.cpp b/src/backend/PatchRecordComponent.cpp index d597f395bf..eff8b47f91 100644 --- a/src/backend/PatchRecordComponent.cpp +++ b/src/backend/PatchRecordComponent.cpp @@ -105,12 +105,6 @@ PatchRecordComponent::flush(std::string const& name) void PatchRecordComponent::read() { - if ( *hasBeenRead ) - { - dirty() = false; - return; - } - Parameter< Operation::READ_ATT > aRead; aRead.name = "unitSI"; @@ -122,8 +116,6 @@ PatchRecordComponent::read() throw std::runtime_error("Unexpected Attribute datatype for 'unitSI'"); readAttributes(); // this will set dirty() = false - - *hasBeenRead = true; } bool From 2a0accfc4c9af2c4f992080d43e5b405d4395c12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 7 Apr 2021 15:59:19 +0200 Subject: [PATCH 04/11] Remove %V shorthand to select variable-based layout --- src/Series.cpp | 16 +--------------- test/SerialIOTest.cpp | 6 +++--- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/src/Series.cpp b/src/Series.cpp index af48e9efbe..57a46c02e1 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -389,25 +389,11 @@ SeriesImpl::parseInput(std::string filepath) input->format = determineFormat(input->name); - // first, check for file-based iteration layout std::regex pattern("(.*)%(0[[:digit:]]+)?T(.*)"); std::smatch regexMatch; std::regex_match(input->name, regexMatch, pattern); if( regexMatch.empty() ) - { - // now check for variable-based iteration layout - pattern = "(.*)%V(\\.[^.]+)"; - std::regex_match( input->name, regexMatch, pattern ); - if( regexMatch.empty() ) - { - input->iterationEncoding = IterationEncoding::groupBased; - } - else - { - input->iterationEncoding = IterationEncoding::variableBased; - input->name = regexMatch[1].str() + regexMatch[2].str(); - } - } + input->iterationEncoding = IterationEncoding::groupBased; else if( regexMatch.size() == 4 ) { input->iterationEncoding = IterationEncoding::fileBased; diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index fb4f8d2011..ffd4fb7ea4 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -3503,6 +3503,7 @@ variableBasedSeries( std::string const & file ) constexpr Extent::value_type extent = 1000; { Series writeSeries( file, Access::CREATE ); + writeSeries.setIterationEncoding( IterationEncoding::variableBased ); REQUIRE( writeSeries.iterationEncoding() == IterationEncoding::variableBased ); if( writeSeries.backend() == "ADIOS1" ) @@ -3531,8 +3532,7 @@ variableBasedSeries( std::string const & file ) } } - REQUIRE( auxiliary::directory_exists( - auxiliary::replace_last( file, "%V", "" ) ) ); + REQUIRE( auxiliary::directory_exists( file ) ); { Series readSeries( @@ -3595,7 +3595,7 @@ variableBasedSeries( std::string const & file ) TEST_CASE( "variableBasedSeries", "[serial][adios2]" ) { - variableBasedSeries( "../samples/variableBasedSeries%V.bp" ); + variableBasedSeries( "../samples/variableBasedSeries.bp" ); } #endif From bb9cfd48346b34219b8162e04ca8b158423bfa24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 7 Apr 2021 16:11:51 +0200 Subject: [PATCH 05/11] Hijack some other tests for variable-based encoding --- test/ParallelIOTest.cpp | 10 +++++-- test/SerialIOTest.cpp | 64 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 5 deletions(-) diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index 4a4e0d9f25..1f2c34cf0d 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -846,7 +846,7 @@ TEST_CASE( "hipace_like_write", "[parallel]" ) #if openPMD_HAVE_ADIOS2 && openPMD_HAVE_MPI void -adios2_streaming() +adios2_streaming( bool variableBasedLayout ) { int size{ -1 }; int rank{ -1 }; @@ -870,6 +870,11 @@ adios2_streaming() // write Series writeSeries( "../samples/adios2_stream.sst", Access::CREATE ); + if( variableBasedLayout ) + { + writeSeries.setIterationEncoding( + IterationEncoding::variableBased ); + } auto iterations = writeSeries.writeIterations(); for( size_t i = 0; i < 10; ++i ) { @@ -940,7 +945,8 @@ adios2_streaming() TEST_CASE( "adios2_streaming", "[pseudoserial][adios2]" ) { - adios2_streaming(); + adios2_streaming( true ); + adios2_streaming( false ); } TEST_CASE( "parallel_adios2_json_config", "[parallel][adios2]" ) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index ffd4fb7ea4..92a9c84ecf 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -3519,6 +3519,8 @@ variableBasedSeries( std::string const & file ) std::vector< int > data( 1000, i ); E_x.storeChunk( data, { 0 }, { 1000 } ); + // this tests changing extents and dimensionalities + // across iterations auto E_y = iteration.meshes[ "E" ][ "y" ]; unsigned dimensionality = i % 3 + 1; unsigned len = i + 1; @@ -3528,6 +3530,12 @@ variableBasedSeries( std::string const & file ) std::pow( len, dimensionality ), dimensionality ); E_y.storeChunk( changingData, Offset( dimensionality, 0 ), changingExtent ); + + // this tests datasets that are present in one iteration, but not + // in others + auto E_z = iteration.meshes[ "E" ][ std::to_string( i ) ]; + E_z.resetDataset( { Datatype::INT, { 1 } } ); + E_z.makeConstant( i ); iteration.close(); } } @@ -3558,6 +3566,24 @@ variableBasedSeries( std::string const & file ) unsigned len = iteration.iterationIndex + 1; Extent changingExtent( dimensionality, len ); REQUIRE( E_y.getExtent() == changingExtent ); + + // this loop ensures that only the recordcomponent ["E"]["i"] is + // present where i == iteration.iterationIndex + for( uint64_t otherIteration = 0; otherIteration < 10; + ++otherIteration ) + { + // component is present <=> (otherIteration == i) + REQUIRE( + iteration.meshes[ "E" ].contains( + std::to_string( otherIteration ) ) == + ( otherIteration == iteration.iterationIndex ) ); + } + REQUIRE( + iteration + .meshes[ "E" ][ std::to_string( iteration.iterationIndex ) ] + .getAttribute( "value" ) + .get< int >() == int( iteration.iterationIndex ) ); + last_iteration_index = iteration.iterationIndex; } REQUIRE( last_iteration_index == 9 ); @@ -3587,6 +3613,24 @@ variableBasedSeries( std::string const & file ) unsigned len = iteration.iterationIndex + 1; Extent changingExtent( dimensionality, len ); REQUIRE( E_y.getExtent() == changingExtent ); + + // this loop ensures that only the recordcomponent ["E"]["i"] is + // present where i == iteration.iterationIndex + for( uint64_t otherIteration = 0; otherIteration < 10; + ++otherIteration ) + { + // component is present <=> (otherIteration == i) + REQUIRE( + iteration.meshes[ "E" ].contains( + std::to_string( otherIteration ) ) == + ( otherIteration == iteration.iterationIndex ) ); + } + REQUIRE( + iteration + .meshes[ "E" ][ std::to_string( iteration.iterationIndex ) ] + .getAttribute( "value" ) + .get< int >() == int( iteration.iterationIndex ) ); + last_iteration_index = iteration.iterationIndex; } REQUIRE( last_iteration_index == 9 ); @@ -3601,11 +3645,21 @@ TEST_CASE( "variableBasedSeries", "[serial][adios2]" ) // @todo Upon switching to ADIOS2 2.7.0, test this the other way around also void -iterate_nonstreaming_series( std::string const & file ) +iterate_nonstreaming_series( + std::string const & file, bool variableBasedLayout ) { constexpr size_t extent = 100; { Series writeSeries( file, Access::CREATE ); + if( variableBasedLayout ) + { + if( writeSeries.backend() != "ADIOS2" ) + { + return; + } + writeSeries.setIterationEncoding( + IterationEncoding::variableBased ); + } // use conventional API to write iterations auto iterations = writeSeries.iterations; for( size_t i = 0; i < 10; ++i ) @@ -3709,9 +3763,11 @@ TEST_CASE( "iterate_nonstreaming_series", "[serial][adios2]" ) for( auto const & t : testedFileExtensions() ) { iterate_nonstreaming_series( - "../samples/iterate_nonstreaming_series_filebased_%T." + t ); + "../samples/iterate_nonstreaming_series_filebased_%T." + t, false ); + iterate_nonstreaming_series( + "../samples/iterate_nonstreaming_series_groupbased." + t, false ); iterate_nonstreaming_series( - "../samples/iterate_nonstreaming_series_groupbased." + t ); + "../samples/iterate_nonstreaming_series_variablebased." + t, true ); } } @@ -3730,6 +3786,8 @@ extendDataset( std::string const & ext ) // dataset resizing unsupported in ADIOS1 return; } + // only one iteration written anyway + write.setIterationEncoding( IterationEncoding::variableBased ); Dataset ds1{ Datatype::INT, { 5, 5 } }; Dataset ds2{ Datatype::INT, { 10, 5 } }; From 7330e33ab9f95d92d25541c0b6cda34f7978d95f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 7 Apr 2021 17:12:12 +0200 Subject: [PATCH 06/11] Refine re-parsing When re-parsing a Series, we must keep old handles valid, but we might need to delete stale map members. --- include/openPMD/backend/Container.hpp | 68 +++++++++++++++++++++++++-- src/Iteration.cpp | 9 ++-- src/Mesh.cpp | 18 +++++-- src/ParticleSpecies.cpp | 12 +++-- src/RecordComponent.cpp | 3 +- 5 files changed, 95 insertions(+), 15 deletions(-) diff --git a/include/openPMD/backend/Container.hpp b/include/openPMD/backend/Container.hpp index db133cc86b..fbef152da9 100644 --- a/include/openPMD/backend/Container.hpp +++ b/include/openPMD/backend/Container.hpp @@ -23,10 +23,12 @@ #include "openPMD/backend/Attributable.hpp" #include -#include -#include #include +#include +#include #include +#include +#include // expose private and protected members for invasive testing #ifndef OPENPMD_protected @@ -106,7 +108,6 @@ class Container : public LegacyAttributable static_assert( std::is_base_of< AttributableImpl, T >::value, "Type of container element must be derived from Writable"); - using InternalContainer = T_container; friend class Iteration; friend class ParticleSpecies; @@ -114,6 +115,9 @@ class Container : public LegacyAttributable friend class SeriesImpl; friend class Series; +protected: + using InternalContainer = T_container; + public: using key_type = typename InternalContainer::key_type; using mapped_type = typename InternalContainer::mapped_type; @@ -321,6 +325,64 @@ class Container : public LegacyAttributable } std::shared_ptr< InternalContainer > m_container; + + class EraseStaleEntries + { + std::set< key_type > m_accessedKeys; + /* + * Note: Putting a copy here leads to weird bugs due to destructors + * being called too eagerly upon destruction. + * Should be avoidable by extending the frontend redesign to the + * Container class template + * (https://github.com/openPMD/openPMD-api/pull/886) + */ + Container & m_originalContainer; + + public: + explicit EraseStaleEntries( Container & container_in ) + : m_originalContainer( container_in ) + { + } + + template< typename K > + mapped_type & operator[]( K && k ) + { + m_accessedKeys.insert( k ); // copy + return m_originalContainer[ std::forward< K >( k ) ]; + } + + template< typename K > + mapped_type & at( K && k ) + { + m_accessedKeys.insert( k ); // copy + return m_originalContainer.at( std::forward< K >( k ) ); + } + + ~EraseStaleEntries() + { + auto & map = *m_originalContainer.m_container; + using iterator_t = typename InternalContainer::const_iterator; + std::vector< iterator_t > deleteMe; + deleteMe.reserve( map.size() - m_accessedKeys.size() ); + for( iterator_t it = map.begin(); it != map.end(); ++it ) + { + auto lookup = m_accessedKeys.find( it->first ); + if( lookup == m_accessedKeys.end() ) + { + deleteMe.push_back( it ); + } + } + for( auto & it : deleteMe ) + { + map.erase( it ); + } + } + }; + + EraseStaleEntries eraseStaleEntries() + { + return EraseStaleEntries( *this ); + } }; } // openPMD diff --git a/src/Iteration.cpp b/src/Iteration.cpp index 1f668985e8..91f329fe74 100644 --- a/src/Iteration.cpp +++ b/src/Iteration.cpp @@ -447,6 +447,8 @@ void Iteration::read_impl( std::string const & groupPath ) meshes.readAttributes(); + auto map = meshes.eraseStaleEntries(); + /* obtain all non-scalar meshes */ IOHandler()->enqueue(IOTask(&meshes, pList)); IOHandler()->flush(); @@ -454,7 +456,7 @@ void Iteration::read_impl( std::string const & groupPath ) Parameter< Operation::LIST_ATTS > aList; for( auto const& mesh_name : *pList.paths ) { - Mesh& m = meshes[mesh_name]; + Mesh& m = map[mesh_name]; pOpen.path = mesh_name; aList.attributes->clear(); IOHandler()->enqueue(IOTask(&m, pOpen)); @@ -484,7 +486,7 @@ void Iteration::read_impl( std::string const & groupPath ) Parameter< Operation::OPEN_DATASET > dOpen; for( auto const& mesh_name : *dList.datasets ) { - Mesh& m = meshes[mesh_name]; + Mesh& m = map[mesh_name]; dOpen.name = mesh_name; IOHandler()->enqueue(IOTask(&m, dOpen)); IOHandler()->flush(); @@ -515,9 +517,10 @@ void Iteration::read_impl( std::string const & groupPath ) IOHandler()->enqueue(IOTask(&particles, pList)); IOHandler()->flush(); + auto map = particles.eraseStaleEntries(); for( auto const& species_name : *pList.paths ) { - ParticleSpecies& p = particles[species_name]; + ParticleSpecies& p = map[species_name]; pOpen.path = species_name; IOHandler()->enqueue(IOTask(&p, pOpen)); IOHandler()->flush(); diff --git a/src/Mesh.cpp b/src/Mesh.cpp index 2e236f1f0d..d758f50ddc 100644 --- a/src/Mesh.cpp +++ b/src/Mesh.cpp @@ -18,10 +18,10 @@ * and the GNU Lesser General Public License along with openPMD-api. * If not, see . */ -#include "openPMD/auxiliary/StringManip.hpp" #include "openPMD/Mesh.hpp" #include "openPMD/Series.hpp" #include "openPMD/auxiliary/DerefDynamicCast.hpp" +#include "openPMD/auxiliary/StringManip.hpp" #include "openPMD/backend/Writable.hpp" #include @@ -247,6 +247,16 @@ Mesh::flush_impl(std::string const& name) void Mesh::read() { + /* + * @todo This is a proof-of-concept on how we need to rework our re-parsing + * procedures. Goals when parsing an iteration again: + * 1. Old handles must survive + * 2. Map entries that cannot be found any more must perish. + * @todo This approach will kill references that users may have to mapped + * components, so improve that. + */ + auto map = eraseStaleEntries(); + using DT = Datatype; Parameter< Operation::READ_ATT > aRead; @@ -332,7 +342,7 @@ Mesh::read() if( scalar() ) { /* using operator[] will incorrectly update parent */ - this->at(MeshRecordComponent::SCALAR).read(); + map.at(MeshRecordComponent::SCALAR).read(); } else { Parameter< Operation::LIST_PATHS > pList; @@ -342,7 +352,7 @@ Mesh::read() Parameter< Operation::OPEN_PATH > pOpen; for( auto const& component : *pList.paths ) { - MeshRecordComponent& rc = (*this)[component]; + MeshRecordComponent& rc = map[ component ]; pOpen.path = component; IOHandler()->enqueue(IOTask(&rc, pOpen)); *rc.m_isConstant = true; @@ -356,7 +366,7 @@ Mesh::read() Parameter< Operation::OPEN_DATASET > dOpen; for( auto const& component : *dList.datasets ) { - MeshRecordComponent & rc = ( *this )[ component ]; + MeshRecordComponent & rc = map[ component ]; dOpen.name = component; IOHandler()->enqueue(IOTask(&rc, dOpen)); IOHandler()->flush(); diff --git a/src/ParticleSpecies.cpp b/src/ParticleSpecies.cpp index cdab14cb96..3ecd0a011a 100644 --- a/src/ParticleSpecies.cpp +++ b/src/ParticleSpecies.cpp @@ -42,6 +42,8 @@ ParticleSpecies::read() IOHandler()->enqueue(IOTask(this, pList)); IOHandler()->flush(); + auto map = eraseStaleEntries(); + Parameter< Operation::OPEN_PATH > pOpen; Parameter< Operation::LIST_ATTS > aList; bool hasParticlePatches = false; @@ -55,7 +57,7 @@ ParticleSpecies::read() particlePatches.read(); } else { - Record& r = (*this)[record_name]; + Record& r = map[record_name]; pOpen.path = record_name; aList.attributes->clear(); IOHandler()->enqueue(IOTask(&r, pOpen)); @@ -68,7 +70,8 @@ ParticleSpecies::read() auto shape = std::find(att_begin, att_end, "shape"); if( value != att_end && shape != att_end ) { - RecordComponent& rc = r[RecordComponent::SCALAR]; + auto scalarMap = r.eraseStaleEntries(); + RecordComponent& rc = scalarMap[RecordComponent::SCALAR]; rc.parent() = r.parent(); IOHandler()->enqueue(IOTask(&rc, pOpen)); IOHandler()->flush(); @@ -94,11 +97,12 @@ ParticleSpecies::read() for( auto const& record_name : *dList.datasets ) { try { - Record& r = (*this)[record_name]; + Record& r = map[record_name]; dOpen.name = record_name; IOHandler()->enqueue(IOTask(&r, dOpen)); IOHandler()->flush(); - RecordComponent& rc = r[RecordComponent::SCALAR]; + auto scalarMap = r.eraseStaleEntries(); + RecordComponent& rc = scalarMap[RecordComponent::SCALAR]; rc.parent() = r.parent(); IOHandler()->enqueue(IOTask(&rc, dOpen)); IOHandler()->flush(); diff --git a/src/RecordComponent.cpp b/src/RecordComponent.cpp index b3348332da..64d0b81f0f 100644 --- a/src/RecordComponent.cpp +++ b/src/RecordComponent.cpp @@ -33,7 +33,8 @@ namespace openPMD { -// we must instantiate this somewhere even if it is constexpr +// We need to instantiate this somewhere otherwise there might be linker issues +// despite this thing actually being constepxr constexpr char const * const RecordComponent::SCALAR; RecordComponent::RecordComponent() From 1f9369a390c1fa0d66322872d031c0b1dd25aa6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 8 Apr 2021 12:01:45 +0200 Subject: [PATCH 07/11] Test that attributes don't occur in the wrong step Test still failing --- test/SerialIOTest.cpp | 68 ++++++++++++------------------------------- 1 file changed, 18 insertions(+), 50 deletions(-) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 92a9c84ecf..87c161fe43 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -3536,17 +3536,20 @@ variableBasedSeries( std::string const & file ) auto E_z = iteration.meshes[ "E" ][ std::to_string( i ) ]; E_z.resetDataset( { Datatype::INT, { 1 } } ); E_z.makeConstant( i ); + // this tests attributes that are present in one iteration, but not + // in others + iteration.meshes[ "E" ].setAttribute( + "attr_" + std::to_string( i ), i ); + iteration.close(); } } REQUIRE( auxiliary::directory_exists( file ) ); + auto testRead = [ &file, &extent ]( std::string const & jsonConfig ) { - Series readSeries( - file, - Access::READ_ONLY, - "{\"defer_iteration_parsing\": true}" ); + Series readSeries( file, Access::READ_ONLY, jsonConfig ); size_t last_iteration_index = 0; for( auto iteration : readSeries.readIterations() ) @@ -3577,52 +3580,9 @@ variableBasedSeries( std::string const & file ) iteration.meshes[ "E" ].contains( std::to_string( otherIteration ) ) == ( otherIteration == iteration.iterationIndex ) ); - } - REQUIRE( - iteration - .meshes[ "E" ][ std::to_string( iteration.iterationIndex ) ] - .getAttribute( "value" ) - .get< int >() == int( iteration.iterationIndex ) ); - - last_iteration_index = iteration.iterationIndex; - } - REQUIRE( last_iteration_index == 9 ); - } - - { - Series readSeries( - file, - Access::READ_ONLY, - "{\"defer_iteration_parsing\": false}" ); - - size_t last_iteration_index = 0; - for( auto iteration : readSeries.readIterations() ) - { - auto E_x = iteration.meshes[ "E" ][ "x" ]; - REQUIRE( E_x.getDimensionality() == 1 ); - REQUIRE( E_x.getExtent()[ 0 ] == extent ); - auto chunk = E_x.loadChunk< int >( { 0 }, { extent } ); - iteration.close(); - for( size_t i = 0; i < extent; ++i ) - { - REQUIRE( chunk.get()[ i ] == int( iteration.iterationIndex ) ); - } - - auto E_y = iteration.meshes[ "E" ][ "y" ]; - unsigned dimensionality = iteration.iterationIndex % 3 + 1; - unsigned len = iteration.iterationIndex + 1; - Extent changingExtent( dimensionality, len ); - REQUIRE( E_y.getExtent() == changingExtent ); - - // this loop ensures that only the recordcomponent ["E"]["i"] is - // present where i == iteration.iterationIndex - for( uint64_t otherIteration = 0; otherIteration < 10; - ++otherIteration ) - { - // component is present <=> (otherIteration == i) REQUIRE( - iteration.meshes[ "E" ].contains( - std::to_string( otherIteration ) ) == + iteration.meshes[ "E" ].containsAttribute( + "attr_" + std::to_string( otherIteration ) ) == ( otherIteration == iteration.iterationIndex ) ); } REQUIRE( @@ -3630,11 +3590,19 @@ variableBasedSeries( std::string const & file ) .meshes[ "E" ][ std::to_string( iteration.iterationIndex ) ] .getAttribute( "value" ) .get< int >() == int( iteration.iterationIndex ) ); + REQUIRE( + iteration.meshes[ "E" ] + .getAttribute( + "attr_" + std::to_string( iteration.iterationIndex ) ) + .get< int >() == int( iteration.iterationIndex ) ); last_iteration_index = iteration.iterationIndex; } REQUIRE( last_iteration_index == 9 ); - } + }; + + testRead( "{\"defer_iteration_parsing\": true}" ); + testRead( "{\"defer_iteration_parsing\": false}" ); } TEST_CASE( "variableBasedSeries", "[serial][adios2]" ) From 1b322d39a9e1c3ed047b4650ad1a0e3a9da817d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 8 Apr 2021 12:47:42 +0200 Subject: [PATCH 08/11] Specifiy more precisely when to re-read attributes --- include/openPMD/Series.hpp | 3 --- include/openPMD/backend/Attributable.hpp | 19 +++++++++++++- src/Iteration.cpp | 6 ++--- src/Mesh.cpp | 2 +- src/ParticleSpecies.cpp | 2 +- src/Record.cpp | 2 +- src/RecordComponent.cpp | 2 +- src/Series.cpp | 8 +++--- src/backend/Attributable.cpp | 33 +++++++++++++++--------- src/backend/PatchRecordComponent.cpp | 2 +- 10 files changed, 51 insertions(+), 28 deletions(-) diff --git a/include/openPMD/Series.hpp b/include/openPMD/Series.hpp index c9743d029f..d8c908fbbb 100644 --- a/include/openPMD/Series.hpp +++ b/include/openPMD/Series.hpp @@ -364,9 +364,6 @@ class SeriesImpl : public AttributableImpl * Note on re-parsing of a Series: * If init == false, the parsing process will seek for new * Iterations/Records/Record Components etc. - * Re-parsing of objects that have already been parsed is not implemented - * as of yet. Such a facility will be required upon implementing things such - * as resizable datasets. */ void readGorVBased( bool init = true ); void readBase(); diff --git a/include/openPMD/backend/Attributable.hpp b/include/openPMD/backend/Attributable.hpp index ce3f08ec75..8cf9cfd59a 100644 --- a/include/openPMD/backend/Attributable.hpp +++ b/include/openPMD/backend/Attributable.hpp @@ -215,7 +215,24 @@ class AttributableImpl std::vector< std::string > myPath() const; void flushAttributes(); - void readAttributes( bool reread = false ); + enum ReadMode { + /** + * Don't read an attribute from the backend if it has been previously + * read. + */ + IgnoreExisting, + /** + * Read all the attributes that the backend has to offer and override + * if it has been read previously. + */ + OverrideExisting, + /** + * Remove all attributes that have been read previously and read + * everything that the backend currently has to offer. + */ + FullyReread + }; + void readAttributes( ReadMode ); /** Retrieve the value of a floating point Attribute of user-defined precision with ensured type-safety. * diff --git a/src/Iteration.cpp b/src/Iteration.cpp index 91f329fe74..771f8adc8f 100644 --- a/src/Iteration.cpp +++ b/src/Iteration.cpp @@ -445,7 +445,7 @@ void Iteration::read_impl( std::string const & groupPath ) pOpen.path = s->meshesPath(); IOHandler()->enqueue(IOTask(&meshes, pOpen)); - meshes.readAttributes(); + meshes.readAttributes( ReadMode::FullyReread ); auto map = meshes.eraseStaleEntries(); @@ -510,7 +510,7 @@ void Iteration::read_impl( std::string const & groupPath ) pOpen.path = s->particlesPath(); IOHandler()->enqueue(IOTask(&particles, pOpen)); - particles.readAttributes(); + particles.readAttributes( ReadMode::FullyReread ); /* obtain all particle species */ pList.paths->clear(); @@ -532,7 +532,7 @@ void Iteration::read_impl( std::string const & groupPath ) particles.dirty() = false; } - readAttributes(); + readAttributes( ReadMode::FullyReread ); } AdvanceStatus diff --git a/src/Mesh.cpp b/src/Mesh.cpp index d758f50ddc..914428677c 100644 --- a/src/Mesh.cpp +++ b/src/Mesh.cpp @@ -379,7 +379,7 @@ Mesh::read() readBase(); - readAttributes(); + readAttributes( ReadMode::FullyReread ); } } // openPMD diff --git a/src/ParticleSpecies.cpp b/src/ParticleSpecies.cpp index 3ecd0a011a..55e50df5fe 100644 --- a/src/ParticleSpecies.cpp +++ b/src/ParticleSpecies.cpp @@ -123,7 +123,7 @@ ParticleSpecies::read() } } - readAttributes(); + readAttributes( ReadMode::FullyReread ); } namespace diff --git a/src/Record.cpp b/src/Record.cpp index 9a11f47272..989eea975b 100644 --- a/src/Record.cpp +++ b/src/Record.cpp @@ -136,7 +136,7 @@ Record::read() readBase(); - readAttributes(); + readAttributes( ReadMode::FullyReread ); } template <> diff --git a/src/RecordComponent.cpp b/src/RecordComponent.cpp index 64d0b81f0f..0bcbab8dea 100644 --- a/src/RecordComponent.cpp +++ b/src/RecordComponent.cpp @@ -370,7 +370,7 @@ RecordComponent::readBase() else throw std::runtime_error("Unexpected Attribute datatype for 'unitSI'"); - readAttributes(); + readAttributes( ReadMode::FullyReread ); } bool diff --git a/src/Series.cpp b/src/Series.cpp index 57a46c02e1..8891851bc7 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -948,8 +948,8 @@ void SeriesImpl::readOneIterationFileBased( std::string const & filePath ) throw std::runtime_error("Unknown openPMD version - " + version); IOHandler()->enqueue(IOTask(&series.iterations, pOpen)); - readAttributes(); - series.iterations.readAttributes(); + readAttributes( ReadMode::IgnoreExisting ); + series.iterations.readAttributes(ReadMode::OverrideExisting ); } void @@ -1017,11 +1017,11 @@ SeriesImpl::readGorVBased( bool do_init ) throw std::runtime_error("Unknown openPMD version - " + version); IOHandler()->enqueue(IOTask(&series.iterations, pOpen)); - readAttributes(); + readAttributes( ReadMode::IgnoreExisting ); /* * __step__ changes over steps, so reread that. */ - series.iterations.readAttributes( /* reread = */ true ); + series.iterations.readAttributes( ReadMode::OverrideExisting ); /* obtain all paths inside the basepath (i.e. all iterations) */ Parameter< Operation::LIST_PATHS > pList; IOHandler()->enqueue(IOTask(&series.iterations, pList)); diff --git a/src/backend/Attributable.cpp b/src/backend/Attributable.cpp index e71feb46ce..cad2033e85 100644 --- a/src/backend/Attributable.cpp +++ b/src/backend/Attributable.cpp @@ -186,30 +186,39 @@ AttributableImpl::flushAttributes() } void -AttributableImpl::readAttributes( bool reread ) +AttributableImpl::readAttributes( ReadMode mode ) { + auto & attri = get(); Parameter< Operation::LIST_ATTS > aList; IOHandler()->enqueue(IOTask(this, aList)); IOHandler()->flush(); std::vector< std::string > written_attributes = attributes(); + /* std::set_difference requires sorted ranges */ + std::sort(aList.attributes->begin(), aList.attributes->end()); + std::sort(written_attributes.begin(), written_attributes.end()); + std::set< std::string > tmpAttributes; - if( reread ) - { - tmpAttributes = std::set< std::string >( - aList.attributes->begin(), - aList.attributes->end() ); - } - else + switch( mode ) { - /* std::set_difference requires sorted ranges */ - std::sort(aList.attributes->begin(), aList.attributes->end()); - std::sort(written_attributes.begin(), written_attributes.end()); - + case ReadMode::IgnoreExisting: + // reread: aList - written_attributes std::set_difference( aList.attributes->begin(), aList.attributes->end(), written_attributes.begin(), written_attributes.end(), std::inserter(tmpAttributes, tmpAttributes.begin())); + break; + case ReadMode::OverrideExisting: + tmpAttributes = std::set< std::string >( + aList.attributes->begin(), + aList.attributes->end() ); + break; + case ReadMode::FullyReread: + attri.m_attributes.clear(); + tmpAttributes = std::set< std::string >( + aList.attributes->begin(), + aList.attributes->end() ); + break; } using DT = Datatype; diff --git a/src/backend/PatchRecordComponent.cpp b/src/backend/PatchRecordComponent.cpp index eff8b47f91..aa97f6327d 100644 --- a/src/backend/PatchRecordComponent.cpp +++ b/src/backend/PatchRecordComponent.cpp @@ -115,7 +115,7 @@ PatchRecordComponent::read() else throw std::runtime_error("Unexpected Attribute datatype for 'unitSI'"); - readAttributes(); // this will set dirty() = false + readAttributes( ReadMode::FullyReread ); // this will set dirty() = false } bool From 1636c31945f0f48cdedf777f366eaa4abcde48f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 8 Apr 2021 14:53:32 +0200 Subject: [PATCH 09/11] Fix ADIOS1 bug: Wrong datatype reported for unitDimension --- src/backend/Attributable.cpp | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/backend/Attributable.cpp b/src/backend/Attributable.cpp index cad2033e85..186ac5fee7 100644 --- a/src/backend/Attributable.cpp +++ b/src/backend/Attributable.cpp @@ -241,6 +241,29 @@ AttributableImpl::readAttributes( ReadMode mode ) continue; } Attribute a(*aRead.resource); + + auto guardUnitDimension = + [ this ]( std::string const & key, auto vector ) + { + if( key == "unitDimension" ) + { + // Some backends may report the wrong type when reading + if( vector.size() != 7 ) + { + throw std::runtime_error( + "[Attributable] " + "Unexpected datatype for unitDimension." ); + } + std::array< double, 7 > arr; + std::copy_n( vector.begin(), 7, arr.begin() ); + setAttribute( key, std::move( arr ) ); + } + else + { + setAttribute( key, std::move( vector ) ); + } + }; + switch( *aRead.dtype ) { case DT::CHAR: @@ -325,13 +348,13 @@ AttributableImpl::readAttributes( ReadMode mode ) setAttribute(att, a.get< std::vector< unsigned long long > >()); break; case DT::VEC_FLOAT: - setAttribute(att, a.get< std::vector< float > >()); + guardUnitDimension( att, a.get< std::vector< float > >() ); break; case DT::VEC_DOUBLE: - setAttribute(att, a.get< std::vector< double > >()); + guardUnitDimension( att, a.get< std::vector< double > >() ); break; case DT::VEC_LONG_DOUBLE: - setAttribute(att, a.get< std::vector< long double > >()); + guardUnitDimension( att, a.get< std::vector< long double > >() ); break; case DT::VEC_CFLOAT: setAttribute(att, a.get< std::vector< std::complex< float > > >()); From c12d8fc9a30076eac8c3930c95c78d09af7fbb69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 8 Apr 2021 19:13:29 +0200 Subject: [PATCH 10/11] Rename __step__ -> snapshot --- src/Iteration.cpp | 2 +- src/Series.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Iteration.cpp b/src/Iteration.cpp index 771f8adc8f..868db95f19 100644 --- a/src/Iteration.cpp +++ b/src/Iteration.cpp @@ -271,7 +271,7 @@ Iteration::flushVariableBased( uint64_t i ) Parameter< Operation::OPEN_PATH > pOpen; pOpen.path = ""; IOHandler()->enqueue( IOTask( this, pOpen ) ); - this->setAttribute( "__step__", i ); + this->setAttribute( "snapshot", i ); } flush(); diff --git a/src/Series.cpp b/src/Series.cpp index 8891851bc7..35ec5f66dc 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -1019,7 +1019,7 @@ SeriesImpl::readGorVBased( bool do_init ) readAttributes( ReadMode::IgnoreExisting ); /* - * __step__ changes over steps, so reread that. + * 'snapshot' changes over steps, so reread that. */ series.iterations.readAttributes( ReadMode::OverrideExisting ); /* obtain all paths inside the basepath (i.e. all iterations) */ @@ -1079,10 +1079,10 @@ SeriesImpl::readGorVBased( bool do_init ) case IterationEncoding::variableBased: { uint64_t index = 0; - if( series.iterations.containsAttribute( "__step__" ) ) + if( series.iterations.containsAttribute( "snapshot" ) ) { index = series.iterations - .getAttribute( "__step__" ) + .getAttribute( "snapshot" ) .get< uint64_t >(); } readSingleIteration( index, "", false ); From db5f0a7f10bc746dca30723dd8d07b726a2242a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 9 Apr 2021 11:09:55 +0200 Subject: [PATCH 11/11] Code cleanup and in-code documentation --- include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp | 6 ++++- include/openPMD/IO/IOTask.hpp | 5 ++++ include/openPMD/Iteration.hpp | 24 ++++++++++++++++++-- include/openPMD/Series.hpp | 3 +++ include/openPMD/backend/Container.hpp | 8 +++++++ src/Iteration.cpp | 24 ++++++++++++-------- src/Mesh.cpp | 8 ------- src/Series.cpp | 17 ++++++++------ 8 files changed, 68 insertions(+), 27 deletions(-) diff --git a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp index 0a85fb15c6..f3256eb7e9 100644 --- a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp +++ b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp @@ -225,6 +225,10 @@ class ADIOS2IOHandlerImpl private: adios2::ADIOS m_ADIOS; + /* + * If the iteration encoding is variableBased, we default to using the + * 2021_02_09 schema since it allows mutable attributes. + */ IterationEncoding m_iterationEncoding = IterationEncoding::groupBased; /** * The ADIOS2 engine type, to be passed to adios2::IO::SetEngine @@ -1369,7 +1373,7 @@ class ADIOS2IOHandler : public AbstractIOHandler { #if openPMD_HAVE_ADIOS2 - friend class ADIOS2IOHandlerImpl; +friend class ADIOS2IOHandlerImpl; private: ADIOS2IOHandlerImpl m_impl; diff --git a/include/openPMD/IO/IOTask.hpp b/include/openPMD/IO/IOTask.hpp index 4f49222a8e..269639a20b 100644 --- a/include/openPMD/IO/IOTask.hpp +++ b/include/openPMD/IO/IOTask.hpp @@ -138,6 +138,11 @@ struct OPENPMDAPI_EXPORT Parameter< Operation::OPEN_FILE > : public AbstractPara } std::string name = ""; + /* + * The backends might need to ensure availability of certain features + * for some iteration encodings, e.g. availability of ADIOS steps for + * variableBased encoding. + */ IterationEncoding encoding = IterationEncoding::groupBased; }; diff --git a/include/openPMD/Iteration.hpp b/include/openPMD/Iteration.hpp index 9f168c4ca2..e950ecd770 100644 --- a/include/openPMD/Iteration.hpp +++ b/include/openPMD/Iteration.hpp @@ -156,9 +156,25 @@ class Iteration : public LegacyAttributable struct DeferredParseAccess { + /** + * The group path within /data containing this iteration. + * Example: "1" for iteration 1, "" in variable-based iteration + * encoding. + */ std::string path; + /** + * The iteration index as accessed by the user in series.iterations[i] + */ uint64_t iteration = 0; + /** + * If this iteration is part of a Series with file-based layout. + * (Group- and variable-based parsing shares the same code logic.) + */ bool fileBased = false; + /** + * If fileBased == true, the file name (without file path) of the file + * containing this iteration. + */ std::string filename; }; @@ -179,11 +195,15 @@ class Iteration : public LegacyAttributable * allow for those different control flows. * Finally, read_impl() is called which contains the common parsing * logic for an iteration. + * + * reread() reads again an Iteration that has been previously read. + * Calling it on an Iteration not yet parsed is an error. * */ - void read( std::string path, bool reread = false ); + void read(); + void reread( std::string const & path ); void readFileBased( std::string filePath, std::string const & groupPath ); - void readGroupBased( std::string const & groupPath ); + void readGorVBased( std::string const & groupPath ); void read_impl( std::string const & groupPath ); /** diff --git a/include/openPMD/Series.hpp b/include/openPMD/Series.hpp index d8c908fbbb..84d7237f57 100644 --- a/include/openPMD/Series.hpp +++ b/include/openPMD/Series.hpp @@ -260,6 +260,9 @@ class SeriesImpl : public AttributableImpl */ IterationEncoding iterationEncoding() const; /** Set the encoding style for multiple iterations in this series. + * A preview on the openPMD 2.0 variable-based iteration encoding can be activated with this call. + * Making full use of the variable-based iteration encoding requires (1) explicit support by the backend (available only in ADIOS2) and (2) use of the openPMD streaming API. + * In other backends and without the streaming API, only one iteration/snapshot may be written in the variable-based encoding, making this encoding a good choice for single-snapshot data dumps. * * @param iterationEncoding Desired encoding style for multiple iterations in this series. * @return Reference to modified series. diff --git a/include/openPMD/backend/Container.hpp b/include/openPMD/backend/Container.hpp index fbef152da9..b117b881f3 100644 --- a/include/openPMD/backend/Container.hpp +++ b/include/openPMD/backend/Container.hpp @@ -326,6 +326,14 @@ class Container : public LegacyAttributable std::shared_ptr< InternalContainer > m_container; + /** + * This class wraps a Container and forwards operator[]() and at() to it. + * It remembers the keys used for accessing. Upon going out of scope, all + * keys not yet accessed are removed from the Container. + * Note that the container is stored by non-owning reference, thus + * requiring that the original Container stay in scope while using this + * class. + */ class EraseStaleEntries { std::set< key_type > m_accessedKeys; diff --git a/src/Iteration.cpp b/src/Iteration.cpp index 868db95f19..de6f7c8e63 100644 --- a/src/Iteration.cpp +++ b/src/Iteration.cpp @@ -334,13 +334,8 @@ void Iteration::deferParseAccess( DeferredParseAccess dr ) auxiliary::makeOption< DeferredParseAccess >( std::move( dr ) ); } -void Iteration::read( std::string path, bool reread ) +void Iteration::read() { - if( reread ) - { - read_impl( path ); - return; - } if( !m_deferredParseAccess->has_value() ) { return; @@ -352,12 +347,23 @@ void Iteration::read( std::string path, bool reread ) } else { - readGroupBased( deferred.path ); + readGorVBased( deferred.path ); } // reset this thing *m_deferredParseAccess = auxiliary::Option< DeferredParseAccess >(); } +void Iteration::reread( std::string const & path ) +{ + if( m_deferredParseAccess->has_value() ) + { + throw std::runtime_error( + "[Iteration] Internal control flow error: Trying to reread an " + "iteration that has not yet been read for its first time." ); + } + read_impl( path ); +} + void Iteration::readFileBased( std::string filePath, std::string const & groupPath ) { @@ -368,7 +374,7 @@ void Iteration::readFileBased( read_impl( groupPath ); } -void Iteration::readGroupBased( std::string const & groupPath ) +void Iteration::readGorVBased( std::string const & groupPath ) { read_impl(groupPath ); @@ -687,7 +693,7 @@ void Iteration::runDeferredParseAccess() *newAccess = Access::READ_WRITE; try { - read( "", false ); + read(); } catch( ... ) { diff --git a/src/Mesh.cpp b/src/Mesh.cpp index 914428677c..405c43d0a8 100644 --- a/src/Mesh.cpp +++ b/src/Mesh.cpp @@ -247,14 +247,6 @@ Mesh::flush_impl(std::string const& name) void Mesh::read() { - /* - * @todo This is a proof-of-concept on how we need to rework our re-parsing - * procedures. Goals when parsing an iteration again: - * 1. Old handles must survive - * 2. Map entries that cannot be found any more must perish. - * @todo This approach will kill references that users may have to mapped - * components, so improve that. - */ auto map = eraseStaleEntries(); using DT = Datatype; diff --git a/src/Series.cpp b/src/Series.cpp index 35ec5f66dc..20af7882da 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -910,12 +910,15 @@ void SeriesImpl::readOneIterationFileBased( std::string const & filePath ) } else if( encoding == "variableBased" ) { - // @todo should we throw? test this path - series.m_iterationEncoding = IterationEncoding::variableBased; - std::cerr << "Series constructor called with iteration " - "regex '%T' suggests loading a " - << "time series with fileBased iteration " - "encoding. Loaded file is variableBased.\n"; + /* + * Unlike if the file were group-based, this one doesn't work + * at all since the group paths are different. + */ + throw std::runtime_error( + "Series constructor called with iteration " + "regex '%T' suggests loading a " + "time series with fileBased iteration " + "encoding. Loaded file is variableBased." ); } else throw std::runtime_error( @@ -1043,7 +1046,7 @@ SeriesImpl::readGorVBased( bool do_init ) { pOpen.path = path; IOHandler()->enqueue( IOTask( &i, pOpen ) ); - i.read( path, /* reread = */ true ); + i.reread( path ); } } else