From 7fc36763f520e30333ae0807357ee2d66008084b Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Wed, 7 Apr 2021 20:01:35 -0700 Subject: [PATCH 1/2] HDF5 Dask DataFrame issue: File ID When the eager `electrons.to_df()` calls below are removed (commented out in this patch) - and only with HDF5 - I get the following error on `electrons.to_dask()`: ``` openpmd_api/DaskDataFrame.py in particles_to_daskdataframe(particle_species) 65 for k_rc, rc in r.items(): 66 if not rc.constant: ---> 67 chunks = rc.available_chunks() 68 break 69 if chunks: RuntimeError: [HDF5] File ID not found with file name ``` Seen with the 3D openPMD HDF5 example data set, at iteration 400. Printing the records and components in the loop shows: - charge Scalar (skipped because constant) - mass Scalar (skipped because constant) - momentum x (calls available_chunks) --- examples/11_particle_dataframe.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/11_particle_dataframe.py b/examples/11_particle_dataframe.py index a422bb71a9..124cbeebe0 100755 --- a/examples/11_particle_dataframe.py +++ b/examples/11_particle_dataframe.py @@ -27,13 +27,13 @@ electrons = s.iterations[400].particles["electrons"] # all particles -df = electrons.to_df() -print(type(df) is pd.DataFrame) -print(df) +#df = electrons.to_df() +#print(type(df) is pd.DataFrame) +#print(df) # only first 100 particles -df = electrons.to_df(np.s_[:100]) -print(df) +#df = electrons.to_df(np.s_[:100]) +#print(df) # Particles From 37e08fbfb46c9deb75e18dcac950d908ebfe3b2b Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Wed, 7 Apr 2021 20:21:10 -0700 Subject: [PATCH 2/2] Simpler Example --- examples/2_read_serial.cpp | 71 ++++++-------------------------------- 1 file changed, 10 insertions(+), 61 deletions(-) diff --git a/examples/2_read_serial.cpp b/examples/2_read_serial.cpp index 1bea5629a3..d67f62c606 100644 --- a/examples/2_read_serial.cpp +++ b/examples/2_read_serial.cpp @@ -21,8 +21,6 @@ #include #include -#include -#include using std::cout; @@ -30,72 +28,23 @@ using 
namespace openPMD; int main() { - Series series = Series( + Series s = Series( "../samples/git-sample/data%T.h5", Access::READ_ONLY ); - cout << "Read a Series with openPMD standard version " - << series.openPMD() << '\n'; - cout << "The Series contains " << series.iterations.size() << " iterations:"; - for( auto const& i : series.iterations ) - cout << "\n\t" << i.first; - cout << '\n'; + auto electrons = s.iterations[400].particles["electrons"]; - Iteration i = series.iterations[100]; - cout << "Iteration 100 contains " << i.meshes.size() << " meshes:"; - for( auto const& m : i.meshes ) - cout << "\n\t" << m.first; - cout << '\n'; - cout << "Iteration 100 contains " << i.particles.size() << " particle species:"; - for( auto const& ps : i.particles ) { - cout << "\n\t" << ps.first; - for( auto const& r : ps.second ) { - cout << "\n\t" << r.first; - cout << '\n'; - } - } - - openPMD::ParticleSpecies electrons = i.particles["electrons"]; - std::shared_ptr charge = electrons["charge"][openPMD::RecordComponent::SCALAR].loadChunk(); - series.flush(); - cout << "And the first electron particle has a charge = " << charge.get()[0]; - cout << '\n'; - - MeshRecordComponent E_x = i.meshes["E"]["x"]; - Extent extent = E_x.getExtent(); - cout << "Field E/x has shape ("; - for( auto const& dim : extent ) - cout << dim << ','; - cout << ") and has datatype " << E_x.getDatatype() << '\n'; - - Offset chunk_offset = {1, 1, 1}; - Extent chunk_extent = {2, 2, 1}; - auto chunk_data = E_x.loadChunk(chunk_offset, chunk_extent); - cout << "Queued the loading of a single chunk from disk, " - "ready to execute\n"; - series.flush(); - cout << "Chunk has been read from disk\n" - << "Read chunk contains:\n"; - for( size_t row = 0; row < chunk_extent[0]; ++row ) + for( auto & r : electrons ) { - for( size_t col = 0; col < chunk_extent[1]; ++col ) - cout << "\t" - << '(' << row + chunk_offset[0] << '|' << col + chunk_offset[1] << '|' << 1 << ")\t" - << chunk_data.get()[row*chunk_extent[1]+col]; 
- cout << '\n'; + std::cout << r.first << ": "; + for( auto & r_c : r.second ) + { + std::cout << r_c.first << "\n"; + if( !r_c.second.constant() ) + auto chunks = r_c.second.availableChunks(); + } } - auto all_data = E_x.loadChunk(); - series.flush(); - cout << "Full E/x starts with:\n\t{"; - for( size_t col = 0; col < extent[1] && col < 5; ++col ) - cout << all_data.get()[col] << ", "; - cout << "...}\n"; - - /* The files in 'series' are still open until the object is destroyed, on - * which it cleanly flushes and closes all open file handles. - * When running out of scope on return, the 'Series' destructor is called. - */ return 0; }