From 40acbbf378de2662be124470be66d18f042e41c3 Mon Sep 17 00:00:00 2001 From: Luc Grosheintz Date: Mon, 13 Nov 2023 16:10:00 +0100 Subject: [PATCH 1/6] Implement `Hdf5Reader` API and default. This commit introduces the API for an Hdf5Reader. This reader abstracts the process of opening HDF5 files, and reading an `libsonata.Selection` from a dataset. The default reader calls the existing `_readSelection`. --- CMakeLists.txt | 1 + include/bbp/sonata/config.h | 3 +- include/bbp/sonata/edges.h | 7 ++ include/bbp/sonata/hdf5_reader.h | 197 ++++++++++++++++++++++++++++++ include/bbp/sonata/nodes.h | 5 + include/bbp/sonata/population.h | 11 +- include/bbp/sonata/unique_tuple.h | 39 ++++++ python/bindings.cpp | 25 +++- python/libsonata/__init__.py | 1 + src/config.cpp | 22 +++- src/edge_index.cpp | 22 ++-- src/edge_index.h | 6 +- src/edges.cpp | 18 ++- src/hdf5_reader.cpp | 22 ++++ src/hdf5_reader.hpp | 100 +++++++++++++++ src/nodes.cpp | 8 +- src/population.cpp | 21 ++-- src/population.hpp | 59 +++++++-- src/read_bulk.hpp | 12 ++ 19 files changed, 531 insertions(+), 48 deletions(-) create mode 100644 include/bbp/sonata/hdf5_reader.h create mode 100644 include/bbp/sonata/unique_tuple.h create mode 100644 src/hdf5_reader.cpp create mode 100644 src/hdf5_reader.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 77084e97..9a455b4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,6 +96,7 @@ set(SONATA_SRC src/edge_index.cpp src/edges.cpp src/hdf5_mutex.cpp + src/hdf5_reader.cpp src/node_sets.cpp src/nodes.cpp src/population.cpp diff --git a/include/bbp/sonata/config.h b/include/bbp/sonata/config.h index 609494de..3809e475 100644 --- a/include/bbp/sonata/config.h +++ b/include/bbp/sonata/config.h @@ -182,8 +182,8 @@ class SONATA_API CircuitConfig * * \throws SonataError if the given population does not exist in any node network. 
*/ + NodePopulation getNodePopulation(const std::string& name, const Hdf5Reader& hdf5_reader) const; NodePopulation getNodePopulation(const std::string& name) const; - /** * Returns a set with all available population names across all the edge networks. */ @@ -195,6 +195,7 @@ class SONATA_API CircuitConfig * * \throws SonataError if the given population does not exist in any edge network. */ + EdgePopulation getEdgePopulation(const std::string& name, const Hdf5Reader& hdf5_reader) const; EdgePopulation getEdgePopulation(const std::string& name) const; /** diff --git a/include/bbp/sonata/edges.h b/include/bbp/sonata/edges.h index 780d02cc..95845f57 100644 --- a/include/bbp/sonata/edges.h +++ b/include/bbp/sonata/edges.h @@ -15,6 +15,8 @@ #include #include +#include "hdf5_reader.h" + namespace bbp { namespace sonata { @@ -30,6 +32,11 @@ class SONATA_API EdgePopulation: public Population const std::string& csvFilePath, const std::string& name); + EdgePopulation(const std::string& h5FilePath, + const std::string& csvFilePath, + const std::string& name, + const Hdf5Reader& hdf5_reader); + /** * Name of source population extracted from 'source_node_id' dataset */ diff --git a/include/bbp/sonata/hdf5_reader.h b/include/bbp/sonata/hdf5_reader.h new file mode 100644 index 00000000..f0f5aec1 --- /dev/null +++ b/include/bbp/sonata/hdf5_reader.h @@ -0,0 +1,197 @@ +#pragma once + +#include +#include + +#include +#include +#include + +namespace bbp { +namespace sonata { + +/// Interface for implementing `readSelection(dset, selection)`. +template +class Hdf5PluginRead1DInterface +{ + public: + virtual ~Hdf5PluginRead1DInterface() = default; + + /// Read the selected subset of the one-dimensional array. + /// + /// The selection is canonical, i.e. sorted and non-overlapping. The dataset + /// is obtained from a `HighFive::File` opened via `this->openFile`. 
+ virtual std::vector readSelection(const HighFive::DataSet& dset, + const Selection& selection) const = 0; +}; + +template +class Hdf5PluginRead2DInterface +{ + private: + using AltRanges = std::vector>; + + public: + virtual ~Hdf5PluginRead2DInterface() = default; + + /// Read the Cartesian product of the two selections. + /// + /// Both selections are canonical, i.e. sorted and non-overlapping. The dataset + /// is obtained from a `HighFive::File` opened via `this->openFile`. + virtual std::vector> readSelection(const HighFive::DataSet& dset, + const Selection& xsel, + const Selection& ysel) const = 0; + + virtual std::vector> readSelection(const HighFive::DataSet& dset, + const AltRanges& xsel, + const Selection& ysel) const = 0; + + virtual std::vector> readSelection(const HighFive::DataSet& dset, + const Selection& xsel, + const AltRanges& ysel) const = 0; + + virtual std::vector> readSelection(const HighFive::DataSet& dset, + const AltRanges& xsel, + const AltRanges& ysel) const = 0; +}; + +template +class Hdf5PluginInterface; + +/// Interface of Plugins for reading HDF5 datasets. +/// +/// All methods must be called in an MPI-collective manner. Each method is free +/// to break any MPI collective requirements. +template +class Hdf5PluginInterface, std::tuple> + : virtual public Hdf5PluginRead1DInterface..., + virtual public Hdf5PluginRead2DInterface... +{ + public: + /// Open the HDF5 file. + /// + /// This allows setting File Access Properties. + virtual HighFive::File openFile(const std::string& path) const = 0; +}; + +/// Abstraction for reading HDF5 datasets. +/// +/// The Hdf5Reader provides an interface for reading canonical selections from +/// datasets. Selections are canonical if they are sorted and don't overlap. +/// This allows implementing different optimization strategies, such as +/// minimizing bytes read, aggregating nearby reads or using MPI collective I/O. 
+/// +/// The design uses virtual inheritance, which enables users to inject their own +/// reader if needed. This class is the interface used within libsonata. It +/// simply delegates to a "plugin" that satisfies the interface +/// `Hdf5PluginInterface`. +/// +/// To enable MPI collective I/O, `libsonata` must call all methods in an +/// MPI-collective manner. This implies that the number of times any function in +/// `libsonata` calls any of the `Hdf5Reader` methods must not depend on the +/// arguments to the function. +/// +/// Examples: +/// +/// void wrong(Selection selection) { +/// // Wrong because some MPI ranks might return without +/// // calling `readSelection`. +/// if(selection.empty()) { +/// return; +/// } +/// hdf5_reader.readSelection(dset, selection); +/// } +/// +/// void also_wrong(Selection selection) { +/// // Wrong because `hdf5_reader` is called `selection.ranges().size()` +/// // times, which could be different on each MPI rank. +/// for(auto range : selection.ranges()) { +/// hdf5_reader.readSelection(dset, Selection(std::vector{range})); +/// } +/// } +/// +/// void subtle(Selection selection, bool flag) { +/// // If the flag can differ between MPI ranks, this is wrong because +/// // `readSelection` is called with different `dset`s. If the `flag` must +/// // be the same on all MPI ranks, this is correct. If this happens in +/// // the libsonata API, then passing the same `flag` on all MPI ranks becomes +/// // a requirement for the users, when using a collective reader. Example: +/// // pop.get_attribute(attr_name, selection) +/// if(flag) { +/// hdf5_reader.readSelection(dset1, selection); +/// } else { +/// hdf5_reader.readSelection(dset2, selection); +/// } +/// } +/// +/// void correct(Selection selection) { +/// // Correct because no matter which branch is taken +/// // `hdf5_reader.readSelection` is called exactly once. 
+/// if(selection.size % 2 == 0) { +/// hdf5_reader.readSelection(dset, selection); +/// } else { +/// hdf5_reader.readSelection(dset, {}); +/// } +/// } +/// +class Hdf5Reader +{ + public: + // The issue here is that on a mac `size_t` is different from + // `{,u}int{8,16,32,64}_t` but not on the other two OSes. + using supported_1D_types = detail::unique_tuple; + + using supported_2D_types = detail::unique_tuple>; + + /// Create a valid Hdf5Reader with the default plugin. + Hdf5Reader(); + + /// Create an Hdf5Reader with a user-supplied plugin. + Hdf5Reader(std::shared_ptr> impl); + + /// Read the selected subset of the one-dimensional array. + /// + /// The selection is canonical, i.e. sorted and non-overlapping. The dataset + /// is obtained from a `HighFive::File` opened via `this->openFile`. + template + std::vector readSelection(const HighFive::DataSet& dset, const Selection& selection) const { + return static_cast&>(*impl).readSelection(dset, + selection); + } + + /// Open the HDF5 file. + /// + /// The dataset passed to `readSelection` must be obtained from a file opened + /// via this method. + HighFive::File openFile(const std::string& filename) const; + + /// Read the Cartesian product of the two selections. + /// + /// Both selections are canonical, i.e. sorted and non-overlapping. The dataset + /// is obtained from a `HighFive::File` opened via `this->openFile`. 
+ template + std::vector readSelection(const HighFive::DataSet& dset, + const XSel& xsel, + const YSel& ysel) const { + return static_cast&>(*impl).readSelection(dset, + xsel, + ysel); + } + + private: + std::shared_ptr> impl; +}; + +} // namespace sonata +} // namespace bbp diff --git a/include/bbp/sonata/nodes.h b/include/bbp/sonata/nodes.h index a1e7e484..0dea8936 100644 --- a/include/bbp/sonata/nodes.h +++ b/include/bbp/sonata/nodes.h @@ -30,6 +30,11 @@ class SONATA_API NodePopulation: public Population const std::string& csvFilePath, const std::string& name); + NodePopulation(const std::string& h5FilePath, + const std::string& csvFilePath, + const std::string& name, + const Hdf5Reader& hdf5_reader); + /** * Return selection of where attribute values match value * diff --git a/include/bbp/sonata/population.h b/include/bbp/sonata/population.h index 94a7929f..214cab13 100644 --- a/include/bbp/sonata/population.h +++ b/include/bbp/sonata/population.h @@ -19,6 +19,7 @@ #include // std::move #include +#include #include namespace bbp { @@ -169,7 +170,8 @@ class SONATA_API Population Population(const std::string& h5FilePath, const std::string& csvFilePath, const std::string& name, - const std::string& prefix); + const std::string& prefix, + const Hdf5Reader& hdf5_reader); Population(const Population&) = delete; @@ -194,7 +196,12 @@ template class SONATA_API PopulationStorage { public: - PopulationStorage(const std::string& h5FilePath, const std::string& csvFilePath = ""); + PopulationStorage(const std::string& h5FilePath); + PopulationStorage(const std::string& h5FilePath, const std::string& csvFilePath); + PopulationStorage(const std::string& h5FilePath, const Hdf5Reader& hdf5_reader); + PopulationStorage(const std::string& h5FilePath, + const std::string& csvFilePath, + const Hdf5Reader& hdf5_reader); PopulationStorage(const PopulationStorage&) = delete; diff --git a/include/bbp/sonata/unique_tuple.h b/include/bbp/sonata/unique_tuple.h new file mode 100644 index 
00000000..d3a8fccb --- /dev/null +++ b/include/bbp/sonata/unique_tuple.h @@ -0,0 +1,39 @@ + +// Slightly modified version of: +// https://stackoverflow.com/a/57528226 +// +// We've back ported `disjunction` from cppreference, and inlined the basecase +// for `unique`. + +namespace bbp { +namespace sonata { +namespace detail { + +template +struct disjunction: std::false_type {}; +template +struct disjunction: B1 {}; +template +struct disjunction: std::conditional_t> {}; + +template +struct unique; + +template +struct unique> { + using type = typename std::tuple; +}; + + +template +struct unique, U, Us...> + : std::conditional_t...>::value, + unique, Us...>, + unique, Us...>> {}; + +template +using unique_tuple = typename unique, Ts...>::type; + +} // namespace detail +} // namespace sonata +} // namespace bbp diff --git a/python/bindings.cpp b/python/bindings.cpp index b0de5aed..5c33e4fe 100644 --- a/python/bindings.cpp +++ b/python/bindings.cpp @@ -305,11 +305,14 @@ py::class_ bindStorageClass(py::module& m, const char* clsName, const c }; return py::class_( m, clsName, imbuePopulationClassName(DOC(bbp, sonata, PopulationStorage)).c_str()) - .def(py::init([](py::object h5_filepath, py::object csv_filepath) { - return Storage(py::str(h5_filepath), py::str(csv_filepath)); + .def(py::init([](py::object h5_filepath, py::object csv_filepath, Hdf5Reader hdf5_reader) { + return Storage(py::str(h5_filepath), + py::str(csv_filepath), + std::move(hdf5_reader)); }), "h5_filepath"_a, - "csv_filepath"_a = "") + "csv_filepath"_a = "", + "hdf5_reader"_a = Hdf5Reader()) .def_property_readonly("population_names", &Storage::populationNames, imbuePopulationClassName(DOC_POP_STOR(populationNames)).c_str()) @@ -403,6 +406,8 @@ void bindReportReader(py::module& m, const std::string& prefix) { PYBIND11_MODULE(_libsonata, m) { + py::class_(m, "Hdf5Reader").def(py::init([]() { return Hdf5Reader(); })); + py::class_(m, "Selection", "ID sequence in the form convenient for querying 
attributes") @@ -591,9 +596,19 @@ PYBIND11_MODULE(_libsonata, m) { .def_property_readonly("config_status", &CircuitConfig::getCircuitConfigStatus) .def_property_readonly("node_sets_path", &CircuitConfig::getNodeSetsPath) .def_property_readonly("node_populations", &CircuitConfig::listNodePopulations) - .def("node_population", &CircuitConfig::getNodePopulation) + .def("node_population", + [](const CircuitConfig& config, const std::string& name) { + return config.getNodePopulation(name); + }) .def_property_readonly("edge_populations", &CircuitConfig::listEdgePopulations) - .def("edge_population", &CircuitConfig::getEdgePopulation) + .def("edge_population", + [](const CircuitConfig& config, const std::string& name) { + return config.getEdgePopulation(name); + }) + .def("edge_population", + [](const CircuitConfig& config, const std::string& name, Hdf5Reader hdf5_reader) { + return config.getEdgePopulation(name, hdf5_reader); + }) .def("node_population_properties", &CircuitConfig::getNodePopulationProperties, "name"_a) .def("edge_population_properties", &CircuitConfig::getEdgePopulationProperties, "name"_a) .def_property_readonly("expanded_json", &CircuitConfig::getExpandedJSON); diff --git a/python/libsonata/__init__.py b/python/libsonata/__init__.py index f77def54..f9eca8d3 100644 --- a/python/libsonata/__init__.py +++ b/python/libsonata/__init__.py @@ -23,6 +23,7 @@ SpikePopulation, SpikeReader, version, + Hdf5Reader, ) __all__ = [ diff --git a/src/config.cpp b/src/config.cpp index dda95042..01a84ca8 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -202,9 +202,13 @@ PopulationType getPopulationProperties( template PopulationType getPopulation(const std::string& populationName, - const std::unordered_map& src) { + const std::unordered_map& src, + const Hdf5Reader& hdf5_reader) { const auto properties = getPopulationProperties(populationName, src); - return PopulationType(properties.elementsPath, properties.typesPath, populationName); + return 
PopulationType(properties.elementsPath, + properties.typesPath, + populationName, + hdf5_reader); } std::map replaceVariables(std::map variables) { @@ -918,7 +922,12 @@ std::set CircuitConfig::listNodePopulations() const { } NodePopulation CircuitConfig::getNodePopulation(const std::string& name) const { - return getPopulation(name, _nodePopulationProperties); + return getNodePopulation(name, Hdf5Reader()); +} + +NodePopulation CircuitConfig::getNodePopulation(const std::string& name, + const Hdf5Reader& hdf5_reader) const { + return getPopulation(name, _nodePopulationProperties, hdf5_reader); } std::set CircuitConfig::listEdgePopulations() const { @@ -926,7 +935,12 @@ std::set CircuitConfig::listEdgePopulations() const { } EdgePopulation CircuitConfig::getEdgePopulation(const std::string& name) const { - return getPopulation(name, _edgePopulationProperties); + return getPopulation(name, _edgePopulationProperties, Hdf5Reader()); +} + +EdgePopulation CircuitConfig::getEdgePopulation(const std::string& name, + const Hdf5Reader& hdf5_reader) const { + return getPopulation(name, _edgePopulationProperties, hdf5_reader); } NodePopulationProperties CircuitConfig::getNodePopulationProperties(const std::string& name) const { diff --git a/src/edge_index.cpp b/src/edge_index.cpp index 43dec9a9..d21f5536 100644 --- a/src/edge_index.cpp +++ b/src/edge_index.cpp @@ -18,7 +18,6 @@ #include #include "read_bulk.hpp" -#include "read_canonical_selection.hpp" namespace bbp { namespace sonata { @@ -55,7 +54,9 @@ const HighFive::Group targetIndex(const HighFive::Group& h5Root) { return h5Root.getGroup(TARGET_INDEX_GROUP); } -Selection resolve(const HighFive::Group& indexGroup, const std::vector& nodeIDs) { +Selection resolve(const HighFive::Group& indexGroup, + const std::vector& nodeIDs, + const Hdf5Reader& reader) { auto node2ranges_dset = indexGroup.getDataSet(NODE_ID_TO_RANGES_DSET); auto node_dim = node2ranges_dset.getSpace().getDimensions()[0]; auto sortedNodeIds = nodeIDs; @@ 
-65,12 +66,11 @@ Selection resolve(const HighFive::Group& indexGroup, const std::vector& return id >= node_dim; }); std::sort(sortedNodeIds.begin(), sortedNodeIds.end()); - sortedNodeIds.erase(std::unique(sortedNodeIds.begin(), sortedNodeIds.end()), - sortedNodeIds.end()); auto nodeSelection = Selection::fromValues(sortedNodeIds); - auto primaryRange = detail::readCanonicalSelection>( - node2ranges_dset, nodeSelection.ranges(), RawIndex{{0, 2}}); + auto primaryRange = reader.readSelection>(node2ranges_dset, + nodeSelection, + RawIndex{{0, 2}}); bulk_read::detail::erase_if(primaryRange, [](const auto& range) { // Filter out any invalid ranges `start >= end`. @@ -79,7 +79,7 @@ Selection resolve(const HighFive::Group& indexGroup, const std::vector& primaryRange = bulk_read::sortAndMerge(primaryRange); - auto secondaryRange = detail::readCanonicalSelection>( + auto secondaryRange = reader.readSelection>( indexGroup.getDataSet(RANGE_TO_EDGE_ID_DSET), primaryRange, RawIndex{{0, 2}}); // Sort and eliminate empty ranges. @@ -88,6 +88,12 @@ Selection resolve(const HighFive::Group& indexGroup, const std::vector& return Selection(std::move(secondaryRange)); } +Selection resolve(const HighFive::Group& indexGroup, + const NodeID nodeID, + const Hdf5Reader& reader) { + return resolve(indexGroup, std::vector{nodeID}, reader); +} + namespace { @@ -114,6 +120,8 @@ std::unordered_map _groupNodeRanges(const std::vector& } +// Use only in the writing code below. General purpose reading should use the +// Hdf5Reader interface. 
std::vector _readNodeIDs(const HighFive::Group& h5Root, const std::string& name) { std::vector result; h5Root.getDataSet(name).read(result); diff --git a/src/edge_index.h b/src/edge_index.h index fac4ead3..7eef8868 100644 --- a/src/edge_index.h +++ b/src/edge_index.h @@ -21,8 +21,10 @@ namespace edge_index { const HighFive::Group sourceIndex(const HighFive::Group& h5Root); const HighFive::Group targetIndex(const HighFive::Group& h5Root); -Selection resolve(const HighFive::Group& indexGroup, NodeID nodeID); -Selection resolve(const HighFive::Group& indexGroup, const std::vector& nodeIDs); +Selection resolve(const HighFive::Group& indexGroup, NodeID nodeID, const Hdf5Reader& reader); +Selection resolve(const HighFive::Group& indexGroup, + const std::vector& nodeIDs, + const Hdf5Reader& reader); void write(HighFive::Group& h5Root, uint64_t sourceNodeCount, diff --git a/src/edges.cpp b/src/edges.cpp index 3c91938d..323fd47a 100644 --- a/src/edges.cpp +++ b/src/edges.cpp @@ -33,11 +33,17 @@ namespace bbp { namespace sonata { //-------------------------------------------------------------------------------------------------- - +// EdgePopulation::EdgePopulation(const std::string& h5FilePath, const std::string& csvFilePath, const std::string& name) - : Population(h5FilePath, csvFilePath, name, ELEMENT) {} + : Population(h5FilePath, csvFilePath, name, ELEMENT, Hdf5Reader()) {} + +EdgePopulation::EdgePopulation(const std::string& h5FilePath, + const std::string& csvFilePath, + const std::string& name, + const Hdf5Reader& hdf5_reader) + : Population(h5FilePath, csvFilePath, name, ELEMENT, hdf5_reader) {} std::string EdgePopulation::source() const { @@ -59,26 +65,26 @@ std::string EdgePopulation::target() const { std::vector EdgePopulation::sourceNodeIDs(const Selection& selection) const { HDF5_LOCK_GUARD const auto dset = impl_->h5Root.getDataSet(SOURCE_NODE_ID_DSET); - return _readSelection(dset, selection); + return _readSelection(dset, selection, impl_->hdf5_reader); } 
std::vector EdgePopulation::targetNodeIDs(const Selection& selection) const { HDF5_LOCK_GUARD const auto dset = impl_->h5Root.getDataSet(TARGET_NODE_ID_DSET); - return _readSelection(dset, selection); + return _readSelection(dset, selection, impl_->hdf5_reader); } Selection EdgePopulation::afferentEdges(const std::vector& target) const { HDF5_LOCK_GUARD - return edge_index::resolve(edge_index::targetIndex(impl_->h5Root), target); + return edge_index::resolve(edge_index::targetIndex(impl_->h5Root), target, impl_->hdf5_reader); } Selection EdgePopulation::efferentEdges(const std::vector& source) const { HDF5_LOCK_GUARD - return edge_index::resolve(edge_index::sourceIndex(impl_->h5Root), source); + return edge_index::resolve(edge_index::sourceIndex(impl_->h5Root), source, impl_->hdf5_reader); } diff --git a/src/hdf5_reader.cpp b/src/hdf5_reader.cpp new file mode 100644 index 00000000..4fd16ea9 --- /dev/null +++ b/src/hdf5_reader.cpp @@ -0,0 +1,22 @@ +#include + +#include "hdf5_reader.hpp" + +namespace bbp { +namespace sonata { + + +Hdf5Reader::Hdf5Reader() + : impl(std::make_shared< + Hdf5PluginDefault>()) {} + +Hdf5Reader::Hdf5Reader( + std::shared_ptr> impl) + : impl(std::move(impl)) {} + +HighFive::File Hdf5Reader::openFile(const std::string& filename) const { + return impl->openFile(filename); +} + +} // namespace sonata +} // namespace bbp diff --git a/src/hdf5_reader.hpp b/src/hdf5_reader.hpp new file mode 100644 index 00000000..a84c4ab8 --- /dev/null +++ b/src/hdf5_reader.hpp @@ -0,0 +1,100 @@ +#pragma once + +#include "population.hpp" +#include "read_bulk.hpp" +#include "read_canonical_selection.hpp" + +namespace bbp { +namespace sonata { + +namespace detail { +template +HighFive::HyperSlab _makeHyperslab(const std::vector& ranges) { + HighFive::HyperSlab slab; + for (const auto& range : ranges) { + size_t i_begin = std::get<0>(range); + size_t i_end = std::get<1>(range); + slab |= HighFive::RegularHyperSlab({i_begin}, {i_end - i_begin}); + } + + return slab; 
+} +} // namespace detail + + +template +class Hdf5PluginRead1DDefault: virtual public Hdf5PluginRead1DInterface +{ + public: + std::vector readSelection(const HighFive::DataSet& dset, + const Selection& selection) const override { + if (selection.ranges().empty()) { + return {}; + } + + return dset.select(detail::_makeHyperslab(selection.ranges())) + .template read>(); + } +}; + +template +class Hdf5PluginRead2DDefault: virtual public Hdf5PluginRead2DInterface +{ + private: + using AltRanges = std::vector>; + + public: + std::vector readSelection(const HighFive::DataSet& dset, + const Selection& xsel, + const Selection& ysel) const override { + return readSelectionImpl(dset, xsel.ranges(), ysel.ranges()); + } + + std::vector readSelection(const HighFive::DataSet& dset, + const AltRanges& xsel, + const Selection& ysel) const override { + return readSelectionImpl(dset, xsel, ysel.ranges()); + } + + std::vector readSelection(const HighFive::DataSet& dset, + const Selection& xsel, + const AltRanges& ysel) const override { + return readSelectionImpl(dset, xsel.ranges(), ysel); + } + + std::vector readSelection(const HighFive::DataSet& dset, + const AltRanges& xsel, + const AltRanges& ysel) const override { + return readSelectionImpl(dset, xsel, ysel); + } + + private: + template + std::vector readSelectionImpl(const HighFive::DataSet& dset, + const std::vector& xsel, + const std::vector& ysel) const { + return detail::readCanonicalSelection(dset, xsel, ysel); + } +}; + +template +class Hdf5PluginDefault; + +template +class Hdf5PluginDefault, std::tuple> + : virtual public Hdf5PluginInterface, std::tuple>, + virtual public Hdf5PluginRead1DDefault..., + virtual public Hdf5PluginRead2DDefault... 
+{ + private: + using AltRanges = std::vector>; + + public: + HighFive::File openFile(const std::string& path) const override { + return HighFive::File(path); + } +}; + + +} // namespace sonata +} // namespace bbp diff --git a/src/nodes.cpp b/src/nodes.cpp index debc2014..cb38c1bc 100644 --- a/src/nodes.cpp +++ b/src/nodes.cpp @@ -92,7 +92,13 @@ Selection _filterStringAttribute(const NodePopulation& population, NodePopulation::NodePopulation(const std::string& h5FilePath, const std::string& csvFilePath, const std::string& name) - : Population(h5FilePath, csvFilePath, name, ELEMENT) {} + : NodePopulation(h5FilePath, csvFilePath, name, Hdf5Reader()) {} + +NodePopulation::NodePopulation(const std::string& h5FilePath, + const std::string& csvFilePath, + const std::string& name, + const Hdf5Reader& hdf5_reader) + : Population(h5FilePath, csvFilePath, name, ELEMENT, hdf5_reader) {} Selection NodePopulation::regexMatch(const std::string& attribute, const std::string& regex) const { std::regex re(regex); diff --git a/src/population.cpp b/src/population.cpp index 718e1ac9..0881e292 100644 --- a/src/population.cpp +++ b/src/population.cpp @@ -59,10 +59,11 @@ std::string _getDataType(const HighFive::DataSet& dset, const std::string& name) Population::Population(const std::string& h5FilePath, const std::string& csvFilePath, const std::string& name, - const std::string& prefix) - : impl_([h5FilePath, csvFilePath, name, prefix] { + const std::string& prefix, + const Hdf5Reader& hdf5_reader) + : impl_([h5FilePath, csvFilePath, name, prefix, hdf5_reader] { HDF5_LOCK_GUARD - return new Population::Impl(h5FilePath, csvFilePath, name, prefix); + return new Population::Impl(h5FilePath, csvFilePath, name, prefix, hdf5_reader); }()) {} @@ -105,14 +106,14 @@ std::vector Population::enumerationValues(const std::string& name) // Note: can't use select all, because our locks aren't re-entrant const auto selection = Selection({{0, dset.getSpace().getDimensions()[0]}}); - return 
_readSelection(dset, selection); + return _readSelection(dset, selection, impl_->hdf5_reader); } template std::vector Population::getAttribute(const std::string& name, const Selection& selection) const { HDF5_LOCK_GUARD - return _readSelection(impl_->getAttributeDataSet(name), selection); + return _readSelection(impl_->getAttributeDataSet(name), selection, impl_->hdf5_reader); } @@ -121,7 +122,9 @@ std::vector Population::getAttribute(const std::string const Selection& selection) const { if (impl_->attributeEnumNames.count(name) == 0) { HDF5_LOCK_GUARD - return _readSelection(impl_->getAttributeDataSet(name), selection); + return _readSelection(impl_->getAttributeDataSet(name), + selection, + impl_->hdf5_reader); } const auto indices = getAttribute(name, selection); @@ -162,7 +165,7 @@ std::vector Population::getEnumeration(const std::string& name, } HDF5_LOCK_GUARD - return _readSelection(impl_->getAttributeDataSet(name), selection); + return _readSelection(impl_->getAttributeDataSet(name), selection, impl_->hdf5_reader); } @@ -186,7 +189,9 @@ template std::vector Population::getDynamicsAttribute(const std::string& name, const Selection& selection) const { HDF5_LOCK_GUARD - return _readSelection(impl_->getDynamicsAttributeDataSet(name), selection); + return _readSelection(impl_->getDynamicsAttributeDataSet(name), + selection, + impl_->hdf5_reader); } diff --git a/src/population.hpp b/src/population.hpp index 3dbc84dd..fc6b1a55 100644 --- a/src/population.hpp +++ b/src/population.hpp @@ -21,7 +21,6 @@ #include #include "read_bulk.hpp" -#include "read_canonical_selection.hpp" #include namespace bbp { @@ -60,13 +59,15 @@ std::set _listExplicitEnumerations(const HighFive::Group h5Group, } template -std::vector _readSelection(const HighFive::DataSet& dset, const Selection& selection) { +std::vector _readSelection(const HighFive::DataSet& dset, + const Selection& selection, + const Hdf5Reader& hdf5_reader) { if (dset.getElementCount() == 0) { return {}; } if 
(bulk_read::detail::isCanonical(selection)) { - return detail::readCanonicalSelection(dset, selection); + return hdf5_reader.readSelection(dset, selection); } // The fully general case: @@ -75,7 +76,7 @@ std::vector _readSelection(const HighFive::DataSet& dset, const Selection& se // 2. Copy values from the canonical `linear_results` to their final // destination. auto canonicalRanges = bulk_read::sortAndMerge(selection, 0); - auto linear_result = detail::readCanonicalSelection(dset, canonicalRanges); + auto linear_result = hdf5_reader.readSelection(dset, canonicalRanges); const auto ids = selection.flatten(); @@ -102,14 +103,19 @@ std::vector _readSelection(const HighFive::DataSet& dset, const Selection& se } // unnamed namespace +inline HighFive::File open_hdf5_file(const std::string& filename, const Hdf5Reader& hdf5_reader) { + return hdf5_reader.openFile(filename); +} + struct Population::Impl { Impl(const std::string& h5FilePath, const std::string&, const std::string& _name, - const std::string& _prefix) + const std::string& _prefix, + const Hdf5Reader& hdf5_reader) : name(_name) , prefix(_prefix) - , h5File(h5FilePath) + , h5File(open_hdf5_file(h5FilePath, hdf5_reader)) , h5Root(h5File.getGroup(fmt::format("/{}s", prefix)).getGroup(name)) , attributeNames(_listChildren(h5Root.getGroup("0"), {H5_DYNAMICS_PARAMS, H5_LIBRARY})) , attributeEnumNames( @@ -120,7 +126,8 @@ struct Population::Impl { , dynamicsAttributeNames( h5Root.getGroup("0").exist(H5_DYNAMICS_PARAMS) ? 
_listChildren(h5Root.getGroup("0").getGroup(H5_DYNAMICS_PARAMS)) - : std::set{}) { + : std::set{}) + , hdf5_reader(hdf5_reader) { if (h5Root.exist("1")) { throw SonataError("Only single-group populations are supported at the moment"); } @@ -154,17 +161,27 @@ struct Population::Impl { const std::set attributeNames; const std::set attributeEnumNames; const std::set dynamicsAttributeNames; + const Hdf5Reader hdf5_reader; }; //-------------------------------------------------------------------------------------------------- template struct PopulationStorage::Impl { - Impl(const std::string& _h5FilePath, const std::string& _csvFilePath) + Impl(const std::string& _h5FilePath) + : Impl(_h5FilePath, Hdf5Reader()) {} + + Impl(const std::string& _h5FilePath, const Hdf5Reader& hdf5_reader) + : Impl(_h5FilePath, std::string(), hdf5_reader) {} + + Impl(const std::string& _h5FilePath, + const std::string& _csvFilePath, + const Hdf5Reader& hdf5_reader) : h5FilePath(_h5FilePath) , csvFilePath(_csvFilePath) , h5File(h5FilePath) - , h5Root(h5File.getGroup(fmt::format("/{}s", Population::ELEMENT))) { + , h5Root(h5File.getGroup(fmt::format("/{}s", Population::ELEMENT))) + , hdf5_reader(hdf5_reader) { if (!csvFilePath.empty()) { throw SonataError("CSV not supported at the moment"); } @@ -174,15 +191,30 @@ struct PopulationStorage::Impl { const std::string csvFilePath; const HighFive::File h5File; const HighFive::Group h5Root; + const Hdf5Reader hdf5_reader; }; +template +PopulationStorage::PopulationStorage(const std::string& h5FilePath) + : PopulationStorage(h5FilePath, Hdf5Reader()) {} + +template +PopulationStorage::PopulationStorage(const std::string& h5FilePath, + const Hdf5Reader& hdf5_reader) + : PopulationStorage(h5FilePath, std::string(), hdf5_reader) {} template PopulationStorage::PopulationStorage(const std::string& h5FilePath, const std::string& csvFilePath) - : impl_([h5FilePath, csvFilePath] { + : PopulationStorage(h5FilePath, csvFilePath, Hdf5Reader()) {} + +template 
+PopulationStorage::PopulationStorage(const std::string& h5FilePath, + const std::string& csvFilePath, + const Hdf5Reader& hdf5_reader) + : impl_([h5FilePath, csvFilePath, hdf5_reader] { HDF5_LOCK_GUARD - return new PopulationStorage::Impl(h5FilePath, csvFilePath); + return new PopulationStorage::Impl(h5FilePath, csvFilePath, hdf5_reader); }()) {} @@ -211,7 +243,10 @@ std::shared_ptr PopulationStorage::openPopulation( throw SonataError(fmt::format("No such population: '{}'", name)); } } - return std::make_shared(impl_->h5FilePath, impl_->csvFilePath, name); + return std::make_shared(impl_->h5FilePath, + impl_->csvFilePath, + name, + impl_->hdf5_reader); } //-------------------------------------------------------------------------------------------------- diff --git a/src/read_bulk.hpp b/src/read_bulk.hpp index 0eb78afd..4f528187 100644 --- a/src/read_bulk.hpp +++ b/src/read_bulk.hpp @@ -43,6 +43,18 @@ size_t flatSize(const std::vector& ranges) { return size; } +template +std::vector flattenRanges(const std::vector& ranges) { + std::vector result; + result.reserve(flatSize(ranges)); + for (const auto& range : ranges) { + for (auto v = std::get<0>(range); v < std::get<1>(range); ++v) { + result.emplace_back(v); + } + } + return result; +} + template void erase_if(std::vector& v, Pred pred) { auto it = std::remove_if(v.begin(), v.end(), pred); From 9ef9c54ae8d17286093c39f34d6f4f57dbac5361 Mon Sep 17 00:00:00 2001 From: Luc Grosheintz Date: Thu, 30 Nov 2023 08:50:50 +0100 Subject: [PATCH 2/6] Simplify after `Selection::Range` change. 
--- include/bbp/sonata/hdf5_reader.h | 21 +++------------------ src/edge_index.cpp | 2 +- src/hdf5_reader.hpp | 32 -------------------------------- src/read_canonical_selection.hpp | 8 +++++--- 4 files changed, 9 insertions(+), 54 deletions(-) diff --git a/include/bbp/sonata/hdf5_reader.h b/include/bbp/sonata/hdf5_reader.h index f0f5aec1..906ee839 100644 --- a/include/bbp/sonata/hdf5_reader.h +++ b/include/bbp/sonata/hdf5_reader.h @@ -28,9 +28,6 @@ class Hdf5PluginRead1DInterface template class Hdf5PluginRead2DInterface { - private: - using AltRanges = std::vector>; - public: virtual ~Hdf5PluginRead2DInterface() = default; @@ -41,18 +38,6 @@ class Hdf5PluginRead2DInterface virtual std::vector> readSelection(const HighFive::DataSet& dset, const Selection& xsel, const Selection& ysel) const = 0; - - virtual std::vector> readSelection(const HighFive::DataSet& dset, - const AltRanges& xsel, - const Selection& ysel) const = 0; - - virtual std::vector> readSelection(const HighFive::DataSet& dset, - const Selection& xsel, - const AltRanges& ysel) const = 0; - - virtual std::vector> readSelection(const HighFive::DataSet& dset, - const AltRanges& xsel, - const AltRanges& ysel) const = 0; }; template @@ -180,10 +165,10 @@ class Hdf5Reader /// /// Both selections are canonical, i.e. sorted and non-overlapping. The dataset /// is obtained from a `HighFive::File` opened via `this->openFile`. 
- template + template std::vector readSelection(const HighFive::DataSet& dset, - const XSel& xsel, - const YSel& ysel) const { + const Selection& xsel, + const Selection& ysel) const { return static_cast&>(*impl).readSelection(dset, xsel, ysel); diff --git a/src/edge_index.cpp b/src/edge_index.cpp index d21f5536..66472e6f 100644 --- a/src/edge_index.cpp +++ b/src/edge_index.cpp @@ -70,7 +70,7 @@ Selection resolve(const HighFive::Group& indexGroup, auto nodeSelection = Selection::fromValues(sortedNodeIds); auto primaryRange = reader.readSelection>(node2ranges_dset, nodeSelection, - RawIndex{{0, 2}}); + Selection(RawIndex{{0, 2}})); bulk_read::detail::erase_if(primaryRange, [](const auto& range) { // Filter out any invalid ranges `start >= end`. diff --git a/src/hdf5_reader.hpp b/src/hdf5_reader.hpp index a84c4ab8..6d1b73a4 100644 --- a/src/hdf5_reader.hpp +++ b/src/hdf5_reader.hpp @@ -40,39 +40,10 @@ class Hdf5PluginRead1DDefault: virtual public Hdf5PluginRead1DInterface template class Hdf5PluginRead2DDefault: virtual public Hdf5PluginRead2DInterface { - private: - using AltRanges = std::vector>; - public: std::vector readSelection(const HighFive::DataSet& dset, const Selection& xsel, const Selection& ysel) const override { - return readSelectionImpl(dset, xsel.ranges(), ysel.ranges()); - } - - std::vector readSelection(const HighFive::DataSet& dset, - const AltRanges& xsel, - const Selection& ysel) const override { - return readSelectionImpl(dset, xsel, ysel.ranges()); - } - - std::vector readSelection(const HighFive::DataSet& dset, - const Selection& xsel, - const AltRanges& ysel) const override { - return readSelectionImpl(dset, xsel.ranges(), ysel); - } - - std::vector readSelection(const HighFive::DataSet& dset, - const AltRanges& xsel, - const AltRanges& ysel) const override { - return readSelectionImpl(dset, xsel, ysel); - } - - private: - template - std::vector readSelectionImpl(const HighFive::DataSet& dset, - const std::vector& xsel, - const std::vector& 
ysel) const { return detail::readCanonicalSelection(dset, xsel, ysel); } }; @@ -86,9 +57,6 @@ class Hdf5PluginDefault, std::tuple> virtual public Hdf5PluginRead1DDefault..., virtual public Hdf5PluginRead2DDefault... { - private: - using AltRanges = std::vector>; - public: HighFive::File openFile(const std::string& path) const override { return HighFive::File(path); diff --git a/src/read_canonical_selection.hpp b/src/read_canonical_selection.hpp index 403f611e..d9526e44 100644 --- a/src/read_canonical_selection.hpp +++ b/src/read_canonical_selection.hpp @@ -30,10 +30,12 @@ std::vector readCanonicalSelection(const HighFive::DataSet& dset, const Selec return dset.select(make_hyperslab(selection.ranges())).template read>(); } -template +template std::vector readCanonicalSelection(const HighFive::DataSet& dset, - const std::vector& xranges, - const std::vector& yranges) { + const Selection& xsel, + const Selection& ysel) { + const auto& xranges = xsel.ranges(); + const auto& yranges = ysel.ranges(); if (yranges.size() != 1) { throw SonataError("Only yranges.size() == 1 has been implemented."); } From d2c401b0ebf4e503eae0a17f8a99f47a314db69b Mon Sep 17 00:00:00 2001 From: Luc Grosheintz Date: Thu, 30 Nov 2023 09:19:25 +0100 Subject: [PATCH 3/6] Remove unused overload. 
--- src/edge_index.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/edge_index.cpp b/src/edge_index.cpp index 66472e6f..089e050a 100644 --- a/src/edge_index.cpp +++ b/src/edge_index.cpp @@ -88,13 +88,6 @@ Selection resolve(const HighFive::Group& indexGroup, return Selection(std::move(secondaryRange)); } -Selection resolve(const HighFive::Group& indexGroup, - const NodeID nodeID, - const Hdf5Reader& reader) { - return resolve(indexGroup, std::vector{nodeID}, reader); -} - - namespace { std::unordered_map _groupNodeRanges(const std::vector& nodeIDs) { From 869bb32c62a6d26822bed43f44722fcd1eb1b227 Mon Sep 17 00:00:00 2001 From: Luc Grosheintz Date: Thu, 30 Nov 2023 09:36:37 +0100 Subject: [PATCH 4/6] remove flattenRanges --- src/read_bulk.hpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/read_bulk.hpp b/src/read_bulk.hpp index 4f528187..0eb78afd 100644 --- a/src/read_bulk.hpp +++ b/src/read_bulk.hpp @@ -43,18 +43,6 @@ size_t flatSize(const std::vector& ranges) { return size; } -template -std::vector flattenRanges(const std::vector& ranges) { - std::vector result; - result.reserve(flatSize(ranges)); - for (const auto& range : ranges) { - for (auto v = std::get<0>(range); v < std::get<1>(range); ++v) { - result.emplace_back(v); - } - } - return result; -} - template void erase_if(std::vector& v, Pred pred) { auto it = std::remove_if(v.begin(), v.end(), pred); From eb390fe953fe8d0d5a0aacd7c4cd097eb7902372 Mon Sep 17 00:00:00 2001 From: Luc Grosheintz Date: Thu, 30 Nov 2023 09:38:09 +0100 Subject: [PATCH 5/6] Use `#ifdef APPLE`. 
--- include/bbp/sonata/hdf5_reader.h | 31 ++++++++++++------------ include/bbp/sonata/unique_tuple.h | 39 ------------------------------- 2 files changed, 16 insertions(+), 54 deletions(-) delete mode 100644 include/bbp/sonata/unique_tuple.h diff --git a/include/bbp/sonata/hdf5_reader.h b/include/bbp/sonata/hdf5_reader.h index 906ee839..446faff9 100644 --- a/include/bbp/sonata/hdf5_reader.h +++ b/include/bbp/sonata/hdf5_reader.h @@ -4,7 +4,6 @@ #include #include -#include #include namespace bbp { @@ -124,20 +123,22 @@ class Hdf5Reader public: // The issue here is that on a mac `size_t` is different from // `{,u}int{8,16,32,64}_t` but not on the other two OSes. - using supported_1D_types = detail::unique_tuple; - - using supported_2D_types = detail::unique_tuple>; + using supported_1D_types = std::tuple; + + using supported_2D_types = std::tuple>; /// Create a valid Hdf5Reader with the default plugin. Hdf5Reader(); diff --git a/include/bbp/sonata/unique_tuple.h b/include/bbp/sonata/unique_tuple.h deleted file mode 100644 index d3a8fccb..00000000 --- a/include/bbp/sonata/unique_tuple.h +++ /dev/null @@ -1,39 +0,0 @@ - -// Slightly modified version of: -// https://stackoverflow.com/a/57528226 -// -// We've back ported `disjunction` from cppreference, and inlined the basecase -// for `unique`. 
- -namespace bbp { -namespace sonata { -namespace detail { - -template -struct disjunction: std::false_type {}; -template -struct disjunction: B1 {}; -template -struct disjunction: std::conditional_t> {}; - -template -struct unique; - -template -struct unique> { - using type = typename std::tuple; -}; - - -template -struct unique, U, Us...> - : std::conditional_t...>::value, - unique, Us...>, - unique, Us...>> {}; - -template -using unique_tuple = typename unique, Ts...>::type; - -} // namespace detail -} // namespace sonata -} // namespace bbp From 487e1c37fa5c0a26a6abcec537d5811710b33480 Mon Sep 17 00:00:00 2001 From: Luc Grosheintz Date: Wed, 6 Dec 2023 12:01:46 +0100 Subject: [PATCH 6/6] Fix ABI issue with Hdf5Reader. --- include/bbp/sonata/hdf5_reader.h | 2 +- python/libsonata/__init__.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/bbp/sonata/hdf5_reader.h b/include/bbp/sonata/hdf5_reader.h index 446faff9..557b8468 100644 --- a/include/bbp/sonata/hdf5_reader.h +++ b/include/bbp/sonata/hdf5_reader.h @@ -118,7 +118,7 @@ class Hdf5PluginInterface, std::tuple> /// } /// } /// -class Hdf5Reader +class SONATA_API Hdf5Reader { public: // The issue here is that on a mac `size_t` is different from diff --git a/python/libsonata/__init__.py b/python/libsonata/__init__.py index f9eca8d3..9409e749 100644 --- a/python/libsonata/__init__.py +++ b/python/libsonata/__init__.py @@ -45,4 +45,5 @@ "SpikePopulation", "SpikeReader", "version", + "Hdf5Reader", ]