From 6d9ae9c856eca5ac81d001377e550769304a35b4 Mon Sep 17 00:00:00 2001
From: Sergio <sergio.rivasgomez@epfl.ch>
Date: Fri, 9 Oct 2020 16:34:08 +0200
Subject: [PATCH 1/6] Improves IO performance for the report readers

> IO:
  * Loop through rows (timesteps) instead of columns (node_ids).
      - Changes from tsteps * node_ids reads to tsteps reads (i.e., the
        number of IO operations is independent of how many node_ids
        are requested).
  * Read all node_ids in a buffer every timestep, between min and max
    offsets of node_ids requested.

> Memory
  * Allocate buffer before passing to HighFive to avoid extra and
    unnecessary memory allocations before reading.
  * Change data structure of node_ids/offsets from vector to map
    for faster search (from O(n^2) to O(n log n)).
  * In soma reports, assign values directly to return buffer instead
    of std::copy/memcpy.

> New features and others:
  * Add support for strided reads, which allow reducing the number of
    timesteps read (e.g., stride 1 reads every timestep (the default),
    stride 2 reads every second timestep, etc.).
  * Eliminate duplicated code, avoid calling HDF5 metadata in every
    iteration, updated unit tests, and other minor changes.
---
 include/bbp/sonata/report_reader.h | 17 +++---
 python/bindings.cpp                |  5 +-
 python/tests/test.py               |  6 +-
 src/report_reader.cpp              | 96 ++++++++++++++----------------
 4 files changed, 62 insertions(+), 62 deletions(-)
diff --git a/include/bbp/sonata/report_reader.h b/include/bbp/sonata/report_reader.h
index e724a778..6887786d 100644
--- a/include/bbp/sonata/report_reader.h
+++ b/include/bbp/sonata/report_reader.h
@@ -8,8 +8,8 @@
 
 #include <highfive/H5File.hpp>
 
-#include <bbp/sonata/population.h>
 #include <bbp/sonata/optional.hpp>
+#include <bbp/sonata/population.h>
 
 namespace H5 = HighFive;
 
@@ -123,19 +123,22 @@ class SONATA_API ReportReader
 
         /**
          * \param node_ids limit the report to the given selection.
-         * \param tstart return spikes occurring on or after tstart. tstart=nonstd::nullopt
-         * indicates no limit. \param tstop return spikes occurring on or before tstop.
-         * tstop=nonstd::nullopt indicates no limit.
+         * \param tstart return voltages occurring on or after tstart. tstart=nonstd::nullopt
+         * indicates no limit. \param tstop return voltages occurring on or before tstop.
+         * tstop=nonstd::nullopt indicates no limit. \param tstride indicates every how many
+         * timesteps we read data. tstride=nonstd::nullopt indicates that all timesteps are read.
          */
         DataFrame<KeyType> get(const nonstd::optional<Selection>& node_ids = nonstd::nullopt,
                                const nonstd::optional<double>& tstart = nonstd::nullopt,
-                               const nonstd::optional<double>& tstop = nonstd::nullopt) const;
+                               const nonstd::optional<double>& tstop = nonstd::nullopt,
+                               const nonstd::optional<size_t>& tstride = nonstd::nullopt) const;
 
       private:
         Population(const H5::File& file, const std::string& populationName);
-        std::pair<size_t, size_t> getIndex(const nonstd::optional<double>& tstart, const nonstd::optional<double>& tstop) const;
+        std::pair<size_t, size_t> getIndex(const nonstd::optional<double>& tstart,
+                                           const nonstd::optional<double>& tstop) const;
 
-        std::vector<std::pair<NodeID, std::pair<uint64_t, uint64_t>>> nodes_pointers_;
+        std::map<NodeID, std::pair<uint64_t, uint64_t>> nodes_pointers_;
         H5::Group pop_group_;
         std::vector<NodeID> nodes_ids_;
         double tstart_, tstop_, tstep_;
diff --git a/python/bindings.cpp b/python/bindings.cpp
index 5acc9da5..98b624dc 100644
--- a/python/bindings.cpp
+++ b/python/bindings.cpp
@@ -343,10 +343,11 @@ void bindReportReader(py::module& m, const std::string& prefix) {
                                                 "A population inside a ReportReader")
         .def("get",
              &ReportType::Population::get,
-             "Return reports with all those node_ids between 'tstart' and 'tstop'",
+             "Return reports with all those node_ids between 'tstart' and 'tstop' with a stride tstride",
              "node_ids"_a = nonstd::nullopt,
              "tstart"_a = nonstd::nullopt,
-             "tstop"_a = nonstd::nullopt)
+             "tstop"_a = nonstd::nullopt,
+             "tstride"_a = nonstd::nullopt)
         .def("get_node_ids",
              &ReportType::Population::getNodeIds,
              "Return the list of nodes ids for this population")
diff --git a/python/tests/test.py b/python/tests/test.py
index 791dba72..7094340f 100644
--- a/python/tests/test.py
+++ b/python/tests/test.py
@@ -282,6 +282,7 @@ def test_get_reports_from_population(self):
         sel = self.test_obj['All'].get(node_ids=[13, 14], tstart=0.8, tstop=1.0)
         self.assertEqual(len(sel.times), 2)  # Number of timestamp (0.8 and 0.9)
         self.assertEqual(list(sel.ids), [13, 14])
+        np.testing.assert_allclose(sel.data, [[13.8, 14.8], [13.9, 14.9]])
 
 class TestElementReportPopulation(unittest.TestCase):
     def setUp(self):
@@ -308,8 +309,8 @@ def test_get_reports_from_population(self):
         self.assertEqual(self.test_obj['All'].time_units, 'ms')
         self.assertEqual(self.test_obj['All'].data_units, 'mV')
         self.assertTrue(self.test_obj['All'].sorted)
-        self.assertEqual(len(self.test_obj['All'].get().data), 20)  # Number of times in this range
-        self.assertEqual(len(self.test_obj['All'].get().times), 20)  # Should be the same
+        self.assertEqual(len(self.test_obj['All'].get(tstride=2).data), 10)  # Number of times in this range
+        self.assertEqual(len(self.test_obj['All'].get(tstride=2).times), 10)  # Should be the same
         self.assertEqual(len(self.test_obj['All'].get().ids), 100)
         sel = self.test_obj['All'].get(node_ids=[13, 14], tstart=0.8, tstop=1.2)
         keys = list(sel.ids)
@@ -327,6 +328,7 @@ def test_get_reports_from_population(self):
         # check following calls succeed (no memory destroyed)
         np.testing.assert_allclose(self.test_obj['All'].get(node_ids=[1, 2], tstart=3., tstop=3.).data[0], [150.0, 150.1, 150.2, 150.3, 150.4, 150.5, 150.6, 150.7, 150.8, 150.9])
         np.testing.assert_allclose(self.test_obj['All'].get(node_ids=[3, 4], tstart=0.2, tstop=0.4).data[0], [11.0, 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7, 11.8, 11.9], 1e-6, 0)
+        np.testing.assert_allclose(self.test_obj['All'].get(node_ids=[3, 4], tstride=4).data[2], [81.0, 81.1, 81.2, 81.3, 81.4, 81.5, 81.6, 81.7, 81.8, 81.9], 1e-6, 0)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/src/report_reader.cpp b/src/report_reader.cpp
index 232242d5..195b7f26 100644
--- a/src/report_reader.cpp
+++ b/src/report_reader.cpp
@@ -225,8 +225,8 @@ ReportReader<T>::Population::Population(const H5::File& file, const std::string&
         mapping_group.getDataSet("index_pointers").read(index_pointers);
 
         for (size_t i = 0; i < nodes_ids_.size(); ++i) {
-            nodes_pointers_.emplace_back(nodes_ids_[i],
-                                         std::make_pair(index_pointers[i], index_pointers[i + 1]));
+            nodes_pointers_.emplace(nodes_ids_[i],
+                                    std::make_pair(index_pointers[i], index_pointers[i + 1]));
         }
 
         {  // Get times
@@ -315,18 +315,18 @@ std::pair<size_t, size_t> ReportReader<T>::Population::getIndex(
 template <typename T>
 DataFrame<T> ReportReader<T>::Population::get(const nonstd::optional<Selection>& selection,
                                               const nonstd::optional<double>& tstart,
-                                              const nonstd::optional<double>& tstop) const {
+                                              const nonstd::optional<double>& tstop,
+                                              const nonstd::optional<size_t>& tstride) const {
     DataFrame<T> data_frame;
-
     size_t index_start = 0;
     size_t index_stop = 0;
     std::tie(index_start, index_stop) = getIndex(tstart, tstop);
-
+    const size_t stride = tstride.value_or(1) > 0 ? tstride.value_or(1) : 1;
     if (index_start > index_stop) {
         throw SonataError("tstart should be <= to tstop");
     }
 
-    for (size_t i = index_start; i <= index_stop; ++i) {
+    for (size_t i = index_start; i <= index_stop; i += stride) {
         data_frame.times.push_back(times_index_[i].second);
     }
 
@@ -349,22 +349,25 @@ DataFrame<T> ReportReader<T>::Population::get(const nonstd::optional<Selection>&
         node_ids = selection->flatten();
     }
 
+    std::vector<std::pair<uint64_t, uint64_t>> positions;
+    uint64_t min = UINT64_MAX;
+    uint64_t max = 0;
+    auto dataset_elem_ids = pop_group_.getGroup("mapping").getDataSet("element_ids");
     for (const auto& node_id : node_ids) {
-        const auto it = std::find_if(
-            nodes_pointers_.begin(),
-            nodes_pointers_.end(),
-            [&node_id](const std::pair<NodeID, std::pair<NodeID, uint64_t>>& node_pointer) {
-                return node_pointer.first == node_id;
-            });
+        const auto it = nodes_pointers_.find(node_id);
         if (it == nodes_pointers_.end()) {
             continue;
         }
-
-        std::vector<ElementID> element_ids;
-        pop_group_.getGroup("mapping")
-            .getDataSet("element_ids")
-            .select({it->second.first}, {it->second.second - it->second.first})
-            .read(element_ids);
+        if (it->second.first < min) {
+            min = it->second.first;
+        }
+        if (it->second.second > max) {
+            max = it->second.second;
+        }
+        positions.emplace_back(it->second.first, it->second.second);
+        std::vector<ElementID> element_ids(it->second.second - it->second.first);
+        dataset_elem_ids.select({it->second.first}, {it->second.second - it->second.first})
+            .read(element_ids.data());
         for (const auto& elem : element_ids) {
             data_frame.ids.push_back(make_key<T>(node_id, elem));
         }
@@ -374,43 +377,34 @@ DataFrame<T> ReportReader<T>::Population::get(const nonstd::optional<Selection>&
     }
 
     // Fill .data member
-
-    auto n_time_entries = index_stop - index_start + 1;
-    auto n_ids = data_frame.ids.size();
+    size_t n_time_entries = ((index_stop - index_start) / stride) + 1;
+    size_t n_ids = data_frame.ids.size();
     data_frame.data.resize(n_time_entries * n_ids);
 
-    // FIXME: It will be good to do it for ranges but if node_ids are not sorted it is not easy
-    // TODO: specialized this function for sorted node_ids?
-    int ids_index = 0;
-    for (const auto& node_id : node_ids) {
-        const auto it = std::find_if(
-            nodes_pointers_.begin(),
-            nodes_pointers_.end(),
-            [&node_id](const std::pair<NodeID, std::pair<uint64_t, uint64_t>>& node_pointer) {
-                return node_pointer.first == node_id;
-            });
-        if (it == nodes_pointers_.end()) {
-            continue;
-        }
-
-        // elems are by timestamp and by Nodes_id
-        std::vector<std::vector<float>> data;
-        pop_group_.getDataSet("data")
-            .select({index_start, it->second.first},
-                    {index_stop - index_start + 1, it->second.second - it->second.first})
-            .read(data);
-
-        int timer_index = 0;
-
-        for (const std::vector<float>& datum : data) {
-            std::copy(datum.data(),
-                      datum.data() + datum.size(),
-                      &data_frame.data[timer_index * n_ids + ids_index]);
-            ++timer_index;
+    std::vector<float> buffer(max - min);
+    auto dataset = pop_group_.getDataSet("data");
+    for (size_t timer_index = index_start; timer_index <= index_stop; timer_index += stride) {
+        dataset.select({timer_index, min}, {1, max - min}).read(buffer.data());
+
+        off_t offset = 0;
+        off_t data_offset = (timer_index - index_start) / stride;
+        auto data_ptr = &data_frame.data[data_offset * n_ids];
+        for (const auto& position : positions) {
+            uint64_t elements_per_gid = position.second - position.first;
+            uint64_t gid_start = position.first - min;
+            uint64_t gid_end = position.second - min;
+
+            // Soma report
+            if (elements_per_gid == 1) {
+                data_ptr[offset] = buffer[gid_start];
+            } else {  // Elements report
+                std::memcpy(&data_ptr[offset],
+                            &buffer[gid_start],
+                            sizeof(float) * (gid_end - gid_start));
+            }
+            offset += elements_per_gid;
         }
-        ids_index += data[0].size();
     }
-
     return data_frame;
 }
 

From 2b4e03f77846322be93aea7731a8357d04cbd3d4 Mon Sep 17 00:00:00 2001
From: Blanco Alonso Jorge <jblanco@bbpv2.epfl.ch>
Date: Fri, 9 Oct 2020 17:52:29 +0200
Subject: [PATCH 2/6] Updated docstrings and clang format

---
 python/bindings.cpp           |  3 ++-
 python/generated/docstrings.h | 12 ++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/python/bindings.cpp b/python/bindings.cpp
index 98b624dc..e94af2ba 100644
--- a/python/bindings.cpp
+++ b/python/bindings.cpp
@@ -343,7 +343,8 @@ void bindReportReader(py::module& m, const std::string& prefix) {
                                                 "A population inside a ReportReader")
         .def("get",
              &ReportType::Population::get,
-             "Return reports with all those node_ids between 'tstart' and 'tstop' with a stride tstride",
+             "Return reports with all those node_ids between 'tstart' and 'tstop' with a stride "
+             "tstride",
              "node_ids"_a = nonstd::nullopt,
              "tstart"_a = nonstd::nullopt,
              "tstop"_a = nonstd::nullopt,
diff --git a/python/generated/docstrings.h b/python/generated/docstrings.h
index 0b25fe02..0821d499 100644
--- a/python/generated/docstrings.h
+++ b/python/generated/docstrings.h
@@ -255,12 +255,16 @@ R"doc(Parameter ``node_ids``:
     limit the report to the given selection.
 
 Parameter ``tstart``:
-    return spikes occurring on or after tstart. tstart=nonstd::nullopt
-    indicates no limit.
+    return voltages occurring on or after tstart.
+    tstart=nonstd::nullopt indicates no limit.
 
 Parameter ``tstop``:
-    return spikes occurring on or before tstop. tstop=nonstd::nullopt
-    indicates no limit.)doc";
+    return voltages occurring on or before tstop.
+    tstop=nonstd::nullopt indicates no limit.
+
+Parameter ``tstride``:
+    indicates every how many timesteps we read data.
+    tstride=nonstd::nullopt indicates that all timesteps are read.)doc";
 
 static const char *__doc_bbp_sonata_ReportReader_Population_getDataUnits = R"doc(Return the unit of data.)doc";
 

From fc0da88de6139abfed41a8ee05a456be3520bcbe Mon Sep 17 00:00:00 2001
From: Blanco Alonso Jorge <jblanco@bbpv2.epfl.ch>
Date: Fri, 9 Oct 2020 19:02:23 +0200
Subject: [PATCH 3/6] Change min/max calculations to one-liners

---
 src/report_reader.cpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/report_reader.cpp b/src/report_reader.cpp
index 195b7f26..b54c11db 100644
--- a/src/report_reader.cpp
+++ b/src/report_reader.cpp
@@ -358,13 +358,10 @@ DataFrame<T> ReportReader<T>::Population::get(const nonstd::optional<Selection>&
         if (it == nodes_pointers_.end()) {
             continue;
         }
-        if (it->second.first < min) {
-            min = it->second.first;
-        }
-        if (it->second.second > max) {
-            max = it->second.second;
-        }
+        min = std::min(it->second.first, min);
+        max = std::max(it->second.second, max);
         positions.emplace_back(it->second.first, it->second.second);
+
         std::vector<ElementID> element_ids(it->second.second - it->second.first);
         dataset_elem_ids.select({it->second.first}, {it->second.second - it->second.first})
             .read(element_ids.data());

From 24d294ecfa985c5e4cdc27c59620bfb3504f1414 Mon Sep 17 00:00:00 2001
From: Blanco Alonso Jorge <jblanco@bbpv2.epfl.ch>
Date: Mon, 19 Oct 2020 15:57:46 +0200
Subject: [PATCH 4/6] Address comments and suggestions

---
 include/bbp/sonata/report_reader.h |   2 +-
 python/tests/test.py               |   9 ++++++++-
 src/report_reader.cpp              |  24 +++++++++++++++---------
 tests/data/generate.py             |   3 +--
 tests/data/somas.h5                | Bin 13200 -> 13200 bytes
 tests/test_report_reader.cpp       |  16 +++++++++++++---
 6 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/include/bbp/sonata/report_reader.h b/include/bbp/sonata/report_reader.h
index 6887786d..77559b8d 100644
--- a/include/bbp/sonata/report_reader.h
+++ b/include/bbp/sonata/report_reader.h
@@ -138,7 +138,7 @@ class SONATA_API ReportReader
         std::pair<size_t, size_t> getIndex(const nonstd::optional<double>& tstart,
                                            const nonstd::optional<double>& tstop) const;
 
-        std::map<NodeID, std::pair<uint64_t, uint64_t>> nodes_pointers_;
+        std::map<NodeID, Selection::Range> nodes_pointers_;
         H5::Group pop_group_;
         std::vector<NodeID> nodes_ids_;
         double tstart_, tstop_, tstep_;
diff --git a/python/tests/test.py b/python/tests/test.py
index 7094340f..5c602af2 100644
--- a/python/tests/test.py
+++ b/python/tests/test.py
@@ -275,15 +275,22 @@ def test_get_reports_from_population(self):
         self.assertEqual(self.test_obj['All'].times, (0., 1., 0.1))
         self.assertEqual(self.test_obj['All'].time_units, 'ms')
         self.assertEqual(self.test_obj['All'].data_units, 'mV')
-        self.assertTrue(self.test_obj['All'].sorted)
+        self.assertFalse(self.test_obj['All'].sorted)
         self.assertEqual(len(self.test_obj['All'].get().ids), 20)  # Number of nodes
         self.assertEqual(len(self.test_obj['All'].get().times), 10)  # number of times
         self.assertEqual(len(self.test_obj['All'].get().data), 10)  # should be the same
+
         sel = self.test_obj['All'].get(node_ids=[13, 14], tstart=0.8, tstop=1.0)
         self.assertEqual(len(sel.times), 2)  # Number of timestamp (0.8 and 0.9)
         self.assertEqual(list(sel.ids), [13, 14])
         np.testing.assert_allclose(sel.data, [[13.8, 14.8], [13.9, 14.9]])
 
+        sel_all = self.test_obj['All'].get()
+        self.assertEqual(sel_all.ids, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20])
+
+        sel_empty = self.test_obj['All'].get(node_ids=[])
+        np.testing.assert_allclose(sel_empty.data, np.empty(shape=(0, 0)))
+
 class TestElementReportPopulation(unittest.TestCase):
     def setUp(self):
         path = os.path.join(PATH, "elements.h5")
diff --git a/src/report_reader.cpp b/src/report_reader.cpp
index b54c11db..83b14a4c 100644
--- a/src/report_reader.cpp
+++ b/src/report_reader.cpp
@@ -321,7 +321,10 @@ DataFrame<T> ReportReader<T>::Population::get(const nonstd::optional<Selection>&
     size_t index_start = 0;
     size_t index_stop = 0;
     std::tie(index_start, index_stop) = getIndex(tstart, tstop);
-    const size_t stride = tstride.value_or(1) > 0 ? tstride.value_or(1) : 1;
+    const size_t stride = tstride.value_or(1);
+    if (stride == 0) {
+        throw SonataError("tstride should be > 0");
+    }
     if (index_start > index_stop) {
         throw SonataError("tstart should be <= to tstop");
     }
@@ -337,10 +340,11 @@ DataFrame<T> ReportReader<T>::Population::get(const nonstd::optional<Selection>&
     Selection::Values node_ids;
 
     if (!selection) {  // Take all nodes in this case
+        node_ids.reserve(nodes_pointers_.size());
         std::transform(nodes_pointers_.begin(),
                        nodes_pointers_.end(),
                        std::back_inserter(node_ids),
-                       [](const std::pair<NodeID, std::pair<uint64_t, uint64_t>>& node_pointer) {
+                       [](const std::pair<NodeID, Selection::Range>& node_pointer) {
                            return node_pointer.first;
                        });
     } else if (selection->empty()) {
@@ -349,9 +353,11 @@ DataFrame<T> ReportReader<T>::Population::get(const nonstd::optional<Selection>&
         node_ids = selection->flatten();
     }
 
-    std::vector<std::pair<uint64_t, uint64_t>> positions;
-    uint64_t min = UINT64_MAX;
-    uint64_t max = 0;
+    Selection::Ranges positions;
+    // min and max offsets of the node_ids requested are calculated
+    // to reduce the amount of IO that is brought to memory
+    uint64_t min = std::numeric_limits<uint64_t>::max();
+    uint64_t max = std::numeric_limits<uint64_t>::min();
     auto dataset_elem_ids = pop_group_.getGroup("mapping").getDataSet("element_ids");
     for (const auto& node_id : node_ids) {
         const auto it = nodes_pointers_.find(node_id);
@@ -381,6 +387,8 @@ DataFrame<T> ReportReader<T>::Population::get(const nonstd::optional<Selection>&
     std::vector<float> buffer(max - min);
     auto dataset = pop_group_.getDataSet("data");
     for (size_t timer_index = index_start; timer_index <= index_stop; timer_index += stride) {
+        // Note: The code assumes that the file is chunked by rows and not by columns
+        // (i.e., if the chunking changes in the future, the reading method must also be adapted)
         dataset.select({timer_index, min}, {1, max - min}).read(buffer.data());
 
         off_t offset = 0;
@@ -389,15 +397,13 @@ DataFrame<T> ReportReader<T>::Population::get(const nonstd::optional<Selection>&
         for (const auto& position : positions) {
             uint64_t elements_per_gid = position.second - position.first;
             uint64_t gid_start = position.first - min;
-            uint64_t gid_end = position.second - min;
 
             // Soma report
             if (elements_per_gid == 1) {
                 data_ptr[offset] = buffer[gid_start];
             } else {  // Elements report
-                std::memcpy(&data_ptr[offset],
-                            &buffer[gid_start],
-                            sizeof(float) * (gid_end - gid_start));
+                uint64_t gid_end = position.second - min;
+                std::copy(&buffer[gid_start], &buffer[gid_end], &data_ptr[offset]);
             }
             offset += elements_per_gid;
         }
diff --git a/tests/data/generate.py b/tests/data/generate.py
index 9923bd81..fffe342f 100755
--- a/tests/data/generate.py
+++ b/tests/data/generate.py
@@ -134,7 +134,7 @@ def write_edges(filepath):
 
 def write_soma_report(filepath):
     population_names = ['All', 'soma1', 'soma2']
-    node_ids = np.arange(1, 21)
+    node_ids = np.concatenate((np.arange(10, 21), np.arange(1, 10)), axis=None)
     index_pointers = np.arange(0, 21)
     element_ids = np.zeros(20)
     times = (0.0, 1.0, 0.1)
@@ -148,7 +148,6 @@ def write_soma_report(filepath):
         gmapping = h5f.create_group('/report/' + population_names[0] + '/mapping')
 
         dnodes = gmapping.create_dataset('node_ids', data=node_ids, dtype=np.uint64)
-        dnodes.attrs.create('sorted', data=True, dtype=np.uint8)
         gmapping.create_dataset('index_pointers', data=index_pointers, dtype=np.uint64)
         gmapping.create_dataset('element_ids', data=element_ids, dtype=np.uint32)
         dtimes = gmapping.create_dataset('time', data=times, dtype=np.double)
diff --git a/tests/data/somas.h5 b/tests/data/somas.h5
index fce1806101bccc34bbcabea9da382586e490da84..52fd3d83e400593b18d05b41128617a6ca4b0b54 100644
GIT binary patch
delta 181
zcmbP`J|TU>8E%%EslD-&FK`!4PUF#<?7*Wkc?*xu<Ox8Ug;!_t10Wp&qz!mgCNBZf
z4M6$}kiGz<UHEh+EAXjIo&ux`fb<g}eE>+C@as$#;8&R(AP_N`L%?H`fEUMPCmpNF
zccj!NuaZz=2U*C#&@gdh>*NQlY@6dGk1-;NZQdX)B8bGF9H83+cH`y;x*J#lY@$Fs

delta 213
zcmbP`J|TU>8EzK#?+I~}FK`!4&f`&<?7*Wlc@K}u<Ox8UhgW6t10Wp(qz!m=Ca(d~
z4M6%2kiGz<efU%+EAZ(|o&%%{fb<(6eE>+?@T*J~;MbYVA+TaIhkz5u<RD$S$q70#
zlkZ5WO<pCTBn7gOhrxh>g@J*Ak%66ohk=8kIKQYQHD&TUZYgd?pfCh@Ox)-@`2j22
g<~Ye?j8KP4bZp)rEg}f#O%BlQ0dqD#(A~fa0F(MdK>z>%

diff --git a/tests/test_report_reader.cpp b/tests/test_report_reader.cpp
index 6e36ba51..816602e5 100644
--- a/tests/test_report_reader.cpp
+++ b/tests/test_report_reader.cpp
@@ -72,15 +72,22 @@ TEST_CASE("SomaReportReader", "[base]") {
 
     REQUIRE(pop.getDataUnits() == "mV");
 
-    REQUIRE(pop.getSorted());
+    REQUIRE(pop.getSorted() == false);
 
-    REQUIRE(pop.getNodeIds() == std::vector<NodeID>{1,  2,  3,  4,  5,  6,  7,  8,  9,  10,
-                                                    11, 12, 13, 14, 15, 16, 17, 18, 19, 20});
+    REQUIRE(pop.getNodeIds() == std::vector<NodeID>{10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                                                    20, 1,  2,  3,  4,  5,  6,  7,  8,  9});
 
     auto data = pop.get(Selection({{3, 5}}), 0.2, 0.5);
     REQUIRE(data.ids == DataFrame<NodeID>::DataType{{3, 4}});
     testTimes(data.times, 0.2, 0.1, 4);
     REQUIRE(data.data == std::vector<float>{3.2f, 4.2f, 3.3f, 4.3f, 3.4f, 4.4f, 3.5f, 4.5f});
+
+    auto data_all = pop.get();
+    REQUIRE(data_all.ids == DataFrame<NodeID>::DataType{{1,  2,  3,  4,  5,  6,  7,  8,  9,  10,
+                                                         11, 12, 13, 14, 15, 16, 17, 18, 19, 20}});
+
+    auto data_empty = pop.get(Selection({}));
+    REQUIRE(data_empty.data == std::vector<float>{});
 }
 
 TEST_CASE("ElementReportReader limits", "[base]") {
@@ -106,6 +113,9 @@ TEST_CASE("ElementReportReader limits", "[base]") {
 
     // Negatives times
     REQUIRE_THROWS(pop.get(Selection({{1, 2}}), -1., -2.));
+
+    // Stride = 0
+    REQUIRE_THROWS(pop.get(Selection({{1, 2}}), 0.1, 0.2, 0));
 }
 
 TEST_CASE("ElementReportReader", "[base]") {

From a69e779a7553aa1338e1291d6ca495eaf7b01813 Mon Sep 17 00:00:00 2001
From: Sergio <sergio.rivasgomez@epfl.ch>
Date: Mon, 19 Oct 2020 18:37:02 +0200
Subject: [PATCH 5/6] Moves the Range/Ranges types definition locally

---
 include/bbp/sonata/report_reader.h | 5 ++++-
 src/report_reader.cpp              | 4 ++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/bbp/sonata/report_reader.h b/include/bbp/sonata/report_reader.h
index 77559b8d..66a54538 100644
--- a/include/bbp/sonata/report_reader.h
+++ b/include/bbp/sonata/report_reader.h
@@ -97,6 +97,9 @@ template <typename KeyType>
 class SONATA_API ReportReader
 {
   public:
+    using Range = std::pair<uint64_t, uint64_t>;
+    using Ranges = std::vector<Range>;
+
     class Population
     {
       public:
@@ -138,7 +141,7 @@ class SONATA_API ReportReader
         std::pair<size_t, size_t> getIndex(const nonstd::optional<double>& tstart,
                                            const nonstd::optional<double>& tstop) const;
 
-        std::map<NodeID, Selection::Range> nodes_pointers_;
+        std::map<NodeID, Range> nodes_pointers_;
         H5::Group pop_group_;
         std::vector<NodeID> nodes_ids_;
         double tstart_, tstop_, tstep_;
diff --git a/src/report_reader.cpp b/src/report_reader.cpp
index 83b14a4c..47344aa0 100644
--- a/src/report_reader.cpp
+++ b/src/report_reader.cpp
@@ -344,7 +344,7 @@ DataFrame<T> ReportReader<T>::Population::get(const nonstd::optional<Selection>&
         std::transform(nodes_pointers_.begin(),
                        nodes_pointers_.end(),
                        std::back_inserter(node_ids),
-                       [](const std::pair<NodeID, Selection::Range>& node_pointer) {
+                       [](const std::pair<NodeID, Range>& node_pointer) {
                            return node_pointer.first;
                        });
     } else if (selection->empty()) {
@@ -353,7 +353,7 @@ DataFrame<T> ReportReader<T>::Population::get(const nonstd::optional<Selection>&
         node_ids = selection->flatten();
     }
 
-    Selection::Ranges positions;
+    Ranges positions;
     // min and max offsets of the node_ids requested are calculated
     // to reduce the amount of IO that is brought to memory
     uint64_t min = std::numeric_limits<uint64_t>::max();

From 9300a81da2a8bd3b867820212eb70aeb5795eca5 Mon Sep 17 00:00:00 2001
From: Blanco Alonso Jorge <jblanco@bbpv2.epfl.ch>
Date: Tue, 20 Oct 2020 13:10:03 +0200
Subject: [PATCH 6/6] Throw exception when datatype of dataset 'data' is not
 Float32

---
 src/report_reader.cpp        |   9 ++++++++-
 tests/data/generate.py       |   8 ++++++++
 tests/data/somas.h5          | Bin 13200 -> 17928 bytes
 tests/test_report_reader.cpp |   4 ++++
 4 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/report_reader.cpp b/src/report_reader.cpp
index 47344aa0..68785f27 100644
--- a/src/report_reader.cpp
+++ b/src/report_reader.cpp
@@ -1,4 +1,5 @@
 #include <bbp/sonata/report_reader.h>
+#include <fmt/format.h>
 
 constexpr double EPSILON = 1e-6;
 
@@ -384,8 +385,14 @@ DataFrame<T> ReportReader<T>::Population::get(const nonstd::optional<Selection>&
     size_t n_ids = data_frame.ids.size();
     data_frame.data.resize(n_time_entries * n_ids);
 
-    std::vector<float> buffer(max - min);
     auto dataset = pop_group_.getDataSet("data");
+    auto dataset_type = dataset.getDataType();
+    if (dataset_type.getClass() != HighFive::DataTypeClass::Float || dataset_type.getSize() != 4) {
+        throw SonataError(
+            fmt::format("DataType of dataset 'data' should be Float32 ('{}' was found)",
+                        dataset_type.string()));
+    }
+    std::vector<float> buffer(max - min);
     for (size_t timer_index = index_start; timer_index <= index_stop; timer_index += stride) {
         // Note: The code assumes that the file is chunked by rows and not by columns
         // (i.e., if the chunking changes in the future, the reading method must also be adapted)
diff --git a/tests/data/generate.py b/tests/data/generate.py
index fffe342f..e03e798c 100755
--- a/tests/data/generate.py
+++ b/tests/data/generate.py
@@ -154,6 +154,14 @@ def write_soma_report(filepath):
         dtimes.attrs.create('units', data="ms", dtype=string_dtype)
 
         gpop_soma1 = h5f.create_group('/report/' + population_names[1])
+        ddata2 = gpop_soma1.create_dataset('data', data=data, dtype=np.float64)
+        ddata2.attrs.create('units', data="mV", dtype=string_dtype)
+        gmapping2 = h5f.create_group('/report/' + population_names[1] + '/mapping')
+        gmapping2.create_dataset('node_ids', data=node_ids, dtype=np.uint64)
+        gmapping2.create_dataset('index_pointers', data=index_pointers, dtype=np.uint64)
+        gmapping2.create_dataset('element_ids', data=element_ids, dtype=np.uint32)
+        dtimes2 = gmapping2.create_dataset('time', data=times, dtype=np.double)
+        dtimes2.attrs.create('units', data="ms", dtype=string_dtype)
         gpop_soma2 = h5f.create_group('/report/' + population_names[2])
 
 
diff --git a/tests/data/somas.h5 b/tests/data/somas.h5
index 52fd3d83e400593b18d05b41128617a6ca4b0b54..7540da0da6f9172734a5c79a7286e0348c572a63 100644
GIT binary patch
delta 2072
zcmYk-y>BB$7{_sY?MwU;i0$*m>z8x<{=W9wYoseqcn-t~A&O9d5C!C}2m+e?2kQa>
zhawbfnllw81w~fdP#_eE=88Rqk^+g&I#Gc@a0rCh-T5YBr0BDLv^&r2WPh5|cRx9a
zp7ylFU1v~D6(f~MIHHQklNZ&+BllkFgJ>-K-DmDa`pz}?aMz5vB7YKBCH-|RxR4k6
z19?^!DREmpc3`dEswS@{k@r&jQdC4VCSR)*qH+0NWnVkpbyt;-cBL47cs0j?aS~qt
zWPB{&*e)d#Gc{kvake$NOeW+*Gb0}+52TrXMSg0SDKS@fh%LEO*40Gmm$qYJwYGBF
zQsgt*Q*-51<#9eCUruF`SF7~bb+J0{$?prgc5KO?3vbJ%sYjp4srAn4e9LYLHF*bn
zIKUB((Zngv(84+XvWTd`?*wKrue2jNg$mZNA*^t;u!B7u;0VX!^R+*UCQfmN7S5IH
zUl)%(&$B%*B7zIgONgUzWH669R%lei2K5$pu!jR2;h5)4oCezd-;9EVbNp=)zR^0$
zijyofgL%}kLZccsc&>#V?BT%I4)$zBVXP*4)x;^z(89UW`+cEyOrv3ng=g^w7V##1
zYS<8u{r&exiwQf}!vT(POd}JgI718P_{TzhLopVRrg0W;2wQD&k-|+H)v&?G*v2mQ
zafk*^luJ^wBqd8yvLq$T%alB|iAkIZb})x~Si&mSvB{gYv5S2iqJa}$l$5M(VzMSC
zYhtp#jLBbNOgt~nj_k0tIo!h%R%uknCiOOUv9Gj`h7=5(s0UqQ;)XG?ESHqHq{N+z
zdHB!$o0MGR&7NVQIo!h%R_Rj@wU3UPJlMuA_Hl>?PMC<4c%;N5B_1j9E>rRkDcPd&
zv+T_Ez##tx3T51<Q5~B!Y-1PuI79;{s?mm&Y)Hw5lx#@JCP>NMAi@6<lWi8TOXKTU
zz%uUBsE$qHIOkD$INl0Rv}3;MTaNQ*)Ia-f%O6V}Et8dB={1hQwf5+ae_W;Xh#Fsu
z;P5&tCbYxngZeG%KfJJVzLj5;?rUdR882Tx{=lF0e)7$ny6Zru%f<hlukUML=Hxd9
TH~S}LJiO08AD?p%$2ILg?NYs$

delta 196
zcmeC^VVsaYL4#?6@kT8<F3B6x3}C<rp%{J`Ko}C66S=Y(nQNx@Za&ZL$h!G}Py^$}
z0~;7OJBakKK!hbinLv!m_Kw|?wb;cbpODtzglnIi$mKrSLB?V71{n^f1N@s0%GC2u
zJYYWAL8k?*$=~Vo<XyT-VD^38_Ra5{rI;oca0)PTOrB^UKKX-jz~lryHn=b+2h@EQ
JlOGz#0{{@XJV5{e

diff --git a/tests/test_report_reader.cpp b/tests/test_report_reader.cpp
index 816602e5..de1cc25d 100644
--- a/tests/test_report_reader.cpp
+++ b/tests/test_report_reader.cpp
@@ -57,6 +57,10 @@ TEST_CASE("SomaReportReader limits", "[base]") {
 
     // Negatives times
     REQUIRE_THROWS(pop.get(Selection({{1, 2}}), -1., -2.));
+
+    // DataType of dataset 'data' should be Float32
+    auto pop2 = reader.openPopulation("soma1");
+    REQUIRE_THROWS(pop2.get());
 }
 
 TEST_CASE("SomaReportReader", "[base]") {