From 019f2e2f05a825ed8fa5882fda07386a38b80dfa Mon Sep 17 00:00:00 2001 From: Joseph Birkner Date: Wed, 18 Feb 2026 15:41:03 +0100 Subject: [PATCH 01/21] Migrate storage containers to noserde::Buffer --- CMakeLists.txt | 1 + cmake/deps.cmake | 7 +++++++ include/simfil/model/arena.h | 15 +++++++------- src/model/model.cpp | 40 +++++++++++++++++------------------- 4 files changed, 34 insertions(+), 29 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 109d002d..7a6cec83 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,6 +112,7 @@ target_compile_features(simfil target_link_libraries(simfil PUBLIC + noserde::runtime sfl::sfl fmt::fmt tl::expected diff --git a/cmake/deps.cmake b/cmake/deps.cmake index e7c88f37..24ff5fbc 100644 --- a/cmake/deps.cmake +++ b/cmake/deps.cmake @@ -13,6 +13,13 @@ CPMAddPackage( # bitsery CPMAddPackage("gh:fraillt/bitsery@5.2.4") +# noserde +CPMAddPackage( + URI "gh:Klebert-Engineering/noserde#main" + OPTIONS + "BUILD_TESTING OFF" + "NOSERDE_BUILD_BENCHMARKS OFF") + # tl::expected CPMAddPackage( URI "gh:TartanLlama/expected@1.1.0" diff --git a/include/simfil/model/arena.h b/include/simfil/model/arena.h index 92ed45f6..1031c745 100644 --- a/include/simfil/model/arena.h +++ b/include/simfil/model/arena.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include "simfil/exception-handler.h" #include "simfil/error.h" @@ -33,15 +33,14 @@ constexpr static ArrayIndex InvalidArrayIndex = -1; /** * ArrayArena - An arena allocator for append-only vectors. * - * The ArrayArena is a wrapper around a segmented_vector. It keeps track of + * The ArrayArena is a wrapper around paged noserde buffers. It keeps track of * forward-linked array chunks. When an array grows beyond the current capacity c * of its current last chunk, a new chunk of size c*2 is allocated and becomes * the new last chunk. This is then set as linked to the previous last chunk. * Usually, appending will be lock-free, and only growth needs the lock. * * @tparam ElementType_ The type of elements stored in the arrays. - * @tparam PageSize The number of elements that each segment in the - * segmented_vector can store. + * @tparam PageSize The number of elements that each storage page can store. */ template class ArrayArena @@ -337,7 +336,7 @@ class ArrayArena // Represents a chunk of an array in the arena. struct Chunk { - SizeType_ offset = 0; // The starting offset of the chunk in the segmented_vector. + SizeType_ offset = 0; // The starting offset of the chunk in the storage buffer. SizeType_ capacity = 0; // The maximum number of elements the chunk can hold. SizeType_ size = 0; // The current number of elements in the chunk, // or the total number of elements of the whole array if this is a head chunk. @@ -346,9 +345,9 @@ class ArrayArena ArrayIndex last = InvalidArrayIndex; // The index of the last chunk in the sequence, or InvalidArrayIndex if none. }; - sfl::segmented_vector heads_; // Head chunks of all arrays. - sfl::segmented_vector continuations_; // Continuation chunks of all arrays. - sfl::segmented_vector data_; // The underlying segmented_vector storing the array elements. + noserde::Buffer heads_; // Head chunks of all arrays. + noserde::Buffer continuations_; // Continuation chunks of all arrays. + noserde::Buffer data_; // Underlying element storage. #ifdef ARRAY_ARENA_THREAD_SAFE mutable std::shared_mutex lock_; // Mutex for synchronizing access to the data structure during growth. diff --git a/src/model/model.cpp b/src/model/model.cpp index 294c8bcc..07e931b8 100644 --- a/src/model/model.cpp +++ b/src/model/model.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include "../expected.h" @@ -81,13 +81,13 @@ struct ModelPool::Impl std::shared_ptr strings_; struct { - sfl::segmented_vector roots_; - sfl::segmented_vector i64_; - sfl::segmented_vector double_; + noserde::Buffer roots_; + noserde::Buffer i64_; + noserde::Buffer double_; std::string stringData_; - sfl::segmented_vector strings_; - sfl::segmented_vector byteArrays_; + noserde::Buffer strings_; + noserde::Buffer byteArrays_; Object::Storage objectMemberArrays_; Array::Storage arrayMemberArrays_; @@ -96,14 +96,12 @@ struct ModelPool::Impl template void readWrite(S& s) { constexpr size_t maxColumnSize = std::numeric_limits::max(); - - s.container(columns_.roots_, maxColumnSize); - - s.container(columns_.i64_, maxColumnSize); - s.container(columns_.double_, maxColumnSize); + s.object(columns_.roots_); + s.object(columns_.i64_); + s.object(columns_.double_); s.text1b(columns_.stringData_, maxColumnSize); - s.container(columns_.strings_, maxColumnSize); - s.container(columns_.byteArrays_, maxColumnSize); + s.object(columns_.strings_); + s.object(columns_.byteArrays_); s.ext(columns_.objectMemberArrays_, bitsery::ext::ArrayArenaExt{}); s.ext(columns_.arrayMemberArrays_, bitsery::ext::ArrayArenaExt{}); @@ -471,21 +469,21 @@ ModelPool::SerializationSizeStats ModelPool::serializationSizeStats() const SerializationSizeStats stats; stats.rootsBytes = measureBytes( - [&](auto& s) { s.container(impl_->columns_.roots_, maxColumnSize); }); + [&](auto& s) { s.object(const_cast&>(impl_->columns_.roots_)); }); stats.int64Bytes = measureBytes( - [&](auto& s) { s.container(impl_->columns_.i64_, maxColumnSize); }); + [&](auto& s) { s.object(const_cast&>(impl_->columns_.i64_)); }); stats.doubleBytes = measureBytes( - [&](auto& s) { s.container(impl_->columns_.double_, maxColumnSize); }); + [&](auto& s) { s.object(const_cast&>(impl_->columns_.double_)); }); stats.stringDataBytes = measureBytes( - [&](auto& s) { s.text1b(impl_->columns_.stringData_, maxColumnSize); }); + [&](auto& s) { s.text1b(const_cast(impl_->columns_.stringData_), maxColumnSize); }); stats.stringRangeBytes = measureBytes( - [&](auto& s) { s.container(impl_->columns_.strings_, maxColumnSize); }); + [&](auto& s) { s.object(const_cast&>(impl_->columns_.strings_)); }); stats.stringRangeBytes += measureBytes( - [&](auto& s) { s.container(impl_->columns_.byteArrays_, maxColumnSize); }); + [&](auto& s) { s.object(const_cast&>(impl_->columns_.byteArrays_)); }); stats.objectMemberBytes = measureBytes( - [&](auto& s) { s.ext(impl_->columns_.objectMemberArrays_, bitsery::ext::ArrayArenaExt{}); }); + [&](auto& s) { s.ext(const_cast(impl_->columns_.objectMemberArrays_), bitsery::ext::ArrayArenaExt{}); }); stats.arrayMemberBytes = measureBytes( - [&](auto& s) { s.ext(impl_->columns_.arrayMemberArrays_, bitsery::ext::ArrayArenaExt{}); }); + [&](auto& s) { s.ext(const_cast(impl_->columns_.arrayMemberArrays_), bitsery::ext::ArrayArenaExt{}); }); return stats; } From fecb1ba928733fbb0eae60e80da66513e3bed512 Mon Sep 17 00:00:00 2001 From: Joseph Birkner Date: Wed, 18 Feb 2026 15:49:47 +0100 Subject: [PATCH 02/21] Point noserde CPM dependency to josephbirkner fork --- cmake/deps.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/deps.cmake b/cmake/deps.cmake index 24ff5fbc..23ffd5bc 100644 --- a/cmake/deps.cmake +++ b/cmake/deps.cmake @@ -15,7 +15,7 @@ CPMAddPackage("gh:fraillt/bitsery@5.2.4") # noserde CPMAddPackage( - URI "gh:Klebert-Engineering/noserde#main" + URI "gh:josephbirkner/noserde#main" OPTIONS "BUILD_TESTING OFF" "NOSERDE_BUILD_BENCHMARKS OFF") From 8989316f349789e3eeebc958d03edfaa41e470d9 Mon Sep 17 00:00:00 2001 From: Joseph Birkner Date: Wed, 18 Feb 2026 16:02:59 +0100 Subject: [PATCH 03/21] Remove stale StringRange bitsery serializer --- src/model/model.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/model/model.cpp b/src/model/model.cpp index 07e931b8..fbda202b 100644 --- a/src/model/model.cpp +++ b/src/model/model.cpp @@ -69,12 +69,6 @@ struct ModelPool::Impl struct StringRange { uint32_t offset_; uint32_t length_; - - template - void serialize(S& s) { - s.value4b(offset_); - s.value4b(length_); - } }; /// This model pool's field name store From d667eba62751760362e6f38549447a83823ca2f4 Mon Sep 17 00:00:00 2001 From: Joseph Birkner Date: Thu, 19 Feb 2026 13:46:43 +0100 Subject: [PATCH 04/21] Use vector instead of stringstream. --- include/simfil/model/model.h | 3 ++- include/simfil/model/string-pool.h | 4 ++-- src/model/model.cpp | 11 +++++++++-- src/model/string-pool.cpp | 11 +++++++++-- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/include/simfil/model/model.h b/include/simfil/model/model.h index 66bdd81b..90e57f20 100644 --- a/include/simfil/model/model.h +++ b/include/simfil/model/model.h @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -265,7 +266,7 @@ class ModelPool : public Model /** Serialization */ virtual tl::expected write(std::ostream& outputStream); - virtual tl::expected read(std::istream& inputStream); + virtual tl::expected read(const std::vector& input, size_t offset = 0); struct SerializationSizeStats { size_t rootsBytes = 0; diff --git a/include/simfil/model/string-pool.h b/include/simfil/model/string-pool.h index 1887b565..e7fa0203 100644 --- a/include/simfil/model/string-pool.h +++ b/include/simfil/model/string-pool.h @@ -8,8 +8,8 @@ #include #include #include -#include #include +#include #include #include @@ -75,7 +75,7 @@ struct StringPool /// Serialization - write to stream, starting from a specific /// id offset if necessary (for partial serialisation). virtual auto write(std::ostream& outputStream, StringId offset = {}) const -> tl::expected; // NOLINT - virtual auto read(std::istream& inputStream) -> tl::expected; + virtual auto read(const std::vector& input, size_t offset = 0) -> tl::expected; /// Check if the content of the string pools is logically identical. bool operator== (StringPool const& other) const; diff --git a/src/model/model.cpp b/src/model/model.cpp index fbda202b..2a95162c 100644 --- a/src/model/model.cpp +++ b/src/model/model.cpp @@ -13,8 +13,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -501,8 +503,13 @@ tl::expected ModelPool::write(std::ostream& outputStream) { return {}; } -tl::expected ModelPool::read(std::istream& inputStream) { - bitsery::Deserializer s(inputStream); +tl::expected ModelPool::read(const std::vector& input, const size_t offset) { + if (offset > input.size()) { + return tl::unexpected(Error::EncodeDecodeError, "Failed to read ModelPool: invalid input offset."); + } + + using Adapter = bitsery::InputBufferAdapter>; + bitsery::Deserializer s(Adapter(input.begin() + static_cast(offset), input.end())); impl_->readWrite(s); if (s.adapter().error() != bitsery::ReaderError::NoError) { return tl::unexpected(Error::EncodeDecodeError, fmt::format( diff --git a/src/model/string-pool.cpp b/src/model/string-pool.cpp index a4c4f976..e50354dd 100644 --- a/src/model/string-pool.cpp +++ b/src/model/string-pool.cpp @@ -2,9 +2,11 @@ #include "simfil/exception-handler.h" #include "simfil/error.h" +#include #include #include #include +#include #include #include #include @@ -206,10 +208,15 @@ auto StringPool::write(std::ostream& outputStream, const StringId offset) const return {}; } -auto StringPool::read(std::istream& inputStream) -> tl::expected +auto StringPool::read(const std::vector& input, size_t offset) -> tl::expected { + if (offset > input.size()) { + return tl::unexpected(Error::EncodeDecodeError, "Failed to read StringPool: invalid input offset."); + } + std::unique_lock stringStoreWriteAccess_(stringStoreMutex_); - bitsery::Deserializer s(inputStream); + using Adapter = bitsery::InputBufferAdapter>; + bitsery::Deserializer s(Adapter(input.begin() + static_cast(offset), input.end())); // Determine how many strings are to be received StringId rcvStringCount{}; From 5ed51b58097a4e79cf147576623fa36465fb4628 Mon Sep 17 00:00:00 2001 From: Joseph Birkner Date: Thu, 19 Feb 2026 15:40:58 +0100 Subject: [PATCH 05/21] Enable fast serialization for ArrayArena. --- include/simfil/model/arena.h | 12 ++++ include/simfil/model/bitsery-traits.h | 79 ++++++++++++++++++++------- 2 files changed, 72 insertions(+), 19 deletions(-) diff --git a/include/simfil/model/arena.h b/include/simfil/model/arena.h index 1031c745..1d23978b 100644 --- a/include/simfil/model/arena.h +++ b/include/simfil/model/arena.h @@ -68,6 +68,7 @@ class ArrayArena heads_.push_back({(SizeType_)offset, (SizeType_)initialCapacity, 0, InvalidArrayIndex, InvalidArrayIndex}); + isCompact_ = false; return index; } @@ -130,6 +131,7 @@ class ArrayArena ++heads_[a].size; if (&heads_[a] != &updatedLast) ++updatedLast.size; + isCompact_ = false; return elem; } @@ -153,6 +155,7 @@ class ArrayArena ++heads_[a].size; if (&heads_[a] != &updatedLast) ++updatedLast.size; + isCompact_ = false; return elem; } @@ -187,6 +190,14 @@ class ArrayArena data_.shrink_to_fit(); } + /** + * Check if continuations_ is empty, and the capacity of every head_ matches its size. + * This is currently only true if the arena was deserialized using the bitsery extension. + */ + [[nodiscard]] bool isCompact() const { + return isCompact_; + } + // Iterator-related types and functions template class ArrayIterator; @@ -348,6 +359,7 @@ class ArrayArena noserde::Buffer heads_; // Head chunks of all arrays. noserde::Buffer continuations_; // Continuation chunks of all arrays. noserde::Buffer data_; // Underlying element storage. + bool isCompact_ = false; #ifdef ARRAY_ARENA_THREAD_SAFE mutable std::shared_mutex lock_; // Mutex for synchronizing access to the data structure during growth. diff --git a/include/simfil/model/bitsery-traits.h b/include/simfil/model/bitsery-traits.h index 7c306d41..2a5747f1 100644 --- a/include/simfil/model/bitsery-traits.h +++ b/include/simfil/model/bitsery-traits.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -55,32 +56,72 @@ struct ArrayArenaExt template void serialize(S& s, simfil::ArrayArena const& arena, Fnc&& fnc) const { - auto numArrays = static_cast(arena.heads_.size()); - s.value4b(numArrays); - for (simfil::ArrayIndex i = 0; i < numArrays; ++i) { - auto size = arena.size(i); - s.value4b(size); - for (size_t j = 0; j < size; ++j) { - if (auto value = arena.at(i, j)) - fnc(s, const_cast(value->get())); - else - raise(std::move(value.error())); // Bitsery does not support propagating errors + (void)fnc; + + // If the arena is already compact, we can simply dump out heads and data + if (arena.isCompact()) { + s.object(arena.heads_); + s.object(arena.data_); + return; + } + + // Otherwise: Build compact temporary heads/data, then serialize those buffers. + using HeadsStorage = typename std::remove_cv_t>; + using DataStorage = typename std::remove_cv_t>; + using Chunk = typename simfil::ArrayArena::Chunk; + using SizeType = typename simfil::ArrayArena::SizeType; + + HeadsStorage compactHeads; + DataStorage compactData; + compactHeads.reserve(arena.heads_.size()); + + size_t totalElements = 0; + for (auto const& head : arena.heads_) { + totalElements += static_cast(head.size); + } + compactData.resize(totalElements); + + size_t writeIndex = 0; + size_t packedOffset = 0; + for (auto const& head : arena.heads_) { + compactHeads.push_back(Chunk{ + static_cast(packedOffset), + head.size, + head.size, + simfil::InvalidArrayIndex, + simfil::InvalidArrayIndex + }); + + auto const* current = &head; + auto remaining = static_cast(head.size); + while (current != nullptr && remaining > 0) { + size_t chunkUsed = 0; + if (current == &head) { + chunkUsed = std::min(static_cast(head.capacity), remaining); + } else { + chunkUsed = std::min(static_cast(current->size), remaining); + } + + for (size_t i = 0; i < chunkUsed; ++i) { + compactData[writeIndex++] = arena.data_[current->offset + i]; + } + remaining -= chunkUsed; + current = (current->next != simfil::InvalidArrayIndex) ? &arena.continuations_[current->next] : nullptr; } + packedOffset += static_cast(head.size); } + + s.object(compactHeads); + s.object(compactData); } template void deserialize(S& s, simfil::ArrayArena& arena, Fnc&& fnc) const { - simfil::ArrayIndex numArrays; - s.value4b(numArrays); - for (simfil::ArrayIndex i = 0; i < numArrays; ++i) { - typename std::decay_t::SizeType size; - s.value4b(size); - auto arrayIndex = arena.new_array(size); - for (size_t j = 0; j < size; ++j) - fnc(s, arena.emplace_back(arrayIndex)); - } + s.object(arena.heads_); + s.object(arena.data_); + arena.continuations_.clear(); + arena.isCompact_ = true; } }; From 3922313e7cdce9eac876b9a74fb5e506b7ac6573 Mon Sep 17 00:00:00 2001 From: Joseph Birkner Date: Fri, 20 Feb 2026 12:03:46 +0100 Subject: [PATCH 06/21] Introduce compactHeads_ for arrays. --- include/simfil/model/arena.h | 110 +++++++++++++++++++++++--- include/simfil/model/bitsery-traits.h | 29 ++++--- src/model/model.cpp | 63 ++------------- 3 files changed, 125 insertions(+), 77 deletions(-) diff --git a/include/simfil/model/arena.h b/include/simfil/model/arena.h index 1d23978b..1796dd7e 100644 --- a/include/simfil/model/arena.h +++ b/include/simfil/model/arena.h @@ -2,8 +2,11 @@ #pragma once +#include +#include #include #include +#include #include #include #include @@ -50,6 +53,18 @@ class ArrayArena public: using ElementType = ElementType_; using SizeType = SizeType_; + struct CompactArrayChunk + { + std::uint32_t offset = 0; + std::uint32_t size = 0; + + template + void serialize(S& s) { + s.value4b(offset); + s.value4b(size); + } + }; + using CompactHeadStorage = noserde::Buffer; /** * Creates a new array with the specified initial capacity. @@ -62,13 +77,14 @@ class ArrayArena #ifdef ARRAY_ARENA_THREAD_SAFE std::unique_lock guard(lock_); #endif + ensure_runtime_heads_from_compact(); size_t offset = data_.size(); data_.resize(offset + initialCapacity); auto index = static_cast(heads_.size()); heads_.push_back({(SizeType_)offset, (SizeType_)initialCapacity, 0, InvalidArrayIndex, InvalidArrayIndex}); - isCompact_ = false; + compactHeads_.reset(); return index; } @@ -80,6 +96,8 @@ class ArrayArena #ifdef ARRAY_ARENA_THREAD_SAFE std::shared_lock guard(lock_); #endif + if (heads_.empty() && compactHeads_) + return compactHeads_->size(); return heads_.size(); } @@ -93,9 +111,25 @@ class ArrayArena #ifdef ARRAY_ARENA_THREAD_SAFE std::shared_lock guard(lock_); #endif + if (heads_.empty() && compactHeads_) + return static_cast((*compactHeads_)[a].size); return heads_[a].size; } + /** + * @return The current size, in bytes, of the array arena if serialized. + */ + [[nodiscard]] size_t byte_size() const { + if (compactHeads_) { + return compactHeads_->byte_size() + data_.byte_size(); + } + auto result = heads_.size() * sizeof(CompactArrayChunk); + for (auto const& head : heads_) { + result += head.size * sizeof(ElementType_); + } + return result; + } + /** * Returns a reference to the element at the specified index in the array. * @@ -131,7 +165,7 @@ class ArrayArena ++heads_[a].size; if (&heads_[a] != &updatedLast) ++updatedLast.size; - isCompact_ = false; + compactHeads_.reset(); return elem; } @@ -155,7 +189,7 @@ class ArrayArena ++heads_[a].size; if (&heads_[a] != &updatedLast) ++updatedLast.size; - isCompact_ = false; + compactHeads_.reset(); return elem; } @@ -172,6 +206,7 @@ class ArrayArena heads_.clear(); continuations_.clear(); data_.clear(); + compactHeads_.reset(); } /** @@ -188,14 +223,16 @@ class ArrayArena heads_.shrink_to_fit(); continuations_.shrink_to_fit(); data_.shrink_to_fit(); + if (compactHeads_) { + compactHeads_->shrink_to_fit(); + } } /** - * Check if continuations_ is empty, and the capacity of every head_ matches its size. - * This is currently only true if the arena was deserialized using the bitsery extension. + * Check if a compact chunk representation is available. */ [[nodiscard]] bool isCompact() const { - return isCompact_; + return compactHeads_.has_value(); } // Iterator-related types and functions @@ -309,9 +346,9 @@ class ArrayArena const_iterator end(ArrayIndex const& a) const { return const_iterator(*this, a, size(a)); } ArrayArenaIterator begin() { return ArrayArenaIterator(*this, 0); } - ArrayArenaIterator end() { return ArrayArenaIterator(*this, static_cast(heads_.size())); } + ArrayArenaIterator end() { return ArrayArenaIterator(*this, static_cast(size())); } ArrayArenaIterator begin() const { return ArrayArenaIterator(*this, 0); } - ArrayArenaIterator end() const { return ArrayArenaIterator(*this, static_cast(heads_.size())); } + ArrayArenaIterator end() const { return ArrayArenaIterator(*this, static_cast(size())); } ArrayRange range(ArrayIndex const& array) {return ArrayRange(begin(array), end(array));} @@ -320,6 +357,24 @@ class ArrayArena template void iterate(ArrayIndex const& a, Func&& lambda) { + if (heads_.empty() && compactHeads_) { + auto const& compact = (*compactHeads_)[a]; + for (size_t i = 0; i < static_cast(compact.size); ++i) + { + if constexpr (std::is_invocable_r_v) { + if (!lambda(data_[static_cast(compact.offset) + i])) + return; + } + else if constexpr (std::is_invocable_v) { + lambda(data_[static_cast(compact.offset) + i], i); + } + else { + lambda(data_[static_cast(compact.offset) + i]); + } + } + return; + } + Chunk const* current = &heads_[a]; size_t globalIndex = 0; while (current != nullptr) @@ -359,12 +414,31 @@ class ArrayArena noserde::Buffer heads_; // Head chunks of all arrays. noserde::Buffer continuations_; // Continuation chunks of all arrays. noserde::Buffer data_; // Underlying element storage. - bool isCompact_ = false; + std::optional compactHeads_; #ifdef ARRAY_ARENA_THREAD_SAFE mutable std::shared_mutex lock_; // Mutex for synchronizing access to the data structure during growth. #endif + void ensure_runtime_heads_from_compact() + { + if (!heads_.empty() || !compactHeads_) + return; + + heads_.clear(); + heads_.reserve(compactHeads_->size()); + continuations_.clear(); + for (auto const& compactHead : *compactHeads_) { + heads_.push_back({ + static_cast(compactHead.offset), + static_cast(compactHead.size), + static_cast(compactHead.size), + InvalidArrayIndex, + InvalidArrayIndex + }); + } + } + /** * Ensures that the specified array has enough capacity to add one more element * and returns a reference to the last chunk in the array. @@ -377,8 +451,19 @@ class ArrayArena */ Chunk& ensure_capacity_and_get_last_chunk(ArrayIndex const& a) { + #ifndef ARRAY_ARENA_THREAD_SAFE + ensure_runtime_heads_from_compact(); + #endif + #ifdef ARRAY_ARENA_THREAD_SAFE std::shared_lock read_guard(lock_); + if (heads_.empty() && compactHeads_) { + read_guard.unlock(); + std::unique_lock write_guard(lock_); + ensure_runtime_heads_from_compact(); + write_guard.unlock(); + read_guard.lock(); + } #endif Chunk& head = heads_[a]; Chunk& last = (head.last == InvalidArrayIndex) ? head : continuations_[head.last]; @@ -410,6 +495,13 @@ class ArrayArena #ifdef ARRAY_ARENA_THREAD_SAFE std::shared_lock guard(self.lock_); #endif + if (self.heads_.empty() && self.compactHeads_) { + auto const& compact = (*self.compactHeads_)[a]; + if (i < static_cast(compact.size)) + return self.data_[static_cast(compact.offset) + i]; + return tl::unexpected(Error::IndexOutOfRange, "index out of range"); + } + typename Self::Chunk const* current = &self.heads_[a]; size_t remaining = i; while (true) { diff --git a/include/simfil/model/bitsery-traits.h b/include/simfil/model/bitsery-traits.h index 2a5747f1..d78f4228 100644 --- a/include/simfil/model/bitsery-traits.h +++ b/include/simfil/model/bitsery-traits.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -60,18 +61,17 @@ struct ArrayArenaExt // If the arena is already compact, we can simply dump out heads and data if (arena.isCompact()) { - s.object(arena.heads_); + s.object(*arena.compactHeads_); s.object(arena.data_); return; } // Otherwise: Build compact temporary heads/data, then serialize those buffers. - using HeadsStorage = typename std::remove_cv_t>; + using CompactHeadsStorage = typename simfil::ArrayArena::CompactHeadStorage; using DataStorage = typename std::remove_cv_t>; - using Chunk = typename simfil::ArrayArena::Chunk; - using SizeType = typename simfil::ArrayArena::SizeType; + using CompactChunk = typename simfil::ArrayArena::CompactArrayChunk; - HeadsStorage compactHeads; + CompactHeadsStorage compactHeads; DataStorage compactData; compactHeads.reserve(arena.heads_.size()); @@ -84,12 +84,9 @@ struct ArrayArenaExt size_t writeIndex = 0; size_t packedOffset = 0; for (auto const& head : arena.heads_) { - compactHeads.push_back(Chunk{ - static_cast(packedOffset), - head.size, - head.size, - simfil::InvalidArrayIndex, - simfil::InvalidArrayIndex + compactHeads.push_back(CompactChunk{ + static_cast(packedOffset), + static_cast(head.size) }); auto const* current = &head; @@ -118,10 +115,16 @@ struct ArrayArenaExt template void deserialize(S& s, simfil::ArrayArena& arena, Fnc&& fnc) const { - s.object(arena.heads_); + (void)fnc; + using CompactHeadsStorage = typename simfil::ArrayArena::CompactHeadStorage; + + CompactHeadsStorage compactHeads; + s.object(compactHeads); s.object(arena.data_); + + arena.heads_.clear(); arena.continuations_.clear(); - arena.isCompact_ = true; + arena.compactHeads_ = std::move(compactHeads); } }; diff --git a/src/model/model.cpp b/src/model/model.cpp index 2a95162c..2c4451e3 100644 --- a/src/model/model.cpp +++ b/src/model/model.cpp @@ -104,42 +104,6 @@ struct ModelPool::Impl } }; -namespace -{ -class CountingStreambuf : public std::streambuf -{ -public: - size_t size() const { return size_; } - -protected: - std::streamsize xsputn(const char* /*s*/, std::streamsize count) override - { - size_ += static_cast(count); - return count; - } - - int overflow(int ch) override - { - if (ch != EOF) - ++size_; - return ch; - } - -private: - size_t size_ = 0; -}; - -template -size_t measureBytes(Fn&& fn) -{ - CountingStreambuf buf; - std::ostream os(&buf); - bitsery::Serializer s(os); - fn(s); - return buf.size(); -} -} - ModelPool::ModelPool() : impl_(std::make_unique(std::make_shared())) {} @@ -461,26 +425,15 @@ auto ModelPool::setStrings(std::shared_ptr const& strings) -> tl::ex ModelPool::SerializationSizeStats ModelPool::serializationSizeStats() const { - constexpr size_t maxColumnSize = std::numeric_limits::max(); SerializationSizeStats stats; - - stats.rootsBytes = measureBytes( - [&](auto& s) { s.object(const_cast&>(impl_->columns_.roots_)); }); - stats.int64Bytes = measureBytes( - [&](auto& s) { s.object(const_cast&>(impl_->columns_.i64_)); }); - stats.doubleBytes = measureBytes( - [&](auto& s) { s.object(const_cast&>(impl_->columns_.double_)); }); - stats.stringDataBytes = measureBytes( - [&](auto& s) { s.text1b(const_cast(impl_->columns_.stringData_), maxColumnSize); }); - stats.stringRangeBytes = measureBytes( - [&](auto& s) { s.object(const_cast&>(impl_->columns_.strings_)); }); - stats.stringRangeBytes += measureBytes( - [&](auto& s) { s.object(const_cast&>(impl_->columns_.byteArrays_)); }); - stats.objectMemberBytes = measureBytes( - [&](auto& s) { s.ext(const_cast(impl_->columns_.objectMemberArrays_), bitsery::ext::ArrayArenaExt{}); }); - stats.arrayMemberBytes = measureBytes( - [&](auto& s) { s.ext(const_cast(impl_->columns_.arrayMemberArrays_), bitsery::ext::ArrayArenaExt{}); }); - + stats.rootsBytes = impl_->columns_.roots_.byte_size(); + stats.int64Bytes = impl_->columns_.i64_.byte_size(); + stats.doubleBytes = impl_->columns_.double_.byte_size(); + stats.stringDataBytes = impl_->columns_.stringData_.size(); + stats.stringRangeBytes = impl_->columns_.strings_.byte_size(); + stats.stringRangeBytes += impl_->columns_.byteArrays_.byte_size(); + stats.objectMemberBytes = impl_->columns_.objectMemberArrays_.byte_size(); + stats.arrayMemberBytes = impl_->columns_.arrayMemberArrays_.byte_size(); return stats; } From ae9f4ea28b2d4cf83d298a4293b9070bfba5fbab Mon Sep 17 00:00:00 2001 From: Joseph Birkner Date: Tue, 24 Feb 2026 20:23:29 +0100 Subject: [PATCH 07/21] model: add ModelColumn and tagged type validation --- CMakeLists.txt | 21 +- cmake/deps.cmake | 7 - include/simfil/model/arena.h | 16 +- include/simfil/model/column.h | 560 +++++++++++++++++++++++++++++++++ include/simfil/model/nodes.h | 4 + src/model/model.cpp | 13 +- tools/column_type_validator.py | 297 +++++++++++++++++ 7 files changed, 898 insertions(+), 20 deletions(-) create mode 100644 include/simfil/model/column.h create mode 100755 tools/column_type_validator.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a6cec83..d6481065 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,7 @@ option(SIMFIL_WITH_REPL "Build simfil repl" ${MAIN_PROJECT}) option(SIMFIL_WITH_EXAMPLES "Build examples" ${MAIN_PROJECT}) option(SIMFIL_WITH_TESTS "Build tests" ${MAIN_PROJECT}) option(SIMFIL_WITH_MODEL_JSON "Include JSON model support" YES) +option(SIMFIL_VALIDATE_MODEL_COLUMNS "Validate MODEL_COLUMN_TYPE structs" ${MAIN_PROJECT}) find_program(GCOVR_BIN gcovr) if (SIMFIL_WITH_COVERAGE AND NOT GCOVR_BIN) @@ -52,6 +53,20 @@ if (NOT CPM_INITIALIZED) endif () include(cmake/deps.cmake) +if (SIMFIL_VALIDATE_MODEL_COLUMNS) + find_package(Python3 COMPONENTS Interpreter REQUIRED) + set(SIMFIL_COLUMN_VALIDATION_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/include/simfil/model/column.h" + "${CMAKE_CURRENT_SOURCE_DIR}/include/simfil/model/nodes.h" + "${CMAKE_CURRENT_SOURCE_DIR}/include/simfil/model/arena.h" + "${CMAKE_CURRENT_SOURCE_DIR}/src/model/model.cpp") + add_custom_target(simfil-column-type-validation + COMMAND "${Python3_EXECUTABLE}" + "${CMAKE_CURRENT_SOURCE_DIR}/tools/column_type_validator.py" + ${SIMFIL_COLUMN_VALIDATION_FILES} + VERBATIM) +endif() + ##################### # Main simfil library @@ -96,6 +111,7 @@ target_sources(simfil PUBLIC include/simfil/exception-handler.h include/simfil/model/arena.h + include/simfil/model/column.h include/simfil/model/string-pool.h include/simfil/model/model.h include/simfil/model/nodes.h @@ -112,7 +128,6 @@ target_compile_features(simfil target_link_libraries(simfil PUBLIC - noserde::runtime sfl::sfl fmt::fmt tl::expected @@ -156,6 +171,10 @@ endif() add_library(simfil::simfil ALIAS simfil) +if (SIMFIL_VALIDATE_MODEL_COLUMNS) + add_dependencies(simfil simfil-column-type-validation) +endif() + ##################### # Tests / Repl / Examples diff --git a/cmake/deps.cmake b/cmake/deps.cmake index 23ffd5bc..e7c88f37 100644 --- a/cmake/deps.cmake +++ b/cmake/deps.cmake @@ -13,13 +13,6 @@ CPMAddPackage( # bitsery CPMAddPackage("gh:fraillt/bitsery@5.2.4") -# noserde -CPMAddPackage( - URI "gh:josephbirkner/noserde#main" - OPTIONS - "BUILD_TESTING OFF" - "NOSERDE_BUILD_BENCHMARKS OFF") - # tl::expected CPMAddPackage( URI "gh:TartanLlama/expected@1.1.0" diff --git a/include/simfil/model/arena.h b/include/simfil/model/arena.h index 1796dd7e..642279ca 100644 --- a/include/simfil/model/arena.h +++ b/include/simfil/model/arena.h @@ -11,7 +11,7 @@ #include #include #include -#include +#include "simfil/model/column.h" #include "simfil/exception-handler.h" #include "simfil/error.h" @@ -36,7 +36,7 @@ constexpr static ArrayIndex InvalidArrayIndex = -1; /** * ArrayArena - An arena allocator for append-only vectors. * - * The ArrayArena is a wrapper around paged noserde buffers. It keeps track of + * The ArrayArena is a wrapper around paged model columns. It keeps track of * forward-linked array chunks. When an array grows beyond the current capacity c * of its current last chunk, a new chunk of size c*2 is allocated and becomes * the new last chunk. This is then set as linked to the previous last chunk. @@ -55,6 +55,8 @@ class ArrayArena using SizeType = SizeType_; struct CompactArrayChunk { + MODEL_COLUMN_TYPE(8); + std::uint32_t offset = 0; std::uint32_t size = 0; @@ -64,7 +66,7 @@ class ArrayArena s.value4b(size); } }; - using CompactHeadStorage = noserde::Buffer; + using CompactHeadStorage = ModelColumn; /** * Creates a new array with the specified initial capacity. @@ -402,6 +404,8 @@ class ArrayArena // Represents a chunk of an array in the arena. struct Chunk { + MODEL_COLUMN_TYPE((sizeof(SizeType_) * 3) + (sizeof(ArrayIndex) * 2)); + SizeType_ offset = 0; // The starting offset of the chunk in the storage buffer. SizeType_ capacity = 0; // The maximum number of elements the chunk can hold. SizeType_ size = 0; // The current number of elements in the chunk, @@ -411,9 +415,9 @@ class ArrayArena ArrayIndex last = InvalidArrayIndex; // The index of the last chunk in the sequence, or InvalidArrayIndex if none. }; - noserde::Buffer heads_; // Head chunks of all arrays. - noserde::Buffer continuations_; // Continuation chunks of all arrays. - noserde::Buffer data_; // Underlying element storage. + ModelColumn heads_; // Head chunks of all arrays. + ModelColumn continuations_; // Continuation chunks of all arrays. + ModelColumn data_; // Underlying element storage. std::optional compactHeads_; #ifdef ARRAY_ARENA_THREAD_SAFE diff --git a/include/simfil/model/column.h b/include/simfil/model/column.h new file mode 100644 index 00000000..d000744a --- /dev/null +++ b/include/simfil/model/column.h @@ -0,0 +1,560 @@ +// Copyright (c) Navigation Data Standard e.V. - See "LICENSE" file. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace simfil +{ + +static_assert( + std::endian::native == std::endian::little, + "simfil::ModelColumn currently supports little-endian hosts only"); + +enum class model_column_io_error +{ + payload_size_mismatch, +}; + +template +struct ColumnTypeField +{ + static_assert( + sizeof(T) == ExpectedSize, + "simfil::ColumnTypeField size mismatch"); + + T value_{}; + + constexpr ColumnTypeField() = default; + constexpr ColumnTypeField(const T& value) : value_(value) {} // NOLINT + constexpr ColumnTypeField(T&& value) : value_(std::move(value)) {} // NOLINT + + constexpr ColumnTypeField& operator=(const T& value) + { + value_ = value; + return *this; + } + + constexpr ColumnTypeField& operator=(T&& value) + { + value_ = std::move(value); + return *this; + } + + constexpr operator T&() noexcept { return value_; } // NOLINT + constexpr operator const T&() const noexcept { return value_; } // NOLINT +}; + +#ifndef MODEL_COLUMN_TYPE +#define MODEL_COLUMN_TYPE(expected_size) \ + using IsModelColumnType = void; \ + static constexpr std::size_t kModelColumnExpectedSize = static_cast(expected_size) +#endif + +namespace detail +{ + +template +struct has_model_column_tag : std::false_type +{}; + +template +struct has_model_column_tag< + T, + std::void_t> + : std::true_type +{}; + +template +inline constexpr bool has_model_column_tag_v = + has_model_column_tag>::value; + +template +struct is_model_column_external_type : std::false_type +{}; + +template +inline constexpr bool is_model_column_external_type_v = + is_model_column_external_type>::value; + +template +inline constexpr bool is_fixed_width_integer_v = + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v; + +template > +struct is_fixed_width_enum : std::false_type +{}; + +template +struct is_fixed_width_enum + : std::bool_constant>> +{}; + +template +inline constexpr bool is_fixed_width_enum_v = is_fixed_width_enum::value; + +template +inline constexpr bool is_scalar_model_column_type_v = + std::is_same_v || std::is_same_v || + std::is_same_v || is_fixed_width_integer_v || + is_fixed_width_enum_v; + +template +inline constexpr bool is_native_pod_wire_candidate_v = + std::is_trivially_copyable_v> && + std::is_standard_layout_v>; + +template +constexpr std::size_t expected_model_column_sizeof() +{ + using U = std::remove_cv_t; + if constexpr (has_model_column_tag_v) { + return U::kModelColumnExpectedSize; + } else { + return sizeof(U); + } +} + +template +constexpr std::uint64_t model_column_schema_hash() +{ + return static_cast(expected_model_column_sizeof()); +} + +template +struct segmented_storage_page_elements +{ + static_assert(T_PageBytes > 0, "page size must be greater than zero"); + static_assert( + (T_PageBytes % sizeof(TValue)) == 0, + "page size must be a multiple of element size"); + static constexpr std::size_t value = T_PageBytes / sizeof(TValue); +}; + +template +inline constexpr bool is_bitsery_input_archive_v = + requires(S& archive, bitsery::ReaderError error) { + archive.adapter().error(); + archive.adapter().error(error); + }; + +template +void mark_bitsery_invalid_data(S& archive) +{ + if constexpr (is_bitsery_input_archive_v) { + archive.adapter().error(bitsery::ReaderError::InvalidData); + } +} + +template +bool read_bitsery_size_prefix_1b(S& archive, std::size_t& out_size) +{ + std::uint8_t hb = 0; + archive.adapter().template readBytes<1>(hb); + if (archive.adapter().error() != bitsery::ReaderError::NoError) { + return false; + } + + if (hb < 0x80U) { + out_size = hb; + return true; + } + + std::uint8_t lb = 0; + archive.adapter().template readBytes<1>(lb); + if (archive.adapter().error() != bitsery::ReaderError::NoError) { + return false; + } + + if ((hb & 0x40U) != 0U) { + std::uint16_t lw = 0; + archive.adapter().template readBytes<2>(lw); + if (archive.adapter().error() != bitsery::ReaderError::NoError) { + return false; + } + out_size = static_cast( + ((((hb & 0x3FU) << 8U) | lb) << 16U) | lw); + return true; + } + + out_size = static_cast(((hb & 0x7FU) << 8U) | lb); + return true; +} + +template