diff --git a/CMakeLists.txt b/CMakeLists.txt index 109d002..f0db16a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,6 +71,7 @@ add_library(simfil ${LIBRARY_TYPE} src/value.cpp src/overlay.cpp src/exception-handler.cpp + src/expression-visitor.cpp src/model/model.cpp src/model/nodes.cpp src/model/string-pool.cpp) @@ -94,8 +95,10 @@ target_sources(simfil PUBLIC include/simfil/transient.h include/simfil/simfil.h include/simfil/exception-handler.h + include/simfil/expression-visitor.h include/simfil/model/arena.h + include/simfil/model/column.h include/simfil/model/string-pool.h include/simfil/model/model.h include/simfil/model/nodes.h diff --git a/docs/simfil-dev-guide.md b/docs/simfil-dev-guide.md index 4844e08..1a403cd 100644 --- a/docs/simfil-dev-guide.md +++ b/docs/simfil-dev-guide.md @@ -62,6 +62,24 @@ Objects and arrays do not embed child nodes directly. Instead, they maintain `Mo `StringPool` maintains the mapping between strings and the `StringId` integers stored in object fields. The base `Model` interface exposes `lookupStringId` so that serialization code such as `ModelNode::toJson` can recover human-readable field names. `ModelPool::setStrings` allows a pool to adopt a different `StringPool`, populating any missing field names along the way. This operation is used by higher-level components that need to merge data from several pools into a unified string namespace. +### ModelColumn + +The primitive storage building block below `ModelPool` and `ArrayArena` is `ModelColumn`. A model column stores a single fixed-width record stream and exposes bulk byte operations for serialization and deserialization. 
The generic implementation accepts three families of types: + +- fixed-width scalar types (`bool`, fixed-width integers, fixed-width enums, `float`, `double`) +- explicitly tagged external record types via `MODEL_COLUMN_TYPE(expected_size)` +- other approved native POD records that are trivially copyable and standard-layout + +The column implementation assumes little-endian hosts and treats the in-memory representation as the wire representation. `bytes()` returns the canonical payload bytes for the current record stream; `assign_bytes()` and `read_payload_from_bitsery()` perform the inverse operation. For vector-backed columns this is one contiguous bulk copy; for segmented storage the same payload is copied chunk-by-chunk while preserving the same wire layout. + +`RecordsPerPage` defines the number of records stored per page, not the page size in bytes. The effective page size is `RecordsPerPage * sizeof(T)`, and segmented storage requires that value to be a multiple of the record size. This keeps page boundaries aligned with record boundaries and lets callers reason about capacity in record counts instead of byte counts. + +### Split pair columns with `TwoPart` + +`TwoPart` is a logical pair type used when a compound record should behave like `{A, B}` in C++ but should not pay struct-padding costs on the wire. `ModelColumn<TwoPart<A, B>>` specializes the generic column by storing the `first()` and `second()` members in two synchronized child columns. Reads and writes still happen through a pair-like ref proxy, but serialization concatenates the dense payload of the first column and the dense payload of the second column. + +The main current use is object member storage. `detail::ObjectField` is defined as `TwoPart<StringId, ModelNodeAddress>`, so object fields still behave like `(name, value)` pairs while the wire payload remains dense and deterministic regardless of host padding rules. 
+ ### Value representation `Value` is the runtime carrier for scalar and structured results: @@ -127,7 +145,7 @@ classDiagram `BaseArray` provides the generic implementation of array behaviour for model pools. It owns a pointer to an `ArrayArena` and an `ArrayIndex` into that arena. The base class implements `type()` (always `Array`), `at()`, `size()`, and `iterate()` in terms of the arena. `Array` itself is a thin wrapper over `BaseArray` that adds convenience overloads for appending scalars, which internally delegate to `ModelPool::newSmallValue` or `ModelPool::newValue` and then record the resulting address in the arena. -`BaseObject` plays the same role for object nodes. It stores key–value pairs as `{StringId, ModelNodeAddress}` elements inside an `ArrayArena`. The base class implements `type()` (always `Object`), `get(StringId)`, `keyAt()`, `at()` (interpreting the array as an ordered sequence of fields), and `iterate()`. The concrete `Object` subclass adds convenience `addField` overloads for common scalar types and an `extend` method that copies all fields from another `Object`. +`BaseObject` plays the same role for object nodes. It stores key–value pairs as `detail::ObjectField` elements inside an `ArrayArena`; that type is currently `TwoPart`, so names and child addresses are physically stored in split columns while the API still behaves like a logical pair sequence. The base class implements `type()` (always `Object`), `get(StringId)`, `keyAt()`, `at()` (interpreting the array as an ordered sequence of fields), and `iterate()`. The concrete `Object` subclass adds convenience `addField` overloads for common scalar types and an `extend` method that copies all fields from another `Object`. `ProceduralObject` extends `Object` with a bounded number of synthetic fields. These fields are represented as `std::function` callbacks in a `small_vector`. 
Accessors such as `get`, `at`, `keyAt`, and `iterate` first consult the procedural fields and then fall back to the underlying `Object` storage. This pattern makes it possible to expose computed members alongside stored ones without materialising them permanently in the arena. @@ -135,17 +153,24 @@ classDiagram ### Array arena details -The `ArrayArena` template implements the append-only sequences used by arrays and objects. Conceptually, it manages a collection of logical arrays, each of which may consist of one or more “chunks” backed by a single `segmented_vector`. A logical array is identified by an `ArrayIndex`. For each index, the arena stores a head `Chunk` in `heads_` and, if the array grows beyond the head’s capacity, additional continuation chunks in `continuations_`. +The `ArrayArena` template implements the append-only sequences used by arrays and objects. Conceptually, it manages a collection of logical arrays, each of which may use one of two physical representations: + +- a regular growable chunk chain backed by `heads_`, `continuations_`, and `data_` +- a singleton handle backed by `singletonValues_` and `singletonOccupied_` + +Regular arrays behave like the historical arena implementation. Each logical array is identified by an `ArrayIndex` and starts with a head `Chunk` in `heads_`. If the array grows beyond the head’s capacity, the arena allocates continuation chunks in `continuations_`. Each chunk records an `offset` into `data_`, a `capacity`, and a `size`. For a head chunk, `size` also tracks the total logical length of the array; for continuation chunks, `size` is local to that chunk. The `next` and `last` indices form a singly-linked list from the head to the tail chunk. + +`new_array(initialCapacity, fixedSize)` controls which representation is chosen. If `fixedSize` is `false`, even `initialCapacity == 1` creates a regular growable array. If `fixedSize` is `true` and `initialCapacity == 1`, the arena instead returns a singleton handle. 
That handle represents a 0-or-1 element logical array with no head chunk allocation. This is useful for storage patterns where one-element arrays are common and known not to grow later. -Each `Chunk` records an `offset` into the `data_` vector, a `capacity`, and a `size`. For a head chunk, `size` also tracks the total logical length of the array; for continuation chunks, `size` expresses the number of valid elements in that chunk only. The `next` and `last` indices form a singly-linked list from the head to the tail chunk. `new_array(initialCapacity)` reserves a contiguous region in `data_`, initialises the head chunk with the offset and capacity, and returns a fresh `ArrayIndex`. +When a caller appends an element to a regular array via `push_back` or `emplace_back`, the arena calls `ensure_capacity_and_get_last_chunk_unlocked`. This function locates the current tail chunk (either the head or a continuation). If the tail still has spare capacity, it is returned directly; otherwise, the function allocates a new continuation chunk with capacity doubled relative to the previous tail, extends `data_`, links the new chunk into `continuations_`, and updates the head’s `last` pointer. Singleton handles do not use this growth path; they allow at most one element and reject further appends. -When a caller appends an element via `push_back` or `emplace_back`, the arena calls `ensure_capacity_and_get_last_chunk`. This function locates the current tail chunk (either the head or a continuation). If the tail still has spare capacity, it is returned directly; otherwise, the function allocates a new continuation chunk with capacity doubled relative to the previous tail, extends `data_` accordingly, links the new chunk into `continuations_`, and updates the head’s `last` pointer. This growth strategy guarantees amortised constant time for appends while avoiding large reallocations. +Element access via `at(ArrayIndex, i)` dispatches by representation. 
Singleton handles resolve directly against `singletonValues_`. Compact arenas resolve against the compact head metadata. Regular arrays walk the chunk list, subtracting full chunk capacities from the requested index until the index falls within the current chunk’s capacity and size. This keeps the public API uniform while allowing denser storage for the common singleton case. -Element access via `at(ArrayIndex, i)` walks the chunk list for the target array. It subtracts full chunk capacities from the requested index until the index falls within the current chunk’s capacity and size, and then returns a reference to `data_[offset + localIndex]`. This guarantees O(number_of_chunks) access in the worst case, but in practice the number of chunks per array remains small because capacities grow geometrically. +The arena also supports a compact serialization mode. In that mode, `compactHeads_` stores only `{offset, size}` metadata for each regular array, while `data_` already contains a dense payload without chunk gaps. Runtime head chunks are materialized lazily from `compactHeads_` when a later mutation requires growable chunk state again. This allows serialized arenas to stay compact without forcing the mutable runtime representation onto the wire. -The arena also provides higher-level iteration facilities. The `begin(array)`/`end(array)` pair yields an iterator over the elements of a specific logical array. The `iterate(ArrayIndex, lambda)` helper executes a callback on every element and supports two signatures: a unary callback receiving a reference to the element, and a binary callback receiving both the element and its global index. This is used by `BaseArray::iterate` to implement `ModelNode::iterate` efficiently without allocating intermediate containers. +The higher-level iteration facilities follow the same dispatch rules. 
`begin(array)`/`end(array)` iterate one logical array, while the top-level arena iterator skips the sentinel head entry and also yields singleton handles. `iterate(ArrayIndex, lambda)` supports unary callbacks receiving a value and binary callbacks receiving both a value and its logical index. This is used by `BaseArray::iterate` and `BaseObject::iterate` to expose child traversal without materializing temporary containers. -Thread-safety is conditional. If `ARRAY_ARENA_THREAD_SAFE` is defined, the arena uses a shared mutex to protect growth and element access. Appends and `new_array` take an exclusive lock only when allocating new chunks; reads can proceed with shared locks. Simfil itself does not require the arena to be thread-safe as long as model construction happens before concurrent evaluation, but the hooks are present for embedders that need concurrent writers. +Thread-safety is conditional. If `ARRAY_ARENA_THREAD_SAFE` is defined, the arena uses a shared mutex to protect growth and element access. Reads use shared locks, while mutations and compact-to-runtime materialization take an exclusive lock. Simfil itself does not require the arena to be thread-safe as long as model construction happens before concurrent evaluation, but the hooks are present for embedders that need concurrent writers. ## Parser, tokens, and AST diff --git a/include/simfil/diagnostics.h b/include/simfil/diagnostics.h index 791fd6b..8dca090 100644 --- a/include/simfil/diagnostics.h +++ b/include/simfil/diagnostics.h @@ -2,29 +2,48 @@ #pragma once +#include "simfil/sourcelocation.h" #include "simfil/value.h" -#include "simfil/token.h" #include "simfil/error.h" +#include "simfil/expression.h" +#include #include #include #include #include -#include +#include namespace simfil { class AST; -class Expr; struct Environment; struct ModelNode; /** Query Diagnostics. 
*/ -struct Diagnostics +class Diagnostics { + static constexpr std::uint32_t InvalidIndex = std::numeric_limits::max(); public: - using ExprId = std::uint32_t; + struct FieldExprData + { + SourceLocation location; + std::uint32_t hits = 0; + std::uint32_t evaluations = 0; + std::string name; + }; + + + struct ComparisonExprData + { + SourceLocation location; + TypeFlags leftTypes; + TypeFlags rightTypes; + std::uint32_t evaluations = 0u; + std::uint32_t falseResults = 0u; + std::uint32_t trueResults = 0u; + }; struct Message { @@ -42,6 +61,12 @@ struct Diagnostics Diagnostics(Diagnostics&&) noexcept; ~Diagnostics(); + /** + * Get diagnostics data for a single Expr. + */ + template + auto get(const Expr& expr) -> DiagnosticsDataType&; + /** * Append/merge another diagnostics object into this one. */ @@ -53,22 +78,82 @@ struct Diagnostics auto write(std::ostream& stream) const -> tl::expected; auto read(std::istream& stream) -> tl::expected; - struct Data; -private: - friend auto eval(Environment&, const AST&, const ModelNode&, Diagnostics*) -> tl::expected, Error>; - friend auto diagnostics(Environment& env, const AST& ast, const Diagnostics& diag) -> tl::expected, Error>; - - std::unique_ptr data; - /** - * Collect diagnostics data from an AST. + * Build the exprIndex_ map for the AST. */ - auto collect(Expr& ast) -> void; + auto prepareIndices(const Expr& ast) -> void; + + /** ExprId to diagnostics data index mapping. */ + std::vector exprIndex_; + + /** FieldExpr diagnostics data. */ + std::vector fieldData_; + + /** ComparisonExpr diagnostics data. */ + std::vector comparisonData_; + +private: + friend auto diagnostics(const Diagnostics& diag) -> tl::expected, Error>; /** * Build messages from this objecst diagnostics data. 
*/ - auto buildMessages(Environment& env, const AST& ast) const -> std::vector; + auto buildMessages() const -> std::vector; + + mutable std::mutex mtx_; }; +namespace detail +{ + +template +struct DiagnosticsStorage; + +template <> +struct DiagnosticsStorage +{ + static auto get(Diagnostics& diag) + { + return &diag.fieldData_; + } +}; + +template <> +struct DiagnosticsStorage +{ + static auto get(Diagnostics& diag) + { + return &diag.comparisonData_; + } +}; + +} + +/** + * Get typed diagnostics data for a single Expr. + */ +template +auto Diagnostics::get(const Expr& expr) -> DiagnosticsDataType& +{ + auto* data = detail::DiagnosticsStorage::get(*this); + + const auto id = expr.id(); + if (exprIndex_.size() <= id) [[unlikely]] { + exprIndex_.resize(id + 1u, Diagnostics::InvalidIndex); + exprIndex_[id] = data->size(); + } + + auto index = exprIndex_[id]; + if (index == Diagnostics::InvalidIndex) { + exprIndex_[id] = data->size(); + index = exprIndex_[id]; + } + + if (data->size() <= index) { + data->resize(index + 1u); + } + + return (*data)[index]; +} + } diff --git a/include/simfil/environment.h b/include/simfil/environment.h index 4ed5046..0383872 100644 --- a/include/simfil/environment.h +++ b/include/simfil/environment.h @@ -21,6 +21,7 @@ namespace simfil class Expr; class Function; +class Diagnostics; struct ResultFn; struct Debug; @@ -138,6 +139,7 @@ struct Environment struct Context { Environment* const env; + Diagnostics* const diag; /* Current phase under which the evaluation * takes place. */ @@ -151,7 +153,8 @@ struct Context /* Timeout after which the evaluation should be canceled. 
*/ std::optional> timeout; - Context(Environment* env, Phase = Phase::Evaluation); + Context() = delete; + Context(Environment* env, Diagnostics* diag, Phase = Phase::Evaluation); auto canceled() const -> bool { diff --git a/include/simfil/expression-visitor.h b/include/simfil/expression-visitor.h new file mode 100644 index 0000000..d01ccdf --- /dev/null +++ b/include/simfil/expression-visitor.h @@ -0,0 +1,77 @@ +// Copyright (c) Navigation Data Standard e.V. - See "LICENSE" file. + +#pragma once + +#include + +namespace simfil +{ + +class Expr; +class WildcardExpr; +class AnyChildExpr; +class MultiConstExpr; +class ConstExpr; +class SubscriptExpr; +class SubExpr; +class AnyExpr; +class EachExpr; +class CallExpression; +class UnpackExpr; +class UnaryWordOpExpr; +class BinaryWordOpExpr; +class FieldExpr; +class PathExpr; +class AndExpr; +class OrExpr; +struct OperatorEq; +struct OperatorNeq; +struct OperatorLt; +struct OperatorLtEq; +struct OperatorGt; +struct OperatorGtEq; +template class UnaryExpr; +template class BinaryExpr; + +/** + * Visitor base for visiting expressions recursively. 
+ */ +class ExprVisitor +{ +public: + ExprVisitor(); + virtual ~ExprVisitor(); + + virtual void visit(const Expr& expr); + virtual void visit(const WildcardExpr& expr); + virtual void visit(const AnyChildExpr& expr); + virtual void visit(const MultiConstExpr& expr); + virtual void visit(const ConstExpr& expr); + virtual void visit(const SubscriptExpr& expr); + virtual void visit(const SubExpr& expr); + virtual void visit(const AnyExpr& expr); + virtual void visit(const EachExpr& expr); + virtual void visit(const CallExpression& expr); + virtual void visit(const PathExpr& expr); + virtual void visit(const FieldExpr& expr); + virtual void visit(const UnpackExpr& expr); + virtual void visit(const UnaryWordOpExpr& expr); + virtual void visit(const BinaryWordOpExpr& expr); + virtual void visit(const AndExpr& expr); + virtual void visit(const OrExpr& expr); + virtual void visit(const BinaryExpr& expr); + virtual void visit(const BinaryExpr& expr); + virtual void visit(const BinaryExpr& expr); + virtual void visit(const BinaryExpr& expr); + virtual void visit(const BinaryExpr& expr); + virtual void visit(const BinaryExpr& expr); + +protected: + /* Returns the index of the current expression */ + std::size_t index() const; + +private: + std::size_t index_ = 0; +}; + +} diff --git a/include/simfil/expression.h b/include/simfil/expression.h index 01fb0ad..39692f9 100644 --- a/include/simfil/expression.h +++ b/include/simfil/expression.h @@ -5,7 +5,6 @@ #include "simfil/token.h" #include "simfil/value.h" #include "simfil/environment.h" -#include "simfil/diagnostics.h" #include "simfil/result.h" #include @@ -19,6 +18,8 @@ class Expr { friend class AST; public: + using ExprId = std::uint32_t; + /** * Type of an expression. 
*/ @@ -30,8 +31,12 @@ class Expr VALUE, }; - Expr() = default; - explicit Expr(const Token& token) + Expr() = delete; + explicit Expr(ExprId id) + : id_(id) + {} + explicit Expr(ExprId id, const Token& token) + : id_(id) { assert(token.end >= token.begin); sourceLocation_.offset = token.begin; @@ -41,6 +46,10 @@ class Expr virtual ~Expr() = default; /* Category */ + auto id() const -> ExprId + { + return id_; + } virtual auto type() const -> Type = 0; virtual auto constant() const -> bool { @@ -50,7 +59,7 @@ class Expr /* Debug */ virtual auto toString() const -> std::string = 0; - auto eval(Context ctx, const Value& val, const ResultFn& res) -> tl::expected + auto eval(Context ctx, const Value& val, const ResultFn& res) const -> tl::expected { if (ctx.canceled()) return Result::Stop; @@ -66,7 +75,7 @@ class Expr return ieval(ctx, val, res); } - auto eval(Context ctx, Value&& val, const ResultFn& res) -> tl::expected + auto eval(Context ctx, Value&& val, const ResultFn& res) const -> tl::expected { if (ctx.canceled()) return Result::Stop; @@ -81,12 +90,8 @@ class Expr return ieval(ctx, std::move(val), res); } - /* Recursive clone */ - [[nodiscard]] - virtual auto clone() const -> std::unique_ptr = 0; - /* Accept expression visitor */ - virtual auto accept(ExprVisitor& v) -> void = 0; + virtual auto accept(ExprVisitor& v) const -> void = 0; /* Source location the expression got parsed from */ [[nodiscard]] @@ -97,13 +102,15 @@ class Expr private: /* Abstract evaluation implementation */ - virtual auto ieval(Context ctx, const Value& value, const ResultFn& result) -> tl::expected = 0; + virtual auto ieval(Context ctx, const Value& value, const ResultFn& result) const -> tl::expected = 0; /* Move-optimized evaluation implementation */ - virtual auto ieval(Context ctx, Value&& value, const ResultFn& result) -> tl::expected { + virtual auto ieval(Context ctx, Value&& value, const ResultFn& result) const -> tl::expected + { return ieval(ctx, value, result); } + ExprId 
id_; SourceLocation sourceLocation_; }; @@ -119,7 +126,7 @@ class AST ~AST(); - auto expr() const -> Expr& + auto expr() const -> const Expr& { return *expr_; } diff --git a/include/simfil/model/arena.h b/include/simfil/model/arena.h index 92ed45f..36af661 100644 --- a/include/simfil/model/arena.h +++ b/include/simfil/model/arena.h @@ -2,13 +2,20 @@ #pragma once +#include +#include #include #include +#include #include #include #include +#include +#include +#include +#include #include -#include +#include "simfil/model/column.h" #include "simfil/exception-handler.h" #include "simfil/error.h" @@ -24,26 +31,34 @@ namespace bitsery::ext { namespace simfil { -/// Address of an array within an ArrayArena -using ArrayIndex = int32_t; +/// Address of an array within an ArrayArena. Note, that only the lowest 3B may be +/// used. This is to allow passing ArrayIndex as the value of a ModelNodeAddress. +using ArrayIndex = uint32_t; /// Array index which can be used to indicate a default/invalid value. -constexpr static ArrayIndex InvalidArrayIndex = -1; +constexpr static ArrayIndex InvalidArrayIndex = 0x00ffffffu; +constexpr static ArrayIndex FirstRegularArrayIndex = 1u; +constexpr static ArrayIndex SingletonArrayHandleMask = 0x00800000u; +constexpr static ArrayIndex SingletonArrayHandlePayloadMask = 0x007fffffu; /** * ArrayArena - An arena allocator for append-only vectors. * - * The ArrayArena is a wrapper around a segmented_vector. It keeps track of - * forward-linked array chunks. When an array grows beyond the current capacity c - * of its current last chunk, a new chunk of size c*2 is allocated and becomes - * the new last chunk. This is then set as linked to the previous last chunk. - * Usually, appending will be lock-free, and only growth needs the lock. + * The ArrayArena is a wrapper around paged model columns. 
It keeps track of + * forward-linked array chunks for regular growable arrays, optional singleton + * handles for fixed-size 0-or-1 arrays, and an optional compact head + * representation used during serialization. Without ARRAY_ARENA_THREAD_SAFE, + * appending is lock-free. With it enabled, reads use shared locks while + * mutations take a write lock. * * @tparam ElementType_ The type of elements stored in the arrays. - * @tparam PageSize The number of elements that each segment in the - * segmented_vector can store. + * @tparam PageSize The number of elements that each storage page can store. */ -template +template < + class ElementType_, + size_t PageSize = 4096, + size_t ChunkPageSize = 4096, + typename SizeType_ = uint32_t> class ArrayArena { friend struct bitsery::ext::ArrayArenaExt; @@ -51,24 +66,90 @@ class ArrayArena public: using ElementType = ElementType_; using SizeType = SizeType_; + using DataStorage = ModelColumn; + using DataWriteRef = decltype(std::declval()[std::declval()]); + using DataReadRef = decltype(std::declval()[std::declval()]); + using AtValue = detail::arena_access_result_t; + using ConstAtValue = detail::arena_access_result_t; + + struct SingletonStats + { + size_t handleCount = 0; + size_t occupiedCount = 0; + size_t emptyCount = 0; + size_t singletonStorageBytes = 0; + size_t hypotheticalRegularBytes = 0; + size_t estimatedSavedBytes = 0; + }; + + struct CompactArrayChunk + { + MODEL_COLUMN_TYPE(8); + + std::uint32_t offset = 0; + std::uint32_t size = 0; + + template + void serialize(S& s) { + s.value4b(offset); + s.value4b(size); + } + }; + using CompactHeadStorage = ModelColumn; + + ArrayArena() + { + ensure_regular_head_pool(); + } + + static constexpr bool is_singleton_handle(ArrayIndex arrayIndex) + { + return arrayIndex != InvalidArrayIndex && + (arrayIndex & SingletonArrayHandleMask) != 0; + } + + static constexpr ArrayIndex singleton_payload(ArrayIndex handle) + { + return handle & SingletonArrayHandlePayloadMask; + } /** * 
Creates a new array with the specified initial capacity. * * @param initialCapacity The initial capacity of the new array. + * @param fixedSize If true, `initialCapacity == 1` creates a singleton handle + * instead of a growable chunk-backed array. * @return The index of the new array. */ - ArrayIndex new_array(size_t initialCapacity) + ArrayIndex new_array(size_t initialCapacity, bool fixedSize = false) { #ifdef ARRAY_ARENA_THREAD_SAFE std::unique_lock guard(lock_); #endif + ensure_runtime_heads_from_compact(); + + if (initialCapacity == 1U && fixedSize) { + auto singletonIndex = to_array_index(singletonValues_.size()); + if (singletonIndex > SingletonArrayHandlePayloadMask) { + raise("ArrayArena singleton pool exhausted."); + } + singletonValues_.emplace_back(ElementType_{}); + singletonOccupied_.emplace_back(static_cast(0)); + compactHeads_.reset(); + return SingletonArrayHandleMask | singletonIndex; + } + + ensure_regular_head_pool(); size_t offset = data_.size(); data_.resize(offset + initialCapacity); - auto index = static_cast(heads_.size()); + auto index = to_array_index(heads_.size()); + if ((index & SingletonArrayHandleMask) != 0) { + raise("ArrayArena regular head index exceeded handle bit range."); + } heads_.push_back({(SizeType_)offset, (SizeType_)initialCapacity, 0, InvalidArrayIndex, InvalidArrayIndex}); + compactHeads_.reset(); return index; } @@ -80,22 +161,110 @@ class ArrayArena #ifdef ARRAY_ARENA_THREAD_SAFE std::shared_lock guard(lock_); #endif + if (heads_.empty() && compactHeads_) + return compactHeads_->size(); return heads_.size(); } + [[nodiscard]] size_t singleton_handle_count() const + { + return singletonValues_.size(); + } + + [[nodiscard]] size_t singleton_occupied_count() const + { + size_t occupiedCount = 0; + for (auto const occupied : singletonOccupied_) { + occupiedCount += occupied == 0 ? 
0 : 1; + } + return occupiedCount; + } + + [[nodiscard]] SingletonStats singleton_stats() const + { + const auto handleCount = singleton_handle_count(); + const auto occupiedCount = singleton_occupied_count(); + const auto emptyCount = handleCount >= occupiedCount ? handleCount - occupiedCount : 0; + + const auto singletonStorageBytes = + singletonValues_.byte_size() + singletonOccupied_.byte_size(); + const auto hypotheticalRegularBytes = + handleCount * sizeof(CompactArrayChunk) + occupiedCount * DataStorage::record_size; + + return SingletonStats{ + .handleCount = handleCount, + .occupiedCount = occupiedCount, + .emptyCount = emptyCount, + .singletonStorageBytes = singletonStorageBytes, + .hypotheticalRegularBytes = hypotheticalRegularBytes, + .estimatedSavedBytes = hypotheticalRegularBytes > singletonStorageBytes + ? hypotheticalRegularBytes - singletonStorageBytes + : 0}; + } + + [[nodiscard]] bool valid(ArrayIndex a) const + { + if (a == InvalidArrayIndex) { + return false; + } + if (is_singleton_handle(a)) { + auto singletonIndex = singleton_payload(a); + return singletonIndex < singletonValues_.size() && + singletonIndex < singletonOccupied_.size(); + } + if (heads_.empty() && compactHeads_) { + return a < compactHeads_->size(); + } + return a < heads_.size(); + } + /** * Returns the size of the specified array. * * @param a The index of the array. * @return The size of the array. */ - [[nodiscard]] SizeType_ size(ArrayIndex const& a) const { + [[nodiscard]] SizeType_ size(ArrayIndex a) const { #ifdef ARRAY_ARENA_THREAD_SAFE std::shared_lock guard(lock_); #endif + if (is_singleton_handle(a)) { + auto singletonIndex = singleton_payload(a); + if (singletonIndex >= singletonOccupied_.size()) { + raise("ArrayArena singleton handle index out of range."); + } + return singletonOccupied_.at(singletonIndex) == 0 ? 
0 : 1; + } + + if (heads_.empty() && compactHeads_) { + if (a >= compactHeads_->size()) { + raise("ArrayArena head index out of range."); + } + return static_cast((*compactHeads_)[a].size); + } + if (a >= heads_.size()) { + raise("ArrayArena head index out of range."); + } return heads_[a].size; } + /** + * @return The current size, in bytes, of the array arena if serialized. + */ + [[nodiscard]] size_t byte_size() const { + auto singletonBytes = + singletonValues_.byte_size() + + singletonOccupied_.byte_size(); + if (heads_.empty() && compactHeads_) { + return compactHeads_->byte_size() + data_.byte_size() + singletonBytes; + } + auto result = heads_.size() * sizeof(CompactArrayChunk); + for (auto const& head : heads_) { + result += head.size * DataStorage::record_size; + } + return result + singletonBytes; + } + /** * Returns a reference to the element at the specified index in the array. * @@ -104,13 +273,13 @@ class ArrayArena * @return A reference to the element at the specified index. * @throws std::out_of_range if the index is out of the array bounds. */ - tl::expected, Error> - at(ArrayIndex const& a, size_t const& i) { - return at_impl(*this, a, i); + tl::expected + at(ArrayIndex a, size_t i) { + return at_impl(*this, a, i); } - tl::expected, Error> - at(ArrayIndex const& a, size_t const& i) const { - return at_impl(*this, a, i); + tl::expected + at(ArrayIndex a, size_t i) const { + return at_impl(*this, a, i); } /** @@ -120,17 +289,38 @@ class ArrayArena * @param data The element to be appended. * @return A reference to the appended element. 
*/ - ElementType_& push_back(ArrayIndex const& a, ElementType_ const& data) + DataWriteRef push_back(ArrayIndex a, ElementType_ const& data) { - Chunk& updatedLast = ensure_capacity_and_get_last_chunk(a); + if (is_singleton_handle(a)) { + #ifdef ARRAY_ARENA_THREAD_SAFE + std::unique_lock guard(lock_); + #endif + auto singletonIndex = singleton_payload(a); + if (singletonIndex >= singletonValues_.size() || + singletonIndex >= singletonOccupied_.size()) { + raise("ArrayArena singleton handle index out of range."); + } + auto& occupied = singletonOccupied_.at(singletonIndex); + if (occupied != 0) { + raise( + "Cannot append more than one element to a singleton array handle."); + } + singletonValues_.at(singletonIndex) = data; + occupied = 1; + compactHeads_.reset(); + return singletonValues_.at(singletonIndex); + } + #ifdef ARRAY_ARENA_THREAD_SAFE - std::shared_lock guard(lock_); + std::unique_lock guard(lock_); #endif - auto& elem = data_[updatedLast.offset + updatedLast.size]; + Chunk& updatedLast = ensure_capacity_and_get_last_chunk_unlocked(a); + DataWriteRef elem = data_[updatedLast.offset + updatedLast.size]; elem = data; ++heads_[a].size; if (&heads_[a] != &updatedLast) ++updatedLast.size; + compactHeads_.reset(); return elem; } @@ -143,17 +333,38 @@ class ArrayArena * @return A reference to the appended element. */ template - ElementType_& emplace_back(ArrayIndex const& a, Args&&... args) + DataWriteRef emplace_back(ArrayIndex a, Args&&... 
args) { - Chunk& updatedLast = ensure_capacity_and_get_last_chunk(a); + if (is_singleton_handle(a)) { + #ifdef ARRAY_ARENA_THREAD_SAFE + std::unique_lock guard(lock_); + #endif + auto singletonIndex = singleton_payload(a); + if (singletonIndex >= singletonValues_.size() || + singletonIndex >= singletonOccupied_.size()) { + raise("ArrayArena singleton handle index out of range."); + } + auto& occupied = singletonOccupied_.at(singletonIndex); + if (occupied != 0) { + raise( + "Cannot append more than one element to a singleton array handle."); + } + singletonValues_.at(singletonIndex) = ElementType_(std::forward(args)...); + occupied = 1; + compactHeads_.reset(); + return singletonValues_.at(singletonIndex); + } + #ifdef ARRAY_ARENA_THREAD_SAFE - std::shared_lock guard(lock_); + std::unique_lock guard(lock_); #endif - auto& elem = data_[updatedLast.offset + updatedLast.size]; - new (&elem) ElementType_(std::forward(args)...); + Chunk& updatedLast = ensure_capacity_and_get_last_chunk_unlocked(a); + DataWriteRef elem = data_[updatedLast.offset + updatedLast.size]; + elem = ElementType_(std::forward(args)...); ++heads_[a].size; if (&heads_[a] != &updatedLast) ++updatedLast.size; + compactHeads_.reset(); return elem; } @@ -170,6 +381,10 @@ class ArrayArena heads_.clear(); continuations_.clear(); data_.clear(); + singletonValues_.clear(); + singletonOccupied_.clear(); + compactHeads_.reset(); + ensure_regular_head_pool(); } /** @@ -186,32 +401,51 @@ class ArrayArena heads_.shrink_to_fit(); continuations_.shrink_to_fit(); data_.shrink_to_fit(); + singletonValues_.shrink_to_fit(); + singletonOccupied_.shrink_to_fit(); + if (compactHeads_) { + compactHeads_->shrink_to_fit(); + } + } + + /** + * Check if the arena is currently represented by compact heads only. 
+ */ + [[nodiscard]] bool is_compact() const { + return heads_.empty() && compactHeads_.has_value(); } // Iterator-related types and functions template class ArrayIterator; - class ArrayRange; using iterator = ArrayIterator; using const_iterator = ArrayIterator; + template + class BasicArrayRange; + template + class BasicArrayArenaIterator; + using arena_iterator = BasicArrayArenaIterator; + using const_arena_iterator = BasicArrayArenaIterator; template class ArrayIterator { using ArrayArenaRef = std::conditional_t; - using ElementRef = std::conditional_t; - friend class ArrayRange; + using AtExpected = decltype(std::declval().at(std::declval(), std::declval())); + using ElementAccess = std::remove_cvref_t().value())>; + template + friend class BasicArrayRange; public: using iterator_category = std::input_iterator_tag; using value_type = T; using difference_type = std::ptrdiff_t; - using pointer = value_type*; - using reference = ElementRef; + using pointer = void; + using reference = ElementAccess; ArrayIterator(ArrayArenaRef arena, ArrayIndex array_index, size_t elem_index) : arena_(arena), array_index_(array_index), elem_index_(elem_index) {} - ElementRef operator*() noexcept { + reference operator*() noexcept { auto res = arena_.at(array_index_, elem_index_); assert(res); // Unchecked access! 
@@ -239,105 +473,168 @@ class ArrayArena size_t elem_index_; }; - class ArrayRange + template + class BasicArrayRange { public: - ArrayRange(iterator begin, iterator end) : begin_(begin), end_(end) {} + using element_iterator = std::conditional_t; + + BasicArrayRange(element_iterator begin, element_iterator end) + : begin_(begin), end_(end) + { + } - iterator begin() const { return begin_; } - iterator end() const { return end_; } + element_iterator begin() const { return begin_; } + element_iterator end() const { return end_; } [[nodiscard]] size_t size() const { return begin_.arena_.size(begin_.array_index_); } - decltype(auto) operator[] (size_t const& i) const { return begin_.arena_.at(begin_.array_index_, i); } + decltype(auto) operator[](size_t i) const { return begin_.arena_.at(begin_.array_index_, i); } private: - iterator begin_; - iterator end_; + element_iterator begin_; + element_iterator end_; }; - class ArrayArenaIterator + template + class BasicArrayArenaIterator { public: - ArrayArenaIterator(ArrayArena& arena, ArrayIndex index) - : arena_(arena), index_(index) {} + using ArrayArenaRef = std::conditional_t; + using element_iterator = std::conditional_t; + using value_type = BasicArrayRange; + using difference_type = std::ptrdiff_t; + using pointer = void; + using reference = value_type; + using iterator_category = std::input_iterator_tag; + + BasicArrayArenaIterator(ArrayArenaRef arena, size_t ordinal) + : arena_(arena), + ordinal_(ordinal) + { + update_array_index(); + } - iterator begin() { return arena_.begin(index_); } - iterator end() { return arena_.end(index_); } - const_iterator begin() const { return arena_.begin(index_); } - const_iterator end() const { return arena_.end(index_); } + element_iterator begin() const { return arena_.begin(index_); } + element_iterator end() const { return arena_.end(index_); } - ArrayRange operator*() { - return ArrayRange(arena_.begin(index_), arena_.end(index_)); + value_type operator*() const + { + return 
value_type(begin(), end()); } - ArrayArenaIterator& operator++() { - ++index_; + BasicArrayArenaIterator& operator++() { + ++ordinal_; + update_array_index(); return *this; } - bool operator==(const ArrayArenaIterator& other) const { - return &arena_ == &other.arena_ && index_ == other.index_; + bool operator==(const BasicArrayArenaIterator& other) const { + return &arena_ == &other.arena_ && ordinal_ == other.ordinal_; } - bool operator!=(const ArrayArenaIterator& other) const { + bool operator!=(const BasicArrayArenaIterator& other) const { return !(*this == other); // NOLINT } - using iterator_category = std::input_iterator_tag; - using value_type = ArrayRange; - using difference_type = std::ptrdiff_t; - using pointer = value_type*; - using reference = value_type&; - private: - ArrayArena& arena_; + [[nodiscard]] size_t regular_array_count() const + { + if (arena_.heads_.empty() && arena_.compactHeads_) { + return arena_.compactHeads_->size(); + } + return arena_.heads_.size(); + } + + [[nodiscard]] size_t visible_regular_array_count() const + { + const auto count = regular_array_count(); + return count > FirstRegularArrayIndex ? 
count - FirstRegularArrayIndex : 0; + } + + [[nodiscard]] size_t total_visible_array_count() const + { + return visible_regular_array_count() + arena_.singleton_handle_count(); + } + + void update_array_index() + { + const auto regularCount = visible_regular_array_count(); + if (ordinal_ < regularCount) { + index_ = to_array_index(FirstRegularArrayIndex + ordinal_); + return; + } + + if (auto const singletonOrdinal = ordinal_ - regularCount; + ordinal_ < total_visible_array_count() && + singletonOrdinal <= SingletonArrayHandlePayloadMask) { + index_ = SingletonArrayHandleMask | to_array_index(singletonOrdinal); + return; + } + + index_ = InvalidArrayIndex; + } + + ArrayArenaRef arena_; + size_t ordinal_ = 0; ArrayIndex index_; }; - iterator begin(ArrayIndex const& a) { return iterator(*this, a, 0); } - iterator end(ArrayIndex const& a) { return iterator(*this, a, size(a)); } - const_iterator begin(ArrayIndex const& a) const { return const_iterator(*this, a, 0); } - const_iterator end(ArrayIndex const& a) const { return const_iterator(*this, a, size(a)); } + iterator begin(ArrayIndex a) { return iterator(*this, a, 0); } + iterator end(ArrayIndex a) { return iterator(*this, a, size(a)); } + const_iterator begin(ArrayIndex a) const { return const_iterator(*this, a, 0); } + const_iterator end(ArrayIndex a) const { return const_iterator(*this, a, size(a)); } - ArrayArenaIterator begin() { return ArrayArenaIterator(*this, 0); } - ArrayArenaIterator end() { return ArrayArenaIterator(*this, static_cast(heads_.size())); } - ArrayArenaIterator begin() const { return ArrayArenaIterator(*this, 0); } - ArrayArenaIterator end() const { return ArrayArenaIterator(*this, static_cast(heads_.size())); } + arena_iterator begin() { return arena_iterator(*this, 0); } + arena_iterator end() + { + const auto regularCount = size(); + const auto visibleRegularCount = regularCount > FirstRegularArrayIndex + ? 
regularCount - FirstRegularArrayIndex + : 0; + return arena_iterator(*this, visibleRegularCount + singleton_handle_count()); + } + const_arena_iterator begin() const + { + return const_arena_iterator(*this, 0); + } + const_arena_iterator end() const + { + const auto regularCount = size(); + const auto visibleRegularCount = regularCount > FirstRegularArrayIndex + ? regularCount - FirstRegularArrayIndex + : 0; + return const_arena_iterator( + *this, + visibleRegularCount + singleton_handle_count()); + } - ArrayRange range(ArrayIndex const& array) {return ArrayRange(begin(array), end(array));} + BasicArrayRange range(ArrayIndex array) { return BasicArrayRange(begin(array), end(array)); } + BasicArrayRange range(ArrayIndex array) const { return BasicArrayRange(begin(array), end(array)); } /// Support fast iteration via callback. The passed lambda needs to return true, /// as long as the iteration is supposed to continue. template - void iterate(ArrayIndex const& a, Func&& lambda) + void iterate(ArrayIndex a, Func&& lambda) { - Chunk const* current = &heads_[a]; - size_t globalIndex = 0; - while (current != nullptr) - { - for (size_t i = 0; i < current->size && i < current->capacity; ++i) - { - if constexpr (std::is_invocable_r_v) { - // If lambda returns bool, break if it returns false - if (!lambda(data_[current->offset + i])) - return; - } - else if constexpr (std::is_invocable_v) { - // If lambda takes two arguments, pass the current index - lambda(data_[current->offset + i], globalIndex); - } - else - lambda(data_[current->offset + i]); - ++globalIndex; - } - current = (current->next != InvalidArrayIndex) ? &continuations_[current->next] : nullptr; + if (is_singleton_handle(a)) { + iterate_singleton(a, std::forward(lambda)); + return; + } + + if (heads_.empty() && compactHeads_) { + iterate_compact(a, std::forward(lambda)); + return; } + + iterate_chunked(a, std::forward(lambda)); } private: // Represents a chunk of an array in the arena. 
struct Chunk { - SizeType_ offset = 0; // The starting offset of the chunk in the segmented_vector. + MODEL_COLUMN_TYPE((sizeof(SizeType_) * 3) + (sizeof(ArrayIndex) * 2)); + + SizeType_ offset = 0; // The starting offset of the chunk in the storage buffer. SizeType_ capacity = 0; // The maximum number of elements the chunk can hold. SizeType_ size = 0; // The current number of elements in the chunk, // or the total number of elements of the whole array if this is a head chunk. @@ -346,14 +643,137 @@ class ArrayArena ArrayIndex last = InvalidArrayIndex; // The index of the last chunk in the sequence, or InvalidArrayIndex if none. }; - sfl::segmented_vector heads_; // Head chunks of all arrays. - sfl::segmented_vector continuations_; // Continuation chunks of all arrays. - sfl::segmented_vector data_; // The underlying segmented_vector storing the array elements. + ModelColumn heads_; // Head chunks of all arrays. + ModelColumn continuations_; // Continuation chunks of all arrays. + DataStorage data_; // Underlying element storage. + DataStorage singletonValues_; + ModelColumn singletonOccupied_; + std::optional compactHeads_; #ifdef ARRAY_ARENA_THREAD_SAFE mutable std::shared_mutex lock_; // Mutex for synchronizing access to the data structure during growth. 
#endif + static ArrayIndex to_array_index(size_t value) + { + if (value > std::numeric_limits::max()) { + raise("ArrayArena index exceeds address space."); + } + return static_cast(value); + } + + template + static bool invoke_iter_callback(Func&& lambda, Value&& value, size_t index) + { + using Arg = decltype(value); + if constexpr (std::is_invocable_r_v) { + return lambda(std::forward(value)); + } else if constexpr (std::is_invocable_v) { + lambda(std::forward(value), index); + return true; + } else if constexpr (std::is_invocable_v) { + lambda(std::forward(value)); + return true; + } else { + static_assert( + std::is_invocable_v, + "ArrayArena::iterate callback must accept (value) or (value, index), optionally returning bool"); + return false; + } + } + + template + void iterate_singleton(ArrayIndex a, Func&& lambda) + { + auto singletonIndex = singleton_payload(a); + if (singletonIndex >= singletonValues_.size() || + singletonIndex >= singletonOccupied_.size()) { + raise("ArrayArena singleton handle index out of range."); + } + if (singletonOccupied_.at(singletonIndex) == 0) { + return; + } + + decltype(auto) value = singletonValues_.at(singletonIndex); + invoke_iter_callback(lambda, value, 0); + } + + template + void iterate_compact(ArrayIndex a, Func&& lambda) + { + if (a >= compactHeads_->size()) { + raise("ArrayArena head index out of range."); + } + + auto const& compact = (*compactHeads_)[a]; + for (size_t i = 0; i < static_cast(compact.size); ++i) + { + decltype(auto) value = data_[static_cast(compact.offset) + i]; + if (!invoke_iter_callback(lambda, value, i)) { + return; + } + } + } + + template + void iterate_chunked(ArrayIndex a, Func&& lambda) + { + if (a >= heads_.size()) { + raise("ArrayArena head index out of range."); + } + + Chunk const* current = &heads_[a]; + size_t globalIndex = 0; + while (current != nullptr) + { + for (size_t i = 0; i < current->size && i < current->capacity; ++i) + { + decltype(auto) value = data_[current->offset + i]; + 
if (!invoke_iter_callback(lambda, value, globalIndex)) { + return; + } + ++globalIndex; + } + current = (current->next != InvalidArrayIndex) + ? &continuations_[current->next] + : nullptr; + } + } + + void ensure_regular_head_pool() + { + if (!heads_.empty()) { + return; + } + heads_.push_back({ + 0, + 0, + 0, + InvalidArrayIndex, + InvalidArrayIndex + }); + } + + void ensure_runtime_heads_from_compact() + { + if (!heads_.empty() || !compactHeads_) + return; + + heads_.clear(); + heads_.reserve(compactHeads_->size()); + continuations_.clear(); + for (auto const& compactHead : *compactHeads_) { + heads_.push_back({ + static_cast(compactHead.offset), + static_cast(compactHead.size), + static_cast(compactHead.size), + InvalidArrayIndex, + InvalidArrayIndex + }); + } + ensure_regular_head_pool(); + } + /** * Ensures that the specified array has enough capacity to add one more element * and returns a reference to the last chunk in the array. @@ -364,19 +784,22 @@ class ArrayArena * @param a The index of the array. * @return A reference to the last chunk of the array, after ensuring there's capacity. */ - Chunk& ensure_capacity_and_get_last_chunk(ArrayIndex const& a) + // Caller must hold the write lock when ARRAY_ARENA_THREAD_SAFE is enabled. + Chunk& ensure_capacity_and_get_last_chunk_unlocked(ArrayIndex a) { - #ifdef ARRAY_ARENA_THREAD_SAFE - std::shared_lock read_guard(lock_); - #endif + if (is_singleton_handle(a)) { + raise("Singleton handles do not use chunk growth."); + } + + ensure_runtime_heads_from_compact(); + ensure_regular_head_pool(); + if (a >= heads_.size()) { + raise("ArrayArena head index out of range."); + } Chunk& head = heads_[a]; Chunk& last = (head.last == InvalidArrayIndex) ? 
head : continuations_[head.last]; if (last.size < last.capacity) return last; - #ifdef ARRAY_ARENA_THREAD_SAFE - read_guard.unlock(); - std::unique_lock guard(lock_); - #endif size_t offset = data_.size(); size_t newCapacity = std::max((SizeType_)2, (SizeType_)last.capacity * 2); data_.resize(offset + newCapacity); @@ -385,25 +808,53 @@ class ArrayArena head.capacity = static_cast(newCapacity); return head; } - auto newIndex = static_cast(continuations_.size()); + auto newIndex = to_array_index(continuations_.size()); continuations_.push_back({(SizeType_)offset, (SizeType_)newCapacity, 0, InvalidArrayIndex, InvalidArrayIndex}); last.next = newIndex; head.last = newIndex; return continuations_[newIndex]; } - template - static tl::expected, Error> - at_impl(Self& self, ArrayIndex const& a, size_t const& i) + template + static tl::expected + at_impl(Self& self, ArrayIndex a, size_t i) { #ifdef ARRAY_ARENA_THREAD_SAFE std::shared_lock guard(self.lock_); #endif + if (is_singleton_handle(a)) { + auto singletonIndex = singleton_payload(a); + if (singletonIndex >= self.singletonValues_.size() || + singletonIndex >= self.singletonOccupied_.size()) { + return tl::unexpected(Error::IndexOutOfRange, "singleton handle index out of range"); + } + if (self.singletonOccupied_.at(singletonIndex) == 0 || i > 0) { + return tl::unexpected(Error::IndexOutOfRange, "index out of range"); + } + return detail::arena_access_wrap(self.singletonValues_.at(singletonIndex)); + } + + if (self.heads_.empty() && self.compactHeads_) { + if (a >= self.compactHeads_->size()) { + return tl::unexpected(Error::IndexOutOfRange, "array index out of range"); + } + auto const& compact = (*self.compactHeads_)[a]; + if (i < static_cast(compact.size)) { + return detail::arena_access_wrap(self.data_[static_cast(compact.offset) + i]); + } + return tl::unexpected(Error::IndexOutOfRange, "index out of range"); + } + + if (a >= self.heads_.size()) { + return tl::unexpected(Error::IndexOutOfRange, "array index out 
of range"); + } + typename Self::Chunk const* current = &self.heads_[a]; size_t remaining = i; while (true) { - if (remaining < current->capacity && remaining < current->size) - return self.data_[current->offset + remaining]; + if (remaining < current->capacity && remaining < current->size) { + return detail::arena_access_wrap(self.data_[current->offset + remaining]); + } if (current->next == InvalidArrayIndex) return tl::unexpected(Error::IndexOutOfRange, "index out of range"); remaining -= current->capacity; diff --git a/include/simfil/model/bitsery-traits.h b/include/simfil/model/bitsery-traits.h index 7c306d4..e3b39e6 100644 --- a/include/simfil/model/bitsery-traits.h +++ b/include/simfil/model/bitsery-traits.h @@ -1,7 +1,9 @@ #pragma once +#include #include #include +#include #include #include #include @@ -55,31 +57,87 @@ struct ArrayArenaExt template void serialize(S& s, simfil::ArrayArena const& arena, Fnc&& fnc) const { - auto numArrays = static_cast(arena.heads_.size()); - s.value4b(numArrays); - for (simfil::ArrayIndex i = 0; i < numArrays; ++i) { - auto size = arena.size(i); - s.value4b(size); - for (size_t j = 0; j < size; ++j) { - if (auto value = arena.at(i, j)) - fnc(s, const_cast(value->get())); - else - raise(std::move(value.error())); // Bitsery does not support propagating errors + (void)fnc; + + // If the arena is already compact, we can simply dump out heads and data + if (arena.is_compact()) { + s.object(*arena.compactHeads_); + s.object(arena.data_); + s.object(arena.singletonValues_); + s.object(arena.singletonOccupied_); + return; + } + + // Otherwise: Build compact temporary heads/data, then serialize those buffers. 
+ using CompactHeadsStorage = typename simfil::ArrayArena::CompactHeadStorage; + using DataStorage = typename std::remove_cv_t>; + using CompactChunk = typename simfil::ArrayArena::CompactArrayChunk; + + CompactHeadsStorage compactHeads; + DataStorage compactData; + compactHeads.reserve(arena.heads_.size()); + + size_t totalElements = 0; + for (auto const& head : arena.heads_) { + totalElements += head.size; + } + compactData.resize(totalElements); + + size_t writeIndex = 0; + size_t packedOffset = 0; + for (auto const& head : arena.heads_) { + compactHeads.push_back(CompactChunk{ + static_cast(packedOffset), + static_cast(head.size) + }); + + auto const* current = &head; + size_t remaining = head.size; + while (current != nullptr && remaining > 0) { + size_t chunkUsed = 0; + if (current == &head) { + chunkUsed = std::min(head.capacity, remaining); + } else { + chunkUsed = std::min(current->size, remaining); + } + + for (size_t i = 0; i < chunkUsed; ++i) { + compactData[writeIndex++] = arena.data_[current->offset + i]; + } + remaining -= chunkUsed; + current = (current->next != simfil::InvalidArrayIndex) + ? 
&arena.continuations_[static_cast(current->next)] + : nullptr; } + packedOffset += head.size; } + + s.object(compactHeads); + s.object(compactData); + s.object(arena.singletonValues_); + s.object(arena.singletonOccupied_); } template void deserialize(S& s, simfil::ArrayArena& arena, Fnc&& fnc) const { - simfil::ArrayIndex numArrays; - s.value4b(numArrays); - for (simfil::ArrayIndex i = 0; i < numArrays; ++i) { - typename std::decay_t::SizeType size; - s.value4b(size); - auto arrayIndex = arena.new_array(size); - for (size_t j = 0; j < size; ++j) - fnc(s, arena.emplace_back(arrayIndex)); + (void)fnc; + using CompactHeadsStorage = typename simfil::ArrayArena::CompactHeadStorage; + + CompactHeadsStorage compactHeads; + s.object(compactHeads); + s.object(arena.data_); + s.object(arena.singletonValues_); + s.object(arena.singletonOccupied_); + + arena.heads_.clear(); + arena.continuations_.clear(); + arena.compactHeads_ = std::move(compactHeads); + if (arena.singletonOccupied_.size() < arena.singletonValues_.size()) { + auto const missing = arena.singletonValues_.size() - arena.singletonOccupied_.size(); + for (size_t i = 0; i < missing; ++i) { + arena.singletonOccupied_.emplace_back(static_cast(1)); + } } } }; @@ -90,7 +148,7 @@ namespace traits { template struct ExtensionTraits { - using TValue = typename T::ElementType; + using TValue = void; static constexpr bool SupportValueOverload = true; static constexpr bool SupportObjectOverload = true; static constexpr bool SupportLambdaOverload = true; diff --git a/include/simfil/model/column.h b/include/simfil/model/column.h new file mode 100644 index 0000000..e7a38ed --- /dev/null +++ b/include/simfil/model/column.h @@ -0,0 +1,914 @@ +// Copyright (c) Navigation Data Standard e.V. - See "LICENSE" file. 
+#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace simfil +{ + +static_assert( + std::endian::native == std::endian::little, + "simfil::ModelColumn currently supports little-endian hosts only"); + +enum class model_column_io_error +{ + payload_size_mismatch, +}; + +#ifndef MODEL_COLUMN_TYPE +#define MODEL_COLUMN_TYPE(expected_size) \ + using IsModelColumnType = void; \ + static constexpr std::size_t model_column_expected_size = expected_size +#endif + +/** + * Logical pair type for split column storage. + * + * `TwoPart` behaves like a plain aggregate pair, but + * `ModelColumn>` stores both halves in separate child columns. + * This keeps the wire layout dense and avoids struct-padding overhead for + * common compound records such as `{StringId, ModelNodeAddress}`. + */ +template +struct TwoPart +{ + using first_type = std::remove_cv_t; + using second_type = std::remove_cv_t; + + first_type first_{}; + second_type second_{}; + + TwoPart() = default; + + template + requires std::constructible_from && + std::constructible_from + TwoPart(A&& first, B&& second) + : first_(std::forward(first)), second_(std::forward(second)) + { + } + + [[nodiscard]] first_type& first() noexcept { return first_; } + [[nodiscard]] first_type const& first() const noexcept { return first_; } + [[nodiscard]] second_type& second() noexcept { return second_; } + [[nodiscard]] second_type const& second() const noexcept { return second_; } + + bool operator==(TwoPart const&) const = default; +}; + +namespace detail +{ + +template +std::span as_u8_span(std::span values) +{ + auto const bytes = std::as_writable_bytes(values); + return { + std::bit_cast(bytes.data()), + bytes.size() + }; +} + +template +std::span as_u8_span(std::span values) +{ + auto const bytes = std::as_bytes(values); + return { + std::bit_cast(bytes.data()), + 
bytes.size() + }; +} + +template +struct is_two_part : std::false_type +{}; + +template +struct is_two_part> : std::true_type +{}; + +template +concept two_part_type = is_two_part>::value; + +template +struct has_model_column_tag_trait : std::false_type +{}; + +template +struct has_model_column_tag_trait< + T, + std::void_t> + : std::true_type +{}; + +template +concept model_column_tagged = + has_model_column_tag_trait>::value; + +template +struct is_model_column_external_type : std::false_type +{}; + +template +concept model_column_external_type = + is_model_column_external_type>::value; + +template +concept fixed_width_integer = + std::same_as, std::int8_t> || + std::same_as, std::uint8_t> || + std::same_as, std::int16_t> || + std::same_as, std::uint16_t> || + std::same_as, std::int32_t> || + std::same_as, std::uint32_t> || + std::same_as, std::int64_t> || + std::same_as, std::uint64_t>; + +template +concept fixed_width_enum = + std::is_enum_v> && + fixed_width_integer>>; + +template +concept scalar_model_column_type = + std::same_as, bool> || + std::same_as, float> || + std::same_as, double> || + fixed_width_integer || + fixed_width_enum; + +template +concept native_pod_wire_candidate = + std::is_trivially_copyable_v> && + std::is_standard_layout_v>; + +template +constexpr std::size_t expected_model_column_sizeof() +{ + using U = std::remove_cv_t; + if constexpr (model_column_tagged) { + return U::model_column_expected_size; + } else { + return sizeof(U); + } +} + +template +struct segmented_storage_page_elements +{ + static_assert(T_PageBytes > 0, "page size must be greater than zero"); + static_assert( + (T_PageBytes % sizeof(TValue)) == 0, + "page size must be a multiple of element size"); + static constexpr std::size_t value = T_PageBytes / sizeof(TValue); +}; + +template +concept bitsery_input_archive = + requires(S& archive, bitsery::ReaderError error) { + archive.adapter().error(); + archive.adapter().error(error); + }; + +template +void 
mark_bitsery_invalid_data(S& archive) +{ + if constexpr (bitsery_input_archive) { + archive.adapter().error(bitsery::ReaderError::InvalidData); + } +} + +template