diff --git a/ci/appveyor-cpp-setup-mingw.bat b/ci/appveyor-cpp-setup-mingw.bat index d65b9bd4d20..4e4ae757985 100644 --- a/ci/appveyor-cpp-setup-mingw.bat +++ b/ci/appveyor-cpp-setup-mingw.bat @@ -32,6 +32,7 @@ pacman --sync --refresh --noconfirm ^ "%MINGW_PACKAGE_PREFIX%-flatbuffers" ^ "%MINGW_PACKAGE_PREFIX%-gflags" ^ "%MINGW_PACKAGE_PREFIX%-gobject-introspection" ^ + "%MINGW_PACKAGE_PREFIX%-gtest" ^ "%MINGW_PACKAGE_PREFIX%-gtk-doc" ^ "%MINGW_PACKAGE_PREFIX%-lz4" ^ "%MINGW_PACKAGE_PREFIX%-meson" ^ diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 3a6605a035e..8b81af3c4a5 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -320,26 +320,14 @@ arrow_add_pkg_config("arrow") # add_arrow_test(allocator-test) - -if(WIN32) - add_arrow_test(array-test - SOURCES - array-test.cc - array-binary-test.cc - array-list-test.cc - array-struct-test.cc - array-union-test.cc) -else() - add_arrow_test(array-test - SOURCES - array-test.cc - array-binary-test.cc - array-dict-test.cc - array-list-test.cc - array-struct-test.cc - array-union-test.cc) -endif() - +add_arrow_test(array-test + SOURCES + array-test.cc + array-binary-test.cc + array-dict-test.cc + array-list-test.cc + array-struct-test.cc + array-union-test.cc) add_arrow_test(buffer-test) if(ARROW_IPC) diff --git a/cpp/src/arrow/array/builder_dict.cc b/cpp/src/arrow/array/builder_dict.cc index 2e43234ae11..8af36280d0d 100644 --- a/cpp/src/arrow/array/builder_dict.cc +++ b/cpp/src/arrow/array/builder_dict.cc @@ -17,7 +17,6 @@ #include "arrow/array/builder_dict.h" -#include #include #include #include @@ -145,229 +144,173 @@ Status DictionaryType::Unify(MemoryPool* pool, const std::vector -class DictionaryBuilder::MemoTableImpl - : public internal::HashTraits::MemoTableType { - public: - using MemoTableType = typename internal::HashTraits::MemoTableType; - using MemoTableType::MemoTableType; - - MemoTableImpl(const std::shared_ptr& dictionary) - : MemoTableImpl(dictionary->length()) { - const auto& values = - static_cast::ArrayType&>(*dictionary); - for (int64_t i = 0; i < values.length(); ++i) { - ARROW_IGNORE_EXPR(this->GetOrInsert(values.GetView(i))); +class internal::DictionaryMemoTable::DictionaryMemoTableImpl { + struct MemoTableInitializer { + std::shared_ptr value_type_; + std::unique_ptr* memo_table_; + + Status Visit(const DataType&, void* = nullptr) { + return Status::NotImplemented("Initialization of ", value_type_, + " memo table is not implemented"); } - } -}; -template -DictionaryBuilder::~DictionaryBuilder() {} - -template -DictionaryBuilder::DictionaryBuilder(const std::shared_ptr& dictionary, - MemoryPool* pool) - : ArrayBuilder(dictionary->type(), pool), - memo_table_(new MemoTableImpl(dictionary)), - delta_offset_(0), - byte_width_(-1), - values_builder_(pool) { - DCHECK_EQ(T::type_id, type_->id()) << "inconsistent type passed to DictionaryBuilder"; -} + template + Status Visit(const T&, + typename internal::DictionaryTraits::MemoTableType* = nullptr) { + using MemoTable = typename internal::DictionaryTraits::MemoTableType; + memo_table_->reset(new MemoTable(0)); + return Status::OK(); + } + }; -template -DictionaryBuilder::DictionaryBuilder(const std::shared_ptr& type, - MemoryPool* pool) - : ArrayBuilder(type, pool), - memo_table_(new MemoTableImpl(0)), - delta_offset_(0), - byte_width_(-1), - values_builder_(pool) { - DCHECK_EQ(T::type_id, type->id()) << "inconsistent type passed to DictionaryBuilder"; -} + struct ArrayValuesInserter { + DictionaryMemoTableImpl* impl_; -DictionaryBuilder::DictionaryBuilder(const std::shared_ptr& type, - MemoryPool* pool) - : ArrayBuilder(type, pool), values_builder_(pool) { - DCHECK_EQ(Type::NA, type->id()) << "inconsistent type passed to DictionaryBuilder"; -} + template + Status Visit(const T& array) { + return InsertValues(array.type(), array); + } -DictionaryBuilder::DictionaryBuilder(const std::shared_ptr& dictionary, - MemoryPool* pool) - : ArrayBuilder(dictionary->type(), pool), values_builder_(pool) { - DCHECK_EQ(Type::NA, type_->id()) << "inconsistent type passed to DictionaryBuilder"; -} + private: + template + Status InsertValues(const DataType& type, const Array&, void* = nullptr) { + return Status::NotImplemented("Inserting array values of ", type, + " is not implemented"); + } -template <> -DictionaryBuilder::DictionaryBuilder( - const std::shared_ptr& type, MemoryPool* pool) - : ArrayBuilder(type, pool), - memo_table_(new MemoTableImpl(0)), - delta_offset_(0), - byte_width_(checked_cast(*type).byte_width()) {} - -template -void DictionaryBuilder::Reset() { - ArrayBuilder::Reset(); - values_builder_.Reset(); - memo_table_.reset(new MemoTableImpl(0)); - delta_offset_ = 0; -} + template + Status InsertValues( + const DataType&, const Array& array, + typename internal::DictionaryTraits::MemoTableType* = nullptr) { + for (int64_t i = 0; i < array.length(); ++i) { + ARROW_IGNORE_EXPR(impl_->GetOrInsert(array.GetView(i))); + } + return Status::OK(); + } + }; + + struct ArrayDataGetter { + std::shared_ptr value_type_; + MemoTable* memo_table_; + MemoryPool* pool_; + int64_t start_offset_; + std::shared_ptr* out_; + + Status Visit(const DataType&, void* = nullptr) { + return Status::NotImplemented("Getting array data of ", value_type_, + " is not implemented"); + } -template -Status DictionaryBuilder::Resize(int64_t capacity) { - RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); - capacity = std::max(capacity, kMinBuilderCapacity); + template + Status Visit(const T&, + typename internal::DictionaryTraits::MemoTableType* = nullptr) { + using ConcreteMemoTable = typename internal::DictionaryTraits::MemoTableType; + auto memo_table = static_cast(memo_table_); + return internal::DictionaryTraits::GetDictionaryArrayData( + pool_, value_type_, *memo_table, start_offset_, out_); + } + }; - if (capacity_ == 0) { - // Initialize hash table - // XXX should we let the user pass additional size heuristics? - delta_offset_ = 0; + public: + explicit DictionaryMemoTableImpl(const std::shared_ptr& type) + : type_(type), memo_table_(nullptr) { + MemoTableInitializer visitor{type_, &memo_table_}; + ARROW_IGNORE_EXPR(VisitTypeInline(*type_, &visitor)); } - RETURN_NOT_OK(values_builder_.Resize(capacity)); - capacity_ = values_builder_.capacity(); - return Status::OK(); -} - -Status DictionaryBuilder::Resize(int64_t capacity) { - RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); - capacity = std::max(capacity, kMinBuilderCapacity); - RETURN_NOT_OK(values_builder_.Resize(capacity)); - capacity_ = values_builder_.capacity(); - return Status::OK(); -} + Status InsertValues(const std::shared_ptr& array) { + ArrayValuesInserter visitor{this}; + return VisitArrayInline(*array, &visitor); + } -template -Status DictionaryBuilder::Append(const Scalar& value) { - RETURN_NOT_OK(Reserve(1)); + template + int32_t GetOrInsert(const T& value) { + using ConcreteMemoTable = typename internal::DictionaryTraits< + typename CTypeTraits::ArrowType>::MemoTableType; + return static_cast(memo_table_.get())->GetOrInsert(value); + } - auto memo_index = memo_table_->GetOrInsert(value); - RETURN_NOT_OK(values_builder_.Append(memo_index)); - length_ += 1; + int32_t GetOrInsert(const util::string_view& value) { + return static_cast(memo_table_.get())->GetOrInsert(value); + } - return Status::OK(); -} + Status GetArrayData(MemoryPool* pool, int64_t start_offset, + std::shared_ptr* out) { + ArrayDataGetter visitor{type_, memo_table_.get(), pool, start_offset, out}; + return VisitTypeInline(*type_, &visitor); + } -template -Status DictionaryBuilder::AppendNull() { - length_ += 1; - null_count_ += 1; + int32_t size() const { return memo_table_->size(); } - return values_builder_.AppendNull(); -} + private: + std::shared_ptr type_; + std::unique_ptr memo_table_; +}; -template -Status DictionaryBuilder::AppendNulls(int64_t length) { - length_ += length; - null_count_ += length; +internal::DictionaryMemoTable::DictionaryMemoTable(const std::shared_ptr& type) + : impl_(new DictionaryMemoTableImpl(type)) {} - return values_builder_.AppendNulls(length); +internal::DictionaryMemoTable::DictionaryMemoTable( + const std::shared_ptr& dictionary) + : impl_(new DictionaryMemoTableImpl(dictionary->type())) { + ARROW_IGNORE_EXPR(impl_->InsertValues(dictionary)); } -Status DictionaryBuilder::AppendNull() { - length_ += 1; - null_count_ += 1; +internal::DictionaryMemoTable::~DictionaryMemoTable() = default; - return values_builder_.AppendNull(); +int32_t internal::DictionaryMemoTable::GetOrInsert(const bool& value) { + return impl_->GetOrInsert(value); } -Status DictionaryBuilder::AppendNulls(int64_t length) { - length_ += length; - null_count_ += length; - - return values_builder_.AppendNulls(length); +int32_t internal::DictionaryMemoTable::GetOrInsert(const int8_t& value) { + return impl_->GetOrInsert(value); } -template -Status DictionaryBuilder::AppendArray(const Array& array) { - using ArrayType = typename TypeTraits::ArrayType; - - const auto& concrete_array = checked_cast(array); - for (int64_t i = 0; i < array.length(); i++) { - if (array.IsNull(i)) { - RETURN_NOT_OK(AppendNull()); - } else { - RETURN_NOT_OK(Append(concrete_array.GetView(i))); - } - } - return Status::OK(); +int32_t internal::DictionaryMemoTable::GetOrInsert(const int16_t& value) { + return impl_->GetOrInsert(value); } -template <> -Status DictionaryBuilder::AppendArray(const Array& array) { - if (!type_->Equals(*array.type())) { - return Status::Invalid("Cannot append FixedSizeBinary array with non-matching type"); - } - - const auto& typed_array = checked_cast(array); - for (int64_t i = 0; i < array.length(); i++) { - if (array.IsNull(i)) { - RETURN_NOT_OK(AppendNull()); - } else { - RETURN_NOT_OK(Append(typed_array.GetValue(i))); - } - } - return Status::OK(); +int32_t internal::DictionaryMemoTable::GetOrInsert(const int32_t& value) { + return impl_->GetOrInsert(value); } -Status DictionaryBuilder::AppendArray(const Array& array) { - for (int64_t i = 0; i < array.length(); i++) { - RETURN_NOT_OK(AppendNull()); - } - return Status::OK(); +int32_t internal::DictionaryMemoTable::GetOrInsert(const int64_t& value) { + return impl_->GetOrInsert(value); } -template -Status DictionaryBuilder::FinishInternal(std::shared_ptr* out) { - // Finalize indices array - RETURN_NOT_OK(values_builder_.FinishInternal(out)); - - // Generate dictionary array from hash table contents - std::shared_ptr dictionary; - std::shared_ptr dictionary_data; +int32_t internal::DictionaryMemoTable::GetOrInsert(const uint8_t& value) { + return impl_->GetOrInsert(value); +} - RETURN_NOT_OK(internal::DictionaryTraits::GetDictionaryArrayData( - pool_, type_, *memo_table_, delta_offset_, &dictionary_data)); - dictionary = MakeArray(dictionary_data); +int32_t internal::DictionaryMemoTable::GetOrInsert(const uint16_t& value) { + return impl_->GetOrInsert(value); +} - // Set type of array data to the right dictionary type - (*out)->type = std::make_shared((*out)->type, dictionary); +int32_t internal::DictionaryMemoTable::GetOrInsert(const uint32_t& value) { + return impl_->GetOrInsert(value); +} - // Update internals for further uses of this DictionaryBuilder - delta_offset_ = memo_table_->size(); - values_builder_.Reset(); +int32_t internal::DictionaryMemoTable::GetOrInsert(const uint64_t& value) { + return impl_->GetOrInsert(value); +} - return Status::OK(); +int32_t internal::DictionaryMemoTable::GetOrInsert(const float& value) { + return impl_->GetOrInsert(value); } -Status DictionaryBuilder::FinishInternal(std::shared_ptr* out) { - std::shared_ptr dictionary = std::make_shared(0); +int32_t internal::DictionaryMemoTable::GetOrInsert(const double& value) { + return impl_->GetOrInsert(value); +} - RETURN_NOT_OK(values_builder_.FinishInternal(out)); - (*out)->type = std::make_shared((*out)->type, dictionary); +int32_t internal::DictionaryMemoTable::GetOrInsert(const util::string_view& value) { + return impl_->GetOrInsert(value); +} - return Status::OK(); +Status internal::DictionaryMemoTable::GetArrayData(MemoryPool* pool, int64_t start_offset, + std::shared_ptr* out) { + return impl_->GetArrayData(pool, start_offset, out); } -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; -template class DictionaryBuilder; +int32_t internal::DictionaryMemoTable::size() const { return impl_->size(); } } // namespace arrow diff --git a/cpp/src/arrow/array/builder_dict.h b/cpp/src/arrow/array/builder_dict.h index e7f44b9d09f..84f2e87c359 100644 --- a/cpp/src/arrow/array/builder_dict.h +++ b/cpp/src/arrow/array/builder_dict.h @@ -17,11 +17,14 @@ #pragma once +#include #include #include "arrow/array/builder_adaptive.h" // IWYU pragma: export #include "arrow/array/builder_base.h" // IWYU pragma: export +#include "arrow/array.h" + namespace arrow { // ---------------------------------------------------------------------- @@ -49,6 +52,35 @@ struct DictionaryScalar { using type = util::string_view; }; +class ARROW_EXPORT DictionaryMemoTable { + public: + explicit DictionaryMemoTable(const std::shared_ptr& type); + explicit DictionaryMemoTable(const std::shared_ptr& dictionary); + ~DictionaryMemoTable(); + + int32_t GetOrInsert(const bool& value); + int32_t GetOrInsert(const int8_t& value); + int32_t GetOrInsert(const int16_t& value); + int32_t GetOrInsert(const int32_t& value); + int32_t GetOrInsert(const int64_t& value); + int32_t GetOrInsert(const uint8_t& value); + int32_t GetOrInsert(const uint16_t& value); + int32_t GetOrInsert(const uint32_t& value); + int32_t GetOrInsert(const uint64_t& value); + int32_t GetOrInsert(const float& value); + int32_t GetOrInsert(const double& value); + int32_t GetOrInsert(const util::string_view& value); + + Status GetArrayData(MemoryPool* pool, int64_t start_offset, + std::shared_ptr* out); + + int32_t size() const; + + private: + class DictionaryMemoTableImpl; + std::unique_ptr impl_; +}; + } // namespace internal /// \brief Array builder for created encoded DictionaryArray from dense array @@ -60,25 +92,58 @@ struct DictionaryScalar { /// /// data template -class ARROW_EXPORT DictionaryBuilder : public ArrayBuilder { +class DictionaryBuilder : public ArrayBuilder { public: using Scalar = typename internal::DictionaryScalar::type; // WARNING: the type given below is the value type, not the DictionaryType. // The DictionaryType is instantiated on the Finish() call. - DictionaryBuilder(const std::shared_ptr& type, MemoryPool* pool); + template + DictionaryBuilder( + typename std::enable_if::value, + const std::shared_ptr&>::type type, + MemoryPool* pool) + : ArrayBuilder(type, pool), + memo_table_(new internal::DictionaryMemoTable(type)), + delta_offset_(0), + byte_width_(-1), + values_builder_(pool) {} - DictionaryBuilder(const std::shared_ptr& dictionary, MemoryPool* pool); + template + explicit DictionaryBuilder( + typename std::enable_if::value, + const std::shared_ptr&>::type type, + MemoryPool* pool) + : ArrayBuilder(type, pool), + memo_table_(new internal::DictionaryMemoTable(type)), + delta_offset_(0), + byte_width_(static_cast(*type).byte_width()), + values_builder_(pool) {} template explicit DictionaryBuilder( typename std::enable_if::is_parameter_free, MemoryPool*>::type pool) : DictionaryBuilder(TypeTraits::type_singleton(), pool) {} - ~DictionaryBuilder() override; + DictionaryBuilder(const std::shared_ptr& dictionary, MemoryPool* pool) + : ArrayBuilder(dictionary->type(), pool), + memo_table_(new internal::DictionaryMemoTable(dictionary)), + delta_offset_(0), + byte_width_(-1), + values_builder_(pool) {} + + ~DictionaryBuilder() override = default; /// \brief Append a scalar value - Status Append(const Scalar& value); + Status Append(const Scalar& value) { + ARROW_RETURN_NOT_OK(Reserve(1)); + + auto memo_index = memo_table_->GetOrInsert(value); + ARROW_RETURN_NOT_OK(values_builder_.Append(memo_index)); + length_ += 1; + + return Status::OK(); + } /// \brief Append a fixed-width string (only for FixedSizeBinaryType) template @@ -95,16 +160,100 @@ class ARROW_EXPORT DictionaryBuilder : public ArrayBuilder { } /// \brief Append a scalar null value - Status AppendNull() final; + Status AppendNull() final { + length_ += 1; + null_count_ += 1; - Status AppendNulls(int64_t length) final; + return values_builder_.AppendNull(); + } + + Status AppendNulls(int64_t length) final { + length_ += length; + null_count_ += length; + + return values_builder_.AppendNulls(length); + } /// \brief Append a whole dense array to the builder - Status AppendArray(const Array& array); + template + Status AppendArray( + typename std::enable_if::value, + const Array&>::type array) { + using ArrayType = typename TypeTraits::ArrayType; + + const auto& concrete_array = static_cast(array); + for (int64_t i = 0; i < array.length(); i++) { + if (array.IsNull(i)) { + ARROW_RETURN_NOT_OK(AppendNull()); + } else { + ARROW_RETURN_NOT_OK(Append(concrete_array.GetView(i))); + } + } + return Status::OK(); + } - void Reset() override; - Status Resize(int64_t capacity) override; - Status FinishInternal(std::shared_ptr* out) override; + template + Status AppendArray( + typename std::enable_if::value, + const Array&>::type array) { + if (!type_->Equals(*array.type())) { + return Status::Invalid( + "Cannot append FixedSizeBinary array with non-matching type"); + } + + const auto& concrete_array = static_cast(array); + for (int64_t i = 0; i < array.length(); i++) { + if (array.IsNull(i)) { + ARROW_RETURN_NOT_OK(AppendNull()); + } else { + ARROW_RETURN_NOT_OK(Append(concrete_array.GetValue(i))); + } + } + return Status::OK(); + } + + void Reset() override { + ArrayBuilder::Reset(); + values_builder_.Reset(); + memo_table_.reset(new internal::DictionaryMemoTable(type_)); + delta_offset_ = 0; + } + + Status Resize(int64_t capacity) override { + ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); + capacity = std::max(capacity, kMinBuilderCapacity); + + if (capacity_ == 0) { + // Initialize hash table + // XXX should we let the user pass additional size heuristics? + delta_offset_ = 0; + } + ARROW_RETURN_NOT_OK(values_builder_.Resize(capacity)); + capacity_ = values_builder_.capacity(); + return Status::OK(); + } + + Status FinishInternal(std::shared_ptr* out) override { + // Finalize indices array + ARROW_RETURN_NOT_OK(values_builder_.FinishInternal(out)); + + // Generate dictionary array from hash table contents + std::shared_ptr dictionary; + std::shared_ptr dictionary_data; + + ARROW_RETURN_NOT_OK( + memo_table_->GetArrayData(pool_, delta_offset_, &dictionary_data)); + dictionary = MakeArray(dictionary_data); + + // Set type of array data to the right dictionary type + (*out)->type = std::make_shared((*out)->type, dictionary); + + // Update internals for further uses of this DictionaryBuilder + delta_offset_ = memo_table_->size(); + values_builder_.Reset(); + + return Status::OK(); + } /// \cond FALSE using ArrayBuilder::Finish; @@ -116,8 +265,7 @@ class ARROW_EXPORT DictionaryBuilder : public ArrayBuilder { bool is_building_delta() { return delta_offset_ > 0; } protected: - class MemoTableImpl; - std::unique_ptr memo_table_; + std::unique_ptr memo_table_; int32_t delta_offset_; // Only used for FixedSizeBinaryType @@ -127,23 +275,56 @@ class ARROW_EXPORT DictionaryBuilder : public ArrayBuilder { }; template <> -class ARROW_EXPORT DictionaryBuilder : public ArrayBuilder { +class DictionaryBuilder : public ArrayBuilder { public: - DictionaryBuilder(const std::shared_ptr& type, MemoryPool* pool); - explicit DictionaryBuilder(MemoryPool* pool); + DictionaryBuilder(const std::shared_ptr& type, MemoryPool* pool) + : ArrayBuilder(type, pool), values_builder_(pool) {} + explicit DictionaryBuilder(MemoryPool* pool) + : ArrayBuilder(null(), pool), values_builder_(pool) {} - DictionaryBuilder(const std::shared_ptr& dictionary, MemoryPool* pool); + DictionaryBuilder(const std::shared_ptr& dictionary, MemoryPool* pool) + : ArrayBuilder(dictionary->type(), pool), values_builder_(pool) {} /// \brief Append a scalar null value - Status AppendNull() final; + Status AppendNull() final { + length_ += 1; + null_count_ += 1; + + return values_builder_.AppendNull(); + } + + Status AppendNulls(int64_t length) final { + length_ += length; + null_count_ += length; - Status AppendNulls(int64_t length) final; + return values_builder_.AppendNulls(length); + } /// \brief Append a whole dense array to the builder - Status AppendArray(const Array& array); + Status AppendArray(const Array& array) { + for (int64_t i = 0; i < array.length(); i++) { + ARROW_RETURN_NOT_OK(AppendNull()); + } + return Status::OK(); + } + + Status Resize(int64_t capacity) override { + ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); + capacity = std::max(capacity, kMinBuilderCapacity); - Status Resize(int64_t capacity) override; - Status FinishInternal(std::shared_ptr* out) override; + ARROW_RETURN_NOT_OK(values_builder_.Resize(capacity)); + capacity_ = values_builder_.capacity(); + return Status::OK(); + } + + Status FinishInternal(std::shared_ptr* out) override { + std::shared_ptr dictionary = std::make_shared(0); + + ARROW_RETURN_NOT_OK(values_builder_.FinishInternal(out)); + (*out)->type = std::make_shared((*out)->type, dictionary); + + return Status::OK(); + } /// \cond FALSE using ArrayBuilder::Finish; diff --git a/cpp/src/arrow/util/hashing.h b/cpp/src/arrow/util/hashing.h index 044d4e96624..27301585fc6 100644 --- a/cpp/src/arrow/util/hashing.h +++ b/cpp/src/arrow/util/hashing.h @@ -332,6 +332,16 @@ class HashTable { // XXX typedef memo_index_t int32_t ? +// ---------------------------------------------------------------------- +// A base class for memoization table. + +class MemoTable { + public: + virtual ~MemoTable() = default; + + virtual int32_t size() const = 0; +}; + // ---------------------------------------------------------------------- // A memoization table for memory-cheap scalar values. @@ -339,7 +349,7 @@ class HashTable { // index for each key. template class HashTableTemplateType = HashTable> -class ScalarMemoTable { +class ScalarMemoTable : public MemoTable { public: explicit ScalarMemoTable(int64_t entries = 0) : hash_table_(static_cast(entries)) {} @@ -382,7 +392,7 @@ class ScalarMemoTable { // The number of entries in the memo table // (which is also 1 + the largest memo index) - int32_t size() const { return static_cast(hash_table_.size()); } + int32_t size() const override { return static_cast(hash_table_.size()); } // Copy values starting from index `start` into `out_data` void CopyValues(int32_t start, Scalar* out_data) const { @@ -435,7 +445,7 @@ struct SmallScalarTraits class HashTableTemplateType = HashTable> -class SmallScalarMemoTable { +class SmallScalarMemoTable : public MemoTable { public: explicit SmallScalarMemoTable(int64_t entries = 0) { std::fill(value_to_index_, value_to_index_ + cardinality, -1); @@ -469,7 +479,7 @@ class SmallScalarMemoTable { // The number of entries in the memo table // (which is also 1 + the largest memo index) - int32_t size() const { return static_cast(index_to_value_.size()); } + int32_t size() const override { return static_cast(index_to_value_.size()); } // Copy values starting from index `start` into `out_data` void CopyValues(int32_t start, Scalar* out_data) const { @@ -498,7 +508,7 @@ class SmallScalarMemoTable { // ---------------------------------------------------------------------- // A memoization table for variable-sized binary data. -class BinaryMemoTable { +class BinaryMemoTable : public MemoTable { public: explicit BinaryMemoTable(int64_t entries = 0, int64_t values_size = -1) : hash_table_(static_cast(entries)) { @@ -576,7 +586,7 @@ class BinaryMemoTable { // The number of entries in the memo table // (which is also 1 + the largest memo index) - int32_t size() const { return static_cast(hash_table_.size()); } + int32_t size() const override { return static_cast(hash_table_.size()); } int32_t values_size() const { return static_cast(values_.size()); }