diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc index 97310830ddc..4ea9248f3e4 100644 --- a/cpp/src/arrow/array-test.cc +++ b/cpp/src/arrow/array-test.cc @@ -1200,7 +1200,7 @@ class TestFWBinaryArray : public ::testing::Test { }; TEST_F(TestFWBinaryArray, Builder) { - const int32_t byte_width = 10; + constexpr int32_t byte_width = 10; int64_t length = 4096; int64_t nbytes = length * byte_width; @@ -1215,8 +1215,7 @@ TEST_F(TestFWBinaryArray, Builder) { std::shared_ptr result; - auto CheckResult = [this, &length, &is_valid, &raw_data, - &byte_width](const Array& result) { + auto CheckResult = [&length, &is_valid, &raw_data, byte_width](const Array& result) { // Verify output const auto& fw_result = static_cast(result); @@ -1847,6 +1846,93 @@ TEST(TestFixedSizeBinaryDictionaryBuilder, InvalidTypeAppend) { ASSERT_RAISES(Invalid, builder.AppendArray(*fsb_array)); } +TEST(TestDecimalDictionaryBuilder, Basic) { + // Build the dictionary Array + const auto& decimal_type = arrow::decimal(2, 0); + DictionaryBuilder builder(decimal_type, default_memory_pool()); + + // Test data + std::vector test{12, 12, 11, 12}; + for (const auto& value : test) { + ASSERT_OK(builder.Append(value.ToBytes().data())); + } + + std::shared_ptr result; + ASSERT_OK(builder.Finish(&result)); + + // Build expected data + FixedSizeBinaryBuilder decimal_builder(decimal_type); + ASSERT_OK(decimal_builder.Append(Decimal128(12).ToBytes())); + ASSERT_OK(decimal_builder.Append(Decimal128(11).ToBytes())); + + std::shared_ptr decimal_array; + ASSERT_OK(decimal_builder.Finish(&decimal_array)); + auto dtype = arrow::dictionary(int8(), decimal_array); + + Int8Builder int_builder; + ASSERT_OK(int_builder.Append({0, 0, 1, 0})); + std::shared_ptr int_array; + ASSERT_OK(int_builder.Finish(&int_array)); + + DictionaryArray expected(dtype, int_array); + ASSERT_TRUE(expected.Equals(result)); +} + +TEST(TestDecimalDictionaryBuilder, DoubleTableSize) { + const auto& decimal_type = arrow::decimal(21, 0); + + // Build the dictionary Array + DictionaryBuilder builder(decimal_type, default_memory_pool()); + + // Build expected data + FixedSizeBinaryBuilder fsb_builder(decimal_type); + Int16Builder int_builder; + + // Fill with 1024 different values + for (int64_t i = 0; i < 1024; i++) { + const uint8_t bytes[] = {0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 12, + 12, + static_cast(i / 128), + static_cast(i % 128)}; + ASSERT_OK(builder.Append(bytes)); + ASSERT_OK(fsb_builder.Append(bytes)); + ASSERT_OK(int_builder.Append(static_cast(i))); + } + // Fill with an already existing value + const uint8_t known_value[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 1}; + for (int64_t i = 0; i < 1024; i++) { + ASSERT_OK(builder.Append(known_value)); + ASSERT_OK(int_builder.Append(1)); + } + + // Finalize result + std::shared_ptr result; + ASSERT_OK(builder.Finish(&result)); + + // Finalize expected data + std::shared_ptr fsb_array; + ASSERT_OK(fsb_builder.Finish(&fsb_array)); + + auto dtype = std::make_shared(int16(), fsb_array); + std::shared_ptr int_array; + ASSERT_OK(int_builder.Finish(&int_array)); + + DictionaryArray expected(dtype, int_array); + ASSERT_TRUE(expected.Equals(result)); +} + // ---------------------------------------------------------------------- // List tests diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index a194ab74572..0479dc55592 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -831,11 +831,11 @@ DictionaryBuilder::DictionaryBuilder( hash_table_(new PoolBuffer(pool)), hash_slots_(nullptr), dict_builder_(type, pool), - values_builder_(pool) { + values_builder_(pool), + byte_width_(static_cast(*type).byte_width()) { if (!::arrow::CpuInfo::initialized()) { ::arrow::CpuInfo::Init(); } - byte_width_ = static_cast(*type).byte_width(); } #ifndef ARROW_NO_DEPRECATED_API @@ -921,7 +921,7 @@ Status DictionaryBuilder::Append(const Scalar& value) { template Status DictionaryBuilder::AppendArray(const Array& array) { - const NumericArray& numeric_array = static_cast&>(array); + const auto& numeric_array = static_cast&>(array); for (int64_t i = 0; i < array.length(); i++) { if (array.IsNull(i)) { RETURN_NOT_OK(AppendNull()); @@ -938,8 +938,7 @@ Status DictionaryBuilder::AppendArray(const Array& array) { return Status::Invalid("Cannot append FixedSizeBinary array with non-matching type"); } - const FixedSizeBinaryArray& numeric_array = - static_cast(array); + const auto& numeric_array = static_cast(array); for (int64_t i = 0; i < array.length(); i++) { if (array.IsNull(i)) { RETURN_NOT_OK(AppendNull()); @@ -1493,6 +1492,7 @@ Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr& DICTIONARY_BUILDER_CASE(STRING, StringDictionaryBuilder); DICTIONARY_BUILDER_CASE(BINARY, BinaryDictionaryBuilder); DICTIONARY_BUILDER_CASE(FIXED_SIZE_BINARY, DictionaryBuilder); + DICTIONARY_BUILDER_CASE(DECIMAL, DictionaryBuilder); default: return Status::NotImplemented(type->ToString()); } @@ -1528,6 +1528,7 @@ Status EncodeArrayToDictionary(const Array& input, MemoryPool* pool, DICTIONARY_ARRAY_CASE(STRING, StringDictionaryBuilder); DICTIONARY_ARRAY_CASE(BINARY, BinaryDictionaryBuilder); DICTIONARY_ARRAY_CASE(FIXED_SIZE_BINARY, DictionaryBuilder); + DICTIONARY_ARRAY_CASE(DECIMAL, DictionaryBuilder); default: std::stringstream ss; ss << "Cannot encode array of type " << type->ToString(); diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index 28f3cb97232..da7386aafef 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -937,8 +937,8 @@ class ARROW_EXPORT DictionaryBuilder : public ArrayBuilder { class ARROW_EXPORT BinaryDictionaryBuilder : public DictionaryBuilder { public: - using DictionaryBuilder::DictionaryBuilder; using DictionaryBuilder::Append; + using DictionaryBuilder::DictionaryBuilder; Status Append(const uint8_t* value, int32_t length) { return Append(internal::WrappedBinary(value, length)); @@ -958,8 +958,8 @@ class ARROW_EXPORT BinaryDictionaryBuilder : public DictionaryBuilder { public: - using DictionaryBuilder::DictionaryBuilder; using DictionaryBuilder::Append; + using DictionaryBuilder::DictionaryBuilder; Status Append(const uint8_t* value, int32_t length) { return Append(internal::WrappedBinary(value, length)); diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc index ee838fa38f4..149cc36b615 100644 --- a/cpp/src/arrow/compute/cast.cc +++ b/cpp/src/arrow/compute/cast.cc @@ -490,7 +490,10 @@ static Status AllocateIfNotPreallocated(FunctionContext* ctx, const Array& input if (can_pre_allocate_values) { std::shared_ptr out_data; - if (!(is_primitive(out->type->id()) || out->type->id() == Type::FIXED_SIZE_BINARY)) { + const Type::type type_id = out->type->id(); + + if (!(is_primitive(type_id) || type_id == Type::FIXED_SIZE_BINARY || + type_id == Type::DECIMAL)) { std::stringstream ss; ss << "Cannot pre-allocate memory for type: " << out->type->ToString(); return Status::NotImplemented(ss.str()); @@ -614,6 +617,7 @@ class CastKernel : public UnaryKernel { FN(IN_TYPE, FloatType); \ FN(IN_TYPE, DoubleType); \ FN(IN_TYPE, FixedSizeBinaryType); \ + FN(IN_TYPE, DecimalType); \ FN(IN_TYPE, BinaryType); \ FN(IN_TYPE, StringType); diff --git a/cpp/src/arrow/io/io-file-test.cc b/cpp/src/arrow/io/io-file-test.cc index 636fbd842d0..ee3beabd9a8 100644 --- a/cpp/src/arrow/io/io-file-test.cc +++ b/cpp/src/arrow/io/io-file-test.cc @@ -393,9 +393,9 @@ TEST_F(TestReadableFile, ThreadSafety) { ASSERT_OK(ReadableFile::Open(path_, &pool, &file_)); std::atomic correct_count(0); - const int niter = 10000; + constexpr int niter = 10000; - auto ReadData = [&correct_count, &data, niter, this]() { + auto ReadData = [&correct_count, &data, this, niter]() { std::shared_ptr buffer; for (int i = 0; i < niter; ++i) { @@ -587,9 +587,9 @@ TEST_F(TestMemoryMappedFile, ThreadSafety) { static_cast(data.size()))); std::atomic correct_count(0); - const int niter = 10000; + constexpr int niter = 10000; - auto ReadData = [&correct_count, &data, niter, &file]() { + auto ReadData = [&correct_count, &data, &file, niter]() { std::shared_ptr buffer; for (int i = 0; i < niter; ++i) { diff --git a/cpp/src/arrow/io/io-hdfs-test.cc b/cpp/src/arrow/io/io-hdfs-test.cc index eaf638f2002..5305b477462 100644 --- a/cpp/src/arrow/io/io-hdfs-test.cc +++ b/cpp/src/arrow/io/io-hdfs-test.cc @@ -437,7 +437,7 @@ TYPED_TEST(TestHadoopFileSystem, ThreadSafety) { ASSERT_OK(this->client_->OpenReadable(src_path, &file)); std::atomic correct_count(0); - const int niter = 1000; + constexpr int niter = 1000; auto ReadData = [&file, &correct_count, &data, niter]() { for (int i = 0; i < niter; ++i) {