From c31f95f1bc685b8478806d526175cf13df7b58c9 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Mon, 5 Aug 2019 11:22:56 -0400 Subject: [PATCH 1/4] add install prefix to build summaries --- cpp/cmake_modules/DefineOptions.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 639594b0c54..6d34bd28272 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -336,6 +336,7 @@ macro(config_summary) message(STATUS " Generator: ${CMAKE_GENERATOR}") message(STATUS " Build type: ${CMAKE_BUILD_TYPE}") message(STATUS " Source directory: ${CMAKE_CURRENT_SOURCE_DIR}") + message(STATUS " Install prefix: ${CMAKE_INSTALL_PREFIX}") if(${CMAKE_EXPORT_COMPILE_COMMANDS}) message( STATUS " Compile commands: ${CMAKE_CURRENT_BINARY_DIR}/compile_commands.json") @@ -415,6 +416,7 @@ macro(config_summary) file(APPEND ${summary} "\"compile_commands\": " "\"${CMAKE_CURRENT_BINARY_DIR}/compile_commands.json\",\n") endif() + file(APPEND ${summary} "\"install_prefix\": \"${CMAKE_INSTALL_PREFIX}\",\n") file(APPEND ${summary} "\"arrow_version\": \"${ARROW_VERSION}\"\n") file(APPEND ${summary} "}\n") endif() From a5398da341e3b31b5c4370933a23f9df1dd230a6 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Mon, 5 Aug 2019 14:57:06 -0400 Subject: [PATCH 2/4] ChunkedBinaryBuilder should only grow when necessary --- cpp/src/arrow/array/builder_binary.cc | 7 ++++++- cpp/src/arrow/array/builder_binary.h | 9 ++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/array/builder_binary.cc b/cpp/src/arrow/array/builder_binary.cc index b83897d7e19..99e798ae61e 100644 --- a/cpp/src/arrow/array/builder_binary.cc +++ b/cpp/src/arrow/array/builder_binary.cc @@ -179,8 +179,13 @@ Status ChunkedBinaryBuilder::Reserve(int64_t values) { return Status::OK(); } + auto current_capacity = builder_->capacity(); auto min_capacity = builder_->length() + values; - auto new_capacity = BufferBuilder::GrowByFactor(builder_->capacity(), min_capacity); + if (current_capacity >= min_capacity) { + return Status::OK(); + } + + auto new_capacity = BufferBuilder::GrowByFactor(current_capacity, min_capacity); if (ARROW_PREDICT_TRUE(new_capacity <= kListMaximumElements)) { return builder_->Resize(new_capacity); } diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h index 4c779d2895d..8941280c935 100644 --- a/cpp/src/arrow/array/builder_binary.h +++ b/cpp/src/arrow/array/builder_binary.h @@ -509,8 +509,8 @@ namespace internal { class ARROW_EXPORT ChunkedBinaryBuilder { public: - ChunkedBinaryBuilder(int32_t max_chunk_value_length, - MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); + explicit ChunkedBinaryBuilder(int32_t max_chunk_value_length, + MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); ChunkedBinaryBuilder(int32_t max_chunk_value_length, int32_t max_chunk_length, MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); @@ -534,9 +534,8 @@ class ARROW_EXPORT ChunkedBinaryBuilder { } if (ARROW_PREDICT_FALSE(builder_->length() == max_chunk_length_)) { - // The current item would cause builder_->value_data_length() to exceed - // max_chunk_size_, so finish this chunk and append the current item to the next - // chunk + // The current item would cause builder_->length() to exceed max_chunk_length_, so + // finish this chunk and append the current item to the next chunk ARROW_RETURN_NOT_OK(NextChunk()); } From 9c5f9aad99067591ede26bc2ee06eb59dec14cca Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Mon, 5 Aug 2019 22:46:15 -0400 Subject: [PATCH 3/4] add unit test for new Reserve logic --- cpp/src/arrow/array-binary-test.cc | 13 +++++++++++++ cpp/src/arrow/array/builder_binary.cc | 6 +++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/array-binary-test.cc b/cpp/src/arrow/array-binary-test.cc index 85a1620f0f1..11efda315d5 100644 --- a/cpp/src/arrow/array-binary-test.cc +++ b/cpp/src/arrow/array-binary-test.cc @@ -522,6 +522,19 @@ TEST_F(TestChunkedBinaryBuilder, BasicOperation) { } } +TEST_F(TestChunkedBinaryBuilder, Reserve) { + const int32_t chunksize = 1000; + Init(chunksize); + auto bytes_before_reserve = default_memory_pool()->bytes_allocated(); + ASSERT_OK(builder_->Reserve(chunksize / 2)); + for (int i = 0; i < 8; ++i) { + ASSERT_OK(builder_->Reserve(chunksize / 2)); + } + // no new memory will be allocated since capacity was sufficient for the loop's + // Reserve() calls + ASSERT_EQ(default_memory_pool()->bytes_allocated(), bytes_before_reserve); +} + TEST_F(TestChunkedBinaryBuilder, NoData) { Init(1000); diff --git a/cpp/src/arrow/array/builder_binary.cc b/cpp/src/arrow/array/builder_binary.cc index 99e798ae61e..9a4ad74c26f 100644 --- a/cpp/src/arrow/array/builder_binary.cc +++ b/cpp/src/arrow/array/builder_binary.cc @@ -186,12 +186,12 @@ Status ChunkedBinaryBuilder::Reserve(int64_t values) { } auto new_capacity = BufferBuilder::GrowByFactor(current_capacity, min_capacity); - if (ARROW_PREDICT_TRUE(new_capacity <= kListMaximumElements)) { + if (ARROW_PREDICT_TRUE(new_capacity <= max_chunk_length_)) { return builder_->Resize(new_capacity); } - extra_capacity_ = new_capacity - kListMaximumElements; - return builder_->Resize(kListMaximumElements); + extra_capacity_ = new_capacity - max_chunk_length_; + return builder_->Resize(max_chunk_length_); } } // namespace internal From 5ddc25697a9afd9f78c8b5f2f894a8a915c49a3a Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 6 Aug 2019 10:05:23 -0400 Subject: [PATCH 4/4] fix test, add clarification to Reserve() docstring --- cpp/src/arrow/array-binary-test.cc | 5 +++-- cpp/src/arrow/array/builder_base.h | 7 +++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/array-binary-test.cc b/cpp/src/arrow/array-binary-test.cc index 11efda315d5..ed5389549e1 100644 --- a/cpp/src/arrow/array-binary-test.cc +++ b/cpp/src/arrow/array-binary-test.cc @@ -523,16 +523,17 @@ TEST_F(TestChunkedBinaryBuilder, BasicOperation) { } TEST_F(TestChunkedBinaryBuilder, Reserve) { + // ARROW-6060 const int32_t chunksize = 1000; Init(chunksize); - auto bytes_before_reserve = default_memory_pool()->bytes_allocated(); ASSERT_OK(builder_->Reserve(chunksize / 2)); + auto bytes_after_first_reserve = default_memory_pool()->bytes_allocated(); for (int i = 0; i < 8; ++i) { ASSERT_OK(builder_->Reserve(chunksize / 2)); } // no new memory will be allocated since capacity was sufficient for the loop's // Reserve() calls - ASSERT_EQ(default_memory_pool()->bytes_allocated(), bytes_before_reserve); + ASSERT_EQ(default_memory_pool()->bytes_allocated(), bytes_after_first_reserve); } TEST_F(TestChunkedBinaryBuilder, NoData) { diff --git a/cpp/src/arrow/array/builder_base.h b/cpp/src/arrow/array/builder_base.h index 36f6c7a2a4d..73934bf891d 100644 --- a/cpp/src/arrow/array/builder_base.h +++ b/cpp/src/arrow/array/builder_base.h @@ -79,9 +79,12 @@ class ARROW_EXPORT ArrayBuilder { /// \return Status virtual Status Resize(int64_t capacity); - /// \brief Ensure that there is enough space allocated to add the indicated - /// number of elements without any further calls to Resize. Overallocation is + /// \brief Ensure that there is enough space allocated to append the indicated + /// number of elements without any further reallocation. Overallocation is /// used in order to minimize the impact of incremental Reserve() calls. + /// Note that additional_capacity is relative to the current number of elements + /// rather than to the current capacity, so calls to Reserve() which are not + /// interspersed with addition of new elements may not increase the capacity. /// /// \param[in] additional_capacity the number of additional array values /// \return Status