From ec7836974b7360bbfd76ae11e554942d6ef88069 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sun, 21 Jun 2020 21:21:29 -0500 Subject: [PATCH 1/3] Add comments explaining high level detail about ChunkedArray class and usage --- cpp/src/arrow/chunked_array.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h index 7d920b65cdf..bd0bda81b9c 100644 --- a/cpp/src/arrow/chunked_array.h +++ b/cpp/src/arrow/chunked_array.h @@ -38,6 +38,32 @@ class MemoryPool; /// \class ChunkedArray /// \brief A data structure managing a list of primitive Arrow arrays logically /// as one large array +/// +/// Data chunking is treated throughout this project is handled largely as an +/// implementation detail for performance and memory use +/// optimization. ChunkedArray allows Array objects to be collected and +/// interpreted as a single logical array without requiring an expensive +/// concatenation step. +/// +/// In some cases, data produced by a function may exceed the capacity of an +/// Array (like BinaryArray or StringArray) and so returning multiple Arrays is +/// the only possibility. In these cases, we recommend returning a ChunkedArray +/// instead of a vector of Arrays or some alternative. +/// +/// When data is processed in parallel, it may not be practical or possible to +/// create large contiguous and write output into them. With some data types, +/// like binary and string types, it is not possible at all to produce +/// non-chunked array outputs without requiring a concatenation step at the end +/// of processing. +/// +/// Application developers may tune chunk sizes based on analysis of +/// performance profiles but many developer-users will not need to be +/// especially concerned with the chunking details. +/// +/// Preserving the chunk layout/sizes in processing steps is generally not +/// considered to be a contract in APIs. 
A function may decide to alter the +/// chunk of its result. Similarly, APIs accepting multiple ChunkedArray inputs +/// should not expect the chunk layout to be the same in each input. class ARROW_EXPORT ChunkedArray { public: /// \brief Construct a chunked array from a vector of arrays From 016e252cd91ee98d3562ea475507f37128553de4 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sun, 21 Jun 2020 21:23:00 -0500 Subject: [PATCH 2/3] Fix typo --- cpp/src/arrow/chunked_array.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h index bd0bda81b9c..b1e18aa735e 100644 --- a/cpp/src/arrow/chunked_array.h +++ b/cpp/src/arrow/chunked_array.h @@ -51,10 +51,10 @@ class MemoryPool; /// instead of a vector of Arrays or some alternative. /// /// When data is processed in parallel, it may not be practical or possible to -/// create large contiguous and write output into them. With some data types, -/// like binary and string types, it is not possible at all to produce -/// non-chunked array outputs without requiring a concatenation step at the end -/// of processing. +/// create large contiguous memory allocations and write output into them. With +/// some data types, like binary and string types, it is not possible at all to +/// produce non-chunked array outputs without requiring a concatenation step at +/// the end of processing. 
/// /// Application developers may tune chunk sizes based on analysis of /// performance profiles but many developer-users will not need to be From 75765a1ba006bd5616a80b173ca6eaa1b6a65da9 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 22 Jun 2020 11:44:31 +0200 Subject: [PATCH 3/3] Nits --- cpp/src/arrow/chunked_array.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h index b1e18aa735e..c16d66f77b0 100644 --- a/cpp/src/arrow/chunked_array.h +++ b/cpp/src/arrow/chunked_array.h @@ -39,11 +39,11 @@ class MemoryPool; /// \brief A data structure managing a list of primitive Arrow arrays logically /// as one large array /// -/// Data chunking is treated throughout this project is handled largely as an -/// implementation detail for performance and memory use -/// optimization. ChunkedArray allows Array objects to be collected and -/// interpreted as a single logical array without requiring an expensive -/// concatenation step. +/// Data chunking is treated throughout this project largely as an +/// implementation detail for performance and memory use optimization. +/// ChunkedArray allows Array objects to be collected and interpreted +/// as a single logical array without requiring an expensive concatenation +/// step. /// /// In some cases, data produced by a function may exceed the capacity of an /// Array (like BinaryArray or StringArray) and so returning multiple Arrays is @@ -62,8 +62,8 @@ class MemoryPool; /// /// Preserving the chunk layout/sizes in processing steps is generally not /// considered to be a contract in APIs. A function may decide to alter the -/// chunk of its result. Similarly, APIs accepting multiple ChunkedArray inputs -/// should not expect the chunk layout to be the same in each input. +/// chunking of its result. Similarly, APIs accepting multiple ChunkedArray +/// inputs should not expect the chunk layout to be the same in each input. 
class ARROW_EXPORT ChunkedArray { public: /// \brief Construct a chunked array from a vector of arrays