From 7af4ee2bc472cd13f6996617e946b216f0a38623 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 31 Jul 2023 14:50:18 -0400 Subject: [PATCH 01/14] Add more docs and examples for ListArray and OffsetsBuffer --- arrow-array/src/array/list_array.rs | 125 +++++++++++++++++++++++++++- arrow-buffer/src/buffer/offset.rs | 38 ++++++++- 2 files changed, 159 insertions(+), 4 deletions(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 0c1fea6f4161..c79be5b80989 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -54,11 +54,130 @@ impl OffsetSizeTrait for i64 { const PREFIX: &'static str = "Large"; } -/// An array of [variable length arrays](https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout) +/// An array of [variable length lists], similar to JSON arrays +/// (e.g. `["A", "B", "C"]`). /// -/// See [`ListArray`] and [`LargeListArray`]` +/// Lists are represented using `offsets` into a `values` child +/// array. Offsets are stored in two adjacent entries of an +/// [`OffsetBuffer`]. /// -/// See [`GenericListBuilder`](crate::builder::GenericListBuilder) for how to construct a [`GenericListArray`] +/// Arrow defines [`ListArray`] with `i32` offsets and +/// [`LargeListArray`]` with `i64` offsets. +/// +/// Use [`GenericListBuilder`](crate::builder::GenericListBuilder) to +/// construct a [`GenericListArray`]. +/// +/// # Representation +/// +/// A [`ListArray`] can represent a list of values of any other +/// supported Arrow type. Each element of the `ListArray` itself is a +/// a list which mayb be empty, may contain NULL and non-null values, +/// or may itself be NULL. 
+/// +/// For example, this `ListArray` stores lists of strings: +/// +/// ```text +/// ┌─────────────┐ +/// │ [A,B,C] │ +/// ├─────────────┤ +/// │ [] (empty) │ +/// ├─────────────┤ +/// │ NULL │ +/// ├─────────────┤ +/// │ [D] │ +/// ├─────────────┤ +/// │ [NULL, F] │ +/// └─────────────┘ +/// ``` +/// +/// The `values` of this `ListArray`s are stored in a child +/// [`StringArray`] and the offsets are stored in an [`OffsetBuffer`] +/// as shown in the following diagram. The logical values and offsets +/// are shown on the left, and the actual `ListArray` encoding on the right +/// +/// ```text +/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ +/// ┌ ─ ─ ─ ─ ─ ─ ┐ │ +/// ┌─────────────┐ ┌───────┐ │ ┌───┐ ┌───┐ ┌───┐ ┌───┐ +/// │ [A,B,C] │ │ (0,3) │ │ 1 │ │ 0 │ │ │ 1 │ │ A │ │ 0 │ +/// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ +/// │ [] (empty) │ │ (3,3) │ │ 1 │ │ 3 │ │ │ 1 │ │ B │ │ 1 │ +/// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ +/// │ NULL │ │ (3,4) │ │ 0 │ │ 4 │ │ │ 1 │ │ C │ │ 2 │ +/// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ +/// │ [D] │ │ (4,5) │ │ 1 │ │ 5 │ │ │ 1 │ │ ? │ │ 3 │ +/// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ +/// │ [NULL, F] │ │ (5,7) │ │ 1 │ │ 5 │ │ │ 0 │ │ D │ │ 4 │ +/// └─────────────┘ └───────┘ │ └───┘ ├───┤ ├───┤ ├───┤ +/// │ 7 │ │ │ 1 │ │ ? 
│ │ 5 │ +/// │ Validity └───┘ ├───┤ ├───┤ +/// Logical Logical (nulls) Offsets │ │ 1 │ │ F │ │ 6 │ +/// Values Offsets │ └───┘ └───┘ +/// │ Values │ │ +/// (offsets[i], │ ListArray (Array) +/// offsets[i+1]) └ ─ ─ ─ ─ ─ ─ ┘ │ +/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ +/// +/// +/// ``` +/// +/// # Example of constructing a [`ListArray`] +/// +/// Here is code that constructs the [`ListArray`' in the example above +/// +/// ``` +/// # use std::sync::Arc; +/// # use arrow::{array::{ListBuilder, StringBuilder, ArrayRef, StringArray, Array}, +/// # util::pretty::pretty_format_columns}; +/// # +/// let values_builder = StringBuilder::new(); +/// let mut builder = ListBuilder::new(values_builder); +/// +/// // [A, B, C] +/// builder.values().append_value("A"); +/// builder.values().append_value("B"); +/// builder.values().append_value("C"); +/// builder.append(true); +/// +/// // [ ] (empty list) +/// builder.append(true); +/// +/// // Null +/// builder.values().append_value("?"); // irrelevant +/// builder.append(false); +/// +/// // [D] +/// builder.values().append_value("D"); +/// builder.append(true); +/// +/// // [NULL, F] +/// builder.values().append_null(); +/// builder.values().append_value("F"); +/// builder.append(true); +/// +/// // Build the array +/// let array = builder.finish(); +/// +/// // Values is a string array +/// // "A", "B" "C", "?", "D", NULL, "F" +/// assert_eq!( +/// array.values().as_ref(), +/// &StringArray::from(vec![ +/// Some("A"), Some("B"), Some("C"), +/// Some("?"), Some("D"), None, +/// Some("F") +/// ]) as &dyn Array +/// ); +/// +/// // Offsets are indexes into the values array +/// assert_eq!( +/// array.value_offsets(), +/// &[0, 3, 3, 4, 5, 7] +/// ); +/// ``` +/// +/// [`StringArray`]: crate::array::StringArray +/// [variable length lists]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout pub struct GenericListArray { data_type: DataType, nulls: Option, diff --git 
a/arrow-buffer/src/buffer/offset.rs b/arrow-buffer/src/buffer/offset.rs index 0111d12fbab1..e64ad41a2cd2 100644 --- a/arrow-buffer/src/buffer/offset.rs +++ b/arrow-buffer/src/buffer/offset.rs @@ -19,7 +19,43 @@ use crate::buffer::ScalarBuffer; use crate::{ArrowNativeType, MutableBuffer}; use std::ops::Deref; -/// A non-empty buffer of monotonically increasing, positive integers +/// A non-empty buffer of monotonically increasing, positive integers. +/// +/// [`OffsetBuffer`] are used to represent ranges of offsets. An +/// `OffsetBuffer` of `N+1` items contains `N` such ranges. The start +/// offset for element `i` is `offsets[i]` and the end offset is +/// `offsets[i+1]`. Equal offsets represent an empty range. +/// +/// # Example +/// +/// This example shows how 5 distinct ranges, are represented using a +/// 6 entry `OffsetBuffer`. The first entry `(0, 3)` represents the +/// three offsets `0, 1, 2` entry `(3,3)` represent no offsets +/// (e.g. an empty list). +/// +/// ```text +/// ┌───────┐ ┌───┐ +/// │ (0,3) │ │ 0 │ +/// ├───────┤ ├───┤ +/// │ (3,3) │ │ 3 │ +/// ├───────┤ ├───┤ +/// │ (3,4) │ │ 4 │ +/// ├───────┤ ├───┤ +/// │ (4,5) │ │ 5 │ +/// ├───────┤ ├───┤ +/// │ (5,7) │ │ 5 │ +/// └───────┘ ├───┤ +/// │ 7 │ +/// └───┘ +/// +/// Offsets Buffer +/// Logical +/// Offsets +/// +/// (offsets[i], +/// offsets[i+1]) +/// ``` + #[derive(Debug, Clone)] pub struct OffsetBuffer(ScalarBuffer); From ff72a5efe28928b2b891a469e3c7fb0fb4855607 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 31 Jul 2023 15:14:13 -0400 Subject: [PATCH 02/14] fix docs --- arrow-array/src/array/list_array.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index c79be5b80989..3b6df10193c1 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -127,8 +127,7 @@ impl OffsetSizeTrait for i64 { /// /// ``` /// # use std::sync::Arc; -/// # use 
arrow::{array::{ListBuilder, StringBuilder, ArrayRef, StringArray, Array}, -/// # util::pretty::pretty_format_columns}; +/// # use arrow_array::{builder::ListBuilder, builder::StringBuilder, ArrayRef, StringArray, Array}; /// # /// let values_builder = StringBuilder::new(); /// let mut builder = ListBuilder::new(values_builder); From b593aafef0a96d2fdba9b707b285d7fb7db70f69 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 31 Jul 2023 15:53:37 -0400 Subject: [PATCH 03/14] Update arrow-array/src/array/list_array.rs --- arrow-array/src/array/list_array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 3b6df10193c1..48a615a3fdd2 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -123,7 +123,7 @@ impl OffsetSizeTrait for i64 { /// /// # Example of constructing a [`ListArray`] /// -/// Here is code that constructs the [`ListArray`' in the example above +/// Here is code that constructs the [`ListArray`] in the example above /// /// ``` /// # use std::sync::Arc; From e051478adf5235e09021f1e8f9f01f1059dd2718 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 31 Jul 2023 15:57:26 -0400 Subject: [PATCH 04/14] Update arrow-array/src/array/list_array.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow-array/src/array/list_array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 48a615a3fdd2..15c616ab48c8 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -103,7 +103,7 @@ impl OffsetSizeTrait for i64 { /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ /// │ [] (empty) │ │ (3,3) │ │ 1 │ │ 3 │ │ │ 1 │ │ B │ │ 1 │ /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ -/// │ NULL │ │ (3,4) │ │ 0 │ │ 4 │ │ │ 1 │ │ C │ │ 2 │ +/// │ NULL │ │ (3,4) │ 
│ 0 │ │ 3 │ │ │ 1 │ │ C │ │ 2 │ /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ /// │ [D] │ │ (4,5) │ │ 1 │ │ 5 │ │ │ 1 │ │ ? │ │ 3 │ /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ From 88efb5764aec421a7f3c43123a359c439d18c067 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 31 Jul 2023 15:58:29 -0400 Subject: [PATCH 05/14] Update arrow-array/src/array/list_array.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow-array/src/array/list_array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 15c616ab48c8..8334c078514f 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -105,7 +105,7 @@ impl OffsetSizeTrait for i64 { /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ /// │ NULL │ │ (3,4) │ │ 0 │ │ 3 │ │ │ 1 │ │ C │ │ 2 │ /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ -/// │ [D] │ │ (4,5) │ │ 1 │ │ 5 │ │ │ 1 │ │ ? │ │ 3 │ +/// │ [D] │ │ (4,5) │ │ 1 │ │ 4 │ │ 0│ │ │ ? 
│ │ 3 │ /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ /// │ [NULL, F] │ │ (5,7) │ │ 1 │ │ 5 │ │ │ 0 │ │ D │ │ 4 │ /// └─────────────┘ └───────┘ │ └───┘ ├───┤ ├───┤ ├───┤ From c0b3411a93433f73d288a7bddfd493aef30eb8ef Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 31 Jul 2023 15:58:51 -0400 Subject: [PATCH 06/14] Update arrow-array/src/array/list_array.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow-array/src/array/list_array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 8334c078514f..7b49d570a2fb 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -107,7 +107,7 @@ impl OffsetSizeTrait for i64 { /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ /// │ [D] │ │ (4,5) │ │ 1 │ │ 4 │ │ 0│ │ │ ? │ │ 3 │ /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ -/// │ [NULL, F] │ │ (5,7) │ │ 1 │ │ 5 │ │ │ 0 │ │ D │ │ 4 │ +/// │ [NULL, F] │ │ (5,7) │ │ 1 │ │ 5 │ │ │ 1 │ │ D │ │ 4 │ /// └─────────────┘ └───────┘ │ └───┘ ├───┤ ├───┤ ├───┤ /// │ 7 │ │ │ 1 │ │ ? 
│ │ 5 │ /// │ Validity └───┘ ├───┤ ├───┤ From 05faa4572c01fe7e2b86c99c8c7b4e57e9cdfcf9 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 31 Jul 2023 15:58:56 -0400 Subject: [PATCH 07/14] Update arrow-array/src/array/list_array.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow-array/src/array/list_array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 7b49d570a2fb..b3a6577f1c15 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -109,7 +109,7 @@ impl OffsetSizeTrait for i64 { /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ /// │ [NULL, F] │ │ (5,7) │ │ 1 │ │ 5 │ │ │ 1 │ │ D │ │ 4 │ /// └─────────────┘ └───────┘ │ └───┘ ├───┤ ├───┤ ├───┤ -/// │ 7 │ │ │ 1 │ │ ? │ │ 5 │ +/// │ 7 │ │ │ 0 │ │ ? │ │ 5 │ /// │ Validity └───┘ ├───┤ ├───┤ /// Logical Logical (nulls) Offsets │ │ 1 │ │ F │ │ 6 │ /// Values Offsets │ └───┘ └───┘ From f03fd91d5c3fb28d5a7952cf18bb7201355f36a7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 31 Jul 2023 16:00:12 -0400 Subject: [PATCH 08/14] ascii repair --- arrow-array/src/array/list_array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index b3a6577f1c15..b88dcdbc393a 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -105,7 +105,7 @@ impl OffsetSizeTrait for i64 { /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ /// │ NULL │ │ (3,4) │ │ 0 │ │ 3 │ │ │ 1 │ │ C │ │ 2 │ /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ -/// │ [D] │ │ (4,5) │ │ 1 │ │ 4 │ │ 0│ │ │ ? │ │ 3 │ +/// │ [D] │ │ (4,5) │ │ 1 │ │ 4 │ │ │ 0 │ │ ? 
│ │ 3 │ /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ /// │ [NULL, F] │ │ (5,7) │ │ 1 │ │ 5 │ │ │ 1 │ │ D │ │ 4 │ /// └─────────────┘ └───────┘ │ └───┘ ├───┤ ├───┤ ├───┤ From 60a6e065ecc6472ee1c13016b1450bd6f537b907 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 1 Aug 2023 06:37:41 -0400 Subject: [PATCH 09/14] Apply suggestions from code review Co-authored-by: Liang-Chi Hsieh --- arrow-array/src/array/list_array.rs | 4 ++-- arrow-buffer/src/buffer/offset.rs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index b88dcdbc393a..30ec5edb0d03 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -70,8 +70,8 @@ impl OffsetSizeTrait for i64 { /// # Representation /// /// A [`ListArray`] can represent a list of values of any other -/// supported Arrow type. Each element of the `ListArray` itself is a -/// a list which mayb be empty, may contain NULL and non-null values, +/// supported Arrow type. Each element of the `ListArray` itself is +/// a list which may be empty, may contain NULL and non-null values, /// or may itself be NULL. /// /// For example, this `ListArray` stores lists of strings: diff --git a/arrow-buffer/src/buffer/offset.rs b/arrow-buffer/src/buffer/offset.rs index e64ad41a2cd2..fede32c57924 100644 --- a/arrow-buffer/src/buffer/offset.rs +++ b/arrow-buffer/src/buffer/offset.rs @@ -30,7 +30,7 @@ use std::ops::Deref; /// /// This example shows how 5 distinct ranges, are represented using a /// 6 entry `OffsetBuffer`. The first entry `(0, 3)` represents the -/// three offsets `0, 1, 2` entry `(3,3)` represent no offsets +/// three offsets `0, 1, 2`. The entry `(3,3)` represent no offsets /// (e.g. an empty list). 
/// /// ```text @@ -39,9 +39,9 @@ use std::ops::Deref; /// ├───────┤ ├───┤ /// │ (3,3) │ │ 3 │ /// ├───────┤ ├───┤ -/// │ (3,4) │ │ 4 │ +/// │ (3,4) │ │ 3 │ /// ├───────┤ ├───┤ -/// │ (4,5) │ │ 5 │ +/// │ (4,5) │ │ 4 │ /// ├───────┤ ├───┤ /// │ (5,7) │ │ 5 │ /// └───────┘ ├───┤ From ecd5557f9478dd717b3f9126de203dcc535bf231 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 1 Aug 2023 06:37:52 -0400 Subject: [PATCH 10/14] Update arrow-array/src/array/list_array.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow-array/src/array/list_array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 30ec5edb0d03..bef80f17465f 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -165,7 +165,7 @@ impl OffsetSizeTrait for i64 { /// Some("A"), Some("B"), Some("C"), /// Some("?"), Some("D"), None, /// Some("F") -/// ]) as &dyn Array +/// ]) /// ); /// /// // Offsets are indexes into the values array From 8e6a56d1cde1cd5cf5a7a25e16f7e5f241cd51b9 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 2 Aug 2023 06:04:29 -0400 Subject: [PATCH 11/14] Clarify empty list [] documenation and typos --- arrow-array/src/array/list_array.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index bef80f17465f..5863a74bce10 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -62,7 +62,7 @@ impl OffsetSizeTrait for i64 { /// [`OffsetBuffer`]. /// /// Arrow defines [`ListArray`] with `i32` offsets and -/// [`LargeListArray`]` with `i64` offsets. +/// [`LargeListArray`] with `i64` offsets. /// /// Use [`GenericListBuilder`](crate::builder::GenericListBuilder) to /// construct a [`GenericListArray`]. 
@@ -74,13 +74,15 @@ impl OffsetSizeTrait for i64 { /// a list which may be empty, may contain NULL and non-null values, /// or may itself be NULL. /// -/// For example, this `ListArray` stores lists of strings: +/// For example, the `ListArray` shown in the following diagram stores +/// lists of strings. Note that `[]` represents an empty (length +/// 0), but non NULL list. /// /// ```text /// ┌─────────────┐ /// │ [A,B,C] │ /// ├─────────────┤ -/// │ [] (empty) │ +/// │ [] │ /// ├─────────────┤ /// │ NULL │ /// ├─────────────┤ @@ -90,10 +92,10 @@ impl OffsetSizeTrait for i64 { /// └─────────────┘ /// ``` /// -/// The `values` of this `ListArray`s are stored in a child -/// [`StringArray`] and the offsets are stored in an [`OffsetBuffer`] -/// as shown in the following diagram. The logical values and offsets -/// are shown on the left, and the actual `ListArray` encoding on the right +/// The `values` are stored in a child [`StringArray`] and the offsets +/// are stored in an [`OffsetBuffer`] as shown in the following +/// diagram. The logical values and offsets are shown on the left, and +/// the actual `ListArray` encoding on the right. 
/// /// ```text /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ @@ -101,7 +103,7 @@ impl OffsetSizeTrait for i64 { /// ┌─────────────┐ ┌───────┐ │ ┌───┐ ┌───┐ ┌───┐ ┌───┐ /// │ [A,B,C] │ │ (0,3) │ │ 1 │ │ 0 │ │ │ 1 │ │ A │ │ 0 │ /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ -/// │ [] (empty) │ │ (3,3) │ │ 1 │ │ 3 │ │ │ 1 │ │ B │ │ 1 │ +/// │ [] │ │ (3,3) │ │ 1 │ │ 3 │ │ │ 1 │ │ B │ │ 1 │ /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ /// │ NULL │ │ (3,4) │ │ 0 │ │ 3 │ │ │ 1 │ │ C │ │ 2 │ /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ From 151564d5abbbc6337124fcffb4748181b3cbcc4a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 2 Aug 2023 06:08:40 -0400 Subject: [PATCH 12/14] Move example to GenericLisBuilder --- arrow-array/src/array/list_array.rs | 54 ------------------- .../src/builder/generic_list_builder.rs | 54 +++++++++++++++++++ 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 5863a74bce10..b04d59b60835 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -123,60 +123,6 @@ impl OffsetSizeTrait for i64 { /// /// ``` /// -/// # Example of constructing a [`ListArray`] -/// -/// Here is code that constructs the [`ListArray`] in the example above -/// -/// ``` -/// # use std::sync::Arc; -/// # use arrow_array::{builder::ListBuilder, builder::StringBuilder, ArrayRef, StringArray, Array}; -/// # -/// let values_builder = StringBuilder::new(); -/// let mut builder = ListBuilder::new(values_builder); -/// -/// // [A, B, C] -/// builder.values().append_value("A"); -/// builder.values().append_value("B"); -/// builder.values().append_value("C"); -/// builder.append(true); -/// -/// // [ ] (empty list) -/// builder.append(true); -/// -/// // Null -/// builder.values().append_value("?"); // irrelevant -/// builder.append(false); -/// -/// // [D] -/// builder.values().append_value("D"); -/// 
builder.append(true); -/// -/// // [NULL, F] -/// builder.values().append_null(); -/// builder.values().append_value("F"); -/// builder.append(true); -/// -/// // Build the array -/// let array = builder.finish(); -/// -/// // Values is a string array -/// // "A", "B" "C", "?", "D", NULL, "F" -/// assert_eq!( -/// array.values().as_ref(), -/// &StringArray::from(vec![ -/// Some("A"), Some("B"), Some("C"), -/// Some("?"), Some("D"), None, -/// Some("F") -/// ]) -/// ); -/// -/// // Offsets are indexes into the values array -/// assert_eq!( -/// array.value_offsets(), -/// &[0, 3, 3, 4, 5, 7] -/// ); -/// ``` -/// /// [`StringArray`]: crate::array::StringArray /// [variable length lists]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout pub struct GenericListArray { diff --git a/arrow-array/src/builder/generic_list_builder.rs b/arrow-array/src/builder/generic_list_builder.rs index b31814615fc9..4de32355cca9 100644 --- a/arrow-array/src/builder/generic_list_builder.rs +++ b/arrow-array/src/builder/generic_list_builder.rs @@ -28,6 +28,60 @@ use std::sync::Arc; /// /// Use [`ListBuilder`] to build [`ListArray`]s and [`LargeListBuilder`] to build [`LargeListArray`]s. /// +/// # Example +/// +/// Here is code that constructs the same [`ListArray`] shown in the +/// [`GenericListArray`] documentation. 
+/// +/// ``` +/// # use std::sync::Arc; +/// # use arrow_array::{builder::ListBuilder, builder::StringBuilder, ArrayRef, StringArray, Array}; +/// # +/// let values_builder = StringBuilder::new(); +/// let mut builder = ListBuilder::new(values_builder); +/// +/// // [A, B, C] +/// builder.values().append_value("A"); +/// builder.values().append_value("B"); +/// builder.values().append_value("C"); +/// builder.append(true); +/// +/// // [ ] (empty list) +/// builder.append(true); +/// +/// // Null +/// builder.values().append_value("?"); // irrelevant +/// builder.append(false); +/// +/// // [D] +/// builder.values().append_value("D"); +/// builder.append(true); +/// +/// // [NULL, F] +/// builder.values().append_null(); +/// builder.values().append_value("F"); +/// builder.append(true); +/// +/// // Build the array +/// let array = builder.finish(); +/// +/// // Values is a string array +/// // "A", "B", "C", "?", "D", NULL, "F" +/// assert_eq!( +/// array.values().as_ref(), +/// &StringArray::from(vec![ +/// Some("A"), Some("B"), Some("C"), +/// Some("?"), Some("D"), None, +/// Some("F") +/// ]) +/// ); +/// +/// // Offsets are indexes into the values array +/// assert_eq!( +/// array.value_offsets(), +/// &[0, 3, 3, 4, 5, 7] +/// ); +/// ``` /// /// [`ListBuilder`]: crate::builder::ListBuilder /// [`ListArray`]: crate::array::ListArray From 25d9d5a82e66b72c6d08fb731be4d6139accd26e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 2 Aug 2023 12:47:22 -0400 Subject: [PATCH 13/14] Update arrow-array/src/array/list_array.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow-array/src/array/list_array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index b04d59b60835..05628084c844 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -107,7 +107,7 @@ impl OffsetSizeTrait for i64 { /// 
├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ /// │ NULL │ │ (3,4) │ │ 0 │ │ 3 │ │ │ 1 │ │ C │ │ 2 │ /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ -/// │ [D] │ │ (4,5) │ │ 1 │ │ 4 │ │ │ 0 │ │ ? │ │ 3 │ +/// │ [D] │ │ (4,5) │ │ 1 │ │ 4 │ │ │ ? │ │ ? │ │ 3 │ /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ /// │ [NULL, F] │ │ (5,7) │ │ 1 │ │ 5 │ │ │ 1 │ │ D │ │ 4 │ /// └─────────────┘ └───────┘ │ └───┘ ├───┤ ├───┤ ├───┤ From 4e9829b3877ca4d4f894446b042c0066ce9c1158 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 2 Aug 2023 12:47:40 -0400 Subject: [PATCH 14/14] Update arrow-array/src/builder/generic_list_builder.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow-array/src/builder/generic_list_builder.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow-array/src/builder/generic_list_builder.rs b/arrow-array/src/builder/generic_list_builder.rs index 4de32355cca9..5cc7f7b04e0a 100644 --- a/arrow-array/src/builder/generic_list_builder.rs +++ b/arrow-array/src/builder/generic_list_builder.rs @@ -30,8 +30,8 @@ use std::sync::Arc; /// /// # Example /// -/// Here is code that constructs the same [`ListArray`] shown in the -/// [`GenericListArray`] documentation. +/// Here is code that constructs a ListArray with the contents: +/// `[[A,B,C], [], NULL, [D], [NULL, F]]` /// /// ``` /// # use std::sync::Arc;