Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 69 additions & 3 deletions arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,77 @@ impl OffsetSizeTrait for i64 {
const PREFIX: &'static str = "Large";
}

/// An array of [variable length arrays](https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout)
/// An array of [variable length lists], similar to JSON arrays
/// (e.g. `["A", "B", "C"]`).
///
/// See [`ListArray`] and [`LargeListArray`]
/// Lists are represented using `offsets` into a `values` child
/// array. The start and end offsets of each list are stored in two
/// adjacent entries of an [`OffsetBuffer`].
///
/// See [`GenericListBuilder`](crate::builder::GenericListBuilder) for how to construct a [`GenericListArray`]
/// Arrow defines [`ListArray`] with `i32` offsets and
/// [`LargeListArray`] with `i64` offsets.
///
/// Use [`GenericListBuilder`](crate::builder::GenericListBuilder) to
/// construct a [`GenericListArray`].
///
/// # Representation
///
/// A [`ListArray`] can represent a list of values of any other
/// supported Arrow type. Each element of the `ListArray` itself is
/// a list which may be empty, may contain NULL and non-null values,
/// or may itself be NULL.
///
/// For example, the `ListArray` shown in the following diagram stores
/// lists of strings. Note that `[]` represents an empty (length
/// 0), but non-NULL, list.
///
/// ```text
/// ┌─────────────┐
/// │ [A,B,C] │
/// ├─────────────┤
/// │ [] │
/// ├─────────────┤
/// │ NULL │
/// ├─────────────┤
/// │ [D] │
/// ├─────────────┤
/// │ [NULL, F] │
/// └─────────────┘
/// ```
///
/// The `values` are stored in a child [`StringArray`] and the offsets
/// are stored in an [`OffsetBuffer`] as shown in the following
/// diagram. The logical values and offsets are shown on the left, and
/// the actual `ListArray` encoding on the right.
///
/// ```text
/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
/// ┌ ─ ─ ─ ─ ─ ─ ┐ │
/// ┌─────────────┐ ┌───────┐ │ ┌───┐ ┌───┐ ┌───┐ ┌───┐
/// │ [A,B,C] │ │ (0,3) │ │ 1 │ │ 0 │ │ │ 1 │ │ A │ │ 0 │
/// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤
/// │ [] │ │ (3,3) │ │ 1 │ │ 3 │ │ │ 1 │ │ B │ │ 1 │
/// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤
/// │ NULL │ │ (3,4) │ │ 0 │ │ 3 │ │ │ 1 │ │ C │ │ 2 │
/// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤
/// │ [D] │ │ (4,5) │ │ 1 │ │ 4 │ │ │ ? │ │ ? │ │ 3 │
/// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤
/// │ [NULL, F] │ │ (5,7) │ │ 1 │ │ 5 │ │ │ 1 │ │ D │ │ 4 │
/// └─────────────┘ └───────┘ │ └───┘ ├───┤ ├───┤ ├───┤
/// │ 7 │ │ │ 0 │ │ ? │ │ 5 │
/// │ Validity └───┘ ├───┤ ├───┤
/// Logical Logical (nulls) Offsets │ │ 1 │ │ F │ │ 6 │
/// Values Offsets │ └───┘ └───┘
/// │ Values │ │
/// (offsets[i], │ ListArray (Array)
/// offsets[i+1]) └ ─ ─ ─ ─ ─ ─ ┘ │
/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
///
///
/// ```
///
/// [`StringArray`]: crate::array::StringArray
/// [variable length lists]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout
pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
data_type: DataType,
nulls: Option<NullBuffer>,
Expand Down
54 changes: 54 additions & 0 deletions arrow-array/src/builder/generic_list_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,60 @@ use std::sync::Arc;
///
/// Use [`ListBuilder`] to build [`ListArray`]s and [`LargeListBuilder`] to build [`LargeListArray`]s.
///
/// # Example
///
/// Here is code that constructs a ListArray with the contents:
/// `[[A,B,C], [], NULL, [D], [NULL, F]]`
///
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::{builder::ListBuilder, builder::StringBuilder, ArrayRef, StringArray, Array};
/// #
/// let values_builder = StringBuilder::new();
/// let mut builder = ListBuilder::new(values_builder);
///
/// // [A, B, C]
/// builder.values().append_value("A");
/// builder.values().append_value("B");
/// builder.values().append_value("C");
/// builder.append(true);
///
/// // [ ] (empty list)
/// builder.append(true);
///
/// // Null
/// builder.values().append_value("?"); // irrelevant
/// builder.append(false);
///
/// // [D]
/// builder.values().append_value("D");
/// builder.append(true);
///
/// // [NULL, F]
/// builder.values().append_null();
/// builder.values().append_value("F");
/// builder.append(true);
///
/// // Build the array
/// let array = builder.finish();
///
/// // Values is a string array
/// // "A", "B", "C", "?", "D", NULL, "F"
/// assert_eq!(
/// array.values().as_ref(),
/// &StringArray::from(vec![
/// Some("A"), Some("B"), Some("C"),
/// Some("?"), Some("D"), None,
/// Some("F")
/// ])
/// );
///
/// // Offsets are indexes into the values array
/// assert_eq!(
/// array.value_offsets(),
/// &[0, 3, 3, 4, 5, 7]
/// );
/// ```
///
/// [`ListBuilder`]: crate::builder::ListBuilder
/// [`ListArray`]: crate::array::ListArray
Expand Down
38 changes: 37 additions & 1 deletion arrow-buffer/src/buffer/offset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,43 @@ use crate::buffer::ScalarBuffer;
use crate::{ArrowNativeType, MutableBuffer};
use std::ops::Deref;

/// A non-empty buffer of monotonically increasing, positive integers
/// A non-empty buffer of monotonically non-decreasing, non-negative integers.
///
/// An [`OffsetBuffer`] is used to represent ranges of offsets. An
/// `OffsetBuffer` of `N+1` items contains `N` such ranges. The start
/// offset for element `i` is `offsets[i]` and the end offset is
/// `offsets[i+1]`. Equal offsets represent an empty range.
///
/// # Example
///
/// This example shows how 5 distinct ranges are represented using a
/// 6 entry `OffsetBuffer`. The first entry `(0, 3)` represents the
/// three offsets `0, 1, 2`. The entry `(3,3)` represents no offsets
/// (e.g. an empty list).
///
/// ```text
/// ┌───────┐ ┌───┐
/// │ (0,3) │ │ 0 │
/// ├───────┤ ├───┤
/// │ (3,3) │ │ 3 │
/// ├───────┤ ├───┤
/// │ (3,4) │ │ 3 │
/// ├───────┤ ├───┤
/// │ (4,5) │ │ 4 │
/// ├───────┤ ├───┤
/// │ (5,7) │ │ 5 │
/// └───────┘ ├───┤
/// │ 7 │
/// └───┘
///
/// Offsets Buffer
/// Logical
/// Offsets
///
/// (offsets[i],
/// offsets[i+1])
/// ```

// Newtype over `ScalarBuffer<O>`: the wrapped buffer is a private field, so
// it can only be reached through this type's own API.
#[derive(Clone, Debug)]
pub struct OffsetBuffer<O>(ScalarBuffer<O>)
where
    O: ArrowNativeType;

Expand Down