Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
220 changes: 220 additions & 0 deletions rust/arrow/format-0ed34c83.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

diff --git a/format/Message.fbs b/format/Message.fbs
index 1a7e0dfff..f1c18d765 100644
--- a/format/Message.fbs
+++ b/format/Message.fbs
@@ -28,7 +28,7 @@ namespace org.apache.arrow.flatbuf;
/// Metadata about a field at some level of a nested type tree (but not
/// its children).
///
-/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
+/// For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]`
/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
/// null_count: 0} for its Int16 node, as separate FieldNode structs
struct FieldNode {
diff --git a/format/Schema.fbs b/format/Schema.fbs
index 3b37e5d85..3b00dd478 100644
--- a/format/Schema.fbs
+++ b/format/Schema.fbs
@@ -110,10 +110,11 @@ table FixedSizeList {
/// not enforced.
///
/// Map
+/// ```text
/// - child[0] entries: Struct
/// - child[0] key: K
/// - child[1] value: V
-///
+/// ```
/// Neither the "entries" field nor the "key" field may be nullable.
///
/// The metadata is structured so that Arrow systems without special handling
@@ -129,7 +130,7 @@ enum UnionMode:short { Sparse, Dense }
/// A union is a complex type with children in Field
/// By default ids in the type vector refer to the offsets in the children
/// optionally typeIds provides an indirection between the child offset and the type id
-/// for each child typeIds[offset] is the id used in the type vector
+/// for each child `typeIds[offset]` is the id used in the type vector
table Union {
mode: UnionMode;
typeIds: [ int ]; // optional, describes typeid of each child.
diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
index 3fe8a7582..a6fd2f9e7 100644
--- a/format/SparseTensor.fbs
+++ b/format/SparseTensor.fbs
@@ -37,21 +37,21 @@ namespace org.apache.arrow.flatbuf;
///
/// For example, let X be a 2x3x4x5 tensor, and it has the following
/// 6 non-zero values:
-///
+/// ```text
/// X[0, 1, 2, 0] := 1
/// X[1, 1, 2, 3] := 2
/// X[0, 2, 1, 0] := 3
/// X[0, 1, 3, 0] := 4
/// X[0, 1, 2, 1] := 5
/// X[1, 2, 0, 4] := 6
-///
+/// ```
/// In COO format, the index matrix of X is the following 4x6 matrix:
-///
+/// ```text
/// [[0, 0, 0, 0, 1, 1],
/// [1, 1, 1, 2, 1, 2],
/// [2, 2, 3, 1, 2, 0],
/// [0, 1, 0, 0, 3, 4]]
-///
+/// ```
/// When isCanonical is true, the indices is sorted in lexicographical order
/// (row-major order), and it does not have duplicated entries. Otherwise,
/// the indices may not be sorted, or may have duplicated entries.
@@ -86,26 +86,27 @@ table SparseMatrixIndexCSX {

/// indptrBuffer stores the location and size of indptr array that
/// represents the range of the rows.
- /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
+ /// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
/// The length of this array is 1 + (the number of rows), and the type
/// of index value is long.
///
/// For example, let X be the following 6x4 matrix:
- ///
+ /// ```text
/// X := [[0, 1, 2, 0],
/// [0, 0, 3, 0],
/// [0, 4, 0, 5],
/// [0, 0, 0, 0],
/// [6, 0, 7, 8],
/// [0, 9, 0, 0]].
- ///
+ /// ```
/// The array of non-zero values in X is:
- ///
+ /// ```text
/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
- ///
+ /// ```
/// And the indptr of X is:
- ///
+ /// ```text
/// indptr(X) = [0, 2, 3, 5, 5, 8, 10].
+ /// ```
indptrBuffer: Buffer (required);

/// The type of values in indicesBuffer
@@ -116,9 +117,9 @@ table SparseMatrixIndexCSX {
/// The type of index value is long.
///
/// For example, the indices of the above X is:
- ///
+ /// ```text
/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
- ///
+ /// ```
/// Note that the indices are sorted in lexicographical order for each row.
indicesBuffer: Buffer (required);
}
@@ -126,7 +127,7 @@ table SparseMatrixIndexCSX {
/// Compressed Sparse Fiber (CSF) sparse tensor index.
table SparseTensorIndexCSF {
/// CSF is a generalization of compressed sparse row (CSR) index.
- /// See [smith2017knl]: http://shaden.io/pub-files/smith2017knl.pdf
+ /// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
///
/// CSF index recursively compresses each dimension of a tensor into a set
/// of prefix trees. Each path from a root to leaf forms one tensor
@@ -135,7 +136,7 @@ table SparseTensorIndexCSF {
///
/// For example, let X be a 2x3x4x5 tensor and let it have the following
/// 8 non-zero values:
- ///
+ /// ```text
/// X[0, 0, 0, 1] := 1
/// X[0, 0, 0, 2] := 2
/// X[0, 1, 0, 0] := 3
@@ -144,9 +145,9 @@ table SparseTensorIndexCSF {
/// X[1, 1, 1, 0] := 6
/// X[1, 1, 1, 1] := 7
/// X[1, 1, 1, 2] := 8
- ///
+ /// ```
/// As a prefix tree this would be represented as:
- ///
+ /// ```text
/// 0 1
/// / \ |
/// 0 1 1
@@ -154,24 +155,24 @@ table SparseTensorIndexCSF {
/// 0 0 1 1
/// /| /| | /| |
/// 1 2 0 2 0 0 1 2
-
+ /// ```
/// The type of values in indptrBuffers
indptrType: Int (required);

/// indptrBuffers stores the sparsity structure.
/// Each two consecutive dimensions in a tensor correspond to a buffer in
- /// indptrBuffers. A pair of consecutive values at indptrBuffers[dim][i]
- /// and indptrBuffers[dim][i + 1] signify a range of nodes in
- /// indicesBuffers[dim + 1] who are children of indicesBuffers[dim][i] node.
+ /// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
+ /// and `indptrBuffers[dim][i + 1]` signify a range of nodes in
+ /// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node.
///
/// For example, the indptrBuffers for the above X is:
- ///
+ /// ```text
/// indptrBuffer(X) = [
/// [0, 2, 3],
/// [0, 1, 3, 4],
/// [0, 2, 4, 5, 8]
/// ].
- ///
+ /// ```
indptrBuffers: [Buffer] (required);

/// The type of values in indicesBuffers
@@ -180,22 +181,22 @@ table SparseTensorIndexCSF {
/// indicesBuffers stores values of nodes.
/// Each tensor dimension corresponds to a buffer in indicesBuffers.
/// For example, the indicesBuffers for the above X is:
- ///
+ /// ```text
/// indicesBuffer(X) = [
/// [0, 1],
/// [0, 1, 1],
/// [0, 0, 1, 1],
/// [1, 2, 0, 2, 0, 0, 1, 2]
/// ].
- ///
+ /// ```
indicesBuffers: [Buffer] (required);

/// axisOrder stores the sequence in which dimensions were traversed to
/// produce the prefix tree.
/// For example, the axisOrder for the above X is:
- ///
+ /// ```text
/// axisOrder(X) = [0, 1, 2, 3].
- ///
+ /// ```
axisOrder: [int] (required);
}

26 changes: 11 additions & 15 deletions rust/arrow/regen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,17 @@ echo "run: bazel build :flatc ..."
bazel build :flatc
popd

FB_PATCH="rust/arrow/format-0ed34c83.patch"
echo "Patch flatbuffer files with ${FB_PATCH} for cargo doc"
echo "NOTE: the patch MAY need update in case of changes in format/*.fbs"
git apply --check ${FB_PATCH} && git apply ${FB_PATCH}

# Execute the code generation:
$FLATC --filename-suffix "" --rust -o rust/arrow/src/ipc/gen/ format/*.fbs

# Reset changes to format/
git checkout -- format

# Now the files are wrongly named so we have to change that.
popd
pushd $DIR/src/ipc/gen
Expand Down Expand Up @@ -97,7 +105,6 @@ names=("File" "Message" "Schema" "SparseTensor" "Tensor")

# Remove all generated lines we don't need
for f in `ls *.rs`; do

if [[ $f == "mod.rs" ]]; then
continue
fi
Expand Down Expand Up @@ -147,17 +154,6 @@ done
popd
cargo +stable fmt -- src/ipc/gen/*

echo "=== TIPS ==="
echo "Let's manually fix rustdoc of SparseTensorIndexCSF::indptrType:"
echo 'prepend the tree with ```text, and append the tree with ```'
cat <<TREE_EOF
/// \`\`\`text
/// 0 1
/// / \ |
/// 0 1 1
/// / / \ |
/// 0 0 1 1
/// /| /| | /| |
/// 1 2 0 2 0 0 1 2
/// \`\`\`
TREE_EOF
echo "DONE!"
echo "Please run 'cargo doc' and 'cargo test' with nightly and stable, "
echo "and fix possible errors or warnings!"
2 changes: 1 addition & 1 deletion rust/arrow/src/array/array_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ pub struct PrimitiveArray<T: ArrowPrimitiveType> {
/// Pointer to the value array. The lifetime of this must be <= to the value buffer
/// stored in `data`, so it's safe to store.
/// # Safety
/// raw_values must have a value equivalent to data.buffers()[0].raw_data()
/// raw_values must have a value equivalent to `data.buffers()[0].raw_data()`
/// raw_values must have alignment for type T::NativeType
raw_values: RawPtrBox<T::Native>,
}
Expand Down
2 changes: 1 addition & 1 deletion rust/arrow/src/array/equal/structure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use super::{equal_range, utils::child_logical_null_buffer};
/// If an array is a child of a struct or list, the array's nulls have to be merged with the parent.
/// This then affects the null count of the array, thus the merged nulls are passed separately
/// as `lhs_nulls` and `rhs_nulls` variables to functions.
/// The nulls are merged with a bitwise AND, and null counts are recomputed wheer necessary.
/// The nulls are merged with a bitwise AND, and null counts are recomputed where necessary.
fn equal_values(
lhs: &ArrayData,
rhs: &ArrayData,
Expand Down
2 changes: 1 addition & 1 deletion rust/arrow/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ make_type!(
);

/// A subtype of primitive type that represents legal dictionary keys.
/// See https://arrow.apache.org/docs/format/Columnar.html
/// See <https://arrow.apache.org/docs/format/Columnar.html>
pub trait ArrowDictionaryKeyType: ArrowPrimitiveType {}

impl ArrowDictionaryKeyType for Int8Type {}
Expand Down
10 changes: 5 additions & 5 deletions rust/arrow/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
//! One interface maps C ABI to native Rust types, i.e. convert c-pointers, c_char, to native rust.
//! This is handled by [FFI_ArrowSchema] and [FFI_ArrowArray].
//!
//! The second interface maps native Rust types to the Rust-specific implementation of Arrow such as `format` to [Datatype],
//! `Buffer`, etc. This is handled by [ArrowArray].
//! The second interface maps native Rust types to the Rust-specific implementation of Arrow such as `format` to `Datatype`,
//! `Buffer`, etc. This is handled by `ArrowArray`.
//!
//! ```rust
//! # use std::sync::Arc;
Expand Down Expand Up @@ -91,7 +91,7 @@ use crate::error::{ArrowError, Result};
use crate::util::bit_util;

/// ABI-compatible struct for `ArrowSchema` from C Data Interface
/// See https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions
/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
/// This was created by bindgen
#[repr(C)]
#[derive(Debug)]
Expand Down Expand Up @@ -120,7 +120,7 @@ unsafe extern "C" fn release_schema(schema: *mut FFI_ArrowSchema) {
impl FFI_ArrowSchema {
/// create a new [FFI_ArrowSchema] from a format.
fn new(format: &str) -> FFI_ArrowSchema {
// https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema
// <https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema>
FFI_ArrowSchema {
format: CString::new(format).unwrap().into_raw(),
name: std::ptr::null_mut(),
Expand Down Expand Up @@ -303,7 +303,7 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
}

/// ABI-compatible struct for ArrowArray from C Data Interface
/// See https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions
/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
/// This was created by bindgen
#[repr(C)]
#[derive(Debug)]
Expand Down
2 changes: 1 addition & 1 deletion rust/arrow/src/ipc/gen/Message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ impl flatbuffers::SimpleToVerifyInSlice for MessageHeader {}
/// Metadata about a field at some level of a nested type tree (but not
/// its children).
///
/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
/// For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]`
/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
/// null_count: 0} for its Int16 node, as separate FieldNode structs
// struct FieldNode, aligned to 8
Expand Down
5 changes: 3 additions & 2 deletions rust/arrow/src/ipc/gen/Schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1594,10 +1594,11 @@ pub enum MapOffset {}
/// not enforced.
///
/// Map
/// ```text
/// - child[0] entries: Struct
/// - child[0] key: K
/// - child[1] value: V
///
/// ```
/// Neither the "entries" field nor the "key" field may be nullable.
///
/// The metadata is structured so that Arrow systems without special handling
Expand Down Expand Up @@ -1703,7 +1704,7 @@ pub enum UnionOffset {}
/// A union is a complex type with children in Field
/// By default ids in the type vector refer to the offsets in the children
/// optionally typeIds provides an indirection between the child offset and the type id
/// for each child typeIds[offset] is the id used in the type vector
/// for each child `typeIds[offset]` is the id used in the type vector
pub struct Union<'a> {
pub _tab: flatbuffers::Table<'a>,
}
Expand Down
Loading