From 03b2e1712724e770a60b8478abff5c76f855d911 Mon Sep 17 00:00:00 2001
From: mqy <meng.qingyou@gmail.com>
Date: Fri, 8 Jan 2021 14:05:31 +0800
Subject: [PATCH 1/7] ARROW-11168: [Rust] Fix cargo doc warnings

---
 rust/arrow/src/array/array_primitive.rs |  2 +-
 rust/arrow/src/ffi.rs                   |  4 +--
 rust/arrow/src/ipc/gen/Message.rs       |  2 +-
 rust/arrow/src/ipc/gen/Schema.rs        |  5 ++--
 rust/arrow/src/ipc/gen/SparseTensor.rs  | 40 +++++++++++++------------
 rust/arrow/src/lib.rs                   |  6 ++--
 rust/datafusion/src/error.rs            |  4 +--
 rust/parquet/src/arrow/levels.rs        |  8 ++---
 rust/parquet/src/column/page.rs         |  2 +-
 rust/parquet/src/record/reader.rs       | 16 +++++-----
 10 files changed, 46 insertions(+), 43 deletions(-)
diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs
index 0bdc3e51d99..febb1656350 100644
--- a/rust/arrow/src/array/array_primitive.rs
+++ b/rust/arrow/src/array/array_primitive.rs
@@ -49,7 +49,7 @@ pub struct PrimitiveArray<T: ArrowPrimitiveType> {
     /// Pointer to the value array. The lifetime of this must be <= to the value buffer
     /// stored in `data`, so it's safe to store.
     /// # Safety
-    /// raw_values must have a value equivalent to data.buffers()[0].raw_data()
+    /// raw_values must have a value equivalent to `data.buffers()[0].raw_data()`
     /// raw_values must have alignment for type T::NativeType
     raw_values: RawPtrBox<T::Native>,
 }
diff --git a/rust/arrow/src/ffi.rs b/rust/arrow/src/ffi.rs
index 79638b94d0a..53c0a13d5b9 100644
--- a/rust/arrow/src/ffi.rs
+++ b/rust/arrow/src/ffi.rs
@@ -21,8 +21,8 @@
 //! One interface maps C ABI to native Rust types, i.e. convert c-pointers, c_char, to native rust.
 //! This is handled by [FFI_ArrowSchema] and [FFI_ArrowArray].
 //!
-//! The second interface maps native Rust types to the Rust-specific implementation of Arrow such as `format` to [Datatype],
-//! `Buffer`, etc. This is handled by [ArrowArray].
+//! The second interface maps native Rust types to the Rust-specific implementation of Arrow such as `format` to `DataType`,
+//! `Buffer`, etc. This is handled by `ArrowArray`.
 //!
 //! ```rust
 //! # use std::sync::Arc;
diff --git a/rust/arrow/src/ipc/gen/Message.rs b/rust/arrow/src/ipc/gen/Message.rs
index 0d05a49f18a..79a9df307af 100644
--- a/rust/arrow/src/ipc/gen/Message.rs
+++ b/rust/arrow/src/ipc/gen/Message.rs
@@ -336,7 +336,7 @@ impl flatbuffers::SimpleToVerifyInSlice for MessageHeader {}
 /// Metadata about a field at some level of a nested type tree (but not
 /// its children).
 ///
-/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
+/// For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]`
 /// would have {length: 5, null_count: 2} for its List node, and {length: 6,
 /// null_count: 0} for its Int16 node, as separate FieldNode structs
 // struct FieldNode, aligned to 8
diff --git a/rust/arrow/src/ipc/gen/Schema.rs b/rust/arrow/src/ipc/gen/Schema.rs
index 55bbc3362e3..5dcc7a0fa84 100644
--- a/rust/arrow/src/ipc/gen/Schema.rs
+++ b/rust/arrow/src/ipc/gen/Schema.rs
@@ -1594,10 +1594,11 @@ pub enum MapOffset {}
 /// not enforced.
 ///
 /// Map
+/// ```
 ///   - child[0] entries: Struct
 ///     - child[0] key: K
 ///     - child[1] value: V
-///
+/// ```
 /// Neither the "entries" field nor the "key" field may be nullable.
 ///
 /// The metadata is structured so that Arrow systems without special handling
@@ -1703,7 +1704,7 @@ pub enum UnionOffset {}
 /// A union is a complex type with children in Field
 /// By default ids in the type vector refer to the offsets in the children
 /// optionally typeIds provides an indirection between the child offset and the type id
-/// for each child typeIds[offset] is the id used in the type vector
+/// for each child `typeIds[offset]` is the id used in the type vector
 pub struct Union<'a> {
     pub _tab: flatbuffers::Table<'a>,
 }
diff --git a/rust/arrow/src/ipc/gen/SparseTensor.rs b/rust/arrow/src/ipc/gen/SparseTensor.rs
index 1b45a8241f7..aef429f489f 100644
--- a/rust/arrow/src/ipc/gen/SparseTensor.rs
+++ b/rust/arrow/src/ipc/gen/SparseTensor.rs
@@ -518,26 +518,28 @@ impl<'a> SparseMatrixIndexCSX<'a> {
     }
     /// indptrBuffer stores the location and size of indptr array that
     /// represents the range of the rows.
-    /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
+    /// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
     /// The length of this array is 1 + (the number of rows), and the type
     /// of index value is long.
     ///
     /// For example, let X be the following 6x4 matrix:
     ///
+    /// ```
     ///   X := [[0, 1, 2, 0],
     ///         [0, 0, 3, 0],
     ///         [0, 4, 0, 5],
     ///         [0, 0, 0, 0],
     ///         [6, 0, 7, 8],
     ///         [0, 9, 0, 0]].
-    ///
+    /// ```
     /// The array of non-zero values in X is:
-    ///
+    /// ```
     ///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
-    ///
+    /// ```
     /// And the indptr of X is:
-    ///
+    /// ```
     ///   indptr(X) = [0, 2, 3, 5, 5, 8, 10].
+    /// ```
     #[inline]
     pub fn indptrBuffer(&self) -> &'a Buffer {
         self._tab
@@ -559,9 +561,9 @@ impl<'a> SparseMatrixIndexCSX<'a> {
     /// The type of index value is long.
     ///
     /// For example, the indices of the above X is:
-    ///
+    /// ```
     ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
-    ///
+    /// ```
     /// Note that the indices are sorted in lexicographical order for each row.
     #[inline]
     pub fn indicesBuffer(&self) -> &'a Buffer {
@@ -750,7 +752,7 @@ impl<'a> SparseTensorIndexCSF<'a> {
     pub const VT_AXISORDER: flatbuffers::VOffsetT = 12;
 
     /// CSF is a generalization of compressed sparse row (CSR) index.
-    /// See [smith2017knl]: http://shaden.io/pub-files/smith2017knl.pdf
+    /// See \[smith2017knl\]: http://shaden.io/pub-files/smith2017knl.pdf
     ///
     /// CSF index recursively compresses each dimension of a tensor into a set
     /// of prefix trees. Each path from a root to leaf forms one tensor
@@ -759,7 +761,7 @@ impl<'a> SparseTensorIndexCSF<'a> {
     ///
     /// For example, let X be a 2x3x4x5 tensor and let it have the following
     /// 8 non-zero values:
-    ///
+    /// ```
     ///   X[0, 0, 0, 1] := 1
     ///   X[0, 0, 0, 2] := 2
     ///   X[0, 1, 0, 0] := 3
@@ -768,7 +770,7 @@ impl<'a> SparseTensorIndexCSF<'a> {
     ///   X[1, 1, 1, 0] := 6
     ///   X[1, 1, 1, 1] := 7
     ///   X[1, 1, 1, 2] := 8
-    ///
+    /// ```
     /// As a prefix tree this would be represented as:
     ///
     /// ```text
@@ -792,18 +794,18 @@ impl<'a> SparseTensorIndexCSF<'a> {
     }
     /// indptrBuffers stores the sparsity structure.
     /// Each two consecutive dimensions in a tensor correspond to a buffer in
-    /// indptrBuffers. A pair of consecutive values at indptrBuffers[dim][i]
-    /// and indptrBuffers[dim][i + 1] signify a range of nodes in
-    /// indicesBuffers[dim + 1] who are children of indicesBuffers[dim][i] node.
+    /// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
+    /// and `indptrBuffers[dim][i + 1]` signify a range of nodes in
+    /// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node.
     ///
     /// For example, the indptrBuffers for the above X is:
-    ///
+    /// ```
     ///   indptrBuffer(X) = [
     ///                       [0, 2, 3],
     ///                       [0, 1, 3, 4],
     ///                       [0, 2, 4, 5, 8]
     ///                     ].
-    ///
+    /// ```
     #[inline]
     pub fn indptrBuffers(&self) -> &'a [Buffer] {
         self._tab
@@ -827,14 +829,14 @@ impl<'a> SparseTensorIndexCSF<'a> {
     /// indicesBuffers stores values of nodes.
     /// Each tensor dimension corresponds to a buffer in indicesBuffers.
     /// For example, the indicesBuffers for the above X is:
-    ///
+    /// ```
     ///   indicesBuffer(X) = [
     ///                        [0, 1],
     ///                        [0, 1, 1],
     ///                        [0, 0, 1, 1],
     ///                        [1, 2, 0, 2, 0, 0, 1, 2]
     ///                      ].
-    ///
+    /// ```
     #[inline]
     pub fn indicesBuffers(&self) -> &'a [Buffer] {
         self._tab
@@ -848,9 +850,9 @@ impl<'a> SparseTensorIndexCSF<'a> {
     /// axisOrder stores the sequence in which dimensions were traversed to
     /// produce the prefix tree.
     /// For example, the axisOrder for the above X is:
-    ///
+    /// ```
     ///   axisOrder(X) = [0, 1, 2, 3].
-    ///
+    /// ```
     #[inline]
     pub fn axisOrder(&self) -> flatbuffers::Vector<'a, i32> {
         self._tab
diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/lib.rs
index 9c91d38566f..1fa3cddec2a 100644
--- a/rust/arrow/src/lib.rs
+++ b/rust/arrow/src/lib.rs
@@ -70,8 +70,8 @@
 //!
 //! ## Memory and Buffers
 //!
-//! Data in [`Array`](array::Array) is stored in [`ArrayData`](array::data::ArrayData), that in turn
-//! is a collection of other [`ArrayData`](array::data::ArrayData) and [`Buffers`](buffer::Buffer).
+//! Data in [`Array`](array::Array) is stored in [`ArrayData`](array::ArrayData), that in turn
+//! is a collection of other [`ArrayData`](array::ArrayData) and [`Buffers`](buffer::Buffer).
 //! [`Buffers`](buffer::Buffer) is the central struct that array implementations use keep allocated memory and pointers.
 //! The [`MutableBuffer`](buffer::MutableBuffer) is the mutable counter-part of[`Buffer`](buffer::Buffer).
 //! These are the lowest abstractions of this crate, and are used throughout the crate to
@@ -90,7 +90,7 @@
 //! ## Compute
 //!
 //! This crate offers many operations (called kernels) to operate on `Array`s, that you can find at [compute::kernels].
-//! It has both vertial and horizontal operations, and some of them have an SIMD implementation.
+//! It has both vertical and horizontal operations, and some of them have an SIMD implementation.
 //!
 //! ## Status
 //!
diff --git a/rust/datafusion/src/error.rs b/rust/datafusion/src/error.rs
index b4c8dcc026b..903faeabf69 100644
--- a/rust/datafusion/src/error.rs
+++ b/rust/datafusion/src/error.rs
@@ -39,7 +39,7 @@ pub enum DataFusionError {
     ParquetError(ParquetError),
     /// Error associated to I/O operations and associated traits.
     IoError(io::Error),
-    /// Error returned when SQL is syntatically incorrect.
+    /// Error returned when SQL is syntactically incorrect.
     SQL(ParserError),
     /// Error returned on a branch that we know it is possible
     /// but to which we still have no implementation for.
@@ -59,7 +59,7 @@ pub enum DataFusionError {
 }
 
 impl DataFusionError {
-    /// Wraps this [DataFusionError] as an [Arrow::error::ArrowError].
+    /// Wraps this [DataFusionError] as an [arrow::error::ArrowError].
     pub fn into_arrow_external_error(self) -> ArrowError {
         ArrowError::from_external_error(Box::new(self))
     }
diff --git a/rust/parquet/src/arrow/levels.rs b/rust/parquet/src/arrow/levels.rs
index 1c178e3a0eb..32617a15680 100644
--- a/rust/parquet/src/arrow/levels.rs
+++ b/rust/parquet/src/arrow/levels.rs
@@ -20,12 +20,12 @@
 //! Contains the algorithm for computing definition and repetition levels.
 //! The algorithm works by tracking the slots of an array that should ultimately be populated when
 //! writing to Parquet.
-//! Parquet achieves nesting through definition levels and repetition levels [1].
+//! Parquet achieves nesting through definition levels and repetition levels \[1\].
 //! Definition levels specify how many optional fields in the part for the column are defined.
 //! Repetition levels specify at what repeated field (list) in the path a column is defined.
 //!
 //! In a nested data structure such as `a.b.c`, one can see levels as defining whether a record is
-//! defined at `a`, `a.b`, or `a.b.c`. Optional fields are nullable fields, thus if all 3 fiedls
+//! defined at `a`, `a.b`, or `a.b.c`. Optional fields are nullable fields, thus if all 3 fields
 //! are nullable, the maximum definition will be = 3.
 //!
 //! The algorithm in this module computes the necessary information to enable the writer to keep
@@ -37,13 +37,13 @@
 //! We use an eager approach that increments definition levels where incrementable, and decrements
 //! if a value being checked is null.
 //!
-//! [1] https://github.com/apache/parquet-format#nested-encoding
+//! \[1\] [parquet-format#nested-encoding](https://github.com/apache/parquet-format#nested-encoding)
 
 use arrow::array::{Array, ArrayRef, StructArray};
 use arrow::datatypes::{DataType, Field};
 use arrow::record_batch::RecordBatch;
 
-/// Keeps track of the level information per array that is needed to write an Arrow aray to Parquet.
+/// Keeps track of the level information per array that is needed to write an Arrow array to Parquet.
 ///
 /// When a nested schema is traversed, intermediate [LevelInfo] structs are created to track
 /// the state of parent arrays. When a primitive Arrow array is encountered, a final [LevelInfo]
diff --git a/rust/parquet/src/column/page.rs b/rust/parquet/src/column/page.rs
index 43c0c4aac4c..0573616fa8d 100644
--- a/rust/parquet/src/column/page.rs
+++ b/rust/parquet/src/column/page.rs
@@ -93,7 +93,7 @@ impl Page {
         }
     }
 
-    /// Returns optional [`Statistics`](crate::file::metadata::Statistics).
+    /// Returns optional [`Statistics`](crate::file::statistics::Statistics).
     pub fn statistics(&self) -> Option<&Statistics> {
         match self {
             Page::DataPage { ref statistics, .. } => statistics.as_ref(),
diff --git a/rust/parquet/src/record/reader.rs b/rust/parquet/src/record/reader.rs
index a6f5e29bc5e..882187cb38e 100644
--- a/rust/parquet/src/record/reader.rs
+++ b/rust/parquet/src/record/reader.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 //! Contains implementation of record assembly and converting Parquet types into
-//! [`Row`](crate::record::api::Row)s.
+//! [`Row`](crate::record::Row)s.
 
 use std::{collections::HashMap, fmt, sync::Arc};
 
@@ -628,7 +628,7 @@ impl<'a> Either<'a> {
     }
 }
 
-/// Iterator of [`Row`](crate::record::api::Row)s.
+/// Iterator of [`Row`](crate::record::Row)s.
 /// It is used either for a single row group to iterate over data in that row group, or
 /// an entire file with auto buffering of all row groups.
 pub struct RowIter<'a> {
@@ -641,7 +641,7 @@ pub struct RowIter<'a> {
 }
 
 impl<'a> RowIter<'a> {
-    /// Creates a new iterator of [`Row`](crate::record::api::Row)s.
+    /// Creates a new iterator of [`Row`](crate::record::Row)s.
     fn new(
         file_reader: Option<Either<'a>>,
         row_iter: Option<ReaderIter>,
@@ -663,7 +663,7 @@ impl<'a> RowIter<'a> {
         }
     }
 
-    /// Creates iterator of [`Row`](crate::record::api::Row)s for all row groups in a
+    /// Creates iterator of [`Row`](crate::record::Row)s for all row groups in a
     /// file.
     pub fn from_file(proj: Option<Type>, reader: &'a FileReader) -> Result<Self> {
         let either = Either::Left(reader);
@@ -675,7 +675,7 @@ impl<'a> RowIter<'a> {
         Ok(Self::new(Some(either), None, descr))
     }
 
-    /// Creates iterator of [`Row`](crate::record::api::Row)s for a specific row group.
+    /// Creates iterator of [`Row`](crate::record::Row)s for a specific row group.
     pub fn from_row_group(
         proj: Option<Type>,
         reader: &'a RowGroupReader,
@@ -689,7 +689,7 @@ impl<'a> RowIter<'a> {
         Ok(Self::new(None, Some(row_iter), descr))
     }
 
-    /// Creates a iterator of [`Row`](crate::record::api::Row)s from a
+    /// Creates an iterator of [`Row`](crate::record::Row)s from a
     /// [`FileReader`](crate::file::reader::FileReader) using the full file schema.
     pub fn from_file_into(reader: Box<FileReader>) -> Self {
         let either = Either::Right(reader);
@@ -702,7 +702,7 @@ impl<'a> RowIter<'a> {
         Self::new(Some(either), None, descr)
     }
 
-    /// Tries to create a iterator of [`Row`](crate::record::api::Row)s using projections.
+    /// Tries to create an iterator of [`Row`](crate::record::Row)s using projections.
     /// Returns a error if a file reader is not the source of this iterator.
     ///
     /// The Projected schema can be a subset of or equal to the file schema,
@@ -784,7 +784,7 @@ impl<'a> Iterator for RowIter<'a> {
     }
 }
 
-/// Internal iterator of [`Row`](crate::record::api::Row)s for a reader.
+/// Internal iterator of [`Row`](crate::record::Row)s for a reader.
 pub struct ReaderIter {
     root_reader: Reader,
     records_left: usize,

From b6157b31b58fa9f516487d1974c5dfe40170e19d Mon Sep 17 00:00:00 2001
From: mqy <meng.qingyou@gmail.com>
Date: Fri, 8 Jan 2021 14:44:39 +0800
Subject: [PATCH 2/7] Update regen.sh

---
 rust/arrow/regen.sh | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/rust/arrow/regen.sh b/rust/arrow/regen.sh
index 4bc35a4852f..b0193689d5d 100755
--- a/rust/arrow/regen.sh
+++ b/rust/arrow/regen.sh
@@ -147,17 +147,4 @@ done
 popd
 cargo +stable fmt -- src/ipc/gen/*
 
-echo "=== TIPS ==="
-echo "Let's manually fix rustdoc of SparseTensorIndexCSF::indptrType:"
-echo 'prepend the tree with ```text, and append the tree with ```'
-cat <<TREE_EOF
-    /// \`\`\`text
-    ///         0          1
-    ///        / \         |
-    ///       0   1        1
-    ///      /   / \       |
-    ///     0   0   1      1
-    ///    /|  /|   |    /| |
-    ///   1 2 0 2   0   0 1 2
-    /// \`\`\`
-TREE_EOF
\ No newline at end of file
+echo "DONE! please run cargo doc and fix possible warnings!"

From 465b0590efcaf4c651437d3f6cbcdfe8cc3bfb90 Mon Sep 17 00:00:00 2001
From: mqy <meng.qingyou@gmail.com>
Date: Fri, 8 Jan 2021 16:10:32 +0800
Subject: [PATCH 3/7] Fix according to cargo +nightly doc and cargo test

---
 rust/arrow/regen.sh                          |  4 +++-
 rust/arrow/src/array/equal/structure.rs      |  2 +-
 rust/arrow/src/datatypes.rs                  |  2 +-
 rust/arrow/src/ffi.rs                        |  6 +++---
 rust/arrow/src/ipc/gen/Schema.rs             |  2 +-
 rust/arrow/src/ipc/gen/SparseTensor.rs       | 20 ++++++++++----------
 rust/datafusion/src/logical_plan/plan.rs     | 10 +++++-----
 rust/datafusion/src/physical_plan/mod.rs     |  2 +-
 rust/datafusion/src/physical_plan/parquet.rs |  4 ++--
 rust/parquet/src/basic.rs                    |  2 +-
 10 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/rust/arrow/regen.sh b/rust/arrow/regen.sh
index b0193689d5d..723ff52dffb 100755
--- a/rust/arrow/regen.sh
+++ b/rust/arrow/regen.sh
@@ -147,4 +147,6 @@ done
 popd
 cargo +stable fmt -- src/ipc/gen/*
 
-echo "DONE! please run cargo doc and fix possible warnings!"
+echo "DONE!"
+echo "Please run 'cargo doc' and 'cargo test' with nightly and stable, "
+echo "and fix possible errors or warnings!"
diff --git a/rust/arrow/src/array/equal/structure.rs b/rust/arrow/src/array/equal/structure.rs
index 8779a160460..6ec71837b86 100644
--- a/rust/arrow/src/array/equal/structure.rs
+++ b/rust/arrow/src/array/equal/structure.rs
@@ -27,7 +27,7 @@ use super::{equal_range, utils::child_logical_null_buffer};
 /// If an array is a child of a struct or list, the array's nulls have to be merged with the parent.
 /// This then affects the null count of the array, thus the merged nulls are passed separately
 /// as `lhs_nulls` and `rhs_nulls` variables to functions.
-/// The nulls are merged with a bitwise AND, and null counts are recomputed wheer necessary.
+/// The nulls are merged with a bitwise AND, and null counts are recomputed where necessary.
 fn equal_values(
     lhs: &ArrayData,
     rhs: &ArrayData,
diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs
index 7b16d95a868..8c03a755789 100644
--- a/rust/arrow/src/datatypes.rs
+++ b/rust/arrow/src/datatypes.rs
@@ -487,7 +487,7 @@ make_type!(
 );
 
 /// A subtype of primitive type that represents legal dictionary keys.
-/// See https://arrow.apache.org/docs/format/Columnar.html
+/// See <https://arrow.apache.org/docs/format/Columnar.html>
 pub trait ArrowDictionaryKeyType: ArrowPrimitiveType {}
 
 impl ArrowDictionaryKeyType for Int8Type {}
diff --git a/rust/arrow/src/ffi.rs b/rust/arrow/src/ffi.rs
index 53c0a13d5b9..c3b050916cc 100644
--- a/rust/arrow/src/ffi.rs
+++ b/rust/arrow/src/ffi.rs
@@ -91,7 +91,7 @@ use crate::error::{ArrowError, Result};
 use crate::util::bit_util;
 
 /// ABI-compatible struct for `ArrowSchema` from C Data Interface
-/// See https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions
+/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
 /// This was created by bindgen
 #[repr(C)]
 #[derive(Debug)]
@@ -120,7 +120,7 @@ unsafe extern "C" fn release_schema(schema: *mut FFI_ArrowSchema) {
 impl FFI_ArrowSchema {
     /// create a new [FFI_ArrowSchema] from a format.
     fn new(format: &str) -> FFI_ArrowSchema {
-        // https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema
+        // <https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema>
         FFI_ArrowSchema {
             format: CString::new(format).unwrap().into_raw(),
             name: std::ptr::null_mut(),
@@ -303,7 +303,7 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
 }
 
 /// ABI-compatible struct for ArrowArray from C Data Interface
-/// See https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions
+/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
 /// This was created by bindgen
 #[repr(C)]
 #[derive(Debug)]
diff --git a/rust/arrow/src/ipc/gen/Schema.rs b/rust/arrow/src/ipc/gen/Schema.rs
index 5dcc7a0fa84..61a9574221c 100644
--- a/rust/arrow/src/ipc/gen/Schema.rs
+++ b/rust/arrow/src/ipc/gen/Schema.rs
@@ -1594,7 +1594,7 @@ pub enum MapOffset {}
 /// not enforced.
 ///
 /// Map
-/// ```
+/// ```text
 ///   - child[0] entries: Struct
 ///     - child[0] key: K
 ///     - child[1] value: V
diff --git a/rust/arrow/src/ipc/gen/SparseTensor.rs b/rust/arrow/src/ipc/gen/SparseTensor.rs
index aef429f489f..532f73cb5d1 100644
--- a/rust/arrow/src/ipc/gen/SparseTensor.rs
+++ b/rust/arrow/src/ipc/gen/SparseTensor.rs
@@ -524,7 +524,7 @@ impl<'a> SparseMatrixIndexCSX<'a> {
     ///
     /// For example, let X be the following 6x4 matrix:
     ///
-    /// ```
+    /// ```text
     ///   X := [[0, 1, 2, 0],
     ///         [0, 0, 3, 0],
     ///         [0, 4, 0, 5],
@@ -533,11 +533,11 @@ impl<'a> SparseMatrixIndexCSX<'a> {
     ///         [0, 9, 0, 0]].
     /// ```
     /// The array of non-zero values in X is:
-    /// ```
+    /// ```text
     ///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
     /// ```
     /// And the indptr of X is:
-    /// ```
+    /// ```text
     ///   indptr(X) = [0, 2, 3, 5, 5, 8, 10].
     /// ```
     #[inline]
@@ -560,8 +560,8 @@ impl<'a> SparseMatrixIndexCSX<'a> {
     /// contains the column indices of the corresponding non-zero values.
     /// The type of index value is long.
     ///
-    /// For example, the indices of the above X is:
-    /// ```
+    /// For example, the indices of the above X is
+    /// ```text
     ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
     /// ```
     /// Note that the indices are sorted in lexicographical order for each row.
@@ -752,7 +752,7 @@ impl<'a> SparseTensorIndexCSF<'a> {
     pub const VT_AXISORDER: flatbuffers::VOffsetT = 12;
 
     /// CSF is a generalization of compressed sparse row (CSR) index.
-    /// See \[smith2017knl\]: http://shaden.io/pub-files/smith2017knl.pdf
+    /// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
     ///
     /// CSF index recursively compresses each dimension of a tensor into a set
     /// of prefix trees. Each path from a root to leaf forms one tensor
@@ -761,7 +761,7 @@ impl<'a> SparseTensorIndexCSF<'a> {
     ///
     /// For example, let X be a 2x3x4x5 tensor and let it have the following
     /// 8 non-zero values:
-    /// ```
+    /// ```text
     ///   X[0, 0, 0, 1] := 1
     ///   X[0, 0, 0, 2] := 2
     ///   X[0, 1, 0, 0] := 3
@@ -799,7 +799,7 @@ impl<'a> SparseTensorIndexCSF<'a> {
     /// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node.
     ///
     /// For example, the indptrBuffers for the above X is:
-    /// ```
+    /// ```text
     ///   indptrBuffer(X) = [
     ///                       [0, 2, 3],
     ///                       [0, 1, 3, 4],
@@ -829,7 +829,7 @@ impl<'a> SparseTensorIndexCSF<'a> {
     /// indicesBuffers stores values of nodes.
     /// Each tensor dimension corresponds to a buffer in indicesBuffers.
     /// For example, the indicesBuffers for the above X is:
-    /// ```
+    /// ```text
     ///   indicesBuffer(X) = [
     ///                        [0, 1],
     ///                        [0, 1, 1],
@@ -850,7 +850,7 @@ impl<'a> SparseTensorIndexCSF<'a> {
     /// axisOrder stores the sequence in which dimensions were traversed to
     /// produce the prefix tree.
     /// For example, the axisOrder for the above X is:
-    /// ```
+    /// ```text
     ///   axisOrder(X) = [0, 1, 2, 3].
     /// ```
     #[inline]
diff --git a/rust/datafusion/src/logical_plan/plan.rs b/rust/datafusion/src/logical_plan/plan.rs
index f120548d5ed..8002d16c44e 100644
--- a/rust/datafusion/src/logical_plan/plan.rs
+++ b/rust/datafusion/src/logical_plan/plan.rs
@@ -213,7 +213,7 @@ pub enum Partitioning {
     RoundRobinBatch(usize),
     /// Allocate rows based on a hash of one of more expressions and the specified number
     /// of partitions.
-    /// This partitioning scheme is not yet fully supported. See https://issues.apache.org/jira/browse/ARROW-11011
+    /// This partitioning scheme is not yet fully supported. See [ARROW-11011](https://issues.apache.org/jira/browse/ARROW-11011)
     Hash(Vec<Expr>, usize),
 }
 
@@ -248,7 +248,7 @@ pub trait PlanVisitor {
     /// Invoked on a logical plan before any of its child inputs have been
     /// visited. If Ok(true) is returned, the recursion continues. If
     /// Err(..) or Ok(false) are returned, the recursion stops
-    /// immedately and the error, if any, is returned to `accept`
+    /// immediately and the error, if any, is returned to `accept`
     fn pre_visit(&mut self, plan: &LogicalPlan)
         -> std::result::Result<bool, Self::Error>;
 
@@ -835,9 +835,9 @@ mod tests {
         }
     }
 
-    /// test earliy stopping in pre-visit
+    /// test early stopping in pre-visit
     #[test]
-    fn early_stoping_pre_visit() {
+    fn early_stopping_pre_visit() {
         let mut visitor = StoppingVisitor {
             return_false_from_pre_in: OptionalCounter::new(2),
             ..Default::default()
@@ -853,7 +853,7 @@ mod tests {
     }
 
     #[test]
-    fn early_stoping_post_visit() {
+    fn early_stopping_post_visit() {
         let mut visitor = StoppingVisitor {
             return_false_from_post_in: OptionalCounter::new(1),
             ..Default::default()
diff --git a/rust/datafusion/src/physical_plan/mod.rs b/rust/datafusion/src/physical_plan/mod.rs
index 605e5d6f44a..f2b984bb306 100644
--- a/rust/datafusion/src/physical_plan/mod.rs
+++ b/rust/datafusion/src/physical_plan/mod.rs
@@ -131,7 +131,7 @@ pub enum Partitioning {
     RoundRobinBatch(usize),
     /// Allocate rows based on a hash of one of more expressions and the specified
     /// number of partitions
-    /// This partitioning scheme is not yet fully supported. See https://issues.apache.org/jira/browse/ARROW-11011
+    /// This partitioning scheme is not yet fully supported. See [ARROW-11011](https://issues.apache.org/jira/browse/ARROW-11011)
     Hash(Vec<Arc<dyn PhysicalExpr>>, usize),
     /// Unknown partitioning scheme with a known number of partitions
     UnknownPartitioning(usize),
diff --git a/rust/datafusion/src/physical_plan/parquet.rs b/rust/datafusion/src/physical_plan/parquet.rs
index 53b26678320..9a03afdf426 100644
--- a/rust/datafusion/src/physical_plan/parquet.rs
+++ b/rust/datafusion/src/physical_plan/parquet.rs
@@ -60,11 +60,11 @@ pub struct ParquetExec {
 ///
 /// In the future it would be good to support subsets of files based on ranges of row groups
 /// so that we can better parallelize reads of large files across available cores (see
-/// https://issues.apache.org/jira/browse/ARROW-10995).
+/// [ARROW-10995](https://issues.apache.org/jira/browse/ARROW-10995)).
 ///
 /// We may also want to support reading Parquet files that are partitioned based on a key and
 /// in this case we would want this partition struct to represent multiple files for a given
-/// partition key (see https://issues.apache.org/jira/browse/ARROW-11019).
+/// partition key (see [ARROW-11019](https://issues.apache.org/jira/browse/ARROW-11019)).
 #[derive(Debug, Clone)]
 pub struct ParquetPartition {
     /// The Parquet filename for this partition
diff --git a/rust/parquet/src/basic.rs b/rust/parquet/src/basic.rs
index 0cea0439402..bf41d43da90 100644
--- a/rust/parquet/src/basic.rs
+++ b/rust/parquet/src/basic.rs
@@ -256,7 +256,7 @@ pub enum PageType {
 /// min/max.
 ///
 /// See reference in
-/// https://github.com/apache/parquet-cpp/blob/master/src/parquet/types.h
+/// <https://github.com/apache/parquet-cpp/blob/master/src/parquet/types.h>
 #[derive(Debug, Clone, Copy, PartialEq)]
 pub enum SortOrder {
     /// Signed (either value or legacy byte-wise) comparison.

From c7d7d5a39ff460e2305bff0f4bccc5ead9e01ad8 Mon Sep 17 00:00:00 2001
From: mqy <meng.qingyou@gmail.com>
Date: Fri, 8 Jan 2021 18:32:55 +0800
Subject: [PATCH 4/7] Patch fbs files

---
 rust/arrow/format-0ed34c83.patch       | 258 +++++++++++++++++++++++++
 rust/arrow/regen.sh                    |   6 +-
 rust/arrow/src/ipc/gen/SparseTensor.rs |  12 +-
 3 files changed, 268 insertions(+), 8 deletions(-)
 create mode 100644 rust/arrow/format-0ed34c83.patch

diff --git a/rust/arrow/format-0ed34c83.patch b/rust/arrow/format-0ed34c83.patch
new file mode 100644
index 00000000000..4cb026030f2
--- /dev/null
+++ b/rust/arrow/format-0ed34c83.patch
@@ -0,0 +1,258 @@
+diff --git a/format/Message.fbs b/format/Message.fbs
+index 1a7e0dfff..f1c18d765 100644
+--- a/format/Message.fbs
++++ b/format/Message.fbs
+@@ -28,7 +28,7 @@ namespace org.apache.arrow.flatbuf;
+ /// Metadata about a field at some level of a nested type tree (but not
+ /// its children).
+ ///
+-/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
++/// For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]`
+ /// would have {length: 5, null_count: 2} for its List node, and {length: 6,
+ /// null_count: 0} for its Int16 node, as separate FieldNode structs
+ struct FieldNode {
+diff --git a/format/Schema.fbs b/format/Schema.fbs
+index 3b37e5d85..3b00dd478 100644
+--- a/format/Schema.fbs
++++ b/format/Schema.fbs
+@@ -110,10 +110,11 @@ table FixedSizeList {
+ /// not enforced.
+ ///
+ /// Map
++/// ```text
+ ///   - child[0] entries: Struct
+ ///     - child[0] key: K
+ ///     - child[1] value: V
+-///
++/// ```
+ /// Neither the "entries" field nor the "key" field may be nullable.
+ ///
+ /// The metadata is structured so that Arrow systems without special handling
+@@ -129,7 +130,7 @@ enum UnionMode:short { Sparse, Dense }
+ /// A union is a complex type with children in Field
+ /// By default ids in the type vector refer to the offsets in the children
+ /// optionally typeIds provides an indirection between the child offset and the type id
+-/// for each child typeIds[offset] is the id used in the type vector
++/// for each child `typeIds[offset]` is the id used in the type vector
+ table Union {
+   mode: UnionMode;
+   typeIds: [ int ]; // optional, describes typeid of each child.
+diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
+index 3fe8a7582..a6fd2f9e7 100644
+--- a/format/SparseTensor.fbs
++++ b/format/SparseTensor.fbs
+@@ -37,21 +37,21 @@ namespace org.apache.arrow.flatbuf;
+ ///
+ /// For example, let X be a 2x3x4x5 tensor, and it has the following
+ /// 6 non-zero values:
+-///
++/// ```text
+ ///   X[0, 1, 2, 0] := 1
+ ///   X[1, 1, 2, 3] := 2
+ ///   X[0, 2, 1, 0] := 3
+ ///   X[0, 1, 3, 0] := 4
+ ///   X[0, 1, 2, 1] := 5
+ ///   X[1, 2, 0, 4] := 6
+-///
++/// ```
+ /// In COO format, the index matrix of X is the following 4x6 matrix:
+-///
++/// ```text
+ ///   [[0, 0, 0, 0, 1, 1],
+ ///    [1, 1, 1, 2, 1, 2],
+ ///    [2, 2, 3, 1, 2, 0],
+ ///    [0, 1, 0, 0, 3, 4]]
+-///
++/// ```
+ /// When isCanonical is true, the indices is sorted in lexicographical order
+ /// (row-major order), and it does not have duplicated entries.  Otherwise,
+ /// the indices may not be sorted, or may have duplicated entries.
+@@ -86,26 +86,27 @@ table SparseMatrixIndexCSX {
+ 
+   /// indptrBuffer stores the location and size of indptr array that
+   /// represents the range of the rows.
+-  /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
++  /// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
+   /// The length of this array is 1 + (the number of rows), and the type
+   /// of index value is long.
+   ///
+   /// For example, let X be the following 6x4 matrix:
+-  ///
++  /// ```text
+   ///   X := [[0, 1, 2, 0],
+   ///         [0, 0, 3, 0],
+   ///         [0, 4, 0, 5],
+   ///         [0, 0, 0, 0],
+   ///         [6, 0, 7, 8],
+   ///         [0, 9, 0, 0]].
+-  ///
++  /// ```
+   /// The array of non-zero values in X is:
+-  ///
++  /// ```text
+   ///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+-  ///
++  /// ```
+   /// And the indptr of X is:
+-  ///
++  /// ```text
+   ///   indptr(X) = [0, 2, 3, 5, 5, 8, 10].
++  /// ```
+   indptrBuffer: Buffer (required);
+ 
+   /// The type of values in indicesBuffer
+@@ -116,9 +117,9 @@ table SparseMatrixIndexCSX {
+   /// The type of index value is long.
+   ///
+   /// For example, the indices of the above X is:
+-  ///
++  /// ```text
+   ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
+-  ///
++  /// ```
+   /// Note that the indices are sorted in lexicographical order for each row.
+   indicesBuffer: Buffer (required);
+ }
+@@ -126,7 +127,7 @@ table SparseMatrixIndexCSX {
+ /// Compressed Sparse Fiber (CSF) sparse tensor index.
+ table SparseTensorIndexCSF {
+   /// CSF is a generalization of compressed sparse row (CSR) index.
+-  /// See [smith2017knl]: http://shaden.io/pub-files/smith2017knl.pdf
++  /// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
+   ///
+   /// CSF index recursively compresses each dimension of a tensor into a set
+   /// of prefix trees. Each path from a root to leaf forms one tensor
+@@ -135,7 +136,7 @@ table SparseTensorIndexCSF {
+   ///
+   /// For example, let X be a 2x3x4x5 tensor and let it have the following
+   /// 8 non-zero values:
+-  ///
++  /// ```text
+   ///   X[0, 0, 0, 1] := 1
+   ///   X[0, 0, 0, 2] := 2
+   ///   X[0, 1, 0, 0] := 3
+@@ -144,9 +145,9 @@ table SparseTensorIndexCSF {
+   ///   X[1, 1, 1, 0] := 6
+   ///   X[1, 1, 1, 1] := 7
+   ///   X[1, 1, 1, 2] := 8
+-  ///
++  /// ```
+   /// As a prefix tree this would be represented as:
+-  ///
++  /// ```text
+   ///         0          1
+   ///        / \         |
+   ///       0   1        1
+@@ -154,24 +155,24 @@ table SparseTensorIndexCSF {
+   ///     0   0   1      1
+   ///    /|  /|   |    /| |
+   ///   1 2 0 2   0   0 1 2
+-
++  /// ```
+   /// The type of values in indptrBuffers
+   indptrType: Int (required);
+ 
+   /// indptrBuffers stores the sparsity structure.
+   /// Each two consecutive dimensions in a tensor correspond to a buffer in
+-  /// indptrBuffers. A pair of consecutive values at indptrBuffers[dim][i]
+-  /// and indptrBuffers[dim][i + 1] signify a range of nodes in
+-  /// indicesBuffers[dim + 1] who are children of indicesBuffers[dim][i] node.
++  /// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
++  /// and `indptrBuffers[dim][i + 1]` signify a range of nodes in
++  /// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node.
+   ///
+   /// For example, the indptrBuffers for the above X is:
+-  ///
++  /// ```text
+   ///   indptrBuffer(X) = [
+   ///                       [0, 2, 3],
+   ///                       [0, 1, 3, 4],
+   ///                       [0, 2, 4, 5, 8]
+   ///                     ].
+-  ///
++  /// ```
+   indptrBuffers: [Buffer] (required);
+ 
+   /// The type of values in indicesBuffers
+@@ -180,22 +181,22 @@ table SparseTensorIndexCSF {
+   /// indicesBuffers stores values of nodes.
+   /// Each tensor dimension corresponds to a buffer in indicesBuffers.
+   /// For example, the indicesBuffers for the above X is:
+-  ///
++  /// ```text
+   ///   indicesBuffer(X) = [
+   ///                        [0, 1],
+   ///                        [0, 1, 1],
+   ///                        [0, 0, 1, 1],
+   ///                        [1, 2, 0, 2, 0, 0, 1, 2]
+   ///                      ].
+-  ///
++  /// ```
+   indicesBuffers: [Buffer] (required);
+ 
+   /// axisOrder stores the sequence in which dimensions were traversed to
+   /// produce the prefix tree.
+   /// For example, the axisOrder for the above X is:
+-  ///
++  /// ```text
+   ///   axisOrder(X) = [0, 1, 2, 3].
+-  ///
++  /// ```
+   axisOrder: [int] (required);
+ }
+ 
+diff --git a/rust/arrow/src/ipc/gen/SparseTensor.rs b/rust/arrow/src/ipc/gen/SparseTensor.rs
+index 532f73cb5..04a23398b 100644
+--- a/rust/arrow/src/ipc/gen/SparseTensor.rs
++++ b/rust/arrow/src/ipc/gen/SparseTensor.rs
+@@ -235,21 +235,21 @@ pub enum SparseTensorIndexCOOOffset {}
+ ///
+ /// For example, let X be a 2x3x4x5 tensor, and it has the following
+ /// 6 non-zero values:
+-///
++/// ```text
+ ///   X[0, 1, 2, 0] := 1
+ ///   X[1, 1, 2, 3] := 2
+ ///   X[0, 2, 1, 0] := 3
+ ///   X[0, 1, 3, 0] := 4
+ ///   X[0, 1, 2, 1] := 5
+ ///   X[1, 2, 0, 4] := 6
+-///
++/// ```
+ /// In COO format, the index matrix of X is the following 4x6 matrix:
+-///
++/// ```text
+ ///   [[0, 0, 0, 0, 1, 1],
+ ///    [1, 1, 1, 2, 1, 2],
+ ///    [2, 2, 3, 1, 2, 0],
+ ///    [0, 1, 0, 0, 3, 4]]
+-///
++/// ```
+ /// When isCanonical is true, the indices is sorted in lexicographical order
+ /// (row-major order), and it does not have duplicated entries.  Otherwise,
+ /// the indices may not be sorted, or may have duplicated entries.
+@@ -523,7 +523,6 @@ impl<'a> SparseMatrixIndexCSX<'a> {
+     /// of index value is long.
+     ///
+     /// For example, let X be the following 6x4 matrix:
+-    ///
+     /// ```text
+     ///   X := [[0, 1, 2, 0],
+     ///         [0, 0, 3, 0],
+@@ -560,7 +559,7 @@ impl<'a> SparseMatrixIndexCSX<'a> {
+     /// contains the column indices of the corresponding non-zero values.
+     /// The type of index value is long.
+     ///
+-    /// For example, the indices of the above X is
++    /// For example, the indices of the above X is:
+     /// ```text
+     ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
+     /// ```
+@@ -772,7 +771,6 @@ impl<'a> SparseTensorIndexCSF<'a> {
+     ///   X[1, 1, 1, 2] := 8
+     /// ```
+     /// As a prefix tree this would be represented as:
+-    ///
+     /// ```text
+     ///         0          1
+     ///        / \         |
diff --git a/rust/arrow/regen.sh b/rust/arrow/regen.sh
index 723ff52dffb..1abfe79f7d1 100755
--- a/rust/arrow/regen.sh
+++ b/rust/arrow/regen.sh
@@ -54,6 +54,11 @@ echo "run: bazel build :flatc ..."
 bazel build :flatc
 popd
 
+FB_PATCH="rust/arrow/format-0ed34c83.patch"
+echo"Patch flatbuffer files with ${FB_PATCH} for cargo doc"
+echo "NOTE: the patch MAY need update in case of changes in format/*.fbs"
+git apply rust/arrow/format-5504ee4.patch
+
 # Execute the code generation:
 $FLATC --filename-suffix "" --rust -o rust/arrow/src/ipc/gen/ format/*.fbs
 
@@ -97,7 +102,6 @@ names=("File" "Message" "Schema" "SparseTensor" "Tensor")
 
 # Remove all generated lines we don't need
 for f in `ls *.rs`; do
-
     if [[ $f == "mod.rs" ]]; then
         continue
     fi
diff --git a/rust/arrow/src/ipc/gen/SparseTensor.rs b/rust/arrow/src/ipc/gen/SparseTensor.rs
index 532f73cb5d1..04a23398bef 100644
--- a/rust/arrow/src/ipc/gen/SparseTensor.rs
+++ b/rust/arrow/src/ipc/gen/SparseTensor.rs
@@ -235,21 +235,21 @@ pub enum SparseTensorIndexCOOOffset {}
 ///
 /// For example, let X be a 2x3x4x5 tensor, and it has the following
 /// 6 non-zero values:
-///
+/// ```text
 ///   X[0, 1, 2, 0] := 1
 ///   X[1, 1, 2, 3] := 2
 ///   X[0, 2, 1, 0] := 3
 ///   X[0, 1, 3, 0] := 4
 ///   X[0, 1, 2, 1] := 5
 ///   X[1, 2, 0, 4] := 6
-///
+/// ```
 /// In COO format, the index matrix of X is the following 4x6 matrix:
-///
+/// ```text
 ///   [[0, 0, 0, 0, 1, 1],
 ///    [1, 1, 1, 2, 1, 2],
 ///    [2, 2, 3, 1, 2, 0],
 ///    [0, 1, 0, 0, 3, 4]]
-///
+/// ```
 /// When isCanonical is true, the indices is sorted in lexicographical order
 /// (row-major order), and it does not have duplicated entries.  Otherwise,
 /// the indices may not be sorted, or may have duplicated entries.
@@ -523,7 +523,6 @@ impl<'a> SparseMatrixIndexCSX<'a> {
     /// of index value is long.
     ///
     /// For example, let X be the following 6x4 matrix:
-    ///
     /// ```text
     ///   X := [[0, 1, 2, 0],
     ///         [0, 0, 3, 0],
@@ -560,7 +559,7 @@ impl<'a> SparseMatrixIndexCSX<'a> {
     /// contains the column indices of the corresponding non-zero values.
     /// The type of index value is long.
     ///
-    /// For example, the indices of the above X is
+    /// For example, the indices of the above X is:
     /// ```text
     ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
     /// ```
@@ -772,7 +771,6 @@ impl<'a> SparseTensorIndexCSF<'a> {
     ///   X[1, 1, 1, 2] := 8
     /// ```
     /// As a prefix tree this would be represented as:
-    ///
     /// ```text
     ///         0          1
     ///        / \         |

From 1aed118b9b32eff38879af4c7ba497a05739c4c1 Mon Sep 17 00:00:00 2001
From: mqy <meng.qingyou@gmail.com>
Date: Fri, 8 Jan 2021 20:38:21 +0800
Subject: [PATCH 5/7] Add license to patch file, update regen.sh

---
 rust/arrow/format-0ed34c83.patch | 74 ++++++++------------------------
 rust/arrow/regen.sh              |  7 ++-
 2 files changed, 23 insertions(+), 58 deletions(-)

diff --git a/rust/arrow/format-0ed34c83.patch b/rust/arrow/format-0ed34c83.patch
index 4cb026030f2..5da0a0c51f0 100644
--- a/rust/arrow/format-0ed34c83.patch
+++ b/rust/arrow/format-0ed34c83.patch
@@ -1,3 +1,20 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
 diff --git a/format/Message.fbs b/format/Message.fbs
 index 1a7e0dfff..f1c18d765 100644
 --- a/format/Message.fbs
@@ -200,59 +217,4 @@ index 3fe8a7582..a6fd2f9e7 100644
 +  /// ```
    axisOrder: [int] (required);
  }
- 
-diff --git a/rust/arrow/src/ipc/gen/SparseTensor.rs b/rust/arrow/src/ipc/gen/SparseTensor.rs
-index 532f73cb5..04a23398b 100644
---- a/rust/arrow/src/ipc/gen/SparseTensor.rs
-+++ b/rust/arrow/src/ipc/gen/SparseTensor.rs
-@@ -235,21 +235,21 @@ pub enum SparseTensorIndexCOOOffset {}
- ///
- /// For example, let X be a 2x3x4x5 tensor, and it has the following
- /// 6 non-zero values:
--///
-+/// ```text
- ///   X[0, 1, 2, 0] := 1
- ///   X[1, 1, 2, 3] := 2
- ///   X[0, 2, 1, 0] := 3
- ///   X[0, 1, 3, 0] := 4
- ///   X[0, 1, 2, 1] := 5
- ///   X[1, 2, 0, 4] := 6
--///
-+/// ```
- /// In COO format, the index matrix of X is the following 4x6 matrix:
--///
-+/// ```text
- ///   [[0, 0, 0, 0, 1, 1],
- ///    [1, 1, 1, 2, 1, 2],
- ///    [2, 2, 3, 1, 2, 0],
- ///    [0, 1, 0, 0, 3, 4]]
--///
-+/// ```
- /// When isCanonical is true, the indices is sorted in lexicographical order
- /// (row-major order), and it does not have duplicated entries.  Otherwise,
- /// the indices may not be sorted, or may have duplicated entries.
-@@ -523,7 +523,6 @@ impl<'a> SparseMatrixIndexCSX<'a> {
-     /// of index value is long.
-     ///
-     /// For example, let X be the following 6x4 matrix:
--    ///
-     /// ```text
-     ///   X := [[0, 1, 2, 0],
-     ///         [0, 0, 3, 0],
-@@ -560,7 +559,7 @@ impl<'a> SparseMatrixIndexCSX<'a> {
-     /// contains the column indices of the corresponding non-zero values.
-     /// The type of index value is long.
-     ///
--    /// For example, the indices of the above X is
-+    /// For example, the indices of the above X is:
-     /// ```text
-     ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
-     /// ```
-@@ -772,7 +771,6 @@ impl<'a> SparseTensorIndexCSF<'a> {
-     ///   X[1, 1, 1, 2] := 8
-     /// ```
-     /// As a prefix tree this would be represented as:
--    ///
-     /// ```text
-     ///         0          1
-     ///        / \         |
+
diff --git a/rust/arrow/regen.sh b/rust/arrow/regen.sh
index 1abfe79f7d1..e96f11e3800 100755
--- a/rust/arrow/regen.sh
+++ b/rust/arrow/regen.sh
@@ -55,13 +55,16 @@ bazel build :flatc
 popd
 
 FB_PATCH="rust/arrow/format-0ed34c83.patch"
-echo"Patch flatbuffer files with ${FB_PATCH} for cargo doc"
+echo "Patch flatbuffer files with ${FB_PATCH} for cargo doc"
 echo "NOTE: the patch MAY need update in case of changes in format/*.fbs"
-git apply rust/arrow/format-5504ee4.patch
+git apply --check ${FB_PATCH} && git apply ${FB_PATCH}
 
 # Execute the code generation:
 $FLATC --filename-suffix "" --rust -o rust/arrow/src/ipc/gen/ format/*.fbs
 
+# Reset changes to format/
+git checkout -- format
+
 # Now the files are wrongly named so we have to change that.
 popd
 pushd $DIR/src/ipc/gen

From b538ae8564270b410e43043c8240b93df6276216 Mon Sep 17 00:00:00 2001
From: mqy <meng.qingyou@gmail.com>
Date: Fri, 8 Jan 2021 21:32:56 +0800
Subject: [PATCH 6/7] Fix links in rust/parquet/

---
 rust/parquet/src/arrow/levels.rs  | 2 +-
 rust/parquet/src/encodings/rle.rs | 2 +-
 rust/parquet/src/record/reader.rs | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/rust/parquet/src/arrow/levels.rs b/rust/parquet/src/arrow/levels.rs
index 32617a15680..846ceabc03d 100644
--- a/rust/parquet/src/arrow/levels.rs
+++ b/rust/parquet/src/arrow/levels.rs
@@ -37,7 +37,7 @@
 //! We use an eager approach that increments definition levels where incrementable, and decrements
 //! if a value being checked is null.
 //!
-//! \[1\] [parquet-format#nested-encoding]<https://github.com/apache/parquet-format#nested-encoding>
+//! \[1\] [parquet-format#nested-encoding](https://github.com/apache/parquet-format#nested-encoding)
 
 use arrow::array::{Array, ArrayRef, StructArray};
 use arrow::datatypes::{DataType, Field};
diff --git a/rust/parquet/src/encodings/rle.rs b/rust/parquet/src/encodings/rle.rs
index 5a522017c59..d8cd50d3b91 100644
--- a/rust/parquet/src/encodings/rle.rs
+++ b/rust/parquet/src/encodings/rle.rs
@@ -25,7 +25,7 @@ use crate::util::{
 
 /// Rle/Bit-Packing Hybrid Encoding
 /// The grammar for this encoding looks like the following (copied verbatim
-/// from https://github.com/Parquet/parquet-format/blob/master/Encodings.md):
+/// from <https://github.com/Parquet/parquet-format/blob/master/Encodings.md>):
 ///
 /// rle-bit-packed-hybrid: <length> <encoded-data>
 /// length := length of the <encoded-data> in bytes stored as 4 bytes little endian
diff --git a/rust/parquet/src/record/reader.rs b/rust/parquet/src/record/reader.rs
index 882187cb38e..0b02bc8ed46 100644
--- a/rust/parquet/src/record/reader.rs
+++ b/rust/parquet/src/record/reader.rs
@@ -346,7 +346,7 @@ impl Reader {
     /// Returns true if repeated type is an element type for the list.
     /// Used to determine legacy list types.
     /// This method is copied from Spark Parquet reader and is based on the reference:
-    /// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
+    /// <https://github.com/apache/parquet-format/blob/master/LogicalTypes.md>
     ///   #backward-compatibility-rules
     fn is_element_type(repeated_type: &Type) -> bool {
         // For legacy 2-level list types with primitive element type, e.g.:

From 8cf807f8ba1e4a2f769dad8a1b1f63dc725eb34e Mon Sep 17 00:00:00 2001
From: mqy <meng.qingyou@gmail.com>
Date: Sat, 9 Jan 2021 03:48:41 +0800
Subject: [PATCH 7/7] Add update note for parquet-format

---
 rust/parquet/Cargo.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/rust/parquet/Cargo.toml b/rust/parquet/Cargo.toml
index 72b0e9da332..6529a85d7eb 100644
--- a/rust/parquet/Cargo.toml
+++ b/rust/parquet/Cargo.toml
@@ -29,6 +29,8 @@ build = "build.rs"
 edition = "2018"
 
 [dependencies]
+# update note: keep `parquet-format` pinned to a specific version until it stops introducing
+# breaking changes in minor versions, see ARROW-11187.
 parquet-format = "~2.6.1"
 byteorder = "1"
 thrift = "0.13"