From 03b2e1712724e770a60b8478abff5c76f855d911 Mon Sep 17 00:00:00 2001 From: mqy Date: Fri, 8 Jan 2021 14:05:31 +0800 Subject: [PATCH 1/7] ARROW-11168: [Rust] Fix cargo doc warnings --- rust/arrow/src/array/array_primitive.rs | 2 +- rust/arrow/src/ffi.rs | 4 +-- rust/arrow/src/ipc/gen/Message.rs | 2 +- rust/arrow/src/ipc/gen/Schema.rs | 5 ++-- rust/arrow/src/ipc/gen/SparseTensor.rs | 40 +++++++++++++------------ rust/arrow/src/lib.rs | 6 ++-- rust/datafusion/src/error.rs | 4 +-- rust/parquet/src/arrow/levels.rs | 8 ++--- rust/parquet/src/column/page.rs | 2 +- rust/parquet/src/record/reader.rs | 16 +++++----- 10 files changed, 46 insertions(+), 43 deletions(-) diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs index 0bdc3e51d99..febb1656350 100644 --- a/rust/arrow/src/array/array_primitive.rs +++ b/rust/arrow/src/array/array_primitive.rs @@ -49,7 +49,7 @@ pub struct PrimitiveArray { /// Pointer to the value array. The lifetime of this must be <= to the value buffer /// stored in `data`, so it's safe to store. /// # Safety - /// raw_values must have a value equivalent to data.buffers()[0].raw_data() + /// raw_values must have a value equivalent to `data.buffers()[0].raw_data()` /// raw_values must have alignment for type T::NativeType raw_values: RawPtrBox, } diff --git a/rust/arrow/src/ffi.rs b/rust/arrow/src/ffi.rs index 79638b94d0a..53c0a13d5b9 100644 --- a/rust/arrow/src/ffi.rs +++ b/rust/arrow/src/ffi.rs @@ -21,8 +21,8 @@ //! One interface maps C ABI to native Rust types, i.e. convert c-pointers, c_char, to native rust. //! This is handled by [FFI_ArrowSchema] and [FFI_ArrowArray]. //! -//! The second interface maps native Rust types to the Rust-specific implementation of Arrow such as `format` to [Datatype], -//! `Buffer`, etc. This is handled by [ArrowArray]. +//! The second interface maps native Rust types to the Rust-specific implementation of Arrow such as `format` to `Datatype`, +//! `Buffer`, etc. 
This is handled by `ArrowArray`. //! //! ```rust //! # use std::sync::Arc; diff --git a/rust/arrow/src/ipc/gen/Message.rs b/rust/arrow/src/ipc/gen/Message.rs index 0d05a49f18a..79a9df307af 100644 --- a/rust/arrow/src/ipc/gen/Message.rs +++ b/rust/arrow/src/ipc/gen/Message.rs @@ -336,7 +336,7 @@ impl flatbuffers::SimpleToVerifyInSlice for MessageHeader {} /// Metadata about a field at some level of a nested type tree (but not /// its children). /// -/// For example, a List with values [[1, 2, 3], null, [4], [5, 6], null] +/// For example, a List with values `[[1, 2, 3], null, [4], [5, 6], null]` /// would have {length: 5, null_count: 2} for its List node, and {length: 6, /// null_count: 0} for its Int16 node, as separate FieldNode structs // struct FieldNode, aligned to 8 diff --git a/rust/arrow/src/ipc/gen/Schema.rs b/rust/arrow/src/ipc/gen/Schema.rs index 55bbc3362e3..5dcc7a0fa84 100644 --- a/rust/arrow/src/ipc/gen/Schema.rs +++ b/rust/arrow/src/ipc/gen/Schema.rs @@ -1594,10 +1594,11 @@ pub enum MapOffset {} /// not enforced. /// /// Map +/// ``` /// - child[0] entries: Struct /// - child[0] key: K /// - child[1] value: V -/// +/// ``` /// Neither the "entries" field nor the "key" field may be nullable. 
/// /// The metadata is structured so that Arrow systems without special handling @@ -1703,7 +1704,7 @@ pub enum UnionOffset {} /// A union is a complex type with children in Field /// By default ids in the type vector refer to the offsets in the children /// optionally typeIds provides an indirection between the child offset and the type id -/// for each child typeIds[offset] is the id used in the type vector +/// for each child `typeIds[offset]` is the id used in the type vector pub struct Union<'a> { pub _tab: flatbuffers::Table<'a>, } diff --git a/rust/arrow/src/ipc/gen/SparseTensor.rs b/rust/arrow/src/ipc/gen/SparseTensor.rs index 1b45a8241f7..aef429f489f 100644 --- a/rust/arrow/src/ipc/gen/SparseTensor.rs +++ b/rust/arrow/src/ipc/gen/SparseTensor.rs @@ -518,26 +518,28 @@ impl<'a> SparseMatrixIndexCSX<'a> { } /// indptrBuffer stores the location and size of indptr array that /// represents the range of the rows. - /// The i-th row spans from indptr[i] to indptr[i+1] in the data. + /// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. /// The length of this array is 1 + (the number of rows), and the type /// of index value is long. /// /// For example, let X be the following 6x4 matrix: /// + /// ``` /// X := [[0, 1, 2, 0], /// [0, 0, 3, 0], /// [0, 4, 0, 5], /// [0, 0, 0, 0], /// [6, 0, 7, 8], /// [0, 9, 0, 0]]. - /// + /// ``` /// The array of non-zero values in X is: - /// + /// ``` /// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. - /// + /// ``` /// And the indptr of X is: - /// + /// ``` /// indptr(X) = [0, 2, 3, 5, 5, 8, 10]. + /// ``` #[inline] pub fn indptrBuffer(&self) -> &'a Buffer { self._tab @@ -559,9 +561,9 @@ impl<'a> SparseMatrixIndexCSX<'a> { /// The type of index value is long. /// /// For example, the indices of the above X is: - /// + /// ``` /// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. - /// + /// ``` /// Note that the indices are sorted in lexicographical order for each row. 
#[inline] pub fn indicesBuffer(&self) -> &'a Buffer { @@ -750,7 +752,7 @@ impl<'a> SparseTensorIndexCSF<'a> { pub const VT_AXISORDER: flatbuffers::VOffsetT = 12; /// CSF is a generalization of compressed sparse row (CSR) index. - /// See [smith2017knl]: http://shaden.io/pub-files/smith2017knl.pdf + /// See \[smith2017knl\]: http://shaden.io/pub-files/smith2017knl.pdf /// /// CSF index recursively compresses each dimension of a tensor into a set /// of prefix trees. Each path from a root to leaf forms one tensor @@ -759,7 +761,7 @@ impl<'a> SparseTensorIndexCSF<'a> { /// /// For example, let X be a 2x3x4x5 tensor and let it have the following /// 8 non-zero values: - /// + /// ``` /// X[0, 0, 0, 1] := 1 /// X[0, 0, 0, 2] := 2 /// X[0, 1, 0, 0] := 3 @@ -768,7 +770,7 @@ impl<'a> SparseTensorIndexCSF<'a> { /// X[1, 1, 1, 0] := 6 /// X[1, 1, 1, 1] := 7 /// X[1, 1, 1, 2] := 8 - /// + /// ``` /// As a prefix tree this would be represented as: /// /// ```text @@ -792,18 +794,18 @@ impl<'a> SparseTensorIndexCSF<'a> { } /// indptrBuffers stores the sparsity structure. /// Each two consecutive dimensions in a tensor correspond to a buffer in - /// indptrBuffers. A pair of consecutive values at indptrBuffers[dim][i] - /// and indptrBuffers[dim][i + 1] signify a range of nodes in - /// indicesBuffers[dim + 1] who are children of indicesBuffers[dim][i] node. + /// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` + /// and `indptrBuffers[dim][i + 1]` signify a range of nodes in + /// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. /// /// For example, the indptrBuffers for the above X is: - /// + /// ``` /// indptrBuffer(X) = [ /// [0, 2, 3], /// [0, 1, 3, 4], /// [0, 2, 4, 5, 8] /// ]. - /// + /// ``` #[inline] pub fn indptrBuffers(&self) -> &'a [Buffer] { self._tab @@ -827,14 +829,14 @@ impl<'a> SparseTensorIndexCSF<'a> { /// indicesBuffers stores values of nodes. 
/// Each tensor dimension corresponds to a buffer in indicesBuffers. /// For example, the indicesBuffers for the above X is: - /// + /// ``` /// indicesBuffer(X) = [ /// [0, 1], /// [0, 1, 1], /// [0, 0, 1, 1], /// [1, 2, 0, 2, 0, 0, 1, 2] /// ]. - /// + /// ``` #[inline] pub fn indicesBuffers(&self) -> &'a [Buffer] { self._tab @@ -848,9 +850,9 @@ impl<'a> SparseTensorIndexCSF<'a> { /// axisOrder stores the sequence in which dimensions were traversed to /// produce the prefix tree. /// For example, the axisOrder for the above X is: - /// + /// ``` /// axisOrder(X) = [0, 1, 2, 3]. - /// + /// ``` #[inline] pub fn axisOrder(&self) -> flatbuffers::Vector<'a, i32> { self._tab diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/lib.rs index 9c91d38566f..1fa3cddec2a 100644 --- a/rust/arrow/src/lib.rs +++ b/rust/arrow/src/lib.rs @@ -70,8 +70,8 @@ //! //! ## Memory and Buffers //! -//! Data in [`Array`](array::Array) is stored in [`ArrayData`](array::data::ArrayData), that in turn -//! is a collection of other [`ArrayData`](array::data::ArrayData) and [`Buffers`](buffer::Buffer). +//! Data in [`Array`](array::Array) is stored in [`ArrayData`](array::ArrayData), that in turn +//! is a collection of other [`ArrayData`](array::ArrayData) and [`Buffers`](buffer::Buffer). //! [`Buffers`](buffer::Buffer) is the central struct that array implementations use keep allocated memory and pointers. //! The [`MutableBuffer`](buffer::MutableBuffer) is the mutable counter-part of[`Buffer`](buffer::Buffer). //! These are the lowest abstractions of this crate, and are used throughout the crate to @@ -90,7 +90,7 @@ //! ## Compute //! //! This crate offers many operations (called kernels) to operate on `Array`s, that you can find at [compute::kernels]. -//! It has both vertial and horizontal operations, and some of them have an SIMD implementation. +//! It has both vertical and horizontal operations, and some of them have an SIMD implementation. //! //! ## Status //! 
diff --git a/rust/datafusion/src/error.rs b/rust/datafusion/src/error.rs index b4c8dcc026b..903faeabf69 100644 --- a/rust/datafusion/src/error.rs +++ b/rust/datafusion/src/error.rs @@ -39,7 +39,7 @@ pub enum DataFusionError { ParquetError(ParquetError), /// Error associated to I/O operations and associated traits. IoError(io::Error), - /// Error returned when SQL is syntatically incorrect. + /// Error returned when SQL is syntactically incorrect. SQL(ParserError), /// Error returned on a branch that we know it is possible /// but to which we still have no implementation for. @@ -59,7 +59,7 @@ pub enum DataFusionError { } impl DataFusionError { - /// Wraps this [DataFusionError] as an [Arrow::error::ArrowError]. + /// Wraps this [DataFusionError] as an [arrow::error::ArrowError]. pub fn into_arrow_external_error(self) -> ArrowError { ArrowError::from_external_error(Box::new(self)) } diff --git a/rust/parquet/src/arrow/levels.rs b/rust/parquet/src/arrow/levels.rs index 1c178e3a0eb..32617a15680 100644 --- a/rust/parquet/src/arrow/levels.rs +++ b/rust/parquet/src/arrow/levels.rs @@ -20,12 +20,12 @@ //! Contains the algorithm for computing definition and repetition levels. //! The algorithm works by tracking the slots of an array that should ultimately be populated when //! writing to Parquet. -//! Parquet achieves nesting through definition levels and repetition levels [1]. +//! Parquet achieves nesting through definition levels and repetition levels \[1\]. //! Definition levels specify how many optional fields in the part for the column are defined. //! Repetition levels specify at what repeated field (list) in the path a column is defined. //! //! In a nested data structure such as `a.b.c`, one can see levels as defining whether a record is -//! defined at `a`, `a.b`, or `a.b.c`. Optional fields are nullable fields, thus if all 3 fiedls +//! defined at `a`, `a.b`, or `a.b.c`. Optional fields are nullable fields, thus if all 3 fields //! 
are nullable, the maximum definition will be = 3. //! //! The algorithm in this module computes the necessary information to enable the writer to keep @@ -37,13 +37,13 @@ //! We use an eager approach that increments definition levels where incrementable, and decrements //! if a value being checked is null. //! -//! [1] https://github.com/apache/parquet-format#nested-encoding +//! \[1\] [parquet-format#nested-encoding] use arrow::array::{Array, ArrayRef, StructArray}; use arrow::datatypes::{DataType, Field}; use arrow::record_batch::RecordBatch; -/// Keeps track of the level information per array that is needed to write an Arrow aray to Parquet. +/// Keeps track of the level information per array that is needed to write an Arrow array to Parquet. /// /// When a nested schema is traversed, intermediate [LevelInfo] structs are created to track /// the state of parent arrays. When a primitive Arrow array is encountered, a final [LevelInfo] diff --git a/rust/parquet/src/column/page.rs b/rust/parquet/src/column/page.rs index 43c0c4aac4c..0573616fa8d 100644 --- a/rust/parquet/src/column/page.rs +++ b/rust/parquet/src/column/page.rs @@ -93,7 +93,7 @@ impl Page { } } - /// Returns optional [`Statistics`](crate::file::metadata::Statistics). + /// Returns optional [`Statistics`](crate::file::statistics::Statistics). pub fn statistics(&self) -> Option<&Statistics> { match self { Page::DataPage { ref statistics, .. } => statistics.as_ref(), diff --git a/rust/parquet/src/record/reader.rs b/rust/parquet/src/record/reader.rs index a6f5e29bc5e..882187cb38e 100644 --- a/rust/parquet/src/record/reader.rs +++ b/rust/parquet/src/record/reader.rs @@ -16,7 +16,7 @@ // under the License. //! Contains implementation of record assembly and converting Parquet types into -//! [`Row`](crate::record::api::Row)s. +//! [`Row`](crate::record::Row)s. use std::{collections::HashMap, fmt, sync::Arc}; @@ -628,7 +628,7 @@ impl<'a> Either<'a> { } } -/// Iterator of [`Row`](crate::record::api::Row)s. 
+/// Iterator of [`Row`](crate::record::Row)s. /// It is used either for a single row group to iterate over data in that row group, or /// an entire file with auto buffering of all row groups. pub struct RowIter<'a> { @@ -641,7 +641,7 @@ pub struct RowIter<'a> { } impl<'a> RowIter<'a> { - /// Creates a new iterator of [`Row`](crate::record::api::Row)s. + /// Creates a new iterator of [`Row`](crate::record::Row)s. fn new( file_reader: Option>, row_iter: Option, @@ -663,7 +663,7 @@ impl<'a> RowIter<'a> { } } - /// Creates iterator of [`Row`](crate::record::api::Row)s for all row groups in a + /// Creates iterator of [`Row`](crate::record::Row)s for all row groups in a /// file. pub fn from_file(proj: Option, reader: &'a FileReader) -> Result { let either = Either::Left(reader); @@ -675,7 +675,7 @@ impl<'a> RowIter<'a> { Ok(Self::new(Some(either), None, descr)) } - /// Creates iterator of [`Row`](crate::record::api::Row)s for a specific row group. + /// Creates iterator of [`Row`](crate::record::Row)s for a specific row group. pub fn from_row_group( proj: Option, reader: &'a RowGroupReader, @@ -689,7 +689,7 @@ impl<'a> RowIter<'a> { Ok(Self::new(None, Some(row_iter), descr)) } - /// Creates a iterator of [`Row`](crate::record::api::Row)s from a + /// Creates a iterator of [`Row`](crate::record::Row)s from a /// [`FileReader`](crate::file::reader::FileReader) using the full file schema. pub fn from_file_into(reader: Box) -> Self { let either = Either::Right(reader); @@ -702,7 +702,7 @@ impl<'a> RowIter<'a> { Self::new(Some(either), None, descr) } - /// Tries to create a iterator of [`Row`](crate::record::api::Row)s using projections. + /// Tries to create a iterator of [`Row`](crate::record::Row)s using projections. /// Returns a error if a file reader is not the source of this iterator. 
/// /// The Projected schema can be a subset of or equal to the file schema, @@ -784,7 +784,7 @@ impl<'a> Iterator for RowIter<'a> { } } -/// Internal iterator of [`Row`](crate::record::api::Row)s for a reader. +/// Internal iterator of [`Row`](crate::record::Row)s for a reader. pub struct ReaderIter { root_reader: Reader, records_left: usize, From b6157b31b58fa9f516487d1974c5dfe40170e19d Mon Sep 17 00:00:00 2001 From: mqy Date: Fri, 8 Jan 2021 14:44:39 +0800 Subject: [PATCH 2/7] Update regen.sh --- rust/arrow/regen.sh | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/rust/arrow/regen.sh b/rust/arrow/regen.sh index 4bc35a4852f..b0193689d5d 100755 --- a/rust/arrow/regen.sh +++ b/rust/arrow/regen.sh @@ -147,17 +147,4 @@ done popd cargo +stable fmt -- src/ipc/gen/* -echo "=== TIPS ===" -echo "Let's manually fix rustdoc of SparseTensorIndexCSF::indptrType:" -echo 'prepend the tree with ```text, and append the tree with ```' -cat < Date: Fri, 8 Jan 2021 16:10:32 +0800 Subject: [PATCH 3/7] Fix according to cargo +nightly doc and cargo test --- rust/arrow/regen.sh | 4 +++- rust/arrow/src/array/equal/structure.rs | 2 +- rust/arrow/src/datatypes.rs | 2 +- rust/arrow/src/ffi.rs | 6 +++--- rust/arrow/src/ipc/gen/Schema.rs | 2 +- rust/arrow/src/ipc/gen/SparseTensor.rs | 20 ++++++++++---------- rust/datafusion/src/logical_plan/plan.rs | 10 +++++----- rust/datafusion/src/physical_plan/mod.rs | 2 +- rust/datafusion/src/physical_plan/parquet.rs | 4 ++-- rust/parquet/src/basic.rs | 2 +- 10 files changed, 28 insertions(+), 26 deletions(-) diff --git a/rust/arrow/regen.sh b/rust/arrow/regen.sh index b0193689d5d..723ff52dffb 100755 --- a/rust/arrow/regen.sh +++ b/rust/arrow/regen.sh @@ -147,4 +147,6 @@ done popd cargo +stable fmt -- src/ipc/gen/* -echo "DONE! please run cargo doc and fix possible warnings!" +echo "DONE!" +echo "Please run 'cargo doc' and 'cargo test' with nightly and stable, " +echo "and fix possible errors or warnings!" 
diff --git a/rust/arrow/src/array/equal/structure.rs b/rust/arrow/src/array/equal/structure.rs index 8779a160460..6ec71837b86 100644 --- a/rust/arrow/src/array/equal/structure.rs +++ b/rust/arrow/src/array/equal/structure.rs @@ -27,7 +27,7 @@ use super::{equal_range, utils::child_logical_null_buffer}; /// If an array is a child of a struct or list, the array's nulls have to be merged with the parent. /// This then affects the null count of the array, thus the merged nulls are passed separately /// as `lhs_nulls` and `rhs_nulls` variables to functions. -/// The nulls are merged with a bitwise AND, and null counts are recomputed wheer necessary. +/// The nulls are merged with a bitwise AND, and null counts are recomputed where necessary. fn equal_values( lhs: &ArrayData, rhs: &ArrayData, diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index 7b16d95a868..8c03a755789 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -487,7 +487,7 @@ make_type!( ); /// A subtype of primitive type that represents legal dictionary keys. -/// See https://arrow.apache.org/docs/format/Columnar.html +/// See <https://arrow.apache.org/docs/format/Columnar.html> pub trait ArrowDictionaryKeyType: ArrowPrimitiveType {} impl ArrowDictionaryKeyType for Int8Type {} diff --git a/rust/arrow/src/ffi.rs b/rust/arrow/src/ffi.rs index 53c0a13d5b9..c3b050916cc 100644 --- a/rust/arrow/src/ffi.rs +++ b/rust/arrow/src/ffi.rs @@ -91,7 +91,7 @@ use crate::error::{ArrowError, Result}; use crate::util::bit_util; /// ABI-compatible struct for `ArrowSchema` from C Data Interface -/// See https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions +/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions> /// This was created by bindgen #[repr(C)] #[derive(Debug)] @@ -120,7 +120,7 @@ unsafe extern "C" fn release_schema(schema: *mut FFI_ArrowSchema) { impl FFI_ArrowSchema { /// create a new [FFI_ArrowSchema] from a format.
fn new(format: &str) -> FFI_ArrowSchema { - // https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema + // <https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema> FFI_ArrowSchema { format: CString::new(format).unwrap().into_raw(), name: std::ptr::null_mut(), @@ -303,7 +303,7 @@ fn bit_width(data_type: &DataType, i: usize) -> Result { } /// ABI-compatible struct for ArrowArray from C Data Interface -/// See https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions +/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions> /// This was created by bindgen #[repr(C)] #[derive(Debug)] diff --git a/rust/arrow/src/ipc/gen/Schema.rs b/rust/arrow/src/ipc/gen/Schema.rs index 5dcc7a0fa84..61a9574221c 100644 --- a/rust/arrow/src/ipc/gen/Schema.rs +++ b/rust/arrow/src/ipc/gen/Schema.rs @@ -1594,7 +1594,7 @@ pub enum MapOffset {} /// not enforced. /// /// Map -/// ``` +/// ```text /// - child[0] entries: Struct /// - child[0] key: K /// - child[1] value: V diff --git a/rust/arrow/src/ipc/gen/SparseTensor.rs b/rust/arrow/src/ipc/gen/SparseTensor.rs index aef429f489f..532f73cb5d1 100644 --- a/rust/arrow/src/ipc/gen/SparseTensor.rs +++ b/rust/arrow/src/ipc/gen/SparseTensor.rs @@ -524,7 +524,7 @@ impl<'a> SparseMatrixIndexCSX<'a> { /// /// For example, let X be the following 6x4 matrix: /// - /// ``` + /// ```text /// X := [[0, 1, 2, 0], /// [0, 0, 3, 0], /// [0, 4, 0, 5], @@ -533,11 +533,11 @@ impl<'a> SparseMatrixIndexCSX<'a> { /// [0, 9, 0, 0]]. /// ``` /// The array of non-zero values in X is: - /// ``` + /// ```text /// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. /// ``` /// And the indptr of X is: - /// ``` + /// ```text /// indptr(X) = [0, 2, 3, 5, 5, 8, 10]. /// ``` #[inline] @@ -560,8 +560,8 @@ impl<'a> SparseMatrixIndexCSX<'a> { /// contains the column indices of the corresponding non-zero values. /// The type of index value is long. /// - /// For example, the indices of the above X is: - /// ``` + /// For example, the indices of the above X is + /// ```text /// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
/// ``` /// Note that the indices are sorted in lexicographical order for each row. @@ -752,7 +752,7 @@ impl<'a> SparseTensorIndexCSF<'a> { pub const VT_AXISORDER: flatbuffers::VOffsetT = 12; /// CSF is a generalization of compressed sparse row (CSR) index. - /// See \[smith2017knl\]: http://shaden.io/pub-files/smith2017knl.pdf + /// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf) /// /// CSF index recursively compresses each dimension of a tensor into a set /// of prefix trees. Each path from a root to leaf forms one tensor @@ -761,7 +761,7 @@ impl<'a> SparseTensorIndexCSF<'a> { /// /// For example, let X be a 2x3x4x5 tensor and let it have the following /// 8 non-zero values: - /// ``` + /// ```text /// X[0, 0, 0, 1] := 1 /// X[0, 0, 0, 2] := 2 /// X[0, 1, 0, 0] := 3 @@ -799,7 +799,7 @@ impl<'a> SparseTensorIndexCSF<'a> { /// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. /// /// For example, the indptrBuffers for the above X is: - /// ``` + /// ```text /// indptrBuffer(X) = [ /// [0, 2, 3], /// [0, 1, 3, 4], @@ -829,7 +829,7 @@ impl<'a> SparseTensorIndexCSF<'a> { /// indicesBuffers stores values of nodes. /// Each tensor dimension corresponds to a buffer in indicesBuffers. /// For example, the indicesBuffers for the above X is: - /// ``` + /// ```text /// indicesBuffer(X) = [ /// [0, 1], /// [0, 1, 1], @@ -850,7 +850,7 @@ impl<'a> SparseTensorIndexCSF<'a> { /// axisOrder stores the sequence in which dimensions were traversed to /// produce the prefix tree. /// For example, the axisOrder for the above X is: - /// ``` + /// ```text /// axisOrder(X) = [0, 1, 2, 3]. 
/// ``` #[inline] diff --git a/rust/datafusion/src/logical_plan/plan.rs b/rust/datafusion/src/logical_plan/plan.rs index f120548d5ed..8002d16c44e 100644 --- a/rust/datafusion/src/logical_plan/plan.rs +++ b/rust/datafusion/src/logical_plan/plan.rs @@ -213,7 +213,7 @@ pub enum Partitioning { RoundRobinBatch(usize), /// Allocate rows based on a hash of one of more expressions and the specified number /// of partitions. - /// This partitioning scheme is not yet fully supported. See https://issues.apache.org/jira/browse/ARROW-11011 + /// This partitioning scheme is not yet fully supported. See <https://issues.apache.org/jira/browse/ARROW-11011> Hash(Vec, usize), } @@ -248,7 +248,7 @@ pub trait PlanVisitor { /// Invoked on a logical plan before any of its child inputs have been /// visited. If Ok(true) is returned, the recursion continues. If /// Err(..) or Ok(false) are returned, the recursion stops - /// immedately and the error, if any, is returned to `accept` + /// immediately and the error, if any, is returned to `accept` fn pre_visit(&mut self, plan: &LogicalPlan) -> std::result::Result; @@ -835,9 +835,9 @@ mod tests { } } - /// test earliy stopping in pre-visit + /// test early stopping in pre-visit #[test] - fn early_stoping_pre_visit() { + fn early_stopping_pre_visit() { let mut visitor = StoppingVisitor { return_false_from_pre_in: OptionalCounter::new(2), ..Default::default() @@ -853,7 +853,7 @@ mod tests { } #[test] - fn early_stoping_post_visit() { + fn early_stopping_post_visit() { let mut visitor = StoppingVisitor { return_false_from_post_in: OptionalCounter::new(1), ..Default::default() diff --git a/rust/datafusion/src/physical_plan/mod.rs b/rust/datafusion/src/physical_plan/mod.rs index 605e5d6f44a..f2b984bb306 100644 --- a/rust/datafusion/src/physical_plan/mod.rs +++ b/rust/datafusion/src/physical_plan/mod.rs @@ -131,7 +131,7 @@ pub enum Partitioning { RoundRobinBatch(usize), /// Allocate rows based on a hash of one of more expressions and the specified /// number of partitions - /// This partitioning
scheme is not yet fully supported. See https://issues.apache.org/jira/browse/ARROW-11011 + /// This partitioning scheme is not yet fully supported. See [ARROW-11011](https://issues.apache.org/jira/browse/ARROW-11011) Hash(Vec>, usize), /// Unknown partitioning scheme with a known number of partitions UnknownPartitioning(usize), diff --git a/rust/datafusion/src/physical_plan/parquet.rs b/rust/datafusion/src/physical_plan/parquet.rs index 53b26678320..9a03afdf426 100644 --- a/rust/datafusion/src/physical_plan/parquet.rs +++ b/rust/datafusion/src/physical_plan/parquet.rs @@ -60,11 +60,11 @@ pub struct ParquetExec { /// /// In the future it would be good to support subsets of files based on ranges of row groups /// so that we can better parallelize reads of large files across available cores (see -/// https://issues.apache.org/jira/browse/ARROW-10995). +/// [ARROW-10995](https://issues.apache.org/jira/browse/ARROW-10995)). /// /// We may also want to support reading Parquet files that are partitioned based on a key and /// in this case we would want this partition struct to represent multiple files for a given -/// partition key (see https://issues.apache.org/jira/browse/ARROW-11019). +/// partition key (see [ARROW-11019](https://issues.apache.org/jira/browse/ARROW-11019)). #[derive(Debug, Clone)] pub struct ParquetPartition { /// The Parquet filename for this partition diff --git a/rust/parquet/src/basic.rs b/rust/parquet/src/basic.rs index 0cea0439402..bf41d43da90 100644 --- a/rust/parquet/src/basic.rs +++ b/rust/parquet/src/basic.rs @@ -256,7 +256,7 @@ pub enum PageType { /// min/max. /// /// See reference in -/// https://github.com/apache/parquet-cpp/blob/master/src/parquet/types.h +/// <https://github.com/apache/parquet-cpp/blob/master/src/parquet/types.h> #[derive(Debug, Clone, Copy, PartialEq)] pub enum SortOrder { /// Signed (either value or legacy byte-wise) comparison.
From c7d7d5a39ff460e2305bff0f4bccc5ead9e01ad8 Mon Sep 17 00:00:00 2001 From: mqy Date: Fri, 8 Jan 2021 18:32:55 +0800 Subject: [PATCH 4/7] Patch fbs files --- rust/arrow/format-0ed34c83.patch | 258 +++++++++++++++++++++++++ rust/arrow/regen.sh | 6 +- rust/arrow/src/ipc/gen/SparseTensor.rs | 12 +- 3 files changed, 268 insertions(+), 8 deletions(-) create mode 100644 rust/arrow/format-0ed34c83.patch diff --git a/rust/arrow/format-0ed34c83.patch b/rust/arrow/format-0ed34c83.patch new file mode 100644 index 00000000000..4cb026030f2 --- /dev/null +++ b/rust/arrow/format-0ed34c83.patch @@ -0,0 +1,258 @@ +diff --git a/format/Message.fbs b/format/Message.fbs +index 1a7e0dfff..f1c18d765 100644 +--- a/format/Message.fbs ++++ b/format/Message.fbs +@@ -28,7 +28,7 @@ namespace org.apache.arrow.flatbuf; + /// Metadata about a field at some level of a nested type tree (but not + /// its children). + /// +-/// For example, a List with values [[1, 2, 3], null, [4], [5, 6], null] ++/// For example, a List with values `[[1, 2, 3], null, [4], [5, 6], null]` + /// would have {length: 5, null_count: 2} for its List node, and {length: 6, + /// null_count: 0} for its Int16 node, as separate FieldNode structs + struct FieldNode { +diff --git a/format/Schema.fbs b/format/Schema.fbs +index 3b37e5d85..3b00dd478 100644 +--- a/format/Schema.fbs ++++ b/format/Schema.fbs +@@ -110,10 +110,11 @@ table FixedSizeList { + /// not enforced. + /// + /// Map ++/// ```text + /// - child[0] entries: Struct + /// - child[0] key: K + /// - child[1] value: V +-/// ++/// ``` + /// Neither the "entries" field nor the "key" field may be nullable. 
+ /// + /// The metadata is structured so that Arrow systems without special handling +@@ -129,7 +130,7 @@ enum UnionMode:short { Sparse, Dense } + /// A union is a complex type with children in Field + /// By default ids in the type vector refer to the offsets in the children + /// optionally typeIds provides an indirection between the child offset and the type id +-/// for each child typeIds[offset] is the id used in the type vector ++/// for each child `typeIds[offset]` is the id used in the type vector + table Union { + mode: UnionMode; + typeIds: [ int ]; // optional, describes typeid of each child. +diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs +index 3fe8a7582..a6fd2f9e7 100644 +--- a/format/SparseTensor.fbs ++++ b/format/SparseTensor.fbs +@@ -37,21 +37,21 @@ namespace org.apache.arrow.flatbuf; + /// + /// For example, let X be a 2x3x4x5 tensor, and it has the following + /// 6 non-zero values: +-/// ++/// ```text + /// X[0, 1, 2, 0] := 1 + /// X[1, 1, 2, 3] := 2 + /// X[0, 2, 1, 0] := 3 + /// X[0, 1, 3, 0] := 4 + /// X[0, 1, 2, 1] := 5 + /// X[1, 2, 0, 4] := 6 +-/// ++/// ``` + /// In COO format, the index matrix of X is the following 4x6 matrix: +-/// ++/// ```text + /// [[0, 0, 0, 0, 1, 1], + /// [1, 1, 1, 2, 1, 2], + /// [2, 2, 3, 1, 2, 0], + /// [0, 1, 0, 0, 3, 4]] +-/// ++/// ``` + /// When isCanonical is true, the indices is sorted in lexicographical order + /// (row-major order), and it does not have duplicated entries. Otherwise, + /// the indices may not be sorted, or may have duplicated entries. +@@ -86,26 +86,27 @@ table SparseMatrixIndexCSX { + + /// indptrBuffer stores the location and size of indptr array that + /// represents the range of the rows. +- /// The i-th row spans from indptr[i] to indptr[i+1] in the data. ++ /// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. + /// The length of this array is 1 + (the number of rows), and the type + /// of index value is long. 
+ /// + /// For example, let X be the following 6x4 matrix: +- /// ++ /// ```text + /// X := [[0, 1, 2, 0], + /// [0, 0, 3, 0], + /// [0, 4, 0, 5], + /// [0, 0, 0, 0], + /// [6, 0, 7, 8], + /// [0, 9, 0, 0]]. +- /// ++ /// ``` + /// The array of non-zero values in X is: +- /// ++ /// ```text + /// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. +- /// ++ /// ``` + /// And the indptr of X is: +- /// ++ /// ```text + /// indptr(X) = [0, 2, 3, 5, 5, 8, 10]. ++ /// ``` + indptrBuffer: Buffer (required); + + /// The type of values in indicesBuffer +@@ -116,9 +117,9 @@ table SparseMatrixIndexCSX { + /// The type of index value is long. + /// + /// For example, the indices of the above X is: +- /// ++ /// ```text + /// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. +- /// ++ /// ``` + /// Note that the indices are sorted in lexicographical order for each row. + indicesBuffer: Buffer (required); + } +@@ -126,7 +127,7 @@ table SparseMatrixIndexCSX { + /// Compressed Sparse Fiber (CSF) sparse tensor index. + table SparseTensorIndexCSF { + /// CSF is a generalization of compressed sparse row (CSR) index. +- /// See [smith2017knl]: http://shaden.io/pub-files/smith2017knl.pdf ++ /// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf) + /// + /// CSF index recursively compresses each dimension of a tensor into a set + /// of prefix trees. 
Each path from a root to leaf forms one tensor +@@ -135,7 +136,7 @@ table SparseTensorIndexCSF { + /// + /// For example, let X be a 2x3x4x5 tensor and let it have the following + /// 8 non-zero values: +- /// ++ /// ```text + /// X[0, 0, 0, 1] := 1 + /// X[0, 0, 0, 2] := 2 + /// X[0, 1, 0, 0] := 3 +@@ -144,9 +145,9 @@ table SparseTensorIndexCSF { + /// X[1, 1, 1, 0] := 6 + /// X[1, 1, 1, 1] := 7 + /// X[1, 1, 1, 2] := 8 +- /// ++ /// ``` + /// As a prefix tree this would be represented as: +- /// ++ /// ```text + /// 0 1 + /// / \ | + /// 0 1 1 +@@ -154,24 +155,24 @@ table SparseTensorIndexCSF { + /// 0 0 1 1 + /// /| /| | /| | + /// 1 2 0 2 0 0 1 2 +- ++ /// ``` + /// The type of values in indptrBuffers + indptrType: Int (required); + + /// indptrBuffers stores the sparsity structure. + /// Each two consecutive dimensions in a tensor correspond to a buffer in +- /// indptrBuffers. A pair of consecutive values at indptrBuffers[dim][i] +- /// and indptrBuffers[dim][i + 1] signify a range of nodes in +- /// indicesBuffers[dim + 1] who are children of indicesBuffers[dim][i] node. ++ /// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` ++ /// and `indptrBuffers[dim][i + 1]` signify a range of nodes in ++ /// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. + /// + /// For example, the indptrBuffers for the above X is: +- /// ++ /// ```text + /// indptrBuffer(X) = [ + /// [0, 2, 3], + /// [0, 1, 3, 4], + /// [0, 2, 4, 5, 8] + /// ]. +- /// ++ /// ``` + indptrBuffers: [Buffer] (required); + + /// The type of values in indicesBuffers +@@ -180,22 +181,22 @@ table SparseTensorIndexCSF { + /// indicesBuffers stores values of nodes. + /// Each tensor dimension corresponds to a buffer in indicesBuffers. + /// For example, the indicesBuffers for the above X is: +- /// ++ /// ```text + /// indicesBuffer(X) = [ + /// [0, 1], + /// [0, 1, 1], + /// [0, 0, 1, 1], + /// [1, 2, 0, 2, 0, 0, 1, 2] + /// ]. 
+- /// ++ /// ``` + indicesBuffers: [Buffer] (required); + + /// axisOrder stores the sequence in which dimensions were traversed to + /// produce the prefix tree. + /// For example, the axisOrder for the above X is: +- /// ++ /// ```text + /// axisOrder(X) = [0, 1, 2, 3]. +- /// ++ /// ``` + axisOrder: [int] (required); + } + +diff --git a/rust/arrow/src/ipc/gen/SparseTensor.rs b/rust/arrow/src/ipc/gen/SparseTensor.rs +index 532f73cb5..04a23398b 100644 +--- a/rust/arrow/src/ipc/gen/SparseTensor.rs ++++ b/rust/arrow/src/ipc/gen/SparseTensor.rs +@@ -235,21 +235,21 @@ pub enum SparseTensorIndexCOOOffset {} + /// + /// For example, let X be a 2x3x4x5 tensor, and it has the following + /// 6 non-zero values: +-/// ++/// ```text + /// X[0, 1, 2, 0] := 1 + /// X[1, 1, 2, 3] := 2 + /// X[0, 2, 1, 0] := 3 + /// X[0, 1, 3, 0] := 4 + /// X[0, 1, 2, 1] := 5 + /// X[1, 2, 0, 4] := 6 +-/// ++/// ``` + /// In COO format, the index matrix of X is the following 4x6 matrix: +-/// ++/// ```text + /// [[0, 0, 0, 0, 1, 1], + /// [1, 1, 1, 2, 1, 2], + /// [2, 2, 3, 1, 2, 0], + /// [0, 1, 0, 0, 3, 4]] +-/// ++/// ``` + /// When isCanonical is true, the indices is sorted in lexicographical order + /// (row-major order), and it does not have duplicated entries. Otherwise, + /// the indices may not be sorted, or may have duplicated entries. +@@ -523,7 +523,6 @@ impl<'a> SparseMatrixIndexCSX<'a> { + /// of index value is long. + /// + /// For example, let X be the following 6x4 matrix: +- /// + /// ```text + /// X := [[0, 1, 2, 0], + /// [0, 0, 3, 0], +@@ -560,7 +559,7 @@ impl<'a> SparseMatrixIndexCSX<'a> { + /// contains the column indices of the corresponding non-zero values. + /// The type of index value is long. + /// +- /// For example, the indices of the above X is ++ /// For example, the indices of the above X is: + /// ```text + /// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. 
+ /// ``` +@@ -772,7 +771,6 @@ impl<'a> SparseTensorIndexCSF<'a> { + /// X[1, 1, 1, 2] := 8 + /// ``` + /// As a prefix tree this would be represented as: +- /// + /// ```text + /// 0 1 + /// / \ | diff --git a/rust/arrow/regen.sh b/rust/arrow/regen.sh index 723ff52dffb..1abfe79f7d1 100755 --- a/rust/arrow/regen.sh +++ b/rust/arrow/regen.sh @@ -54,6 +54,11 @@ echo "run: bazel build :flatc ..." bazel build :flatc popd +FB_PATCH="rust/arrow/format-0ed34c83.patch" +echo"Patch flatbuffer files with ${FB_PATCH} for cargo doc" +echo "NOTE: the patch MAY need update in case of changes in format/*.fbs" +git apply rust/arrow/format-5504ee4.patch + # Execute the code generation: $FLATC --filename-suffix "" --rust -o rust/arrow/src/ipc/gen/ format/*.fbs @@ -97,7 +102,6 @@ names=("File" "Message" "Schema" "SparseTensor" "Tensor") # Remove all generated lines we don't need for f in `ls *.rs`; do - if [[ $f == "mod.rs" ]]; then continue fi diff --git a/rust/arrow/src/ipc/gen/SparseTensor.rs b/rust/arrow/src/ipc/gen/SparseTensor.rs index 532f73cb5d1..04a23398bef 100644 --- a/rust/arrow/src/ipc/gen/SparseTensor.rs +++ b/rust/arrow/src/ipc/gen/SparseTensor.rs @@ -235,21 +235,21 @@ pub enum SparseTensorIndexCOOOffset {} /// /// For example, let X be a 2x3x4x5 tensor, and it has the following /// 6 non-zero values: -/// +/// ```text /// X[0, 1, 2, 0] := 1 /// X[1, 1, 2, 3] := 2 /// X[0, 2, 1, 0] := 3 /// X[0, 1, 3, 0] := 4 /// X[0, 1, 2, 1] := 5 /// X[1, 2, 0, 4] := 6 -/// +/// ``` /// In COO format, the index matrix of X is the following 4x6 matrix: -/// +/// ```text /// [[0, 0, 0, 0, 1, 1], /// [1, 1, 1, 2, 1, 2], /// [2, 2, 3, 1, 2, 0], /// [0, 1, 0, 0, 3, 4]] -/// +/// ``` /// When isCanonical is true, the indices is sorted in lexicographical order /// (row-major order), and it does not have duplicated entries. Otherwise, /// the indices may not be sorted, or may have duplicated entries. @@ -523,7 +523,6 @@ impl<'a> SparseMatrixIndexCSX<'a> { /// of index value is long. 
/// /// For example, let X be the following 6x4 matrix: - /// /// ```text /// X := [[0, 1, 2, 0], /// [0, 0, 3, 0], @@ -560,7 +559,7 @@ impl<'a> SparseMatrixIndexCSX<'a> { /// contains the column indices of the corresponding non-zero values. /// The type of index value is long. /// - /// For example, the indices of the above X is + /// For example, the indices of the above X is: /// ```text /// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. /// ``` @@ -772,7 +771,6 @@ impl<'a> SparseTensorIndexCSF<'a> { /// X[1, 1, 1, 2] := 8 /// ``` /// As a prefix tree this would be represented as: - /// /// ```text /// 0 1 /// / \ | From 1aed118b9b32eff38879af4c7ba497a05739c4c1 Mon Sep 17 00:00:00 2001 From: mqy Date: Fri, 8 Jan 2021 20:38:21 +0800 Subject: [PATCH 5/7] Add license to patch file, update regen.sh --- rust/arrow/format-0ed34c83.patch | 74 ++++++++------------------------ rust/arrow/regen.sh | 7 ++- 2 files changed, 23 insertions(+), 58 deletions(-) diff --git a/rust/arrow/format-0ed34c83.patch b/rust/arrow/format-0ed34c83.patch index 4cb026030f2..5da0a0c51f0 100644 --- a/rust/arrow/format-0ed34c83.patch +++ b/rust/arrow/format-0ed34c83.patch @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ diff --git a/format/Message.fbs b/format/Message.fbs index 1a7e0dfff..f1c18d765 100644 --- a/format/Message.fbs @@ -200,59 +217,4 @@ index 3fe8a7582..a6fd2f9e7 100644 + /// ``` axisOrder: [int] (required); } - -diff --git a/rust/arrow/src/ipc/gen/SparseTensor.rs b/rust/arrow/src/ipc/gen/SparseTensor.rs -index 532f73cb5..04a23398b 100644 ---- a/rust/arrow/src/ipc/gen/SparseTensor.rs -+++ b/rust/arrow/src/ipc/gen/SparseTensor.rs -@@ -235,21 +235,21 @@ pub enum SparseTensorIndexCOOOffset {} - /// - /// For example, let X be a 2x3x4x5 tensor, and it has the following - /// 6 non-zero values: --/// -+/// ```text - /// X[0, 1, 2, 0] := 1 - /// X[1, 1, 2, 3] := 2 - /// X[0, 2, 1, 0] := 3 - /// X[0, 1, 3, 0] := 4 - /// X[0, 1, 2, 1] := 5 - /// X[1, 2, 0, 4] := 6 --/// -+/// ``` - /// In COO format, the index matrix of X is the following 4x6 matrix: --/// -+/// ```text - /// [[0, 0, 0, 0, 1, 1], - /// [1, 1, 1, 2, 1, 2], - /// [2, 2, 3, 1, 2, 0], - /// [0, 1, 0, 0, 3, 4]] --/// -+/// ``` - /// When isCanonical is true, the indices is sorted in lexicographical order - /// (row-major order), and it does not have duplicated entries. Otherwise, - /// the indices may not be sorted, or may have duplicated entries. -@@ -523,7 +523,6 @@ impl<'a> SparseMatrixIndexCSX<'a> { - /// of index value is long. - /// - /// For example, let X be the following 6x4 matrix: -- /// - /// ```text - /// X := [[0, 1, 2, 0], - /// [0, 0, 3, 0], -@@ -560,7 +559,7 @@ impl<'a> SparseMatrixIndexCSX<'a> { - /// contains the column indices of the corresponding non-zero values. - /// The type of index value is long. - /// -- /// For example, the indices of the above X is -+ /// For example, the indices of the above X is: - /// ```text - /// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. 
- /// ``` -@@ -772,7 +771,6 @@ impl<'a> SparseTensorIndexCSF<'a> { - /// X[1, 1, 1, 2] := 8 - /// ``` - /// As a prefix tree this would be represented as: -- /// - /// ```text - /// 0 1 - /// / \ | + diff --git a/rust/arrow/regen.sh b/rust/arrow/regen.sh index 1abfe79f7d1..e96f11e3800 100755 --- a/rust/arrow/regen.sh +++ b/rust/arrow/regen.sh @@ -55,13 +55,16 @@ bazel build :flatc popd FB_PATCH="rust/arrow/format-0ed34c83.patch" -echo"Patch flatbuffer files with ${FB_PATCH} for cargo doc" +echo "Patch flatbuffer files with ${FB_PATCH} for cargo doc" echo "NOTE: the patch MAY need update in case of changes in format/*.fbs" -git apply rust/arrow/format-5504ee4.patch +git apply --check ${FB_PATCH} && git apply ${FB_PATCH} # Execute the code generation: $FLATC --filename-suffix "" --rust -o rust/arrow/src/ipc/gen/ format/*.fbs +# Reset changes to format/ +git checkout -- format + # Now the files are wrongly named so we have to change that. popd pushd $DIR/src/ipc/gen From b538ae8564270b410e43043c8240b93df6276216 Mon Sep 17 00:00:00 2001 From: mqy Date: Fri, 8 Jan 2021 21:32:56 +0800 Subject: [PATCH 6/7] Fix links in rust/parquet/ --- rust/parquet/src/arrow/levels.rs | 2 +- rust/parquet/src/encodings/rle.rs | 2 +- rust/parquet/src/record/reader.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/parquet/src/arrow/levels.rs b/rust/parquet/src/arrow/levels.rs index 32617a15680..846ceabc03d 100644 --- a/rust/parquet/src/arrow/levels.rs +++ b/rust/parquet/src/arrow/levels.rs @@ -37,7 +37,7 @@ //! We use an eager approach that increments definition levels where incrementable, and decrements //! if a value being checked is null. //! -//! \[1\] [parquet-format#nested-encoding] +//! 
\[1\] [parquet-format#nested-encoding](https://github.com/apache/parquet-format#nested-encoding) use arrow::array::{Array, ArrayRef, StructArray}; use arrow::datatypes::{DataType, Field}; diff --git a/rust/parquet/src/encodings/rle.rs b/rust/parquet/src/encodings/rle.rs index 5a522017c59..d8cd50d3b91 100644 --- a/rust/parquet/src/encodings/rle.rs +++ b/rust/parquet/src/encodings/rle.rs @@ -25,7 +25,7 @@ use crate::util::{ /// Rle/Bit-Packing Hybrid Encoding /// The grammar for this encoding looks like the following (copied verbatim -/// from https://github.com/Parquet/parquet-format/blob/master/Encodings.md): +/// from <https://github.com/Parquet/parquet-format/blob/master/Encodings.md>): /// /// rle-bit-packed-hybrid: <length> <encoded-data> /// length := length of the <encoded-data> in bytes stored as 4 bytes little endian diff --git a/rust/parquet/src/record/reader.rs b/rust/parquet/src/record/reader.rs index 882187cb38e..0b02bc8ed46 100644 --- a/rust/parquet/src/record/reader.rs +++ b/rust/parquet/src/record/reader.rs @@ -346,7 +346,7 @@ impl Reader { /// Returns true if repeated type is an element type for the list. /// Used to determine legacy list types. /// This method is copied from Spark Parquet reader and is based on the reference: - /// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md + /// <https://github.com/apache/parquet-format/blob/master/LogicalTypes.md> /// #backward-compatibility-rules fn is_element_type(repeated_type: &Type) -> bool { // For legacy 2-level list types with primitive element type, e.g.: From 8cf807f8ba1e4a2f769dad8a1b1f63dc725eb34e Mon Sep 17 00:00:00 2001 From: mqy Date: Sat, 9 Jan 2021 03:48:41 +0800 Subject: [PATCH 7/7] Add update note for parquet-format --- rust/parquet/Cargo.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust/parquet/Cargo.toml b/rust/parquet/Cargo.toml index 72b0e9da332..6529a85d7eb 100644 --- a/rust/parquet/Cargo.toml +++ b/rust/parquet/Cargo.toml @@ -29,6 +29,8 @@ build = "build.rs" edition = "2018" [dependencies] +# update note: pin `parquet-format` to specific version until it does not break at minor +# version, see ARROW-11187.
parquet-format = "~2.6.1" byteorder = "1" thrift = "0.13"