diff --git a/.env b/.env index 5c6aaa5182a..c06a7f9f128 100644 --- a/.env +++ b/.env @@ -28,7 +28,7 @@ FEDORA=33 PYTHON=3.6 LLVM=11 CLANG_TOOLS=8 -RUST=nightly-2020-04-22 +RUST=nightly-2020-11-14 GO=1.12 NODE=14 MAVEN=3.5.4 diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index b4f68ece02b..d99641e7a0c 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -50,7 +50,7 @@ jobs: strategy: fail-fast: false matrix: - rust: [nightly-2020-04-22] + rust: [nightly-2020-11-14] env: RUST: ${{ matrix.rust }} steps: @@ -96,7 +96,7 @@ jobs: strategy: fail-fast: false matrix: - rust: [nightly-2020-04-22] + rust: [nightly-2020-11-14] steps: - name: Install Rust uses: actions-rs/toolchain@v1 @@ -134,7 +134,7 @@ jobs: strategy: fail-fast: false matrix: - rust: [nightly-2020-04-22] + rust: [nightly-2020-11-14] steps: - name: Install Rust uses: actions-rs/toolchain@v1 diff --git a/.github/workflows/rust_cron.yml b/.github/workflows/rust_cron.yml index 378f2dd1081..3f605f11210 100644 --- a/.github/workflows/rust_cron.yml +++ b/.github/workflows/rust_cron.yml @@ -35,7 +35,7 @@ jobs: strategy: fail-fast: false matrix: - rust: [nightly-2020-04-22] + rust: [nightly-2020-11-14] env: RUST: ${{ matrix.rust }} steps: diff --git a/ci/docker/debian-10-rust.dockerfile b/ci/docker/debian-10-rust.dockerfile index 9c9c9b51048..b074eb1286f 100644 --- a/ci/docker/debian-10-rust.dockerfile +++ b/ci/docker/debian-10-rust.dockerfile @@ -33,7 +33,7 @@ RUN wget -q -O - https://github.com/google/flatbuffers/archive/v${flatbuffers}.t cd / && \ rm -rf flatbuffers-${flatbuffers} -ARG rust=nightly-2020-04-22 +ARG rust=nightly-2020-11-14 # freeze the version for deterministic builds RUN rustup default ${rust} && \ diff --git a/ci/scripts/rust_coverage.sh b/ci/scripts/rust_coverage.sh index fbe5b0d853a..a8c1c362594 100755 --- a/ci/scripts/rust_coverage.sh +++ b/ci/scripts/rust_coverage.sh @@ -32,7 +32,9 @@ pushd ${source_dir} rustup default ${rust} rustup component add rustfmt --toolchain ${rust}-x86_64-unknown-linux-gnu -cargo install cargo-tarpaulin +# 2020-11-15: There is a cargo-tarpaulin regression in 0.17.0 +# see https://github.com/xd009642/tarpaulin/issues/618 +cargo install --version 0.16.0 cargo-tarpaulin cargo tarpaulin --out Xml diff --git a/rust/arrow/benches/array_from_vec.rs b/rust/arrow/benches/array_from_vec.rs index 41900b8831a..6135bcdaab9 100644 --- a/rust/arrow/benches/array_from_vec.rs +++ b/rust/arrow/benches/array_from_vec.rs @@ -76,8 +76,7 @@ fn struct_array_from_vec( let ints: ArrayRef = Arc::new(Int32Array::from(ints.clone())); criterion::black_box( - StructArray::try_from(vec![(field1.clone(), strings), (field2.clone(), ints)]) - .unwrap(), + StructArray::try_from(vec![(field1, strings), (field2, ints)]).unwrap(), ); } diff --git a/rust/arrow/benches/equal.rs b/rust/arrow/benches/equal.rs index 9d9c68abbb6..6783662e186 100644 --- a/rust/arrow/benches/equal.rs +++ b/rust/arrow/benches/equal.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +// Allowed because we use `arr == arr` in benchmarks +#![allow(clippy::eq_op)] + #[macro_use] extern crate criterion; use criterion::Criterion; diff --git a/rust/arrow/benches/filter_kernels.rs b/rust/arrow/benches/filter_kernels.rs index 75c04352c0a..1348238b074 100644 --- a/rust/arrow/benches/filter_kernels.rs +++ b/rust/arrow/benches/filter_kernels.rs @@ -81,18 +81,9 @@ fn bench_filter_context_f32(data_array: &Float32Array, filter_context: &FilterCo fn add_benchmark(c: &mut Criterion) { let size = 65536; - let filter_array = create_bool_array(size, |i| match i % 2 { - 0 => true, - _ => false, - }); - let sparse_filter_array = create_bool_array(size, |i| match i % 8000 { - 0 => true, - _ => false, - }); - let dense_filter_array = create_bool_array(size, |i| match i % 8000 { - 0 => false, - _ => true, - }); + let filter_array = create_bool_array(size, |i| matches!(i % 2, 0)); + let sparse_filter_array = create_bool_array(size, |i| matches!(i % 8000, 0)); + let dense_filter_array = create_bool_array(size, |i| !matches!(i % 8000, 0)); let filter_context = FilterContext::new(&filter_array).unwrap(); let sparse_filter_context = FilterContext::new(&sparse_filter_array).unwrap(); diff --git a/rust/arrow/examples/dynamic_types.rs b/rust/arrow/examples/dynamic_types.rs index cd4d30e79c5..95e0a2831e3 100644 --- a/rust/arrow/examples/dynamic_types.rs +++ b/rust/arrow/examples/dynamic_types.rs @@ -62,7 +62,8 @@ fn main() -> Result<()> { let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id), Arc::new(nested)])?; - Ok(process(&batch)) + process(&batch); + Ok(()) } /// Create a new batch by performing a projection of id, nested.c diff --git a/rust/arrow/src/array/array_string.rs b/rust/arrow/src/array/array_string.rs index ec79ac9010d..5f871b8f595 100644 --- a/rust/arrow/src/array/array_string.rs +++ b/rust/arrow/src/array/array_string.rs @@ -434,10 +434,8 @@ mod tests { // from Iterator> let array2: StringArray = data.clone().into_iter().collect(); // from Iterator> - let array3: StringArray = data - .into_iter() - .map(|x| x.map(|s| format!("{}", s))) - .collect(); + let array3: StringArray = + data.into_iter().map(|x| x.map(|s| s.to_string())).collect(); assert_eq!(array1, array2); assert_eq!(array2, array3); diff --git a/rust/arrow/src/array/array_struct.rs b/rust/arrow/src/array/array_struct.rs index 3715a8b1501..8565a3a5194 100644 --- a/rust/arrow/src/array/array_struct.rs +++ b/rust/arrow/src/array/array_struct.rs @@ -408,7 +408,7 @@ mod tests { Err(ArrowError::InvalidArgumentError(e)) => { assert!(e.starts_with("Array of field \"f2\" has length 4, but previous elements have length 3.")); } - _ => assert!(false, "This test got an unexpected error type"), + _ => panic!("This test got an unexpected error type"), }; } diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs index d3b2f9890a5..ac8c40d9a75 100644 --- a/rust/arrow/src/array/builder.rs +++ b/rust/arrow/src/array/builder.rs @@ -188,7 +188,7 @@ pub trait BufferBuilderTrait { /// /// assert!(builder.capacity() >= 20); /// ``` - fn reserve(&mut self, n: usize) -> (); + fn reserve(&mut self, n: usize); /// Appends a value of type `T` into the builder, /// growing the internal buffer as needed. @@ -370,7 +370,8 @@ impl BufferBuilderTrait for BufferBuilder { } Ok(()) } else { - Ok(self.write_bytes(slice.to_byte_slice(), array_slots)) + self.write_bytes(slice.to_byte_slice(), array_slots); + Ok(()) } } diff --git a/rust/arrow/src/array/data.rs b/rust/arrow/src/array/data.rs index 0aec4e614d9..a75d38ccfd4 100644 --- a/rust/arrow/src/array/data.rs +++ b/rust/arrow/src/array/data.rs @@ -237,7 +237,7 @@ impl ArrayData { #[inline] pub(super) fn buffer(&self, buffer: usize) -> &[T] { let values = unsafe { self.buffers[buffer].data().align_to::() }; - if values.0.len() != 0 || values.2.len() != 0 { + if !values.0.is_empty() || !values.2.is_empty() { panic!("The buffer is not byte-aligned with its interpretation") }; assert_ne!(self.data_type, DataType::Boolean); diff --git a/rust/arrow/src/array/equal/list.rs b/rust/arrow/src/array/equal/list.rs index 7e81a342443..6a9305edc11 100644 --- a/rust/arrow/src/array/equal/list.rs +++ b/rust/arrow/src/array/equal/list.rs @@ -23,7 +23,7 @@ fn lengths_equal(lhs: &[T], rhs: &[T]) -> bool { // invariant from `base_equal` debug_assert_eq!(lhs.len(), rhs.len()); - if lhs.len() == 0 { + if lhs.is_empty() { return true; } diff --git a/rust/arrow/src/array/equal/mod.rs b/rust/arrow/src/array/equal/mod.rs index ab063754051..4900c8e74e3 100644 --- a/rust/arrow/src/array/equal/mod.rs +++ b/rust/arrow/src/array/equal/mod.rs @@ -398,7 +398,7 @@ mod tests { false, ), (base.clone(), base.clone(), true), - (base.clone(), not_base.clone(), false), + (base, not_base, false), ] } diff --git a/rust/arrow/src/array/equal_json.rs b/rust/arrow/src/array/equal_json.rs index cdc9fc4122e..3338138f881 100644 --- a/rust/arrow/src/array/equal_json.rs +++ b/rust/arrow/src/array/equal_json.rs @@ -162,10 +162,7 @@ impl JsonEqual for StructArray { return false; } - let all_object = json.iter().all(|v| match v { - Object(_) | JNull => true, - _ => false, - }); + let all_object = json.iter().all(|v| matches!(v, Object(_) | JNull)); if !all_object { return false; diff --git a/rust/arrow/src/array/iterator.rs b/rust/arrow/src/array/iterator.rs index d93f86d9614..8b50b3de062 100644 --- a/rust/arrow/src/array/iterator.rs +++ b/rust/arrow/src/array/iterator.rs @@ -193,8 +193,7 @@ mod tests { let array = array.as_any().downcast_ref::().unwrap(); // to and from iter, with a +1 - let result: Int32Array = - array.iter().map(|e| e.and_then(|e| Some(e + 1))).collect(); + let result: Int32Array = array.iter().map(|e| e.map(|e| e + 1)).collect(); let expected = Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)]); assert_eq!(result, expected); diff --git a/rust/arrow/src/compute/kernels/aggregate.rs b/rust/arrow/src/compute/kernels/aggregate.rs index 444e2454a1c..503557927fd 100644 --- a/rust/arrow/src/compute/kernels/aggregate.rs +++ b/rust/arrow/src/compute/kernels/aggregate.rs @@ -153,7 +153,7 @@ where let remainder = data_chunks.remainder(); let bit_chunks = buffer.bit_chunks(array.offset(), array.len()); - &data_chunks + data_chunks .zip(bit_chunks.iter()) .for_each(|(chunk, mask)| { chunk.iter().enumerate().for_each(|(i, value)| { diff --git a/rust/arrow/src/compute/kernels/cast.rs b/rust/arrow/src/compute/kernels/cast.rs index f054542b079..443ba45d3f2 100644 --- a/rust/arrow/src/compute/kernels/cast.rs +++ b/rust/arrow/src/compute/kernels/cast.rs @@ -191,11 +191,9 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { (Time32(_), Time64(_)) => true, (Time64(TimeUnit::Microsecond), Time64(TimeUnit::Nanosecond)) => true, (Time64(TimeUnit::Nanosecond), Time64(TimeUnit::Microsecond)) => true, - (Time64(_), Time32(to_unit)) => match to_unit { - TimeUnit::Second => true, - TimeUnit::Millisecond => true, - _ => false, - }, + (Time64(_), Time32(to_unit)) => { + matches!(to_unit, TimeUnit::Second | TimeUnit::Millisecond) + } (Timestamp(_, _), Int64) => true, (Int64, Timestamp(_, _)) => true, (Timestamp(_, _), Timestamp(_, _)) => true, @@ -2857,10 +2855,10 @@ mod tests { // Construct a list array from the above two let list_data_type = DataType::List(Box::new(Field::new("item", DataType::Int32, true))); - let list_data = ArrayData::builder(list_data_type.clone()) + let list_data = ArrayData::builder(list_data_type) .len(3) - .add_buffer(value_offsets.clone()) - .add_child_data(value_data.clone()) + .add_buffer(value_offsets) + .add_child_data(value_data) .build(); ListArray::from(list_data) } @@ -2879,10 +2877,10 @@ mod tests { // Construct a list array from the above two let list_data_type = DataType::LargeList(Box::new(Field::new("item", DataType::Int32, true))); - let list_data = ArrayData::builder(list_data_type.clone()) + let list_data = ArrayData::builder(list_data_type) .len(3) - .add_buffer(value_offsets.clone()) - .add_child_data(value_data.clone()) + .add_buffer(value_offsets) + .add_child_data(value_data) .build(); LargeListArray::from(list_data) } @@ -2903,7 +2901,7 @@ mod tests { ); let list_data = ArrayData::builder(list_data_type) .len(5) - .add_child_data(value_data.clone()) + .add_child_data(value_data) .build(); FixedSizeListArray::from(list_data) } @@ -2973,7 +2971,7 @@ mod tests { Timestamp(TimeUnit::Second, Some(tz_name.clone())), Timestamp(TimeUnit::Millisecond, Some(tz_name.clone())), Timestamp(TimeUnit::Microsecond, Some(tz_name.clone())), - Timestamp(TimeUnit::Nanosecond, Some(tz_name.clone())), + Timestamp(TimeUnit::Nanosecond, Some(tz_name)), Date32(DateUnit::Day), Date64(DateUnit::Day), Date32(DateUnit::Millisecond), diff --git a/rust/arrow/src/compute/kernels/length.rs b/rust/arrow/src/compute/kernels/length.rs index bf57f70fa35..f0f8bf160b7 100644 --- a/rust/arrow/src/compute/kernels/length.rs +++ b/rust/arrow/src/compute/kernels/length.rs @@ -102,36 +102,30 @@ mod tests { #[test] fn test_string() -> Result<()> { - cases() - .into_iter() - .map(|(input, len, expected)| { - let array = StringArray::from(input); - let result = length(&array)?; - assert_eq!(len, result.len()); - let result = result.as_any().downcast_ref::().unwrap(); - expected.iter().enumerate().for_each(|(i, value)| { - assert_eq!(*value, result.value(i)); - }); - Ok(()) - }) - .collect::>() + cases().into_iter().try_for_each(|(input, len, expected)| { + let array = StringArray::from(input); + let result = length(&array)?; + assert_eq!(len, result.len()); + let result = result.as_any().downcast_ref::().unwrap(); + expected.iter().enumerate().for_each(|(i, value)| { + assert_eq!(*value, result.value(i)); + }); + Ok(()) + }) } #[test] fn test_large_string() -> Result<()> { - cases() - .into_iter() - .map(|(input, len, expected)| { - let array = LargeStringArray::from(input); - let result = length(&array)?; - assert_eq!(len, result.len()); - let result = result.as_any().downcast_ref::().unwrap(); - expected.iter().enumerate().for_each(|(i, value)| { - assert_eq!(*value as i64, result.value(i)); - }); - Ok(()) - }) - .collect::>() + cases().into_iter().try_for_each(|(input, len, expected)| { + let array = LargeStringArray::from(input); + let result = length(&array)?; + assert_eq!(len, result.len()); + let result = result.as_any().downcast_ref::().unwrap(); + expected.iter().enumerate().for_each(|(i, value)| { + assert_eq!(*value as i64, result.value(i)); + }); + Ok(()) + }) } fn null_cases() -> Vec<(Vec>, usize, Vec>)> { @@ -146,7 +140,7 @@ mod tests { fn null_string() -> Result<()> { null_cases() .into_iter() - .map(|(input, len, expected)| { + .try_for_each(|(input, len, expected)| { let array = StringArray::from(input); let result = length(&array)?; assert_eq!(len, result.len()); @@ -156,14 +150,13 @@ mod tests { assert_eq!(expected.data(), result.data()); Ok(()) }) - .collect::>() } #[test] fn null_large_string() -> Result<()> { null_cases() .into_iter() - .map(|(input, len, expected)| { + .try_for_each(|(input, len, expected)| { let array = LargeStringArray::from(input); let result = length(&array)?; assert_eq!(len, result.len()); @@ -178,7 +171,6 @@ mod tests { assert_eq!(expected.data(), result.data()); Ok(()) }) - .collect::>() } /// Tests that length is not valid for u64. diff --git a/rust/arrow/src/compute/kernels/sort.rs b/rust/arrow/src/compute/kernels/sort.rs index eb9e56e54fa..f0560ca1e14 100644 --- a/rust/arrow/src/compute/kernels/sort.rs +++ b/rust/arrow/src/compute/kernels/sort.rs @@ -468,7 +468,7 @@ pub fn lexsort(columns: &[SortColumn]) -> Result> { /// Sort elements lexicographically from a list of `ArrayRef` into an unsigned integer /// (`UInt32Array`) of indices. pub fn lexsort_to_indices(columns: &[SortColumn]) -> Result { - if columns.len() == 0 { + if columns.is_empty() { return Err(ArrowError::InvalidArgumentError( "Sort requires at least one column".to_string(), )); diff --git a/rust/arrow/src/compute/kernels/take.rs b/rust/arrow/src/compute/kernels/take.rs index 9c9ca56fec3..0d999e34128 100644 --- a/rust/arrow/src/compute/kernels/take.rs +++ b/rust/arrow/src/compute/kernels/take.rs @@ -1008,7 +1008,7 @@ mod tests { // Construct a list array from the above two let list_data_type = DataType::List(Box::new(Field::new("item", DataType::Int32, false))); - let list_data = ArrayData::builder(list_data_type.clone()) + let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) .add_child_data(value_data) diff --git a/rust/arrow/src/compute/util.rs b/rust/arrow/src/compute/util.rs index 3b578011640..ba7de77f6b0 100644 --- a/rust/arrow/src/compute/util.rs +++ b/rust/arrow/src/compute/util.rs @@ -314,8 +314,8 @@ mod tests { .add_buffer(value_offsets) .add_child_data(value_data) .build(); - let array = Arc::new(GenericListArray::::from(list_data)) as ArrayRef; - array + + Arc::new(GenericListArray::::from(list_data)) } #[test] diff --git a/rust/arrow/src/csv/reader.rs b/rust/arrow/src/csv/reader.rs index 4f926f9a98f..9ed2d1fd4bc 100644 --- a/rust/arrow/src/csv/reader.rs +++ b/rust/arrow/src/csv/reader.rs @@ -140,7 +140,7 @@ fn infer_file_schema( for i in 0..header_length { if let Some(string) = record.get(i) { - if string == "" { + if string.is_empty() { nulls[i] = true; } else { column_types[i].insert(infer_field_schema(string)); @@ -329,7 +329,7 @@ impl Reader { schema, projection, record_iter, - line_number: if has_header { start + 1 } else { start + 0 }, + line_number: if has_header { start + 1 } else { start }, } } } @@ -372,7 +372,7 @@ impl Iterator for Reader { /// parses a slice of [csv_crate::StringRecord] into a [array::record_batch::RecordBatch]. fn parse( rows: &[StringRecord], - fields: &Vec, + fields: &[Field], projection: &Option>, line_number: usize, ) -> Result { diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index d6dd7a544cd..630b14dfc1e 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -1128,11 +1128,19 @@ impl DataType { /// Returns true if this type is numeric: (UInt*, Unit*, or Float*) pub fn is_numeric(t: &DataType) -> bool { use DataType::*; - match t { - UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float32 - | Float64 => true, - _ => false, - } + matches!( + t, + UInt8 + | UInt16 + | UInt32 + | UInt64 + | Int8 + | Int16 + | Int32 + | Int64 + | Float32 + | Float64 + ) } } @@ -2586,7 +2594,7 @@ mod tests { assert_eq!(Some(VNumber(Number::from(1))), 1u32.into_json_value()); assert_eq!(Some(VNumber(Number::from(1))), 1u64.into_json_value()); assert_eq!( - Some(VNumber(Number::from_f64(0.01 as f64).unwrap())), + Some(VNumber(Number::from_f64(0.01f64).unwrap())), 0.01.into_json_value() ); assert_eq!( diff --git a/rust/arrow/src/json/reader.rs b/rust/arrow/src/json/reader.rs index 7a414db8795..a3368cdd65e 100644 --- a/rust/arrow/src/json/reader.rs +++ b/rust/arrow/src/json/reader.rs @@ -263,125 +263,122 @@ pub fn infer_json_schema( match record { Value::Object(map) => { - let res = map - .iter() - .map(|(k, v)| { - match v { - Value::Array(a) => { - // collect the data types in array - let types: Result>> = a - .iter() - .map(|a| match a { - Value::Null => Ok(None), - Value::Number(n) => { - if n.is_i64() { - Ok(Some(&DataType::Int64)) - } else { - Ok(Some(&DataType::Float64)) - } - } - Value::Bool(_) => Ok(Some(&DataType::Boolean)), - Value::String(_) => Ok(Some(&DataType::Utf8)), - Value::Array(_) | Value::Object(_) => { - Err(ArrowError::JsonError( - "Nested lists and structs not supported" - .to_string(), - )) + let res = map.iter().try_for_each(|(k, v)| { + match v { + Value::Array(a) => { + // collect the data types in array + let types: Result>> = a + .iter() + .map(|a| match a { + Value::Null => Ok(None), + Value::Number(n) => { + if n.is_i64() { + Ok(Some(&DataType::Int64)) + } else { + Ok(Some(&DataType::Float64)) } - }) - .collect(); - match types { - Ok(types) => { - // unwrap the Option and discard None values (from - // JSON nulls) - let mut types: Vec<&DataType> = - types.into_iter().filter_map(|t| t).collect(); - types.dedup(); - // if a record contains only nulls, it is not - // added to values - if !types.is_empty() { - let dt = coerce_data_type(types)?; - - if values.contains_key(k) { - let x = values.get_mut(k).unwrap(); - x.insert(DataType::List(Box::new( - Field::new("item", dt, true), - ))); - } else { - // create hashset and add value type - let mut hs = HashSet::new(); - hs.insert(DataType::List(Box::new( - Field::new("item", dt, true), - ))); - values.insert(k.to_string(), hs); - } + } + Value::Bool(_) => Ok(Some(&DataType::Boolean)), + Value::String(_) => Ok(Some(&DataType::Utf8)), + Value::Array(_) | Value::Object(_) => { + Err(ArrowError::JsonError( + "Nested lists and structs not supported" + .to_string(), + )) + } + }) + .collect(); + match types { + Ok(types) => { + // unwrap the Option and discard None values (from + // JSON nulls) + let mut types: Vec<&DataType> = + types.into_iter().filter_map(|t| t).collect(); + types.dedup(); + // if a record contains only nulls, it is not + // added to values + if !types.is_empty() { + let dt = coerce_data_type(types)?; + + if values.contains_key(k) { + let x = values.get_mut(k).unwrap(); + x.insert(DataType::List(Box::new( + Field::new("item", dt, true), + ))); + } else { + // create hashset and add value type + let mut hs = HashSet::new(); + hs.insert(DataType::List(Box::new( + Field::new("item", dt, true), + ))); + values.insert(k.to_string(), hs); } - Ok(()) } - Err(e) => Err(e), + Ok(()) } + Err(e) => Err(e), + } + } + Value::Bool(_) => { + if values.contains_key(k) { + let x = values.get_mut(k).unwrap(); + x.insert(DataType::Boolean); + } else { + // create hashset and add value type + let mut hs = HashSet::new(); + hs.insert(DataType::Boolean); + values.insert(k.to_string(), hs); } - Value::Bool(_) => { + Ok(()) + } + Value::Null => { + // do nothing, we treat json as nullable by default when + // inferring + Ok(()) + } + Value::Number(n) => { + if n.is_f64() { if values.contains_key(k) { let x = values.get_mut(k).unwrap(); - x.insert(DataType::Boolean); + x.insert(DataType::Float64); } else { // create hashset and add value type let mut hs = HashSet::new(); - hs.insert(DataType::Boolean); + hs.insert(DataType::Float64); values.insert(k.to_string(), hs); } - Ok(()) - } - Value::Null => { - // do nothing, we treat json as nullable by default when - // inferring - Ok(()) - } - Value::Number(n) => { - if n.is_f64() { - if values.contains_key(k) { - let x = values.get_mut(k).unwrap(); - x.insert(DataType::Float64); - } else { - // create hashset and add value type - let mut hs = HashSet::new(); - hs.insert(DataType::Float64); - values.insert(k.to_string(), hs); - } - } else { - // default to i64 - if values.contains_key(k) { - let x = values.get_mut(k).unwrap(); - x.insert(DataType::Int64); - } else { - // create hashset and add value type - let mut hs = HashSet::new(); - hs.insert(DataType::Int64); - values.insert(k.to_string(), hs); - } - } - Ok(()) - } - Value::String(_) => { + } else { + // default to i64 if values.contains_key(k) { let x = values.get_mut(k).unwrap(); - x.insert(DataType::Utf8); + x.insert(DataType::Int64); } else { // create hashset and add value type let mut hs = HashSet::new(); - hs.insert(DataType::Utf8); + hs.insert(DataType::Int64); values.insert(k.to_string(), hs); } - Ok(()) } - Value::Object(_) => Err(ArrowError::JsonError( - "Reading nested JSON structs currently not supported" - .to_string(), - )), + Ok(()) } - }) - .collect(); + Value::String(_) => { + if values.contains_key(k) { + let x = values.get_mut(k).unwrap(); + x.insert(DataType::Utf8); + } else { + // create hashset and add value type + let mut hs = HashSet::new(); + hs.insert(DataType::Utf8); + values.insert(k.to_string(), hs); + } + Ok(()) + } + Value::Object(_) => Err(ArrowError::JsonError( + "Reading nested JSON structs currently not supported" + .to_string(), + )), + } + }); match res { Ok(()) => {} Err(e) => return Err(e), diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/lib.rs index 09c04be4b2c..092ed5efe7f 100644 --- a/rust/arrow/src/lib.rs +++ b/rust/arrow/src/lib.rs @@ -129,6 +129,16 @@ #![allow(bare_trait_objects)] #![warn(missing_debug_implementations)] #![deny(clippy::redundant_clone)] +// introduced to ignore lint errors when upgrading from 2020-04-22 to 2020-11-14 +#![allow( + clippy::bind_instead_of_map, + clippy::float_equality_without_abs, + clippy::match_like_matches_macro, + clippy::needless_lifetimes, + clippy::needless_range_loop, + clippy::or_fun_call, + clippy::type_complexity +)] pub mod array; pub mod bitmap; diff --git a/rust/arrow/src/util/buffered_iterator.rs b/rust/arrow/src/util/buffered_iterator.rs index 059b82424a8..5d42ee43e66 100644 --- a/rust/arrow/src/util/buffered_iterator.rs +++ b/rust/arrow/src/util/buffered_iterator.rs @@ -54,7 +54,7 @@ where /// Useful to extract the exact item where an error occurred #[inline] pub fn n(&self) -> usize { - return self.buffer.len(); + self.buffer.len() } } diff --git a/rust/arrow/src/util/pretty.rs b/rust/arrow/src/util/pretty.rs index 7eacba3c1b5..4896d1a2629 100644 --- a/rust/arrow/src/util/pretty.rs +++ b/rust/arrow/src/util/pretty.rs @@ -140,7 +140,7 @@ mod tests { builder.append("three")?; let array = Arc::new(builder.finish()); - let batch = RecordBatch::try_new(schema.clone(), vec![array])?; + let batch = RecordBatch::try_new(schema, vec![array])?; let table = pretty_format_batches(&[batch])?; diff --git a/rust/datafusion/src/lib.rs b/rust/datafusion/src/lib.rs index 45a73eb74b8..e90086ee22e 100644 --- a/rust/datafusion/src/lib.rs +++ b/rust/datafusion/src/lib.rs @@ -16,6 +16,43 @@ // under the License. #![warn(missing_docs)] +// Clippy lints, some should be disabled incrementally +#![allow( + clippy::assertions_on_constants, + clippy::bind_instead_of_map, + clippy::blocks_in_if_conditions, + clippy::clone_on_copy, + clippy::collapsible_if, + clippy::explicit_counter_loop, + clippy::field_reassign_with_default, + clippy::float_cmp, + clippy::into_iter_on_ref, + clippy::len_zero, + clippy::let_and_return, + clippy::map_clone, + clippy::map_collect_result_unit, + clippy::match_like_matches_macro, + clippy::match_ref_pats, + clippy::module_inception, + clippy::needless_lifetimes, + clippy::needless_range_loop, + clippy::needless_return, + clippy::new_without_default, + clippy::or_fun_call, + clippy::ptr_arg, + clippy::redundant_clone, + clippy::redundant_field_names, + clippy::redundant_static_lifetimes, + clippy::redundant_pattern_matching, + clippy::redundant_closure, + clippy::single_match, + clippy::stable_sort_primitive, + clippy::type_complexity, + clippy::unit_arg, + clippy::unnecessary_unwrap, + clippy::useless_format, + clippy::zero_prefixed_literal +)] //! DataFusion is an extensible query execution framework that uses //! [Apache Arrow](https://arrow.apache.org) as its in-memory format. diff --git a/rust/integration-testing/src/bin/arrow-file-to-stream.rs b/rust/integration-testing/src/bin/arrow-file-to-stream.rs index ded1972e40c..e50afb62466 100644 --- a/rust/integration-testing/src/bin/arrow-file-to-stream.rs +++ b/rust/integration-testing/src/bin/arrow-file-to-stream.rs @@ -32,17 +32,15 @@ fn main() -> Result<()> { let f = File::open(filename)?; let reader = BufReader::new(f); - let reader = FileReader::try_new(reader)?; + let mut reader = FileReader::try_new(reader)?; let schema = reader.schema(); let mut writer = StreamWriter::try_new(io::stdout(), &schema)?; - reader - .map(|batch| { - let batch = batch?; - writer.write(&batch) - }) - .collect::>()?; + reader.try_for_each(|batch| { + let batch = batch?; + writer.write(&batch) + })?; writer.finish()?; eprintln!("Completed without error"); diff --git a/rust/integration-testing/src/bin/arrow-stream-to-file.rs b/rust/integration-testing/src/bin/arrow-stream-to-file.rs index 87101c3ec89..d7af0a0d2a0 100644 --- a/rust/integration-testing/src/bin/arrow-stream-to-file.rs +++ b/rust/integration-testing/src/bin/arrow-stream-to-file.rs @@ -26,14 +26,12 @@ fn main() -> Result<()> { let args: Vec = env::args().collect(); eprintln!("{:?}", args); - let arrow_stream_reader = StreamReader::try_new(io::stdin())?; + let mut arrow_stream_reader = StreamReader::try_new(io::stdin())?; let schema = arrow_stream_reader.schema(); let mut writer = FileWriter::try_new(io::stdout(), &schema)?; - arrow_stream_reader - .map(|batch| writer.write(&batch?)) - .collect::>()?; + arrow_stream_reader.try_for_each(|batch| writer.write(&batch?))?; writer.finish()?; eprintln!("Completed without error"); diff --git a/rust/parquet/src/arrow/arrow_reader.rs b/rust/parquet/src/arrow/arrow_reader.rs index 88af583a3d4..c503779e8c1 100644 --- a/rust/parquet/src/arrow/arrow_reader.rs +++ b/rust/parquet/src/arrow/arrow_reader.rs @@ -583,7 +583,7 @@ mod tests { .next() .map(|r| r.expect("Failed to read record batch!").into()); - let (start, end) = (i * 60 as usize, (i + 1) * 60 as usize); + let (start, end) = (i * 60_usize, (i + 1) * 60_usize); if start < max_len { assert!(array.is_some()); diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs index c84e8955501..87a1004639e 100644 --- a/rust/parquet/src/arrow/schema.rs +++ b/rust/parquet/src/arrow/schema.rs @@ -84,7 +84,7 @@ where (0..(parquet_schema.num_columns())).for_each(|i| { let p_type = parquet_schema.get_column_root(i); curr_name = p_type.get_basic_info().name(); - if prev_name == "" { + if prev_name.is_empty() { // first index indices.push(i); prev_name = curr_name; diff --git a/rust/parquet/src/bin/parquet-schema.rs b/rust/parquet/src/bin/parquet-schema.rs index 2eaf7652ae9..cff6d0c7169 100644 --- a/rust/parquet/src/bin/parquet-schema.rs +++ b/rust/parquet/src/bin/parquet-schema.rs @@ -77,7 +77,7 @@ fn main() { Ok(parquet_reader) => { let metadata = parquet_reader.metadata(); println!("Metadata for file: {}", &args[1]); - println!(""); + println!(); if verbose { print_parquet_metadata(&mut std::io::stdout(), &metadata); } else { diff --git a/rust/parquet/src/column/reader.rs b/rust/parquet/src/column/reader.rs index d9a49d2cb76..d5d86ed60a2 100644 --- a/rust/parquet/src/column/reader.rs +++ b/rust/parquet/src/column/reader.rs @@ -410,6 +410,7 @@ impl ColumnReaderImpl { .expect("Decoder for dict should have been set") } else { // Search cache for data page decoder + #[allow(clippy::map_entry)] if !self.decoders.contains_key(&encoding) { // Initialize decoder for this page let data_decoder = get_decoder::(self.descr.clone(), encoding)?; diff --git a/rust/parquet/src/data_type.rs b/rust/parquet/src/data_type.rs index 0dbf185b733..37d4189e2b1 100644 --- a/rust/parquet/src/data_type.rs +++ b/rust/parquet/src/data_type.rs @@ -642,23 +642,20 @@ mod tests { fn test_as_bytes() { assert_eq!(false.as_bytes(), &[0]); assert_eq!(true.as_bytes(), &[1]); - assert_eq!((7 as i32).as_bytes(), &[7, 0, 0, 0]); - assert_eq!((555 as i32).as_bytes(), &[43, 2, 0, 0]); - assert_eq!((555 as u32).as_bytes(), &[43, 2, 0, 0]); + assert_eq!(7_i32.as_bytes(), &[7, 0, 0, 0]); + assert_eq!(555_i32.as_bytes(), &[43, 2, 0, 0]); + assert_eq!(555_u32.as_bytes(), &[43, 2, 0, 0]); assert_eq!(i32::max_value().as_bytes(), &[255, 255, 255, 127]); assert_eq!(i32::min_value().as_bytes(), &[0, 0, 0, 128]); - assert_eq!((7 as i64).as_bytes(), &[7, 0, 0, 0, 0, 0, 0, 0]); - assert_eq!((555 as i64).as_bytes(), &[43, 2, 0, 0, 0, 0, 0, 0]); + assert_eq!(7_i64.as_bytes(), &[7, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(555_i64.as_bytes(), &[43, 2, 0, 0, 0, 0, 0, 0]); assert_eq!( (i64::max_value()).as_bytes(), &[255, 255, 255, 255, 255, 255, 255, 127] ); assert_eq!((i64::min_value()).as_bytes(), &[0, 0, 0, 0, 0, 0, 0, 128]); - assert_eq!((3.14 as f32).as_bytes(), &[195, 245, 72, 64]); - assert_eq!( - (3.14 as f64).as_bytes(), - &[31, 133, 235, 81, 184, 30, 9, 64] - ); + assert_eq!(3.14_f32.as_bytes(), &[195, 245, 72, 64]); + assert_eq!(3.14_f64.as_bytes(), &[31, 133, 235, 81, 184, 30, 9, 64]); assert_eq!("hello".as_bytes(), &[b'h', b'e', b'l', b'l', b'o']); assert_eq!( Vec::from("hello".as_bytes()).as_bytes(), diff --git a/rust/parquet/src/encodings/encoding.rs b/rust/parquet/src/encodings/encoding.rs index 8660168a5d1..4d8dc679b37 100644 --- a/rust/parquet/src/encodings/encoding.rs +++ b/rust/parquet/src/encodings/encoding.rs @@ -388,7 +388,7 @@ impl DictEncoder { self.hash_table_size = new_size; self.mod_bitmask = (new_size - 1) as u32; - mem::replace(&mut self.hash_slots, new_hash_slots); + let _ = mem::replace(&mut self.hash_slots, new_hash_slots); } } diff --git a/rust/parquet/src/lib.rs b/rust/parquet/src/lib.rs index c2925f9970b..d292312a59f 100644 --- a/rust/parquet/src/lib.rs +++ b/rust/parquet/src/lib.rs @@ -16,14 +16,24 @@ // under the License. #![feature(specialization)] +#![allow(incomplete_features)] #![allow(dead_code)] #![allow(non_camel_case_types)] #![allow(bare_trait_objects)] #![allow( - clippy::too_many_arguments, - clippy::new_without_default, + clippy::approx_constant, + clippy::borrowed_box, + clippy::cast_ptr_alignment, + clippy::comparison_chain, + clippy::float_cmp, + clippy::float_equality_without_abs, + clippy::many_single_char_names, clippy::needless_range_loop, - clippy::borrowed_box + clippy::new_without_default, + clippy::or_fun_call, + clippy::same_item_push, + clippy::too_many_arguments, + clippy::transmute_ptr_to_ptr )] #[macro_use] diff --git a/rust/parquet/src/record/api.rs b/rust/parquet/src/record/api.rs index ef7e0709052..482f3f62f78 100644 --- a/rust/parquet/src/record/api.rs +++ b/rust/parquet/src/record/api.rs @@ -529,12 +529,7 @@ impl Field { /// Determines if this Row represents a primitive value. pub fn is_primitive(&self) -> bool { - match *self { - Field::Group(_) => false, - Field::ListInternal(_) => false, - Field::MapInternal(_) => false, - _ => true, - } + !matches!(*self, Field::Group(_) | Field::ListInternal(_) | Field::MapInternal(_)) } /// Converts Parquet BOOLEAN type with logical type into `bool` value. @@ -645,14 +640,14 @@ impl fmt::Display for Field { Field::UInt(value) => write!(f, "{}", value), Field::ULong(value) => write!(f, "{}", value), Field::Float(value) => { - if value > 1e19 || value < 1e-15 { + if !(1e-15..=1e19).contains(&value) { write!(f, "{:E}", value) } else { write!(f, "{:?}", value) } } Field::Double(value) => { - if value > 1e19 || value < 1e-15 { + if !(1e-15..=1e19).contains(&value) { write!(f, "{:E}", value) } else { write!(f, "{:?}", value) diff --git a/rust/parquet/src/schema/parser.rs b/rust/parquet/src/schema/parser.rs index 9c4f0e3d12e..21c168f9cef 100644 --- a/rust/parquet/src/schema/parser.rs +++ b/rust/parquet/src/schema/parser.rs @@ -422,9 +422,9 @@ mod tests { } } "; - let mut iter = Tokenizer::from_str(schema); + let iter = Tokenizer::from_str(schema); let mut res = Vec::new(); - while let Some(token) = iter.next() { + for token in iter { res.push(token); } assert_eq!( diff --git a/rust/parquet/src/schema/types.rs b/rust/parquet/src/schema/types.rs index 0ba12c17bff..c89afbe85f0 100644 --- a/rust/parquet/src/schema/types.rs +++ b/rust/parquet/src/schema/types.rs @@ -143,18 +143,12 @@ impl Type { /// Returns `true` if this type is a primitive type, `false` otherwise. pub fn is_primitive(&self) -> bool { - match *self { - Type::PrimitiveType { .. } => true, - _ => false, - } + matches!(*self, Type::PrimitiveType { .. }) } /// Returns `true` if this type is a group type, `false` otherwise. pub fn is_group(&self) -> bool { - match *self { - Type::GroupType { .. } => true, - _ => false, - } + matches!(*self, Type::GroupType { .. }) } /// Returns `true` if this type is the top-level schema type (message type). diff --git a/rust/parquet/src/util/io.rs b/rust/parquet/src/util/io.rs index 1403de0f529..44e99ac0a77 100644 --- a/rust/parquet/src/util/io.rs +++ b/rust/parquet/src/util/io.rs @@ -84,7 +84,7 @@ impl FileSource { reader, start, end: start + length as u64, - buf: vec![0 as u8; DEFAULT_BUF_SIZE], + buf: vec![0_u8; DEFAULT_BUF_SIZE], buf_pos: 0, buf_cap: 0, } @@ -262,7 +262,7 @@ mod tests { let mut file = get_test_file("alltypes_plain.parquet"); let mut src = FileSource::new(&file, 0, 4); - file.seek(SeekFrom::Start(5 as u64)) + file.seek(SeekFrom::Start(5_u64)) .expect("File seek to a position"); let bytes_read = src.read(&mut buf[..]).unwrap(); diff --git a/rust/parquet_derive/src/parquet_field.rs b/rust/parquet_derive/src/parquet_field.rs index 54a18bd8d7f..8ce3050bbe6 100644 --- a/rust/parquet_derive/src/parquet_field.rs +++ b/rust/parquet_derive/src/parquet_field.rs @@ -92,7 +92,7 @@ impl Field { Type::TypePath(_) => self.option_into_vals(), _ => unimplemented!("Unsupported type encountered"), }, - ref f @ _ => unimplemented!("Unsupported: {:#?}", f), + ref f => unimplemented!("Unsupported: {:#?}", f), }, Type::Reference(_, ref first_type) => match **first_type { Type::TypePath(_) => self.copied_direct_vals(), @@ -102,11 +102,11 @@ impl Field { Type::TypePath(_) => self.option_into_vals(), _ => unimplemented!("Unsupported type encountered"), }, - ref f @ _ => unimplemented!("Unsupported: {:#?}", f), + ref f => unimplemented!("Unsupported: {:#?}", f), }, - ref f @ _ => unimplemented!("Unsupported: {:#?}", f), + ref f => unimplemented!("Unsupported: {:#?}", f), }, - f @ _ => unimplemented!("Unsupported: {:#?}", f), + f => unimplemented!("Unsupported: {:#?}", f), }; let definition_levels = match &self.ty { @@ -181,11 +181,10 @@ impl Field { self.third_party_type == Some(ThirdPartyType::ChronoNaiveDateTime); let is_a_date = self.third_party_type == Some(ThirdPartyType::ChronoNaiveDate); let is_a_uuid = self.third_party_type == Some(ThirdPartyType::Uuid); - let copy_to_vec = match self.ty.physical_type() { - parquet::basic::Type::BYTE_ARRAY - | parquet::basic::Type::FIXED_LEN_BYTE_ARRAY => false, - _ => true, - }; + let copy_to_vec = !matches!( + self.ty.physical_type(), + parquet::basic::Type::BYTE_ARRAY | parquet::basic::Type::FIXED_LEN_BYTE_ARRAY + ); let binding = if copy_to_vec { quote! { let Some(inner) = rec.#field_name } @@ -406,7 +405,7 @@ impl Type { "f32" => BasicType::FLOAT, "f64" => BasicType::DOUBLE, "String" | "str" | "Uuid" => BasicType::BYTE_ARRAY, - f @ _ => unimplemented!("{} currently is not supported", f), + f => unimplemented!("{} currently is not supported", f), } } @@ -421,7 +420,7 @@ impl Type { syn::Type::Path(ref p) => Type::from_type_path(f, p), syn::Type::Reference(ref tr) => Type::from_type_reference(f, tr), syn::Type::Array(ref ta) => Type::from_type_array(f, ta), - other @ _ => unimplemented!( + other => unimplemented!( "Unable to derive {:?} - it is currently an unsupported type\n{:#?}", f.ident.as_ref().unwrap(), other @@ -445,10 +444,10 @@ impl Type { match first_arg { syn::GenericArgument::Type(ref typath) => typath.clone(), - other @ _ => unimplemented!("Unsupported: {:#?}", other), + other => unimplemented!("Unsupported: {:#?}", other), } } - other @ _ => unimplemented!("Unsupported: {:#?}", other), + other => unimplemented!("Unsupported: {:#?}", other), }; if is_vec { diff --git a/rust/parquet_derive_test/src/lib.rs b/rust/parquet_derive_test/src/lib.rs index aca4dc57049..3f5d90e537c 100644 --- a/rust/parquet_derive_test/src/lib.rs +++ b/rust/parquet_derive_test/src/lib.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#![allow(clippy::approx_constant)] + extern crate parquet; #[macro_use] diff --git a/rust/rust-toolchain b/rust/rust-toolchain index f3bb3d37585..1ff3dd1284c 100644 --- a/rust/rust-toolchain +++ b/rust/rust-toolchain @@ -1 +1 @@ -nightly-2020-04-22 +nightly-2020-11-14