Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion ci/scripts/rust_lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,14 @@
# specific language governing permissions and limitations
# under the License.

# Entrypoint to run rust linter on CI (automated tests)
#
# Usage: rust_lint.sh <arrow checkout directory>

set -e

source_dir=${1}/rust

pushd ${source_dir}/arrow
cargo clippy -- -A clippy::redundant_field_names
cargo clippy --all-targets --workspace -- -D warnings -A clippy::redundant_field_names
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `-D warnings` flag means "deny anything that clippy would warn against" — warnings are treated as errors

popd
8 changes: 4 additions & 4 deletions rust/arrow/benches/array_from_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ fn struct_array_values(

fn struct_array_from_vec(
field1: &str,
strings: &Vec<Option<&str>>,
strings: &[Option<&str>],
field2: &str,
ints: &Vec<Option<i32>>,
ints: &[Option<i32>],
) {
let strings: ArrayRef = Arc::new(StringArray::from(strings.clone()));
let ints: ArrayRef = Arc::new(Int32Array::from(ints.clone()));
let strings: ArrayRef = Arc::new(StringArray::from(strings.to_owned()));
let ints: ArrayRef = Arc::new(Int32Array::from(ints.to_owned()));

criterion::black_box(
StructArray::try_from(vec![(field1, strings), (field2, ints)]).unwrap(),
Expand Down
1 change: 1 addition & 0 deletions rust/arrow/benches/csv_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ fn record_batches_to_csv() {
let file = File::create("target/bench_write_csv.csv").unwrap();
let mut writer = csv::Writer::new(file);
let batches = vec![&b, &b, &b, &b, &b, &b, &b, &b, &b, &b, &b];
#[allow(clippy::unit_arg)]
criterion::black_box(for batch in batches {
writer.write(batch).unwrap()
});
Expand Down
2 changes: 1 addition & 1 deletion rust/arrow/benches/take_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ where
let array: PrimitiveArray<T> = seedable_rng()
.sample_iter(&Standard)
.take(size)
.map(|v| Some(v))
.map(Some)
.collect();

Arc::new(array) as ArrayRef
Expand Down
2 changes: 1 addition & 1 deletion rust/arrow/src/array/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ mod tests {
.map(|e| {
e.and_then(|e| {
let mut a = e.to_string();
a.push_str("b");
a.push('b');
Some(a)
})
})
Expand Down
2 changes: 1 addition & 1 deletion rust/arrow/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -780,7 +780,7 @@ impl MutableBuffer {
/// `new_len` will be zeroed out.
///
/// If `new_len` is less than `len`, the buffer will be truncated.
pub fn resize(&mut self, new_len: usize) -> () {
pub fn resize(&mut self, new_len: usize) {
if new_len > self.len {
self.reserve(new_len);
} else {
Expand Down
20 changes: 9 additions & 11 deletions rust/arrow/src/compute/kernels/substring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,19 +161,18 @@ mod tests {
),
];

cases
.into_iter()
.map(|(array, start, length, expected)| {
cases.into_iter().try_for_each::<_, Result<()>>(
|(array, start, length, expected)| {
let array = T::from(array);
let result = substring(&array, start, &length)?;
let result: ArrayRef = substring(&array, start, &length)?;
assert_eq!(array.len(), result.len());

let result = result.as_any().downcast_ref::<T>().unwrap();
let expected = T::from(expected);
assert_eq!(&expected, result);
Ok(())
})
.collect::<Result<()>>()?;
},
)?;

Ok(())
}
Expand Down Expand Up @@ -243,18 +242,17 @@ mod tests {
),
];

cases
.into_iter()
.map(|(array, start, length, expected)| {
cases.into_iter().try_for_each::<_, Result<()>>(
|(array, start, length, expected)| {
let array = StringArray::from(array);
let result = substring(&array, start, &length)?;
assert_eq!(array.len(), result.len());
let result = result.as_any().downcast_ref::<StringArray>().unwrap();
let expected = StringArray::from(expected);
assert_eq!(&expected, result,);
Ok(())
})
.collect::<Result<()>>()?;
},
)?;

Ok(())
}
Expand Down
2 changes: 1 addition & 1 deletion rust/arrow/src/util/bit_chunk_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ mod tests {

assert_eq!(63, bitchunks.remainder_len());
assert_eq!(
0b1000000_00111111_11000000_00111111_11000000_00111111_11000000_00111111,
0b100_0000_0011_1111_1100_0000_0011_1111_1100_0000_0011_1111_1100_0000_0011_1111,
bitchunks.remainder_bits()
);
}
Expand Down
2 changes: 1 addition & 1 deletion rust/datafusion/examples/simple_udaf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ async fn main() -> Result<()> {
.unwrap();

// verify that the calculation is correct
assert_eq!(result.value(0), 8.0);
assert!((result.value(0) - 8.0).abs() < f64::EPSILON);
println!("The geometric mean of [2,4,8,64] is {}", result.value(0));

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion rust/datafusion/src/bin/repl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ pub async fn main() {
}
Ok(ref line) => {
query.push_str(line);
query.push_str(" ");
query.push(' ');
}
Err(_) => {
break;
Expand Down
4 changes: 2 additions & 2 deletions rust/datafusion/tests/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,9 @@ async fn custom_source_dataframe() -> Result<()> {
assert_eq!(table_schema.fields().len(), 2);
assert_eq!(projected_schema.fields().len(), 1);
}
_ => assert!(false, "input to projection should be TableScan"),
_ => panic!("input to projection should be TableScan"),
},
_ => assert!(false, "expect optimized_plan to be projection"),
_ => panic!("expect optimized_plan to be projection"),
}

let expected = "Projection: #c2\
Expand Down
4 changes: 2 additions & 2 deletions rust/datafusion/tests/sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ async fn nyc() -> Result<()> {
assert_eq!(projected_schema.field(0).name(), "passenger_count");
assert_eq!(projected_schema.field(1).name(), "fare_amount");
}
_ => assert!(false),
_ => unreachable!(),
},
_ => assert!(false),
_ => unreachable!(),
}

Ok(())
Expand Down
4 changes: 2 additions & 2 deletions rust/datafusion/tests/user_defined_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ use async_trait::async_trait;
async fn exec_sql(ctx: &mut ExecutionContext, sql: &str) -> Result<String> {
let df = ctx.sql(sql)?;
let batches = df.collect().await?;
pretty_format_batches(&batches).map_err(|e| DataFusionError::ArrowError(e))
pretty_format_batches(&batches).map_err(DataFusionError::ArrowError)
}

/// Create a test table.
Expand Down Expand Up @@ -494,7 +494,7 @@ impl Stream for TopKReader {

// take this as immutable
let k = self.k;
let schema = self.schema().clone();
let schema = self.schema();

let top_values = self
.input
Expand Down
98 changes: 48 additions & 50 deletions rust/parquet/src/arrow/arrow_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,56 +194,54 @@ fn write_leaves(
);
}

match (&**key_type, &**value_type, &mut col_writer) {
(UInt8, UInt32, Int32ColumnWriter(writer)) => {
let typed_array = array
.as_any()
.downcast_ref::<arrow_array::UInt8DictionaryArray>()
.expect("Unable to get dictionary array");

let keys = typed_array.keys();

let value_buffer = typed_array.values();
let value_array =
arrow::compute::cast(&value_buffer, &ArrowDataType::Int32)?;

let values = value_array
.as_any()
.downcast_ref::<arrow_array::Int32Array>()
.unwrap();

use std::convert::TryFrom;
// This removes NULL values from the keys, but
// they're encoded by the levels, so that's fine.
let materialized_values: Vec<_> = keys
.into_iter()
.flatten()
.map(|key| {
usize::try_from(key).unwrap_or_else(|k| {
panic!("key {} does not fit in usize", k)
})
})
.map(|key| values.value(key))
.collect();

let materialized_primitive_array =
PrimitiveArray::<arrow::datatypes::Int32Type>::from(
materialized_values,
);

writer.write_batch(
get_numeric_array_slice::<Int32Type, _>(
&materialized_primitive_array,
)
.as_slice(),
Some(levels.definition.as_slice()),
levels.repetition.as_deref(),
)?;
row_group_writer.close_column(col_writer)?;

return Ok(());
}
_ => {}
if let (UInt8, UInt32, Int32ColumnWriter(writer)) =
(&**key_type, &**value_type, &mut col_writer)
{
let typed_array = array
.as_any()
.downcast_ref::<arrow_array::UInt8DictionaryArray>()
.expect("Unable to get dictionary array");

let keys = typed_array.keys();

let value_buffer = typed_array.values();
let value_array =
arrow::compute::cast(&value_buffer, &ArrowDataType::Int32)?;

let values = value_array
.as_any()
.downcast_ref::<arrow_array::Int32Array>()
.unwrap();

use std::convert::TryFrom;
// This removes NULL values from the keys, but
// they're encoded by the levels, so that's fine.
let materialized_values: Vec<_> = keys
.into_iter()
.flatten()
.map(|key| {
usize::try_from(key)
.unwrap_or_else(|k| panic!("key {} does not fit in usize", k))
})
.map(|key| values.value(key))
.collect();

let materialized_primitive_array =
PrimitiveArray::<arrow::datatypes::Int32Type>::from(
materialized_values,
);

writer.write_batch(
get_numeric_array_slice::<Int32Type, _>(
&materialized_primitive_array,
)
.as_slice(),
Some(levels.definition.as_slice()),
levels.repetition.as_deref(),
)?;
row_group_writer.close_column(col_writer)?;

return Ok(());
}

dispatch_dictionary!(
Expand Down
3 changes: 1 addition & 2 deletions rust/parquet/src/bin/parquet-rowcount.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@ fn main() {
process::exit(1);
}

for i in 1..args.len() {
let filename = args[i].clone();
for filename in args {
let path = Path::new(&filename);
let file = File::open(&path).unwrap();
let parquet_reader = SerializedFileReader::new(file).unwrap();
Expand Down
5 changes: 5 additions & 0 deletions rust/parquet/src/util/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ impl SliceableCursor {
pub fn len(&self) -> u64 {
self.length as u64
}

/// Returns `true` if the cursor is empty (i.e. `self.len() == 0`).
pub fn is_empty(&self) -> bool {
self.len() == 0
}
}

/// Implementation inspired by std::io::Cursor
Expand Down
2 changes: 2 additions & 0 deletions rust/parquet_derive/src/parquet_field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ impl Field {
}
}

#[allow(clippy::enum_variant_names)]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While clippy has some valid points with these warnings, I did not want to address them in this PR

#[allow(clippy::large_enum_variant)]
#[derive(Debug, PartialEq)]
enum Type {
Array(Box<Type>),
Expand Down