From 59d0bcdbcabd3e3b2ed3905dedfe3e7046524435 Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Wed, 28 Aug 2024 12:51:35 +0200
Subject: [PATCH] Enable needless_pass_by_value clippy lint

---
 Cargo.toml | 4 ++
 .../examples/custom_datasource.rs | 10 +++-
 .../examples/flight/flight_server.rs | 1 +
 datafusion-examples/examples/pruning.rs | 6 +-
 datafusion/common/src/column.rs | 8 +--
 datafusion/common/src/error.rs | 1 +
 .../common/src/file_options/parquet_writer.rs | 18 +++---
 datafusion/common/src/hash_utils.rs | 18 +++---
 datafusion/common/src/scalar/mod.rs | 30 ++++++----
 .../common/src/scalar/struct_builder.rs | 4 +-
 datafusion/common/src/stats.rs | 4 +-
 .../core/benches/aggregate_query_sql.rs | 2 +
 datafusion/core/benches/data_utils/mod.rs | 2 +
 datafusion/core/benches/distinct_query_sql.rs | 2 +
 datafusion/core/benches/math_query_sql.rs | 2 +
 datafusion/core/benches/physical_plan.rs | 2 +
 .../core/benches/sort_limit_query_sql.rs | 2 +
 datafusion/core/benches/sql_planner.rs | 2 +
 datafusion/core/benches/topk_aggregate.rs | 2 +
 datafusion/core/benches/window_query_sql.rs | 2 +
 .../src/catalog_common/information_schema.rs | 4 +-
 datafusion/core/src/dataframe/mod.rs | 2 +-
 .../src/datasource/file_format/parquet.rs | 60 +++++++++----------
 .../src/datasource/file_format/write/demux.rs | 4 +-
 datafusion/core/src/datasource/memory.rs | 2 +-
 .../datasource/physical_plan/file_groups.rs | 1 +
 .../physical_plan/file_scan_config.rs | 11 ++--
 .../datasource/physical_plan/file_stream.rs | 2 +-
 .../datasource/physical_plan/parquet/mod.rs | 2 +-
 .../physical_plan/parquet/opener.rs | 2 +-
 .../physical_plan/parquet/page_filter.rs | 2 +-
 .../physical_plan/parquet/row_filter.rs | 4 +-
 .../physical_plan/parquet/row_group_filter.rs | 1 +
 .../datasource/physical_plan/statistics.rs | 8 +--
 datafusion/core/src/datasource/statistics.rs | 2 +-
 datafusion/core/src/execution/context/mod.rs | 6 +-
 .../enforce_distribution.rs | 1 +
 .../core/src/physical_optimizer/pruning.rs | 1 +
 .../src/physical_optimizer/sanity_checker.rs | 1 +
 .../core/src/physical_optimizer/test_utils.rs | 2 +
 datafusion/core/src/physical_planner.rs | 7 ++-
 datafusion/core/src/test/mod.rs | 2 +
 .../core/tests/custom_sources_cases/mod.rs | 2 +
 datafusion/core/tests/expr_api/mod.rs | 2 +
 datafusion/core/tests/fuzz_cases/mod.rs | 2 +
 datafusion/core/tests/parquet/mod.rs | 3 +
 .../aggregate_statistics.rs | 2 +
 .../physical_optimizer/limit_pushdown.rs | 2 +
 .../limited_distinct_aggregation.rs | 2 +
 .../user_defined/user_defined_aggregates.rs | 2 +
 .../tests/user_defined/user_defined_plan.rs | 2 +-
 datafusion/execution/src/disk_manager.rs | 4 +-
 datafusion/execution/src/memory_pool/pool.rs | 9 +--
 .../expr-common/src/interval_arithmetic.rs | 16 ++---
 .../expr-common/src/type_coercion/binary.rs | 5 +-
 .../expr/src/built_in_window_function.rs | 2 +-
 datafusion/expr/src/execution_props.rs | 4 +-
 datafusion/expr/src/expr_rewriter/mod.rs | 1 +
 datafusion/expr/src/expr_rewriter/order_by.rs | 4 +-
 datafusion/expr/src/expr_schema.rs | 8 +--
 datafusion/expr/src/literal.rs | 4 ++
 datafusion/expr/src/logical_plan/plan.rs | 10 ++--
 datafusion/expr/src/tree_node.rs | 6 +-
 datafusion/expr/src/utils.rs | 17 +++---
 .../functions-aggregate-common/src/tdigest.rs | 11 ++--
 .../functions-aggregate/benches/count.rs | 12 ++--
 datafusion/functions-aggregate/benches/sum.rs | 17 ++++--
 .../src/approx_percentile_cont.rs | 4 +-
 .../src/approx_percentile_cont_with_weight.rs | 2 +-
 datafusion/functions-aggregate/src/stddev.rs | 8 +--
 datafusion/functions-nested/src/array_has.rs | 16 ++---
 datafusion/functions-nested/src/flatten.rs | 6 +-
 datafusion/functions-nested/src/map.rs | 23 +++---
 datafusion/functions-nested/src/planner.rs | 2 +-
 datafusion/functions-nested/src/position.rs | 4 +-
 datafusion/functions-nested/src/remove.rs | 10 ++--
 datafusion/functions-nested/src/replace.rs | 14 ++---
 datafusion/functions-nested/src/set_ops.rs | 20 +++----
 datafusion/functions-nested/src/string.rs | 54 ++++++++---------
 datafusion/functions/src/core/expr_ext.rs | 4 +-
 datafusion/functions/src/core/mod.rs | 2 +-
 .../functions/src/datetime/date_trunc.rs | 26 ++++----
 .../functions/src/datetime/to_local_time.rs | 1 +
 datafusion/functions/src/math/mod.rs | 1 +
 .../functions/src/regex/regexpreplace.rs | 4 +-
 datafusion/functions/src/string/btrim.rs | 2 +-
 datafusion/functions/src/string/common.rs | 2 +-
 datafusion/functions/src/string/lower.rs | 1 +
 datafusion/functions/src/string/ltrim.rs | 2 +-
 datafusion/functions/src/string/repeat.rs | 8 +--
 datafusion/functions/src/string/rtrim.rs | 2 +-
 datafusion/functions/src/string/split_part.rs | 40 ++++++-------
 datafusion/functions/src/string/upper.rs | 1 +
 datafusion/functions/src/unicode/lpad.rs | 12 ++--
 datafusion/functions/src/unicode/rpad.rs | 12 ++--
 .../optimizer/src/common_subexpr_eliminate.rs | 2 +-
 .../optimizer/src/eliminate_cross_join.rs | 1 +
 datafusion/optimizer/src/join_key_set.rs | 1 +
 .../simplify_expressions/expr_simplifier.rs | 1 +
 datafusion/optimizer/src/test/mod.rs | 2 +
 .../src/unwrap_cast_in_comparison.rs | 1 +
 datafusion/optimizer/src/utils.rs | 1 +
 datafusion/physical-expr/src/aggregate.rs | 4 +-
 datafusion/physical-expr/src/analysis.rs | 6 +-
 .../physical-expr/src/equivalence/mod.rs | 9 +--
 .../src/equivalence/projection.rs | 4 +-
 .../src/equivalence/properties.rs | 1 +
 .../physical-expr/src/expressions/binary.rs | 5 +-
 .../physical-expr/src/expressions/in_list.rs | 14 ++---
 .../src/expressions/is_not_null.rs | 2 +-
 .../physical-expr/src/expressions/is_null.rs | 18 +++---
 .../physical-expr/src/expressions/literal.rs | 2 +
 .../physical-expr/src/intervals/cp_solver.rs | 1 +
 datafusion/physical-expr/src/planner.rs | 4 +-
 .../physical-expr/src/utils/guarantee.rs | 5 +-
 .../physical-expr/src/window/cume_dist.rs | 1 +
 .../physical-expr/src/window/lead_lag.rs | 1 +
 .../physical-expr/src/window/nth_value.rs | 1 +
 datafusion/physical-expr/src/window/rank.rs | 1 +
 .../src/window/sliding_aggregate.rs | 2 +-
 .../physical-plan/src/aggregates/mod.rs | 10 ++--
 .../src/aggregates/no_grouping.rs | 12 ++--
 .../physical-plan/src/aggregates/row_hash.rs | 30 +++++-----
 .../src/aggregates/topk/hash_table.rs | 2 +-
 .../physical-plan/src/aggregates/topk/heap.rs | 14 ++---
 .../src/aggregates/topk/priority_map.rs | 22 +++----
 .../src/aggregates/topk_stream.rs | 10 ++--
 datafusion/physical-plan/src/analyze.rs | 4 +-
 datafusion/physical-plan/src/coalesce/mod.rs | 6 +-
 .../physical-plan/src/coalesce_batches.rs | 4 +-
 .../physical-plan/src/execution_plan.rs | 15 ++---
 datafusion/physical-plan/src/filter.rs | 2 +-
 datafusion/physical-plan/src/insert.rs | 2 +-
 .../physical-plan/src/joins/hash_join.rs | 17 +++---
 .../src/joins/nested_loop_join.rs | 6 +-
 .../src/joins/sort_merge_join.rs | 14 ++---
 .../src/joins/symmetric_hash_join.rs | 24 ++++----
 datafusion/physical-plan/src/joins/utils.rs | 36 +++++------
 datafusion/physical-plan/src/limit.rs | 5 +-
 datafusion/physical-plan/src/projection.rs | 10 ++--
 .../physical-plan/src/recursive_query.rs | 6 +-
 .../physical-plan/src/repartition/mod.rs | 26 ++++----
datafusion/physical-plan/src/sorts/sort.rs | 11 ++-- datafusion/physical-plan/src/spill.rs | 33 ++++------ datafusion/physical-plan/src/topk/mod.rs | 2 +- datafusion/physical-plan/src/union.rs | 13 ++-- datafusion/physical-plan/src/unnest.rs | 1 + datafusion/physical-plan/src/values.rs | 8 +-- datafusion/physical-plan/src/windows/mod.rs | 15 +++-- .../src/windows/window_agg_exec.rs | 6 +- datafusion/proto/src/physical_plan/mod.rs | 4 +- .../proto/src/physical_plan/to_proto.rs | 2 +- datafusion/proto/tests/cases/mod.rs | 2 + .../tests/cases/roundtrip_logical_plan.rs | 10 ++-- datafusion/sql/src/cte.rs | 12 ++-- datafusion/sql/src/expr/binary_op.rs | 2 +- datafusion/sql/src/expr/mod.rs | 8 +-- datafusion/sql/src/parser.rs | 19 +++--- datafusion/sql/src/planner.rs | 4 +- datafusion/sql/src/statement.rs | 32 +++++----- datafusion/sql/src/unparser/plan.rs | 6 +- datafusion/sql/tests/sql_integration.rs | 2 + datafusion/sqllogictest/src/engines/mod.rs | 2 + datafusion/substrait/src/extensions.rs | 4 +- .../substrait/src/logical_plan/consumer.rs | 10 +++- .../substrait/src/logical_plan/producer.rs | 34 +++++------ 166 files changed, 686 insertions(+), 604 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c155e475a026f..a4644208e2c6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -162,8 +162,12 @@ panic = 'unwind' rpath = false [workspace.lints.clippy] +# Note: we run clippy with `-D warnings`, warnings are not allowed. # Detects large stack-allocated futures that may cause stack overflow crashes (see threshold in clippy.toml) large_futures = "warn" +# Detects functions that do not need to move their parameters and thus potentially require clones. +# Performance-related, OK to suppress in test code. +needless_pass_by_value = "warn" [workspace.lints.rust] unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] } diff --git a/datafusion-examples/examples/custom_datasource.rs b/datafusion-examples/examples/custom_datasource.rs index 0f7748b133650..ab178809980c0 100644 --- a/datafusion-examples/examples/custom_datasource.rs +++ b/datafusion-examples/examples/custom_datasource.rs @@ -121,7 +121,11 @@ impl CustomDataSource { projections: Option<&Vec>, schema: SchemaRef, ) -> Result> { - Ok(Arc::new(CustomExec::new(projections, schema, self.clone()))) + Ok(Arc::new(CustomExec::new( + projections, + &schema, + self.clone(), + ))) } pub(crate) fn populate_users(&self) { @@ -196,10 +200,10 @@ struct CustomExec { impl CustomExec { fn new( projections: Option<&Vec>, - schema: SchemaRef, + schema: &SchemaRef, db: CustomDataSource, ) -> Self { - let projected_schema = project_schema(&schema, projections).unwrap(); + let projected_schema = project_schema(schema, projections).unwrap(); let cache = Self::compute_properties(projected_schema.clone()); Self { db, diff --git a/datafusion-examples/examples/flight/flight_server.rs b/datafusion-examples/examples/flight/flight_server.rs index f9d1b8029f04b..8bca16c333414 100644 --- a/datafusion-examples/examples/flight/flight_server.rs +++ b/datafusion-examples/examples/flight/flight_server.rs @@ -186,6 +186,7 @@ impl FlightService for FlightServiceImpl { } } +#[allow(clippy::needless_pass_by_value)] // moved value better aligns with .map_err() usages fn to_tonic_err(e: datafusion::error::DataFusionError) -> Status { Status::internal(format!("{e:?}")) } diff --git a/datafusion-examples/examples/pruning.rs b/datafusion-examples/examples/pruning.rs index c090cd2bcca91..45644c3fafb06 100644 --- a/datafusion-examples/examples/pruning.rs +++ 
b/datafusion-examples/examples/pruning.rs @@ -64,7 +64,7 @@ async fn main() { // // Note the predicate does not automatically coerce types or simplify // expressions. See expr_api.rs examples for how to do this if required - let predicate = create_pruning_predicate(expr, &my_catalog.schema); + let predicate = create_pruning_predicate(&expr, &my_catalog.schema); // Evaluate the predicate for the three files in the catalog let prune_results = predicate.prune(&my_catalog).unwrap(); @@ -184,10 +184,10 @@ impl PruningStatistics for MyCatalog { } } -fn create_pruning_predicate(expr: Expr, schema: &SchemaRef) -> PruningPredicate { +fn create_pruning_predicate(expr: &Expr, schema: &SchemaRef) -> PruningPredicate { let df_schema = DFSchema::try_from(schema.as_ref().clone()).unwrap(); let props = ExecutionProps::new(); - let physical_expr = create_physical_expr(&expr, &df_schema, &props).unwrap(); + let physical_expr = create_physical_expr(expr, &df_schema, &props).unwrap(); PruningPredicate::try_new(physical_expr, schema.clone()).unwrap() } diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs index d855198fa7c6b..401134105dbd8 100644 --- a/datafusion/common/src/column.rs +++ b/datafusion/common/src/column.rs @@ -309,7 +309,7 @@ mod tests { use arrow_schema::SchemaBuilder; use std::sync::Arc; - fn create_qualified_schema(qualifier: &str, names: Vec<&str>) -> Result { + fn create_qualified_schema(qualifier: &str, names: &[&str]) -> Result { let mut schema_builder = SchemaBuilder::new(); schema_builder.extend( names @@ -322,9 +322,9 @@ mod tests { #[test] fn test_normalize_with_schemas_and_ambiguity_check() -> Result<()> { - let schema1 = create_qualified_schema("t1", vec!["a", "b"])?; - let schema2 = create_qualified_schema("t2", vec!["c", "d"])?; - let schema3 = create_qualified_schema("t3", vec!["a", "b", "c", "d", "e"])?; + let schema1 = create_qualified_schema("t1", &["a", "b"])?; + let schema2 = create_qualified_schema("t2", &["c", "d"])?; + let schema3 = create_qualified_schema("t3", &["a", "b", "c", "d", "e"])?; // already normalized let col = Column::new(Some("t1"), "a"); diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 05988d6c6da4c..cf6806d9218a9 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -812,6 +812,7 @@ mod test { Err(ArrowError::SchemaError("bar".to_string()).into()) } + #[allow(clippy::needless_pass_by_value)] // OK in tests fn do_root_test(e: DataFusionError, exp: DataFusionError) { let e = e.find_root(); diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index e42fb96ed6a5b..b837d609c0909 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -447,22 +447,22 @@ mod tests { fn extract_column_options( props: &WriterProperties, - col: ColumnPath, + col: &ColumnPath, ) -> ParquetColumnOptions { - let bloom_filter_default_props = props.bloom_filter_properties(&col); + let bloom_filter_default_props = props.bloom_filter_properties(col); ParquetColumnOptions { bloom_filter_enabled: Some(bloom_filter_default_props.is_some()), - encoding: props.encoding(&col).map(|s| s.to_string()), - dictionary_enabled: Some(props.dictionary_enabled(&col)), - compression: match props.compression(&col) { + encoding: props.encoding(col).map(|s| s.to_string()), + dictionary_enabled: Some(props.dictionary_enabled(col)), + compression: match props.compression(col) { 
Compression::ZSTD(lvl) => { Some(format!("zstd({})", lvl.compression_level())) } _ => None, }, statistics_enabled: Some( - match props.statistics_enabled(&col) { + match props.statistics_enabled(col) { EnabledStatistics::None => "none", EnabledStatistics::Chunk => "chunk", EnabledStatistics::Page => "page", @@ -471,7 +471,7 @@ mod tests { ), bloom_filter_fpp: bloom_filter_default_props.map(|p| p.fpp), bloom_filter_ndv: bloom_filter_default_props.map(|p| p.ndv), - max_statistics_size: Some(props.max_statistics_size(&col)), + max_statistics_size: Some(props.max_statistics_size(col)), } } @@ -479,10 +479,10 @@ mod tests { /// (use identity to confirm correct.) fn session_config_from_writer_props(props: &WriterProperties) -> TableParquetOptions { let default_col = ColumnPath::from("col doesn't have specific config"); - let default_col_props = extract_column_options(props, default_col); + let default_col_props = extract_column_options(props, &default_col); let configured_col = ColumnPath::from(COL_NAME); - let configured_col_props = extract_column_options(props, configured_col); + let configured_col_props = extract_column_options(props, &configured_col); let key_value_metadata = props .key_value_metadata() diff --git a/datafusion/common/src/hash_utils.rs b/datafusion/common/src/hash_utils.rs index 72cfeafd0bfec..9774bf3ab3390 100644 --- a/datafusion/common/src/hash_utils.rs +++ b/datafusion/common/src/hash_utils.rs @@ -143,7 +143,7 @@ fn hash_array_primitive( /// with the new hash using `combine_hashes` #[cfg(not(feature = "force_hash_collisions"))] fn hash_array( - array: T, + array: &T, random_state: &RandomState, hashes_buffer: &mut [u64], rehash: bool, @@ -382,16 +382,16 @@ pub fn create_hashes<'a>( downcast_primitive_array! { array => hash_array_primitive(array, random_state, hashes_buffer, rehash), DataType::Null => hash_null(random_state, hashes_buffer, rehash), - DataType::Boolean => hash_array(as_boolean_array(array)?, random_state, hashes_buffer, rehash), - DataType::Utf8 => hash_array(as_string_array(array)?, random_state, hashes_buffer, rehash), - DataType::Utf8View => hash_array(as_string_view_array(array)?, random_state, hashes_buffer, rehash), - DataType::LargeUtf8 => hash_array(as_largestring_array(array), random_state, hashes_buffer, rehash), - DataType::Binary => hash_array(as_generic_binary_array::(array)?, random_state, hashes_buffer, rehash), - DataType::BinaryView => hash_array(as_binary_view_array(array)?, random_state, hashes_buffer, rehash), - DataType::LargeBinary => hash_array(as_generic_binary_array::(array)?, random_state, hashes_buffer, rehash), + DataType::Boolean => hash_array(&as_boolean_array(array)?, random_state, hashes_buffer, rehash), + DataType::Utf8 => hash_array(&as_string_array(array)?, random_state, hashes_buffer, rehash), + DataType::Utf8View => hash_array(&as_string_view_array(array)?, random_state, hashes_buffer, rehash), + DataType::LargeUtf8 => hash_array(&as_largestring_array(array), random_state, hashes_buffer, rehash), + DataType::Binary => hash_array(&as_generic_binary_array::(array)?, random_state, hashes_buffer, rehash), + DataType::BinaryView => hash_array(&as_binary_view_array(array)?, random_state, hashes_buffer, rehash), + DataType::LargeBinary => hash_array(&as_generic_binary_array::(array)?, random_state, hashes_buffer, rehash), DataType::FixedSizeBinary(_) => { let array: &FixedSizeBinaryArray = array.as_any().downcast_ref().unwrap(); - hash_array(array, random_state, hashes_buffer, rehash) + hash_array(&array, random_state, 
hashes_buffer, rehash) } DataType::Decimal128(_, _) => { let array = as_primitive_array::(array)?; diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 0cb325e0b02bd..268c2164112b1 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -721,19 +721,19 @@ impl std::hash::Hash for ScalarValue { v.hash(state) } List(arr) => { - hash_nested_array(arr.to_owned() as ArrayRef, state); + hash_nested_array(&(arr.to_owned() as ArrayRef), state); } LargeList(arr) => { - hash_nested_array(arr.to_owned() as ArrayRef, state); + hash_nested_array(&(arr.to_owned() as ArrayRef), state); } FixedSizeList(arr) => { - hash_nested_array(arr.to_owned() as ArrayRef, state); + hash_nested_array(&(arr.to_owned() as ArrayRef), state); } Struct(arr) => { - hash_nested_array(arr.to_owned() as ArrayRef, state); + hash_nested_array(&(arr.to_owned() as ArrayRef), state); } Map(arr) => { - hash_nested_array(arr.to_owned() as ArrayRef, state); + hash_nested_array(&(arr.to_owned() as ArrayRef), state); } Date32(v) => v.hash(state), Date64(v) => v.hash(state), @@ -767,7 +767,7 @@ impl std::hash::Hash for ScalarValue { } } -fn hash_nested_array(arr: ArrayRef, state: &mut H) { +fn hash_nested_array(arr: &ArrayRef, state: &mut H) { let arrays = vec![arr.to_owned()]; let hashes_buffer = &mut vec![0; arr.len()]; let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0); @@ -3234,7 +3234,7 @@ impl From> for ScalarValue { value .into_iter() .fold(ScalarStructBuilder::new(), |builder, (name, value)| { - builder.with_name_and_scalar(name, value) + builder.with_name_and_scalar(name, &value) }) .build() .unwrap() @@ -3542,9 +3542,11 @@ impl fmt::Display for ScalarValue { } None => write!(f, "NULL")?, }, - ScalarValue::List(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?, - ScalarValue::LargeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?, - ScalarValue::FixedSizeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?, + ScalarValue::List(arr) => fmt_list(&(arr.to_owned() as ArrayRef), f)?, + ScalarValue::LargeList(arr) => fmt_list(&(arr.to_owned() as ArrayRef), f)?, + ScalarValue::FixedSizeList(arr) => { + fmt_list(&(arr.to_owned() as ArrayRef), f)? + } ScalarValue::Date32(e) => { format_option!(f, e.map(|v| Date32Type::to_naive_date(v).to_string()))? } @@ -3651,7 +3653,7 @@ impl fmt::Display for ScalarValue { } } -fn fmt_list(arr: ArrayRef, f: &mut fmt::Formatter) -> fmt::Result { +fn fmt_list(arr: &ArrayRef, f: &mut fmt::Formatter) -> fmt::Result { // ScalarValue List, LargeList, FixedSizeList should always have a single element assert_eq!(arr.len(), 1); let options = FormatOptions::default().with_display_error(true); @@ -4433,6 +4435,7 @@ mod tests { } // Verifies that ScalarValue has the same behavior with compute kernal when it overflows. 
+ #[allow(clippy::needless_pass_by_value)] // OK in tests fn check_scalar_add_overflow(left: ScalarValue, right: ScalarValue) where T: ArrowNumericType, @@ -5967,6 +5970,7 @@ mod tests { } // mimics how casting work on scalar values by `casting` `scalar` to `desired_type` + #[allow(clippy::needless_pass_by_value)] // OK in tests fn check_scalar_cast(scalar: ScalarValue, desired_type: DataType) { // convert from scalar --> Array to call cast let scalar_array = scalar.to_array().expect("Failed to convert to array"); @@ -6571,8 +6575,8 @@ mod tests { let field_b = Field::new("b", DataType::Utf8, true); let s = ScalarStructBuilder::new() - .with_scalar(field_a, ScalarValue::from(1i32)) - .with_scalar(field_b, ScalarValue::Utf8(None)) + .with_scalar(field_a, &ScalarValue::from(1i32)) + .with_scalar(field_b, &ScalarValue::Utf8(None)) .build() .unwrap(); diff --git a/datafusion/common/src/scalar/struct_builder.rs b/datafusion/common/src/scalar/struct_builder.rs index 4a6a8f0289a7d..9c9f3b074316c 100644 --- a/datafusion/common/src/scalar/struct_builder.rs +++ b/datafusion/common/src/scalar/struct_builder.rs @@ -87,7 +87,7 @@ impl ScalarStructBuilder { } /// Add the specified field and `ScalarValue` to the struct. - pub fn with_scalar(self, field: impl IntoFieldRef, value: ScalarValue) -> Self { + pub fn with_scalar(self, field: impl IntoFieldRef, value: &ScalarValue) -> Self { // valid scalar value should not fail let array = value.to_array().unwrap(); self.with_array(field, array) @@ -95,7 +95,7 @@ impl ScalarStructBuilder { /// Add a field with the specified name and value to the struct. /// the field is created with the specified data type and as non nullable - pub fn with_name_and_scalar(self, name: &str, value: ScalarValue) -> Self { + pub fn with_name_and_scalar(self, name: &str, value: &ScalarValue) -> Self { let field = Field::new(name, value.data_type(), false); self.with_scalar(field, value) } diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs index d8e62b3045f93..fb1dd8900e16f 100644 --- a/datafusion/common/src/stats.rs +++ b/datafusion/common/src/stats.rs @@ -263,7 +263,7 @@ impl Statistics { /// parameter to compute global statistics in a multi-partition setting. pub fn with_fetch( mut self, - schema: SchemaRef, + schema: &SchemaRef, fetch: Option, skip: usize, n_partitions: usize, @@ -317,7 +317,7 @@ impl Statistics { .. } => check_num_rows(fetch.and_then(|v| v.checked_mul(n_partitions)), false), }; - self.column_statistics = Statistics::unknown_column(&schema); + self.column_statistics = Statistics::unknown_column(schema); self.total_byte_size = Precision::Absent; Ok(self) } diff --git a/datafusion/core/benches/aggregate_query_sql.rs b/datafusion/core/benches/aggregate_query_sql.rs index 1d8d87ada7847..d612035be85c9 100644 --- a/datafusion/core/benches/aggregate_query_sql.rs +++ b/datafusion/core/benches/aggregate_query_sql.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions + #[macro_use] extern crate criterion; extern crate arrow; diff --git a/datafusion/core/benches/data_utils/mod.rs b/datafusion/core/benches/data_utils/mod.rs index 9d2864919225a..c582873073a10 100644 --- a/datafusion/core/benches/data_utils/mod.rs +++ b/datafusion/core/benches/data_utils/mod.rs @@ -17,6 +17,8 @@ //! This module provides the in-memory table for more realistic benchmarking. 
+#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions + use arrow::{ array::Float32Array, array::Float64Array, diff --git a/datafusion/core/benches/distinct_query_sql.rs b/datafusion/core/benches/distinct_query_sql.rs index c242798a56f00..a63fff7794720 100644 --- a/datafusion/core/benches/distinct_query_sql.rs +++ b/datafusion/core/benches/distinct_query_sql.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions + #[macro_use] extern crate criterion; extern crate arrow; diff --git a/datafusion/core/benches/math_query_sql.rs b/datafusion/core/benches/math_query_sql.rs index 92c59d5066401..36ee9621bd207 100644 --- a/datafusion/core/benches/math_query_sql.rs +++ b/datafusion/core/benches/math_query_sql.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions + #[macro_use] extern crate criterion; use criterion::Criterion; diff --git a/datafusion/core/benches/physical_plan.rs b/datafusion/core/benches/physical_plan.rs index 3ad71be1f4478..bd0bff383815f 100644 --- a/datafusion/core/benches/physical_plan.rs +++ b/datafusion/core/benches/physical_plan.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions + #[macro_use] extern crate criterion; use criterion::{BatchSize, Criterion}; diff --git a/datafusion/core/benches/sort_limit_query_sql.rs b/datafusion/core/benches/sort_limit_query_sql.rs index cfd4b8bc4bba8..e2ac061cbeb4c 100644 --- a/datafusion/core/benches/sort_limit_query_sql.rs +++ b/datafusion/core/benches/sort_limit_query_sql.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions + #[macro_use] extern crate criterion; use criterion::Criterion; diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs index 00f6d5916751b..3ea23e25ba586 100644 --- a/datafusion/core/benches/sql_planner.rs +++ b/datafusion/core/benches/sql_planner.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions + #[macro_use] extern crate criterion; extern crate arrow; diff --git a/datafusion/core/benches/topk_aggregate.rs b/datafusion/core/benches/topk_aggregate.rs index 922cbd2b42292..46614456451f2 100644 --- a/datafusion/core/benches/topk_aggregate.rs +++ b/datafusion/core/benches/topk_aggregate.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions + mod data_utils; use arrow::util::pretty::pretty_format_batches; use criterion::{criterion_group, criterion_main, Criterion}; diff --git a/datafusion/core/benches/window_query_sql.rs b/datafusion/core/benches/window_query_sql.rs index 42a1e51be361a..b1b64503db813 100644 --- a/datafusion/core/benches/window_query_sql.rs +++ b/datafusion/core/benches/window_query_sql.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. 
+#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions + #[macro_use] extern crate criterion; extern crate arrow; diff --git a/datafusion/core/src/catalog_common/information_schema.rs b/datafusion/core/src/catalog_common/information_schema.rs index d086ce900cc37..ca21b16bdeb47 100644 --- a/datafusion/core/src/catalog_common/information_schema.rs +++ b/datafusion/core/src/catalog_common/information_schema.rs @@ -150,7 +150,7 @@ impl InformationSchemaConfig { &catalog_name, &schema_name, &table_name, - table.get_table_definition(), + &table.get_table_definition(), ) } } @@ -401,7 +401,7 @@ impl InformationSchemaViewBuilder { catalog_name: impl AsRef, schema_name: impl AsRef, table_name: impl AsRef, - definition: Option>, + definition: &Option>, ) { // Note: append_value is actually infallible. self.catalog_names.append_value(catalog_name.as_ref()); diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 2138bd1294b4b..03b5fce330af5 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1120,7 +1120,7 @@ impl DataFrame { ) -> Result> { let task_ctx = Arc::new(self.task_ctx()); let plan = self.create_physical_plan().await?; - execute_stream_partitioned(plan, task_ctx) + execute_stream_partitioned(&plan, &task_ctx) } /// Returns the `DFSchema` describing the output of this DataFrame. diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 76e8ad9da559c..12952a578d524 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -340,7 +340,7 @@ impl FileFormat for ParquetFormat { ) -> Result { let stats = fetch_statistics( store.as_ref(), - table_schema, + &table_schema, object, self.metadata_size_hint(), ) @@ -479,7 +479,7 @@ async fn fetch_schema( /// See [`statistics_from_parquet_meta`] for more details async fn fetch_statistics( store: &dyn ObjectStore, - table_schema: SchemaRef, + table_schema: &SchemaRef, file: &ObjectMeta, metadata_size_hint: Option, ) -> Result { @@ -493,11 +493,11 @@ async fn fetch_statistics( /// using the row group statistics in the parquet metadata. 
pub fn statistics_from_parquet_meta_calc( metadata: &ParquetMetaData, - table_schema: SchemaRef, + table_schema: &SchemaRef, ) -> Result { let row_groups_metadata = metadata.row_groups(); - let mut statistics = Statistics::new_unknown(&table_schema); + let mut statistics = Statistics::new_unknown(table_schema); let mut has_statistics = false; let mut num_rows = 0_usize; let mut total_byte_size = 0_usize; @@ -521,7 +521,7 @@ pub fn statistics_from_parquet_meta_calc( )?; statistics.column_statistics = if has_statistics { - let (mut max_accs, mut min_accs) = create_max_min_accs(&table_schema); + let (mut max_accs, mut min_accs) = create_max_min_accs(table_schema); let mut null_counts_array = vec![Precision::Exact(0); table_schema.fields().len()]; @@ -555,13 +555,13 @@ pub fn statistics_from_parquet_meta_calc( }); get_col_stats( - &table_schema, - null_counts_array, + table_schema, + &null_counts_array, &mut max_accs, &mut min_accs, ) } else { - Statistics::unknown_column(&table_schema) + Statistics::unknown_column(table_schema) }; Ok(statistics) @@ -577,7 +577,7 @@ pub fn statistics_from_parquet_meta_calc( )] pub async fn statistics_from_parquet_meta( metadata: &ParquetMetaData, - table_schema: SchemaRef, + table_schema: &SchemaRef, ) -> Result { statistics_from_parquet_meta_calc(metadata, table_schema) } @@ -867,13 +867,13 @@ type ColSender = Sender; /// Returns join handles for each columns serialization task along with a send channel /// to send arrow arrays to each serialization task. fn spawn_column_parallel_row_group_writer( - schema: Arc, - parquet_props: Arc, + schema: &Arc, + parquet_props: &Arc, max_buffer_size: usize, pool: &Arc, ) -> Result<(Vec, Vec)> { - let schema_desc = arrow_to_parquet_schema(&schema)?; - let col_writers = get_column_writers(&schema_desc, &parquet_props, &schema)?; + let schema_desc = arrow_to_parquet_schema(schema)?; + let col_writers = get_column_writers(&schema_desc, parquet_props, schema)?; let num_columns = col_writers.len(); let mut col_writer_tasks = Vec::with_capacity(num_columns); @@ -967,6 +967,7 @@ fn spawn_rg_join_and_finalize_task( /// on the next row group in parallel. So, parquet serialization is parallelized /// across both columns and row_groups, with a theoretical max number of parallel tasks /// given by n_columns * num_row_groups. 
+#[allow(clippy::needless_pass_by_value)] // https://github.com/rust-lang/rust-clippy/issues/13321 fn spawn_parquet_parallel_serialization_task( mut data: Receiver, serialize_tx: Sender>, @@ -980,8 +981,8 @@ fn spawn_parquet_parallel_serialization_task( let max_row_group_rows = writer_props.max_row_group_size(); let (mut column_writer_handles, mut col_array_channels) = spawn_column_parallel_row_group_writer( - schema.clone(), - writer_props.clone(), + &schema, + &writer_props, max_buffer_rb, &pool, )?; @@ -1024,8 +1025,8 @@ fn spawn_parquet_parallel_serialization_task( (column_writer_handles, col_array_channels) = spawn_column_parallel_row_group_writer( - schema.clone(), - writer_props.clone(), + &schema, + &writer_props, max_buffer_rb, &pool, )?; @@ -1230,6 +1231,7 @@ pub(crate) mod test_util { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::super::test_util::scan_format; use crate::datasource::listing::{ListingTableUrl, PartitionedFile}; use crate::physical_plan::collect; @@ -1289,8 +1291,7 @@ mod tests { let format = ParquetFormat::default(); let schema = format.infer_schema(&ctx, &store, &meta).await.unwrap(); - let stats = - fetch_statistics(store.as_ref(), schema.clone(), &meta[0], None).await?; + let stats = fetch_statistics(store.as_ref(), &schema, &meta[0], None).await?; assert_eq!(stats.num_rows, Precision::Exact(3)); let c1_stats = &stats.column_statistics[0]; @@ -1298,7 +1299,7 @@ mod tests { assert_eq!(c1_stats.null_count, Precision::Exact(1)); assert_eq!(c2_stats.null_count, Precision::Exact(3)); - let stats = fetch_statistics(store.as_ref(), schema, &meta[1], None).await?; + let stats = fetch_statistics(store.as_ref(), &schema, &meta[1], None).await?; assert_eq!(stats.num_rows, Precision::Exact(3)); let c1_stats = &stats.column_statistics[0]; let c2_stats = &stats.column_statistics[1]; @@ -1478,8 +1479,7 @@ mod tests { .unwrap(); let stats = - fetch_statistics(store.upcast().as_ref(), schema.clone(), &meta[0], Some(9)) - .await?; + fetch_statistics(store.upcast().as_ref(), &schema, &meta[0], Some(9)).await?; assert_eq!(stats.num_rows, Precision::Exact(3)); let c1_stats = &stats.column_statistics[0]; @@ -1506,13 +1506,9 @@ mod tests { .infer_schema(&ctx, &store.upcast(), &meta) .await .unwrap(); - let stats = fetch_statistics( - store.upcast().as_ref(), - schema.clone(), - &meta[0], - Some(size_hint), - ) - .await?; + let stats = + fetch_statistics(store.upcast().as_ref(), &schema, &meta[0], Some(size_hint)) + .await?; assert_eq!(stats.num_rows, Precision::Exact(3)); let c1_stats = &stats.column_statistics[0]; @@ -1559,7 +1555,7 @@ mod tests { // Fetch statistics for first file let pq_meta = fetch_parquet_metadata(store.as_ref(), &files[0], None).await?; - let stats = statistics_from_parquet_meta_calc(&pq_meta, schema.clone())?; + let stats = statistics_from_parquet_meta_calc(&pq_meta, &schema)?; assert_eq!(stats.num_rows, Precision::Exact(4)); // column c_dic @@ -1606,7 +1602,7 @@ mod tests { // Fetch statistics for first file let pq_meta = fetch_parquet_metadata(store.as_ref(), &files[0], None).await?; - let stats = statistics_from_parquet_meta_calc(&pq_meta, schema.clone())?; + let stats = statistics_from_parquet_meta_calc(&pq_meta, &schema)?; // assert_eq!(stats.num_rows, Precision::Exact(3)); // column c1 @@ -1628,7 +1624,7 @@ mod tests { // Fetch statistics for second file let pq_meta = fetch_parquet_metadata(store.as_ref(), &files[1], None).await?; - let stats = statistics_from_parquet_meta_calc(&pq_meta, schema.clone())?; 
+ let stats = statistics_from_parquet_meta_calc(&pq_meta, &schema)?; assert_eq!(stats.num_rows, Precision::Exact(3)); // column c1: missing from the file so the table treats all 3 rows as null let c1_stats = &stats.column_statistics[0]; diff --git a/datafusion/core/src/datasource/file_format/write/demux.rs b/datafusion/core/src/datasource/file_format/write/demux.rs index a58c77e313137..5c50b5d27fd30 100644 --- a/datafusion/core/src/datasource/file_format/write/demux.rs +++ b/datafusion/core/src/datasource/file_format/write/demux.rs @@ -258,7 +258,7 @@ async fn hive_style_partitions_demuxer( let all_partition_values = compute_partition_keys_by_row(&rb, &partition_by)?; // Next compute how the batch should be split up to take each distinct key to its own batch - let take_map = compute_take_arrays(&rb, all_partition_values); + let take_map = compute_take_arrays(&rb, &all_partition_values); // Divide up the batch into distinct partition key batches and send each batch for (part_key, mut builder) in take_map.into_iter() { @@ -377,7 +377,7 @@ fn compute_partition_keys_by_row<'a>( fn compute_take_arrays( rb: &RecordBatch, - all_partition_values: Vec>, + all_partition_values: &[Vec<&str>], ) -> HashMap, UInt64Builder> { let mut take_map = HashMap::new(); for i in 0..rb.num_rows() { diff --git a/datafusion/core/src/datasource/memory.rs b/datafusion/core/src/datasource/memory.rs index cef7f210e1182..cb20d3c36796f 100644 --- a/datafusion/core/src/datasource/memory.rs +++ b/datafusion/core/src/datasource/memory.rs @@ -365,7 +365,7 @@ impl DataSink for MemSink { #[cfg(test)] mod tests { - + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use crate::datasource::provider_as_source; use crate::physical_plan::collect; diff --git a/datafusion/core/src/datasource/physical_plan/file_groups.rs b/datafusion/core/src/datasource/physical_plan/file_groups.rs index 28f975ae193d5..baaaa8beab202 100644 --- a/datafusion/core/src/datasource/physical_plan/file_groups.rs +++ b/datafusion/core/src/datasource/physical_plan/file_groups.rs @@ -388,6 +388,7 @@ impl Ord for ToRepartition { #[cfg(test)] mod test { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; /// Empty file won't get partitioned diff --git a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs index 9f674185694da..00597d9327def 100644 --- a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs +++ b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs @@ -407,7 +407,7 @@ impl PartitionColumnProjector { // - `partition_values`: the list of partition values, one for each partition column pub fn project( &mut self, - file_batch: RecordBatch, + file_batch: &RecordBatch, partition_values: &[ScalarValue], ) -> Result { let expected_cols = @@ -610,6 +610,7 @@ fn create_output_array( #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use arrow_array::Int32Array; use super::*; @@ -768,7 +769,7 @@ mod tests { let projected_batch = proj .project( // file_batch is ok here because we kept all the file cols in the projection - file_batch, + &file_batch, &[ wrap_partition_value_in_dict(ScalarValue::from("2021")), wrap_partition_value_in_dict(ScalarValue::from("10")), @@ -796,7 +797,7 @@ mod tests { let projected_batch = proj .project( // file_batch is ok here because we kept all the file cols in the projection - file_batch, + &file_batch, &[ 
wrap_partition_value_in_dict(ScalarValue::from("2021")), wrap_partition_value_in_dict(ScalarValue::from("10")), @@ -826,7 +827,7 @@ mod tests { let projected_batch = proj .project( // file_batch is ok here because we kept all the file cols in the projection - file_batch, + &file_batch, &[ wrap_partition_value_in_dict(ScalarValue::from("2021")), wrap_partition_value_in_dict(ScalarValue::from("10")), @@ -854,7 +855,7 @@ mod tests { let projected_batch = proj .project( // file_batch is ok here because we kept all the file cols in the projection - file_batch, + &file_batch, &[ ScalarValue::from("2021"), ScalarValue::from("10"), diff --git a/datafusion/core/src/datasource/physical_plan/file_stream.rs b/datafusion/core/src/datasource/physical_plan/file_stream.rs index 6f354b31ae878..f22968f8d8cd1 100644 --- a/datafusion/core/src/datasource/physical_plan/file_stream.rs +++ b/datafusion/core/src/datasource/physical_plan/file_stream.rs @@ -394,7 +394,7 @@ impl FileStream { self.file_stream_metrics.time_scanning_total.stop(); let result = self .pc_projector - .project(batch, partition_values) + .project(&batch, partition_values) .map_err(|e| ArrowError::ExternalError(e.into())) .map(|batch| match &mut self.remain { Some(remain) => { diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index b2f86db742f2d..ef90a708a17f6 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -399,7 +399,7 @@ impl ParquetExecBuilder { let page_pruning_predicate = predicate .as_ref() .map(|predicate_expr| { - PagePruningAccessPlanFilter::new(predicate_expr, file_schema.clone()) + PagePruningAccessPlanFilter::new(predicate_expr, file_schema) }) .map(Arc::new); diff --git a/datafusion/core/src/datasource/physical_plan/parquet/opener.rs b/datafusion/core/src/datasource/physical_plan/parquet/opener.rs index 4edc0ac525de6..04742056b58d9 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/opener.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/opener.rs @@ -133,7 +133,7 @@ impl FileOpener for ParquetOpener { builder.metadata(), reorder_predicates, &file_metrics, - Arc::clone(&schema_mapping), + &schema_mapping, ); match row_filter { diff --git a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs index 4e71993b51537..c8c44d2719179 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs @@ -113,7 +113,7 @@ pub struct PagePruningAccessPlanFilter { impl PagePruningAccessPlanFilter { /// Create a new [`PagePruningAccessPlanFilter`] from a physical /// expression. 
- pub fn new(expr: &Arc, schema: SchemaRef) -> Self { + pub fn new(expr: &Arc, schema: &SchemaRef) -> Self { // extract any single column predicates let predicates = split_conjunction(expr) .into_iter() diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs index 59d23fd68c313..33ad9fc8507ee 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs @@ -432,7 +432,7 @@ pub fn build_row_filter( metadata: &ParquetMetaData, reorder_predicates: bool, file_metrics: &ParquetFileMetrics, - schema_mapping: Arc, + schema_mapping: &Arc, ) -> Result> { let rows_filtered = &file_metrics.pushdown_rows_filtered; let time = &file_metrics.pushdown_eval_time; @@ -474,7 +474,7 @@ pub fn build_row_filter( metadata, rows_filtered.clone(), time.clone(), - Arc::clone(&schema_mapping), + Arc::clone(schema_mapping), ) .map(|pred| Box::new(pred) as _) }) diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs index ccd77d90be579..2bb946a659a38 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs @@ -408,6 +408,7 @@ impl<'a> PruningStatistics for RowGroupPruningStatistics<'a> { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use std::ops::Rem; use std::sync::Arc; diff --git a/datafusion/core/src/datasource/physical_plan/statistics.rs b/datafusion/core/src/datasource/physical_plan/statistics.rs index e1c61ec1a7129..e58defb3255f8 100644 --- a/datafusion/core/src/datasource/physical_plan/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/statistics.rs @@ -152,12 +152,12 @@ impl MinMaxStatistics { Self::new( &min_max_sort_order, &min_max_schema, - RecordBatch::try_new(Arc::clone(&min_max_schema), min_values).map_err( + &RecordBatch::try_new(Arc::clone(&min_max_schema), min_values).map_err( |e| { DataFusionError::ArrowError(e, Some("\ncreate min batch".to_string())) }, )?, - RecordBatch::try_new(Arc::clone(&min_max_schema), max_values).map_err( + &RecordBatch::try_new(Arc::clone(&min_max_schema), max_values).map_err( |e| { DataFusionError::ArrowError(e, Some("\ncreate max batch".to_string())) }, @@ -168,8 +168,8 @@ impl MinMaxStatistics { pub fn new( sort_order: &[PhysicalSortExpr], schema: &SchemaRef, - min_values: RecordBatch, - max_values: RecordBatch, + min_values: &RecordBatch, + max_values: &RecordBatch, ) -> Result { use arrow::row::*; diff --git a/datafusion/core/src/datasource/statistics.rs b/datafusion/core/src/datasource/statistics.rs index 201bbfd5c0076..a7314ec137a5b 100644 --- a/datafusion/core/src/datasource/statistics.rs +++ b/datafusion/core/src/datasource/statistics.rs @@ -186,7 +186,7 @@ fn add_row_stats( #[cfg(feature = "parquet")] pub(crate) fn get_col_stats( schema: &Schema, - null_counts: Vec>, + null_counts: &[Precision], max_values: &mut [Option], min_values: &mut [Option], ) -> Vec { diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 06dc797ae27af..e0c4f9917e3d6 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -872,18 +872,18 @@ impl SessionContext { } else if allow_missing { return self.return_empty_dataframe(); } else { - 
return self.schema_doesnt_exist_err(name); + return self.schema_doesnt_exist_err(&name); } }; let dereg = catalog.deregister_schema(name.schema_name(), cascade)?; match (dereg, allow_missing) { (None, true) => self.return_empty_dataframe(), - (None, false) => self.schema_doesnt_exist_err(name), + (None, false) => self.schema_doesnt_exist_err(&name), (Some(_), _) => self.return_empty_dataframe(), } } - fn schema_doesnt_exist_err(&self, schemaref: SchemaReference) -> Result { + fn schema_doesnt_exist_err(&self, schemaref: &SchemaReference) -> Result { exec_err!("Schema '{schemaref}' doesn't exist.") } diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 095590fe03f6c..bc21e18788fa1 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -1391,6 +1391,7 @@ type PlanWithKeyRequirements = PlanContext>>; #[cfg(feature = "parquet")] #[cfg(test)] pub(crate) mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use std::ops::Deref; use super::*; diff --git a/datafusion/core/src/physical_optimizer/pruning.rs b/datafusion/core/src/physical_optimizer/pruning.rs index 9bc2bb1d1db98..0021947d5e246 100644 --- a/datafusion/core/src/physical_optimizer/pruning.rs +++ b/datafusion/core/src/physical_optimizer/pruning.rs @@ -1567,6 +1567,7 @@ pub(crate) enum StatisticsType { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use std::collections::HashMap; use std::ops::{Not, Rem}; diff --git a/datafusion/core/src/physical_optimizer/sanity_checker.rs b/datafusion/core/src/physical_optimizer/sanity_checker.rs index e392105fbcb78..99a8fb829e84f 100644 --- a/datafusion/core/src/physical_optimizer/sanity_checker.rs +++ b/datafusion/core/src/physical_optimizer/sanity_checker.rs @@ -159,6 +159,7 @@ pub fn check_plan_sanity( #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use crate::physical_optimizer::test_utils::{ diff --git a/datafusion/core/src/physical_optimizer/test_utils.rs b/datafusion/core/src/physical_optimizer/test_utils.rs index 98f1a7c21a39b..02015bb17f448 100644 --- a/datafusion/core/src/physical_optimizer/test_utils.rs +++ b/datafusion/core/src/physical_optimizer/test_utils.rs @@ -17,6 +17,8 @@ //! Collection of testing utility functions that are leveraged by the query optimizer rules +#![allow(clippy::needless_pass_by_value)] // OK in tests + use std::any::Any; use std::fmt::Formatter; use std::sync::Arc; diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 82405dd98e30c..a0112b85ca6db 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -466,7 +466,8 @@ impl DefaultPhysicalPlanner { .collect::>>>() }) .collect::>>()?; - let value_exec = ValuesExec::try_new(SchemaRef::new(exec_schema), exprs)?; + let value_exec = + ValuesExec::try_new(SchemaRef::new(exec_schema), &exprs)?; Arc::new(value_exec) } LogicalPlan::EmptyRelation(EmptyRelation { @@ -486,7 +487,7 @@ impl DefaultPhysicalPlanner { output_schema, }) => { let output_schema: Schema = output_schema.as_ref().into(); - self.plan_describe(schema.clone(), Arc::new(output_schema))? + self.plan_describe(schema, Arc::new(output_schema))? } // 1 Child @@ -1875,7 +1876,7 @@ impl DefaultPhysicalPlanner { // return an record_batch which describes a table's schema. 
fn plan_describe( &self, - table_schema: Arc, + table_schema: &Arc, output_schema: Arc, ) -> Result> { let mut column_names = StringBuilder::new(); diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs index 39a126a06bb60..e00d5ae268c66 100644 --- a/datafusion/core/src/test/mod.rs +++ b/datafusion/core/src/test/mod.rs @@ -17,6 +17,8 @@ //! Common unit test utility methods +#![allow(clippy::needless_pass_by_value)] // OK in tests + use std::any::Any; use std::fs::File; use std::io::prelude::*; diff --git a/datafusion/core/tests/custom_sources_cases/mod.rs b/datafusion/core/tests/custom_sources_cases/mod.rs index c12dd4e1b0eee..dd8594de2af46 100644 --- a/datafusion/core/tests/custom_sources_cases/mod.rs +++ b/datafusion/core/tests/custom_sources_cases/mod.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#![allow(clippy::needless_pass_by_value)] // OK in tests + use std::any::Any; use std::pin::Pin; use std::sync::Arc; diff --git a/datafusion/core/tests/expr_api/mod.rs b/datafusion/core/tests/expr_api/mod.rs index cbd8926721529..176438ed650ef 100644 --- a/datafusion/core/tests/expr_api/mod.rs +++ b/datafusion/core/tests/expr_api/mod.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#![allow(clippy::needless_pass_by_value)] // OK in tests + use arrow::util::pretty::{pretty_format_batches, pretty_format_columns}; use arrow_array::builder::{ListBuilder, StringBuilder}; use arrow_array::{ArrayRef, Int64Array, RecordBatch, StringArray, StructArray}; diff --git a/datafusion/core/tests/fuzz_cases/mod.rs b/datafusion/core/tests/fuzz_cases/mod.rs index 69241571b4af0..4ff1ecb205e47 100644 --- a/datafusion/core/tests/fuzz_cases/mod.rs +++ b/datafusion/core/tests/fuzz_cases/mod.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#![allow(clippy::needless_pass_by_value)] // OK in tests + mod aggregate_fuzz; mod distinct_count_string_fuzz; mod join_fuzz; diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index 60a8dd4007865..d7b9bc00dac16 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -16,6 +16,9 @@ // under the License. //! Parquet integration tests + +#![allow(clippy::needless_pass_by_value)] // OK in tests + use crate::parquet::utils::MetricsFinder; use arrow::array::Decimal128Array; use arrow::{ diff --git a/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs index bbf4dcd2b799d..b55872a7d0f5d 100644 --- a/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs +++ b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs @@ -17,6 +17,8 @@ //! Tests for the physical optimizer +#![allow(clippy::needless_pass_by_value)] // OK in tests + use datafusion_common::config::ConfigOptions; use datafusion_physical_optimizer::aggregate_statistics::AggregateStatistics; use datafusion_physical_optimizer::PhysicalOptimizerRule; diff --git a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs index 750544ecdec11..baa9ff3746db2 100644 --- a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. 
+#![allow(clippy::needless_pass_by_value)] // OK in tests + use arrow_schema::{DataType, Field, Schema, SchemaRef, SortOptions}; use datafusion_common::config::ConfigOptions; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; diff --git a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs index 042f6d622565c..9ac4eec60f7ee 100644 --- a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs +++ b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs @@ -17,6 +17,8 @@ //! Tests for the limited distinct aggregation optimizer rule +#![allow(clippy::needless_pass_by_value)] // OK in tests + use super::test_util::{parquet_exec_with_sort, schema, trim_plan_display}; use std::sync::Arc; diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs index 1e0d3d9d514e8..590870134997c 100644 --- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs +++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs @@ -18,6 +18,8 @@ //! This module contains end to end demonstrations of creating //! user defined aggregate functions +#![allow(clippy::needless_pass_by_value)] // OK in tests + use std::hash::{DefaultHasher, Hash, Hasher}; use std::sync::{ atomic::{AtomicBool, Ordering}, diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs index 56edeab443c7c..2ac6c6b127481 100644 --- a/datafusion/core/tests/user_defined/user_defined_plan.rs +++ b/datafusion/core/tests/user_defined/user_defined_plan.rs @@ -437,7 +437,7 @@ impl UserDefinedLogicalNodeCore for TopKPlanNode { Ok(Self { k: self.k, input: inputs.swap_remove(0), - expr: replace_sort_expression(self.expr.clone(), exprs.swap_remove(0)), + expr: replace_sort_expression(&self.expr, exprs.swap_remove(0)), }) } } diff --git a/datafusion/execution/src/disk_manager.rs b/datafusion/execution/src/disk_manager.rs index c98d7e5579f0f..c40ccc3efa952 100644 --- a/datafusion/execution/src/disk_manager.rs +++ b/datafusion/execution/src/disk_manager.rs @@ -87,7 +87,7 @@ impl DiskManager { local_dirs: Mutex::new(Some(vec![])), })), DiskManagerConfig::NewSpecified(conf_dirs) => { - let local_dirs = create_local_dirs(conf_dirs)?; + let local_dirs = create_local_dirs(&conf_dirs)?; debug!( "Created local dirs {:?} as DataFusion working directory", local_dirs @@ -169,7 +169,7 @@ impl RefCountedTempFile { } /// Setup local dirs by creating one new dir in each of the given dirs -fn create_local_dirs(local_dirs: Vec) -> Result>> { +fn create_local_dirs(local_dirs: &[PathBuf]) -> Result>> { local_dirs .iter() .map(|root| { diff --git a/datafusion/execution/src/memory_pool/pool.rs b/datafusion/execution/src/memory_pool/pool.rs index e169c1f319cca..1d9c8282d7d54 100644 --- a/datafusion/execution/src/memory_pool/pool.rs +++ b/datafusion/execution/src/memory_pool/pool.rs @@ -366,8 +366,8 @@ impl MemoryPool for TrackConsumersPool { // wrap OOM message in top consumers DataFusionError::ResourcesExhausted( provide_top_memory_consumers_to_error_msg( - e, - self.report_top(self.top.into()), + &e, + &self.report_top(self.top.into()), ), ) } @@ -389,8 +389,8 @@ impl MemoryPool for TrackConsumersPool { } fn provide_top_memory_consumers_to_error_msg( - error_msg: String, - top_consumers: String, + error_msg: &str, + top_consumers: &str, ) -> String { format!("Additional 
allocation failed with top memory consumers (across reservations) as: {}. Error: {}", top_consumers, error_msg) } @@ -583,6 +583,7 @@ mod tests { #[test] fn test_tracked_consumers_pool_deregister() { + #[allow(clippy::needless_pass_by_value)] // OK in tests fn test_per_pool_type(pool: Arc) { // Baseline: see the 2 memory consumers let mut r0 = MemoryConsumer::new("r0").register(&pool); diff --git a/datafusion/expr-common/src/interval_arithmetic.rs b/datafusion/expr-common/src/interval_arithmetic.rs index 6424888c896a5..d704e1de8b1ff 100644 --- a/datafusion/expr-common/src/interval_arithmetic.rs +++ b/datafusion/expr-common/src/interval_arithmetic.rs @@ -733,11 +733,11 @@ impl Interval { ); }; - let zero = ScalarValue::new_zero(&dt)?; + let zero = &ScalarValue::new_zero(&dt)?; let result = match ( - self.contains_value(&zero)?, - rhs.contains_value(&zero)?, + self.contains_value(zero)?, + rhs.contains_value(zero)?, dt.is_unsigned_integer(), ) { (true, true, false) => mul_helper_multi_zero_inclusive(&dt, self, rhs), @@ -1320,10 +1320,10 @@ fn mul_helper_single_zero_inclusive( dt: &DataType, lhs: &Interval, rhs: &Interval, - zero: ScalarValue, + zero: &ScalarValue, ) -> Interval { // With the following interval bounds, there is no possibility to create an invalid interval. - if rhs.upper <= zero && !rhs.upper.is_null() { + if rhs.upper <= *zero && !rhs.upper.is_null() { // <-------=====0=====-------> // <--======----0------------> let lower = mul_bounds::(dt, &lhs.upper, &rhs.lower); @@ -1372,11 +1372,11 @@ fn mul_helper_zero_exclusive( dt: &DataType, lhs: &Interval, rhs: &Interval, - zero: ScalarValue, + zero: &ScalarValue, ) -> Interval { let (lower, upper) = match ( - lhs.upper <= zero && !lhs.upper.is_null(), - rhs.upper <= zero && !rhs.upper.is_null(), + lhs.upper <= *zero && !lhs.upper.is_null(), + rhs.upper <= *zero && !rhs.upper.is_null(), ) { // With the following interval bounds, there is no possibility to create an invalid interval. 
(true, true) => ( diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 3617f56905a99..ba5e0a902e839 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -562,7 +562,7 @@ fn string_temporal_coercion( match temporal { Date32 | Date64 => Some(temporal.clone()), Time32(_) | Time64(_) => { - if is_time_with_valid_unit(temporal.to_owned()) { + if is_time_with_valid_unit(temporal) { Some(temporal.to_owned()) } else { None @@ -1083,7 +1083,7 @@ pub fn regex_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option bool { +fn is_time_with_valid_unit(datatype: &DataType) -> bool { matches!( datatype, DataType::Time32(TimeUnit::Second) @@ -1662,6 +1662,7 @@ mod tests { Ok(()) } + #[allow(clippy::needless_pass_by_value)] // OK in tests fn test_math_decimal_coercion_rule( lhs_type: DataType, rhs_type: DataType, diff --git a/datafusion/expr/src/built_in_window_function.rs b/datafusion/expr/src/built_in_window_function.rs index 597e4e68a0c69..cb4d2c3491de2 100644 --- a/datafusion/expr/src/built_in_window_function.rs +++ b/datafusion/expr/src/built_in_window_function.rs @@ -120,7 +120,7 @@ impl BuiltInWindowFunction { "{}", utils::generate_signature_error_msg( &format!("{self}"), - self.signature(), + &self.signature(), input_expr_types, ) ) diff --git a/datafusion/expr/src/execution_props.rs b/datafusion/expr/src/execution_props.rs index 3401a94b2736e..857055a94e49d 100644 --- a/datafusion/expr/src/execution_props.rs +++ b/datafusion/expr/src/execution_props.rs @@ -93,11 +93,11 @@ impl ExecutionProps { /// Returns the provider for the `var_type`, if any pub fn get_var_provider( &self, - var_type: VarType, + var_type: &VarType, ) -> Option> { self.var_providers .as_ref() - .and_then(|var_providers| var_providers.get(&var_type).cloned()) + .and_then(|var_providers| var_providers.get(var_type).cloned()) } } diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index f0f02f398642a..17792d88a61ef 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -349,6 +349,7 @@ impl SavedName { #[cfg(test)] mod test { + #![allow(clippy::needless_pass_by_value)] // OK in tests use std::ops::Add; use super::*; diff --git a/datafusion/expr/src/expr_rewriter/order_by.rs b/datafusion/expr/src/expr_rewriter/order_by.rs index 48d380cd59e24..bb524dbec8eda 100644 --- a/datafusion/expr/src/expr_rewriter/order_by.rs +++ b/datafusion/expr/src/expr_rewriter/order_by.rs @@ -50,7 +50,7 @@ fn rewrite_sort_col_by_aggs(expr: Expr, plan: &LogicalPlan) -> Result { // on top of them) if plan_inputs.len() == 1 { let proj_exprs = plan.expressions(); - rewrite_in_terms_of_projection(expr, proj_exprs, plan_inputs[0]) + rewrite_in_terms_of_projection(expr, &proj_exprs, plan_inputs[0]) } else { Ok(expr) } @@ -69,7 +69,7 @@ fn rewrite_sort_col_by_aggs(expr: Expr, plan: &LogicalPlan) -> Result { /// 2. 
t produces an output schema with two columns "a", "b + c" fn rewrite_in_terms_of_projection( expr: Expr, - proj_exprs: Vec, + proj_exprs: &[Expr], input: &LogicalPlan, ) -> Result { // assumption is that each item in exprs, such as "b + c" is diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 598d172d30a01..74c5f608b658e 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -156,7 +156,7 @@ impl ExprSchemable for Expr { err, utils::generate_signature_error_msg( func.name(), - func.signature().clone(), + func.signature(), &arg_data_types, ) ) @@ -184,7 +184,7 @@ impl ExprSchemable for Expr { err, utils::generate_signature_error_msg( fun.name(), - fun.signature(), + &fun.signature(), &data_types ) ) @@ -199,7 +199,7 @@ impl ExprSchemable for Expr { err, utils::generate_signature_error_msg( fun.name(), - fun.signature(), + &fun.signature(), &data_types ) ) @@ -221,7 +221,7 @@ impl ExprSchemable for Expr { err, utils::generate_signature_error_msg( func.name(), - func.signature().clone(), + &func.signature().clone(), &data_types ) ) diff --git a/datafusion/expr/src/literal.rs b/datafusion/expr/src/literal.rs index 90ba5a9a693c7..3691b4fd7d760 100644 --- a/datafusion/expr/src/literal.rs +++ b/datafusion/expr/src/literal.rs @@ -21,11 +21,15 @@ use crate::Expr; use datafusion_common::ScalarValue; /// Create a literal expression +// TODO address needless_pass_by_value & remove the suppression. Maybe change Literal::lit to move self? +#[allow(clippy::needless_pass_by_value)] pub fn lit(n: T) -> Expr { n.lit() } /// Create a literal timestamp expression +// TODO address needless_pass_by_value & remove the suppression. See lit() above. +#[allow(clippy::needless_pass_by_value)] pub fn lit_timestamp_nano(n: T) -> Expr { n.lit_timestamp_nano() } diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 18a624dd9cb2a..41c474e4bda25 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -2779,7 +2779,7 @@ impl Aggregate { input.schema().metadata().clone(), )?; - Self::try_new_with_schema(input, group_expr, aggr_expr, Arc::new(schema)) + Self::try_new_with_schema(input, group_expr, aggr_expr, schema) } /// Create a new aggregate operator using the provided schema to avoid the overhead of @@ -2791,7 +2791,7 @@ impl Aggregate { input: Arc, group_expr: Vec, aggr_expr: Vec, - schema: DFSchemaRef, + schema: DFSchema, ) -> Result { if group_expr.is_empty() && aggr_expr.is_empty() { return plan_err!( @@ -2809,10 +2809,8 @@ impl Aggregate { let aggregate_func_dependencies = calc_func_dependencies_for_aggregate(&group_expr, &input, &schema)?; - let new_schema = schema.as_ref().clone(); - let schema = Arc::new( - new_schema.with_functional_dependencies(aggregate_func_dependencies)?, - ); + let schema = + Arc::new(schema.with_functional_dependencies(aggregate_func_dependencies)?); Ok(Self { input, group_expr, diff --git a/datafusion/expr/src/tree_node.rs b/datafusion/expr/src/tree_node.rs index c7c498dd3f017..75ee71c12ccf8 100644 --- a/datafusion/expr/src/tree_node.rs +++ b/datafusion/expr/src/tree_node.rs @@ -407,13 +407,13 @@ pub fn replace_sort_expressions(sorts: Vec, new_expr: Vec) -> Vec Sort { +pub fn replace_sort_expression(sort: &Sort, new_expr: Expr) -> Sort { Sort { expr: new_expr, - ..sort + ..*sort } } diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 1d8eb9445edad..06255f952f60d 100644 --- 
a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -353,7 +353,7 @@ fn get_excluded_columns( /// Returns all `Expr`s in the schema, except the `Column`s in the `columns_to_skip` fn get_exprs_except_skipped( schema: &DFSchema, - columns_to_skip: HashSet, + columns_to_skip: &HashSet, ) -> Vec { if columns_to_skip.is_empty() { schema.iter().map(Expr::from).collect::>() @@ -412,7 +412,7 @@ pub fn expand_wildcard( }; // Add each excluded `Column` to columns_to_skip columns_to_skip.extend(excluded_columns); - Ok(get_exprs_except_skipped(schema, columns_to_skip)) + Ok(get_exprs_except_skipped(schema, &columns_to_skip)) } /// Resolves an `Expr::Wildcard` to a collection of qualified `Expr::Column`'s. @@ -454,7 +454,7 @@ pub fn expand_qualified_wildcard( columns_to_skip.extend(excluded_columns); Ok(get_exprs_except_skipped( &qualified_dfschema, - columns_to_skip, + &columns_to_skip, )) } @@ -793,8 +793,11 @@ pub fn exprlist_len( .into_iter() .collect::>(); Ok( - get_exprs_except_skipped(wildcard_schema.unwrap_or(schema), excluded) - .len(), + get_exprs_except_skipped( + wildcard_schema.unwrap_or(schema), + &excluded, + ) + .len(), ) } Expr::Wildcard { @@ -832,7 +835,7 @@ pub fn exprlist_len( .into_iter() .collect::>(); Ok( - get_exprs_except_skipped(related_wildcard_schema.as_ref(), excluded) + get_exprs_except_skipped(related_wildcard_schema.as_ref(), &excluded) .len(), ) } @@ -1040,7 +1043,7 @@ pub fn find_valid_equijoin_key_pair( /// ``` pub fn generate_signature_error_msg( func_name: &str, - func_signature: Signature, + func_signature: &Signature, input_expr_types: &[DataType], ) -> String { let candidate_signatures = func_signature diff --git a/datafusion/functions-aggregate-common/src/tdigest.rs b/datafusion/functions-aggregate-common/src/tdigest.rs index 620a68e83ecdc..674e853e794bb 100644 --- a/datafusion/functions-aggregate-common/src/tdigest.rs +++ b/datafusion/functions-aggregate-common/src/tdigest.rs @@ -171,13 +171,16 @@ impl TDigest { } pub fn new_with_centroid(max_size: usize, centroid: Centroid) -> Self { + let sum = centroid.mean * centroid.weight; + let max = centroid.mean; + let min = centroid.mean; TDigest { - centroids: vec![centroid.clone()], + centroids: vec![centroid], max_size, - sum: centroid.mean * centroid.weight, + sum, count: 1, - max: centroid.mean, - min: centroid.mean, + max, + min, } } diff --git a/datafusion/functions-aggregate/benches/count.rs b/datafusion/functions-aggregate/benches/count.rs index 65956cb8a1dea..bd104623b84f9 100644 --- a/datafusion/functions-aggregate/benches/count.rs +++ b/datafusion/functions-aggregate/benches/count.rs @@ -47,7 +47,7 @@ fn prepare_accumulator() -> Box { fn convert_to_state_bench( c: &mut Criterion, name: &str, - values: ArrayRef, + values: &ArrayRef, opt_filter: Option<&BooleanArray>, ) { let accumulator = prepare_accumulator(); @@ -64,20 +64,20 @@ fn convert_to_state_bench( fn count_benchmark(c: &mut Criterion) { let values = Arc::new(create_primitive_array::(8192, 0.0)) as ArrayRef; - convert_to_state_bench(c, "count convert state no nulls, no filter", values, None); + convert_to_state_bench(c, "count convert state no nulls, no filter", &values, None); let values = Arc::new(create_primitive_array::(8192, 0.3)) as ArrayRef; - convert_to_state_bench(c, "count convert state 30% nulls, no filter", values, None); + convert_to_state_bench(c, "count convert state 30% nulls, no filter", &values, None); let values = Arc::new(create_primitive_array::(8192, 0.3)) as ArrayRef; - convert_to_state_bench(c, "count 
convert state 70% nulls, no filter", values, None); + convert_to_state_bench(c, "count convert state 70% nulls, no filter", &values, None); let values = Arc::new(create_primitive_array::(8192, 0.0)) as ArrayRef; let filter = create_boolean_array(8192, 0.0, 0.5); convert_to_state_bench( c, "count convert state no nulls, filter", - values, + &values, Some(&filter), ); @@ -86,7 +86,7 @@ fn count_benchmark(c: &mut Criterion) { convert_to_state_bench( c, "count convert state nulls, filter", - values, + &values, Some(&filter), ); } diff --git a/datafusion/functions-aggregate/benches/sum.rs b/datafusion/functions-aggregate/benches/sum.rs index 652d447129dc1..4e61e52ea3884 100644 --- a/datafusion/functions-aggregate/benches/sum.rs +++ b/datafusion/functions-aggregate/benches/sum.rs @@ -45,7 +45,7 @@ fn prepare_accumulator(data_type: &DataType) -> Box { fn convert_to_state_bench( c: &mut Criterion, name: &str, - values: ArrayRef, + values: &ArrayRef, opt_filter: Option<&BooleanArray>, ) { let accumulator = prepare_accumulator(values.data_type()); @@ -62,13 +62,18 @@ fn convert_to_state_bench( fn count_benchmark(c: &mut Criterion) { let values = Arc::new(create_primitive_array::(8192, 0.0)) as ArrayRef; - convert_to_state_bench(c, "sum i64 convert state no nulls, no filter", values, None); + convert_to_state_bench( + c, + "sum i64 convert state no nulls, no filter", + &values, + None, + ); let values = Arc::new(create_primitive_array::(8192, 0.3)) as ArrayRef; convert_to_state_bench( c, "sum i64 convert state 30% nulls, no filter", - values, + &values, None, ); @@ -76,7 +81,7 @@ fn count_benchmark(c: &mut Criterion) { convert_to_state_bench( c, "sum i64 convert state 70% nulls, no filter", - values, + &values, None, ); @@ -85,7 +90,7 @@ fn count_benchmark(c: &mut Criterion) { convert_to_state_bench( c, "sum i64 convert state no nulls, filter", - values, + &values, Some(&filter), ); @@ -94,7 +99,7 @@ fn count_benchmark(c: &mut Criterion) { convert_to_state_bench( c, "sum i64 convert state nulls, filter", - values, + &values, Some(&filter), ); } diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index 5578aebbf4031..725cceca6b378 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -104,7 +104,7 @@ impl ApproxPercentileCont { pub(crate) fn create_accumulator( &self, - args: AccumulatorArgs, + args: &AccumulatorArgs, ) -> Result { let percentile = validate_input_percentile_expr(&args.exprs[1])?; let tdigest_max_size = if args.exprs.len() == 3 { @@ -254,7 +254,7 @@ impl AggregateUDFImpl for ApproxPercentileCont { #[inline] fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { - Ok(Box::new(self.create_accumulator(acc_args)?)) + Ok(Box::new(self.create_accumulator(&acc_args)?)) } fn return_type(&self, arg_types: &[DataType]) -> Result { diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs index fee67ba1623db..fc612e9c4a278 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs @@ -138,7 +138,7 @@ impl AggregateUDFImpl for ApproxPercentileContWithWeight { ..acc_args }; let approx_percentile_cont_accumulator = - self.approx_percentile_cont.create_accumulator(sub_args)?; + 
self.approx_percentile_cont.create_accumulator(&sub_args)?; let accumulator = ApproxPercentileWithWeightAccumulator::new( approx_percentile_cont_accumulator, ); diff --git a/datafusion/functions-aggregate/src/stddev.rs b/datafusion/functions-aggregate/src/stddev.rs index a25ab5e319915..ee3b16238628b 100644 --- a/datafusion/functions-aggregate/src/stddev.rs +++ b/datafusion/functions-aggregate/src/stddev.rs @@ -367,7 +367,7 @@ mod tests { let agg1 = stddev_pop_udaf(); let agg2 = stddev_pop_udaf(); - let actual = merge(&batch1, &batch2, agg1, agg2, &schema)?; + let actual = merge(&batch1, &batch2, &agg1, &agg2, &schema)?; assert_eq!(actual, ScalarValue::from(std::f64::consts::SQRT_2)); Ok(()) @@ -386,7 +386,7 @@ mod tests { let agg1 = stddev_pop_udaf(); let agg2 = stddev_pop_udaf(); - let actual = merge(&batch1, &batch2, agg1, agg2, &schema)?; + let actual = merge(&batch1, &batch2, &agg1, &agg2, &schema)?; assert_eq!(actual, ScalarValue::from(std::f64::consts::SQRT_2)); Ok(()) @@ -395,8 +395,8 @@ mod tests { fn merge( batch1: &RecordBatch, batch2: &RecordBatch, - agg1: Arc, - agg2: Arc, + agg1: &Arc, + agg2: &Arc, schema: &Schema, ) -> Result { let args1 = AccumulatorArgs { diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index 9b4357d0d14f5..f855e5a9c32e7 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -381,8 +381,8 @@ fn array_has_all_and_any_string_internal( let haystack_array = string_array_to_vec(&arr); let needle_array = string_array_to_vec(&sub_arr); boolean_builder.append_value(array_has_string_kernel( - haystack_array, - needle_array, + &haystack_array, + &needle_array, comparison_type, )); } @@ -396,8 +396,8 @@ fn array_has_all_and_any_string_internal( } fn array_has_string_kernel( - haystack: Vec>, - needle: Vec>, + haystack: &[Option<&str>], + needle: &[Option<&str>], comparison_type: ComparisonType, ) -> bool { match comparison_type { @@ -426,8 +426,8 @@ fn general_array_has_for_all_and_any( let arr_values = converter.convert_columns(&[arr])?; let sub_arr_values = converter.convert_columns(&[sub_arr])?; boolean_builder.append_value(general_array_has_all_and_any_kernel( - arr_values, - sub_arr_values, + &arr_values, + &sub_arr_values, comparison_type, )); } else { @@ -439,8 +439,8 @@ fn general_array_has_for_all_and_any( } fn general_array_has_all_and_any_kernel( - haystack_rows: Rows, - needle_rows: Rows, + haystack_rows: &Rows, + needle_rows: &Rows, comparison_type: ComparisonType, ) -> bool { match comparison_type { diff --git a/datafusion/functions-nested/src/flatten.rs b/datafusion/functions-nested/src/flatten.rs index b04c35667226c..db2a6c94a1164 100644 --- a/datafusion/functions-nested/src/flatten.rs +++ b/datafusion/functions-nested/src/flatten.rs @@ -134,7 +134,7 @@ fn flatten_internal( List(_) | LargeList(_) => { let sub_list = as_generic_list_array::(&values)?; if let Some(indexes) = indexes { - let offsets = get_offsets_for_flatten(offsets, indexes); + let offsets = get_offsets_for_flatten(offsets, &indexes); flatten_internal::(sub_list.clone(), Some(offsets)) } else { flatten_internal::(sub_list.clone(), Some(offsets)) @@ -143,7 +143,7 @@ fn flatten_internal( // Reach the base level, create a new list array _ => { if let Some(indexes) = indexes { - let offsets = get_offsets_for_flatten(offsets, indexes); + let offsets = get_offsets_for_flatten(offsets, &indexes); let list_arr = GenericListArray::::new(field, offsets, values, None); Ok(list_arr) } 
else { @@ -156,7 +156,7 @@ fn flatten_internal( // Create new offsets that are equivalent to `flatten` the array. fn get_offsets_for_flatten( offsets: OffsetBuffer, - indexes: OffsetBuffer, + indexes: &OffsetBuffer, ) -> OffsetBuffer { let buffer = offsets.into_inner(); let offsets: Vec = indexes diff --git a/datafusion/functions-nested/src/map.rs b/datafusion/functions-nested/src/map.rs index 29afe4a7f3bea..19a45709f5d1d 100644 --- a/datafusion/functions-nested/src/map.rs +++ b/datafusion/functions-nested/src/map.rs @@ -93,7 +93,7 @@ fn make_map_batch(args: &[ColumnarValue]) -> Result { } let values = get_first_array_ref(&args[1])?; - make_map_batch_internal(keys, values, can_evaluate_to_const, args[0].data_type()) + make_map_batch_internal(keys, values, can_evaluate_to_const, &args[0].data_type()) } fn check_unique_keys(array: &dyn Array) -> Result<()> { @@ -125,7 +125,7 @@ fn make_map_batch_internal( keys: ArrayRef, values: ArrayRef, can_evaluate_to_const: bool, - data_type: DataType, + data_type: &DataType, ) -> Result { if keys.len() != values.len() { return exec_err!("map requires key and value lists to have the same length"); @@ -133,9 +133,9 @@ fn make_map_batch_internal( if !can_evaluate_to_const { return if let DataType::LargeList(..) = data_type { - make_map_array_internal::(keys, values) + make_map_array_internal::(&keys, &values) } else { - make_map_array_internal::(keys, values) + make_map_array_internal::(&keys, &values) }; } @@ -145,9 +145,10 @@ fn make_map_batch_internal( let mut entry_offsets_buffer = VecDeque::new(); entry_offsets_buffer.push_back(0); - entry_struct_buffer.push_back((Arc::clone(&key_field), Arc::clone(&keys))); - entry_struct_buffer.push_back((Arc::clone(&value_field), Arc::clone(&values))); - entry_offsets_buffer.push_back(keys.len() as u32); + let keys_len = keys.len(); + entry_struct_buffer.push_back((key_field, keys)); + entry_struct_buffer.push_back((value_field, values)); + entry_offsets_buffer.push_back(keys_len as u32); let entry_struct: Vec<(Arc, ArrayRef)> = entry_struct_buffer.into(); let entry_struct = StructArray::from(entry_struct); @@ -308,14 +309,14 @@ fn get_element_type(data_type: &DataType) -> Result<&DataType> { /// ```text fn make_map_array_internal( - keys: ArrayRef, - values: ArrayRef, + keys: &ArrayRef, + values: &ArrayRef, ) -> Result { let mut offset_buffer = vec![O::zero()]; let mut running_offset = O::zero(); - let keys = list_to_arrays::(&keys); - let values = list_to_arrays::(&values); + let keys = list_to_arrays::(keys); + let values = list_to_arrays::(values); let mut key_array_vec = vec![]; let mut value_array_vec = vec![]; diff --git a/datafusion/functions-nested/src/planner.rs b/datafusion/functions-nested/src/planner.rs index 4cd8faa3ca98c..7a5a5aad395f3 100644 --- a/datafusion/functions-nested/src/planner.rs +++ b/datafusion/functions-nested/src/planner.rs @@ -143,7 +143,7 @@ impl ExprPlanner for FieldAccessPlanner { match field_access { // expr["field"] => get_field(expr, "field") GetFieldAccess::NamedStructField { name } => { - Ok(PlannerResult::Planned(get_field(expr, name))) + Ok(PlannerResult::Planned(get_field(expr, &name))) } // expr[idx] ==> array_element(expr, idx) GetFieldAccess::ListIndex { key: index } => { diff --git a/datafusion/functions-nested/src/position.rs b/datafusion/functions-nested/src/position.rs index a48332ceb0b30..09ac4e5ed64f0 100644 --- a/datafusion/functions-nested/src/position.rs +++ b/datafusion/functions-nested/src/position.rs @@ -130,13 +130,13 @@ fn general_position_dispatch(args: 
&[ArrayRef]) -> Result(list_array, element_array, arr_from) + generic_position::(list_array, element_array, &arr_from) } fn generic_position( list_array: &GenericListArray, element_array: &ArrayRef, - arr_from: Vec, // 0-indexed + arr_from: &[i64], // 0-indexed ) -> Result { let mut data = Vec::with_capacity(list_array.len()); diff --git a/datafusion/functions-nested/src/remove.rs b/datafusion/functions-nested/src/remove.rs index 0b7cfc283c06f..1a61ea4b0d2b8 100644 --- a/datafusion/functions-nested/src/remove.rs +++ b/datafusion/functions-nested/src/remove.rs @@ -185,7 +185,7 @@ pub fn array_remove_inner(args: &[ArrayRef]) -> Result { } let arr_n = vec![1; args[0].len()]; - array_remove_internal(&args[0], &args[1], arr_n) + array_remove_internal(&args[0], &args[1], &arr_n) } /// Array_remove_n SQL function @@ -195,7 +195,7 @@ pub fn array_remove_n_inner(args: &[ArrayRef]) -> Result { } let arr_n = as_int64_array(&args[2])?.values().to_vec(); - array_remove_internal(&args[0], &args[1], arr_n) + array_remove_internal(&args[0], &args[1], &arr_n) } /// Array_remove_all SQL function @@ -205,13 +205,13 @@ pub fn array_remove_all_inner(args: &[ArrayRef]) -> Result { } let arr_n = vec![i64::MAX; args[0].len()]; - array_remove_internal(&args[0], &args[1], arr_n) + array_remove_internal(&args[0], &args[1], &arr_n) } fn array_remove_internal( array: &ArrayRef, element_array: &ArrayRef, - arr_n: Vec, + arr_n: &[i64], ) -> Result { match array.data_type() { DataType::List(_) => { @@ -248,7 +248,7 @@ fn array_remove_internal( fn general_remove( list_array: &GenericListArray, element_array: &ArrayRef, - arr_n: Vec, + arr_n: &[i64], ) -> Result { let data_type = list_array.value_type(); let mut new_values = vec![]; diff --git a/datafusion/functions-nested/src/replace.rs b/datafusion/functions-nested/src/replace.rs index 46a2e078aa4cd..443d722c2ccd5 100644 --- a/datafusion/functions-nested/src/replace.rs +++ b/datafusion/functions-nested/src/replace.rs @@ -199,7 +199,7 @@ fn general_replace( list_array: &GenericListArray, from_array: &ArrayRef, to_array: &ArrayRef, - arr_n: Vec, + arr_n: &[i64], ) -> Result { // Build up the offsets for the final output array let mut offsets: Vec = vec![O::usize_as(0)]; @@ -300,11 +300,11 @@ pub(crate) fn array_replace_inner(args: &[ArrayRef]) -> Result { match array.data_type() { DataType::List(_) => { let list_array = array.as_list::(); - general_replace::(list_array, &args[1], &args[2], arr_n) + general_replace::(list_array, &args[1], &args[2], &arr_n) } DataType::LargeList(_) => { let list_array = array.as_list::(); - general_replace::(list_array, &args[1], &args[2], arr_n) + general_replace::(list_array, &args[1], &args[2], &arr_n) } array_type => exec_err!("array_replace does not support type '{array_type:?}'."), } @@ -321,11 +321,11 @@ pub(crate) fn array_replace_n_inner(args: &[ArrayRef]) -> Result { match array.data_type() { DataType::List(_) => { let list_array = array.as_list::(); - general_replace::(list_array, &args[1], &args[2], arr_n) + general_replace::(list_array, &args[1], &args[2], &arr_n) } DataType::LargeList(_) => { let list_array = array.as_list::(); - general_replace::(list_array, &args[1], &args[2], arr_n) + general_replace::(list_array, &args[1], &args[2], &arr_n) } array_type => { exec_err!("array_replace_n does not support type '{array_type:?}'.") @@ -344,11 +344,11 @@ pub(crate) fn array_replace_all_inner(args: &[ArrayRef]) -> Result { match array.data_type() { DataType::List(_) => { let list_array = array.as_list::(); - 
general_replace::(list_array, &args[1], &args[2], arr_n) + general_replace::(list_array, &args[1], &args[2], &arr_n) } DataType::LargeList(_) => { let list_array = array.as_list::(); - general_replace::(list_array, &args[1], &args[2], arr_n) + general_replace::(list_array, &args[1], &args[2], &arr_n) } array_type => { exec_err!("array_replace_all does not support type '{array_type:?}'.") diff --git a/datafusion/functions-nested/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs index 1de9c264ddc2c..f05fc8afa6abb 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ b/datafusion/functions-nested/src/set_ops.rs @@ -249,7 +249,7 @@ fn generic_set_lists( l: &GenericListArray, r: &GenericListArray, field: Arc, - set_op: SetOp, + set_op: &SetOp, ) -> Result { if matches!(l.value_type(), Null) { let field = Arc::new(Field::new("item", r.value_type(), true)); @@ -263,7 +263,7 @@ fn generic_set_lists( // array_union(arr, []) -> arr; // array_intersect(arr, []) -> []; if r.value_length(0).is_zero() { - if set_op == SetOp::Union { + if set_op == &SetOp::Union { return Ok(Arc::new(l.clone()) as ArrayRef); } else { return Ok(Arc::new(r.clone()) as ArrayRef); @@ -287,7 +287,7 @@ fn generic_set_lists( let l_iter = l_values.iter().sorted().dedup(); let values_set: HashSet<_> = l_iter.clone().collect(); - let mut rows = if set_op == SetOp::Union { + let mut rows = if set_op == &SetOp::Union { l_iter.collect::>() } else { vec![] @@ -333,11 +333,11 @@ fn generic_set_lists( fn general_set_op( array1: &ArrayRef, array2: &ArrayRef, - set_op: SetOp, + set_op: &SetOp, ) -> Result { match (array1.data_type(), array2.data_type()) { (Null, List(field)) => { - if set_op == SetOp::Intersect { + if set_op == &SetOp::Intersect { return Ok(new_empty_array(&Null)); } let array = as_list_array(&array2)?; @@ -345,21 +345,21 @@ fn general_set_op( } (List(field), Null) => { - if set_op == SetOp::Intersect { + if set_op == &SetOp::Intersect { return make_array_inner(&[]); } let array = as_list_array(&array1)?; general_array_distinct::(array, field) } (Null, LargeList(field)) => { - if set_op == SetOp::Intersect { + if set_op == &SetOp::Intersect { return Ok(new_empty_array(&Null)); } let array = as_large_list_array(&array2)?; general_array_distinct::(array, field) } (LargeList(field), Null) => { - if set_op == SetOp::Intersect { + if set_op == &SetOp::Intersect { return make_array_inner(&[]); } let array = as_large_list_array(&array1)?; @@ -393,7 +393,7 @@ fn array_union_inner(args: &[ArrayRef]) -> Result { let array1 = &args[0]; let array2 = &args[1]; - general_set_op(array1, array2, SetOp::Union) + general_set_op(array1, array2, &SetOp::Union) } /// array_intersect SQL function @@ -405,7 +405,7 @@ fn array_intersect_inner(args: &[ArrayRef]) -> Result { let array1 = &args[0]; let array2 = &args[1]; - general_set_op(array1, array2, SetOp::Intersect) + general_set_op(array1, array2, &SetOp::Intersect) } fn general_array_distinct( diff --git a/datafusion/functions-nested/src/string.rs b/datafusion/functions-nested/src/string.rs index 2dc0a55e69519..f720fc7959eb2 100644 --- a/datafusion/functions-nested/src/string.rs +++ b/datafusion/functions-nested/src/string.rs @@ -241,31 +241,31 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { let delimiters = as_string_array(&args[1])?; let delimiters: Vec> = delimiters.iter().collect(); - let mut null_string = String::from(""); + let mut null_string = ""; let mut with_null_string = false; if args.len() == 3 { - null_string = 
as_string_array(&args[2])?.value(0).to_string(); + null_string = as_string_array(&args[2])?.value(0); with_null_string = true; } /// Creates a single string from single element of a ListArray (which is /// itself another Array) - fn compute_array_to_string( - arg: &mut String, - arr: ArrayRef, - delimiter: String, - null_string: String, + fn compute_array_to_string<'a>( + arg: &'a mut String, + arr: &ArrayRef, + delimiter: &str, + null_string: &str, with_null_string: bool, - ) -> Result<&mut String> { + ) -> Result<&'a mut String> { match arr.data_type() { List(..) => { - let list_array = as_list_array(&arr)?; + let list_array = as_list_array(arr)?; for i in 0..list_array.len() { compute_array_to_string( arg, - list_array.value(i), - delimiter.clone(), - null_string.clone(), + &list_array.value(i), + delimiter, + null_string, with_null_string, )?; } @@ -273,13 +273,13 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { Ok(arg) } LargeList(..) => { - let list_array = as_large_list_array(&arr)?; + let list_array = as_large_list_array(arr)?; for i in 0..list_array.len() { compute_array_to_string( arg, - list_array.value(i), - delimiter.clone(), - null_string.clone(), + &list_array.value(i), + delimiter, + null_string, with_null_string, )?; } @@ -289,14 +289,14 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { Dictionary(_key_type, value_type) => { // Call cast to unwrap the dictionary. This could be optimized if we wanted // to accept the overhead of extra code - let values = cast(&arr, value_type.as_ref()).map_err(|e| { + let values = cast(arr, value_type.as_ref()).map_err(|e| { DataFusionError::from(e).context( "Casting dictionary to values in compute_array_to_string", ) })?; compute_array_to_string( arg, - values, + &values, delimiter, null_string, with_null_string, @@ -323,8 +323,8 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { fn generate_string_array( list_arr: &GenericListArray, - delimiters: Vec>, - null_string: String, + delimiters: &[Option<&str>], + null_string: &str, with_null_string: bool, ) -> Result { let mut res: Vec> = Vec::new(); @@ -333,9 +333,9 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { let mut arg = String::from(""); let s = compute_array_to_string( &mut arg, - arr, - delimiter.to_string(), - null_string.clone(), + &arr, + delimiter, + null_string, with_null_string, )? .clone(); @@ -359,7 +359,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { let list_array = as_list_array(&arr)?; generate_string_array::( list_array, - delimiters, + &delimiters, null_string, with_null_string, )? @@ -368,7 +368,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { let list_array = as_large_list_array(&arr)?; generate_string_array::( list_array, - delimiters, + &delimiters, null_string, with_null_string, )? @@ -381,8 +381,8 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { let delimiter = delimiters[0].unwrap(); let s = compute_array_to_string( &mut arg, - Arc::clone(arr), - delimiter.to_string(), + arr, + delimiter, null_string, with_null_string, )? 
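The hunks in this patch repeatedly apply the same clippy::needless_pass_by_value fix: callees that only read their arguments now borrow them (&str instead of String, &[T] instead of Vec<T>, &T instead of an owned T), so call sites stop cloning or moving owned values. The following is a minimal standalone sketch of that pattern; it is illustrative only and not taken from the DataFusion sources, and the function names are hypothetical.

// Standalone illustration of the clippy::needless_pass_by_value fix pattern.

// Before: taking owned values forces every caller to allocate or clone.
fn describe_owned(name: String, values: Vec<i64>) -> String {
    format!("{name}: {} values", values.len())
}

// After: borrows suffice because the function only reads its arguments.
fn describe_borrowed(name: &str, values: &[i64]) -> String {
    format!("{name}: {} values", values.len())
}

fn main() {
    let name = String::from("counts");
    let values = vec![1, 2, 3];

    // The owned variant consumes its arguments, so reusing them requires clones.
    let a = describe_owned(name.clone(), values.clone());

    // The borrowed variant leaves the caller's data untouched and needs no clones.
    let b = describe_borrowed(&name, &values);

    assert_eq!(a, b);
    println!("{b}");
}

Borrowed parameters also keep the caller's values usable afterwards, which is why several call sites in the surrounding hunks drop their .clone() and Arc::clone calls entirely.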
diff --git a/datafusion/functions/src/core/expr_ext.rs b/datafusion/functions/src/core/expr_ext.rs index af05f447f1c1e..82c6bcfc47203 100644 --- a/datafusion/functions/src/core/expr_ext.rs +++ b/datafusion/functions/src/core/expr_ext.rs @@ -49,7 +49,7 @@ pub trait FieldAccessor { impl FieldAccessor for Expr { fn field(self, name: impl Literal) -> Expr { - get_field(self, name) + get_field(self, &name) } } @@ -62,7 +62,7 @@ mod tests { #[test] fn test_field() { let expr1 = col("a").field("b"); - let expr2 = get_field(col("a"), "b"); + let expr2 = get_field(col("a"), &"b"); assert_eq!(expr1, expr2); } } diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index af340930eabce..09b17f89887d5 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -81,7 +81,7 @@ pub mod expr_fn { )); #[doc = "Returns the value of the field with the given name from the struct"] - pub fn get_field(arg1: Expr, arg2: impl Literal) -> Expr { + pub fn get_field(arg1: Expr, arg2: &impl Literal) -> Expr { super::get_field().call(vec![arg1, arg2.lit()]) } } diff --git a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs index 308ea668d3d7b..2540c5b74b52e 100644 --- a/datafusion/functions/src/datetime/date_trunc.rs +++ b/datafusion/functions/src/datetime/date_trunc.rs @@ -150,14 +150,14 @@ impl ScalarUDFImpl for DateTruncFunc { fn process_array( array: &dyn Array, - granularity: String, + granularity: &str, tz_opt: &Option>, ) -> Result { let parsed_tz = parse_tz(tz_opt)?; let array = as_primitive_array::(array)?; let array = array .iter() - .map(|x| general_date_trunc(T::UNIT, &x, parsed_tz, granularity.as_str())) + .map(|x| general_date_trunc(T::UNIT, &x, parsed_tz, granularity)) .collect::>>()? .with_timezone_opt(tz_opt.clone()); Ok(ColumnarValue::Array(Arc::new(array))) @@ -165,52 +165,52 @@ impl ScalarUDFImpl for DateTruncFunc { fn process_scalar( v: &Option, - granularity: String, + granularity: &str, tz_opt: &Option>, ) -> Result { let parsed_tz = parse_tz(tz_opt)?; - let value = general_date_trunc(T::UNIT, v, parsed_tz, granularity.as_str())?; + let value = general_date_trunc(T::UNIT, v, parsed_tz, granularity)?; let value = ScalarValue::new_timestamp::(value, tz_opt.clone()); Ok(ColumnarValue::Scalar(value)) } Ok(match array { ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => { - process_scalar::(v, granularity, tz_opt)? + process_scalar::(v, &granularity, tz_opt)? } ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => { - process_scalar::(v, granularity, tz_opt)? + process_scalar::(v, &granularity, tz_opt)? } ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => { - process_scalar::(v, granularity, tz_opt)? + process_scalar::(v, &granularity, tz_opt)? } ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => { - process_scalar::(v, granularity, tz_opt)? + process_scalar::(v, &granularity, tz_opt)? } ColumnarValue::Array(array) => { let array_type = array.data_type(); match array_type { Timestamp(Second, tz_opt) => { - process_array::(array, granularity, tz_opt)? + process_array::(array, &granularity, tz_opt)? 
} Timestamp(Millisecond, tz_opt) => process_array::< TimestampMillisecondType, >( - array, granularity, tz_opt + array, &granularity, tz_opt )?, Timestamp(Microsecond, tz_opt) => process_array::< TimestampMicrosecondType, >( - array, granularity, tz_opt + array, &granularity, tz_opt )?, Timestamp(Nanosecond, tz_opt) => process_array::< TimestampNanosecondType, >( - array, granularity, tz_opt + array, &granularity, tz_opt )?, _ => process_array::( array, - granularity, + &granularity, &None, )?, } diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs index 634e28e6f3930..6db6488e217ab 100644 --- a/datafusion/functions/src/datetime/to_local_time.rs +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -352,6 +352,7 @@ impl ScalarUDFImpl for ToLocalTimeFunc { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use std::sync::Arc; use arrow::array::{types::TimestampNanosecondType, TimestampNanosecondArray}; diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs index b221fb900cfa3..2b892689f2ce8 100644 --- a/datafusion/functions/src/math/mod.rs +++ b/datafusion/functions/src/math/mod.rs @@ -322,6 +322,7 @@ pub fn functions() -> Vec> { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use arrow::datatypes::DataType; use datafusion_common::ScalarValue; use datafusion_expr::interval_arithmetic::Interval; diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 0b0f7287e1ec9..e06323d7366b8 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -289,7 +289,7 @@ pub fn regexp_replace(args: &[ArrayRef]) -> Result } fn _regexp_replace_early_abort( - input_array: T, + input_array: &T, sz: usize, ) -> Result { // Mimicking the existing behavior of regexp_replace, if any of the scalar arguments @@ -307,7 +307,7 @@ macro_rules! 
fetch_string_arg { ($ARG:expr, $NAME:expr, $T:ident, $EARLY_ABORT:ident, $ARRAY_SIZE:expr) => {{ let array = as_generic_string_array::<$T>($ARG)?; if array.len() == 0 || array.is_null(0) { - return $EARLY_ABORT(array, $ARRAY_SIZE); + return $EARLY_ABORT(&array, $ARRAY_SIZE); } else { array.value(0) } diff --git a/datafusion/functions/src/string/btrim.rs b/datafusion/functions/src/string/btrim.rs index 371a11c82c543..fe88adc5ee765 100644 --- a/datafusion/functions/src/string/btrim.rs +++ b/datafusion/functions/src/string/btrim.rs @@ -32,7 +32,7 @@ use crate::utils::{make_scalar_function, utf8_to_str_type}; /// btrim('xyxtrimyyx', 'xyz') = 'trim' fn btrim(args: &[ArrayRef]) -> Result { let use_string_view = args[0].data_type() == &DataType::Utf8View; - general_trim::(args, TrimType::Both, use_string_view) + general_trim::(args, &TrimType::Both, use_string_view) } #[derive(Debug)] diff --git a/datafusion/functions/src/string/common.rs b/datafusion/functions/src/string/common.rs index 6ebcc4ee6cd3f..3ba6b30e306cd 100644 --- a/datafusion/functions/src/string/common.rs +++ b/datafusion/functions/src/string/common.rs @@ -50,7 +50,7 @@ impl Display for TrimType { pub(crate) fn general_trim( args: &[ArrayRef], - trim_type: TrimType, + trim_type: &TrimType, use_string_view: bool, ) -> Result { let func = match trim_type { diff --git a/datafusion/functions/src/string/lower.rs b/datafusion/functions/src/string/lower.rs index ca324e69c0d23..59a4df57300eb 100644 --- a/datafusion/functions/src/string/lower.rs +++ b/datafusion/functions/src/string/lower.rs @@ -74,6 +74,7 @@ impl ScalarUDFImpl for LowerFunc { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use arrow::array::{ArrayRef, StringArray}; use std::sync::Arc; diff --git a/datafusion/functions/src/string/ltrim.rs b/datafusion/functions/src/string/ltrim.rs index b7b27afcee1fa..c5057a35dbfa8 100644 --- a/datafusion/functions/src/string/ltrim.rs +++ b/datafusion/functions/src/string/ltrim.rs @@ -33,7 +33,7 @@ use crate::utils::{make_scalar_function, utf8_to_str_type}; /// ltrim('zzzytest', 'xyz') = 'test' fn ltrim(args: &[ArrayRef]) -> Result { let use_string_view = args[0].data_type() == &DataType::Utf8View; - general_trim::(args, TrimType::Left, use_string_view) + general_trim::(args, &TrimType::Left, use_string_view) } #[derive(Debug)] diff --git a/datafusion/functions/src/string/repeat.rs b/datafusion/functions/src/string/repeat.rs index 20e4462784b82..e13c66a81e0f9 100644 --- a/datafusion/functions/src/string/repeat.rs +++ b/datafusion/functions/src/string/repeat.rs @@ -92,15 +92,15 @@ fn repeat(args: &[ArrayRef]) -> Result { match args[0].data_type() { Utf8View => { let string_view_array = args[0].as_string_view(); - repeat_impl::(string_view_array, number_array) + repeat_impl::(&string_view_array, number_array) } Utf8 => { let string_array = args[0].as_string::(); - repeat_impl::>(string_array, number_array) + repeat_impl::>(&string_array, number_array) } LargeUtf8 => { let string_array = args[0].as_string::(); - repeat_impl::>(string_array, number_array) + repeat_impl::>(&string_array, number_array) } other => exec_err!( "Unsupported data type {other:?} for function repeat. 
\ @@ -109,7 +109,7 @@ fn repeat(args: &[ArrayRef]) -> Result { } } -fn repeat_impl<'a, T, S>(string_array: S, number_array: &Int64Array) -> Result +fn repeat_impl<'a, T, S>(string_array: &S, number_array: &Int64Array) -> Result where T: OffsetSizeTrait, S: StringArrayType<'a>, diff --git a/datafusion/functions/src/string/rtrim.rs b/datafusion/functions/src/string/rtrim.rs index ec53f3ed74307..6fa20bb16108f 100644 --- a/datafusion/functions/src/string/rtrim.rs +++ b/datafusion/functions/src/string/rtrim.rs @@ -33,7 +33,7 @@ use crate::utils::{make_scalar_function, utf8_to_str_type}; /// rtrim('testxxzx', 'xyz') = 'test' fn rtrim(args: &[ArrayRef]) -> Result { let use_string_view = args[0].data_type() == &DataType::Utf8View; - general_trim::(args, TrimType::Right, use_string_view) + general_trim::(args, &TrimType::Right, use_string_view) } #[derive(Debug)] diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs index 8d292315a35ac..7311ce7213bd1 100644 --- a/datafusion/functions/src/string/split_part.rs +++ b/datafusion/functions/src/string/split_part.rs @@ -107,64 +107,64 @@ impl ScalarUDFImpl for SplitPartFunc { let result = match (args[0].data_type(), args[1].data_type()) { (DataType::Utf8View, DataType::Utf8View) => { split_part_impl::<&StringViewArray, &StringViewArray, i32>( - args[0].as_string_view(), - args[1].as_string_view(), + &args[0].as_string_view(), + &args[1].as_string_view(), n_array, ) } (DataType::Utf8View, DataType::Utf8) => { split_part_impl::<&StringViewArray, &GenericStringArray, i32>( - args[0].as_string_view(), - args[1].as_string::(), + &args[0].as_string_view(), + &args[1].as_string::(), n_array, ) } (DataType::Utf8View, DataType::LargeUtf8) => { split_part_impl::<&StringViewArray, &GenericStringArray, i32>( - args[0].as_string_view(), - args[1].as_string::(), + &args[0].as_string_view(), + &args[1].as_string::(), n_array, ) } (DataType::Utf8, DataType::Utf8View) => { split_part_impl::<&GenericStringArray, &StringViewArray, i32>( - args[0].as_string::(), - args[1].as_string_view(), + &args[0].as_string::(), + &args[1].as_string_view(), n_array, ) } (DataType::LargeUtf8, DataType::Utf8View) => { split_part_impl::<&GenericStringArray, &StringViewArray, i64>( - args[0].as_string::(), - args[1].as_string_view(), + &args[0].as_string::(), + &args[1].as_string_view(), n_array, ) } (DataType::Utf8, DataType::Utf8) => { split_part_impl::<&GenericStringArray, &GenericStringArray, i32>( - args[0].as_string::(), - args[1].as_string::(), + &args[0].as_string::(), + &args[1].as_string::(), n_array, ) } (DataType::LargeUtf8, DataType::LargeUtf8) => { split_part_impl::<&GenericStringArray, &GenericStringArray, i64>( - args[0].as_string::(), - args[1].as_string::(), + &args[0].as_string::(), + &args[1].as_string::(), n_array, ) } (DataType::Utf8, DataType::LargeUtf8) => { split_part_impl::<&GenericStringArray, &GenericStringArray, i32>( - args[0].as_string::(), - args[1].as_string::(), + &args[0].as_string::(), + &args[1].as_string::(), n_array, ) } (DataType::LargeUtf8, DataType::Utf8) => { split_part_impl::<&GenericStringArray, &GenericStringArray, i64>( - args[0].as_string::(), - args[1].as_string::(), + &args[0].as_string::(), + &args[1].as_string::(), n_array, ) } @@ -182,8 +182,8 @@ impl ScalarUDFImpl for SplitPartFunc { /// impl pub fn split_part_impl<'a, StringArrType, DelimiterArrType, StringArrayLen>( - string_array: StringArrType, - delimiter_array: DelimiterArrType, + string_array: &StringArrType, + delimiter_array: 
&DelimiterArrType, n_array: &Int64Array, ) -> Result where diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs index 593e33ab6bb48..f8ff728ebdca4 100644 --- a/datafusion/functions/src/string/upper.rs +++ b/datafusion/functions/src/string/upper.rs @@ -71,6 +71,7 @@ impl ScalarUDFImpl for UpperFunc { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use arrow::array::{ArrayRef, StringArray}; use std::sync::Arc; diff --git a/datafusion/functions/src/unicode/lpad.rs b/datafusion/functions/src/unicode/lpad.rs index e102673c42530..6d1a63a0a75d0 100644 --- a/datafusion/functions/src/unicode/lpad.rs +++ b/datafusion/functions/src/unicode/lpad.rs @@ -112,7 +112,7 @@ pub fn lpad(args: &[ArrayRef]) -> Result { match (args.len(), args[0].data_type()) { (2, Utf8View) => lpad_impl::<&StringViewArray, &GenericStringArray, T>( - args[0].as_string_view(), + &args[0].as_string_view(), length_array, None, ), @@ -120,14 +120,14 @@ pub fn lpad(args: &[ArrayRef]) -> Result { &GenericStringArray, &GenericStringArray, T, - >(args[0].as_string::(), length_array, None), + >(&args[0].as_string::(), length_array, None), (3, Utf8View) => lpad_with_replace::<&StringViewArray, T>( - args[0].as_string_view(), + &args[0].as_string_view(), length_array, &args[2], ), (3, Utf8 | LargeUtf8) => lpad_with_replace::<&GenericStringArray, T>( - args[0].as_string::(), + &args[0].as_string::(), length_array, &args[2], ), @@ -136,7 +136,7 @@ pub fn lpad(args: &[ArrayRef]) -> Result { } fn lpad_with_replace<'a, V, T: OffsetSizeTrait>( - string_array: V, + string_array: &V, length_array: &Int64Array, fill_array: &'a ArrayRef, ) -> Result @@ -166,7 +166,7 @@ where } fn lpad_impl<'a, V, V2, T>( - string_array: V, + string_array: &V, length_array: &Int64Array, fill_array: Option, ) -> Result diff --git a/datafusion/functions/src/unicode/rpad.rs b/datafusion/functions/src/unicode/rpad.rs index c1d6f327928f2..0be944e9e9fbc 100644 --- a/datafusion/functions/src/unicode/rpad.rs +++ b/datafusion/functions/src/unicode/rpad.rs @@ -133,21 +133,21 @@ pub fn rpad( ) { (2, Utf8View, _) => { rpad_impl::<&StringViewArray, &StringViewArray, StringArrayLen>( - args[0].as_string_view(), + &args[0].as_string_view(), length_array, None, ) } (3, Utf8View, Some(Utf8View)) => { rpad_impl::<&StringViewArray, &StringViewArray, StringArrayLen>( - args[0].as_string_view(), + &args[0].as_string_view(), length_array, Some(args[2].as_string_view()), ) } (3, Utf8View, Some(Utf8 | LargeUtf8)) => { rpad_impl::<&StringViewArray, &GenericStringArray, StringArrayLen>( - args[0].as_string_view(), + &args[0].as_string_view(), length_array, Some(args[2].as_string::()), ) @@ -157,7 +157,7 @@ pub fn rpad( &StringViewArray, StringArrayLen, >( - args[0].as_string::(), + &args[0].as_string::(), length_array, Some(args[2].as_string_view()), ), @@ -166,7 +166,7 @@ pub fn rpad( &GenericStringArray, StringArrayLen, >( - args[0].as_string::(), + &args[0].as_string::(), length_array, args.get(2).map(|arg| arg.as_string::()), ), @@ -176,7 +176,7 @@ pub fn rpad( /// Extends the string to length 'length' by appending the characters fill (a space by default). If the string is already longer than length then it is truncated. 
/// rpad('hi', 5, 'xy') = 'hixyx' pub fn rpad_impl<'a, StringArrType, FillArrType, StringArrayLen>( - string_array: StringArrType, + string_array: &StringArrType, length_array: &Int64Array, fill_array: Option, ) -> Result diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 583c6cf50de33..ed2121520f53c 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -626,7 +626,7 @@ impl CommonSubexprEliminate { new_input, new_group_expr, rewritten_aggr_expr, - schema, + schema.as_ref().clone(), ) .map(LogicalPlan::Aggregate) } diff --git a/datafusion/optimizer/src/eliminate_cross_join.rs b/datafusion/optimizer/src/eliminate_cross_join.rs index 93df0dcfd5007..5e716025b5b99 100644 --- a/datafusion/optimizer/src/eliminate_cross_join.rs +++ b/datafusion/optimizer/src/eliminate_cross_join.rs @@ -435,6 +435,7 @@ fn remove_join_expressions(expr: Expr, join_keys: &JoinKeySet) -> Option { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use crate::optimizer::OptimizerContext; use crate::test::*; diff --git a/datafusion/optimizer/src/join_key_set.rs b/datafusion/optimizer/src/join_key_set.rs index c0eec78b183db..2f66628207fa0 100644 --- a/datafusion/optimizer/src/join_key_set.rs +++ b/datafusion/optimizer/src/join_key_set.rs @@ -156,6 +156,7 @@ impl<'a> Equivalent<(Expr, Expr)> for ExprPair<'a> { #[cfg(test)] mod test { + #![allow(clippy::needless_pass_by_value)] // OK in tests use crate::join_key_set::JoinKeySet; use datafusion_expr::{col, Expr}; diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index fc3921d29615a..65d6f0d0b4276 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1789,6 +1789,7 @@ fn inlist_except(mut l1: InList, l2: &InList) -> Result { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use datafusion_common::{assert_contains, DFSchemaRef, ToDFSchema}; use datafusion_expr::{ function::{ diff --git a/datafusion/optimizer/src/test/mod.rs b/datafusion/optimizer/src/test/mod.rs index 1266b548ab057..eac3b4698c7b1 100644 --- a/datafusion/optimizer/src/test/mod.rs +++ b/datafusion/optimizer/src/test/mod.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. 
+#![allow(clippy::needless_pass_by_value)] // OK in tests + use crate::analyzer::{Analyzer, AnalyzerRule}; use crate::optimizer::Optimizer; use crate::{OptimizerContext, OptimizerRule}; diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs index 6043f0d7c8b51..29d29c25c626c 100644 --- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs +++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs @@ -532,6 +532,7 @@ fn cast_between_timestamp(from: &DataType, to: &DataType, value: i128) -> Option #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use std::collections::HashMap; use super::*; diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index 9e602ad497159..7763de1c7bf2a 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -295,6 +295,7 @@ pub fn only_or_err(slice: &[T]) -> Result<&T> { since = "34.0.0", note = "use `datafusion_expr::utils::merge_schema` instead" )] +#[allow(clippy::needless_pass_by_value)] // this function is deprecated and will be removed pub fn merge_schema(inputs: Vec<&LogicalPlan>) -> DFSchema { expr_utils::merge_schema(&inputs) } diff --git a/datafusion/physical-expr/src/aggregate.rs b/datafusion/physical-expr/src/aggregate.rs index d62dc27ece861..6772bd71f7f7d 100644 --- a/datafusion/physical-expr/src/aggregate.rs +++ b/datafusion/physical-expr/src/aggregate.rs @@ -505,8 +505,8 @@ impl AggregateFunctionExpr { /// Returns `Some(Arc)` if re-write is supported, otherwise returns `None`. pub fn with_new_expressions( &self, - _args: Vec>, - _order_by_exprs: Vec>, + _args: &[Arc], + _order_by_exprs: &[Arc], ) -> Option> { None } diff --git a/datafusion/physical-expr/src/analysis.rs b/datafusion/physical-expr/src/analysis.rs index 3eac62a4df089..001f471531a1a 100644 --- a/datafusion/physical-expr/src/analysis.rs +++ b/datafusion/physical-expr/src/analysis.rs @@ -188,7 +188,7 @@ pub fn analyze( .update_ranges(&mut target_indices_and_boundaries, Interval::CERTAINLY_TRUE)? { PropagationResult::Success => { - shrink_boundaries(graph, target_boundaries, target_expr_and_indices) + shrink_boundaries(&graph, target_boundaries, &target_expr_and_indices) } PropagationResult::Infeasible => { Ok(AnalysisContext::new(target_boundaries).with_selectivity(0.0)) @@ -204,9 +204,9 @@ pub fn analyze( /// Following this, it constructs and returns a new `AnalysisContext` with the /// updated parameters. 
fn shrink_boundaries( - graph: ExprIntervalGraph, + graph: &ExprIntervalGraph, mut target_boundaries: Vec, - target_expr_and_indices: Vec<(Arc, usize)>, + target_expr_and_indices: &[(Arc, usize)], ) -> Result { let initial_boundaries = target_boundaries.clone(); target_expr_and_indices.iter().for_each(|(expr, i)| { diff --git a/datafusion/physical-expr/src/equivalence/mod.rs b/datafusion/physical-expr/src/equivalence/mod.rs index d862eda5018ea..3d81567149d0d 100644 --- a/datafusion/physical-expr/src/equivalence/mod.rs +++ b/datafusion/physical-expr/src/equivalence/mod.rs @@ -72,6 +72,7 @@ pub fn add_offset_to_expr( #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use crate::expressions::col; use crate::PhysicalSortExpr; @@ -296,12 +297,12 @@ mod tests { // Apply projection to the input_data, return projected equivalence properties and record batch pub fn apply_projection( - proj_exprs: Vec<(Arc, String)>, + proj_exprs: &[(Arc, String)], input_data: &RecordBatch, input_eq_properties: &EquivalenceProperties, ) -> Result<(RecordBatch, EquivalenceProperties)> { let input_schema = input_data.schema(); - let projection_mapping = ProjectionMapping::try_new(&proj_exprs, &input_schema)?; + let projection_mapping = ProjectionMapping::try_new(proj_exprs, &input_schema)?; let output_schema = output_schema(&projection_mapping, &input_schema)?; let num_rows = input_data.num_rows(); @@ -450,7 +451,7 @@ mod tests { fn get_representative_arr( eq_group: &EquivalenceClass, existing_vec: &[Option], - schema: SchemaRef, + schema: &Schema, ) -> Option { for expr in eq_group.iter() { let col = expr.as_any().downcast_ref::().unwrap(); @@ -518,7 +519,7 @@ mod tests { // Fill columns based on equivalence groups for eq_group in eq_properties.eq_group.iter() { let representative_array = - get_representative_arr(eq_group, &schema_vec, Arc::clone(schema)) + get_representative_arr(eq_group, &schema_vec, schema.as_ref()) .unwrap_or_else(|| generate_random_array(n_elem, n_distinct)); for expr in eq_group.iter() { diff --git a/datafusion/physical-expr/src/equivalence/projection.rs b/datafusion/physical-expr/src/equivalence/projection.rs index ebf26d3262aa2..2ed190e6a8bd8 100644 --- a/datafusion/physical-expr/src/equivalence/projection.rs +++ b/datafusion/physical-expr/src/equivalence/projection.rs @@ -1033,7 +1033,7 @@ mod tests { .map(|(expr, name)| (Arc::clone(expr), name.to_string())) .collect::>(); let (projected_batch, projected_eq) = apply_projection( - proj_exprs.clone(), + &proj_exprs, &table_data_with_properties, &eq_properties, )?; @@ -1111,7 +1111,7 @@ mod tests { .map(|(expr, name)| (Arc::clone(expr), name.to_string())) .collect::>(); let (projected_batch, projected_eq) = apply_projection( - proj_exprs.clone(), + &proj_exprs, &table_data_with_properties, &eq_properties, )?; diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index a5d54ee56cffe..eb60344960e32 100644 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -1604,6 +1604,7 @@ pub fn calculate_union( #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use std::ops::Not; use super::*; diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 08c133d7193ae..fed6eeb4967cb 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ 
b/datafusion/physical-expr/src/expressions/binary.rs @@ -639,7 +639,7 @@ impl BinaryExpr { BitwiseXor => bitwise_xor_dyn(left, right), BitwiseShiftRight => bitwise_shift_right_dyn(left, right), BitwiseShiftLeft => bitwise_shift_left_dyn(left, right), - StringConcat => concat_elements(left, right), + StringConcat => concat_elements(&left, &right), AtArrow | ArrowAt => { unreachable!("ArrowAt and AtArrow should be rewritten to function") } @@ -647,7 +647,7 @@ impl BinaryExpr { } } -fn concat_elements(left: Arc, right: Arc) -> Result { +fn concat_elements(left: &Arc, right: &Arc) -> Result { Ok(match left.data_type() { DataType::Utf8 => Arc::new(concat_elements_utf8( left.as_string::(), @@ -683,6 +683,7 @@ pub fn binary( #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use crate::expressions::{col, lit, try_cast, Column, Literal}; use datafusion_common::plan_datafusion_err; diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs index dfc70551ccf6e..6c1c1fa3d8a6d 100644 --- a/datafusion/physical-expr/src/expressions/in_list.rs +++ b/datafusion/physical-expr/src/expressions/in_list.rs @@ -149,7 +149,7 @@ where /// /// Note: This is split into a separate function as higher-rank trait bounds currently /// cause type inference to misbehave -fn make_hash_set(array: T) -> ArrayHashSet +fn make_hash_set(array: &T) -> ArrayHashSet where T: ArrayAccessor, T::Item: IsEqual, @@ -183,26 +183,26 @@ where /// Creates a `Box` for the given list of `IN` expressions and `batch` fn make_set(array: &dyn Array) -> Result> { Ok(downcast_primitive_array! { - array => Arc::new(ArraySet::new(array, make_hash_set(array))), + array => Arc::new(ArraySet::new(array, make_hash_set(&array))), DataType::Boolean => { let array = as_boolean_array(array)?; - Arc::new(ArraySet::new(array, make_hash_set(array))) + Arc::new(ArraySet::new(array, make_hash_set(&array))) }, DataType::Utf8 => { let array = as_string_array(array)?; - Arc::new(ArraySet::new(array, make_hash_set(array))) + Arc::new(ArraySet::new(array, make_hash_set(&array))) } DataType::LargeUtf8 => { let array = as_largestring_array(array); - Arc::new(ArraySet::new(array, make_hash_set(array))) + Arc::new(ArraySet::new(array, make_hash_set(&array))) } DataType::Binary => { let array = as_generic_binary_array::(array)?; - Arc::new(ArraySet::new(array, make_hash_set(array))) + Arc::new(ArraySet::new(array, make_hash_set(&array))) } DataType::LargeBinary => { let array = as_generic_binary_array::(array)?; - Arc::new(ArraySet::new(array, make_hash_set(array))) + Arc::new(ArraySet::new(array, make_hash_set(&array))) } DataType::Dictionary(_, _) => unreachable!("dictionary should have been flattened"), d => return not_impl_err!("DataType::{d} not supported in InList") diff --git a/datafusion/physical-expr/src/expressions/is_not_null.rs b/datafusion/physical-expr/src/expressions/is_not_null.rs index 58559352d44c0..4a48c2185a748 100644 --- a/datafusion/physical-expr/src/expressions/is_not_null.rs +++ b/datafusion/physical-expr/src/expressions/is_not_null.rs @@ -73,7 +73,7 @@ impl PhysicalExpr for IsNotNullExpr { let arg = self.arg.evaluate(batch)?; match arg { ColumnarValue::Array(array) => { - let is_not_null = super::is_null::compute_is_not_null(array)?; + let is_not_null = super::is_null::compute_is_not_null(array.as_ref())?; Ok(ColumnarValue::Array(Arc::new(is_not_null))) } ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar( diff --git 
a/datafusion/physical-expr/src/expressions/is_null.rs b/datafusion/physical-expr/src/expressions/is_null.rs index 3cdb49bcab42f..8a05d5cfc8eaf 100644 --- a/datafusion/physical-expr/src/expressions/is_null.rs +++ b/datafusion/physical-expr/src/expressions/is_null.rs @@ -25,7 +25,7 @@ use arrow::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; -use arrow_array::{Array, ArrayRef, BooleanArray, Int8Array, UnionArray}; +use arrow_array::{Array, BooleanArray, Int8Array, UnionArray}; use arrow_buffer::{BooleanBuffer, ScalarBuffer}; use arrow_ord::cmp; @@ -78,7 +78,7 @@ impl PhysicalExpr for IsNullExpr { let arg = self.arg.evaluate(batch)?; match arg { ColumnarValue::Array(array) => { - Ok(ColumnarValue::Array(Arc::new(compute_is_null(array)?))) + Ok(ColumnarValue::Array(Arc::new(compute_is_null(&array)?))) } ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar( ScalarValue::Boolean(Some(scalar.is_null())), @@ -105,7 +105,7 @@ impl PhysicalExpr for IsNullExpr { /// workaround , /// this can be replaced with a direct call to `arrow::compute::is_null` once it's fixed. -pub(crate) fn compute_is_null(array: ArrayRef) -> Result { +pub(crate) fn compute_is_null(array: &dyn Array) -> Result { if let Some(union_array) = array.as_any().downcast_ref::() { if let Some(offsets) = union_array.offsets() { dense_union_is_null(union_array, offsets) @@ -113,17 +113,17 @@ pub(crate) fn compute_is_null(array: ArrayRef) -> Result { sparse_union_is_null(union_array) } } else { - compute::is_null(array.as_ref()).map_err(Into::into) + compute::is_null(array).map_err(Into::into) } } /// workaround , /// this can be replaced with a direct call to `arrow::compute::is_not_null` once it's fixed. -pub(crate) fn compute_is_not_null(array: ArrayRef) -> Result { +pub(crate) fn compute_is_not_null(array: &dyn Array) -> Result { if array.as_any().is::() { compute::not(&compute_is_null(array)?).map_err(Into::into) } else { - compute::is_not_null(array.as_ref()).map_err(Into::into) + compute::is_not_null(array).map_err(Into::into) } } @@ -243,8 +243,7 @@ mod tests { let array = UnionArray::try_new(union_fields(), type_ids, None, children).unwrap(); - let array_ref = Arc::new(array) as ArrayRef; - let result = compute_is_null(array_ref).unwrap(); + let result = compute_is_null(&array).unwrap(); let expected = &BooleanArray::from(vec![false, true, false, false, true, true, false]); @@ -272,8 +271,7 @@ mod tests { UnionArray::try_new(union_fields(), type_ids, Some(offsets), children) .unwrap(); - let array_ref = Arc::new(array) as ArrayRef; - let result = compute_is_null(array_ref).unwrap(); + let result = compute_is_null(&array).unwrap(); let expected = &BooleanArray::from(vec![false, true, false, true, false, true]); assert_eq!(expected, &result); diff --git a/datafusion/physical-expr/src/expressions/literal.rs b/datafusion/physical-expr/src/expressions/literal.rs index ed24e9028153e..4411be70bc252 100644 --- a/datafusion/physical-expr/src/expressions/literal.rs +++ b/datafusion/physical-expr/src/expressions/literal.rs @@ -109,6 +109,8 @@ impl PartialEq for Literal { } /// Create a literal expression +// TODO address needless_pass_by_value & remove the suppression. Maybe change datafusion_expr::Literal::lit to move self? 
+#[allow(clippy::needless_pass_by_value)] pub fn lit(value: T) -> Arc { match value.lit() { Expr::Literal(v) => Arc::new(Literal::new(v)), diff --git a/datafusion/physical-expr/src/intervals/cp_solver.rs b/datafusion/physical-expr/src/intervals/cp_solver.rs index f05ac3624b8e2..e2169a4204501 100644 --- a/datafusion/physical-expr/src/intervals/cp_solver.rs +++ b/datafusion/physical-expr/src/intervals/cp_solver.rs @@ -718,6 +718,7 @@ fn reverse_tuple((first, second): (T, U)) -> (U, T) { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use crate::expressions::{BinaryExpr, Column}; use crate::intervals::test_utils::gen_conjunctive_numerical_expr; diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index d015f545bf9d8..5581e2f018d9e 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -121,7 +121,7 @@ pub fn create_physical_expr( Expr::Literal(value) => Ok(Arc::new(Literal::new(value.clone()))), Expr::ScalarVariable(_, variable_names) => { if is_system_variables(variable_names) { - match execution_props.get_var_provider(VarType::System) { + match execution_props.get_var_provider(&VarType::System) { Some(provider) => { let scalar_value = provider.get_value(variable_names.clone())?; Ok(Arc::new(Literal::new(scalar_value))) @@ -129,7 +129,7 @@ pub fn create_physical_expr( _ => plan_err!("No system variable provider found"), } } else { - match execution_props.get_var_provider(VarType::UserDefined) { + match execution_props.get_var_provider(&VarType::UserDefined) { Some(provider) => { let scalar_value = provider.get_value(variable_names.clone())?; Ok(Arc::new(Literal::new(scalar_value))) diff --git a/datafusion/physical-expr/src/utils/guarantee.rs b/datafusion/physical-expr/src/utils/guarantee.rs index 4385066529e74..4ce2893644d53 100644 --- a/datafusion/physical-expr/src/utils/guarantee.rs +++ b/datafusion/physical-expr/src/utils/guarantee.rs @@ -124,7 +124,7 @@ impl LiteralGuarantee { // for an `AND` conjunction to be true, all terms individually must be true .fold(GuaranteeBuilder::new(), |builder, expr| { if let Some(cel) = ColOpLit::try_new(expr) { - return builder.aggregate_conjunct(cel); + return builder.aggregate_conjunct(&cel); } else if let Some(inlist) = expr .as_any() .downcast_ref::() @@ -275,7 +275,7 @@ impl<'a> GuaranteeBuilder<'a> { /// # Examples /// * `AND (a = 1)`: `a` is guaranteed to be 1 /// * `AND (a != 1)`: a is guaranteed to not be 1 - fn aggregate_conjunct(self, col_op_lit: ColOpLit<'a>) -> Self { + fn aggregate_conjunct(self, col_op_lit: &ColOpLit<'a>) -> Self { self.aggregate_multi_conjunct( col_op_lit.col, col_op_lit.guarantee, @@ -420,6 +420,7 @@ impl<'a> ColOpLit<'a> { #[cfg(test)] mod test { + #![allow(clippy::needless_pass_by_value)] // OK in tests use std::sync::OnceLock; use super::*; diff --git a/datafusion/physical-expr/src/window/cume_dist.rs b/datafusion/physical-expr/src/window/cume_dist.rs index 9720187ea83dd..6f2fd76d5c94f 100644 --- a/datafusion/physical-expr/src/window/cume_dist.rs +++ b/datafusion/physical-expr/src/window/cume_dist.rs @@ -102,6 +102,7 @@ impl PartitionEvaluator for CumeDistEvaluator { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use datafusion_common::cast::as_float64_array; diff --git a/datafusion/physical-expr/src/window/lead_lag.rs b/datafusion/physical-expr/src/window/lead_lag.rs index 1656b7c3033a4..506c4fe2a062a 100644 --- 
a/datafusion/physical-expr/src/window/lead_lag.rs +++ b/datafusion/physical-expr/src/window/lead_lag.rs @@ -401,6 +401,7 @@ impl PartitionEvaluator for WindowShiftEvaluator { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use crate::expressions::Column; use arrow::{array::*, datatypes::*}; diff --git a/datafusion/physical-expr/src/window/nth_value.rs b/datafusion/physical-expr/src/window/nth_value.rs index 87c74579c6392..906ec1f47f4e0 100644 --- a/datafusion/physical-expr/src/window/nth_value.rs +++ b/datafusion/physical-expr/src/window/nth_value.rs @@ -319,6 +319,7 @@ impl PartitionEvaluator for NthValueEvaluator { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use crate::expressions::Column; use arrow::{array::*, datatypes::*}; diff --git a/datafusion/physical-expr/src/window/rank.rs b/datafusion/physical-expr/src/window/rank.rs index fa3d4e487f14f..c83c0b3f71dc9 100644 --- a/datafusion/physical-expr/src/window/rank.rs +++ b/datafusion/physical-expr/src/window/rank.rs @@ -232,6 +232,7 @@ impl PartitionEvaluator for RankEvaluator { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use datafusion_common::cast::{as_float64_array, as_uint64_array}; diff --git a/datafusion/physical-expr/src/window/sliding_aggregate.rs b/datafusion/physical-expr/src/window/sliding_aggregate.rs index b3848e15ee42f..2392ebb2ad95c 100644 --- a/datafusion/physical-expr/src/window/sliding_aggregate.rs +++ b/datafusion/physical-expr/src/window/sliding_aggregate.rs @@ -159,7 +159,7 @@ impl WindowExpr for SlidingAggregateWindowExpr { }) .collect::>(); Some(Arc::new(SlidingAggregateWindowExpr { - aggregate: self.aggregate.with_new_expressions(args, vec![])?, + aggregate: self.aggregate.with_new_expressions(&args, &[])?, partition_by: partition_bys, order_by: new_order_by, window_frame: Arc::clone(&self.window_frame), diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index bf6d71308cabf..2ddecf4d4d3ce 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -327,7 +327,7 @@ impl AggregateExec { &input.schema(), &group_by.expr, &aggr_expr, - group_by.exprs_nullable(), + &group_by.exprs_nullable(), mode, )?; @@ -495,7 +495,7 @@ impl AggregateExec { // no group by at all if self.group_by.expr.is_empty() { return Ok(StreamType::AggregateStream(AggregateStream::new( - self, context, partition, + self, &context, partition, )?)); } @@ -510,7 +510,7 @@ impl AggregateExec { // grouping by something else and we need to just materialize all results Ok(StreamType::GroupedHash(GroupedHashAggregateStream::new( - self, context, partition, + self, &context, partition, )?)) } @@ -791,7 +791,7 @@ fn create_schema( input_schema: &Schema, group_expr: &[(Arc, String)], aggr_expr: &[Arc], - group_expr_nullable: Vec, + group_expr_nullable: &[bool], mode: AggregateMode, ) -> Result { let mut fields = Vec::with_capacity(group_expr.len() + aggr_expr.len()); @@ -2519,7 +2519,7 @@ mod tests { &input_schema, &grouping_set.expr, &aggr_expr, - grouping_set.exprs_nullable(), + &grouping_set.exprs_nullable(), AggregateMode::Final, )?; let expected_schema = Schema::new(vec![ diff --git a/datafusion/physical-plan/src/aggregates/no_grouping.rs b/datafusion/physical-plan/src/aggregates/no_grouping.rs index 99417e4ee3e91..6fea29551c7f3 100644 --- 
a/datafusion/physical-plan/src/aggregates/no_grouping.rs +++ b/datafusion/physical-plan/src/aggregates/no_grouping.rs @@ -68,14 +68,14 @@ impl AggregateStream { /// Create a new AggregateStream pub fn new( agg: &AggregateExec, - context: Arc, + context: &Arc, partition: usize, ) -> Result { let agg_schema = Arc::clone(&agg.schema); let agg_filter_expr = agg.filter_expr.clone(); let baseline_metrics = BaselineMetrics::new(&agg.metrics, partition); - let input = agg.input.execute(partition, Arc::clone(&context))?; + let input = agg.input.execute(partition, Arc::clone(context))?; let aggregate_expressions = aggregate_expressions(&agg.aggr_expr, &agg.mode, 0)?; let filter_expressions = match agg.mode { @@ -115,7 +115,7 @@ impl AggregateStream { let timer = elapsed_compute.timer(); let result = aggregate_batch( &this.mode, - batch, + &batch, &mut this.accumulators, &this.aggregate_expressions, &this.filter_expressions, @@ -195,7 +195,7 @@ impl RecordBatchStream for AggregateStream { /// TODO: Make this a member function fn aggregate_batch( mode: &AggregateMode, - batch: RecordBatch, + batch: &RecordBatch, accumulators: &mut [AccumulatorItem], expressions: &[Vec>], filters: &[Option>], @@ -215,8 +215,8 @@ fn aggregate_batch( .try_for_each(|((accum, expr), filter)| { // 1.2 let batch = match filter { - Some(filter) => Cow::Owned(batch_filter(&batch, filter)?), - None => Cow::Borrowed(&batch), + Some(filter) => Cow::Owned(batch_filter(batch, filter)?), + None => Cow::Borrowed(batch), }; // 1.3 diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index c38137994d44b..7afd004c9af97 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -436,7 +436,7 @@ impl GroupedHashAggregateStream { /// Create a new GroupedHashAggregateStream pub fn new( agg: &AggregateExec, - context: Arc, + context: &Arc, partition: usize, ) -> Result { debug!("Creating GroupedHashAggregateStream"); @@ -445,7 +445,7 @@ impl GroupedHashAggregateStream { let agg_filter_expr = agg.filter_expr.clone(); let batch_size = context.session_config().batch_size(); - let input = agg.input.execute(partition, Arc::clone(&context))?; + let input = agg.input.execute(partition, Arc::clone(context))?; let baseline_metrics = BaselineMetrics::new(&agg.metrics, partition); let timer = baseline_metrics.elapsed_compute().timer(); @@ -626,7 +626,7 @@ impl Stream for GroupedHashAggregateStream { extract_ok!(self.spill_previous_if_necessary(&batch)); // Do the grouping - extract_ok!(self.group_aggregate_batch(batch)); + extract_ok!(self.group_aggregate_batch(&batch)); self.update_skip_aggregation_probe(input_rows); @@ -675,7 +675,7 @@ impl Stream for GroupedHashAggregateStream { if let Some(probe) = self.skip_aggregation_probe.as_mut() { probe.record_skipped(&batch); } - let states = self.transform_to_states(batch)?; + let states = self.transform_to_states(&batch)?; return Poll::Ready(Some(Ok( states.record_output(&self.baseline_metrics) ))); @@ -739,27 +739,27 @@ impl RecordBatchStream for GroupedHashAggregateStream { impl GroupedHashAggregateStream { /// Perform group-by aggregation for the given [`RecordBatch`]. 
- fn group_aggregate_batch(&mut self, batch: RecordBatch) -> Result<()> { + fn group_aggregate_batch(&mut self, batch: &RecordBatch) -> Result<()> { // Evaluate the grouping expressions let group_by_values = if self.spill_state.is_stream_merging { - evaluate_group_by(&self.spill_state.merging_group_by, &batch)? + evaluate_group_by(&self.spill_state.merging_group_by, batch)? } else { - evaluate_group_by(&self.group_by, &batch)? + evaluate_group_by(&self.group_by, batch)? }; // Evaluate the aggregation expressions. let input_values = if self.spill_state.is_stream_merging { - evaluate_many(&self.spill_state.merging_aggregate_arguments, &batch)? + evaluate_many(&self.spill_state.merging_aggregate_arguments, batch)? } else { - evaluate_many(&self.aggregate_arguments, &batch)? + evaluate_many(&self.aggregate_arguments, batch)? }; // Evaluate the filter expressions, if any, against the inputs let filter_values = if self.spill_state.is_stream_merging { let filter_expressions = vec![None; self.accumulators.len()]; - evaluate_optional(&filter_expressions, &batch)? + evaluate_optional(&filter_expressions, batch)? } else { - evaluate_optional(&self.filter_expressions, &batch)? + evaluate_optional(&self.filter_expressions, batch)? }; for group_values in &group_by_values { @@ -1054,10 +1054,10 @@ impl GroupedHashAggregateStream { } /// Transforms input batch to intermediate aggregate state, without grouping it - fn transform_to_states(&self, batch: RecordBatch) -> Result { - let group_values = evaluate_group_by(&self.group_by, &batch)?; - let input_values = evaluate_many(&self.aggregate_arguments, &batch)?; - let filter_values = evaluate_optional(&self.filter_expressions, &batch)?; + fn transform_to_states(&self, batch: &RecordBatch) -> Result { + let group_values = evaluate_group_by(&self.group_by, batch)?; + let input_values = evaluate_many(&self.aggregate_arguments, batch)?; + let filter_values = evaluate_optional(&self.filter_expressions, batch)?; let mut output = group_values.first().cloned().ok_or_else(|| { internal_datafusion_err!("group_values expected to have at least one element") diff --git a/datafusion/physical-plan/src/aggregates/topk/hash_table.rs b/datafusion/physical-plan/src/aggregates/topk/hash_table.rs index 232b87de32314..6108e037a117d 100644 --- a/datafusion/physical-plan/src/aggregates/topk/hash_table.rs +++ b/datafusion/physical-plan/src/aggregates/topk/hash_table.rs @@ -369,7 +369,7 @@ hash_float!(f16, f32, f64); pub fn new_hash_table( limit: usize, - kt: DataType, + kt: &DataType, ) -> Result> { macro_rules! 
downcast_helper { ($kt:ty, $d:ident) => { diff --git a/datafusion/physical-plan/src/aggregates/topk/heap.rs b/datafusion/physical-plan/src/aggregates/topk/heap.rs index e694422e443da..ab3ecaedbbae7 100644 --- a/datafusion/physical-plan/src/aggregates/topk/heap.rs +++ b/datafusion/physical-plan/src/aggregates/topk/heap.rs @@ -323,13 +323,7 @@ impl TopKHeap { } } - fn _tree_print( - &self, - idx: usize, - prefix: String, - is_tail: bool, - output: &mut String, - ) { + fn _tree_print(&self, idx: usize, prefix: &str, is_tail: bool, output: &mut String) { if let Some(Some(hi)) = self.heap.get(idx) { let connector = if idx != 0 { if is_tail { @@ -354,10 +348,10 @@ impl TopKHeap { let right_exists = right_idx < self.len; if left_exists { - self._tree_print(left_idx, child_prefix.clone(), !right_exists, output); + self._tree_print(left_idx, &child_prefix, !right_exists, output); } if right_exists { - self._tree_print(right_idx, child_prefix, true, output); + self._tree_print(right_idx, &child_prefix, true, output); } } } @@ -367,7 +361,7 @@ impl Display for TopKHeap { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let mut output = String::new(); if self.heap.first().is_some() { - self._tree_print(0, String::new(), true, &mut output); + self._tree_print(0, "", true, &mut output); } write!(f, "{}", output) } diff --git a/datafusion/physical-plan/src/aggregates/topk/priority_map.rs b/datafusion/physical-plan/src/aggregates/topk/priority_map.rs index ed41d22e935ba..924fb036d66ec 100644 --- a/datafusion/physical-plan/src/aggregates/topk/priority_map.rs +++ b/datafusion/physical-plan/src/aggregates/topk/priority_map.rs @@ -33,7 +33,7 @@ pub struct PriorityMap { impl PriorityMap { pub fn new( - key_type: DataType, + key_type: &DataType, val_type: DataType, capacity: usize, descending: bool, @@ -119,7 +119,7 @@ mod tests { fn should_append() -> Result<()> { let ids: ArrayRef = Arc::new(StringArray::from(vec!["1"])); let vals: ArrayRef = Arc::new(Int64Array::from(vec![1])); - let mut agg = PriorityMap::new(DataType::Utf8, DataType::Int64, 1, false)?; + let mut agg = PriorityMap::new(&DataType::Utf8, DataType::Int64, 1, false)?; agg.set_batch(ids, vals); agg.insert(0)?; @@ -143,7 +143,7 @@ mod tests { fn should_ignore_higher_group() -> Result<()> { let ids: ArrayRef = Arc::new(StringArray::from(vec!["1", "2"])); let vals: ArrayRef = Arc::new(Int64Array::from(vec![1, 2])); - let mut agg = PriorityMap::new(DataType::Utf8, DataType::Int64, 1, false)?; + let mut agg = PriorityMap::new(&DataType::Utf8, DataType::Int64, 1, false)?; agg.set_batch(ids, vals); agg.insert(0)?; agg.insert(1)?; @@ -168,7 +168,7 @@ mod tests { fn should_ignore_lower_group() -> Result<()> { let ids: ArrayRef = Arc::new(StringArray::from(vec!["2", "1"])); let vals: ArrayRef = Arc::new(Int64Array::from(vec![2, 1])); - let mut agg = PriorityMap::new(DataType::Utf8, DataType::Int64, 1, true)?; + let mut agg = PriorityMap::new(&DataType::Utf8, DataType::Int64, 1, true)?; agg.set_batch(ids, vals); agg.insert(0)?; agg.insert(1)?; @@ -193,7 +193,7 @@ mod tests { fn should_ignore_higher_same_group() -> Result<()> { let ids: ArrayRef = Arc::new(StringArray::from(vec!["1", "1"])); let vals: ArrayRef = Arc::new(Int64Array::from(vec![1, 2])); - let mut agg = PriorityMap::new(DataType::Utf8, DataType::Int64, 2, false)?; + let mut agg = PriorityMap::new(&DataType::Utf8, DataType::Int64, 2, false)?; agg.set_batch(ids, vals); agg.insert(0)?; agg.insert(1)?; @@ -218,7 +218,7 @@ mod tests { fn should_ignore_lower_same_group() -> Result<()> { 
let ids: ArrayRef = Arc::new(StringArray::from(vec!["1", "1"])); let vals: ArrayRef = Arc::new(Int64Array::from(vec![2, 1])); - let mut agg = PriorityMap::new(DataType::Utf8, DataType::Int64, 2, true)?; + let mut agg = PriorityMap::new(&DataType::Utf8, DataType::Int64, 2, true)?; agg.set_batch(ids, vals); agg.insert(0)?; agg.insert(1)?; @@ -243,7 +243,7 @@ mod tests { fn should_accept_lower_group() -> Result<()> { let ids: ArrayRef = Arc::new(StringArray::from(vec!["2", "1"])); let vals: ArrayRef = Arc::new(Int64Array::from(vec![2, 1])); - let mut agg = PriorityMap::new(DataType::Utf8, DataType::Int64, 1, false)?; + let mut agg = PriorityMap::new(&DataType::Utf8, DataType::Int64, 1, false)?; agg.set_batch(ids, vals); agg.insert(0)?; agg.insert(1)?; @@ -268,7 +268,7 @@ mod tests { fn should_accept_higher_group() -> Result<()> { let ids: ArrayRef = Arc::new(StringArray::from(vec!["1", "2"])); let vals: ArrayRef = Arc::new(Int64Array::from(vec![1, 2])); - let mut agg = PriorityMap::new(DataType::Utf8, DataType::Int64, 1, true)?; + let mut agg = PriorityMap::new(&DataType::Utf8, DataType::Int64, 1, true)?; agg.set_batch(ids, vals); agg.insert(0)?; agg.insert(1)?; @@ -293,7 +293,7 @@ mod tests { fn should_accept_lower_for_group() -> Result<()> { let ids: ArrayRef = Arc::new(StringArray::from(vec!["1", "1"])); let vals: ArrayRef = Arc::new(Int64Array::from(vec![2, 1])); - let mut agg = PriorityMap::new(DataType::Utf8, DataType::Int64, 2, false)?; + let mut agg = PriorityMap::new(&DataType::Utf8, DataType::Int64, 2, false)?; agg.set_batch(ids, vals); agg.insert(0)?; agg.insert(1)?; @@ -318,7 +318,7 @@ mod tests { fn should_accept_higher_for_group() -> Result<()> { let ids: ArrayRef = Arc::new(StringArray::from(vec!["1", "1"])); let vals: ArrayRef = Arc::new(Int64Array::from(vec![1, 2])); - let mut agg = PriorityMap::new(DataType::Utf8, DataType::Int64, 2, true)?; + let mut agg = PriorityMap::new(&DataType::Utf8, DataType::Int64, 2, true)?; agg.set_batch(ids, vals); agg.insert(0)?; agg.insert(1)?; @@ -343,7 +343,7 @@ mod tests { fn should_handle_null_ids() -> Result<()> { let ids: ArrayRef = Arc::new(StringArray::from(vec![Some("1"), None, None])); let vals: ArrayRef = Arc::new(Int64Array::from(vec![1, 2, 3])); - let mut agg = PriorityMap::new(DataType::Utf8, DataType::Int64, 2, true)?; + let mut agg = PriorityMap::new(&DataType::Utf8, DataType::Int64, 2, true)?; agg.set_batch(ids, vals); agg.insert(0)?; agg.insert(1)?; diff --git a/datafusion/physical-plan/src/aggregates/topk_stream.rs b/datafusion/physical-plan/src/aggregates/topk_stream.rs index 075d8c5f28833..99fed0b87db0d 100644 --- a/datafusion/physical-plan/src/aggregates/topk_stream.rs +++ b/datafusion/physical-plan/src/aggregates/topk_stream.rs @@ -56,7 +56,7 @@ impl GroupedTopKAggregateStream { ) -> Result { let agg_schema = Arc::clone(&aggr.schema); let group_by = aggr.group_by.clone(); - let input = aggr.input.execute(partition, Arc::clone(&context))?; + let input = aggr.input.execute(partition, context)?; let aggregate_arguments = aggregate_expressions(&aggr.aggr_expr, &aggr.mode, group_by.expr.len())?; let (val_field, desc) = aggr @@ -67,7 +67,7 @@ impl GroupedTopKAggregateStream { let kt = expr.data_type(&aggr.input().schema())?; let vt = val_field.data_type().clone(); - let priority_map = PriorityMap::new(kt, vt, limit, desc)?; + let priority_map = PriorityMap::new(&kt, vt, limit, desc)?; Ok(GroupedTopKAggregateStream { partition, @@ -89,9 +89,9 @@ impl RecordBatchStream for GroupedTopKAggregateStream { } impl 
GroupedTopKAggregateStream { - fn intern(&mut self, ids: ArrayRef, vals: ArrayRef) -> Result<()> { + fn intern(&mut self, ids: ArrayRef, vals: &ArrayRef) -> Result<()> { let len = ids.len(); - self.priority_map.set_batch(ids, Arc::clone(&vals)); + self.priority_map.set_batch(ids, Arc::clone(vals)); let has_nulls = vals.null_count() > 0; for row_idx in 0..len { @@ -146,7 +146,7 @@ impl Stream for GroupedTopKAggregateStream { )?; assert_eq!(input_values.len(), 1, "Exactly 1 input required"); assert_eq!(input_values[0].len(), 1, "Exactly 1 input required"); - let input_values = Arc::clone(&input_values[0][0]); + let input_values = &input_values[0][0]; // iterate over each column of group_by values (*self).intern(group_by_values, input_values)?; diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index 287446328f8de..031b34a6d4429 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -194,7 +194,7 @@ impl ExecutionPlan for AnalyzeExec { show_statistics, total_rows, duration, - captured_input, + &captured_input, captured_schema, ) }; @@ -212,7 +212,7 @@ fn create_output_batch( show_statistics: bool, total_rows: usize, duration: std::time::Duration, - input: Arc, + input: &Arc, schema: SchemaRef, ) -> Result { let mut type_builder = StringBuilder::with_capacity(1, 1024); diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs index 46875fae94fc3..3a71e325b68cc 100644 --- a/datafusion/physical-plan/src/coalesce/mod.rs +++ b/datafusion/physical-plan/src/coalesce/mod.rs @@ -113,8 +113,8 @@ impl BatchCoalescer { /// Push next batch, and returns [`CoalescerState`] indicating the current /// state of the buffer. - pub fn push_batch(&mut self, batch: RecordBatch) -> CoalescerState { - let batch = gc_string_view_batch(&batch); + pub fn push_batch(&mut self, batch: &RecordBatch) -> CoalescerState { + let batch = gc_string_view_batch(batch); if self.limit_reached(&batch) { CoalescerState::LimitReached } else if self.target_reached(batch) { @@ -429,7 +429,7 @@ mod tests { let mut output_batches = vec![]; for batch in input_batches { - match coalescer.push_batch(batch) { + match coalescer.push_batch(&batch) { CoalescerState::Continue => {} CoalescerState::LimitReached => { output_batches.push(coalescer.finish_batch().unwrap()); diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 7caf5b8ab65a3..99bc1783dc174 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -183,7 +183,7 @@ impl ExecutionPlan for CoalesceBatchesExec { } fn statistics(&self) -> Result { - Statistics::with_fetch(self.input.statistics()?, self.schema(), self.fetch, 0, 1) + Statistics::with_fetch(self.input.statistics()?, &self.schema(), self.fetch, 0, 1) } fn with_fetch(&self, limit: Option) -> Option> { @@ -290,7 +290,7 @@ impl CoalesceBatchesStream { let _timer = cloned_time.timer(); match input_batch { - Some(Ok(batch)) => match self.coalescer.push_batch(batch) { + Some(Ok(batch)) => match self.coalescer.push_batch(&batch) { CoalescerState::Continue => {} CoalescerState::LimitReached => { self.inner_state = CoalesceBatchesStreamState::Exhausted; diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index f584542fafcfb..ea165a18554a9 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ 
b/datafusion/physical-plan/src/execution_plan.rs @@ -644,7 +644,7 @@ impl PlanProperties { /// 1. RepartitionExec for changing the partition number between two `ExecutionPlan`s /// 2. CoalescePartitionsExec for collapsing all of the partitions into one without ordering guarantee /// 3. SortPreservingMergeExec for collapsing all of the sorted partitions into one with ordering guarantee -pub fn need_data_exchange(plan: Arc) -> bool { +pub fn need_data_exchange(plan: &Arc) -> bool { if let Some(repartition) = plan.as_any().downcast_ref::() { !matches!( repartition.properties().output_partitioning(), @@ -720,7 +720,7 @@ pub fn execute_stream( 1 => plan.execute(0, context), 2.. => { // merge into a single partition - let plan = CoalescePartitionsExec::new(Arc::clone(&plan)); + let plan = CoalescePartitionsExec::new(plan); // CoalescePartitionsExec must produce a single partition assert_eq!(1, plan.properties().output_partitioning().partition_count()); plan.execute(0, context) @@ -733,7 +733,7 @@ pub async fn collect_partitioned( plan: Arc, context: Arc, ) -> Result>> { - let streams = execute_stream_partitioned(plan, context)?; + let streams = execute_stream_partitioned(&plan, &context)?; let mut join_set = JoinSet::new(); // Execute the plan and collect the results into batches. @@ -776,13 +776,13 @@ pub async fn collect_partitioned( /// Dropping the stream will abort the execution of the query, and free up /// any allocated resources pub fn execute_stream_partitioned( - plan: Arc, - context: Arc, + plan: &Arc, + context: &Arc, ) -> Result> { let num_partitions = plan.output_partitioning().partition_count(); let mut streams = Vec::with_capacity(num_partitions); for i in 0..num_partitions { - streams.push(plan.execute(i, Arc::clone(&context))?); + streams.push(plan.execute(i, Arc::clone(context))?); } Ok(streams) } @@ -807,7 +807,7 @@ pub fn execute_stream_partitioned( /// such columns, it wraps the resulting stream to enforce the `not null` constraints /// by invoking the `check_not_null_contraits` function on each batch of the stream. 
pub fn execute_input_stream( - input: Arc, + input: &Arc, sink_schema: SchemaRef, partition: usize, context: Arc, @@ -887,6 +887,7 @@ pub fn get_plan_string(plan: &Arc) -> Vec { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use std::any::Any; use std::sync::Arc; diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 3da0f21156d94..f3850032b0feb 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -86,7 +86,7 @@ impl FilterExec { )?; Ok(Self { predicate, - input: Arc::clone(&input), + input, metrics: ExecutionPlanMetricsSet::new(), default_selectivity, cache, diff --git a/datafusion/physical-plan/src/insert.rs b/datafusion/physical-plan/src/insert.rs index 5dc27bc239d26..34691ec33ecf6 100644 --- a/datafusion/physical-plan/src/insert.rs +++ b/datafusion/physical-plan/src/insert.rs @@ -229,7 +229,7 @@ impl ExecutionPlan for DataSinkExec { return internal_err!("DataSinkExec can only be called on partition 0!"); } let data = execute_input_stream( - Arc::clone(&self.input), + &self.input, Arc::clone(&self.sink_schema), 0, Arc::clone(&context), diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 48d648c89a354..0ac853c3f5e0e 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -365,7 +365,7 @@ impl HashJoinExec { let cache = Self::compute_properties( &left, &right, - Arc::clone(&join_schema), + &join_schema, *join_type, &on, partition_mode, @@ -477,7 +477,7 @@ impl HashJoinExec { fn compute_properties( left: &Arc, right: &Arc, - schema: SchemaRef, + schema: &SchemaRef, join_type: JoinType, on: JoinOnRef, mode: PartitionMode, @@ -488,7 +488,7 @@ impl HashJoinExec { left.equivalence_properties().clone(), right.equivalence_properties().clone(), &join_type, - Arc::clone(&schema), + Arc::clone(schema), &Self::maintains_input_order(join_type), Some(Self::probe_side()), on, @@ -528,9 +528,8 @@ impl HashJoinExec { // If contains projection, update the PlanProperties. 
if let Some(projection) = projection { // construct a map from the input expressions to the output expression of the Projection - let projection_mapping = - ProjectionMapping::from_indices(projection, &schema)?; - let out_schema = project_schema(&schema, Some(projection))?; + let projection_mapping = ProjectionMapping::from_indices(projection, schema)?; + let out_schema = project_schema(schema, Some(projection))?; output_partitioning = output_partitioning.project(&projection_mapping, &eq_properties); eq_properties = eq_properties.project(&projection_mapping, out_schema); @@ -773,9 +772,9 @@ impl ExecutionPlan for HashJoinExec { // There are some special cases though, for example: // - `A LEFT JOIN B ON A.col=B.col` with `COUNT_DISTINCT(B.col)=COUNT(B.col)` let mut stats = estimate_join_statistics( - Arc::clone(&self.left), - Arc::clone(&self.right), - self.on.clone(), + &self.left, + &self.right, + &self.on, &self.join_type, &self.join_schema, )?; diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index dadd20714eade..f6da51230096e 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -336,9 +336,9 @@ impl ExecutionPlan for NestedLoopJoinExec { fn statistics(&self) -> Result { estimate_join_statistics( - Arc::clone(&self.left), - Arc::clone(&self.right), - vec![], + &self.left, + &self.right, + &vec![], &self.join_type, &self.schema, ) diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 2118c1a5266fb..3cb4ecfaeccfb 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -395,9 +395,9 @@ impl ExecutionPlan for SortMergeJoinExec { // There are some special cases though, for example: // - `A LEFT JOIN B ON A.col=B.col` with `COUNT_DISTINCT(B.col)=COUNT(B.col)` estimate_join_statistics( - Arc::clone(&self.left), - Arc::clone(&self.right), - self.on.clone(), + &self.left, + &self.right, + &self.on, &self.join_type, &self.schema, ) @@ -896,7 +896,7 @@ impl SMJStream { } } - fn free_reservation(&mut self, buffered_batch: BufferedBatch) -> Result<()> { + fn free_reservation(&mut self, buffered_batch: &BufferedBatch) -> Result<()> { // Shrink memory usage for in-memory batches only if buffered_batch.spill_file.is_none() && buffered_batch.batch.is_some() { self.reservation @@ -924,8 +924,8 @@ impl SMJStream { if let Some(batch) = buffered_batch.batch { spill_record_batches( vec![batch], - spill_file.path().into(), - Arc::clone(&self.buffered_schema), + spill_file.path(), + &self.buffered_schema, )?; buffered_batch.spill_file = Some(spill_file); buffered_batch.batch = None; @@ -962,7 +962,7 @@ impl SMJStream { if let Some(buffered_batch) = self.buffered_data.batches.pop_front() { - self.free_reservation(buffered_batch)?; + self.free_reservation(&buffered_batch)?; } } else { // If the head batch is not fully processed, break the loop. 
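Note on the pattern applied in the hunks above: functions that only read an Arc-wrapped value (plans, schemas, join keys) now borrow it instead of taking ownership, so call sites drop their Arc::clone. A minimal, self-contained sketch of that change follows; the Plan type and estimate_rows functions are illustrative only and are not DataFusion APIs.

use std::sync::Arc;

// Hypothetical plan node, used only to illustrate the lint fix.
struct Plan {
    rows: usize,
}

// Before: taking `Arc<Plan>` by value forces callers to clone the Arc
// even though the function never stores or mutates it.
fn estimate_rows_owned(plan: Arc<Plan>) -> usize {
    plan.rows
}

// After: borrowing satisfies clippy::needless_pass_by_value and lets
// callers pass `&plan` without touching the reference count.
fn estimate_rows(plan: &Arc<Plan>) -> usize {
    plan.rows
}

fn main() {
    let plan = Arc::new(Plan { rows: 42 });

    // Old call site: an extra clone just to satisfy the signature.
    let a = estimate_rows_owned(Arc::clone(&plan));

    // New call site: borrow instead of clone.
    let b = estimate_rows(&plan);

    assert_eq!(a, b);
}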
diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index ac718a95e9f4c..b79247de01c98 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -1200,7 +1200,7 @@ impl SymmetricHashJoinStream { return Poll::Ready(Ok(StatefulStreamResult::Continue)); } self.set_state(SHJStreamState::PullLeft); - Poll::Ready(self.process_batch_from_right(batch)) + Poll::Ready(self.process_batch_from_right(&batch)) } Some(Err(e)) => Poll::Ready(Err(e)), None => { @@ -1229,7 +1229,7 @@ impl SymmetricHashJoinStream { return Poll::Ready(Ok(StatefulStreamResult::Continue)); } self.set_state(SHJStreamState::PullRight); - Poll::Ready(self.process_batch_from_left(batch)) + Poll::Ready(self.process_batch_from_left(&batch)) } Some(Err(e)) => Poll::Ready(Err(e)), None => { @@ -1258,7 +1258,7 @@ impl SymmetricHashJoinStream { if batch.num_rows() == 0 { return Poll::Ready(Ok(StatefulStreamResult::Continue)); } - Poll::Ready(self.process_batch_after_right_end(batch)) + Poll::Ready(self.process_batch_after_right_end(&batch)) } Some(Err(e)) => Poll::Ready(Err(e)), None => { @@ -1289,7 +1289,7 @@ impl SymmetricHashJoinStream { if batch.num_rows() == 0 { return Poll::Ready(Ok(StatefulStreamResult::Continue)); } - Poll::Ready(self.process_batch_after_left_end(batch)) + Poll::Ready(self.process_batch_after_left_end(&batch)) } Some(Err(e)) => Poll::Ready(Err(e)), None => { @@ -1319,7 +1319,7 @@ impl SymmetricHashJoinStream { fn process_batch_from_right( &mut self, - batch: RecordBatch, + batch: &RecordBatch, ) -> Result>> { self.perform_join_for_given_side(batch, JoinSide::Right) .map(|maybe_batch| { @@ -1333,7 +1333,7 @@ impl SymmetricHashJoinStream { fn process_batch_from_left( &mut self, - batch: RecordBatch, + batch: &RecordBatch, ) -> Result>> { self.perform_join_for_given_side(batch, JoinSide::Left) .map(|maybe_batch| { @@ -1347,14 +1347,14 @@ impl SymmetricHashJoinStream { fn process_batch_after_left_end( &mut self, - right_batch: RecordBatch, + right_batch: &RecordBatch, ) -> Result>> { self.process_batch_from_right(right_batch) } fn process_batch_after_right_end( &mut self, - left_batch: RecordBatch, + left_batch: &RecordBatch, ) -> Result>> { self.process_batch_from_left(left_batch) } @@ -1436,7 +1436,7 @@ impl SymmetricHashJoinStream { /// 5. Combines the results and returns a combined batch or `None` if no batch was produced. 
fn perform_join_for_given_side( &mut self, - probe_batch: RecordBatch, + probe_batch: &RecordBatch, probe_side: JoinSide, ) -> Result> { let ( @@ -1466,7 +1466,7 @@ impl SymmetricHashJoinStream { probe_side_metrics.input_batches.add(1); probe_side_metrics.input_rows.add(probe_batch.num_rows()); // Update the internal state of the hash joiner for the build side: - probe_hash_joiner.update_internal_state(&probe_batch, &self.random_state)?; + probe_hash_joiner.update_internal_state(probe_batch, &self.random_state)?; // Join the two sides: let equal_result = join_with_probe_batch( build_hash_joiner, @@ -1474,7 +1474,7 @@ impl SymmetricHashJoinStream { &self.schema, self.join_type, self.filter.as_ref(), - &probe_batch, + probe_batch, &self.column_indices, &self.random_state, self.null_equals_null, @@ -1495,7 +1495,7 @@ impl SymmetricHashJoinStream { calculate_filter_expr_intervals( &build_hash_joiner.input_buffer, build_side_sorted_filter_expr, - &probe_batch, + probe_batch, probe_side_sorted_filter_expr, )?; let prune_length = build_hash_joiner diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index 89f3feaf07be6..19e10e07843ae 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -772,16 +772,16 @@ struct PartialJoinStatistics { /// Estimate the statistics for the given join's output. pub(crate) fn estimate_join_statistics( - left: Arc, - right: Arc, - on: JoinOn, + left: &Arc, + right: &Arc, + on: &JoinOn, join_type: &JoinType, schema: &Schema, ) -> Result { let left_stats = left.statistics()?; let right_stats = right.statistics()?; - let join_stats = estimate_join_cardinality(join_type, left_stats, right_stats, &on); + let join_stats = estimate_join_cardinality(join_type, left_stats, right_stats, on); let (num_rows, column_statistics) = match join_stats { Some(stats) => (Precision::Inexact(stats.num_rows), stats.column_statistics), None => (Precision::Absent, Statistics::unknown_column(schema)), @@ -822,12 +822,12 @@ fn estimate_join_cardinality( match join_type { JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => { let ij_cardinality = estimate_inner_join_cardinality( - Statistics { + &Statistics { num_rows: left_stats.num_rows, total_byte_size: Precision::Absent, column_statistics: left_col_stats, }, - Statistics { + &Statistics { num_rows: right_stats.num_rows, total_byte_size: Precision::Absent, column_statistics: right_col_stats, @@ -903,11 +903,11 @@ fn estimate_join_cardinality( /// a very conservative implementation that can quickly give up if there is not /// enough input statistics. 
fn estimate_inner_join_cardinality( - left_stats: Statistics, - right_stats: Statistics, + left_stats: &Statistics, + right_stats: &Statistics, ) -> Option> { // Immediately return if inputs considered as non-overlapping - if let Some(estimation) = estimate_disjoint_inputs(&left_stats, &right_stats) { + if let Some(estimation) = estimate_disjoint_inputs(left_stats, right_stats) { return Some(estimation); }; @@ -1349,7 +1349,7 @@ pub(crate) fn append_right_indices( preserve_order_for_right: bool, ) -> (UInt64Array, UInt32Array) { if preserve_order_for_right { - append_probe_indices_in_order(left_indices, right_indices, adjust_range) + append_probe_indices_in_order(&left_indices, &right_indices, adjust_range) } else { let right_unmatched_indices = get_anti_indices(adjust_range, &right_indices); @@ -1448,8 +1448,8 @@ where /// - A `PrimitiveArray` of `UInt64Type` with the newly constructed build indices. /// - A `PrimitiveArray` of `UInt32Type` with the newly constructed probe indices. fn append_probe_indices_in_order( - build_indices: PrimitiveArray, - probe_indices: PrimitiveArray, + build_indices: &PrimitiveArray, + probe_indices: &PrimitiveArray, range: Range, ) -> (PrimitiveArray, PrimitiveArray) { // Builders for new indices: @@ -2017,12 +2017,12 @@ mod tests { assert_eq!( estimate_inner_join_cardinality( - Statistics { + &Statistics { num_rows: Inexact(left_num_rows), total_byte_size: Absent, column_statistics: left_col_stats.clone(), }, - Statistics { + &Statistics { num_rows: Inexact(right_num_rows), total_byte_size: Absent, column_statistics: right_col_stats.clone(), @@ -2072,12 +2072,12 @@ mod tests { // count is 200, so we are going to pick it. assert_eq!( estimate_inner_join_cardinality( - Statistics { + &Statistics { num_rows: Precision::Inexact(400), total_byte_size: Precision::Absent, column_statistics: left_col_stats, }, - Statistics { + &Statistics { num_rows: Precision::Inexact(400), total_byte_size: Precision::Absent, column_statistics: right_col_stats, @@ -2106,12 +2106,12 @@ mod tests { assert_eq!( estimate_inner_join_cardinality( - Statistics { + &Statistics { num_rows: Precision::Inexact(100), total_byte_size: Precision::Absent, column_statistics: left_col_stats, }, - Statistics { + &Statistics { num_rows: Precision::Inexact(100), total_byte_size: Precision::Absent, column_statistics: right_col_stats, diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index 360e942226d24..aaee751ed8412 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -186,7 +186,7 @@ impl ExecutionPlan for GlobalLimitExec { fn statistics(&self) -> Result { Statistics::with_fetch( self.input.statistics()?, - self.schema(), + &self.schema(), self.fetch, self.skip, 1, @@ -322,7 +322,7 @@ impl ExecutionPlan for LocalLimitExec { fn statistics(&self) -> Result { Statistics::with_fetch( self.input.statistics()?, - self.schema(), + &self.schema(), Some(self.fetch), 0, 1, @@ -465,6 +465,7 @@ impl RecordBatchStream for LimitStream { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::common::collect; diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index f1b9cdaf728ff..5dd3472b23524 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -226,7 +226,7 @@ impl ExecutionPlan for ProjectionExec { 
Ok(stats_projection( self.input.statistics()?, self.expr.iter().map(|(e, _)| Arc::clone(e)), - Arc::clone(&self.schema), + self.schema.as_ref(), )) } @@ -252,7 +252,7 @@ fn get_field_metadata( fn stats_projection( mut stats: Statistics, exprs: impl Iterator>, - schema: SchemaRef, + schema: &Schema, ) -> Statistics { let mut primitive_row_size = 0; let mut primitive_row_size_possible = true; @@ -266,7 +266,7 @@ fn stats_projection( ColumnStatistics::new_unknown() }; column_statistics.push(col_stats); - if let Ok(data_type) = expr.data_type(&schema) { + if let Ok(data_type) = expr.data_type(schema) { if let Some(value) = data_type.primitive_width() { primitive_row_size += value; continue; @@ -413,7 +413,7 @@ mod tests { Arc::new(expressions::Column::new("col0", 0)), ]; - let result = stats_projection(source, exprs.into_iter(), Arc::new(schema)); + let result = stats_projection(source, exprs.into_iter(), &schema); let expected = Statistics { num_rows: Precision::Exact(5), @@ -447,7 +447,7 @@ mod tests { Arc::new(expressions::Column::new("col0", 0)), ]; - let result = stats_projection(source, exprs.into_iter(), Arc::new(schema)); + let result = stats_projection(source, exprs.into_iter(), &schema); let expected = Statistics { num_rows: Precision::Exact(5), diff --git a/datafusion/physical-plan/src/recursive_query.rs b/datafusion/physical-plan/src/recursive_query.rs index e9ea9d4f50321..ca0cdb07106ee 100644 --- a/datafusion/physical-plan/src/recursive_query.rs +++ b/datafusion/physical-plan/src/recursive_query.rs @@ -82,7 +82,7 @@ impl RecursiveQueryExec { // Each recursive query needs its own work table let work_table = Arc::new(WorkTable::new()); // Use the same work table for both the WorkTableExec and the recursive term - let recursive_term = assign_work_table(recursive_term, Arc::clone(&work_table))?; + let recursive_term = assign_work_table(recursive_term, &work_table)?; let cache = Self::compute_properties(static_term.schema()); Ok(RecursiveQueryExec { name, @@ -322,7 +322,7 @@ impl RecursiveQueryStream { fn assign_work_table( plan: Arc, - work_table: Arc, + work_table: &Arc, ) -> Result> { let mut work_table_refs = 0; plan.transform_down(|plan| { @@ -334,7 +334,7 @@ fn assign_work_table( } else { work_table_refs += 1; Ok(Transformed::yes(Arc::new( - exec.with_work_table(Arc::clone(&work_table)), + exec.with_work_table(Arc::clone(work_table)), ))) } } else if plan.as_any().is::() { diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 093803e3c8b30..c3e202c3c53ec 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -81,12 +81,12 @@ struct RepartitionExecState { impl RepartitionExecState { fn new( - input: Arc, - partitioning: Partitioning, - metrics: ExecutionPlanMetricsSet, + input: &Arc, + partitioning: &Partitioning, + metrics: &ExecutionPlanMetricsSet, preserve_order: bool, - name: String, - context: Arc, + name: &str, + context: &Arc, ) -> Self { let num_input_partitions = input.output_partitioning().partition_count(); let num_output_partitions = partitioning.partition_count(); @@ -131,15 +131,15 @@ impl RepartitionExecState { }) .collect(); - let r_metrics = RepartitionMetrics::new(i, num_output_partitions, &metrics); + let r_metrics = RepartitionMetrics::new(i, num_output_partitions, metrics); let input_task = SpawnedTask::spawn(RepartitionExec::pull_from_input( - Arc::clone(&input), + Arc::clone(input), i, txs.clone(), partitioning.clone(), r_metrics, - 
Arc::clone(&context), + Arc::clone(context), )); // In a separate task, wait for each input to be done @@ -581,15 +581,15 @@ impl ExecutionPlan for RepartitionExec { let stream = futures::stream::once(async move { let num_input_partitions = input.output_partitioning().partition_count(); - let input_captured = Arc::clone(&input); - let metrics_captured = metrics.clone(); - let name_captured = name.clone(); - let context_captured = Arc::clone(&context); + let input_captured = &input; + let metrics_captured = &metrics; + let name_captured = &name; + let context_captured = &context; let state = lazy_state .get_or_init(|| async move { Mutex::new(RepartitionExecState::new( input_captured, - partitioning, + &partitioning, metrics_captured, preserve_order, name_captured, diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index fa9628abdfbb6..120340f7adeb6 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -395,11 +395,8 @@ impl ExternalSorter { let spill_file = self.runtime.disk_manager.create_tmp_file("Sorting")?; let batches = std::mem::take(&mut self.in_mem_batches); - let spilled_rows = spill_record_batches( - batches, - spill_file.path().into(), - Arc::clone(&self.schema), - )?; + let spilled_rows = + spill_record_batches(batches, spill_file.path(), &self.schema)?; let used = self.reservation.free(); self.metrics.spill_count.add(1); self.metrics.spilled_bytes.add(used); @@ -913,7 +910,7 @@ impl ExecutionPlan for SortExec { self.expr.clone(), *fetch, context.session_config().batch_size(), - context.runtime_env(), + &context.runtime_env(), &self.metrics_set, partition, )?; @@ -961,7 +958,7 @@ impl ExecutionPlan for SortExec { } fn statistics(&self) -> Result { - Statistics::with_fetch(self.input.statistics()?, self.schema(), self.fetch, 0, 1) + Statistics::with_fetch(self.input.statistics()?, &self.schema(), self.fetch, 0, 1) } fn with_fetch(&self, limit: Option) -> Option> { diff --git a/datafusion/physical-plan/src/spill.rs b/datafusion/physical-plan/src/spill.rs index 21ca58fa0a9fa..5e01f08512b3e 100644 --- a/datafusion/physical-plan/src/spill.rs +++ b/datafusion/physical-plan/src/spill.rs @@ -19,11 +19,12 @@ use std::fs::File; use std::io::BufReader; -use std::path::{Path, PathBuf}; +use std::path::Path; use arrow::datatypes::SchemaRef; use arrow::ipc::reader::FileReader; use arrow::record_batch::RecordBatch; +use arrow_schema::Schema; use log::debug; use tokio::sync::mpsc::Sender; @@ -48,7 +49,7 @@ pub(crate) fn read_spill_as_stream( let mut builder = RecordBatchReceiverStream::builder(schema, buffer); let sender = builder.tx(); - builder.spawn_blocking(move || read_spill(sender, path.path())); + builder.spawn_blocking(move || read_spill(&sender, path.path())); Ok(builder.build()) } @@ -58,10 +59,10 @@ pub(crate) fn read_spill_as_stream( /// Returns total number of the rows spilled to disk. 
pub(crate) fn spill_record_batches( batches: Vec, - path: PathBuf, - schema: SchemaRef, + path: &Path, + schema: &Schema, ) -> Result { - let mut writer = IPCWriter::new(path.as_ref(), schema.as_ref())?; + let mut writer = IPCWriter::new(path, schema)?; for batch in batches { writer.write(&batch)?; } @@ -75,7 +76,7 @@ pub(crate) fn spill_record_batches( Ok(writer.num_rows) } -fn read_spill(sender: Sender>, path: &Path) -> Result<()> { +fn read_spill(sender: &Sender>, path: &Path) -> Result<()> { let file = BufReader::new(File::open(path)?); let reader = FileReader::try_new(file, None)?; for batch in reader { @@ -91,13 +92,13 @@ fn read_spill(sender: Sender>, path: &Path) -> Result<()> { /// Return `total_rows` what is spilled pub fn spill_record_batch_by_size( batch: &RecordBatch, - path: PathBuf, - schema: SchemaRef, + path: &Path, + schema: &Schema, batch_size_rows: usize, ) -> Result<()> { let mut offset = 0; let total_rows = batch.num_rows(); - let mut writer = IPCWriter::new(&path, schema.as_ref())?; + let mut writer = IPCWriter::new(path, schema)?; while offset < total_rows { let length = std::cmp::min(total_rows - offset, batch_size_rows); @@ -119,7 +120,6 @@ mod tests { use datafusion_execution::DiskManager; use std::fs::File; use std::io::BufReader; - use std::sync::Arc; #[test] fn test_batch_spill_and_read() -> Result<()> { @@ -140,11 +140,7 @@ mod tests { let spill_file = disk_manager.create_tmp_file("Test Spill")?; let schema = batch1.schema(); let num_rows = batch1.num_rows() + batch2.num_rows(); - let cnt = spill_record_batches( - vec![batch1, batch2], - spill_file.path().into(), - Arc::clone(&schema), - ); + let cnt = spill_record_batches(vec![batch1, batch2], spill_file.path(), &schema); assert_eq!(cnt.unwrap(), num_rows); let file = BufReader::new(File::open(spill_file.path())?); @@ -168,12 +164,7 @@ mod tests { let spill_file = disk_manager.create_tmp_file("Test Spill")?; let schema = batch1.schema(); - spill_record_batch_by_size( - &batch1, - spill_file.path().into(), - Arc::clone(&schema), - 1, - )?; + spill_record_batch_by_size(&batch1, spill_file.path(), &schema, 1)?; let file = BufReader::new(File::open(spill_file.path())?); let reader = arrow::ipc::reader::FileReader::try_new(file, None)?; diff --git a/datafusion/physical-plan/src/topk/mod.rs b/datafusion/physical-plan/src/topk/mod.rs index d3f1a4fd96caf..2001a8cf4e486 100644 --- a/datafusion/physical-plan/src/topk/mod.rs +++ b/datafusion/physical-plan/src/topk/mod.rs @@ -103,7 +103,7 @@ impl TopK { expr: Vec, k: usize, batch_size: usize, - runtime: Arc, + runtime: &Arc, metrics: &ExecutionPlanMetricsSet, partition: usize, ) -> Result { diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 78b25686054d8..5393671dd9cba 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -253,7 +253,7 @@ impl ExecutionPlan for UnionExec { Ok(stats .into_iter() - .reduce(stats_union) + .reduce(|a, b| stats_union(a, &b)) .unwrap_or_else(|| Statistics::new_unknown(&self.schema()))) } @@ -438,7 +438,7 @@ impl ExecutionPlan for InterleaveExec { Ok(stats .into_iter() - .reduce(stats_union) + .reduce(|a, b| stats_union(a, &b)) .unwrap_or_else(|| Statistics::new_unknown(&self.schema()))) } @@ -559,7 +559,7 @@ impl Stream for CombinedRecordBatchStream { fn col_stats_union( mut left: ColumnStatistics, - right: ColumnStatistics, + right: &ColumnStatistics, ) -> ColumnStatistics { left.distinct_count = Precision::Absent; left.min_value = 
left.min_value.min(&right.min_value); @@ -569,13 +569,13 @@ fn col_stats_union( left } -fn stats_union(mut left: Statistics, right: Statistics) -> Statistics { +fn stats_union(mut left: Statistics, right: &Statistics) -> Statistics { left.num_rows = left.num_rows.add(&right.num_rows); left.total_byte_size = left.total_byte_size.add(&right.total_byte_size); left.column_statistics = left .column_statistics .into_iter() - .zip(right.column_statistics) + .zip(right.column_statistics.iter()) .map(|(a, b)| col_stats_union(a, b)) .collect::>(); left @@ -583,6 +583,7 @@ fn stats_union(mut left: Statistics, right: Statistics) -> Statistics { #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use crate::collect; use crate::memory::MemoryExec; @@ -697,7 +698,7 @@ mod tests { ], }; - let result = stats_union(left, right); + let result = stats_union(left, &right); let expected = Statistics { num_rows: Precision::Exact(12), total_byte_size: Precision::Exact(52), diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index b99d0d8388709..e473ec143a944 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -678,6 +678,7 @@ fn flatten_list_cols_from_indices( #[cfg(test)] mod tests { + #![allow(clippy::needless_pass_by_value)] // OK in tests use super::*; use arrow::datatypes::Field; use arrow_array::{GenericListArray, OffsetSizeTrait, StringArray}; diff --git a/datafusion/physical-plan/src/values.rs b/datafusion/physical-plan/src/values.rs index 3ea27d62d80b8..13d1ab54c71c4 100644 --- a/datafusion/physical-plan/src/values.rs +++ b/datafusion/physical-plan/src/values.rs @@ -50,7 +50,7 @@ impl ValuesExec { /// create a new values exec from data as expr pub fn try_new( schema: SchemaRef, - data: Vec>>, + data: &[Vec>], ) -> Result { if data.is_empty() { return plan_err!("Values list cannot be empty"); @@ -219,7 +219,7 @@ mod tests { #[tokio::test] async fn values_empty_case() -> Result<()> { let schema = test::aggr_test_schema(); - let empty = ValuesExec::try_new(schema, vec![]); + let empty = ValuesExec::try_new(schema, &[]); assert!(empty.is_err()); Ok(()) } @@ -260,9 +260,9 @@ mod tests { DataType::UInt32, false, )])); - let _ = ValuesExec::try_new(Arc::clone(&schema), vec![vec![lit(1u32)]]).unwrap(); + let _ = ValuesExec::try_new(Arc::clone(&schema), &[vec![lit(1u32)]]).unwrap(); // Test that a null value is rejected - let _ = ValuesExec::try_new(schema, vec![vec![lit(ScalarValue::UInt32(None))]]) + let _ = ValuesExec::try_new(schema, &[vec![lit(ScalarValue::UInt32(None))]]) .unwrap_err(); } } diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 56823e6dec2d2..720abf9d47c77 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -182,7 +182,7 @@ fn get_scalar_value_from_args( }) } -fn get_signed_integer(value: ScalarValue) -> Result { +fn get_signed_integer(value: &ScalarValue) -> Result { if !value.data_type().is_integer() { return Err(DataFusionError::Execution( "Expected an integer value".to_string(), @@ -191,7 +191,7 @@ fn get_signed_integer(value: ScalarValue) -> Result { value.cast_to(&DataType::Int64)?.try_into() } -fn get_unsigned_integer(value: ScalarValue) -> Result { +fn get_unsigned_integer(value: &ScalarValue) -> Result { if !value.data_type().is_integer() { return Err(DataFusionError::Execution( "Expected an integer value".to_string(), @@ -238,10 +238,10 
@@ fn create_built_in_window_expr( } if n.is_unsigned() { - let n = get_unsigned_integer(n)?; + let n = get_unsigned_integer(&n)?; Arc::new(Ntile::new(name, n, out_data_type)) } else { - let n: i64 = get_signed_integer(n)?; + let n: i64 = get_signed_integer(&n)?; if n <= 0 { return exec_err!("NTILE requires a positive integer"); } @@ -251,7 +251,7 @@ fn create_built_in_window_expr( BuiltInWindowFunction::Lag => { let arg = Arc::clone(&args[0]); let shift_offset = get_scalar_value_from_args(args, 1)? - .map(get_signed_integer) + .map(|scalar| get_signed_integer(&scalar)) .map_or(Ok(None), |v| v.map(Some))?; let default_value = get_casted_value(get_scalar_value_from_args(args, 2)?, out_data_type)?; @@ -267,7 +267,7 @@ fn create_built_in_window_expr( BuiltInWindowFunction::Lead => { let arg = Arc::clone(&args[0]); let shift_offset = get_scalar_value_from_args(args, 1)? - .map(get_signed_integer) + .map(|scalar| get_signed_integer(&scalar)) .map_or(Ok(None), |v| v.map(Some))?; let default_value = get_casted_value(get_scalar_value_from_args(args, 2)?, out_data_type)?; @@ -289,8 +289,7 @@ fn create_built_in_window_expr( .ok_or_else(|| { exec_datafusion_err!("Expected a signed integer literal for the second argument of nth_value, got {}", args[1]) })? - .value() - .clone(), + .value(), )?; Arc::new(NthValue::nth( name, diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index afe9700ed08cf..efbc58698e6d3 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -78,7 +78,7 @@ impl WindowAggExec { let ordered_partition_by_indices = get_ordered_partition_by_indices(window_expr[0].partition_by(), &input); - let cache = Self::compute_properties(Arc::clone(&schema), &input, &window_expr); + let cache = Self::compute_properties(&schema, &input, &window_expr); Ok(Self { input, window_expr, @@ -116,12 +116,12 @@ impl WindowAggExec { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
     fn compute_properties(
-        schema: SchemaRef,
+        schema: &SchemaRef,
         input: &Arc<dyn ExecutionPlan>,
         window_expr: &[Arc<dyn WindowExpr>],
     ) -> PlanProperties {
         // Calculate equivalence properties:
-        let eq_properties = window_equivalence_properties(&schema, input, window_expr);
+        let eq_properties = window_equivalence_properties(schema, input, window_expr);
 
         // Get output partitioning:
         // Because we can have repartitioning using the partition keys this
diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs
index 74b6073a415ef..a76dabb9f19b6 100644
--- a/datafusion/proto/src/physical_plan/mod.rs
+++ b/datafusion/proto/src/physical_plan/mod.rs
@@ -1471,9 +1471,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode {
                 let agg = exec
                     .aggr_expr()
                     .iter()
-                    .map(|expr| {
-                        serialize_physical_aggr_expr(expr.to_owned(), extension_codec)
-                    })
+                    .map(|expr| serialize_physical_aggr_expr(expr, extension_codec))
                     .collect::<Result<Vec<_>>>()?;
 
                 let agg_names = exec
diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs
index 25be7de61cc3f..bb92d102a6e80 100644
--- a/datafusion/proto/src/physical_plan/to_proto.rs
+++ b/datafusion/proto/src/physical_plan/to_proto.rs
@@ -49,7 +49,7 @@ use crate::protobuf::{
 use super::PhysicalExtensionCodec;
 
 pub fn serialize_physical_aggr_expr(
-    aggr_expr: Arc<dyn AggregateExpr>,
+    aggr_expr: &Arc<dyn AggregateExpr>,
     codec: &dyn PhysicalExtensionCodec,
 ) -> Result<protobuf::PhysicalExprNode> {
     let expressions = serialize_physical_exprs(&aggr_expr.expressions(), codec)?;
diff --git a/datafusion/proto/tests/cases/mod.rs b/datafusion/proto/tests/cases/mod.rs
index fbb2cd8f1e832..d0752cffbb2c7 100644
--- a/datafusion/proto/tests/cases/mod.rs
+++ b/datafusion/proto/tests/cases/mod.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#![allow(clippy::needless_pass_by_value)] // OK in tests
+
 use std::any::Any;
 
 use arrow::datatypes::DataType;
diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
index dd3b99b0768b2..cf1545abedbbb 100644
--- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
@@ -1379,22 +1379,22 @@ fn round_trip_scalar_values() {
         ScalarStructBuilder::new()
             .with_scalar(
                 Field::new("a", DataType::Int32, true),
-                ScalarValue::from(23i32),
+                &ScalarValue::from(23i32),
             )
             .with_scalar(
                 Field::new("b", DataType::Boolean, false),
-                ScalarValue::from(false),
+                &ScalarValue::from(false),
             )
             .build()
             .unwrap(),
         ScalarStructBuilder::new()
             .with_scalar(
                 Field::new("a", DataType::Int32, true),
-                ScalarValue::from(23i32),
+                &ScalarValue::from(23i32),
             )
             .with_scalar(
                 Field::new("b", DataType::Boolean, false),
-                ScalarValue::from(false),
+                &ScalarValue::from(false),
             )
             .with_scalar(
                 Field::new(
@@ -1405,7 +1405,7 @@ fn round_trip_scalar_values() {
                     ),
                     false,
                 ),
-                ScalarValue::Dictionary(
+                &ScalarValue::Dictionary(
                     Box::new(DataType::UInt16),
                     Box::new("value".into()),
                 ),
diff --git a/datafusion/sql/src/cte.rs b/datafusion/sql/src/cte.rs
index 4c380f0b37a31..342dd01e1974a 100644
--- a/datafusion/sql/src/cte.rs
+++ b/datafusion/sql/src/cte.rs
@@ -47,7 +47,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
 
             // Create a logical plan for the CTE
             let cte_plan = if is_recursive {
-                self.recursive_cte(cte_name.clone(), *cte.query, planner_context)?
+                self.recursive_cte(&cte_name, *cte.query, planner_context)?
             } else {
                 self.non_recursive_cte(*cte.query, planner_context)?
             };
@@ -71,7 +71,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
 
     fn recursive_cte(
         &self,
-        cte_name: String,
+        cte_name: &str,
         mut cte_query: Query,
         planner_context: &mut PlannerContext,
     ) -> Result<LogicalPlan> {
@@ -136,7 +136,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         // ---------- Step 2: Create a temporary relation ------------------
         // Step 2.1: Create a table source for the temporary relation
         let work_table_source = self.context_provider.create_cte_work_table(
-            &cte_name,
+            cte_name,
             Arc::new(Schema::from(static_plan.schema().as_ref())),
         )?;
 
@@ -149,14 +149,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             )?
             .build()?;
 
-        let name = cte_name.clone();
+        let name = cte_name.to_string();
 
         // Step 2.3: Register the temporary relation in the planning context
         // For all the self references in the variadic term, we'll replace it
         // with the temporary relation we created above by temporarily registering
         // it as a CTE. This temporary relation in the planning context will be
         // replaced by the actual CTE plan once we're done with the planning.
-        planner_context.insert_cte(cte_name.clone(), work_table_plan);
+        planner_context.insert_cte(cte_name.to_string(), work_table_plan);
 
         // ---------- Step 3: Compile the recursive term ------------------
         // this uses the named_relation we inserted above to resolve the
@@ -168,7 +168,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         // if not, it is a non-recursive CTE
         if !has_work_table_reference(&recursive_plan, &work_table_source) {
             // Remove the work table plan from the context
-            planner_context.remove_cte(&cte_name);
+            planner_context.remove_cte(cte_name);
             // Compile it as a non-recursive CTE
             return self.set_operation_to_plan(
                 SetOperator::Union,
diff --git a/datafusion/sql/src/expr/binary_op.rs b/datafusion/sql/src/expr/binary_op.rs
index fcb57e8a82e4b..d421cafdfb1cc 100644
--- a/datafusion/sql/src/expr/binary_op.rs
+++ b/datafusion/sql/src/expr/binary_op.rs
@@ -21,7 +21,7 @@ use datafusion_expr::Operator;
 use sqlparser::ast::BinaryOperator;
 
 impl<'a, S: ContextProvider> SqlToRel<'a, S> {
-    pub(crate) fn parse_sql_binary_op(&self, op: BinaryOperator) -> Result<Operator> {
+    pub(crate) fn parse_sql_binary_op(&self, op: &BinaryOperator) -> Result<Operator> {
         match op {
             BinaryOperator::Gt => Ok(Operator::Gt),
             BinaryOperator::GtEq => Ok(Operator::GtEq),
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index 035fd3816c6cc..96842236f7ef9 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -125,7 +125,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 let RawBinaryExpr { op, left, right } = binary_expr;
                 Ok(Expr::BinaryExpr(BinaryExpr::new(
                     Box::new(left),
-                    self.parse_sql_binary_op(op)?,
+                    self.parse_sql_binary_op(&op)?,
                     Box::new(right),
                 )))
             }
@@ -600,7 +600,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             }
             SQLExpr::Struct { values, fields } => {
-                self.parse_struct(schema, planner_context, values, fields)
+                self.parse_struct(schema, planner_context, values, &fields)
             }
             SQLExpr::Position { expr, r#in } => {
                 self.sql_position_to_expr(*expr, *r#in, schema, planner_context)
             }
@@ -680,7 +680,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         schema: &DFSchema,
         planner_context: &mut PlannerContext,
         values: Vec<SQLExpr>,
-        fields: Vec<StructField>,
+        fields: &[StructField],
     ) -> Result<Expr> {
         if !fields.is_empty() {
             return not_impl_err!("Struct fields are not supported yet");
@@ -712,7 +712,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
     ) -> Result<Expr> {
         match values.first() {
             Some(SQLExpr::Identifier(_)) | Some(SQLExpr::Value(_)) => {
-                self.parse_struct(schema, planner_context, values, vec![])
+                self.parse_struct(schema, planner_context, values, &[])
             }
             None => not_impl_err!("Empty tuple not supported yet"),
             _ => {
diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs
index 2df8d89c59bc8..46333e66d128c 100644
--- a/datafusion/sql/src/parser.rs
+++ b/datafusion/sql/src/parser.rs
@@ -313,7 +313,7 @@ impl<'a> DFParser<'a> {
                 break;
             }
             if expecting_statement_delimiter {
-                return parser.expected("end of statement", parser.parser.peek_token());
+                return parser.expected("end of statement", &parser.parser.peek_token());
             }
 
             let statement = parser.parse_statement()?;
@@ -335,7 +335,7 @@ impl<'a> DFParser<'a> {
     fn expected<T>(
         &self,
         expected: &str,
-        found: TokenWithLocation,
+        found: &TokenWithLocation,
     ) -> Result<T, ParserError> {
         parser_err!(format!("Expected {expected}, found: {found}"))
     }
@@ -562,7 +562,7 @@ impl<'a> DFParser<'a> {
                 let identifier = self.parser.parse_identifier(false)?;
                 partitions.push(identifier.to_string());
             } else {
-                return self.expected("partition name", self.parser.peek_token());
+                return self.expected("partition name", &self.parser.peek_token());
             }
             let comma = self.parser.consume_token(&Token::Comma);
             if self.parser.consume_token(&Token::RParen) {
@@ -571,7 +571,7 @@ impl<'a> DFParser<'a> {
             } else if !comma {
                 return self.expected(
                     "',' or ')' after partition definition",
-                    self.parser.peek_token(),
+                    &self.parser.peek_token(),
                 );
             }
         }
@@ -643,7 +643,7 @@ impl<'a> DFParser<'a> {
             } else {
                 return self.expected(
                     "column name or constraint definition",
-                    self.parser.peek_token(),
+                    &self.parser.peek_token(),
                 );
             }
             let comma = self.parser.consume_token(&Token::Comma);
@@ -653,7 +653,7 @@ impl<'a> DFParser<'a> {
             } else if !comma {
                 return self.expected(
                     "',' or ')' after column definition",
-                    self.parser.peek_token(),
+                    &self.parser.peek_token(),
                 );
             }
         }
@@ -678,7 +678,7 @@ impl<'a> DFParser<'a> {
                 } else {
                     return self.expected(
                         "constraint details after CONSTRAINT <name>",
-                        self.parser.peek_token(),
+                        &self.parser.peek_token(),
                     );
                 }
             } else if let Some(option) = self.parser.parse_optional_column_option()? {
@@ -832,7 +832,7 @@ impl<'a> DFParser<'a> {
         let token = self.parser.next_token();
         match &token.token {
             Token::Word(w) => parse_file_type(&w.value),
-            _ => self.expected("one of ARROW, PARQUET, NDJSON, or CSV", token),
+            _ => self.expected("one of ARROW, PARQUET, NDJSON, or CSV", &token),
         }
     }
 
@@ -855,7 +855,7 @@ impl<'a> DFParser<'a> {
             } else if !comma {
                 return self.expected(
                     "',' or ')' after option definition",
-                    self.parser.peek_token(),
+                    &self.parser.peek_token(),
                 );
             }
         }
@@ -865,6 +865,7 @@ impl<'a> DFParser<'a> {
 
 #[cfg(test)]
 mod tests {
+    #![allow(clippy::needless_pass_by_value)] // OK in tests
     use super::*;
     use sqlparser::ast::Expr::Identifier;
     use sqlparser::ast::{BinaryOperator, DataType, Expr, Ident};
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 5cbe1d7c014ad..840da39dc27bb 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -104,8 +104,8 @@ impl ValueNormalizer {
         Self { normalize }
     }
 
-    pub fn normalize(&self, value: Value) -> Option<String> {
-        match (value_to_string(&value), self.normalize) {
+    pub fn normalize(&self, value: &Value) -> Option<String> {
+        match (value_to_string(value), self.normalize) {
             (Some(s), true) => Some(s.to_ascii_lowercase()),
             (Some(s), false) => Some(s),
             (None, _) => None,
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index 3dfc379b039a8..3be8645c61611 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -219,7 +219,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 hivevar,
                 variables,
                 value,
-            } => self.set_variable_to_plan(local, hivevar, &variables, value),
+            } => self.set_variable_to_plan(local, hivevar, &variables, &value),
 
             Statement::CreateTable(CreateTable {
                 query,
@@ -460,14 +460,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 full,
                 db_name,
                 filter,
-            } => self.show_tables_to_plan(extended, full, db_name, filter),
+            } => self.show_tables_to_plan(extended, full, &db_name, &filter),
 
             Statement::ShowColumns {
                 extended,
                 full,
                 table_name,
                 filter,
-            } => self.show_columns_to_plan(extended, full, table_name, filter),
+            } => self.show_columns_to_plan(extended, full, table_name, &filter),
 
             Statement::Insert(Insert {
                 or,
@@ -540,7 +540,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 if returning.is_some() {
                     plan_err!("Update-returning clause not yet supported")?;
                 }
-                self.update_to_plan(table, assignments, from, selection)
+                self.update_to_plan(table, &assignments, from, selection)
             }
 
             Statement::Delete(Delete {
@@ -842,8 +842,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         &self,
         extended: bool,
         full: bool,
-        db_name: Option<Ident>,
-        filter: Option<ShowStatementFilter>,
+        db_name: &Option<Ident>,
+        filter: &Option<ShowStatementFilter>,
     ) -> Result<LogicalPlan> {
         if self.has_table("information_schema", "tables") {
             // we only support the basic "SHOW TABLES"
@@ -1068,8 +1068,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 return plan_err!("Option {key} is specified multiple times");
             }
 
-            let Some(value_string) = self.value_normalizer.normalize(value.clone())
-            else {
+            let Some(value_string) = self.value_normalizer.normalize(&value) else {
                 return plan_err!("Unsupported Value {}", value);
             };
 
@@ -1182,7 +1181,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         local: bool,
         hivevar: bool,
         variables: &OneOrManyWithParens<ObjectName>,
-        value: Vec<SQLExpr>,
+        value: &[SQLExpr],
     ) -> Result<LogicalPlan> {
         if local {
             return not_impl_err!("LOCAL is not supported");
@@ -1244,16 +1243,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         predicate_expr: Option<SQLExpr>,
     ) -> Result<LogicalPlan> {
         // Do a table lookup to verify the table exists
-        let table_ref = self.object_name_to_table_reference(table_name.clone())?;
+        let table_name_string = object_name_to_string(&table_name);
+        let table_ref = self.object_name_to_table_reference(table_name)?;
         let table_source = self.context_provider.get_table_source(table_ref.clone())?;
         let schema = (*table_source.schema()).clone();
         let schema = DFSchema::try_from(schema)?;
-        let scan = LogicalPlanBuilder::scan(
-            object_name_to_string(&table_name),
-            table_source,
-            None,
-        )?
-        .build()?;
+        let scan =
+            LogicalPlanBuilder::scan(table_name_string, table_source, None)?.build()?;
 
         let mut planner_context = PlannerContext::new();
         let source = match predicate_expr {
@@ -1285,7 +1281,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
     fn update_to_plan(
         &self,
         table: TableWithJoins,
-        assignments: Vec<Assignment>,
+        assignments: &[Assignment],
         from: Option<TableWithJoins>,
         predicate_expr: Option<SQLExpr>,
     ) -> Result<LogicalPlan> {
@@ -1523,7 +1519,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         extended: bool,
         full: bool,
         sql_table_name: ObjectName,
-        filter: Option<ShowStatementFilter>,
+        filter: &Option<ShowStatementFilter>,
     ) -> Result<LogicalPlan> {
         if filter.is_some() {
             return plan_err!("SHOW COLUMNS with WHERE or LIKE is not supported");
diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs
index 478cfd0ecd907..42324303b9b4a 100644
--- a/datafusion/sql/src/unparser/plan.rs
+++ b/datafusion/sql/src/unparser/plan.rs
@@ -320,7 +320,7 @@ impl Unparser<'_> {
                     return self.derive(plan, relation);
                 }
                 if let Some(query_ref) = query {
-                    query_ref.order_by(self.sorts_to_sql(sort.expr.clone())?);
+                    query_ref.order_by(self.sorts_to_sql(&sort.expr)?);
                 } else {
                     return internal_err!(
                         "Sort operator only valid in a statement context."
@@ -363,7 +363,7 @@ impl Unparser<'_> {
                     .collect::<Result<Vec<_>>>()?;
                 if let Some(sort_expr) = &on.sort_expr {
                     if let Some(query_ref) = query {
-                        query_ref.order_by(self.sorts_to_sql(sort_expr.clone())?);
+                        query_ref.order_by(self.sorts_to_sql(sort_expr)?);
                     } else {
                         return internal_err!(
                             "Sort operator only valid in a statement context."
@@ -550,7 +550,7 @@ impl Unparser<'_> {
         }
     }
 
-    fn sorts_to_sql(&self, sort_exprs: Vec<SortExpr>) -> Result<Vec<ast::OrderByExpr>> {
+    fn sorts_to_sql(&self, sort_exprs: &[SortExpr]) -> Result<Vec<ast::OrderByExpr>> {
         sort_exprs
             .iter()
             .map(|sort_expr| self.sort_to_sql(sort_expr))
diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs
index 5a203703e9675..213257d2e2a71 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#![allow(clippy::needless_pass_by_value)] // OK in tests
+
 use std::any::Any;
 #[cfg(test)]
 use std::collections::HashMap;
diff --git a/datafusion/sqllogictest/src/engines/mod.rs b/datafusion/sqllogictest/src/engines/mod.rs
index a6a0886332ed7..7fa2c93f0c2f7 100644
--- a/datafusion/sqllogictest/src/engines/mod.rs
+++ b/datafusion/sqllogictest/src/engines/mod.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#![allow(clippy::needless_pass_by_value)] // OK in tests
+
 /// Implementation of sqllogictest for datafusion.
 mod conversion;
 mod datafusion_engine;
diff --git a/datafusion/substrait/src/extensions.rs b/datafusion/substrait/src/extensions.rs
index 459d0e0c5ae58..a4e9ed53bf8f0 100644
--- a/datafusion/substrait/src/extensions.rs
+++ b/datafusion/substrait/src/extensions.rs
@@ -39,7 +39,7 @@ impl Extensions {
     /// Registers a function and returns the anchor (reference) to it. If the function has already
     /// been registered, it returns the existing anchor.
     /// Function names are case-insensitive (converted to lowercase).
-    pub fn register_function(&mut self, function_name: String) -> u32 {
+    pub fn register_function(&mut self, function_name: &str) -> u32 {
         let function_name = function_name.to_lowercase();
 
         // Some functions are named differently in Substrait default extensions than in DF
@@ -63,7 +63,7 @@ impl Extensions {
 
     /// Registers a type and returns the anchor (reference) to it. If the type has already
     /// been registered, it returns the existing anchor.
-    pub fn register_type(&mut self, type_name: String) -> u32 {
+    pub fn register_type(&mut self, type_name: &str) -> u32 {
         let type_name = type_name.to_lowercase();
         match self.types.iter().find(|(_, t)| *t == &type_name) {
             Some((type_anchor, _)) => *type_anchor, // Type has been registered
diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs
index 01a854ffbdf24..e2319e173ce29 100644
--- a/datafusion/substrait/src/logical_plan/consumer.rs
+++ b/datafusion/substrait/src/logical_plan/consumer.rs
@@ -1874,10 +1874,13 @@ fn from_substrait_literal(
                         &mut entry_name_idx,
                     )?;
                     ScalarStructBuilder::new()
-                        .with_scalar(Field::new("key", key_sv.data_type(), false), key_sv)
+                        .with_scalar(
+                            Field::new("key", key_sv.data_type(), false),
+                            &key_sv,
+                        )
                         .with_scalar(
                             Field::new("value", value_sv.data_type(), true),
-                            value_sv,
+                            &value_sv,
                         )
                         .build()
                 })
@@ -1937,7 +1940,8 @@ fn from_substrait_literal(
                 let sv = from_substrait_literal(field, extensions, dfs_names, name_idx)?;
                 // We assume everything to be nullable, since Arrow's strict about things matching
                 // and it's hard to match otherwise.
-                builder = builder.with_scalar(Field::new(name, sv.data_type(), true), sv);
+                builder =
+                    builder.with_scalar(Field::new(name, sv.data_type(), true), &sv);
             }
             builder.build()?
         }
diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs
index f323ae146600f..ca3c2e8256e57 100644
--- a/datafusion/substrait/src/logical_plan/producer.rs
+++ b/datafusion/substrait/src/logical_plan/producer.rs
@@ -771,7 +771,7 @@ pub fn to_substrait_agg_measure(
             for arg in args {
                 arguments.push(FunctionArgument { arg_type: Some(ArgType::Value(to_substrait_rex(ctx, arg, schema, 0, extensions)?)) });
             }
-            let function_anchor = extensions.register_function(func.name().to_string());
+            let function_anchor = extensions.register_function(func.name());
             Ok(Measure {
                 measure: Some(AggregateFunction {
                     function_reference: function_anchor,
@@ -831,7 +831,7 @@ pub fn make_binary_op_scalar_func(
     op: Operator,
     extensions: &mut Extensions,
 ) -> Expression {
-    let function_anchor = extensions.register_function(operator_to_name(op).to_string());
+    let function_anchor = extensions.register_function(operator_to_name(op));
     Expression {
         rex_type: Some(RexType::ScalarFunction(ScalarFunction {
             function_reference: function_anchor,
@@ -902,7 +902,7 @@ pub fn to_substrait_rex(
             };
 
             if *negated {
-                let function_anchor = extensions.register_function("not".to_string());
+                let function_anchor = extensions.register_function("not");
 
                 Ok(Expression {
                     rex_type: Some(RexType::ScalarFunction(ScalarFunction {
@@ -933,7 +933,7 @@ pub fn to_substrait_rex(
                 });
             }
 
-            let function_anchor = extensions.register_function(fun.name().to_string());
+            let function_anchor = extensions.register_function(fun.name());
             Ok(Expression {
                 rex_type: Some(RexType::ScalarFunction(ScalarFunction {
                     function_reference: function_anchor,
@@ -1104,7 +1104,7 @@ pub fn to_substrait_rex(
             null_treatment: _,
         }) => {
             // function reference
-            let function_anchor = extensions.register_function(fun.to_string());
+            let function_anchor = extensions.register_function(&fun.to_string());
             // arguments
             let mut arguments: Vec<FunctionArgument> = vec![];
             for arg in args {
@@ -1181,7 +1181,7 @@ pub fn to_substrait_rex(
                 }))),
             };
             if *negated {
-                let function_anchor = extensions.register_function("not".to_string());
+                let function_anchor = extensions.register_function("not");
 
                 Ok(Expression {
                     rex_type: Some(RexType::ScalarFunction(ScalarFunction {
@@ -1420,9 +1420,8 @@ fn to_substrait_type(
             // Substrait doesn't currently support this type, so we represent it as a UDT
             Ok(substrait::proto::Type {
                 kind: Some(r#type::Kind::UserDefined(r#type::UserDefined {
-                    type_reference: extensions.register_type(
-                        INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string(),
-                    ),
+                    type_reference: extensions
+                        .register_type(INTERVAL_MONTH_DAY_NANO_TYPE_NAME),
                     type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
                     nullability,
                     type_parameters: vec![],
@@ -1595,9 +1594,9 @@ fn make_substrait_like_expr(
     extensions: &mut Extensions,
 ) -> Result<Expression> {
     let function_anchor = if ignore_case {
-        extensions.register_function("ilike".to_string())
+        extensions.register_function("ilike")
     } else {
-        extensions.register_function("like".to_string())
+        extensions.register_function("like")
     };
     let expr = to_substrait_rex(ctx, expr, schema, col_ref_offset, extensions)?;
     let pattern = to_substrait_rex(ctx, pattern, schema, col_ref_offset, extensions)?;
@@ -1628,7 +1627,7 @@ fn make_substrait_like_expr(
     };
 
     if negated {
-        let function_anchor = extensions.register_function("not".to_string());
+        let function_anchor = extensions.register_function("not");
 
         Ok(Expression {
             rex_type: Some(RexType::ScalarFunction(ScalarFunction {
@@ -1887,7 +1886,7 @@ fn to_substrait_literal(
             (
                 LiteralType::UserDefined(UserDefined {
                     type_reference: extensions
-                        .register_type(INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string()),
+                        .register_type(INTERVAL_MONTH_DAY_NANO_TYPE_NAME),
                     type_parameters: vec![],
                     val: Some(user_defined::Val::Value(ProtoAny {
                         type_url: INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string(),
@@ -2072,7 +2071,7 @@ fn to_substrait_unary_scalar_fn(
     col_ref_offset: usize,
     extensions: &mut Extensions,
 ) -> Result<Expression> {
-    let function_anchor = extensions.register_function(fn_name.to_string());
+    let function_anchor = extensions.register_function(fn_name);
     let substrait_expr = to_substrait_rex(ctx, arg, schema, col_ref_offset, extensions)?;
 
     Ok(Expression {
@@ -2155,6 +2154,7 @@ fn substrait_field_ref(index: usize) -> Result<Expression> {
 
 #[cfg(test)]
 mod test {
+    #![allow(clippy::needless_pass_by_value)] // OK in tests
     use super::*;
     use crate::logical_plan::consumer::{
         from_substrait_literal_without_names, from_substrait_type_without_names,
@@ -2267,9 +2267,9 @@ mod test {
         let c2 = Field::new("c2", DataType::Utf8, true);
         round_trip_literal(
             ScalarStructBuilder::new()
-                .with_scalar(c0.to_owned(), ScalarValue::Boolean(Some(true)))
-                .with_scalar(c1.to_owned(), ScalarValue::Int32(Some(1)))
-                .with_scalar(c2.to_owned(), ScalarValue::Utf8(None))
+                .with_scalar(c0.to_owned(), &ScalarValue::Boolean(Some(true)))
+                .with_scalar(c1.to_owned(), &ScalarValue::Int32(Some(1)))
+                .with_scalar(c2.to_owned(), &ScalarValue::Utf8(None))
                 .build()?,
         )?;
         round_trip_literal(ScalarStructBuilder::new_null(vec![c0, c1, c2]))?;