
Enable needless_pass_by_value clippy lint
findepi committed Aug 30, 2024
1 parent 5e146e1 commit 978f0b5
Showing 167 changed files with 687 additions and 604 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
@@ -158,8 +158,12 @@ panic = 'unwind'
rpath = false

[workspace.lints.clippy]
+# Note: we run clippy with `-D warnings`, so all warnings are treated as errors.
# Detects large stack-allocated futures that may cause stack overflow crashes (see threshold in clippy.toml)
large_futures = "warn"
+# Detects functions that do not need to move their parameters and thus potentially require clones.
+# Performance-related, OK to suppress in test code.
+needless_pass_by_value = "warn"

[workspace.lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] }
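Note: for readers unfamiliar with the lint, `needless_pass_by_value` fires when a function takes ownership of an argument it never consumes, forcing callers to clone. A minimal sketch of the pattern (hypothetical names, not from this commit):

// Flagged: `name` is moved in but only read, so a caller that still
// needs its String must clone it first.
fn greet(name: String) -> String {
    format!("hello, {name}")
}

// Preferred: borrow; the caller keeps ownership and no clone is needed.
fn greet_by_ref(name: &str) -> String {
    format!("hello, {name}")
}

fn main() {
    let name = String::from("clippy");
    let _ = greet(name.clone()); // clone forced by the by-value signature
    let _ = greet_by_ref(&name); // no clone
}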
10 changes: 7 additions & 3 deletions datafusion-examples/examples/custom_datasource.rs
@@ -121,7 +121,11 @@ impl CustomDataSource {
projections: Option<&Vec<usize>>,
schema: SchemaRef,
) -> Result<Arc<dyn ExecutionPlan>> {
-Ok(Arc::new(CustomExec::new(projections, schema, self.clone())))
+Ok(Arc::new(CustomExec::new(
+    projections,
+    &schema,
+    self.clone(),
+)))
}

pub(crate) fn populate_users(&self) {
@@ -196,10 +200,10 @@ struct CustomExec {
impl CustomExec {
fn new(
projections: Option<&Vec<usize>>,
-schema: SchemaRef,
+schema: &SchemaRef,
db: CustomDataSource,
) -> Self {
-let projected_schema = project_schema(&schema, projections).unwrap();
+let projected_schema = project_schema(schema, projections).unwrap();
let cache = Self::compute_properties(projected_schema.clone());
Self {
db,
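Note: `SchemaRef` is arrow's alias for `Arc<Schema>`, so the change above replaces a moved `Arc` with a borrowed one; the callee clones only if it actually needs ownership. A minimal sketch of the pattern (simplified types, not the DataFusion API):

use std::sync::Arc;

struct Schema;
type SchemaRef = Arc<Schema>; // same shape as arrow's alias

// Borrowing the Arc: the caller keeps its handle, and the refcount is
// bumped only where ownership is genuinely required.
fn use_schema(schema: &SchemaRef) -> SchemaRef {
    Arc::clone(schema)
}

fn main() {
    let schema: SchemaRef = Arc::new(Schema);
    let _owned = use_schema(&schema);
    let _still_usable = schema; // the caller retained its Arc
}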
1 change: 1 addition & 0 deletions datafusion-examples/examples/flight/flight_server.rs
@@ -186,6 +186,7 @@ impl FlightService for FlightServiceImpl {
}
}

+#[allow(clippy::needless_pass_by_value)] // moved value better aligns with .map_err() usages
fn to_tonic_err(e: datafusion::error::DataFusionError) -> Status {
Status::internal(format!("{e:?}"))
}
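Note: the `#[allow]` is justified because `Result::map_err` hands the error to its callback by value (`FnOnce(E) -> F`), so a by-value `to_tonic_err` composes point-free. A self-contained sketch with stand-in types (`MyError` is hypothetical):

#[derive(Debug)]
struct MyError; // stand-in for DataFusionError

// By value on purpose: matches the FnOnce(E) -> F shape map_err expects.
fn to_err_string(e: MyError) -> String {
    format!("{e:?}") // stand-in for Status::internal(...)
}

fn run() -> Result<i32, String> {
    let r: Result<i32, MyError> = Err(MyError);
    // The function name slots in directly; a by-reference signature would
    // need a wrapping closure such as |e| to_err_string(&e).
    r.map_err(to_err_string)
}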
6 changes: 3 additions & 3 deletions datafusion-examples/examples/pruning.rs
@@ -64,7 +64,7 @@ async fn main() {
//
// Note the predicate does not automatically coerce types or simplify
// expressions. See expr_api.rs examples for how to do this if required
-let predicate = create_pruning_predicate(expr, &my_catalog.schema);
+let predicate = create_pruning_predicate(&expr, &my_catalog.schema);

// Evaluate the predicate for the three files in the catalog
let prune_results = predicate.prune(&my_catalog).unwrap();
@@ -184,10 +184,10 @@ impl PruningStatistics for MyCatalog {
}
}

-fn create_pruning_predicate(expr: Expr, schema: &SchemaRef) -> PruningPredicate {
+fn create_pruning_predicate(expr: &Expr, schema: &SchemaRef) -> PruningPredicate {
let df_schema = DFSchema::try_from(schema.as_ref().clone()).unwrap();
let props = ExecutionProps::new();
-let physical_expr = create_physical_expr(&expr, &df_schema, &props).unwrap();
+let physical_expr = create_physical_expr(expr, &df_schema, &props).unwrap();
PruningPredicate::try_new(physical_expr, schema.clone()).unwrap()
}

8 changes: 4 additions & 4 deletions datafusion/common/src/column.rs
@@ -309,7 +309,7 @@ mod tests {
use arrow_schema::SchemaBuilder;
use std::sync::Arc;

-fn create_qualified_schema(qualifier: &str, names: Vec<&str>) -> Result<DFSchema> {
+fn create_qualified_schema(qualifier: &str, names: &[&str]) -> Result<DFSchema> {
let mut schema_builder = SchemaBuilder::new();
schema_builder.extend(
names
@@ -322,9 +322,9 @@

#[test]
fn test_normalize_with_schemas_and_ambiguity_check() -> Result<()> {
-let schema1 = create_qualified_schema("t1", vec!["a", "b"])?;
-let schema2 = create_qualified_schema("t2", vec!["c", "d"])?;
-let schema3 = create_qualified_schema("t3", vec!["a", "b", "c", "d", "e"])?;
+let schema1 = create_qualified_schema("t1", &["a", "b"])?;
+let schema2 = create_qualified_schema("t2", &["c", "d"])?;
+let schema3 = create_qualified_schema("t3", &["a", "b", "c", "d", "e"])?;

// already normalized
let col = Column::new(Some("t1"), "a");
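Note: switching the helper from `Vec<&str>` to `&[&str]` also lets call sites drop the `vec!` allocation entirely, since both arrays and `Vec`s coerce to slices. A small illustration (hypothetical helper):

fn count_names(names: &[&str]) -> usize {
    names.len()
}

fn main() {
    let from_array = count_names(&["a", "b"]); // no heap allocation
    let v = vec!["c", "d"];
    let from_vec = count_names(&v); // &Vec<&str> coerces to &[&str]
    assert_eq!(from_array + from_vec, 4);
}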
1 change: 1 addition & 0 deletions datafusion/common/src/error.rs
@@ -812,6 +812,7 @@ mod test {
Err(ArrowError::SchemaError("bar".to_string()).into())
}

+#[allow(clippy::needless_pass_by_value)] // OK in tests
fn do_root_test(e: DataFusionError, exp: DataFusionError) {
let e = e.find_root();

18 changes: 9 additions & 9 deletions datafusion/common/src/file_options/parquet_writer.rs
@@ -447,22 +447,22 @@ mod tests {

fn extract_column_options(
props: &WriterProperties,
-col: ColumnPath,
+col: &ColumnPath,
) -> ParquetColumnOptions {
-let bloom_filter_default_props = props.bloom_filter_properties(&col);
+let bloom_filter_default_props = props.bloom_filter_properties(col);

ParquetColumnOptions {
bloom_filter_enabled: Some(bloom_filter_default_props.is_some()),
-encoding: props.encoding(&col).map(|s| s.to_string()),
-dictionary_enabled: Some(props.dictionary_enabled(&col)),
-compression: match props.compression(&col) {
+encoding: props.encoding(col).map(|s| s.to_string()),
+dictionary_enabled: Some(props.dictionary_enabled(col)),
+compression: match props.compression(col) {
Compression::ZSTD(lvl) => {
Some(format!("zstd({})", lvl.compression_level()))
}
_ => None,
},
statistics_enabled: Some(
-match props.statistics_enabled(&col) {
+match props.statistics_enabled(col) {
EnabledStatistics::None => "none",
EnabledStatistics::Chunk => "chunk",
EnabledStatistics::Page => "page",
@@ -471,18 +471,18 @@
),
bloom_filter_fpp: bloom_filter_default_props.map(|p| p.fpp),
bloom_filter_ndv: bloom_filter_default_props.map(|p| p.ndv),
-max_statistics_size: Some(props.max_statistics_size(&col)),
+max_statistics_size: Some(props.max_statistics_size(col)),
}
}

/// For testing only, take a single write's props and convert back into the session config.
/// (round-trip should be the identity, confirming the conversion is correct.)
fn session_config_from_writer_props(props: &WriterProperties) -> TableParquetOptions {
let default_col = ColumnPath::from("col doesn't have specific config");
-let default_col_props = extract_column_options(props, default_col);
+let default_col_props = extract_column_options(props, &default_col);

let configured_col = ColumnPath::from(COL_NAME);
-let configured_col_props = extract_column_options(props, configured_col);
+let configured_col_props = extract_column_options(props, &configured_col);

let key_value_metadata = props
.key_value_metadata()
18 changes: 9 additions & 9 deletions datafusion/common/src/hash_utils.rs
@@ -143,7 +143,7 @@ fn hash_array_primitive<T>(
/// with the new hash using `combine_hashes`
#[cfg(not(feature = "force_hash_collisions"))]
fn hash_array<T>(
-array: T,
+array: &T,
random_state: &RandomState,
hashes_buffer: &mut [u64],
rehash: bool,
@@ -380,16 +380,16 @@ pub fn create_hashes<'a>(
downcast_primitive_array! {
array => hash_array_primitive(array, random_state, hashes_buffer, rehash),
DataType::Null => hash_null(random_state, hashes_buffer, rehash),
-DataType::Boolean => hash_array(as_boolean_array(array)?, random_state, hashes_buffer, rehash),
-DataType::Utf8 => hash_array(as_string_array(array)?, random_state, hashes_buffer, rehash),
-DataType::Utf8View => hash_array(as_string_view_array(array)?, random_state, hashes_buffer, rehash),
-DataType::LargeUtf8 => hash_array(as_largestring_array(array), random_state, hashes_buffer, rehash),
-DataType::Binary => hash_array(as_generic_binary_array::<i32>(array)?, random_state, hashes_buffer, rehash),
-DataType::BinaryView => hash_array(as_binary_view_array(array)?, random_state, hashes_buffer, rehash),
-DataType::LargeBinary => hash_array(as_generic_binary_array::<i64>(array)?, random_state, hashes_buffer, rehash),
+DataType::Boolean => hash_array(&as_boolean_array(array)?, random_state, hashes_buffer, rehash),
+DataType::Utf8 => hash_array(&as_string_array(array)?, random_state, hashes_buffer, rehash),
+DataType::Utf8View => hash_array(&as_string_view_array(array)?, random_state, hashes_buffer, rehash),
+DataType::LargeUtf8 => hash_array(&as_largestring_array(array), random_state, hashes_buffer, rehash),
+DataType::Binary => hash_array(&as_generic_binary_array::<i32>(array)?, random_state, hashes_buffer, rehash),
+DataType::BinaryView => hash_array(&as_binary_view_array(array)?, random_state, hashes_buffer, rehash),
+DataType::LargeBinary => hash_array(&as_generic_binary_array::<i64>(array)?, random_state, hashes_buffer, rehash),
DataType::FixedSizeBinary(_) => {
let array: &FixedSizeBinaryArray = array.as_any().downcast_ref().unwrap();
-hash_array(array, random_state, hashes_buffer, rehash)
+hash_array(&array, random_state, hashes_buffer, rehash)
}
DataType::Decimal128(_, _) => {
let array = as_primitive_array::<Decimal128Type>(array)?;
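Note: the call sites above borrow temporaries (e.g. `&as_boolean_array(array)?`); that is legal because a temporary borrowed in argument position lives until the end of the enclosing statement. A generic sketch of the same shape (hypothetical function):

// Generic by reference: works for any T, including references to
// temporaries created inside the argument expression itself.
fn byte_len<T: AsRef<[u8]>>(data: &T) -> usize {
    data.as_ref().len()
}

fn main() {
    // The Vec temporary lives to the end of this statement, so borrowing
    // it inline is fine.
    assert_eq!(byte_len(&vec![1u8, 2, 3]), 3);

    let owned = String::from("abc");
    assert_eq!(byte_len(&owned), 3); // and the caller keeps `owned`
}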
30 changes: 17 additions & 13 deletions datafusion/common/src/scalar/mod.rs
@@ -721,19 +721,19 @@ impl std::hash::Hash for ScalarValue {
v.hash(state)
}
List(arr) => {
-hash_nested_array(arr.to_owned() as ArrayRef, state);
+hash_nested_array(&(arr.to_owned() as ArrayRef), state);
}
LargeList(arr) => {
-hash_nested_array(arr.to_owned() as ArrayRef, state);
+hash_nested_array(&(arr.to_owned() as ArrayRef), state);
}
FixedSizeList(arr) => {
-hash_nested_array(arr.to_owned() as ArrayRef, state);
+hash_nested_array(&(arr.to_owned() as ArrayRef), state);
}
Struct(arr) => {
-hash_nested_array(arr.to_owned() as ArrayRef, state);
+hash_nested_array(&(arr.to_owned() as ArrayRef), state);
}
Map(arr) => {
-hash_nested_array(arr.to_owned() as ArrayRef, state);
+hash_nested_array(&(arr.to_owned() as ArrayRef), state);
}
Date32(v) => v.hash(state),
Date64(v) => v.hash(state),
@@ -767,7 +767,7 @@ impl std::hash::Hash for ScalarValue {
}
}

-fn hash_nested_array<H: std::hash::Hasher>(arr: ArrayRef, state: &mut H) {
+fn hash_nested_array<H: std::hash::Hasher>(arr: &ArrayRef, state: &mut H) {
let arrays = vec![arr.to_owned()];
let hashes_buffer = &mut vec![0; arr.len()];
let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0);
@@ -3220,7 +3220,7 @@ impl From<Vec<(&str, ScalarValue)>> for ScalarValue {
value
.into_iter()
.fold(ScalarStructBuilder::new(), |builder, (name, value)| {
-builder.with_name_and_scalar(name, value)
+builder.with_name_and_scalar(name, &value)
})
.build()
.unwrap()
@@ -3527,9 +3527,11 @@ impl fmt::Display for ScalarValue {
)?,
None => write!(f, "NULL")?,
},
-ScalarValue::List(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
-ScalarValue::LargeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
-ScalarValue::FixedSizeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
+ScalarValue::List(arr) => fmt_list(&(arr.to_owned() as ArrayRef), f)?,
+ScalarValue::LargeList(arr) => fmt_list(&(arr.to_owned() as ArrayRef), f)?,
+ScalarValue::FixedSizeList(arr) => {
+    fmt_list(&(arr.to_owned() as ArrayRef), f)?
+}
ScalarValue::Date32(e) => {
format_option!(f, e.map(|v| Date32Type::to_naive_date(v).to_string()))?
}
@@ -3636,7 +3638,7 @@ impl fmt::Display for ScalarValue {
}
}

-fn fmt_list(arr: ArrayRef, f: &mut fmt::Formatter) -> fmt::Result {
+fn fmt_list(arr: &ArrayRef, f: &mut fmt::Formatter) -> fmt::Result {
// ScalarValue List, LargeList, FixedSizeList should always have a single element
assert_eq!(arr.len(), 1);
let options = FormatOptions::default().with_display_error(true);
@@ -4394,6 +4396,7 @@ mod tests {
}

// Verifies that ScalarValue has the same behavior as the compute kernel when it overflows.
+#[allow(clippy::needless_pass_by_value)] // OK in tests
fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
where
T: ArrowNumericType,
@@ -5822,6 +5825,7 @@ mod tests {
}

// mimics how casting works on scalar values by casting `scalar` to `desired_type`
+#[allow(clippy::needless_pass_by_value)] // OK in tests
fn check_scalar_cast(scalar: ScalarValue, desired_type: DataType) {
// convert from scalar --> Array to call cast
let scalar_array = scalar.to_array().expect("Failed to convert to array");
@@ -6426,8 +6430,8 @@ mod tests {
let field_b = Field::new("b", DataType::Utf8, true);

let s = ScalarStructBuilder::new()
-.with_scalar(field_a, ScalarValue::from(1i32))
-.with_scalar(field_b, ScalarValue::Utf8(None))
+.with_scalar(field_a, &ScalarValue::from(1i32))
+.with_scalar(field_b, &ScalarValue::Utf8(None))
.build()
.unwrap();

4 changes: 2 additions & 2 deletions datafusion/common/src/scalar/struct_builder.rs
@@ -87,15 +87,15 @@ impl ScalarStructBuilder {
}

/// Add the specified field and `ScalarValue` to the struct.
-pub fn with_scalar(self, field: impl IntoFieldRef, value: ScalarValue) -> Self {
+pub fn with_scalar(self, field: impl IntoFieldRef, value: &ScalarValue) -> Self {
// valid scalar value should not fail
let array = value.to_array().unwrap();
self.with_array(field, array)
}

/// Add a field with the specified name and value to the struct.
/// The field is created with the value's data type and as non-nullable.
-pub fn with_name_and_scalar(self, name: &str, value: ScalarValue) -> Self {
+pub fn with_name_and_scalar(self, name: &str, value: &ScalarValue) -> Self {
let field = Field::new(name, value.data_type(), false);
self.with_scalar(field, value)
}
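Note: the builder methods only read the scalar (to derive an array and a data type), so borrowing lets callers reuse the value after the call. A simplified sketch of a chained builder with borrowed arguments (toy types, not the ScalarStructBuilder API):

#[derive(Default)]
struct RowBuilder {
    cells: Vec<String>,
}

impl RowBuilder {
    // `self` is consumed for chaining; `value` is borrowed because it is
    // only read to compute the stored representation.
    fn with_value(mut self, value: &i64) -> Self {
        self.cells.push(value.to_string());
        self
    }

    fn build(self) -> Vec<String> {
        self.cells
    }
}

fn main() {
    let v = 42;
    let row = RowBuilder::default().with_value(&v).with_value(&v).build();
    assert_eq!(row, vec!["42".to_string(), "42".to_string()]);
}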
4 changes: 2 additions & 2 deletions datafusion/common/src/stats.rs
@@ -263,7 +263,7 @@ impl Statistics {
/// parameter to compute global statistics in a multi-partition setting.
pub fn with_fetch(
mut self,
-schema: SchemaRef,
+schema: &SchemaRef,
fetch: Option<usize>,
skip: usize,
n_partitions: usize,
@@ -317,7 +317,7 @@ impl Statistics {
..
} => check_num_rows(fetch.and_then(|v| v.checked_mul(n_partitions)), false),
};
-self.column_statistics = Statistics::unknown_column(&schema);
+self.column_statistics = Statistics::unknown_column(schema);
self.total_byte_size = Precision::Absent;
Ok(self)
}
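Note: `needless_pass_by_value` only examines non-self parameters, which is why the consuming `mut self` receiver of `with_fetch` is untouched and only `schema` became a borrow. A toy sketch of that split (hypothetical types):

struct Config {
    name: Option<String>,
}

impl Config {
    // `mut self` by value is idiomatic builder chaining and not linted;
    // only the non-self parameter needed to become a borrow.
    fn with_name(mut self, name: &str) -> Self {
        self.name = Some(name.to_owned());
        self
    }
}

fn main() {
    let cfg = Config { name: None }.with_name("datafusion");
    assert_eq!(cfg.name.as_deref(), Some("datafusion"));
}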
2 changes: 1 addition & 1 deletion datafusion/common/src/utils/mod.rs
@@ -444,7 +444,7 @@ pub fn arrays_into_list_array(
}

/// Helper function to convert a ListArray into a vector of ArrayRefs.
-pub fn list_to_arrays<O: OffsetSizeTrait>(a: ArrayRef) -> Vec<ArrayRef> {
+pub fn list_to_arrays<O: OffsetSizeTrait>(a: &ArrayRef) -> Vec<ArrayRef> {
a.as_list::<O>().iter().flatten().collect::<Vec<_>>()
}

2 changes: 2 additions & 0 deletions datafusion/core/benches/aggregate_query_sql.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

+#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions
+
#[macro_use]
extern crate criterion;
extern crate arrow;
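Note: this benchmark (and the seven below) opts out wholesale with an inner attribute: `#![allow(...)]` at the top of the crate root covers every item in the crate, whereas the outer `#[allow(...)]` form used in the test modules above covers a single item. A sketch of the difference:

// Inner attribute at the crate root: applies to the whole crate,
// which suits benchmark helpers wholesale.
#![allow(clippy::needless_pass_by_value)]

// Outer attribute: applies only to the item it is attached to.
#[allow(clippy::needless_pass_by_value)]
fn helper(s: String) -> usize {
    s.len()
}

fn main() {
    assert_eq!(helper(String::from("bench")), 5);
}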
2 changes: 2 additions & 0 deletions datafusion/core/benches/data_utils/mod.rs
@@ -17,6 +17,8 @@

//! This module provides the in-memory table for more realistic benchmarking.

+#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions
+
use arrow::{
array::Float32Array,
array::Float64Array,
2 changes: 2 additions & 0 deletions datafusion/core/benches/distinct_query_sql.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

+#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions
+
#[macro_use]
extern crate criterion;
extern crate arrow;
2 changes: 2 additions & 0 deletions datafusion/core/benches/math_query_sql.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

+#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions
+
#[macro_use]
extern crate criterion;
use criterion::Criterion;
2 changes: 2 additions & 0 deletions datafusion/core/benches/physical_plan.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

+#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions
+
#[macro_use]
extern crate criterion;
use criterion::{BatchSize, Criterion};
2 changes: 2 additions & 0 deletions datafusion/core/benches/sort_limit_query_sql.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

+#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions
+
#[macro_use]
extern crate criterion;
use criterion::Criterion;
2 changes: 2 additions & 0 deletions datafusion/core/benches/sql_planner.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

+#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions
+
#[macro_use]
extern crate criterion;
extern crate arrow;
2 changes: 2 additions & 0 deletions datafusion/core/benches/topk_aggregate.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

+#![allow(clippy::needless_pass_by_value)] // OK in benchmark helper functions
+
mod data_utils;
use arrow::util::pretty::pretty_format_batches;
use criterion::{criterion_group, criterion_main, Criterion};
(diff truncated; remaining changed files not shown)
