From 1d86724a05fa9b926af1d66fac5e1418a01f0cd1 Mon Sep 17 00:00:00 2001 From: June <61218022+itsjunetime@users.noreply.github.com> Date: Tue, 13 Aug 2024 06:43:30 -0600 Subject: [PATCH] fix: Fix various complaints from the latest nightly clippy (#11958) * fix: Fix various complaints from the latest nightly clippy * fix: run fmt for ci :/ * fix: Update cli lockfile since that's what ci wants --- datafusion-cli/Cargo.lock | 68 ++++++++++--------- datafusion/common/Cargo.toml | 1 + datafusion/common/src/error.rs | 67 ++++++++---------- datafusion/common/src/lib.rs | 12 ++++ .../physical_plan/parquet/row_filter.rs | 11 +-- datafusion/functions/src/datetime/common.rs | 60 +++++++--------- datafusion/functions/src/datetime/to_date.rs | 4 +- datafusion/physical-expr/benches/case_when.rs | 4 +- .../physical-expr/src/expressions/case.rs | 2 +- datafusion/sql/src/parser.rs | 4 +- datafusion/sql/src/unparser/plan.rs | 18 ++--- 11 files changed, 116 insertions(+), 135 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 134cde8976d6..90995c1d116a 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -347,13 +347,14 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.0.15" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc65048dd435533bb1baf2ed9956b9a278fbfdcf90301b39ee117f06c0199d37" +checksum = "dc1835b7f27878de8525dc71410b5a31cdcc5f230aed5ba5df968e09c201b23d" dependencies = [ "anstyle", "bstr", "doc-comment", + "libc", "predicates", "predicates-core", "predicates-tree", @@ -386,7 +387,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -874,9 +875,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.8" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "504bdec147f2cc13c8b57ed9401fd8a147cc66b67ad5cb241394244f2c947549" +checksum = "e9e8aabfac534be767c909e0690571677d49f41bd8465ae876fe043d52ba5292" dependencies = [ "jobserver", "libc", @@ -1022,9 +1023,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "core2" @@ -1037,9 +1038,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.12" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +checksum = "51e852e6dc9a5bed1fae92dd2375037bf2b768725bf3be87811edee3249d09ad" dependencies = [ "libc", ] @@ -1103,7 +1104,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -1240,6 +1241,7 @@ dependencies = [ "num_cpus", "object_store", "parquet", + "paste", "sqlparser", ] @@ -1762,7 +1764,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -2441,9 +2443,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4569e456d394deccd22ce1c1913e6ea0e54519f577285001215d33557431afe4" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ "hermit-abi 0.3.9", "libc", @@ -2785,7 +2787,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3386,29 +3388,29 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.205" +version = "1.0.207" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150" +checksum = "5665e14a49a4ea1b91029ba7d3bca9f299e1f7cfa194388ccc20f14743e784f2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.205" +version = "1.0.207" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1" +checksum = "6aea2634c86b0e8ef2cfdc0c340baede54ec27b1e46febd7f80dffb2aa44a00e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] name = "serde_json" -version = "1.0.122" +version = "1.0.124" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" +checksum = "66ad62847a56b3dba58cc891acd13884b9c61138d330c0d7b6181713d4fce38d" dependencies = [ "itoa", "memchr", @@ -3537,7 +3539,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3583,7 +3585,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3596,7 +3598,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3618,9 +3620,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.72" +version = "2.0.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" +checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7" dependencies = [ "proc-macro2", "quote", @@ -3684,7 +3686,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3778,7 +3780,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3875,7 +3877,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3920,7 +3922,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -4074,7 +4076,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", "wasm-bindgen-shared", ] @@ -4108,7 +4110,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4383,7 +4385,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 85dfb2e8f73a..8435d0632576 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -60,6 +60,7 @@ libc = "0.2.140" num_cpus = { workspace = true } object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true, default-features = true } +paste = "1.0.15" pyo3 = { version = "0.21.0", optional = true } sqlparser = { workspace = true } diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index f62acaf0493b..a5c2b3e55bc7 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -481,13 +481,6 @@ macro_rules! unwrap_or_internal_err { }; } -macro_rules! with_dollar_sign { - ($($body:tt)*) => { - macro_rules! __with_dollar_sign { $($body)* } - __with_dollar_sign!($); - } -} - /// Add a macros for concise DataFusionError::* errors declaration /// supports placeholders the same way as `format!` /// Examples: @@ -501,37 +494,37 @@ macro_rules! with_dollar_sign { /// `NAME_DF_ERR` - macro name for wrapping DataFusionError::*. Needed to keep backtrace opportunity /// in construction where DataFusionError::* used directly, like `map_err`, `ok_or_else`, etc macro_rules! make_error { - ($NAME_ERR:ident, $NAME_DF_ERR: ident, $ERR:ident) => { - with_dollar_sign! { - ($d:tt) => { - /// Macro wraps `$ERR` to add backtrace feature - #[macro_export] - macro_rules! $NAME_DF_ERR { - ($d($d args:expr),*) => { - $crate::DataFusionError::$ERR( - format!( - "{}{}", - format!($d($d args),*), - $crate::DataFusionError::get_back_trace(), - ).into() - ) - } + ($NAME_ERR:ident, $NAME_DF_ERR: ident, $ERR:ident) => { make_error!(@inner ($), $NAME_ERR, $NAME_DF_ERR, $ERR); }; + (@inner ($d:tt), $NAME_ERR:ident, $NAME_DF_ERR:ident, $ERR:ident) => { + ::paste::paste!{ + /// Macro wraps `$ERR` to add backtrace feature + #[macro_export] + macro_rules! $NAME_DF_ERR { + ($d($d args:expr),*) => { + $crate::DataFusionError::$ERR( + ::std::format!( + "{}{}", + ::std::format!($d($d args),*), + $crate::DataFusionError::get_back_trace(), + ).into() + ) } + } - /// Macro wraps Err(`$ERR`) to add backtrace feature - #[macro_export] - macro_rules! $NAME_ERR { - ($d($d args:expr),*) => { - Err($crate::DataFusionError::$ERR( - format!( - "{}{}", - format!($d($d args),*), - $crate::DataFusionError::get_back_trace(), - ).into() - )) - } + /// Macro wraps Err(`$ERR`) to add backtrace feature + #[macro_export] + macro_rules! $NAME_ERR { + ($d($d args:expr),*) => { + Err($crate::[<_ $NAME_DF_ERR>]!($d($d args),*)) } } + + #[doc(hidden)] + #[allow(unused)] + pub use $NAME_ERR as [<_ $NAME_ERR>]; + #[doc(hidden)] + #[allow(unused)] + pub use $NAME_DF_ERR as [<_ $NAME_DF_ERR>]; } }; } @@ -613,12 +606,6 @@ macro_rules! schema_err { // To avoid compiler error when using macro in the same crate: // macros from the current crate cannot be referred to by absolute paths -pub use config_err as _config_err; -pub use internal_datafusion_err as _internal_datafusion_err; -pub use internal_err as _internal_err; -pub use not_impl_err as _not_impl_err; -pub use plan_datafusion_err as _plan_datafusion_err; -pub use plan_err as _plan_err; pub use schema_err as _schema_err; /// Create a "field not found" DataFusion::SchemaError diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index 8cd64e7d16a2..19af889e426a 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -73,6 +73,18 @@ pub use table_reference::{ResolvedTableReference, TableReference}; pub use unnest::UnnestOptions; pub use utils::project_schema; +// These are hidden from docs purely to avoid polluting the public view of what this crate exports. +// These are just re-exports of macros by the same name, which gets around the 'cannot refer to +// macro-expanded macro_export macros by their full path' error. +// The design to get around this comes from this comment: +// https://github.com/rust-lang/rust/pull/52234#issuecomment-976702997 +#[doc(hidden)] +pub use error::{ + _config_datafusion_err, _exec_datafusion_err, _internal_datafusion_err, + _not_impl_datafusion_err, _plan_datafusion_err, _resources_datafusion_err, + _substrait_datafusion_err, +}; + /// Downcast an Arrow Array to a concrete type, return an `DataFusionError::Internal` if the cast is /// not possible. In normal usage of DataFusion the downcast should always succeed. /// diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs index f9cce5f783ff..9de132169389 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs @@ -341,14 +341,9 @@ pub fn build_row_filter( let mut candidates: Vec = predicates .into_iter() .flat_map(|expr| { - if let Ok(candidate) = - FilterCandidateBuilder::new(expr.clone(), file_schema, table_schema) - .build(metadata) - { - candidate - } else { - None - } + FilterCandidateBuilder::new(expr.clone(), file_schema, table_schema) + .build(metadata) + .unwrap_or_default() }) .collect(); diff --git a/datafusion/functions/src/datetime/common.rs b/datafusion/functions/src/datetime/common.rs index 4f48ab188403..6048eeeaa554 100644 --- a/datafusion/functions/src/datetime/common.rs +++ b/datafusion/functions/src/datetime/common.rs @@ -28,7 +28,9 @@ use chrono::{DateTime, TimeZone, Utc}; use itertools::Either; use datafusion_common::cast::as_generic_string_array; -use datafusion_common::{exec_err, DataFusionError, Result, ScalarType, ScalarValue}; +use datafusion_common::{ + exec_err, unwrap_or_internal_err, DataFusionError, Result, ScalarType, ScalarValue, +}; use datafusion_expr::ColumnarValue; /// Error message if nanosecond conversion request beyond supported interval @@ -227,46 +229,34 @@ where // if the first argument is a scalar utf8 all arguments are expected to be scalar utf8 ColumnarValue::Scalar(scalar) => match scalar { ScalarValue::Utf8(a) | ScalarValue::LargeUtf8(a) => { - let mut val: Option> = None; - let mut err: Option = None; + let a = a.as_ref(); + // ASK: Why do we trust `a` to be non-null at this point? + let a = unwrap_or_internal_err!(a); - match a { - Some(a) => { - // enumerate all the values finding the first one that returns an Ok result - for (pos, v) in args.iter().enumerate().skip(1) { - if let ColumnarValue::Scalar(s) = v { - if let ScalarValue::Utf8(x) | ScalarValue::LargeUtf8(x) = - s - { - if let Some(s) = x { - match op(a.as_str(), s.as_str()) { - Ok(r) => { - val = Some(Ok(ColumnarValue::Scalar( - S::scalar(Some(op2(r))), - ))); - break; - } - Err(e) => { - err = Some(e); - } - } - } - } else { - return exec_err!("Unsupported data type {s:?} for function {name}, arg # {pos}"); - } - } else { - return exec_err!("Unsupported data type {v:?} for function {name}, arg # {pos}"); + let mut ret = None; + + for (pos, v) in args.iter().enumerate().skip(1) { + let ColumnarValue::Scalar( + ScalarValue::Utf8(x) | ScalarValue::LargeUtf8(x), + ) = v + else { + return exec_err!("Unsupported data type {v:?} for function {name}, arg # {pos}"); + }; + + if let Some(s) = x { + match op(a.as_str(), s.as_str()) { + Ok(r) => { + ret = Some(Ok(ColumnarValue::Scalar(S::scalar(Some( + op2(r), + ))))); + break; } + Err(e) => ret = Some(Err(e)), } } - None => (), } - if let Some(v) = val { - v - } else { - Err(err.unwrap()) - } + unwrap_or_internal_err!(ret) } other => { exec_err!("Unsupported data type {other:?} for function {name}") diff --git a/datafusion/functions/src/datetime/to_date.rs b/datafusion/functions/src/datetime/to_date.rs index e491c0b55508..cc5ffa73c8f1 100644 --- a/datafusion/functions/src/datetime/to_date.rs +++ b/datafusion/functions/src/datetime/to_date.rs @@ -58,7 +58,7 @@ impl ToDateFunc { }, "to_date", ), - n if n >= 2 => handle_multiple::( + 2.. => handle_multiple::( args, |s, format| { string_to_timestamp_nanos_formatted(s, format) @@ -72,7 +72,7 @@ impl ToDateFunc { |n| n, "to_date", ), - _ => exec_err!("Unsupported 0 argument count for function to_date"), + 0 => exec_err!("Unsupported 0 argument count for function to_date"), } } } diff --git a/datafusion/physical-expr/benches/case_when.rs b/datafusion/physical-expr/benches/case_when.rs index 8a34f34a82db..9eda1277c263 100644 --- a/datafusion/physical-expr/benches/case_when.rs +++ b/datafusion/physical-expr/benches/case_when.rs @@ -44,12 +44,12 @@ fn criterion_benchmark(c: &mut Criterion) { if i % 7 == 0 { c2.append_null(); } else { - c2.append_value(&format!("string {i}")); + c2.append_value(format!("string {i}")); } if i % 9 == 0 { c3.append_null(); } else { - c3.append_value(&format!("other string {i}")); + c3.append_value(format!("other string {i}")); } } let c1 = Arc::new(c1.finish()); diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index 583a4ef32542..c6afb5c05985 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -1146,7 +1146,7 @@ mod tests { if i % 7 == 0 { c2.append_null(); } else { - c2.append_value(&format!("string {i}")); + c2.append_value(format!("string {i}")); } } let c1 = Arc::new(c1.finish()); diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 40dd368f9e80..dcb33aa7b44f 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -523,9 +523,7 @@ impl<'a> DFParser<'a> { Ok(n) => Ok(Value::Number(n, l)), // The tokenizer should have ensured `n` is an integer // so this should not be possible - Err(e) => parser_err!(format!( - "Unexpected error: could not parse '{n}' as number: {e}" - )), + Err(e) => match e {}, }, _ => self.parser.expected("string or numeric value", next_token), } diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 277efd5fe700..024f33fb2c7d 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -359,18 +359,14 @@ impl Unparser<'_> { .iter() .map(|e| self.select_item_to_sql(e)) .collect::>>()?; - match &on.sort_expr { - Some(sort_expr) => { - if let Some(query_ref) = query { - query_ref - .order_by(self.sort_to_sql(sort_expr.clone())?); - } else { - return internal_err!( - "Sort operator only valid in a statement context." - ); - } + if let Some(sort_expr) = &on.sort_expr { + if let Some(query_ref) = query { + query_ref.order_by(self.sort_to_sql(sort_expr.clone())?); + } else { + return internal_err!( + "Sort operator only valid in a statement context." + ); } - None => {} } select.projection(items); (ast::Distinct::On(exprs), on.input.as_ref())