From bd59d2d7cb1334b3aa2792cec5effc277034a3e4 Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Fri, 20 Sep 2024 18:53:19 -0500 Subject: [PATCH 1/3] auto-fix self --- Cargo.toml | 3 + src/arrow2/Cargo.toml | 3 + src/arrow2/src/array/binary/mod.rs | 14 +- src/arrow2/src/array/binary/mutable.rs | 2 +- src/arrow2/src/array/binary/mutable_values.rs | 4 +- src/arrow2/src/array/boolean/mod.rs | 6 +- src/arrow2/src/array/boolean/mutable.rs | 6 +- src/arrow2/src/array/dictionary/ffi.rs | 2 +- src/arrow2/src/array/dictionary/mutable.rs | 2 +- src/arrow2/src/array/equal/mod.rs | 24 +- .../src/array/fixed_size_binary/mutable.rs | 2 +- .../src/array/fixed_size_list/mutable.rs | 2 +- src/arrow2/src/array/growable/binary.rs | 2 +- src/arrow2/src/array/growable/boolean.rs | 2 +- src/arrow2/src/array/growable/fixed_binary.rs | 2 +- src/arrow2/src/array/growable/null.rs | 2 +- src/arrow2/src/array/growable/primitive.rs | 2 +- src/arrow2/src/array/growable/structure.rs | 2 +- src/arrow2/src/array/growable/union.rs | 2 +- src/arrow2/src/array/indexable.rs | 2 +- src/arrow2/src/array/list/mutable.rs | 4 +- src/arrow2/src/array/primitive/mod.rs | 8 +- src/arrow2/src/array/primitive/mutable.rs | 2 +- src/arrow2/src/array/struct_/mutable.rs | 2 +- src/arrow2/src/array/utf8/mod.rs | 16 +- src/arrow2/src/array/utf8/mutable.rs | 2 +- src/arrow2/src/array/utf8/mutable_values.rs | 4 +- src/arrow2/src/bitmap/immutable.rs | 8 +- src/arrow2/src/bitmap/mutable.rs | 4 +- src/arrow2/src/bitmap/utils/zip_validity.rs | 4 +- src/arrow2/src/buffer/immutable.rs | 2 +- src/arrow2/src/buffer/mod.rs | 2 +- .../src/compute/arithmetics/basic/add.rs | 20 +- .../src/compute/arithmetics/basic/div.rs | 8 +- .../src/compute/arithmetics/basic/mul.rs | 20 +- .../src/compute/arithmetics/basic/rem.rs | 8 +- .../src/compute/arithmetics/basic/sub.rs | 20 +- .../src/compute/arithmetics/decimal/add.rs | 12 +- .../src/compute/arithmetics/decimal/div.rs | 8 +- .../src/compute/arithmetics/decimal/mul.rs | 12 +- .../src/compute/arithmetics/decimal/sub.rs | 12 +- src/arrow2/src/datatypes/field.rs | 2 +- src/arrow2/src/datatypes/mod.rs | 48 +-- src/arrow2/src/datatypes/schema.rs | 2 +- src/arrow2/src/ffi/array.rs | 4 +- src/arrow2/src/ffi/schema.rs | 2 +- src/arrow2/src/io/csv/mod.rs | 4 +- src/arrow2/src/io/csv/read_async/mod.rs | 2 +- src/arrow2/src/io/csv/write/serialize.rs | 2 +- src/arrow2/src/io/flight/mod.rs | 2 +- src/arrow2/src/io/ipc/append/mod.rs | 4 +- src/arrow2/src/io/ipc/read/error.rs | 4 +- src/arrow2/src/io/ipc/read/stream.rs | 2 +- src/arrow2/src/io/json/read/deserialize.rs | 12 +- src/arrow2/src/io/json/read/mod.rs | 2 +- src/arrow2/src/io/json/write/utf8.rs | 18 +- src/arrow2/src/io/json_integration/mod.rs | 2 +- src/arrow2/src/io/orc/mod.rs | 2 +- src/arrow2/src/io/parquet/mod.rs | 8 +- .../src/io/parquet/read/deserialize/utils.rs | 4 +- src/arrow2/src/io/parquet/read/schema/mod.rs | 2 +- src/arrow2/src/io/parquet/write/pages.rs | 8 +- src/arrow2/src/types/native.rs | 20 +- src/common/arrow-ffi/Cargo.toml | 3 + src/common/daft-config/Cargo.toml | 3 + src/common/daft-config/src/lib.rs | 2 +- src/common/daft-config/src/python.rs | 19 +- src/common/display/Cargo.toml | 3 + src/common/error/Cargo.toml | 3 + src/common/error/src/python.rs | 2 +- src/common/file-formats/Cargo.toml | 3 + src/common/hashable-float-wrapper/Cargo.toml | 3 + src/common/io-config/Cargo.toml | 3 + src/common/io-config/src/http.rs | 4 +- src/common/io-config/src/lib.rs | 4 +- src/common/io-config/src/python.rs | 22 +- src/common/io-config/src/s3.rs | 2 +- src/common/py-serde/Cargo.toml | 3 + src/common/resource-request/Cargo.toml | 3 + src/common/resource-request/src/lib.rs | 14 +- src/common/system-info/Cargo.toml | 3 + src/common/system-info/src/lib.rs | 2 +- src/common/tracing/Cargo.toml | 3 + src/common/treenode/Cargo.toml | 3 + src/common/treenode/src/lib.rs | 46 +-- src/common/version/Cargo.toml | 3 + src/daft-compression/Cargo.toml | 3 + src/daft-core/Cargo.toml | 3 + .../src/array/fixed_size_list_array.rs | 4 +- src/daft-core/src/array/from.rs | 38 +- src/daft-core/src/array/from_iter.rs | 18 +- src/daft-core/src/array/image_array.rs | 2 +- src/daft-core/src/array/list_array.rs | 4 +- src/daft-core/src/array/mod.rs | 10 +- src/daft-core/src/array/ops/apply.rs | 18 +- .../src/array/ops/approx_count_distinct.rs | 2 +- src/daft-core/src/array/ops/arange.rs | 2 +- src/daft-core/src/array/ops/between.rs | 4 +- src/daft-core/src/array/ops/bitwise.rs | 8 +- src/daft-core/src/array/ops/broadcast.rs | 14 +- src/daft-core/src/array/ops/cast.rs | 6 +- src/daft-core/src/array/ops/compare_agg.rs | 42 +-- src/daft-core/src/array/ops/comparison.rs | 230 ++++------- src/daft-core/src/array/ops/concat.rs | 8 +- src/daft-core/src/array/ops/concat_agg.rs | 6 +- src/daft-core/src/array/ops/filter.rs | 4 +- src/daft-core/src/array/ops/from_arrow.rs | 12 +- src/daft-core/src/array/ops/full.rs | 8 +- src/daft-core/src/array/ops/if_else.rs | 10 +- src/daft-core/src/array/ops/is_in.rs | 8 +- src/daft-core/src/array/ops/list_agg.rs | 8 +- src/daft-core/src/array/ops/sort.rs | 8 +- src/daft-core/src/array/ops/take.rs | 19 +- src/daft-core/src/array/ops/trigonometry.rs | 24 +- src/daft-core/src/array/ops/truncate.rs | 10 +- src/daft-core/src/array/ops/utf8.rs | 131 +++---- .../src/array/pseudo_arrow/compute.rs | 2 +- src/daft-core/src/array/pseudo_arrow/mod.rs | 2 +- .../src/array/pseudo_arrow/python.rs | 13 +- src/daft-core/src/array/serdes.rs | 2 +- src/daft-core/src/array/struct_array.rs | 4 +- src/daft-core/src/count_mode.rs | 10 +- src/daft-core/src/datatypes/logical.rs | 2 +- src/daft-core/src/join.rs | 26 +- src/daft-core/src/python/series.rs | 4 +- .../src/series/array_impl/data_array.rs | 2 +- .../src/series/array_impl/logical_array.rs | 2 +- src/daft-core/src/series/mod.rs | 4 +- src/daft-core/src/series/ops/abs.rs | 2 +- src/daft-core/src/series/ops/agg.rs | 28 +- src/daft-core/src/series/ops/between.rs | 8 +- src/daft-core/src/series/ops/broadcast.rs | 2 +- src/daft-core/src/series/ops/cast.rs | 2 +- src/daft-core/src/series/ops/cbrt.rs | 2 +- src/daft-core/src/series/ops/ceil.rs | 2 +- src/daft-core/src/series/ops/comparison.rs | 6 +- src/daft-core/src/series/ops/concat.rs | 2 +- src/daft-core/src/series/ops/exp.rs | 2 +- src/daft-core/src/series/ops/filter.rs | 2 +- src/daft-core/src/series/ops/float.rs | 6 +- src/daft-core/src/series/ops/floor.rs | 2 +- src/daft-core/src/series/ops/if_else.rs | 2 +- src/daft-core/src/series/ops/is_in.rs | 2 +- src/daft-core/src/series/ops/list.rs | 18 +- src/daft-core/src/series/ops/log.rs | 8 +- src/daft-core/src/series/ops/map.rs | 2 +- src/daft-core/src/series/ops/minhash.rs | 2 +- src/daft-core/src/series/ops/not.rs | 2 +- src/daft-core/src/series/ops/null.rs | 6 +- src/daft-core/src/series/ops/repeat.rs | 2 +- src/daft-core/src/series/ops/round.rs | 2 +- src/daft-core/src/series/ops/shift.rs | 4 +- src/daft-core/src/series/ops/sign.rs | 2 +- .../src/series/ops/sketch_percentile.rs | 2 +- src/daft-core/src/series/ops/sort.rs | 4 +- src/daft-core/src/series/ops/sqrt.rs | 2 +- src/daft-core/src/series/ops/struct_.rs | 2 +- src/daft-core/src/series/ops/take.rs | 6 +- src/daft-core/src/series/ops/utf8.rs | 69 ++-- src/daft-csv/Cargo.toml | 3 + src/daft-csv/src/lib.rs | 6 +- src/daft-decoding/Cargo.toml | 3 + src/daft-dsl/Cargo.toml | 3 + src/daft-dsl/src/expr.rs | 56 +-- src/daft-dsl/src/functions/python/mod.rs | 4 +- src/daft-dsl/src/lit.rs | 18 +- src/daft-dsl/src/python.rs | 4 +- src/daft-functions-json/Cargo.toml | 3 + src/daft-functions/Cargo.toml | 3 + src/daft-functions/src/distance/cosine.rs | 6 +- src/daft-functions/src/lib.rs | 8 +- src/daft-functions/src/tokenize/bpe.rs | 20 +- src/daft-functions/src/uri/download.rs | 2 +- src/daft-functions/src/uri/upload.rs | 2 +- src/daft-image/Cargo.toml | 3 + src/daft-image/src/image_buffer.rs | 7 +- src/daft-io/Cargo.toml | 3 + src/daft-io/src/azure_blob.rs | 14 +- src/daft-io/src/google_cloud.rs | 24 +- src/daft-io/src/http.rs | 10 +- src/daft-io/src/huggingface.rs | 14 +- src/daft-io/src/lib.rs | 44 +-- src/daft-io/src/local.rs | 16 +- src/daft-io/src/object_io.rs | 8 +- src/daft-io/src/object_store_glob.rs | 12 +- src/daft-io/src/s3_like.rs | 56 +-- src/daft-io/src/stats.rs | 2 +- src/daft-json/Cargo.toml | 3 + src/daft-json/src/lib.rs | 6 +- src/daft-local-execution/Cargo.toml | 3 + src/daft-local-execution/src/channel.rs | 4 +- .../anti_semi_hash_join_probe.rs | 6 +- .../src/intermediate_ops/hash_join_probe.rs | 6 +- src/daft-local-execution/src/lib.rs | 4 +- src/daft-local-execution/src/pipeline.rs | 10 +- .../src/sinks/blocking_sink.rs | 2 +- .../src/sinks/streaming_sink.rs | 2 +- src/daft-micropartition/Cargo.toml | 3 + src/daft-micropartition/src/lib.rs | 2 +- src/daft-micropartition/src/micropartition.rs | 8 +- src/daft-micropartition/src/ops/agg.rs | 4 +- .../src/ops/cast_to_schema.rs | 4 +- src/daft-micropartition/src/ops/concat.rs | 2 +- .../src/ops/eval_expressions.rs | 4 +- src/daft-micropartition/src/ops/join.rs | 2 +- src/daft-micropartition/src/ops/partition.rs | 18 +- src/daft-micropartition/src/ops/pivot.rs | 2 +- src/daft-micropartition/src/ops/slice.rs | 2 +- src/daft-micropartition/src/python.rs | 2 +- src/daft-minhash/Cargo.toml | 3 + src/daft-parquet/Cargo.toml | 3 + src/daft-parquet/src/file.rs | 6 +- src/daft-parquet/src/lib.rs | 8 +- src/daft-parquet/src/metadata.rs | 24 +- src/daft-parquet/src/read.rs | 8 +- src/daft-parquet/src/read_planner.rs | 2 +- src/daft-parquet/src/statistics/mod.rs | 6 +- src/daft-physical-plan/Cargo.toml | 3 + src/daft-physical-plan/src/local_plan.rs | 40 +- src/daft-plan/Cargo.toml | 3 + src/daft-plan/src/builder.rs | 8 +- src/daft-plan/src/display.rs | 62 +-- .../src/logical_ops/actor_pool_project.rs | 2 +- .../src/logical_optimization/optimizer.rs | 4 +- src/daft-plan/src/logical_plan.rs | 8 +- src/daft-plan/src/partitioning.rs | 2 +- .../src/physical_ops/actor_pool_project.rs | 2 +- .../src/physical_optimization/optimizer.rs | 8 +- .../src/physical_optimization/rules/rule.rs | 2 +- src/daft-plan/src/physical_plan.rs | 8 +- src/daft-plan/src/physical_planner/planner.rs | 10 +- src/daft-plan/src/source_info/mod.rs | 2 +- src/daft-scan/Cargo.toml | 3 + src/daft-scan/src/glob.rs | 2 +- src/daft-scan/src/lib.rs | 16 +- src/daft-scan/src/python.rs | 14 +- src/daft-scan/src/storage_config.rs | 4 +- src/daft-scheduler/Cargo.toml | 3 + src/daft-scheduler/src/adaptive.rs | 7 +- src/daft-schema/Cargo.toml | 3 + src/daft-schema/src/dtype.rs | 357 +++++++++--------- src/daft-schema/src/image_format.rs | 4 +- src/daft-schema/src/image_mode.rs | 8 +- src/daft-schema/src/python/datatype.rs | 10 +- src/daft-schema/src/python/field.rs | 4 +- src/daft-schema/src/python/schema.rs | 18 +- src/daft-schema/src/schema.rs | 18 +- src/daft-schema/src/time_unit.rs | 24 +- src/daft-sketch/Cargo.toml | 3 + src/daft-sketch/src/arrow2_serde.rs | 2 +- src/daft-sql/Cargo.toml | 3 + src/daft-sql/src/catalog.rs | 4 +- src/daft-sql/src/error.rs | 16 +- src/daft-sql/src/modules/aggs.rs | 2 +- src/daft-sql/src/planner.rs | 6 +- src/daft-sql/src/python.rs | 4 +- src/daft-stats/Cargo.toml | 3 + src/daft-stats/src/column_stats/comparison.rs | 82 ++-- src/daft-stats/src/column_stats/mod.rs | 22 +- src/daft-stats/src/lib.rs | 2 +- src/daft-stats/src/table_stats.rs | 12 +- src/daft-table/Cargo.toml | 3 + src/daft-table/src/lib.rs | 32 +- src/daft-table/src/ops/agg.rs | 14 +- src/daft-table/src/ops/joins/mod.rs | 2 +- src/daft-table/src/ops/pivot.rs | 2 +- src/daft-table/src/ops/sort.rs | 2 +- src/daft-table/src/ops/unpivot.rs | 2 +- src/daft-table/src/python.rs | 17 +- src/hyperloglog/Cargo.toml | 3 + src/parquet2/Cargo.toml | 3 + src/parquet2/src/encoding/bitpacked/mod.rs | 10 +- src/parquet2/src/error.rs | 6 +- src/parquet2/src/metadata/column_order.rs | 4 +- src/parquet2/src/metadata/file_metadata.rs | 2 +- src/parquet2/src/metadata/row_metadata.rs | 6 +- src/parquet2/src/page/mod.rs | 24 +- src/parquet2/src/parquet_bridge.rs | 218 +++++------ .../src/schema/io_thrift/from_thrift.rs | 2 +- .../src/schema/types/converted_type.rs | 12 +- src/parquet2/src/schema/types/parquet_type.rs | 18 +- .../src/schema/types/physical_type.rs | 16 +- src/parquet2/src/write/row_group.rs | 8 +- 283 files changed, 1573 insertions(+), 1682 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 53f9b894b9..4f2978da7b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -222,6 +222,9 @@ version = "0.11.0" features = ["derive", "rc"] version = "1.0.200" +[workspace.lints.clippy] +use-self = "deny" + [workspace.package] edition = "2021" version = "0.3.0-dev0" diff --git a/src/arrow2/Cargo.toml b/src/arrow2/Cargo.toml index 0664947831..ddee4f616e 100644 --- a/src/arrow2/Cargo.toml +++ b/src/arrow2/Cargo.toml @@ -232,6 +232,9 @@ simd = [] bench = false name = "arrow2" +[lints] +workspace = true + [package] authors = [ "Jorge C. Leitao ", diff --git a/src/arrow2/src/array/binary/mod.rs b/src/arrow2/src/array/binary/mod.rs index 0dbe71704d..3a89456e96 100644 --- a/src/arrow2/src/array/binary/mod.rs +++ b/src/arrow2/src/array/binary/mod.rs @@ -237,26 +237,26 @@ impl BinaryArray { if let Some(bitmap) = self.validity { match bitmap.into_mut() { // Safety: invariants are preserved - Left(bitmap) => Left(BinaryArray::new( + Left(bitmap) => Left(Self::new( self.data_type, self.offsets, self.values, Some(bitmap), )), Right(mutable_bitmap) => match (self.values.into_mut(), self.offsets.into_mut()) { - (Left(values), Left(offsets)) => Left(BinaryArray::new( + (Left(values), Left(offsets)) => Left(Self::new( self.data_type, offsets, values, Some(mutable_bitmap.into()), )), - (Left(values), Right(offsets)) => Left(BinaryArray::new( + (Left(values), Right(offsets)) => Left(Self::new( self.data_type, offsets.into(), values, Some(mutable_bitmap.into()), )), - (Right(values), Left(offsets)) => Left(BinaryArray::new( + (Right(values), Left(offsets)) => Left(Self::new( self.data_type, offsets, values.into(), @@ -276,15 +276,15 @@ impl BinaryArray { } else { match (self.values.into_mut(), self.offsets.into_mut()) { (Left(values), Left(offsets)) => { - Left(BinaryArray::new(self.data_type, offsets, values, None)) + Left(Self::new(self.data_type, offsets, values, None)) } - (Left(values), Right(offsets)) => Left(BinaryArray::new( + (Left(values), Right(offsets)) => Left(Self::new( self.data_type, offsets.into(), values, None, )), - (Right(values), Left(offsets)) => Left(BinaryArray::new( + (Right(values), Left(offsets)) => Left(Self::new( self.data_type, offsets, values.into(), diff --git a/src/arrow2/src/array/binary/mutable.rs b/src/arrow2/src/array/binary/mutable.rs index 13aed8704f..7068fc84ef 100644 --- a/src/arrow2/src/array/binary/mutable.rs +++ b/src/arrow2/src/array/binary/mutable.rs @@ -31,7 +31,7 @@ impl From> for BinaryArray { let validity: Option = x.into(); validity }); - let array: BinaryArray = other.values.into(); + let array: Self = other.values.into(); array.with_validity(validity) } } diff --git a/src/arrow2/src/array/binary/mutable_values.rs b/src/arrow2/src/array/binary/mutable_values.rs index ad6b09f368..557171b4d3 100644 --- a/src/arrow2/src/array/binary/mutable_values.rs +++ b/src/arrow2/src/array/binary/mutable_values.rs @@ -26,7 +26,7 @@ pub struct MutableBinaryValuesArray { impl From> for BinaryArray { fn from(other: MutableBinaryValuesArray) -> Self { - BinaryArray::::new( + Self::new( other.data_type, other.offsets.into(), other.values.into(), @@ -37,7 +37,7 @@ impl From> for BinaryArray { impl From> for MutableBinaryArray { fn from(other: MutableBinaryValuesArray) -> Self { - MutableBinaryArray::::try_new(other.data_type, other.offsets, other.values, None) + Self::try_new(other.data_type, other.offsets, other.values, None) .expect("MutableBinaryValuesArray is consistent with MutableBinaryArray") } } diff --git a/src/arrow2/src/array/boolean/mod.rs b/src/arrow2/src/array/boolean/mod.rs index b817dab283..4ad296d4d9 100644 --- a/src/arrow2/src/array/boolean/mod.rs +++ b/src/arrow2/src/array/boolean/mod.rs @@ -237,9 +237,9 @@ impl BooleanArray { if let Some(bitmap) = self.validity { match bitmap.into_mut() { - Left(bitmap) => Left(BooleanArray::new(self.data_type, self.values, Some(bitmap))), + Left(bitmap) => Left(Self::new(self.data_type, self.values, Some(bitmap))), Right(mutable_bitmap) => match self.values.into_mut() { - Left(immutable) => Left(BooleanArray::new( + Left(immutable) => Left(Self::new( self.data_type, immutable, Some(mutable_bitmap.into()), @@ -252,7 +252,7 @@ impl BooleanArray { } } else { match self.values.into_mut() { - Left(immutable) => Left(BooleanArray::new(self.data_type, immutable, None)), + Left(immutable) => Left(Self::new(self.data_type, immutable, None)), Right(mutable) => { Right(MutableBooleanArray::try_new(self.data_type, mutable, None).unwrap()) } diff --git a/src/arrow2/src/array/boolean/mutable.rs b/src/arrow2/src/array/boolean/mutable.rs index 213db18e92..b2162523dd 100644 --- a/src/arrow2/src/array/boolean/mutable.rs +++ b/src/arrow2/src/array/boolean/mutable.rs @@ -26,7 +26,7 @@ pub struct MutableBooleanArray { impl From for BooleanArray { fn from(other: MutableBooleanArray) -> Self { - BooleanArray::new( + Self::new( other.data_type, other.values.into(), other.validity.map(|x| x.into()), @@ -267,7 +267,7 @@ impl MutableBooleanArray { ) -> Self { let mut mutable = MutableBitmap::new(); mutable.extend_from_trusted_len_iter_unchecked(iterator); - MutableBooleanArray::try_new(DataType::Boolean, mutable, None).unwrap() + Self::try_new(DataType::Boolean, mutable, None).unwrap() } /// Creates a new [`MutableBooleanArray`] from a slice of `bool`. @@ -474,7 +474,7 @@ impl>> FromIterator for MutableBoolea None }; - MutableBooleanArray::try_new(DataType::Boolean, values, validity).unwrap() + Self::try_new(DataType::Boolean, values, validity).unwrap() } } diff --git a/src/arrow2/src/array/dictionary/ffi.rs b/src/arrow2/src/array/dictionary/ffi.rs index fb84665276..af17fafc21 100644 --- a/src/arrow2/src/array/dictionary/ffi.rs +++ b/src/arrow2/src/array/dictionary/ffi.rs @@ -39,6 +39,6 @@ impl FromFfi for DictionaryArray let values = ffi::try_from(values)?; // the assumption of this trait - DictionaryArray::::try_new_unchecked(data_type, keys, values) + Self::try_new_unchecked(data_type, keys, values) } } diff --git a/src/arrow2/src/array/dictionary/mutable.rs b/src/arrow2/src/array/dictionary/mutable.rs index b48a57a945..2e02f958ef 100644 --- a/src/arrow2/src/array/dictionary/mutable.rs +++ b/src/arrow2/src/array/dictionary/mutable.rs @@ -39,7 +39,7 @@ impl From> for D fn from(other: MutableDictionaryArray) -> Self { // Safety - the invariant of this struct ensures that this is up-held unsafe { - DictionaryArray::::try_new_unchecked( + Self::try_new_unchecked( other.data_type, other.keys.into(), other.map.into_values().as_box(), diff --git a/src/arrow2/src/array/equal/mod.rs b/src/arrow2/src/array/equal/mod.rs index 2bb3ba77f1..4e0a70664e 100644 --- a/src/arrow2/src/array/equal/mod.rs +++ b/src/arrow2/src/array/equal/mod.rs @@ -34,7 +34,7 @@ impl PartialEq for Box { } } -impl PartialEq for NullArray { +impl PartialEq for NullArray { fn eq(&self, other: &Self) -> bool { null::equal(self, other) } @@ -58,13 +58,13 @@ impl PartialEq> for &dyn Array { } } -impl PartialEq> for PrimitiveArray { +impl PartialEq for PrimitiveArray { fn eq(&self, other: &Self) -> bool { primitive::equal::(self, other) } } -impl PartialEq for BooleanArray { +impl PartialEq for BooleanArray { fn eq(&self, other: &Self) -> bool { equal(self, other) } @@ -76,7 +76,7 @@ impl PartialEq<&dyn Array> for BooleanArray { } } -impl PartialEq> for Utf8Array { +impl PartialEq for Utf8Array { fn eq(&self, other: &Self) -> bool { utf8::equal(self, other) } @@ -94,7 +94,7 @@ impl PartialEq> for &dyn Array { } } -impl PartialEq> for BinaryArray { +impl PartialEq for BinaryArray { fn eq(&self, other: &Self) -> bool { binary::equal(self, other) } @@ -112,7 +112,7 @@ impl PartialEq> for &dyn Array { } } -impl PartialEq for FixedSizeBinaryArray { +impl PartialEq for FixedSizeBinaryArray { fn eq(&self, other: &Self) -> bool { fixed_size_binary::equal(self, other) } @@ -124,7 +124,7 @@ impl PartialEq<&dyn Array> for FixedSizeBinaryArray { } } -impl PartialEq> for ListArray { +impl PartialEq for ListArray { fn eq(&self, other: &Self) -> bool { list::equal(self, other) } @@ -136,7 +136,7 @@ impl PartialEq<&dyn Array> for ListArray { } } -impl PartialEq for FixedSizeListArray { +impl PartialEq for FixedSizeListArray { fn eq(&self, other: &Self) -> bool { fixed_size_list::equal(self, other) } @@ -148,7 +148,7 @@ impl PartialEq<&dyn Array> for FixedSizeListArray { } } -impl PartialEq for StructArray { +impl PartialEq for StructArray { fn eq(&self, other: &Self) -> bool { struct_::equal(self, other) } @@ -160,7 +160,7 @@ impl PartialEq<&dyn Array> for StructArray { } } -impl PartialEq> for DictionaryArray { +impl PartialEq for DictionaryArray { fn eq(&self, other: &Self) -> bool { dictionary::equal(self, other) } @@ -172,7 +172,7 @@ impl PartialEq<&dyn Array> for DictionaryArray { } } -impl PartialEq for UnionArray { +impl PartialEq for UnionArray { fn eq(&self, other: &Self) -> bool { union::equal(self, other) } @@ -184,7 +184,7 @@ impl PartialEq<&dyn Array> for UnionArray { } } -impl PartialEq for MapArray { +impl PartialEq for MapArray { fn eq(&self, other: &Self) -> bool { map::equal(self, other) } diff --git a/src/arrow2/src/array/fixed_size_binary/mutable.rs b/src/arrow2/src/array/fixed_size_binary/mutable.rs index 9009f2702d..e0a73611be 100644 --- a/src/arrow2/src/array/fixed_size_binary/mutable.rs +++ b/src/arrow2/src/array/fixed_size_binary/mutable.rs @@ -23,7 +23,7 @@ pub struct MutableFixedSizeBinaryArray { impl From for FixedSizeBinaryArray { fn from(other: MutableFixedSizeBinaryArray) -> Self { - FixedSizeBinaryArray::new( + Self::new( other.data_type, other.values.into(), other.validity.map(|x| x.into()), diff --git a/src/arrow2/src/array/fixed_size_list/mutable.rs b/src/arrow2/src/array/fixed_size_list/mutable.rs index 1e387a2f70..718c3222e2 100644 --- a/src/arrow2/src/array/fixed_size_list/mutable.rs +++ b/src/arrow2/src/array/fixed_size_list/mutable.rs @@ -24,7 +24,7 @@ pub struct MutableFixedSizeListArray { impl From> for FixedSizeListArray { fn from(mut other: MutableFixedSizeListArray) -> Self { - FixedSizeListArray::new( + Self::new( other.data_type, other.values.as_box(), other.validity.map(|x| x.into()), diff --git a/src/arrow2/src/array/growable/binary.rs b/src/arrow2/src/array/growable/binary.rs index 53ff0ae4fe..ca2f9d0156 100644 --- a/src/arrow2/src/array/growable/binary.rs +++ b/src/arrow2/src/array/growable/binary.rs @@ -97,7 +97,7 @@ impl<'a, O: Offset> Growable<'a> for GrowableBinary<'a, O> { impl<'a, O: Offset> From> for BinaryArray { fn from(val: GrowableBinary<'a, O>) -> Self { - BinaryArray::::new( + Self::new( val.data_type, val.offsets.into(), val.values.into(), diff --git a/src/arrow2/src/array/growable/boolean.rs b/src/arrow2/src/array/growable/boolean.rs index 0cb1213403..1f9a193d1d 100644 --- a/src/arrow2/src/array/growable/boolean.rs +++ b/src/arrow2/src/array/growable/boolean.rs @@ -91,6 +91,6 @@ impl<'a> Growable<'a> for GrowableBoolean<'a> { impl<'a> From> for BooleanArray { fn from(val: GrowableBoolean<'a>) -> Self { - BooleanArray::new(val.data_type, val.values.into(), val.validity.into()) + Self::new(val.data_type, val.values.into(), val.validity.into()) } } diff --git a/src/arrow2/src/array/growable/fixed_binary.rs b/src/arrow2/src/array/growable/fixed_binary.rs index 763bd59c81..b51125612f 100644 --- a/src/arrow2/src/array/growable/fixed_binary.rs +++ b/src/arrow2/src/array/growable/fixed_binary.rs @@ -94,7 +94,7 @@ impl<'a> Growable<'a> for GrowableFixedSizeBinary<'a> { impl<'a> From> for FixedSizeBinaryArray { fn from(val: GrowableFixedSizeBinary<'a>) -> Self { - FixedSizeBinaryArray::new( + Self::new( val.arrays[0].data_type().clone(), val.values.into(), val.validity.into(), diff --git a/src/arrow2/src/array/growable/null.rs b/src/arrow2/src/array/growable/null.rs index ac97c47828..20a9d308b4 100644 --- a/src/arrow2/src/array/growable/null.rs +++ b/src/arrow2/src/array/growable/null.rs @@ -54,6 +54,6 @@ impl<'a> Growable<'a> for GrowableNull { impl From for NullArray { fn from(val: GrowableNull) -> Self { - NullArray::new(val.data_type, val.length) + Self::new(val.data_type, val.length) } } diff --git a/src/arrow2/src/array/growable/primitive.rs b/src/arrow2/src/array/growable/primitive.rs index e443756cb9..441c6d0642 100644 --- a/src/arrow2/src/array/growable/primitive.rs +++ b/src/arrow2/src/array/growable/primitive.rs @@ -101,6 +101,6 @@ impl<'a, T: NativeType> Growable<'a> for GrowablePrimitive<'a, T> { impl<'a, T: NativeType> From> for PrimitiveArray { #[inline] fn from(val: GrowablePrimitive<'a, T>) -> Self { - PrimitiveArray::::new(val.data_type, val.values.into(), val.validity.into()) + Self::new(val.data_type, val.values.into(), val.validity.into()) } } diff --git a/src/arrow2/src/array/growable/structure.rs b/src/arrow2/src/array/growable/structure.rs index ddf5899422..3e4e5f27df 100644 --- a/src/arrow2/src/array/growable/structure.rs +++ b/src/arrow2/src/array/growable/structure.rs @@ -129,7 +129,7 @@ impl<'a> From> for StructArray { fn from(val: GrowableStruct<'a>) -> Self { let values = val.values.into_iter().map(|mut x| x.as_box()).collect(); - StructArray::new( + Self::new( val.arrays[0].data_type().clone(), values, val.validity.into(), diff --git a/src/arrow2/src/array/growable/union.rs b/src/arrow2/src/array/growable/union.rs index cccde2ee96..2b93936833 100644 --- a/src/arrow2/src/array/growable/union.rs +++ b/src/arrow2/src/array/growable/union.rs @@ -111,7 +111,7 @@ impl<'a> From> for UnionArray { fn from(val: GrowableUnion<'a>) -> Self { let fields = val.fields.into_iter().map(|mut x| x.as_box()).collect(); - UnionArray::new( + Self::new( val.arrays[0].data_type().clone(), val.types.into(), fields, diff --git a/src/arrow2/src/array/indexable.rs b/src/arrow2/src/array/indexable.rs index 76001bfcf5..7ef7b28155 100644 --- a/src/arrow2/src/array/indexable.rs +++ b/src/arrow2/src/array/indexable.rs @@ -49,7 +49,7 @@ impl Indexable for MutableBooleanArray { impl AsIndexed for bool { #[inline] - fn as_indexed(&self) -> &bool { + fn as_indexed(&self) -> &Self { self } } diff --git a/src/arrow2/src/array/list/mutable.rs b/src/arrow2/src/array/list/mutable.rs index d24475e86d..1f4003f25c 100644 --- a/src/arrow2/src/array/list/mutable.rs +++ b/src/arrow2/src/array/list/mutable.rs @@ -54,7 +54,7 @@ impl Default for MutableListArray { impl From> for ListArray { fn from(mut other: MutableListArray) -> Self { - ListArray::new( + Self::new( other.data_type, other.offsets.into(), other.values.as_box(), @@ -266,7 +266,7 @@ impl MutableListArray { impl MutableArray for MutableListArray { fn len(&self) -> usize { - MutableListArray::len(self) + Self::len(self) } fn validity(&self) -> Option<&MutableBitmap> { diff --git a/src/arrow2/src/array/primitive/mod.rs b/src/arrow2/src/array/primitive/mod.rs index 15058d1130..a9dbd2bc02 100644 --- a/src/arrow2/src/array/primitive/mod.rs +++ b/src/arrow2/src/array/primitive/mod.rs @@ -330,7 +330,7 @@ impl PrimitiveArray { if let Some(bitmap) = self.validity { match bitmap.into_mut() { - Left(bitmap) => Left(PrimitiveArray::new( + Left(bitmap) => Left(Self::new( self.data_type, self.values, Some(bitmap), @@ -344,7 +344,7 @@ impl PrimitiveArray { ) .unwrap(), ), - Left(values) => Left(PrimitiveArray::new( + Left(values) => Left(Self::new( self.data_type, values, Some(mutable_bitmap.into()), @@ -356,7 +356,7 @@ impl PrimitiveArray { Right(values) => { Right(MutablePrimitiveArray::try_new(self.data_type, values, None).unwrap()) } - Left(values) => Left(PrimitiveArray::new(self.data_type, values, None)), + Left(values) => Left(Self::new(self.data_type, values, None)), } } } @@ -509,6 +509,6 @@ pub type UInt64Vec = MutablePrimitiveArray; impl Default for PrimitiveArray { fn default() -> Self { - PrimitiveArray::new(T::PRIMITIVE.into(), Default::default(), None) + Self::new(T::PRIMITIVE.into(), Default::default(), None) } } diff --git a/src/arrow2/src/array/primitive/mutable.rs b/src/arrow2/src/array/primitive/mutable.rs index 1d320ebf0c..7548abc3cc 100644 --- a/src/arrow2/src/array/primitive/mutable.rs +++ b/src/arrow2/src/array/primitive/mutable.rs @@ -34,7 +34,7 @@ impl From> for PrimitiveArray { } }); - PrimitiveArray::::new(other.data_type, other.values.into(), validity) + Self::new(other.data_type, other.values.into(), validity) } } diff --git a/src/arrow2/src/array/struct_/mutable.rs b/src/arrow2/src/array/struct_/mutable.rs index 4754474fb5..e8ab673b40 100644 --- a/src/arrow2/src/array/struct_/mutable.rs +++ b/src/arrow2/src/array/struct_/mutable.rs @@ -79,7 +79,7 @@ impl From for StructArray { None }; - StructArray::new( + Self::new( other.data_type, other.values.into_iter().map(|mut v| v.as_box()).collect(), validity, diff --git a/src/arrow2/src/array/utf8/mod.rs b/src/arrow2/src/array/utf8/mod.rs index 6a64505a7b..69a060c80a 100644 --- a/src/arrow2/src/array/utf8/mod.rs +++ b/src/arrow2/src/array/utf8/mod.rs @@ -259,7 +259,7 @@ impl Utf8Array { match bitmap.into_mut() { // Safety: invariants are preserved Left(bitmap) => Left(unsafe { - Utf8Array::new_unchecked( + Self::new_unchecked( self.data_type, self.offsets, self.values, @@ -270,7 +270,7 @@ impl Utf8Array { (Left(values), Left(offsets)) => { // Safety: invariants are preserved Left(unsafe { - Utf8Array::new_unchecked( + Self::new_unchecked( self.data_type, offsets, values, @@ -281,7 +281,7 @@ impl Utf8Array { (Left(values), Right(offsets)) => { // Safety: invariants are preserved Left(unsafe { - Utf8Array::new_unchecked( + Self::new_unchecked( self.data_type, offsets.into(), values, @@ -292,7 +292,7 @@ impl Utf8Array { (Right(values), Left(offsets)) => { // Safety: invariants are preserved Left(unsafe { - Utf8Array::new_unchecked( + Self::new_unchecked( self.data_type, offsets, values.into(), @@ -313,13 +313,13 @@ impl Utf8Array { } else { match (self.values.into_mut(), self.offsets.into_mut()) { (Left(values), Left(offsets)) => { - Left(unsafe { Utf8Array::new_unchecked(self.data_type, offsets, values, None) }) + Left(unsafe { Self::new_unchecked(self.data_type, offsets, values, None) }) } (Left(values), Right(offsets)) => Left(unsafe { - Utf8Array::new_unchecked(self.data_type, offsets.into(), values, None) + Self::new_unchecked(self.data_type, offsets.into(), values, None) }), (Right(values), Left(offsets)) => Left(unsafe { - Utf8Array::new_unchecked(self.data_type, offsets, values.into(), None) + Self::new_unchecked(self.data_type, offsets, values.into(), None) }), (Right(values), Right(offsets)) => Right(unsafe { MutableUtf8Array::new_unchecked(self.data_type, offsets, values, None) @@ -546,6 +546,6 @@ impl Default for Utf8Array { } else { DataType::Utf8 }; - Utf8Array::new(data_type, Default::default(), Default::default(), None) + Self::new(data_type, Default::default(), Default::default(), None) } } diff --git a/src/arrow2/src/array/utf8/mutable.rs b/src/arrow2/src/array/utf8/mutable.rs index 355dff2ae1..bad87fcd61 100644 --- a/src/arrow2/src/array/utf8/mutable.rs +++ b/src/arrow2/src/array/utf8/mutable.rs @@ -29,7 +29,7 @@ impl From> for Utf8Array { let validity: Option = x.into(); validity }); - let array: Utf8Array = other.values.into(); + let array: Self = other.values.into(); array.with_validity(validity) } } diff --git a/src/arrow2/src/array/utf8/mutable_values.rs b/src/arrow2/src/array/utf8/mutable_values.rs index aad32a7fd0..0b4d3bcdbf 100644 --- a/src/arrow2/src/array/utf8/mutable_values.rs +++ b/src/arrow2/src/array/utf8/mutable_values.rs @@ -30,7 +30,7 @@ impl From> for Utf8Array { // `MutableUtf8ValuesArray` has the same invariants as `Utf8Array` and thus // `Utf8Array` can be safely created from `MutableUtf8ValuesArray` without checks. unsafe { - Utf8Array::::new_unchecked( + Self::new_unchecked( other.data_type, other.offsets.into(), other.values.into(), @@ -45,7 +45,7 @@ impl From> for MutableUtf8Array { // Safety: // `MutableUtf8ValuesArray` has the same invariants as `MutableUtf8Array` unsafe { - MutableUtf8Array::::new_unchecked(other.data_type, other.offsets, other.values, None) + Self::new_unchecked(other.data_type, other.offsets, other.values, None) } } } diff --git a/src/arrow2/src/bitmap/immutable.rs b/src/arrow2/src/bitmap/immutable.rs index b7e0069a15..498d9bdc43 100644 --- a/src/arrow2/src/bitmap/immutable.rs +++ b/src/arrow2/src/bitmap/immutable.rs @@ -298,7 +298,7 @@ impl Bitmap { // don't use `MutableBitmap::from_len_zeroed().into()` // it triggers a bitcount let bytes = vec![0; length.saturating_add(7) / 8]; - unsafe { Bitmap::from_inner_unchecked(Arc::new(bytes.into()), 0, length, length) } + unsafe { Self::from_inner_unchecked(Arc::new(bytes.into()), 0, length, length) } } /// Initializes an new [`Bitmap`] filled with set values. @@ -307,7 +307,7 @@ impl Bitmap { // just set each byte to u8::MAX // we will not access data with index >= length let bytes = vec![0b11111111u8; length.saturating_add(7) / 8]; - unsafe { Bitmap::from_inner_unchecked(Arc::new(bytes.into()), 0, length, 0) } + unsafe { Self::from_inner_unchecked(Arc::new(bytes.into()), 0, length, 0) } } /// Counts the nulls (unset bits) starting from `offset` bits and for `length` bits. @@ -321,7 +321,7 @@ impl Bitmap { /// Panics iff `length <= bytes.len() * 8` #[inline] pub fn from_u8_slice>(slice: T, length: usize) -> Self { - Bitmap::try_new(slice.as_ref().to_vec(), length).unwrap() + Self::try_new(slice.as_ref().to_vec(), length).unwrap() } /// Alias for `Bitmap::try_new().unwrap()` @@ -330,7 +330,7 @@ impl Bitmap { /// This function panics iff `length <= bytes.len() * 8` #[inline] pub fn from_u8_vec(vec: Vec, length: usize) -> Self { - Bitmap::try_new(vec, length).unwrap() + Self::try_new(vec, length).unwrap() } /// Returns whether the bit at position `i` is set. diff --git a/src/arrow2/src/bitmap/mutable.rs b/src/arrow2/src/bitmap/mutable.rs index cb77decd84..8f0fd30d1b 100644 --- a/src/arrow2/src/bitmap/mutable.rs +++ b/src/arrow2/src/bitmap/mutable.rs @@ -330,7 +330,7 @@ impl MutableBitmap { impl From for Bitmap { #[inline] fn from(buffer: MutableBitmap) -> Self { - Bitmap::try_new(buffer.buffer, buffer.length).unwrap() + Self::try_new(buffer.buffer, buffer.length).unwrap() } } @@ -359,7 +359,7 @@ impl From for Option { impl> From

for MutableBitmap { #[inline] fn from(slice: P) -> Self { - MutableBitmap::from_trusted_len_iter(slice.as_ref().iter().copied()) + Self::from_trusted_len_iter(slice.as_ref().iter().copied()) } } diff --git a/src/arrow2/src/bitmap/utils/zip_validity.rs b/src/arrow2/src/bitmap/utils/zip_validity.rs index abcccfa489..dc85757d23 100644 --- a/src/arrow2/src/bitmap/utils/zip_validity.rs +++ b/src/arrow2/src/bitmap/utils/zip_validity.rs @@ -202,7 +202,7 @@ where /// Unwrap into an iterator that has no null values. pub fn unwrap_required(self) -> I { match self { - ZipValidity::Required(i) => i, + Self::Required(i) => i, _ => panic!("Could not 'unwrap_required'. 'ZipValidity' iterator has nulls."), } } @@ -210,7 +210,7 @@ where /// Unwrap into an iterator that has null values. pub fn unwrap_optional(self) -> ZipValidityIter { match self { - ZipValidity::Optional(i) => i, + Self::Optional(i) => i, _ => panic!("Could not 'unwrap_optional'. 'ZipValidity' iterator has no nulls."), } } diff --git a/src/arrow2/src/buffer/immutable.rs b/src/arrow2/src/buffer/immutable.rs index 2a2a0d312a..d318a9d76e 100644 --- a/src/arrow2/src/buffer/immutable.rs +++ b/src/arrow2/src/buffer/immutable.rs @@ -77,7 +77,7 @@ impl Buffer { /// Auxiliary method to create a new Buffer pub(crate) fn from_bytes(bytes: Bytes) -> Self { let length = bytes.len(); - Buffer { + Self { data: Arc::new(bytes), offset: 0, length, diff --git a/src/arrow2/src/buffer/mod.rs b/src/arrow2/src/buffer/mod.rs index 9d93a9df24..7b54c6395d 100644 --- a/src/arrow2/src/buffer/mod.rs +++ b/src/arrow2/src/buffer/mod.rs @@ -51,7 +51,7 @@ impl From> for Bytes { #[inline] fn from(data: Vec) -> Self { let inner: BytesInner = data.into(); - Bytes(inner) + Self(inner) } } diff --git a/src/arrow2/src/compute/arithmetics/basic/add.rs b/src/arrow2/src/compute/arithmetics/basic/add.rs index c7a80f2c70..31635b125e 100644 --- a/src/arrow2/src/compute/arithmetics/basic/add.rs +++ b/src/arrow2/src/compute/arithmetics/basic/add.rs @@ -138,50 +138,50 @@ where } // Implementation of ArrayAdd trait for PrimitiveArrays -impl ArrayAdd> for PrimitiveArray +impl ArrayAdd for PrimitiveArray where T: NativeArithmetics + Add, { - fn add(&self, rhs: &PrimitiveArray) -> Self { + fn add(&self, rhs: &Self) -> Self { add(self, rhs) } } -impl ArrayWrappingAdd> for PrimitiveArray +impl ArrayWrappingAdd for PrimitiveArray where T: NativeArithmetics + WrappingAdd, { - fn wrapping_add(&self, rhs: &PrimitiveArray) -> Self { + fn wrapping_add(&self, rhs: &Self) -> Self { wrapping_add(self, rhs) } } // Implementation of ArrayCheckedAdd trait for PrimitiveArrays -impl ArrayCheckedAdd> for PrimitiveArray +impl ArrayCheckedAdd for PrimitiveArray where T: NativeArithmetics + CheckedAdd, { - fn checked_add(&self, rhs: &PrimitiveArray) -> Self { + fn checked_add(&self, rhs: &Self) -> Self { checked_add(self, rhs) } } // Implementation of ArraySaturatingAdd trait for PrimitiveArrays -impl ArraySaturatingAdd> for PrimitiveArray +impl ArraySaturatingAdd for PrimitiveArray where T: NativeArithmetics + SaturatingAdd, { - fn saturating_add(&self, rhs: &PrimitiveArray) -> Self { + fn saturating_add(&self, rhs: &Self) -> Self { saturating_add(self, rhs) } } // Implementation of ArraySaturatingAdd trait for PrimitiveArrays -impl ArrayOverflowingAdd> for PrimitiveArray +impl ArrayOverflowingAdd for PrimitiveArray where T: NativeArithmetics + OverflowingAdd, { - fn overflowing_add(&self, rhs: &PrimitiveArray) -> (Self, Bitmap) { + fn overflowing_add(&self, rhs: &Self) -> (Self, Bitmap) { overflowing_add(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/basic/div.rs b/src/arrow2/src/compute/arithmetics/basic/div.rs index b3fbd8e2f8..88a5d1180b 100644 --- a/src/arrow2/src/compute/arithmetics/basic/div.rs +++ b/src/arrow2/src/compute/arithmetics/basic/div.rs @@ -79,21 +79,21 @@ where } // Implementation of ArrayDiv trait for PrimitiveArrays -impl ArrayDiv> for PrimitiveArray +impl ArrayDiv for PrimitiveArray where T: NativeArithmetics + Div, { - fn div(&self, rhs: &PrimitiveArray) -> Self { + fn div(&self, rhs: &Self) -> Self { div(self, rhs) } } // Implementation of ArrayCheckedDiv trait for PrimitiveArrays -impl ArrayCheckedDiv> for PrimitiveArray +impl ArrayCheckedDiv for PrimitiveArray where T: NativeArithmetics + CheckedDiv, { - fn checked_div(&self, rhs: &PrimitiveArray) -> Self { + fn checked_div(&self, rhs: &Self) -> Self { checked_div(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/basic/mul.rs b/src/arrow2/src/compute/arithmetics/basic/mul.rs index 95861c54eb..64a95aa362 100644 --- a/src/arrow2/src/compute/arithmetics/basic/mul.rs +++ b/src/arrow2/src/compute/arithmetics/basic/mul.rs @@ -139,50 +139,50 @@ where } // Implementation of ArrayMul trait for PrimitiveArrays -impl ArrayMul> for PrimitiveArray +impl ArrayMul for PrimitiveArray where T: NativeArithmetics + Mul, { - fn mul(&self, rhs: &PrimitiveArray) -> Self { + fn mul(&self, rhs: &Self) -> Self { mul(self, rhs) } } -impl ArrayWrappingMul> for PrimitiveArray +impl ArrayWrappingMul for PrimitiveArray where T: NativeArithmetics + WrappingMul, { - fn wrapping_mul(&self, rhs: &PrimitiveArray) -> Self { + fn wrapping_mul(&self, rhs: &Self) -> Self { wrapping_mul(self, rhs) } } // Implementation of ArrayCheckedMul trait for PrimitiveArrays -impl ArrayCheckedMul> for PrimitiveArray +impl ArrayCheckedMul for PrimitiveArray where T: NativeArithmetics + CheckedMul, { - fn checked_mul(&self, rhs: &PrimitiveArray) -> Self { + fn checked_mul(&self, rhs: &Self) -> Self { checked_mul(self, rhs) } } // Implementation of ArraySaturatingMul trait for PrimitiveArrays -impl ArraySaturatingMul> for PrimitiveArray +impl ArraySaturatingMul for PrimitiveArray where T: NativeArithmetics + SaturatingMul, { - fn saturating_mul(&self, rhs: &PrimitiveArray) -> Self { + fn saturating_mul(&self, rhs: &Self) -> Self { saturating_mul(self, rhs) } } // Implementation of ArraySaturatingMul trait for PrimitiveArrays -impl ArrayOverflowingMul> for PrimitiveArray +impl ArrayOverflowingMul for PrimitiveArray where T: NativeArithmetics + OverflowingMul, { - fn overflowing_mul(&self, rhs: &PrimitiveArray) -> (Self, Bitmap) { + fn overflowing_mul(&self, rhs: &Self) -> (Self, Bitmap) { overflowing_mul(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/basic/rem.rs b/src/arrow2/src/compute/arithmetics/basic/rem.rs index b2035ecfd1..cba93d5d12 100644 --- a/src/arrow2/src/compute/arithmetics/basic/rem.rs +++ b/src/arrow2/src/compute/arithmetics/basic/rem.rs @@ -61,20 +61,20 @@ where binary_checked(lhs, rhs, lhs.data_type().clone(), op) } -impl ArrayRem> for PrimitiveArray +impl ArrayRem for PrimitiveArray where T: NativeArithmetics + Rem, { - fn rem(&self, rhs: &PrimitiveArray) -> Self { + fn rem(&self, rhs: &Self) -> Self { rem(self, rhs) } } -impl ArrayCheckedRem> for PrimitiveArray +impl ArrayCheckedRem for PrimitiveArray where T: NativeArithmetics + CheckedRem, { - fn checked_rem(&self, rhs: &PrimitiveArray) -> Self { + fn checked_rem(&self, rhs: &Self) -> Self { checked_rem(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/basic/sub.rs b/src/arrow2/src/compute/arithmetics/basic/sub.rs index 09c4d610df..327a9836fe 100644 --- a/src/arrow2/src/compute/arithmetics/basic/sub.rs +++ b/src/arrow2/src/compute/arithmetics/basic/sub.rs @@ -138,50 +138,50 @@ where } // Implementation of ArraySub trait for PrimitiveArrays -impl ArraySub> for PrimitiveArray +impl ArraySub for PrimitiveArray where T: NativeArithmetics + Sub, { - fn sub(&self, rhs: &PrimitiveArray) -> Self { + fn sub(&self, rhs: &Self) -> Self { sub(self, rhs) } } -impl ArrayWrappingSub> for PrimitiveArray +impl ArrayWrappingSub for PrimitiveArray where T: NativeArithmetics + WrappingSub, { - fn wrapping_sub(&self, rhs: &PrimitiveArray) -> Self { + fn wrapping_sub(&self, rhs: &Self) -> Self { wrapping_sub(self, rhs) } } // Implementation of ArrayCheckedSub trait for PrimitiveArrays -impl ArrayCheckedSub> for PrimitiveArray +impl ArrayCheckedSub for PrimitiveArray where T: NativeArithmetics + CheckedSub, { - fn checked_sub(&self, rhs: &PrimitiveArray) -> Self { + fn checked_sub(&self, rhs: &Self) -> Self { checked_sub(self, rhs) } } // Implementation of ArraySaturatingSub trait for PrimitiveArrays -impl ArraySaturatingSub> for PrimitiveArray +impl ArraySaturatingSub for PrimitiveArray where T: NativeArithmetics + SaturatingSub, { - fn saturating_sub(&self, rhs: &PrimitiveArray) -> Self { + fn saturating_sub(&self, rhs: &Self) -> Self { saturating_sub(self, rhs) } } // Implementation of ArraySaturatingSub trait for PrimitiveArrays -impl ArrayOverflowingSub> for PrimitiveArray +impl ArrayOverflowingSub for PrimitiveArray where T: NativeArithmetics + OverflowingSub, { - fn overflowing_sub(&self, rhs: &PrimitiveArray) -> (Self, Bitmap) { + fn overflowing_sub(&self, rhs: &Self) -> (Self, Bitmap) { overflowing_sub(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/decimal/add.rs b/src/arrow2/src/compute/arithmetics/decimal/add.rs index 2e956c2318..98f05e4bd0 100644 --- a/src/arrow2/src/compute/arithmetics/decimal/add.rs +++ b/src/arrow2/src/compute/arithmetics/decimal/add.rs @@ -135,22 +135,22 @@ pub fn checked_add(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> Pr } // Implementation of ArrayAdd trait for PrimitiveArrays -impl ArrayAdd> for PrimitiveArray { - fn add(&self, rhs: &PrimitiveArray) -> Self { +impl ArrayAdd for PrimitiveArray { + fn add(&self, rhs: &Self) -> Self { add(self, rhs) } } // Implementation of ArrayCheckedAdd trait for PrimitiveArrays -impl ArrayCheckedAdd> for PrimitiveArray { - fn checked_add(&self, rhs: &PrimitiveArray) -> Self { +impl ArrayCheckedAdd for PrimitiveArray { + fn checked_add(&self, rhs: &Self) -> Self { checked_add(self, rhs) } } // Implementation of ArraySaturatingAdd trait for PrimitiveArrays -impl ArraySaturatingAdd> for PrimitiveArray { - fn saturating_add(&self, rhs: &PrimitiveArray) -> Self { +impl ArraySaturatingAdd for PrimitiveArray { + fn saturating_add(&self, rhs: &Self) -> Self { saturating_add(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/decimal/div.rs b/src/arrow2/src/compute/arithmetics/decimal/div.rs index f120c7b3e2..df1468bb8b 100644 --- a/src/arrow2/src/compute/arithmetics/decimal/div.rs +++ b/src/arrow2/src/compute/arithmetics/decimal/div.rs @@ -200,15 +200,15 @@ pub fn checked_div(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> Pr } // Implementation of ArrayDiv trait for PrimitiveArrays -impl ArrayDiv> for PrimitiveArray { - fn div(&self, rhs: &PrimitiveArray) -> Self { +impl ArrayDiv for PrimitiveArray { + fn div(&self, rhs: &Self) -> Self { div(self, rhs) } } // Implementation of ArrayCheckedDiv trait for PrimitiveArrays -impl ArrayCheckedDiv> for PrimitiveArray { - fn checked_div(&self, rhs: &PrimitiveArray) -> Self { +impl ArrayCheckedDiv for PrimitiveArray { + fn checked_div(&self, rhs: &Self) -> Self { checked_div(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/decimal/mul.rs b/src/arrow2/src/compute/arithmetics/decimal/mul.rs index f050952e95..021564916f 100644 --- a/src/arrow2/src/compute/arithmetics/decimal/mul.rs +++ b/src/arrow2/src/compute/arithmetics/decimal/mul.rs @@ -205,22 +205,22 @@ pub fn checked_mul(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> Pr } // Implementation of ArrayMul trait for PrimitiveArrays -impl ArrayMul> for PrimitiveArray { - fn mul(&self, rhs: &PrimitiveArray) -> Self { +impl ArrayMul for PrimitiveArray { + fn mul(&self, rhs: &Self) -> Self { mul(self, rhs) } } // Implementation of ArrayCheckedMul trait for PrimitiveArrays -impl ArrayCheckedMul> for PrimitiveArray { - fn checked_mul(&self, rhs: &PrimitiveArray) -> Self { +impl ArrayCheckedMul for PrimitiveArray { + fn checked_mul(&self, rhs: &Self) -> Self { checked_mul(self, rhs) } } // Implementation of ArraySaturatingMul trait for PrimitiveArrays -impl ArraySaturatingMul> for PrimitiveArray { - fn saturating_mul(&self, rhs: &PrimitiveArray) -> Self { +impl ArraySaturatingMul for PrimitiveArray { + fn saturating_mul(&self, rhs: &Self) -> Self { saturating_mul(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/decimal/sub.rs b/src/arrow2/src/compute/arithmetics/decimal/sub.rs index ad7d2020b8..ac69b5b09e 100644 --- a/src/arrow2/src/compute/arithmetics/decimal/sub.rs +++ b/src/arrow2/src/compute/arithmetics/decimal/sub.rs @@ -98,22 +98,22 @@ pub fn saturating_sub( } // Implementation of ArraySub trait for PrimitiveArrays -impl ArraySub> for PrimitiveArray { - fn sub(&self, rhs: &PrimitiveArray) -> Self { +impl ArraySub for PrimitiveArray { + fn sub(&self, rhs: &Self) -> Self { sub(self, rhs) } } // Implementation of ArrayCheckedSub trait for PrimitiveArrays -impl ArrayCheckedSub> for PrimitiveArray { - fn checked_sub(&self, rhs: &PrimitiveArray) -> Self { +impl ArrayCheckedSub for PrimitiveArray { + fn checked_sub(&self, rhs: &Self) -> Self { checked_sub(self, rhs) } } // Implementation of ArraySaturatingSub trait for PrimitiveArrays -impl ArraySaturatingSub> for PrimitiveArray { - fn saturating_sub(&self, rhs: &PrimitiveArray) -> Self { +impl ArraySaturatingSub for PrimitiveArray { + fn saturating_sub(&self, rhs: &Self) -> Self { saturating_sub(self, rhs) } } diff --git a/src/arrow2/src/datatypes/field.rs b/src/arrow2/src/datatypes/field.rs index 59eb894a3e..0df48e37b9 100644 --- a/src/arrow2/src/datatypes/field.rs +++ b/src/arrow2/src/datatypes/field.rs @@ -25,7 +25,7 @@ pub struct Field { impl Field { /// Creates a new [`Field`]. pub fn new>(name: T, data_type: DataType, is_nullable: bool) -> Self { - Field { + Self { name: name.into(), data_type, is_nullable, diff --git a/src/arrow2/src/datatypes/mod.rs b/src/arrow2/src/datatypes/mod.rs index 2debc5a4f2..655293f495 100644 --- a/src/arrow2/src/datatypes/mod.rs +++ b/src/arrow2/src/datatypes/mod.rs @@ -262,7 +262,7 @@ impl DataType { /// Returns `&self` for all but [`DataType::Extension`]. For [`DataType::Extension`], /// (recursively) returns the inner [`DataType`]. /// Never returns the variant [`DataType::Extension`]. - pub fn to_logical_type(&self) -> &DataType { + pub fn to_logical_type(&self) -> &Self { use DataType::*; match self { Extension(_, key, _) => key.to_logical_type(), @@ -274,14 +274,14 @@ impl DataType { impl From for DataType { fn from(item: IntegerType) -> Self { match item { - IntegerType::Int8 => DataType::Int8, - IntegerType::Int16 => DataType::Int16, - IntegerType::Int32 => DataType::Int32, - IntegerType::Int64 => DataType::Int64, - IntegerType::UInt8 => DataType::UInt8, - IntegerType::UInt16 => DataType::UInt16, - IntegerType::UInt32 => DataType::UInt32, - IntegerType::UInt64 => DataType::UInt64, + IntegerType::Int8 => Self::Int8, + IntegerType::Int16 => Self::Int16, + IntegerType::Int32 => Self::Int32, + IntegerType::Int64 => Self::Int64, + IntegerType::UInt8 => Self::UInt8, + IntegerType::UInt16 => Self::UInt16, + IntegerType::UInt32 => Self::UInt32, + IntegerType::UInt64 => Self::UInt64, } } } @@ -289,21 +289,21 @@ impl From for DataType { impl From for DataType { fn from(item: PrimitiveType) -> Self { match item { - PrimitiveType::Int8 => DataType::Int8, - PrimitiveType::Int16 => DataType::Int16, - PrimitiveType::Int32 => DataType::Int32, - PrimitiveType::Int64 => DataType::Int64, - PrimitiveType::UInt8 => DataType::UInt8, - PrimitiveType::UInt16 => DataType::UInt16, - PrimitiveType::UInt32 => DataType::UInt32, - PrimitiveType::UInt64 => DataType::UInt64, - PrimitiveType::Int128 => DataType::Decimal(32, 32), - PrimitiveType::Int256 => DataType::Decimal256(32, 32), - PrimitiveType::Float16 => DataType::Float16, - PrimitiveType::Float32 => DataType::Float32, - PrimitiveType::Float64 => DataType::Float64, - PrimitiveType::DaysMs => DataType::Interval(IntervalUnit::DayTime), - PrimitiveType::MonthDayNano => DataType::Interval(IntervalUnit::MonthDayNano), + PrimitiveType::Int8 => Self::Int8, + PrimitiveType::Int16 => Self::Int16, + PrimitiveType::Int32 => Self::Int32, + PrimitiveType::Int64 => Self::Int64, + PrimitiveType::UInt8 => Self::UInt8, + PrimitiveType::UInt16 => Self::UInt16, + PrimitiveType::UInt32 => Self::UInt32, + PrimitiveType::UInt64 => Self::UInt64, + PrimitiveType::Int128 => Self::Decimal(32, 32), + PrimitiveType::Int256 => Self::Decimal256(32, 32), + PrimitiveType::Float16 => Self::Float16, + PrimitiveType::Float32 => Self::Float32, + PrimitiveType::Float64 => Self::Float64, + PrimitiveType::DaysMs => Self::Interval(IntervalUnit::DayTime), + PrimitiveType::MonthDayNano => Self::Interval(IntervalUnit::MonthDayNano), } } } diff --git a/src/arrow2/src/datatypes/schema.rs b/src/arrow2/src/datatypes/schema.rs index d90f9d88c8..020c75cc53 100644 --- a/src/arrow2/src/datatypes/schema.rs +++ b/src/arrow2/src/datatypes/schema.rs @@ -41,7 +41,7 @@ impl Schema { }) .collect(); - Schema { + Self { fields, metadata: self.metadata, } diff --git a/src/arrow2/src/ffi/array.rs b/src/arrow2/src/ffi/array.rs index 6b859e0f65..d6bc00017a 100644 --- a/src/arrow2/src/ffi/array.rs +++ b/src/arrow2/src/ffi/array.rs @@ -110,12 +110,12 @@ impl ArrowArray { let children_ptr = children .into_iter() - .map(|child| Box::into_raw(Box::new(ArrowArray::new(child)))) + .map(|child| Box::into_raw(Box::new(Self::new(child)))) .collect::>(); let n_children = children_ptr.len() as i64; let dictionary_ptr = - dictionary.map(|array| Box::into_raw(Box::new(ArrowArray::new(array)))); + dictionary.map(|array| Box::into_raw(Box::new(Self::new(array)))); let length = array.len() as i64; let null_count = array.null_count() as i64; diff --git a/src/arrow2/src/ffi/schema.rs b/src/arrow2/src/ffi/schema.rs index 28e13f884a..453d2ba27b 100644 --- a/src/arrow2/src/ffi/schema.rs +++ b/src/arrow2/src/ffi/schema.rs @@ -72,7 +72,7 @@ impl ArrowSchema { flags += *is_ordered as i64; // we do not store field info in the dict values, so can't recover it all :( let field = Field::new("", values.as_ref().clone(), true); - Some(Box::new(ArrowSchema::new(&field))) + Some(Box::new(Self::new(&field))) } else { None }; diff --git a/src/arrow2/src/io/csv/mod.rs b/src/arrow2/src/io/csv/mod.rs index a53c1231d5..1f7be08fee 100644 --- a/src/arrow2/src/io/csv/mod.rs +++ b/src/arrow2/src/io/csv/mod.rs @@ -10,13 +10,13 @@ mod utils; #[cfg(feature = "io_csv_read")] impl From for Error { fn from(error: csv::Error) -> Self { - Error::External("".to_string(), Box::new(error)) + Self::External("".to_string(), Box::new(error)) } } impl From for Error { fn from(error: chrono::ParseError) -> Self { - Error::External("".to_string(), Box::new(error)) + Self::External("".to_string(), Box::new(error)) } } diff --git a/src/arrow2/src/io/csv/read_async/mod.rs b/src/arrow2/src/io/csv/read_async/mod.rs index a912366f21..04b97702f4 100644 --- a/src/arrow2/src/io/csv/read_async/mod.rs +++ b/src/arrow2/src/io/csv/read_async/mod.rs @@ -16,6 +16,6 @@ pub use csv_async::Error as CSVError; impl From for crate::error::Error { fn from(error: CSVError) -> Self { - crate::error::Error::External("".to_string(), Box::new(error)) + Self::External("".to_string(), Box::new(error)) } } diff --git a/src/arrow2/src/io/csv/write/serialize.rs b/src/arrow2/src/io/csv/write/serialize.rs index 67a3d998e0..a669b055e3 100644 --- a/src/arrow2/src/io/csv/write/serialize.rs +++ b/src/arrow2/src/io/csv/write/serialize.rs @@ -40,7 +40,7 @@ pub struct SerializeOptions { impl Default for SerializeOptions { fn default() -> Self { - SerializeOptions { + Self { date32_format: None, date64_format: None, time32_format: None, diff --git a/src/arrow2/src/io/flight/mod.rs b/src/arrow2/src/io/flight/mod.rs index 943f148730..0849853382 100644 --- a/src/arrow2/src/io/flight/mod.rs +++ b/src/arrow2/src/io/flight/mod.rs @@ -50,7 +50,7 @@ pub fn serialize_batch( impl From for FlightData { fn from(data: EncodedData) -> Self { - FlightData { + Self { data_header: data.ipc_message, data_body: data.arrow_data, ..Default::default() diff --git a/src/arrow2/src/io/ipc/append/mod.rs b/src/arrow2/src/io/ipc/append/mod.rs index 1fc066845d..8a00598325 100644 --- a/src/arrow2/src/io/ipc/append/mod.rs +++ b/src/arrow2/src/io/ipc/append/mod.rs @@ -25,7 +25,7 @@ impl FileWriter { mut writer: R, metadata: FileMetadata, options: WriteOptions, - ) -> Result> { + ) -> Result { if metadata.ipc_schema.is_little_endian != is_native_little_endian() { return Err(Error::nyi( "Appending to a file of a non-native endianness is still not supported", @@ -54,7 +54,7 @@ impl FileWriter { writer.seek(SeekFrom::Start(offset))?; - Ok(FileWriter { + Ok(Self { writer, options, schema: metadata.schema, diff --git a/src/arrow2/src/io/ipc/read/error.rs b/src/arrow2/src/io/ipc/read/error.rs index cbac69aef2..fd11b0ba0a 100644 --- a/src/arrow2/src/io/ipc/read/error.rs +++ b/src/arrow2/src/io/ipc/read/error.rs @@ -101,12 +101,12 @@ pub enum OutOfSpecKind { impl From for Error { fn from(kind: OutOfSpecKind) -> Self { - Error::OutOfSpec(format!("{kind:?}")) + Self::OutOfSpec(format!("{kind:?}")) } } impl From for Error { fn from(error: arrow_format::ipc::planus::Error) -> Self { - Error::OutOfSpec(error.to_string()) + Self::OutOfSpec(error.to_string()) } } diff --git a/src/arrow2/src/io/ipc/read/stream.rs b/src/arrow2/src/io/ipc/read/stream.rs index e3b834a3f7..a534f2103f 100644 --- a/src/arrow2/src/io/ipc/read/stream.rs +++ b/src/arrow2/src/io/ipc/read/stream.rs @@ -79,7 +79,7 @@ impl StreamState { /// /// If the `StreamState` was `Waiting`. pub fn unwrap(self) -> Chunk> { - if let StreamState::Some(batch) = self { + if let Self::Some(batch) = self { batch } else { panic!("The batch is not available") diff --git a/src/arrow2/src/io/json/read/deserialize.rs b/src/arrow2/src/io/json/read/deserialize.rs index 4331b2e3a7..e80131d53e 100644 --- a/src/arrow2/src/io/json/read/deserialize.rs +++ b/src/arrow2/src/io/json/read/deserialize.rs @@ -460,37 +460,37 @@ pub(crate) trait Container { impl Container for MutableBinaryArray { fn with_capacity(capacity: usize) -> Self { - MutableBinaryArray::with_capacity(capacity) + Self::with_capacity(capacity) } } impl Container for MutableBooleanArray { fn with_capacity(capacity: usize) -> Self { - MutableBooleanArray::with_capacity(capacity) + Self::with_capacity(capacity) } } impl Container for MutableFixedSizeBinaryArray { fn with_capacity(capacity: usize) -> Self { - MutableFixedSizeBinaryArray::with_capacity(capacity, 0) + Self::with_capacity(capacity, 0) } } impl Container for MutableListArray { fn with_capacity(capacity: usize) -> Self { - MutableListArray::with_capacity(capacity) + Self::with_capacity(capacity) } } impl Container for MutablePrimitiveArray { fn with_capacity(capacity: usize) -> Self { - MutablePrimitiveArray::with_capacity(capacity) + Self::with_capacity(capacity) } } impl Container for MutableUtf8Array { fn with_capacity(capacity: usize) -> Self { - MutableUtf8Array::with_capacity(capacity) + Self::with_capacity(capacity) } } diff --git a/src/arrow2/src/io/json/read/mod.rs b/src/arrow2/src/io/json/read/mod.rs index 087da38d50..2cac510dd5 100644 --- a/src/arrow2/src/io/json/read/mod.rs +++ b/src/arrow2/src/io/json/read/mod.rs @@ -13,6 +13,6 @@ use crate::error::Error; impl From for Error { fn from(error: json_deserializer::Error) -> Self { - Error::ExternalFormat(error.to_string()) + Self::ExternalFormat(error.to_string()) } } diff --git a/src/arrow2/src/io/json/write/utf8.rs b/src/arrow2/src/io/json/write/utf8.rs index b8c9852217..8cb1eb434c 100644 --- a/src/arrow2/src/io/json/write/utf8.rs +++ b/src/arrow2/src/io/json/write/utf8.rs @@ -89,16 +89,16 @@ pub enum CharEscape { impl CharEscape { #[inline] - fn from_escape_table(escape: u8, byte: u8) -> CharEscape { + fn from_escape_table(escape: u8, byte: u8) -> Self { match escape { - self::BB => CharEscape::Backspace, - self::TT => CharEscape::Tab, - self::NN => CharEscape::LineFeed, - self::FF => CharEscape::FormFeed, - self::RR => CharEscape::CarriageReturn, - self::QU => CharEscape::Quote, - self::BS => CharEscape::ReverseSolidus, - self::UU => CharEscape::AsciiControl(byte), + self::BB => Self::Backspace, + self::TT => Self::Tab, + self::NN => Self::LineFeed, + self::FF => Self::FormFeed, + self::RR => Self::CarriageReturn, + self::QU => Self::Quote, + self::BS => Self::ReverseSolidus, + self::UU => Self::AsciiControl(byte), _ => unreachable!(), } } diff --git a/src/arrow2/src/io/json_integration/mod.rs b/src/arrow2/src/io/json_integration/mod.rs index ada2441646..cfdae5faca 100644 --- a/src/arrow2/src/io/json_integration/mod.rs +++ b/src/arrow2/src/io/json_integration/mod.rs @@ -122,6 +122,6 @@ pub struct ArrowJsonColumn { impl From for Error { fn from(error: serde_json::Error) -> Self { - Error::ExternalFormat(error.to_string()) + Self::ExternalFormat(error.to_string()) } } diff --git a/src/arrow2/src/io/orc/mod.rs b/src/arrow2/src/io/orc/mod.rs index 9b982a3ca7..06fce9dd6d 100644 --- a/src/arrow2/src/io/orc/mod.rs +++ b/src/arrow2/src/io/orc/mod.rs @@ -7,6 +7,6 @@ use crate::error::Error; impl From for Error { fn from(error: format::error::Error) -> Self { - Error::ExternalFormat(format!("{error:?}")) + Self::ExternalFormat(format!("{error:?}")) } } diff --git a/src/arrow2/src/io/parquet/mod.rs b/src/arrow2/src/io/parquet/mod.rs index 7fe33f8564..e656097853 100644 --- a/src/arrow2/src/io/parquet/mod.rs +++ b/src/arrow2/src/io/parquet/mod.rs @@ -17,18 +17,18 @@ impl From for Error { let message = "Failed to read a compressed parquet file. \ Use the cargo feature \"io_parquet_compression\" to read compressed parquet files." .to_string(); - Error::ExternalFormat(message) + Self::ExternalFormat(message) } parquet2::error::Error::Transport(msg) => { - Error::Io(std::io::Error::new(std::io::ErrorKind::Other, msg)) + Self::Io(std::io::Error::new(std::io::ErrorKind::Other, msg)) } - _ => Error::ExternalFormat(error.to_string()), + _ => Self::ExternalFormat(error.to_string()), } } } impl From for parquet2::error::Error { fn from(error: Error) -> Self { - parquet2::error::Error::OutOfSpec(error.to_string()) + Self::OutOfSpec(error.to_string()) } } diff --git a/src/arrow2/src/io/parquet/read/deserialize/utils.rs b/src/arrow2/src/io/parquet/read/deserialize/utils.rs index 9c4855813f..822d6ec6e4 100644 --- a/src/arrow2/src/io/parquet/read/deserialize/utils.rs +++ b/src/arrow2/src/io/parquet/read/deserialize/utils.rs @@ -40,7 +40,7 @@ pub(super) trait Pushable: Sized { impl Pushable for MutableBitmap { #[inline] fn reserve(&mut self, additional: usize) { - MutableBitmap::reserve(self, additional) + Self::reserve(self, additional) } #[inline] fn len(&self) -> usize { @@ -66,7 +66,7 @@ impl Pushable for MutableBitmap { impl Pushable for Vec { #[inline] fn reserve(&mut self, additional: usize) { - Vec::reserve(self, additional) + Self::reserve(self, additional) } #[inline] fn len(&self) -> usize { diff --git a/src/arrow2/src/io/parquet/read/schema/mod.rs b/src/arrow2/src/io/parquet/read/schema/mod.rs index 293473c233..6d65e564e2 100644 --- a/src/arrow2/src/io/parquet/read/schema/mod.rs +++ b/src/arrow2/src/io/parquet/read/schema/mod.rs @@ -29,7 +29,7 @@ pub struct SchemaInferenceOptions { impl Default for SchemaInferenceOptions { fn default() -> Self { - SchemaInferenceOptions { + Self { int96_coerce_to_timeunit: TimeUnit::Nanosecond, } } diff --git a/src/arrow2/src/io/parquet/write/pages.rs b/src/arrow2/src/io/parquet/write/pages.rs index cf7654fe15..98dc8c2812 100644 --- a/src/arrow2/src/io/parquet/write/pages.rs +++ b/src/arrow2/src/io/parquet/write/pages.rs @@ -49,10 +49,10 @@ impl Nested { /// Returns the length (number of rows) of the element pub fn len(&self) -> usize { match self { - Nested::Primitive(_, _, length) => *length, - Nested::List(nested) => nested.offsets.len_proxy(), - Nested::LargeList(nested) => nested.offsets.len_proxy(), - Nested::Struct(_, _, len) => *len, + Self::Primitive(_, _, length) => *length, + Self::List(nested) => nested.offsets.len_proxy(), + Self::LargeList(nested) => nested.offsets.len_proxy(), + Self::Struct(_, _, len) => *len, } } } diff --git a/src/arrow2/src/types/native.rs b/src/arrow2/src/types/native.rs index 1d5c0ada72..666c05be23 100644 --- a/src/arrow2/src/types/native.rs +++ b/src/arrow2/src/types/native.rs @@ -338,7 +338,7 @@ pub struct f16(pub u16); impl PartialEq for f16 { #[inline] - fn eq(&self, other: &f16) -> bool { + fn eq(&self, other: &Self) -> bool { if self.is_nan() || other.is_nan() { false } else { @@ -350,7 +350,7 @@ impl PartialEq for f16 { // see https://github.com/starkat99/half-rs/blob/main/src/binary16.rs impl f16 { /// The difference between 1.0 and the next largest representable number. - pub const EPSILON: f16 = f16(0x1400u16); + pub const EPSILON: Self = Self(0x1400u16); #[inline] #[must_use] @@ -360,8 +360,8 @@ impl f16 { /// Casts from u16. #[inline] - pub const fn from_bits(bits: u16) -> f16 { - f16(bits) + pub const fn from_bits(bits: u16) -> Self { + Self(bits) } /// Casts to u16. @@ -430,7 +430,7 @@ impl f16 { if exp == 0x7F80_0000u32 { // Set mantissa MSB for NaN (and also keep shifted mantissa bits) let nan_bit = if man == 0 { 0 } else { 0x0200u32 }; - return f16(((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 13)) as u16); + return Self(((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 13)) as u16); } // The number is normalized, start assembling half precision version @@ -441,7 +441,7 @@ impl f16 { // Check for exponent overflow, return +infinity if half_exp >= 0x1F { - return f16((half_sign | 0x7C00u32) as u16); + return Self((half_sign | 0x7C00u32) as u16); } // Check for underflow @@ -449,7 +449,7 @@ impl f16 { // Check mantissa for what we can do if 14 - half_exp > 24 { // No rounding possibility, so this is a full underflow, return signed zero - return f16(half_sign as u16); + return Self(half_sign as u16); } // Don't forget about hidden leading mantissa bit when assembling mantissa let man = man | 0x0080_0000u32; @@ -460,7 +460,7 @@ impl f16 { half_man += 1; } // No exponent for subnormals - return f16((half_sign | half_man) as u16); + return Self((half_sign | half_man) as u16); } // Rebias the exponent @@ -470,9 +470,9 @@ impl f16 { let round_bit = 0x0000_1000u32; if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { // Round it - f16(((half_sign | half_exp | half_man) + 1) as u16) + Self(((half_sign | half_exp | half_man) + 1) as u16) } else { - f16((half_sign | half_exp | half_man) as u16) + Self((half_sign | half_exp | half_man) as u16) } } } diff --git a/src/common/arrow-ffi/Cargo.toml b/src/common/arrow-ffi/Cargo.toml index b45af25939..88575a3e1e 100644 --- a/src/common/arrow-ffi/Cargo.toml +++ b/src/common/arrow-ffi/Cargo.toml @@ -5,6 +5,9 @@ pyo3 = {workspace = true, optional = true} [features] python = ["dep:pyo3"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "common-arrow-ffi" diff --git a/src/common/daft-config/Cargo.toml b/src/common/daft-config/Cargo.toml index 6a208fdcae..212ce37ab2 100644 --- a/src/common/daft-config/Cargo.toml +++ b/src/common/daft-config/Cargo.toml @@ -7,6 +7,9 @@ serde = {workspace = true} [features] python = ["dep:pyo3", "common-io-config/python"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "common-daft-config" diff --git a/src/common/daft-config/src/lib.rs b/src/common/daft-config/src/lib.rs index dcaef0a2f8..077d4b7e83 100644 --- a/src/common/daft-config/src/lib.rs +++ b/src/common/daft-config/src/lib.rs @@ -60,7 +60,7 @@ pub struct DaftExecutionConfig { impl Default for DaftExecutionConfig { fn default() -> Self { - DaftExecutionConfig { + Self { scan_tasks_min_size_bytes: 96 * 1024 * 1024, // 96MB scan_tasks_max_size_bytes: 384 * 1024 * 1024, // 384MB broadcast_join_size_bytes_threshold: 10 * 1024 * 1024, // 10 MiB diff --git a/src/common/daft-config/src/python.rs b/src/common/daft-config/src/python.rs index 5dda71eda8..44bb95c1b0 100644 --- a/src/common/daft-config/src/python.rs +++ b/src/common/daft-config/src/python.rs @@ -17,27 +17,24 @@ pub struct PyDaftPlanningConfig { impl PyDaftPlanningConfig { #[new] pub fn new() -> Self { - PyDaftPlanningConfig::default() + Self::default() } #[staticmethod] pub fn from_env() -> Self { - PyDaftPlanningConfig { + Self { config: Arc::new(DaftPlanningConfig::from_env()), } } - fn with_config_values( - &mut self, - default_io_config: Option, - ) -> PyResult { + fn with_config_values(&mut self, default_io_config: Option) -> PyResult { let mut config = self.config.as_ref().clone(); if let Some(default_io_config) = default_io_config { config.default_io_config = default_io_config.config; } - Ok(PyDaftPlanningConfig { + Ok(Self { config: Arc::new(config), }) } @@ -67,12 +64,12 @@ pub struct PyDaftExecutionConfig { impl PyDaftExecutionConfig { #[new] pub fn new() -> Self { - PyDaftExecutionConfig::default() + Self::default() } #[staticmethod] pub fn from_env() -> Self { - PyDaftExecutionConfig { + Self { config: Arc::new(DaftExecutionConfig::from_env()), } } @@ -98,7 +95,7 @@ impl PyDaftExecutionConfig { enable_aqe: Option, enable_native_executor: Option, default_morsel_size: Option, - ) -> PyResult { + ) -> PyResult { let mut config = self.config.as_ref().clone(); if let Some(scan_tasks_max_size_bytes) = scan_tasks_max_size_bytes { @@ -161,7 +158,7 @@ impl PyDaftExecutionConfig { config.default_morsel_size = default_morsel_size; } - Ok(PyDaftExecutionConfig { + Ok(Self { config: Arc::new(config), }) } diff --git a/src/common/display/Cargo.toml b/src/common/display/Cargo.toml index 3fe4ea2774..55fd06966c 100644 --- a/src/common/display/Cargo.toml +++ b/src/common/display/Cargo.toml @@ -8,6 +8,9 @@ textwrap = {version = "0.16.1"} [features] python = ["dep:pyo3"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "common-display" diff --git a/src/common/error/Cargo.toml b/src/common/error/Cargo.toml index 8da47b9fee..b64ef5c901 100644 --- a/src/common/error/Cargo.toml +++ b/src/common/error/Cargo.toml @@ -8,6 +8,9 @@ thiserror = {workspace = true} [features] python = ["dep:pyo3"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "common-error" diff --git a/src/common/error/src/python.rs b/src/common/error/src/python.rs index b6b4e48523..917dafdc78 100644 --- a/src/common/error/src/python.rs +++ b/src/common/error/src/python.rs @@ -10,7 +10,7 @@ import_exception!(daft.exceptions, ByteStreamError); import_exception!(daft.exceptions, SocketError); impl std::convert::From for pyo3::PyErr { - fn from(err: DaftError) -> pyo3::PyErr { + fn from(err: DaftError) -> Self { match err { DaftError::PyO3Error(pyerr) => pyerr, DaftError::FileNotFound { path, source } => { diff --git a/src/common/file-formats/Cargo.toml b/src/common/file-formats/Cargo.toml index 9dc2121eb2..2b3495b6d1 100644 --- a/src/common/file-formats/Cargo.toml +++ b/src/common/file-formats/Cargo.toml @@ -9,6 +9,9 @@ serde_json = {workspace = true, optional = true} [features] python = ["dep:pyo3", "dep:serde_json", "common-error/python", "common-py-serde/python", "daft-schema/python"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "common-file-formats" diff --git a/src/common/hashable-float-wrapper/Cargo.toml b/src/common/hashable-float-wrapper/Cargo.toml index ce370ed965..541535f260 100644 --- a/src/common/hashable-float-wrapper/Cargo.toml +++ b/src/common/hashable-float-wrapper/Cargo.toml @@ -1,6 +1,9 @@ [dependencies] serde = {workspace = true} +[lints] +workspace = true + [package] edition = {workspace = true} name = "common-hashable-float-wrapper" diff --git a/src/common/io-config/Cargo.toml b/src/common/io-config/Cargo.toml index b1273b4499..a66e9bfa23 100644 --- a/src/common/io-config/Cargo.toml +++ b/src/common/io-config/Cargo.toml @@ -12,6 +12,9 @@ typetag = "0.2.16" [features] python = ["dep:pyo3", "common-error/python", "common-py-serde/python"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "common-io-config" diff --git a/src/common/io-config/src/http.rs b/src/common/io-config/src/http.rs index 275c55f106..6241de3028 100644 --- a/src/common/io-config/src/http.rs +++ b/src/common/io-config/src/http.rs @@ -12,7 +12,7 @@ pub struct HTTPConfig { impl Default for HTTPConfig { fn default() -> Self { - HTTPConfig { + Self { user_agent: "daft/0.0.1".to_string(), // NOTE: Ideally we grab the version of Daft, but that requires a dependency on daft-core bearer_token: None, } @@ -21,7 +21,7 @@ impl Default for HTTPConfig { impl HTTPConfig { pub fn new>(bearer_token: Option) -> Self { - HTTPConfig { + Self { bearer_token: bearer_token.map(|t| t.into()), ..Default::default() } diff --git a/src/common/io-config/src/lib.rs b/src/common/io-config/src/lib.rs index b50b4e4185..46e4de278d 100644 --- a/src/common/io-config/src/lib.rs +++ b/src/common/io-config/src/lib.rs @@ -73,12 +73,12 @@ impl<'de> Deserialize<'de> for ObfuscatedString { D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; - Ok(ObfuscatedString(s.into())) + Ok(Self(s.into())) } } impl From for ObfuscatedString { fn from(value: String) -> Self { - ObfuscatedString(value.into()) + Self(value.into()) } } diff --git a/src/common/io-config/src/python.rs b/src/common/io-config/src/python.rs index ef1276bbc6..6ae67a2443 100644 --- a/src/common/io-config/src/python.rs +++ b/src/common/io-config/src/python.rs @@ -154,7 +154,7 @@ impl IOConfig { gcs: Option, http: Option, ) -> Self { - IOConfig { + Self { config: config::IOConfig { s3: s3.unwrap_or_default().config, azure: azure.unwrap_or_default().config, @@ -171,7 +171,7 @@ impl IOConfig { gcs: Option, http: Option, ) -> Self { - IOConfig { + Self { config: config::IOConfig { s3: s3.map(|s3| s3.config).unwrap_or(self.config.s3.clone()), azure: azure @@ -274,7 +274,7 @@ impl S3Config { profile_name: Option, ) -> PyResult { let def = crate::S3Config::default(); - Ok(S3Config { + Ok(Self { config: crate::S3Config { region_name: region_name.or(def.region_name), endpoint_url: endpoint_url.or(def.endpoint_url), @@ -333,7 +333,7 @@ impl S3Config { force_virtual_addressing: Option, profile_name: Option, ) -> PyResult { - Ok(S3Config { + Ok(Self { config: crate::S3Config { region_name: region_name.or_else(|| self.config.region_name.clone()), endpoint_url: endpoint_url.or_else(|| self.config.endpoint_url.clone()), @@ -545,7 +545,7 @@ impl S3Credentials { }) .transpose()?; - Ok(S3Credentials { + Ok(Self { credentials: crate::S3Credentials { key_id, access_key, @@ -606,7 +606,7 @@ pub struct PyS3CredentialsProvider { impl PyS3CredentialsProvider { pub fn new(provider: Bound) -> PyResult { let hash = provider.hash()?; - Ok(PyS3CredentialsProvider { + Ok(Self { provider: provider.into(), hash, }) @@ -693,7 +693,7 @@ impl AzureConfig { use_ssl: Option, ) -> Self { let def = crate::AzureConfig::default(); - AzureConfig { + Self { config: crate::AzureConfig { storage_account: storage_account.or(def.storage_account), access_key: access_key.map(|v| v.into()).or(def.access_key), @@ -725,7 +725,7 @@ impl AzureConfig { endpoint_url: Option, use_ssl: Option, ) -> Self { - AzureConfig { + Self { config: crate::AzureConfig { storage_account: storage_account.or_else(|| self.config.storage_account.clone()), access_key: access_key @@ -835,7 +835,7 @@ impl GCSConfig { anonymous: Option, ) -> Self { let def = crate::GCSConfig::default(); - GCSConfig { + Self { config: crate::GCSConfig { project_id: project_id.or(def.project_id), credentials: credentials.map(|v| v.into()).or(def.credentials), @@ -852,7 +852,7 @@ impl GCSConfig { token: Option, anonymous: Option, ) -> Self { - GCSConfig { + Self { config: crate::GCSConfig { project_id: project_id.or_else(|| self.config.project_id.clone()), credentials: credentials @@ -907,7 +907,7 @@ impl From for IOConfig { impl HTTPConfig { #[new] pub fn new(bearer_token: Option) -> Self { - HTTPConfig { + Self { config: crate::HTTPConfig::new(bearer_token), } } diff --git a/src/common/io-config/src/s3.rs b/src/common/io-config/src/s3.rs index a6e4fc97b5..cb02fad7fb 100644 --- a/src/common/io-config/src/s3.rs +++ b/src/common/io-config/src/s3.rs @@ -138,7 +138,7 @@ impl S3Config { impl Default for S3Config { fn default() -> Self { - S3Config { + Self { region_name: None, endpoint_url: None, key_id: None, diff --git a/src/common/py-serde/Cargo.toml b/src/common/py-serde/Cargo.toml index e799b0a574..60117ef209 100644 --- a/src/common/py-serde/Cargo.toml +++ b/src/common/py-serde/Cargo.toml @@ -6,6 +6,9 @@ serde = {workspace = true} [features] python = ["dep:pyo3"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "common-py-serde" diff --git a/src/common/resource-request/Cargo.toml b/src/common/resource-request/Cargo.toml index a2db514585..d72d63b796 100644 --- a/src/common/resource-request/Cargo.toml +++ b/src/common/resource-request/Cargo.toml @@ -7,6 +7,9 @@ serde = {workspace = true} [features] python = ["dep:pyo3", "common-py-serde/python"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "common-resource-request" diff --git a/src/common/resource-request/src/lib.rs b/src/common/resource-request/src/lib.rs index 9367a7af06..a422c91475 100644 --- a/src/common/resource-request/src/lib.rs +++ b/src/common/resource-request/src/lib.rs @@ -85,7 +85,7 @@ impl ResourceRequest { /// /// Currently, this returns true unless one resource request has a non-zero CPU request and the other task has a /// non-zero GPU request. - pub fn is_pipeline_compatible_with(&self, other: &ResourceRequest) -> bool { + pub fn is_pipeline_compatible_with(&self, other: &Self) -> bool { let self_num_cpus = self.num_cpus; let self_num_gpus = self.num_gpus; let other_num_cpus = other.num_cpus; @@ -100,7 +100,7 @@ impl ResourceRequest { } } - pub fn max(&self, other: &ResourceRequest) -> Self { + pub fn max(&self, other: &Self) -> Self { let max_num_cpus = lift(float_max, self.num_cpus, other.num_cpus); let max_num_gpus = lift(float_max, self.num_gpus, other.num_gpus); let max_memory_bytes = lift(std::cmp::max, self.memory_bytes, other.memory_bytes); @@ -152,8 +152,8 @@ impl Hash for ResourceRequest { } } -impl AsRef for ResourceRequest { - fn as_ref(&self) -> &ResourceRequest { +impl AsRef for ResourceRequest { + fn as_ref(&self) -> &Self { self } } @@ -200,21 +200,21 @@ impl ResourceRequest { } pub fn with_num_cpus(&self, num_cpus: Option) -> Self { - ResourceRequest { + Self { num_cpus, ..self.clone() } } pub fn with_num_gpus(&self, num_gpus: Option) -> Self { - ResourceRequest { + Self { num_gpus, ..self.clone() } } pub fn with_memory_bytes(&self, memory_bytes: Option) -> Self { - ResourceRequest { + Self { memory_bytes, ..self.clone() } diff --git a/src/common/system-info/Cargo.toml b/src/common/system-info/Cargo.toml index 6f1f367ee7..6548cca9db 100644 --- a/src/common/system-info/Cargo.toml +++ b/src/common/system-info/Cargo.toml @@ -5,6 +5,9 @@ sysinfo = "0.30.7" [features] python = ["dep:pyo3"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "common-system-info" diff --git a/src/common/system-info/src/lib.rs b/src/common/system-info/src/lib.rs index 237e75ba8f..3ef6ba180e 100644 --- a/src/common/system-info/src/lib.rs +++ b/src/common/system-info/src/lib.rs @@ -9,7 +9,7 @@ pub struct SystemInfo { impl Default for SystemInfo { fn default() -> Self { - SystemInfo { + Self { info: sysinfo::System::new_with_specifics( RefreshKind::new() .with_cpu(CpuRefreshKind::everything()) diff --git a/src/common/tracing/Cargo.toml b/src/common/tracing/Cargo.toml index e4f1c87c7a..d72c27a8dc 100644 --- a/src/common/tracing/Cargo.toml +++ b/src/common/tracing/Cargo.toml @@ -4,6 +4,9 @@ tracing = {workspace = true} tracing-chrome = "0.7.2" tracing-subscriber = "0.3" +[lints] +workspace = true + [package] edition = {workspace = true} name = "common-tracing" diff --git a/src/common/treenode/Cargo.toml b/src/common/treenode/Cargo.toml index 15e2771271..2a7ebda4f4 100644 --- a/src/common/treenode/Cargo.toml +++ b/src/common/treenode/Cargo.toml @@ -4,6 +4,9 @@ common-error = {path = "../error", default-features = false} [features] python = ["common-error/python"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "common-treenode" diff --git a/src/common/treenode/src/lib.rs b/src/common/treenode/src/lib.rs index 68da6c47c8..2507de4986 100644 --- a/src/common/treenode/src/lib.rs +++ b/src/common/treenode/src/lib.rs @@ -540,38 +540,29 @@ pub enum TreeNodeRecursion { impl TreeNodeRecursion { /// Continues visiting nodes with `f` depending on the current [`TreeNodeRecursion`] /// value and the fact that `f` is visiting the current node's children. - pub fn visit_children Result>( - self, - f: F, - ) -> Result { + pub fn visit_children Result>(self, f: F) -> Result { match self { - TreeNodeRecursion::Continue => f(), - TreeNodeRecursion::Jump => Ok(TreeNodeRecursion::Continue), - TreeNodeRecursion::Stop => Ok(self), + Self::Continue => f(), + Self::Jump => Ok(Self::Continue), + Self::Stop => Ok(self), } } /// Continues visiting nodes with `f` depending on the current [`TreeNodeRecursion`] /// value and the fact that `f` is visiting the current node's sibling. - pub fn visit_sibling Result>( - self, - f: F, - ) -> Result { + pub fn visit_sibling Result>(self, f: F) -> Result { match self { - TreeNodeRecursion::Continue | TreeNodeRecursion::Jump => f(), - TreeNodeRecursion::Stop => Ok(self), + Self::Continue | Self::Jump => f(), + Self::Stop => Ok(self), } } /// Continues visiting nodes with `f` depending on the current [`TreeNodeRecursion`] /// value and the fact that `f` is visiting the current node's parent. - pub fn visit_parent Result>( - self, - f: F, - ) -> Result { + pub fn visit_parent Result>(self, f: F) -> Result { match self { - TreeNodeRecursion::Continue => f(), - TreeNodeRecursion::Jump | TreeNodeRecursion::Stop => Ok(self), + Self::Continue => f(), + Self::Jump | Self::Stop => Ok(self), } } } @@ -670,10 +661,7 @@ impl Transformed { /// Maps the [`Transformed`] object to the result of the given `f` depending on the /// current [`TreeNodeRecursion`] value and the fact that `f` is changing the current /// node's children. - pub fn transform_children Result>>( - mut self, - f: F, - ) -> Result> { + pub fn transform_children Result>(mut self, f: F) -> Result { match self.tnr { TreeNodeRecursion::Continue => { return f(self.data).map(|mut t| { @@ -692,10 +680,7 @@ impl Transformed { /// Maps the [`Transformed`] object to the result of the given `f` depending on the /// current [`TreeNodeRecursion`] value and the fact that `f` is changing the current /// node's sibling. - pub fn transform_sibling Result>>( - self, - f: F, - ) -> Result> { + pub fn transform_sibling Result>(self, f: F) -> Result { match self.tnr { TreeNodeRecursion::Continue | TreeNodeRecursion::Jump => f(self.data).map(|mut t| { t.transformed |= self.transformed; @@ -708,10 +693,7 @@ impl Transformed { /// Maps the [`Transformed`] object to the result of the given `f` depending on the /// current [`TreeNodeRecursion`] value and the fact that `f` is changing the current /// node's parent. - pub fn transform_parent Result>>( - self, - f: F, - ) -> Result> { + pub fn transform_parent Result>(self, f: F) -> Result { match self.tnr { TreeNodeRecursion::Continue => f(self.data).map(|mut t| { t.transformed |= self.transformed; @@ -951,7 +933,7 @@ mod tests { } impl TestTreeNode { - fn new(children: Vec>, data: T) -> Self { + fn new(children: Vec, data: T) -> Self { Self { children, data } } } diff --git a/src/common/version/Cargo.toml b/src/common/version/Cargo.toml index 69f162f811..be36a90711 100644 --- a/src/common/version/Cargo.toml +++ b/src/common/version/Cargo.toml @@ -1,3 +1,6 @@ +[lints] +workspace = true + [package] edition = {workspace = true} name = "common-version" diff --git a/src/daft-compression/Cargo.toml b/src/daft-compression/Cargo.toml index 6b695535e5..9ce9b1f862 100644 --- a/src/daft-compression/Cargo.toml +++ b/src/daft-compression/Cargo.toml @@ -3,6 +3,9 @@ async-compression = {workspace = true} tokio = {workspace = true} url = {workspace = true} +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-compression" diff --git a/src/daft-core/Cargo.toml b/src/daft-core/Cargo.toml index 36ea9b68df..ec15924316 100644 --- a/src/daft-core/Cargo.toml +++ b/src/daft-core/Cargo.toml @@ -63,6 +63,9 @@ python = [ "daft-schema/python" ] +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-core" diff --git a/src/daft-core/src/array/fixed_size_list_array.rs b/src/daft-core/src/array/fixed_size_list_array.rs index d265f42929..a8b5048b82 100644 --- a/src/daft-core/src/array/fixed_size_list_array.rs +++ b/src/daft-core/src/array/fixed_size_list_array.rs @@ -53,7 +53,7 @@ impl FixedSizeListArray { field ), } - FixedSizeListArray { + Self { field, flat_child, validity, @@ -90,7 +90,7 @@ impl FixedSizeListArray { growable .build() - .map(|s| s.downcast::().unwrap().clone()) + .map(|s| s.downcast::().unwrap().clone()) } pub fn len(&self) -> usize { diff --git a/src/daft-core/src/array/from.rs b/src/daft-core/src/array/from.rs index 3ef75a23a7..b48c16a4ba 100644 --- a/src/daft-core/src/array/from.rs +++ b/src/daft-core/src/array/from.rs @@ -15,35 +15,35 @@ impl From<(&str, Box>)) -> Self { let (name, array) = item; - DataArray::new(Field::new(name, T::get_dtype()).into(), array).unwrap() + Self::new(Field::new(name, T::get_dtype()).into(), array).unwrap() } } impl From<(&str, Box)> for NullArray { fn from(item: (&str, Box)) -> Self { let (name, array) = item; - DataArray::new(Field::new(name, DataType::Null).into(), array).unwrap() + Self::new(Field::new(name, DataType::Null).into(), array).unwrap() } } impl From<(&str, Box>)> for Utf8Array { fn from(item: (&str, Box>)) -> Self { let (name, array) = item; - DataArray::new(Field::new(name, DataType::Utf8).into(), array).unwrap() + Self::new(Field::new(name, DataType::Utf8).into(), array).unwrap() } } impl From<(&str, Box>)> for BinaryArray { fn from(item: (&str, Box>)) -> Self { let (name, array) = item; - DataArray::new(Field::new(name, DataType::Binary).into(), array).unwrap() + Self::new(Field::new(name, DataType::Binary).into(), array).unwrap() } } impl From<(&str, Box)> for FixedSizeBinaryArray { fn from(item: (&str, Box)) -> Self { let (name, array) = item; - DataArray::new( + Self::new( Field::new(name, DataType::FixedSizeBinary(array.size())).into(), array, ) @@ -58,7 +58,7 @@ where fn from((name, array, length): (&str, I, usize)) -> Self { let array = Cow::from(array); let array = array.into_owned(); - DataArray::new( + Self::new( Field::new(name, DataType::FixedSizeBinary(length)).into(), Box::new(arrow2::array::FixedSizeBinaryArray::new( arrow2::datatypes::DataType::FixedSizeBinary(length), @@ -79,7 +79,7 @@ where let arrow_array = Box::new(arrow2::array::PrimitiveArray::::from_slice( slice, )); - DataArray::new(Field::new(name, T::get_dtype()).into(), arrow_array).unwrap() + Self::new(Field::new(name, T::get_dtype()).into(), arrow_array).unwrap() } } @@ -90,7 +90,7 @@ where fn from(item: (&str, Vec)) -> Self { let (name, v) = item; let arrow_array = Box::new(arrow2::array::PrimitiveArray::::from_vec(v)); - DataArray::new(Field::new(name, T::get_dtype()).into(), arrow_array).unwrap() + Self::new(Field::new(name, T::get_dtype()).into(), arrow_array).unwrap() } } @@ -98,7 +98,7 @@ impl From<(&str, &[bool])> for BooleanArray { fn from(item: (&str, &[bool])) -> Self { let (name, slice) = item; let arrow_array = Box::new(arrow2::array::BooleanArray::from_slice(slice)); - DataArray::new(Field::new(name, DataType::Boolean).into(), arrow_array).unwrap() + Self::new(Field::new(name, DataType::Boolean).into(), arrow_array).unwrap() } } @@ -108,14 +108,14 @@ impl From<(&str, &[Option])> for BooleanArray { let arrow_array = Box::new(arrow2::array::BooleanArray::from_trusted_len_iter( slice.iter().cloned(), )); - DataArray::new(Field::new(name, DataType::Boolean).into(), arrow_array).unwrap() + Self::new(Field::new(name, DataType::Boolean).into(), arrow_array).unwrap() } } impl From<(&str, arrow2::array::BooleanArray)> for BooleanArray { fn from(item: (&str, arrow2::array::BooleanArray)) -> Self { let (name, arrow_array) = item; - DataArray::new( + Self::new( Field::new(name, DataType::Boolean).into(), Box::new(arrow_array), ) @@ -126,7 +126,7 @@ impl From<(&str, arrow2::array::BooleanArray)> for BooleanArray { impl From<(&str, arrow2::bitmap::Bitmap)> for BooleanArray { fn from(item: (&str, arrow2::bitmap::Bitmap)) -> Self { let (name, bitmap) = item; - DataArray::new( + Self::new( Field::new(name, DataType::Boolean).into(), Box::new(arrow2::array::BooleanArray::new( arrow2::datatypes::DataType::Boolean, @@ -141,7 +141,7 @@ impl From<(&str, arrow2::bitmap::Bitmap)> for BooleanArray { impl From<(&str, Box)> for BooleanArray { fn from(item: (&str, Box)) -> Self { let (name, arrow_array) = item; - DataArray::new(Field::new(name, DataType::Boolean).into(), arrow_array).unwrap() + Self::new(Field::new(name, DataType::Boolean).into(), arrow_array).unwrap() } } @@ -155,7 +155,7 @@ impl From<(&str, Vec)> for crate::datatypes::PythonArray { PseudoArrowArray::::from_pyobj_vec(vec_pyobj), ); let field = Field::new(name, DataType::Python); - DataArray::new(field.into(), arrow_array).unwrap() + Self::new(field.into(), arrow_array).unwrap() } } @@ -163,7 +163,7 @@ impl> From<(&str, &[T])> for DataArray { fn from(item: (&str, &[T])) -> Self { let (name, slice) = item; let arrow_array = Box::new(arrow2::array::Utf8Array::::from_slice(slice)); - DataArray::new(Field::new(name, DataType::Utf8).into(), arrow_array).unwrap() + Self::new(Field::new(name, DataType::Utf8).into(), arrow_array).unwrap() } } @@ -171,7 +171,7 @@ impl From<(&str, &[u8])> for BinaryArray { fn from(item: (&str, &[u8])) -> Self { let (name, slice) = item; let arrow_array = Box::new(arrow2::array::BinaryArray::::from_slice([slice])); - DataArray::new(Field::new(name, DataType::Binary).into(), arrow_array).unwrap() + Self::new(Field::new(name, DataType::Binary).into(), arrow_array).unwrap() } } @@ -183,7 +183,7 @@ impl>> TryFrom<(F, Box)) -> DaftResult { let (field, array) = item; let field: Arc = field.into(); - DataArray::new(field, array) + Self::new(field, array) } } @@ -211,7 +211,7 @@ impl TryFrom<(&str, Vec, Vec)> for BinaryArray { data.into(), None, )?; - DataArray::new( + Self::new( Field::new(name, DataType::Binary).into(), Box::new(bin_array), ) @@ -234,6 +234,6 @@ impl ), ) -> DaftResult { let (name, array) = item; - DataArray::new(Field::new(name, DataType::Python).into(), Box::new(array)) + Self::new(Field::new(name, DataType::Python).into(), Box::new(array)) } } diff --git a/src/daft-core/src/array/from_iter.rs b/src/daft-core/src/array/from_iter.rs index 484b676f30..45e0e112a9 100644 --- a/src/daft-core/src/array/from_iter.rs +++ b/src/daft-core/src/array/from_iter.rs @@ -11,7 +11,7 @@ where ) -> Self { let arrow_array = Box::new(arrow2::array::PrimitiveArray::::from_trusted_len_iter(iter)); - DataArray::new(Field::new(name, T::get_dtype()).into(), arrow_array).unwrap() + Self::new(Field::new(name, T::get_dtype()).into(), arrow_array).unwrap() } } @@ -21,7 +21,7 @@ impl Utf8Array { iter: impl arrow2::trusted_len::TrustedLen>, ) -> Self { let arrow_array = Box::new(arrow2::array::Utf8Array::::from_trusted_len_iter(iter)); - DataArray::new( + Self::new( Field::new(name, crate::datatypes::DataType::Utf8).into(), arrow_array, ) @@ -37,7 +37,7 @@ impl BinaryArray { let arrow_array = Box::new(arrow2::array::BinaryArray::::from_trusted_len_iter( iter, )); - DataArray::new( + Self::new( Field::new(name, crate::datatypes::DataType::Binary).into(), arrow_array, ) @@ -52,7 +52,7 @@ impl FixedSizeBinaryArray { size: usize, ) -> Self { let arrow_array = Box::new(arrow2::array::FixedSizeBinaryArray::from_iter(iter, size)); - DataArray::new( + Self::new( Field::new(name, crate::datatypes::DataType::FixedSizeBinary(size)).into(), arrow_array, ) @@ -66,7 +66,7 @@ impl BooleanArray { iter: impl arrow2::trusted_len::TrustedLen>, ) -> Self { let arrow_array = Box::new(arrow2::array::BooleanArray::from_trusted_len_iter(iter)); - DataArray::new( + Self::new( Field::new(name, crate::datatypes::DataType::Boolean).into(), arrow_array, ) @@ -85,7 +85,7 @@ where let arrow_array = Box::new( arrow2::array::PrimitiveArray::::from_trusted_len_values_iter(iter), ); - DataArray::new(Field::new(name, T::get_dtype()).into(), arrow_array).unwrap() + Self::new(Field::new(name, T::get_dtype()).into(), arrow_array).unwrap() } } @@ -96,7 +96,7 @@ impl Utf8Array { ) -> Self { let arrow_array = Box::new(arrow2::array::Utf8Array::::from_trusted_len_values_iter(iter)); - DataArray::new(Field::new(name, DataType::Utf8).into(), arrow_array).unwrap() + Self::new(Field::new(name, DataType::Utf8).into(), arrow_array).unwrap() } } @@ -107,7 +107,7 @@ impl BinaryArray { ) -> Self { let arrow_array = Box::new(arrow2::array::BinaryArray::::from_trusted_len_values_iter(iter)); - DataArray::new(Field::new(name, DataType::Binary).into(), arrow_array).unwrap() + Self::new(Field::new(name, DataType::Binary).into(), arrow_array).unwrap() } } @@ -119,6 +119,6 @@ impl BooleanArray { let arrow_array = Box::new(arrow2::array::BooleanArray::from_trusted_len_values_iter( iter, )); - DataArray::new(Field::new(name, DataType::Boolean).into(), arrow_array).unwrap() + Self::new(Field::new(name, DataType::Boolean).into(), arrow_array).unwrap() } } diff --git a/src/daft-core/src/array/image_array.rs b/src/daft-core/src/array/image_array.rs index 205075efbf..5daa11d42d 100644 --- a/src/daft-core/src/array/image_array.rs +++ b/src/daft-core/src/array/image_array.rs @@ -19,7 +19,7 @@ impl BBox { .downcast_ref::() .unwrap() .iter(); - BBox( + Self( *iter.next().unwrap().unwrap(), *iter.next().unwrap().unwrap(), *iter.next().unwrap().unwrap(), diff --git a/src/daft-core/src/array/list_array.rs b/src/daft-core/src/array/list_array.rs index 964503b271..538c24e716 100644 --- a/src/daft-core/src/array/list_array.rs +++ b/src/daft-core/src/array/list_array.rs @@ -53,7 +53,7 @@ impl ListArray { field ), } - ListArray { + Self { field, flat_child, offsets, @@ -102,7 +102,7 @@ impl ListArray { growable .build() - .map(|s| s.downcast::().unwrap().clone()) + .map(|s| s.downcast::().unwrap().clone()) } pub fn len(&self) -> usize { diff --git a/src/daft-core/src/array/mod.rs b/src/daft-core/src/array/mod.rs index 21a811b403..7c300c6a38 100644 --- a/src/daft-core/src/array/mod.rs +++ b/src/daft-core/src/array/mod.rs @@ -30,7 +30,7 @@ pub struct DataArray { impl Clone for DataArray { fn clone(&self) -> Self { - DataArray::new(self.field.clone(), self.data.clone()).unwrap() + Self::new(self.field.clone(), self.data.clone()).unwrap() } } @@ -44,7 +44,7 @@ impl DataArray where T: DaftPhysicalType, { - pub fn new(field: Arc, data: Box) -> DaftResult> { + pub fn new(field: Arc, data: Box) -> DaftResult { assert!( field.dtype.is_physical(), "Can only construct DataArray for PhysicalTypes, got {}", @@ -61,7 +61,7 @@ where } } - Ok(DataArray { + Ok(Self { field, data, marker_: PhantomData, @@ -93,7 +93,7 @@ where ))); } let with_bitmap = self.data.with_validity(Some(Bitmap::from(validity))); - DataArray::new(self.field.clone(), with_bitmap) + Self::new(self.field.clone(), with_bitmap) } pub fn with_validity(&self, validity: Option) -> DaftResult { @@ -107,7 +107,7 @@ where ))); } let with_bitmap = self.data.with_validity(validity); - DataArray::new(self.field.clone(), with_bitmap) + Self::new(self.field.clone(), with_bitmap) } pub fn validity(&self) -> Option<&Bitmap> { diff --git a/src/daft-core/src/array/ops/apply.rs b/src/daft-core/src/array/ops/apply.rs index 159232e602..f5388bfbc6 100644 --- a/src/daft-core/src/array/ops/apply.rs +++ b/src/daft-core/src/array/ops/apply.rs @@ -20,7 +20,7 @@ where PrimitiveArray::from_trusted_len_values_iter(arr.values_iter().map(|v| func(*v))) .with_validity(arr.validity().cloned()); - Ok(DataArray::from((self.name(), Box::new(result_arr)))) + Ok(Self::from((self.name(), Box::new(result_arr)))) } // applies a native binary function to two DataArrays, maintaining validity. @@ -44,17 +44,13 @@ where zip(lhs_arr.values_iter(), rhs_arr.values_iter()).map(|(a, b)| func(*a, *b)), ) .with_validity(validity); - Ok(DataArray::from((self.name(), Box::new(result_arr)))) + Ok(Self::from((self.name(), Box::new(result_arr)))) } (l_size, 1) => { if let Some(value) = rhs.get(0) { self.apply(|v| func(v, value)) } else { - Ok(DataArray::::full_null( - self.name(), - self.data_type(), - l_size, - )) + Ok(Self::full_null(self.name(), self.data_type(), l_size)) } } (1, r_size) => { @@ -65,13 +61,9 @@ where rhs_arr.values_iter().map(|v| func(value, *v)), ) .with_validity(rhs_arr.validity().cloned()); - Ok(DataArray::from((self.name(), Box::new(result_arr)))) + Ok(Self::from((self.name(), Box::new(result_arr)))) } else { - Ok(DataArray::::full_null( - self.name(), - self.data_type(), - r_size, - )) + Ok(Self::full_null(self.name(), self.data_type(), r_size)) } } (l, r) => Err(DaftError::ValueError(format!( diff --git a/src/daft-core/src/array/ops/approx_count_distinct.rs b/src/daft-core/src/array/ops/approx_count_distinct.rs index 068275d2ed..66f2008ed1 100644 --- a/src/daft-core/src/array/ops/approx_count_distinct.rs +++ b/src/daft-core/src/array/ops/approx_count_distinct.rs @@ -10,7 +10,7 @@ use crate::{ }; impl DaftApproxCountDistinctAggable for UInt64Array { - type Output = DaftResult; + type Output = DaftResult; fn approx_count_distinct(&self) -> Self::Output { let mut set = HashSet::with_capacity_and_hasher(self.len(), IdentityBuildHasher::default()); diff --git a/src/daft-core/src/array/ops/arange.rs b/src/daft-core/src/array/ops/arange.rs index 33da976928..a9c4e62b1d 100644 --- a/src/daft-core/src/array/ops/arange.rs +++ b/src/daft-core/src/array/ops/arange.rs @@ -19,7 +19,7 @@ where let arrow_array = Box::new(arrow2::array::PrimitiveArray::::from_vec(data)); let data_array = Int64Array::from((name.as_ref(), arrow_array)); let casted_array = data_array.cast(&T::get_dtype())?; - let downcasted = casted_array.downcast::>()?; + let downcasted = casted_array.downcast::()?; Ok(downcasted.clone()) } } diff --git a/src/daft-core/src/array/ops/between.rs b/src/daft-core/src/array/ops/between.rs index 90c22d2151..09e1914372 100644 --- a/src/daft-core/src/array/ops/between.rs +++ b/src/daft-core/src/array/ops/between.rs @@ -6,13 +6,13 @@ use crate::{ datatypes::{BooleanArray, DaftNumericType}, }; -impl DaftBetween<&DataArray, &DataArray> for DataArray +impl DaftBetween<&Self, &Self> for DataArray where T: DaftNumericType, { type Output = DaftResult; - fn between(&self, lower: &DataArray, upper: &DataArray) -> Self::Output { + fn between(&self, lower: &Self, upper: &Self) -> Self::Output { let are_two_equal_and_single_one = |v_size, l_size, u_size: usize| { [v_size, l_size, u_size] .iter() diff --git a/src/daft-core/src/array/ops/bitwise.rs b/src/daft-core/src/array/ops/bitwise.rs index 6a38eee06a..4d4630bbad 100644 --- a/src/daft-core/src/array/ops/bitwise.rs +++ b/src/daft-core/src/array/ops/bitwise.rs @@ -8,7 +8,7 @@ use crate::{ datatypes::{DaftIntegerType, DaftNumericType}, }; -impl DaftLogical<&DataArray> for DataArray +impl DaftLogical<&Self> for DataArray where T: DaftIntegerType, ::Native: @@ -16,15 +16,15 @@ where { type Output = DaftResult; - fn and(&self, rhs: &DataArray) -> Self::Output { + fn and(&self, rhs: &Self) -> Self::Output { self.binary_apply(rhs, |lhs, rhs| lhs.bitand(rhs)) } - fn or(&self, rhs: &DataArray) -> Self::Output { + fn or(&self, rhs: &Self) -> Self::Output { self.binary_apply(rhs, |lhs, rhs| lhs.bitor(rhs)) } - fn xor(&self, rhs: &DataArray) -> Self::Output { + fn xor(&self, rhs: &Self) -> Self::Output { self.binary_apply(rhs, |lhs, rhs| lhs.bitxor(rhs)) } } diff --git a/src/daft-core/src/array/ops/broadcast.rs b/src/daft-core/src/array/ops/broadcast.rs index b372ab2ef6..2a81dd5a5e 100644 --- a/src/daft-core/src/array/ops/broadcast.rs +++ b/src/daft-core/src/array/ops/broadcast.rs @@ -35,7 +35,7 @@ where impl Broadcastable for DataArray where T: DaftPhysicalType + 'static, - DataArray: GrowableArray, + Self: GrowableArray, { fn broadcast(&self, num: usize) -> DaftResult { if self.len() != 1 { @@ -48,7 +48,7 @@ where if self.is_valid(0) { generic_growable_broadcast(self, num, self.name(), self.data_type()) } else { - Ok(DataArray::full_null(self.name(), self.data_type(), num)) + Ok(Self::full_null(self.name(), self.data_type(), num)) } } } @@ -65,11 +65,7 @@ impl Broadcastable for FixedSizeListArray { if self.is_valid(0) { generic_growable_broadcast(self, num, self.name(), self.data_type()) } else { - Ok(FixedSizeListArray::full_null( - self.name(), - self.data_type(), - num, - )) + Ok(Self::full_null(self.name(), self.data_type(), num)) } } } @@ -86,7 +82,7 @@ impl Broadcastable for ListArray { if self.is_valid(0) { generic_growable_broadcast(self, num, self.name(), self.data_type()) } else { - Ok(ListArray::full_null(self.name(), self.data_type(), num)) + Ok(Self::full_null(self.name(), self.data_type(), num)) } } } @@ -103,7 +99,7 @@ impl Broadcastable for StructArray { if self.is_valid(0) { generic_growable_broadcast(self, num, self.name(), self.data_type()) } else { - Ok(StructArray::full_null(self.name(), self.data_type(), num)) + Ok(Self::full_null(self.name(), self.data_type(), num)) } } } diff --git a/src/daft-core/src/array/ops/cast.rs b/src/daft-core/src/array/ops/cast.rs index 3f14b8f2f5..c3dbe0c209 100644 --- a/src/daft-core/src/array/ops/cast.rs +++ b/src/daft-core/src/array/ops/cast.rs @@ -1956,7 +1956,7 @@ impl FixedSizeListArray { ))); } let casted_child = self.flat_child.cast(child_dtype.as_ref())?; - Ok(FixedSizeListArray::new( + Ok(Self::new( Field::new(self.name().to_string(), dtype.clone()), casted_child, self.validity().cloned(), @@ -2018,7 +2018,7 @@ impl FixedSizeListArray { impl ListArray { pub fn cast(&self, dtype: &DataType) -> DaftResult { match dtype { - DataType::List(child_dtype) => Ok(ListArray::new( + DataType::List(child_dtype) => Ok(Self::new( Field::new(self.name(), dtype.clone()), self.flat_child.cast(child_dtype.as_ref())?, self.offsets().clone(), @@ -2138,7 +2138,7 @@ impl StructArray { }, ) .collect::>>(); - Ok(StructArray::new( + Ok(Self::new( Field::new(self.name(), dtype.clone()), casted_series?, self.validity().cloned(), diff --git a/src/daft-core/src/array/ops/compare_agg.rs b/src/daft-core/src/array/ops/compare_agg.rs index 0fc139b36d..5d5237d9c1 100644 --- a/src/daft-core/src/array/ops/compare_agg.rs +++ b/src/daft-core/src/array/ops/compare_agg.rs @@ -64,7 +64,7 @@ where T::Native: PartialOrd, ::Simd: arrow2::compute::aggregate::SimdOrd, { - type Output = DaftResult>; + type Output = DaftResult; fn min(&self) -> Self::Output { let primitive_arr = self.as_arrow(); @@ -72,7 +72,7 @@ where let result = arrow2::compute::aggregate::min_primitive(primitive_arr); let arrow_array = Box::new(arrow2::array::PrimitiveArray::from([result])); - DataArray::new(self.field.clone(), arrow_array) + Self::new(self.field.clone(), arrow_array) } fn max(&self) -> Self::Output { @@ -81,7 +81,7 @@ where let result = arrow2::compute::aggregate::max_primitive(primitive_arr); let arrow_array = Box::new(arrow2::array::PrimitiveArray::from([result])); - DataArray::new(self.field.clone(), arrow_array) + Self::new(self.field.clone(), arrow_array) } fn grouped_min(&self, groups: &GroupIndices) -> Self::Output { grouped_cmp_native( @@ -157,14 +157,14 @@ where } impl DaftCompareAggable for DataArray { - type Output = DaftResult>; + type Output = DaftResult; fn min(&self) -> Self::Output { let arrow_array: &arrow2::array::Utf8Array = self.as_arrow(); let result = arrow2::compute::aggregate::min_string(arrow_array); let res_arrow_array = arrow2::array::Utf8Array::::from([result]); - DataArray::new(self.field.clone(), Box::new(res_arrow_array)) + Self::new(self.field.clone(), Box::new(res_arrow_array)) } fn max(&self) -> Self::Output { let arrow_array: &arrow2::array::Utf8Array = self.as_arrow(); @@ -172,7 +172,7 @@ impl DaftCompareAggable for DataArray { let result = arrow2::compute::aggregate::max_string(arrow_array); let res_arrow_array = arrow2::array::Utf8Array::::from([result]); - DataArray::new(self.field.clone(), Box::new(res_arrow_array)) + Self::new(self.field.clone(), Box::new(res_arrow_array)) } fn grouped_min(&self, groups: &GroupIndices) -> Self::Output { @@ -237,14 +237,14 @@ where } impl DaftCompareAggable for DataArray { - type Output = DaftResult>; + type Output = DaftResult; fn min(&self) -> Self::Output { let arrow_array: &arrow2::array::BinaryArray = self.as_arrow(); let result = arrow2::compute::aggregate::min_binary(arrow_array); let res_arrow_array = arrow2::array::BinaryArray::::from([result]); - DataArray::new(self.field.clone(), Box::new(res_arrow_array)) + Self::new(self.field.clone(), Box::new(res_arrow_array)) } fn max(&self) -> Self::Output { let arrow_array: &arrow2::array::BinaryArray = self.as_arrow(); @@ -252,7 +252,7 @@ impl DaftCompareAggable for DataArray { let result = arrow2::compute::aggregate::max_binary(arrow_array); let res_arrow_array = arrow2::array::BinaryArray::::from([result]); - DataArray::new(self.field.clone(), Box::new(res_arrow_array)) + Self::new(self.field.clone(), Box::new(res_arrow_array)) } fn grouped_min(&self, groups: &GroupIndices) -> Self::Output { @@ -354,7 +354,7 @@ where } impl DaftCompareAggable for DataArray { - type Output = DaftResult>; + type Output = DaftResult; fn min(&self) -> Self::Output { cmp_fixed_size_binary(self, |l, r| l.min(r)) } @@ -423,14 +423,14 @@ fn grouped_cmp_bool( } impl DaftCompareAggable for DataArray { - type Output = DaftResult>; + type Output = DaftResult; fn min(&self) -> Self::Output { let arrow_array: &arrow2::array::BooleanArray = self.as_arrow(); let result = arrow2::compute::aggregate::min_boolean(arrow_array); let res_arrow_array = arrow2::array::BooleanArray::from([result]); - DataArray::new(self.field.clone(), Box::new(res_arrow_array)) + Self::new(self.field.clone(), Box::new(res_arrow_array)) } fn max(&self) -> Self::Output { let arrow_array: &arrow2::array::BooleanArray = self.as_arrow(); @@ -438,7 +438,7 @@ impl DaftCompareAggable for DataArray { let result = arrow2::compute::aggregate::max_boolean(arrow_array); let res_arrow_array = arrow2::array::BooleanArray::from([result]); - DataArray::new(self.field.clone(), Box::new(res_arrow_array)) + Self::new(self.field.clone(), Box::new(res_arrow_array)) } fn grouped_min(&self, groups: &GroupIndices) -> Self::Output { @@ -451,11 +451,11 @@ impl DaftCompareAggable for DataArray { } impl DaftCompareAggable for DataArray { - type Output = DaftResult>; + type Output = DaftResult; fn min(&self) -> Self::Output { let res_arrow_array = arrow2::array::NullArray::new(arrow2::datatypes::DataType::Null, 1); - DataArray::new(self.field.clone(), Box::new(res_arrow_array)) + Self::new(self.field.clone(), Box::new(res_arrow_array)) } fn max(&self) -> Self::Output { @@ -464,19 +464,11 @@ impl DaftCompareAggable for DataArray { } fn grouped_min(&self, groups: &super::GroupIndices) -> Self::Output { - Ok(DataArray::full_null( - self.name(), - self.data_type(), - groups.len(), - )) + Ok(Self::full_null(self.name(), self.data_type(), groups.len())) } fn grouped_max(&self, groups: &super::GroupIndices) -> Self::Output { - Ok(DataArray::full_null( - self.name(), - self.data_type(), - groups.len(), - )) + Ok(Self::full_null(self.name(), self.data_type(), groups.len())) } } diff --git a/src/daft-core/src/array/ops/comparison.rs b/src/daft-core/src/array/ops/comparison.rs index 0f76b338ff..aee84893de 100644 --- a/src/daft-core/src/array/ops/comparison.rs +++ b/src/daft-core/src/array/ops/comparison.rs @@ -23,13 +23,13 @@ where } } -impl DaftCompare<&DataArray> for DataArray +impl DaftCompare<&Self> for DataArray where T: DaftNumericType, { type Output = DaftResult; - fn equal(&self, rhs: &DataArray) -> Self::Output { + fn equal(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -69,7 +69,7 @@ where } } - fn not_equal(&self, rhs: &DataArray) -> Self::Output { + fn not_equal(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -109,7 +109,7 @@ where } } - fn lt(&self, rhs: &DataArray) -> Self::Output { + fn lt(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -149,7 +149,7 @@ where } } - fn lte(&self, rhs: &DataArray) -> Self::Output { + fn lte(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -189,7 +189,7 @@ where } } - fn gt(&self, rhs: &DataArray) -> Self::Output { + fn gt(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -229,7 +229,7 @@ where } } - fn gte(&self, rhs: &DataArray) -> Self::Output { + fn gte(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -337,15 +337,15 @@ where } } -impl DaftCompare<&BooleanArray> for BooleanArray { - type Output = DaftResult; +impl DaftCompare<&Self> for BooleanArray { + type Output = DaftResult; - fn equal(&self, rhs: &BooleanArray) -> Self::Output { + fn equal(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = arrow_bitmap_and_helper(self.as_arrow().validity(), rhs.as_arrow().validity()); - Ok(BooleanArray::from(( + Ok(Self::from(( self.name(), comparison::eq(self.as_arrow(), rhs.as_arrow()).with_validity(validity), ))) @@ -354,22 +354,14 @@ impl DaftCompare<&BooleanArray> for BooleanArray { if let Some(value) = rhs.get(0) { self.equal(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - l_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, l_size)) } } (1, r_size) => { if let Some(value) = self.get(0) { rhs.equal(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - r_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, r_size)) } } (l, r) => Err(DaftError::ValueError(format!( @@ -380,12 +372,12 @@ impl DaftCompare<&BooleanArray> for BooleanArray { } } - fn not_equal(&self, rhs: &BooleanArray) -> Self::Output { + fn not_equal(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = arrow_bitmap_and_helper(self.as_arrow().validity(), rhs.as_arrow().validity()); - Ok(BooleanArray::from(( + Ok(Self::from(( self.name(), comparison::neq(self.as_arrow(), rhs.as_arrow()).with_validity(validity), ))) @@ -394,22 +386,14 @@ impl DaftCompare<&BooleanArray> for BooleanArray { if let Some(value) = rhs.get(0) { self.not_equal(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - l_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, l_size)) } } (1, r_size) => { if let Some(value) = self.get(0) { rhs.not_equal(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - r_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, r_size)) } } (l, r) => Err(DaftError::ValueError(format!( @@ -420,12 +404,12 @@ impl DaftCompare<&BooleanArray> for BooleanArray { } } - fn lt(&self, rhs: &BooleanArray) -> Self::Output { + fn lt(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = arrow_bitmap_and_helper(self.as_arrow().validity(), rhs.as_arrow().validity()); - Ok(BooleanArray::from(( + Ok(Self::from(( self.name(), comparison::lt(self.as_arrow(), rhs.as_arrow()).with_validity(validity), ))) @@ -434,22 +418,14 @@ impl DaftCompare<&BooleanArray> for BooleanArray { if let Some(value) = rhs.get(0) { self.lt(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - l_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, l_size)) } } (1, r_size) => { if let Some(value) = self.get(0) { rhs.gt(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - r_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, r_size)) } } (l, r) => Err(DaftError::ValueError(format!( @@ -460,12 +436,12 @@ impl DaftCompare<&BooleanArray> for BooleanArray { } } - fn lte(&self, rhs: &BooleanArray) -> Self::Output { + fn lte(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = arrow_bitmap_and_helper(self.as_arrow().validity(), rhs.as_arrow().validity()); - Ok(BooleanArray::from(( + Ok(Self::from(( self.name(), comparison::lt_eq(self.as_arrow(), rhs.as_arrow()).with_validity(validity), ))) @@ -474,22 +450,14 @@ impl DaftCompare<&BooleanArray> for BooleanArray { if let Some(value) = rhs.get(0) { self.lte(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - l_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, l_size)) } } (1, r_size) => { if let Some(value) = self.get(0) { rhs.gte(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - r_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, r_size)) } } (l, r) => Err(DaftError::ValueError(format!( @@ -500,12 +468,12 @@ impl DaftCompare<&BooleanArray> for BooleanArray { } } - fn gt(&self, rhs: &BooleanArray) -> Self::Output { + fn gt(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = arrow_bitmap_and_helper(self.as_arrow().validity(), rhs.as_arrow().validity()); - Ok(BooleanArray::from(( + Ok(Self::from(( self.name(), comparison::gt(self.as_arrow(), rhs.as_arrow()).with_validity(validity), ))) @@ -514,22 +482,14 @@ impl DaftCompare<&BooleanArray> for BooleanArray { if let Some(value) = rhs.get(0) { self.gt(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - l_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, l_size)) } } (1, r_size) => { if let Some(value) = self.get(0) { rhs.lt(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - r_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, r_size)) } } (l, r) => Err(DaftError::ValueError(format!( @@ -540,12 +500,12 @@ impl DaftCompare<&BooleanArray> for BooleanArray { } } - fn gte(&self, rhs: &BooleanArray) -> Self::Output { + fn gte(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = arrow_bitmap_and_helper(self.as_arrow().validity(), rhs.as_arrow().validity()); - Ok(BooleanArray::from(( + Ok(Self::from(( self.name(), comparison::gt_eq(self.as_arrow(), rhs.as_arrow()).with_validity(validity), ))) @@ -554,22 +514,14 @@ impl DaftCompare<&BooleanArray> for BooleanArray { if let Some(value) = rhs.get(0) { self.gte(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - l_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, l_size)) } } (1, r_size) => { if let Some(value) = self.get(0) { rhs.lte(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - r_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, r_size)) } } (l, r) => Err(DaftError::ValueError(format!( @@ -582,14 +534,14 @@ impl DaftCompare<&BooleanArray> for BooleanArray { } impl DaftCompare for BooleanArray { - type Output = DaftResult; + type Output = DaftResult; fn equal(&self, rhs: bool) -> Self::Output { let validity = self.as_arrow().validity().cloned(); let arrow_result = comparison::boolean::eq_scalar(self.as_arrow(), rhs).with_validity(validity); - Ok(BooleanArray::from((self.name(), arrow_result))) + Ok(Self::from((self.name(), arrow_result))) } fn not_equal(&self, rhs: bool) -> Self::Output { @@ -597,7 +549,7 @@ impl DaftCompare for BooleanArray { let arrow_result = comparison::boolean::neq_scalar(self.as_arrow(), rhs).with_validity(validity); - Ok(BooleanArray::from((self.name(), arrow_result))) + Ok(Self::from((self.name(), arrow_result))) } fn lt(&self, rhs: bool) -> Self::Output { @@ -605,7 +557,7 @@ impl DaftCompare for BooleanArray { let arrow_result = comparison::boolean::lt_scalar(self.as_arrow(), rhs).with_validity(validity); - Ok(BooleanArray::from((self.name(), arrow_result))) + Ok(Self::from((self.name(), arrow_result))) } fn lte(&self, rhs: bool) -> Self::Output { @@ -613,7 +565,7 @@ impl DaftCompare for BooleanArray { let arrow_result = comparison::boolean::lt_eq_scalar(self.as_arrow(), rhs).with_validity(validity); - Ok(BooleanArray::from((self.name(), arrow_result))) + Ok(Self::from((self.name(), arrow_result))) } fn gt(&self, rhs: bool) -> Self::Output { @@ -621,7 +573,7 @@ impl DaftCompare for BooleanArray { let arrow_result = comparison::boolean::gt_scalar(self.as_arrow(), rhs).with_validity(validity); - Ok(BooleanArray::from((self.name(), arrow_result))) + Ok(Self::from((self.name(), arrow_result))) } fn gte(&self, rhs: bool) -> Self::Output { @@ -629,7 +581,7 @@ impl DaftCompare for BooleanArray { let arrow_result = comparison::boolean::gt_eq_scalar(self.as_arrow(), rhs).with_validity(validity); - Ok(BooleanArray::from((self.name(), arrow_result))) + Ok(Self::from((self.name(), arrow_result))) } } @@ -646,9 +598,9 @@ impl Not for &BooleanArray { } } -impl DaftLogical<&BooleanArray> for BooleanArray { - type Output = DaftResult; - fn and(&self, rhs: &BooleanArray) -> Self::Output { +impl DaftLogical<&Self> for BooleanArray { + type Output = DaftResult; + fn and(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -656,7 +608,7 @@ impl DaftLogical<&BooleanArray> for BooleanArray { let result_bitmap = arrow2::bitmap::and(self.as_arrow().values(), rhs.as_arrow().values()); - Ok(BooleanArray::from(( + Ok(Self::from(( self.name(), arrow2::array::BooleanArray::new( arrow2::datatypes::DataType::Boolean, @@ -669,22 +621,14 @@ impl DaftLogical<&BooleanArray> for BooleanArray { if let Some(value) = rhs.get(0) { self.and(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - l_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, l_size)) } } (1, r_size) => { if let Some(value) = self.get(0) { rhs.and(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - r_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, r_size)) } } (l, r) => Err(DaftError::ValueError(format!( @@ -695,7 +639,7 @@ impl DaftLogical<&BooleanArray> for BooleanArray { } } - fn or(&self, rhs: &BooleanArray) -> Self::Output { + fn or(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -703,7 +647,7 @@ impl DaftLogical<&BooleanArray> for BooleanArray { let result_bitmap = arrow2::bitmap::or(self.as_arrow().values(), rhs.as_arrow().values()); - Ok(BooleanArray::from(( + Ok(Self::from(( self.name(), arrow2::array::BooleanArray::new( arrow2::datatypes::DataType::Boolean, @@ -716,22 +660,14 @@ impl DaftLogical<&BooleanArray> for BooleanArray { if let Some(value) = rhs.get(0) { self.or(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - l_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, l_size)) } } (1, r_size) => { if let Some(value) = self.get(0) { rhs.or(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - r_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, r_size)) } } (l, r) => Err(DaftError::ValueError(format!( @@ -742,7 +678,7 @@ impl DaftLogical<&BooleanArray> for BooleanArray { } } - fn xor(&self, rhs: &BooleanArray) -> Self::Output { + fn xor(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -750,7 +686,7 @@ impl DaftLogical<&BooleanArray> for BooleanArray { let result_bitmap = arrow2::bitmap::xor(self.as_arrow().values(), rhs.as_arrow().values()); - Ok(BooleanArray::from(( + Ok(Self::from(( self.name(), arrow2::array::BooleanArray::new( arrow2::datatypes::DataType::Boolean, @@ -763,22 +699,14 @@ impl DaftLogical<&BooleanArray> for BooleanArray { if let Some(value) = rhs.get(0) { self.xor(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - l_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, l_size)) } } (1, r_size) => { if let Some(value) = self.get(0) { rhs.xor(value) } else { - Ok(BooleanArray::full_null( - self.name(), - &DataType::Boolean, - r_size, - )) + Ok(Self::full_null(self.name(), &DataType::Boolean, r_size)) } } (l, r) => Err(DaftError::ValueError(format!( @@ -815,7 +743,7 @@ macro_rules! null_array_comparison_method { }; } -impl DaftCompare<&NullArray> for NullArray { +impl DaftCompare<&Self> for NullArray { type Output = DaftResult; null_array_comparison_method!(equal); null_array_comparison_method!(not_equal); @@ -826,7 +754,7 @@ impl DaftCompare<&NullArray> for NullArray { } impl DaftLogical for BooleanArray { - type Output = DaftResult; + type Output = DaftResult; fn and(&self, rhs: bool) -> Self::Output { let validity = self.as_arrow().validity(); if rhs { @@ -838,7 +766,7 @@ impl DaftLogical for BooleanArray { Bitmap::new_zeroed(self.len()), validity.cloned(), ); - return Ok(BooleanArray::from((self.name(), arrow_array))); + return Ok(Self::from((self.name(), arrow_array))); } } @@ -851,7 +779,7 @@ impl DaftLogical for BooleanArray { Bitmap::new_zeroed(self.len()).not(), validity.cloned(), ); - return Ok(BooleanArray::from((self.name(), arrow_array))); + return Ok(Self::from((self.name(), arrow_array))); } else { Ok(self.clone()) } @@ -866,10 +794,10 @@ impl DaftLogical for BooleanArray { } } -impl DaftCompare<&Utf8Array> for Utf8Array { +impl DaftCompare<&Self> for Utf8Array { type Output = DaftResult; - fn equal(&self, rhs: &Utf8Array) -> Self::Output { + fn equal(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -909,7 +837,7 @@ impl DaftCompare<&Utf8Array> for Utf8Array { } } - fn not_equal(&self, rhs: &Utf8Array) -> Self::Output { + fn not_equal(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -949,7 +877,7 @@ impl DaftCompare<&Utf8Array> for Utf8Array { } } - fn lt(&self, rhs: &Utf8Array) -> Self::Output { + fn lt(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -989,7 +917,7 @@ impl DaftCompare<&Utf8Array> for Utf8Array { } } - fn lte(&self, rhs: &Utf8Array) -> Self::Output { + fn lte(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -1029,7 +957,7 @@ impl DaftCompare<&Utf8Array> for Utf8Array { } } - fn gt(&self, rhs: &Utf8Array) -> Self::Output { + fn gt(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -1069,7 +997,7 @@ impl DaftCompare<&Utf8Array> for Utf8Array { } } - fn gte(&self, rhs: &Utf8Array) -> Self::Output { + fn gte(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -1162,10 +1090,10 @@ impl DaftCompare<&str> for Utf8Array { } } -impl DaftCompare<&BinaryArray> for BinaryArray { +impl DaftCompare<&Self> for BinaryArray { type Output = DaftResult; - fn equal(&self, rhs: &BinaryArray) -> Self::Output { + fn equal(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -1205,7 +1133,7 @@ impl DaftCompare<&BinaryArray> for BinaryArray { } } - fn not_equal(&self, rhs: &BinaryArray) -> Self::Output { + fn not_equal(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -1245,7 +1173,7 @@ impl DaftCompare<&BinaryArray> for BinaryArray { } } - fn lt(&self, rhs: &BinaryArray) -> Self::Output { + fn lt(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -1285,7 +1213,7 @@ impl DaftCompare<&BinaryArray> for BinaryArray { } } - fn lte(&self, rhs: &BinaryArray) -> Self::Output { + fn lte(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -1325,7 +1253,7 @@ impl DaftCompare<&BinaryArray> for BinaryArray { } } - fn gt(&self, rhs: &BinaryArray) -> Self::Output { + fn gt(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -1365,7 +1293,7 @@ impl DaftCompare<&BinaryArray> for BinaryArray { } } - fn gte(&self, rhs: &BinaryArray) -> Self::Output { + fn gte(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => { let validity = @@ -1515,10 +1443,10 @@ where ) } -impl DaftCompare<&FixedSizeBinaryArray> for FixedSizeBinaryArray { +impl DaftCompare<&Self> for FixedSizeBinaryArray { type Output = DaftResult; - fn equal(&self, rhs: &FixedSizeBinaryArray) -> Self::Output { + fn equal(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => compare_fixed_size_binary(self, rhs, |lhs, rhs| lhs == rhs), (l_size, 1) => { @@ -1551,7 +1479,7 @@ impl DaftCompare<&FixedSizeBinaryArray> for FixedSizeBinaryArray { } } - fn not_equal(&self, rhs: &FixedSizeBinaryArray) -> Self::Output { + fn not_equal(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => compare_fixed_size_binary(self, rhs, |lhs, rhs| lhs != rhs), (l_size, 1) => { @@ -1584,7 +1512,7 @@ impl DaftCompare<&FixedSizeBinaryArray> for FixedSizeBinaryArray { } } - fn lt(&self, rhs: &FixedSizeBinaryArray) -> Self::Output { + fn lt(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => compare_fixed_size_binary(self, rhs, |lhs, rhs| lhs < rhs), (l_size, 1) => { @@ -1617,7 +1545,7 @@ impl DaftCompare<&FixedSizeBinaryArray> for FixedSizeBinaryArray { } } - fn lte(&self, rhs: &FixedSizeBinaryArray) -> Self::Output { + fn lte(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => compare_fixed_size_binary(self, rhs, |lhs, rhs| lhs <= rhs), (l_size, 1) => { @@ -1650,7 +1578,7 @@ impl DaftCompare<&FixedSizeBinaryArray> for FixedSizeBinaryArray { } } - fn gt(&self, rhs: &FixedSizeBinaryArray) -> Self::Output { + fn gt(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => compare_fixed_size_binary(self, rhs, |lhs, rhs| lhs > rhs), (l_size, 1) => { @@ -1683,7 +1611,7 @@ impl DaftCompare<&FixedSizeBinaryArray> for FixedSizeBinaryArray { } } - fn gte(&self, rhs: &FixedSizeBinaryArray) -> Self::Output { + fn gte(&self, rhs: &Self) -> Self::Output { match (self.len(), rhs.len()) { (x, y) if x == y => compare_fixed_size_binary(self, rhs, |lhs, rhs| lhs >= rhs), (l_size, 1) => { diff --git a/src/daft-core/src/array/ops/concat.rs b/src/daft-core/src/array/ops/concat.rs index b83f2b150b..3424b46811 100644 --- a/src/daft-core/src/array/ops/concat.rs +++ b/src/daft-core/src/array/ops/concat.rs @@ -94,20 +94,20 @@ where }) .collect(), )); - DataArray::new(field.clone(), cat_array) + Self::new(field.clone(), cat_array) } crate::datatypes::DataType::Utf8 => { let cat_array = utf8_concat(arrow_arrays.as_slice())?; - DataArray::new(field.clone(), cat_array) + Self::new(field.clone(), cat_array) } crate::datatypes::DataType::Binary => { let cat_array = binary_concat(arrow_arrays.as_slice())?; - DataArray::new(field.clone(), cat_array) + Self::new(field.clone(), cat_array) } _ => { let cat_array: Box = arrow2::compute::concatenate::concatenate(arrow_arrays.as_slice())?; - DataArray::try_from((field.clone(), cat_array)) + Self::try_from((field.clone(), cat_array)) } } } diff --git a/src/daft-core/src/array/ops/concat_agg.rs b/src/daft-core/src/array/ops/concat_agg.rs index 6713597897..5b4b7c8892 100644 --- a/src/daft-core/src/array/ops/concat_agg.rs +++ b/src/daft-core/src/array/ops/concat_agg.rs @@ -61,7 +61,7 @@ impl DaftConcatAggable for ListArray { fn concat(&self) -> Self::Output { if self.null_count() == 0 { let new_offsets = OffsetsBuffer::::try_from(vec![0, *self.offsets().last()])?; - return Ok(ListArray::new( + return Ok(Self::new( self.field.clone(), self.flat_child.clone(), new_offsets, @@ -94,7 +94,7 @@ impl DaftConcatAggable for ListArray { let new_child = child_growable.build()?; let new_offsets = OffsetsBuffer::::try_from(vec![0, new_child.len() as i64])?; - Ok(ListArray::new( + Ok(Self::new( self.field.clone(), new_child, new_offsets, @@ -137,7 +137,7 @@ impl DaftConcatAggable for ListArray { Some(arrow2::bitmap::Bitmap::from(group_valids)) }; - Ok(ListArray::new( + Ok(Self::new( self.field.clone(), child_array_growable.build()?, new_offsets.into(), diff --git a/src/daft-core/src/array/ops/filter.rs b/src/daft-core/src/array/ops/filter.rs index e255b10119..f17740a549 100644 --- a/src/daft-core/src/array/ops/filter.rs +++ b/src/daft-core/src/array/ops/filter.rs @@ -28,7 +28,7 @@ impl crate::datatypes::PythonArray { use arrow2::array::Array; use pyo3::PyObject; - use crate::{array::pseudo_arrow::PseudoArrowArray, datatypes::PythonType}; + use crate::array::pseudo_arrow::PseudoArrowArray; let mask = mask.as_arrow(); @@ -71,7 +71,7 @@ impl crate::datatypes::PythonArray { let arrow_array: Box = Box::new(PseudoArrowArray::new(new_values.into(), new_validity)); - DataArray::::new(self.field().clone().into(), arrow_array) + Self::new(self.field().clone().into(), arrow_array) } } diff --git a/src/daft-core/src/array/ops/from_arrow.rs b/src/daft-core/src/array/ops/from_arrow.rs index a635fe6e21..1739b524a9 100644 --- a/src/daft-core/src/array/ops/from_arrow.rs +++ b/src/daft-core/src/array/ops/from_arrow.rs @@ -21,7 +21,7 @@ where impl FromArrow for DataArray { fn from_arrow(field: FieldRef, arrow_arr: Box) -> DaftResult { - DataArray::::try_from((field.clone(), arrow_arr)) + Self::try_from((field.clone(), arrow_arr)) } } @@ -42,7 +42,7 @@ where data_array_field, physical_arrow_arr, )?; - Ok(LogicalArray::::new(field.clone(), physical)) + Ok(Self::new(field.clone(), physical)) } } @@ -57,7 +57,7 @@ impl FromArrow for FixedSizeListArray { let arrow_arr = arrow_arr.as_ref().as_any().downcast_ref::().unwrap(); let arrow_child_array = arrow_arr.values(); let child_series = Series::from_arrow(Arc::new(Field::new("item", daft_child_dtype.as_ref().clone())), arrow_child_array.clone())?; - Ok(FixedSizeListArray::new( + Ok(Self::new( field.clone(), child_series, arrow_arr.validity().cloned(), @@ -91,7 +91,7 @@ impl FromArrow for ListArray { Arc::new(Field::new("list", daft_child_dtype.as_ref().clone())), arrow_child_array.clone(), )?; - Ok(ListArray::new( + Ok(Self::new( field.clone(), child_series, arrow_arr.offsets().clone(), @@ -108,7 +108,7 @@ impl FromArrow for ListArray { Arc::new(Field::new("map", daft_child_dtype.as_ref().clone())), arrow_child_array.clone(), )?; - Ok(ListArray::new( + Ok(Self::new( field.clone(), child_series, map_arr.offsets().into(), @@ -138,7 +138,7 @@ impl FromArrow for StructArray { Series::from_arrow(Arc::new(daft_field.clone()), arrow_arr.to_boxed()) }).collect::>>()?; - Ok(StructArray::new( + Ok(Self::new( field.clone(), child_series, arrow_arr.validity().cloned(), diff --git a/src/daft-core/src/array/ops/full.rs b/src/daft-core/src/array/ops/full.rs index 9950116cce..ac65be6a7a 100644 --- a/src/daft-core/src/array/ops/full.rs +++ b/src/daft-core/src/array/ops/full.rs @@ -36,7 +36,7 @@ where if dtype.is_python() { let py_none = Python::with_gil(|py: Python| py.None()); - return DataArray::new( + return Self::new( field.into(), Box::new(PseudoArrowArray::from_pyobj_vec(vec![py_none; length])), ) @@ -45,7 +45,7 @@ where let arrow_dtype = dtype.to_arrow(); match arrow_dtype { - Ok(arrow_dtype) => DataArray::::new( + Ok(arrow_dtype) => Self::new( Arc::new(Field::new(name.to_string(), dtype.clone())), arrow2::array::new_null_array(arrow_dtype, length), ) @@ -58,7 +58,7 @@ where let field = Field::new(name, dtype.clone()); #[cfg(feature = "python")] if dtype.is_python() { - return DataArray::new( + return Self::new( field.into(), Box::new(PseudoArrowArray::from_pyobj_vec(vec![])), ) @@ -67,7 +67,7 @@ where let arrow_dtype = dtype.to_arrow(); match arrow_dtype { - Ok(arrow_dtype) => DataArray::::new( + Ok(arrow_dtype) => Self::new( Arc::new(Field::new(name.to_string(), dtype.clone())), arrow2::array::new_empty_array(arrow_dtype), ) diff --git a/src/daft-core/src/array/ops/if_else.rs b/src/daft-core/src/array/ops/if_else.rs index b92db36528..8981ac2e1f 100644 --- a/src/daft-core/src/array/ops/if_else.rs +++ b/src/daft-core/src/array/ops/if_else.rs @@ -115,13 +115,9 @@ fn generic_if_else( impl DataArray where T: DaftPhysicalType, - DataArray: GrowableArray + IntoSeries, + Self: GrowableArray + IntoSeries, { - pub fn if_else( - &self, - other: &DataArray, - predicate: &BooleanArray, - ) -> DaftResult> { + pub fn if_else(&self, other: &Self, predicate: &BooleanArray) -> DaftResult { generic_if_else( predicate, self.name(), @@ -131,7 +127,7 @@ where self.len(), other.len(), )? - .downcast::>() + .downcast::() .cloned() } } diff --git a/src/daft-core/src/array/ops/is_in.rs b/src/daft-core/src/array/ops/is_in.rs index 304ec2df4b..24e78e8f29 100644 --- a/src/daft-core/src/array/ops/is_in.rs +++ b/src/daft-core/src/array/ops/is_in.rs @@ -24,7 +24,7 @@ macro_rules! collect_to_set_and_check_membership { }}; } -impl DaftIsIn<&DataArray> for DataArray +impl DaftIsIn<&Self> for DataArray where T: DaftIntegerType, ::Native: Ord, @@ -33,7 +33,7 @@ where { type Output = DaftResult; - fn is_in(&self, rhs: &DataArray) -> Self::Output { + fn is_in(&self, rhs: &Self) -> Self::Output { collect_to_set_and_check_membership!(self, rhs) } } @@ -76,10 +76,10 @@ impl_is_in_non_numeric_array!(Utf8Array); impl_is_in_non_numeric_array!(BinaryArray); impl_is_in_non_numeric_array!(FixedSizeBinaryArray); -impl DaftIsIn<&NullArray> for NullArray { +impl DaftIsIn<&Self> for NullArray { type Output = DaftResult; - fn is_in(&self, _rhs: &NullArray) -> Self::Output { + fn is_in(&self, _rhs: &Self) -> Self::Output { // If self and rhs are null array then return a full null array Ok(BooleanArray::full_null( self.name(), diff --git a/src/daft-core/src/array/ops/list_agg.rs b/src/daft-core/src/array/ops/list_agg.rs index 6e47d011ac..0792a17675 100644 --- a/src/daft-core/src/array/ops/list_agg.rs +++ b/src/daft-core/src/array/ops/list_agg.rs @@ -13,8 +13,8 @@ use crate::{ impl DaftListAggable for DataArray where T: DaftArrowBackedType, - DataArray: IntoSeries, - DataArray: GrowableArray, + Self: IntoSeries, + Self: GrowableArray, { type Output = DaftResult; fn list(&self) -> Self::Output { @@ -60,7 +60,7 @@ where #[cfg(feature = "python")] impl DaftListAggable for crate::datatypes::PythonArray { - type Output = DaftResult; + type Output = DaftResult; fn list(&self) -> Self::Output { use pyo3::{prelude::*, types::PyList}; @@ -97,7 +97,7 @@ impl DaftListAggable for crate::datatypes::PythonArray { } impl DaftListAggable for ListArray { - type Output = DaftResult; + type Output = DaftResult; fn list(&self) -> Self::Output { // TODO(FixedSizeList) diff --git a/src/daft-core/src/array/ops/sort.rs b/src/daft-core/src/array/ops/sort.rs index d28c920419..ba2d791101 100644 --- a/src/daft-core/src/array/ops/sort.rs +++ b/src/daft-core/src/array/ops/sort.rs @@ -148,7 +148,7 @@ where None, ); - Ok(DataArray::::from((self.name(), Box::new(result)))) + Ok(Self::from((self.name(), Box::new(result)))) } } @@ -240,7 +240,7 @@ impl Float32Array { None, ); - Ok(Float32Array::from((self.name(), Box::new(result)))) + Ok(Self::from((self.name(), Box::new(result)))) } } @@ -332,7 +332,7 @@ impl Float64Array { None, ); - Ok(Float64Array::from((self.name(), Box::new(result)))) + Ok(Self::from((self.name(), Box::new(result)))) } } @@ -462,7 +462,7 @@ impl BooleanArray { let result = arrow2::compute::sort::sort(self.data(), &options, None)?; - BooleanArray::try_from((self.field.clone(), result)) + Self::try_from((self.field.clone(), result)) } } diff --git a/src/daft-core/src/array/ops/take.rs b/src/daft-core/src/array/ops/take.rs index 301a311594..fbd42e8615 100644 --- a/src/daft-core/src/array/ops/take.rs +++ b/src/daft-core/src/array/ops/take.rs @@ -80,7 +80,7 @@ impl FixedSizeBinaryArray { I: DaftIntegerType, ::Native: arrow2::types::Index, { - let mut growable = FixedSizeBinaryArray::make_growable( + let mut growable = Self::make_growable( self.name(), self.data_type(), vec![self], @@ -99,10 +99,7 @@ impl FixedSizeBinaryArray { } } - Ok(growable - .build()? - .downcast::()? - .clone()) + Ok(growable.build()?.downcast::()?.clone()) } } @@ -116,7 +113,7 @@ impl crate::datatypes::PythonArray { use arrow2::array::Array; use pyo3::prelude::*; - use crate::{array::pseudo_arrow::PseudoArrowArray, datatypes::PythonType}; + use crate::array::pseudo_arrow::PseudoArrowArray; let indices = idx.as_arrow(); @@ -165,7 +162,7 @@ impl crate::datatypes::PythonArray { let arrow_array: Box = Box::new(PseudoArrowArray::new(new_values.into(), new_validity)); - DataArray::::new(self.field().clone().into(), arrow_array) + Self::new(self.field().clone().into(), arrow_array) } } @@ -175,7 +172,7 @@ impl FixedSizeListArray { I: DaftIntegerType, ::Native: arrow2::types::Index, { - let mut growable = FixedSizeListArray::make_growable( + let mut growable = Self::make_growable( self.name(), self.data_type(), vec![self], @@ -194,7 +191,7 @@ impl FixedSizeListArray { } } - Ok(growable.build()?.downcast::()?.clone()) + Ok(growable.build()?.downcast::()?.clone()) } } @@ -215,7 +212,7 @@ impl ListArray { } }) .sum(); - let mut growable = ::GrowableType::new( + let mut growable = ::GrowableType::new( self.name(), self.data_type(), vec![self], @@ -235,7 +232,7 @@ impl ListArray { } } - Ok(growable.build()?.downcast::()?.clone()) + Ok(growable.build()?.downcast::()?.clone()) } } diff --git a/src/daft-core/src/array/ops/trigonometry.rs b/src/daft-core/src/array/ops/trigonometry.rs index 659e11e39e..673b2e1a38 100644 --- a/src/daft-core/src/array/ops/trigonometry.rs +++ b/src/daft-core/src/array/ops/trigonometry.rs @@ -27,18 +27,18 @@ pub enum TrigonometricFunction { impl TrigonometricFunction { pub fn fn_name(&self) -> &'static str { match self { - TrigonometricFunction::Sin => "sin", - TrigonometricFunction::Cos => "cos", - TrigonometricFunction::Tan => "tan", - TrigonometricFunction::Cot => "cot", - TrigonometricFunction::ArcSin => "arcsin", - TrigonometricFunction::ArcCos => "arccos", - TrigonometricFunction::ArcTan => "arctan", - TrigonometricFunction::Radians => "radians", - TrigonometricFunction::Degrees => "degrees", - TrigonometricFunction::ArcTanh => "arctanh", - TrigonometricFunction::ArcCosh => "arccosh", - TrigonometricFunction::ArcSinh => "arcsinh", + Self::Sin => "sin", + Self::Cos => "cos", + Self::Tan => "tan", + Self::Cot => "cot", + Self::ArcSin => "arcsin", + Self::ArcCos => "arccos", + Self::ArcTan => "arctan", + Self::Radians => "radians", + Self::Degrees => "degrees", + Self::ArcTanh => "arctanh", + Self::ArcCosh => "arccosh", + Self::ArcSinh => "arcsinh", } } } diff --git a/src/daft-core/src/array/ops/truncate.rs b/src/daft-core/src/array/ops/truncate.rs index 83a5c1b0b4..c939cd89ac 100644 --- a/src/daft-core/src/array/ops/truncate.rs +++ b/src/daft-core/src/array/ops/truncate.rs @@ -44,7 +44,7 @@ impl_int_truncate!(UInt32Type); impl_int_truncate!(UInt64Type); impl Decimal128Array { - pub fn iceberg_truncate(&self, w: i64) -> DaftResult { + pub fn iceberg_truncate(&self, w: i64) -> DaftResult { let as_arrow = self.as_arrow(); let trun_value = as_arrow.into_iter().map(|v| { v.map(|i| { @@ -62,17 +62,17 @@ impl Decimal128Array { } impl Utf8Array { - pub fn iceberg_truncate(&self, w: i64) -> DaftResult { + pub fn iceberg_truncate(&self, w: i64) -> DaftResult { let as_arrow = self.as_arrow(); let substring = arrow2::compute::substring::utf8_substring(as_arrow, 0, &Some(w)); - Ok(Utf8Array::from((self.name(), Box::new(substring)))) + Ok(Self::from((self.name(), Box::new(substring)))) } } impl BinaryArray { - pub fn iceberg_truncate(&self, w: i64) -> DaftResult { + pub fn iceberg_truncate(&self, w: i64) -> DaftResult { let as_arrow = self.as_arrow(); let substring = arrow2::compute::substring::binary_substring(as_arrow, 0, &Some(w)); - Ok(BinaryArray::from((self.name(), Box::new(substring)))) + Ok(Self::from((self.name(), Box::new(substring)))) } } diff --git a/src/daft-core/src/array/ops/utf8.rs b/src/daft-core/src/array/ops/utf8.rs index a8e80623c1..ebac895e20 100644 --- a/src/daft-core/src/array/ops/utf8.rs +++ b/src/daft-core/src/array/ops/utf8.rs @@ -351,7 +351,7 @@ pub struct Utf8NormalizeOptions { } impl Utf8Array { - pub fn endswith(&self, pattern: &Utf8Array) -> DaftResult { + pub fn endswith(&self, pattern: &Self) -> DaftResult { self.binary_broadcasted_compare( pattern, |data: &str, pat: &str| Ok(data.ends_with(pat)), @@ -359,7 +359,7 @@ impl Utf8Array { ) } - pub fn startswith(&self, pattern: &Utf8Array) -> DaftResult { + pub fn startswith(&self, pattern: &Self) -> DaftResult { self.binary_broadcasted_compare( pattern, |data: &str, pat: &str| Ok(data.starts_with(pat)), @@ -367,7 +367,7 @@ impl Utf8Array { ) } - pub fn contains(&self, pattern: &Utf8Array) -> DaftResult { + pub fn contains(&self, pattern: &Self) -> DaftResult { self.binary_broadcasted_compare( pattern, |data: &str, pat: &str| Ok(data.contains(pat)), @@ -375,7 +375,7 @@ impl Utf8Array { ) } - pub fn match_(&self, pattern: &Utf8Array) -> DaftResult { + pub fn match_(&self, pattern: &Self) -> DaftResult { if pattern.len() == 1 { let pattern_scalar_value = pattern.get(0); return match pattern_scalar_value { @@ -403,7 +403,7 @@ impl Utf8Array { ) } - pub fn split(&self, pattern: &Utf8Array, regex: bool) -> DaftResult { + pub fn split(&self, pattern: &Self, regex: bool) -> DaftResult { let (is_full_null, expected_size) = parse_inputs(self, &[pattern]) .map_err(|e| DaftError::ValueError(format!("Error in split: {e}")))?; if is_full_null { @@ -483,18 +483,14 @@ impl Utf8Array { Ok(result) } - pub fn extract(&self, pattern: &Utf8Array, index: usize) -> DaftResult { + pub fn extract(&self, pattern: &Self, index: usize) -> DaftResult { let (is_full_null, expected_size) = parse_inputs(self, &[pattern]) .map_err(|e| DaftError::ValueError(format!("Error in extract: {e}")))?; if is_full_null { - return Ok(Utf8Array::full_null( - self.name(), - &DataType::Utf8, - expected_size, - )); + return Ok(Self::full_null(self.name(), &DataType::Utf8, expected_size)); } if expected_size == 0 { - return Ok(Utf8Array::empty(self.name(), &DataType::Utf8)); + return Ok(Self::empty(self.name(), &DataType::Utf8)); } let self_iter = create_broadcasted_str_iter(self, expected_size); @@ -516,7 +512,7 @@ impl Utf8Array { Ok(result) } - pub fn extract_all(&self, pattern: &Utf8Array, index: usize) -> DaftResult { + pub fn extract_all(&self, pattern: &Self, index: usize) -> DaftResult { let (is_full_null, expected_size) = parse_inputs(self, &[pattern]) .map_err(|e| DaftError::ValueError(format!("Error in extract_all: {e}")))?; if is_full_null { @@ -552,23 +548,14 @@ impl Utf8Array { Ok(result) } - pub fn replace( - &self, - pattern: &Utf8Array, - replacement: &Utf8Array, - regex: bool, - ) -> DaftResult { + pub fn replace(&self, pattern: &Self, replacement: &Self, regex: bool) -> DaftResult { let (is_full_null, expected_size) = parse_inputs(self, &[pattern, replacement]) .map_err(|e| DaftError::ValueError(format!("Error in replace: {e}")))?; if is_full_null { - return Ok(Utf8Array::full_null( - self.name(), - &DataType::Utf8, - expected_size, - )); + return Ok(Self::full_null(self.name(), &DataType::Utf8, expected_size)); } if expected_size == 0 { - return Ok(Utf8Array::empty(self.name(), &DataType::Utf8)); + return Ok(Self::empty(self.name(), &DataType::Utf8)); } let self_iter = create_broadcasted_str_iter(self, expected_size); @@ -622,27 +609,27 @@ impl Utf8Array { Ok(UInt64Array::from((self.name(), Box::new(arrow_result)))) } - pub fn lower(&self) -> DaftResult { + pub fn lower(&self) -> DaftResult { self.unary_broadcasted_op(|val| val.to_lowercase().into()) } - pub fn upper(&self) -> DaftResult { + pub fn upper(&self) -> DaftResult { self.unary_broadcasted_op(|val| val.to_uppercase().into()) } - pub fn lstrip(&self) -> DaftResult { + pub fn lstrip(&self) -> DaftResult { self.unary_broadcasted_op(|val| val.trim_start().into()) } - pub fn rstrip(&self) -> DaftResult { + pub fn rstrip(&self) -> DaftResult { self.unary_broadcasted_op(|val| val.trim_end().into()) } - pub fn reverse(&self) -> DaftResult { + pub fn reverse(&self) -> DaftResult { self.unary_broadcasted_op(|val| val.chars().rev().collect::().into()) } - pub fn capitalize(&self) -> DaftResult { + pub fn capitalize(&self) -> DaftResult { self.unary_broadcasted_op(|val| { let mut chars = val.chars(); match chars.next() { @@ -658,7 +645,7 @@ impl Utf8Array { }) } - pub fn find(&self, substr: &Utf8Array) -> DaftResult { + pub fn find(&self, substr: &Self) -> DaftResult { let (is_full_null, expected_size) = parse_inputs(self, &[substr]) .map_err(|e| DaftError::ValueError(format!("Error in find: {e}")))?; if is_full_null { @@ -689,7 +676,7 @@ impl Utf8Array { Ok(result) } - pub fn like(&self, pattern: &Utf8Array) -> DaftResult { + pub fn like(&self, pattern: &Self) -> DaftResult { let (is_full_null, expected_size) = parse_inputs(self, &[pattern]) .map_err(|e| DaftError::ValueError(format!("Error in like: {e}")))?; if is_full_null { @@ -734,7 +721,7 @@ impl Utf8Array { Ok(result) } - pub fn ilike(&self, pattern: &Utf8Array) -> DaftResult { + pub fn ilike(&self, pattern: &Self) -> DaftResult { let (is_full_null, expected_size) = parse_inputs(self, &[pattern]) .map_err(|e| DaftError::ValueError(format!("Error in ilike: {e}")))?; if is_full_null { @@ -781,7 +768,7 @@ impl Utf8Array { Ok(result) } - pub fn left(&self, nchars: &DataArray) -> DaftResult + pub fn left(&self, nchars: &DataArray) -> DaftResult where I: DaftIntegerType, ::Native: Ord, @@ -789,14 +776,10 @@ impl Utf8Array { let (is_full_null, expected_size) = parse_inputs(self, &[nchars]) .map_err(|e| DaftError::ValueError(format!("Error in left: {e}")))?; if is_full_null { - return Ok(Utf8Array::full_null( - self.name(), - &DataType::Utf8, - expected_size, - )); + return Ok(Self::full_null(self.name(), &DataType::Utf8, expected_size)); } if expected_size == 0 { - return Ok(Utf8Array::empty(self.name(), &DataType::Utf8)); + return Ok(Self::empty(self.name(), &DataType::Utf8)); } fn left_most_chars(val: &str, n: usize) -> &str { @@ -808,7 +791,7 @@ impl Utf8Array { } let self_iter = create_broadcasted_str_iter(self, expected_size); - let result: Utf8Array = match nchars.len() { + let result: Self = match nchars.len() { 1 => { let n = nchars.get(0).unwrap(); let n: usize = NumCast::from(n).ok_or_else(|| { @@ -819,7 +802,7 @@ impl Utf8Array { let arrow_result = self_iter .map(|val| Some(left_most_chars(val?, n))) .collect::>(); - Utf8Array::from((self.name(), Box::new(arrow_result))) + Self::from((self.name(), Box::new(arrow_result))) } _ => { let arrow_result = self_iter @@ -837,14 +820,14 @@ impl Utf8Array { }) .collect::>>()?; - Utf8Array::from((self.name(), Box::new(arrow_result))) + Self::from((self.name(), Box::new(arrow_result))) } }; assert_eq!(result.len(), expected_size); Ok(result) } - pub fn right(&self, nchars: &DataArray) -> DaftResult + pub fn right(&self, nchars: &DataArray) -> DaftResult where I: DaftIntegerType, ::Native: Ord, @@ -852,14 +835,10 @@ impl Utf8Array { let (is_full_null, expected_size) = parse_inputs(self, &[nchars]) .map_err(|e| DaftError::ValueError(format!("Error in right: {e}")))?; if is_full_null { - return Ok(Utf8Array::full_null( - self.name(), - &DataType::Utf8, - expected_size, - )); + return Ok(Self::full_null(self.name(), &DataType::Utf8, expected_size)); } if expected_size == 0 { - return Ok(Utf8Array::empty(self.name(), &DataType::Utf8)); + return Ok(Self::empty(self.name(), &DataType::Utf8)); } fn right_most_chars(val: &str, nchar: usize) -> &str { @@ -872,7 +851,7 @@ impl Utf8Array { } let self_iter = create_broadcasted_str_iter(self, expected_size); - let result: Utf8Array = match nchars.len() { + let result: Self = match nchars.len() { 1 => { let n = nchars.get(0).unwrap(); let n: usize = NumCast::from(n).ok_or_else(|| { @@ -883,7 +862,7 @@ impl Utf8Array { let arrow_result = self_iter .map(|val| Some(right_most_chars(val?, n))) .collect::>(); - Utf8Array::from((self.name(), Box::new(arrow_result))) + Self::from((self.name(), Box::new(arrow_result))) } _ => { let arrow_result = self_iter @@ -901,7 +880,7 @@ impl Utf8Array { }) .collect::>>()?; - Utf8Array::from((self.name(), Box::new(arrow_result))) + Self::from((self.name(), Box::new(arrow_result))) } }; assert_eq!(result.len(), expected_size); @@ -993,7 +972,7 @@ impl Utf8Array { Ok(result) } - pub fn repeat(&self, n: &DataArray) -> DaftResult + pub fn repeat(&self, n: &DataArray) -> DaftResult where I: DaftIntegerType, ::Native: Ord, @@ -1001,19 +980,15 @@ impl Utf8Array { let (is_full_null, expected_size) = parse_inputs(self, &[n]) .map_err(|e| DaftError::ValueError(format!("Error in repeat: {e}")))?; if is_full_null { - return Ok(Utf8Array::full_null( - self.name(), - &DataType::Utf8, - expected_size, - )); + return Ok(Self::full_null(self.name(), &DataType::Utf8, expected_size)); } if expected_size == 0 { - return Ok(Utf8Array::empty(self.name(), &DataType::Utf8)); + return Ok(Self::empty(self.name(), &DataType::Utf8)); } let self_iter = create_broadcasted_str_iter(self, expected_size); - let result: Utf8Array = match n.len() { + let result: Self = match n.len() { 1 => { let n = n.get(0).unwrap(); let n: usize = NumCast::from(n).ok_or_else(|| { @@ -1024,7 +999,7 @@ impl Utf8Array { let arrow_result = self_iter .map(|val| Some(val?.repeat(n))) .collect::>(); - Utf8Array::from((self.name(), Box::new(arrow_result))) + Self::from((self.name(), Box::new(arrow_result))) } _ => { let arrow_result = self_iter @@ -1042,7 +1017,7 @@ impl Utf8Array { }) .collect::>>()?; - Utf8Array::from((self.name(), Box::new(arrow_result))) + Self::from((self.name(), Box::new(arrow_result))) } }; @@ -1054,7 +1029,7 @@ impl Utf8Array { &self, start: &DataArray, length: Option<&DataArray>, - ) -> DaftResult + ) -> DaftResult where I: DaftIntegerType, ::Native: Ord, @@ -1066,7 +1041,7 @@ impl Utf8Array { .map_err(|e| DaftError::ValueError(format!("Error in substr: {e}")))?; if is_full_null { - return Ok(Utf8Array::full_null(name, &DataType::Utf8, expected_size)); + return Ok(Self::full_null(name, &DataType::Utf8, expected_size)); } let self_iter = create_broadcasted_str_iter(self, expected_size); @@ -1172,9 +1147,9 @@ impl Utf8Array { pub fn pad( &self, length: &DataArray, - padchar: &Utf8Array, + padchar: &Self, placement: PadPlacement, - ) -> DaftResult + ) -> DaftResult where I: DaftIntegerType, ::Native: Ord, @@ -1216,11 +1191,7 @@ impl Utf8Array { || length.null_count() == length.len() || padchar.null_count() == padchar.len() { - return Ok(Utf8Array::full_null( - self.name(), - &DataType::Utf8, - expected_size, - )); + return Ok(Self::full_null(self.name(), &DataType::Utf8, expected_size)); } fn pad_str( @@ -1260,7 +1231,7 @@ impl Utf8Array { let self_iter = create_broadcasted_str_iter(self, expected_size); let padchar_iter = create_broadcasted_str_iter(padchar, expected_size); - let result: Utf8Array = match length.len() { + let result: Self = match length.len() { 1 => { let len = length.get(0).unwrap(); let len: usize = NumCast::from(len).ok_or_else(|| { @@ -1278,7 +1249,7 @@ impl Utf8Array { }) .collect::>>()?; - Utf8Array::from((self.name(), Box::new(arrow_result))) + Self::from((self.name(), Box::new(arrow_result))) } _ => { let length_iter = length.as_arrow().iter(); @@ -1298,7 +1269,7 @@ impl Utf8Array { }) .collect::>>()?; - Utf8Array::from((self.name(), Box::new(arrow_result))) + Self::from((self.name(), Box::new(arrow_result))) } }; @@ -1343,8 +1314,8 @@ impl Utf8Array { Ok(result) } - pub fn normalize(&self, opts: Utf8NormalizeOptions) -> DaftResult { - Ok(Utf8Array::from_iter( + pub fn normalize(&self, opts: Utf8NormalizeOptions) -> DaftResult { + Ok(Self::from_iter( self.name(), self.as_arrow().iter().map(|maybe_s| { if let Some(s) = maybe_s { @@ -1432,7 +1403,7 @@ impl Utf8Array { Ok(UInt64Array::from_iter(self.name(), iter)) } - fn unary_broadcasted_op(&self, operation: ScalarKernel) -> DaftResult + fn unary_broadcasted_op(&self, operation: ScalarKernel) -> DaftResult where ScalarKernel: Fn(&str) -> Cow<'_, str>, { @@ -1442,7 +1413,7 @@ impl Utf8Array { .map(|val| Some(operation(val?))) .collect::>() .with_validity(self_arrow.validity().cloned()); - Ok(Utf8Array::from((self.name(), Box::new(arrow_result)))) + Ok(Self::from((self.name(), Box::new(arrow_result)))) } } diff --git a/src/daft-core/src/array/pseudo_arrow/compute.rs b/src/daft-core/src/array/pseudo_arrow/compute.rs index 8f513a5857..65b11a69c1 100644 --- a/src/daft-core/src/array/pseudo_arrow/compute.rs +++ b/src/daft-core/src/array/pseudo_arrow/compute.rs @@ -26,6 +26,6 @@ impl PseudoArrowArray { let concatenated_validity = Bitmap::from_iter(bitmaps.iter().flat_map(|bitmap| bitmap.iter())); - PseudoArrowArray::new(concatenated_values.into(), Some(concatenated_validity)) + Self::new(concatenated_values.into(), Some(concatenated_validity)) } } diff --git a/src/daft-core/src/array/pseudo_arrow/mod.rs b/src/daft-core/src/array/pseudo_arrow/mod.rs index d54abaac7d..ff1b1ab9fb 100644 --- a/src/daft-core/src/array/pseudo_arrow/mod.rs +++ b/src/daft-core/src/array/pseudo_arrow/mod.rs @@ -298,7 +298,7 @@ impl Array for PseudoArrowArray { } fn with_validity(&self, validity: Option) -> Box { - Box::new(PseudoArrowArray { + Box::new(Self { values: self.values.clone(), validity, }) diff --git a/src/daft-core/src/array/pseudo_arrow/python.rs b/src/daft-core/src/array/pseudo_arrow/python.rs index 0beb5fd950..1bdc73cb2c 100644 --- a/src/daft-core/src/array/pseudo_arrow/python.rs +++ b/src/daft-core/src/array/pseudo_arrow/python.rs @@ -11,7 +11,7 @@ impl PseudoArrowArray { let validity: arrow2::bitmap::Bitmap = Python::with_gil(|py| { arrow2::bitmap::Bitmap::from_iter(pyobj_vec.iter().map(|pyobj| !pyobj.is_none(py))) }); - PseudoArrowArray::new(pyobj_vec.into(), Some(validity)) + Self::new(pyobj_vec.into(), Some(validity)) } pub fn to_pyobj_vec(&self) -> Vec { @@ -42,15 +42,10 @@ impl PseudoArrowArray { let (new_values, new_validity): (Vec, Vec) = { lhs.as_any() - .downcast_ref::>() + .downcast_ref::() .unwrap() .iter() - .zip( - rhs.as_any() - .downcast_ref::>() - .unwrap() - .iter(), - ) + .zip(rhs.as_any().downcast_ref::().unwrap().iter()) .zip(predicate.iter()) .map(|((self_val, other_val), pred_val)| match pred_val { None => None, @@ -66,6 +61,6 @@ impl PseudoArrowArray { let new_validity: Option = Some(Bitmap::from_iter(new_validity)); - PseudoArrowArray::new(new_values.into(), new_validity) + Self::new(new_values.into(), new_validity) } } diff --git a/src/daft-core/src/array/serdes.rs b/src/daft-core/src/array/serdes.rs index 9ba3905680..cc908c0dd6 100644 --- a/src/daft-core/src/array/serdes.rs +++ b/src/daft-core/src/array/serdes.rs @@ -27,7 +27,7 @@ where ::Item: serde::Serialize, { fn new(iter: I) -> Self { - IterSer { + Self { iter: RefCell::new(Some(iter)), } } diff --git a/src/daft-core/src/array/struct_array.rs b/src/daft-core/src/array/struct_array.rs index fb0c50fb25..996680ede5 100644 --- a/src/daft-core/src/array/struct_array.rs +++ b/src/daft-core/src/array/struct_array.rs @@ -64,7 +64,7 @@ impl StructArray { ) } - StructArray { + Self { field, children, validity, @@ -108,7 +108,7 @@ impl StructArray { growable .build() - .map(|s| s.downcast::().unwrap().clone()) + .map(|s| s.downcast::().unwrap().clone()) } pub fn len(&self) -> usize { diff --git a/src/daft-core/src/count_mode.rs b/src/daft-core/src/count_mode.rs index 7ef22f452f..0b1ea12368 100644 --- a/src/daft-core/src/count_mode.rs +++ b/src/daft-core/src/count_mode.rs @@ -40,7 +40,7 @@ impl CountMode { impl_bincode_py_state_serialization!(CountMode); impl CountMode { - pub fn iterator() -> std::slice::Iter<'static, CountMode> { + pub fn iterator() -> std::slice::Iter<'static, Self> { static COUNT_MODES: [CountMode; 3] = [CountMode::All, CountMode::Valid, CountMode::Null]; COUNT_MODES.iter() } @@ -51,13 +51,13 @@ impl FromStr for CountMode { fn from_str(count_mode: &str) -> DaftResult { match count_mode { - "all" => Ok(CountMode::All), - "valid" => Ok(CountMode::Valid), - "null" => Ok(CountMode::Null), + "all" => Ok(Self::All), + "valid" => Ok(Self::Valid), + "null" => Ok(Self::Null), _ => Err(DaftError::TypeError(format!( "Count mode {} is not supported; only the following modes are supported: {:?}", count_mode, - CountMode::iterator().as_slice() + Self::iterator().as_slice() ))), } } diff --git a/src/daft-core/src/datatypes/logical.rs b/src/daft-core/src/datatypes/logical.rs index df48b30524..86d84535e1 100644 --- a/src/daft-core/src/datatypes/logical.rs +++ b/src/daft-core/src/datatypes/logical.rs @@ -44,7 +44,7 @@ impl LogicalArrayImpl { &field.dtype.to_physical(), physical.data_type() ); - LogicalArrayImpl { + Self { physical, field, marker_: PhantomData, diff --git a/src/daft-core/src/join.rs b/src/daft-core/src/join.rs index 62746fbfb1..13e682fe14 100644 --- a/src/daft-core/src/join.rs +++ b/src/daft-core/src/join.rs @@ -38,7 +38,7 @@ impl JoinType { impl_bincode_py_state_serialization!(JoinType); impl JoinType { - pub fn iterator() -> std::slice::Iter<'static, JoinType> { + pub fn iterator() -> std::slice::Iter<'static, Self> { static JOIN_TYPES: [JoinType; 6] = [ JoinType::Inner, JoinType::Left, @@ -56,16 +56,16 @@ impl FromStr for JoinType { fn from_str(join_type: &str) -> DaftResult { match join_type { - "inner" => Ok(JoinType::Inner), - "left" => Ok(JoinType::Left), - "right" => Ok(JoinType::Right), - "outer" => Ok(JoinType::Outer), - "anti" => Ok(JoinType::Anti), - "semi" => Ok(JoinType::Semi), + "inner" => Ok(Self::Inner), + "left" => Ok(Self::Left), + "right" => Ok(Self::Right), + "outer" => Ok(Self::Outer), + "anti" => Ok(Self::Anti), + "semi" => Ok(Self::Semi), _ => Err(DaftError::TypeError(format!( "Join type {} is not supported; only the following types are supported: {:?}", join_type, - JoinType::iterator().as_slice() + Self::iterator().as_slice() ))), } } @@ -98,7 +98,7 @@ impl JoinStrategy { impl_bincode_py_state_serialization!(JoinStrategy); impl JoinStrategy { - pub fn iterator() -> std::slice::Iter<'static, JoinStrategy> { + pub fn iterator() -> std::slice::Iter<'static, Self> { static JOIN_STRATEGIES: [JoinStrategy; 3] = [ JoinStrategy::Hash, JoinStrategy::SortMerge, @@ -113,13 +113,13 @@ impl FromStr for JoinStrategy { fn from_str(join_strategy: &str) -> DaftResult { match join_strategy { - "hash" => Ok(JoinStrategy::Hash), - "sort_merge" => Ok(JoinStrategy::SortMerge), - "broadcast" => Ok(JoinStrategy::Broadcast), + "hash" => Ok(Self::Hash), + "sort_merge" => Ok(Self::SortMerge), + "broadcast" => Ok(Self::Broadcast), _ => Err(DaftError::TypeError(format!( "Join strategy {} is not supported; only the following strategies are supported: {:?}", join_strategy, - JoinStrategy::iterator().as_slice() + Self::iterator().as_slice() ))), } } diff --git a/src/daft-core/src/python/series.rs b/src/daft-core/src/python/series.rs index 5764d5d610..f57bf3f829 100644 --- a/src/daft-core/src/python/series.rs +++ b/src/daft-core/src/python/series.rs @@ -299,7 +299,7 @@ impl PySeries { Ok(self.series.argsort(descending)?.into()) } - pub fn hash(&self, seed: Option) -> PyResult { + pub fn hash(&self, seed: Option) -> PyResult { let seed_series; let mut seed_array = None; if let Some(s) = seed { @@ -710,7 +710,7 @@ impl PySeries { impl From for PySeries { fn from(value: series::Series) -> Self { - PySeries { series: value } + Self { series: value } } } diff --git a/src/daft-core/src/series/array_impl/data_array.rs b/src/daft-core/src/series/array_impl/data_array.rs index f1cac0d31b..c210d5cdb2 100644 --- a/src/daft-core/src/series/array_impl/data_array.rs +++ b/src/daft-core/src/series/array_impl/data_array.rs @@ -18,7 +18,7 @@ use crate::{ impl IntoSeries for DataArray where - ArrayWrapper>: SeriesLike, + ArrayWrapper: SeriesLike, { fn into_series(self) -> Series { Series { diff --git a/src/daft-core/src/series/array_impl/logical_array.rs b/src/daft-core/src/series/array_impl/logical_array.rs index bec7f069f7..9076907579 100644 --- a/src/daft-core/src/series/array_impl/logical_array.rs +++ b/src/daft-core/src/series/array_impl/logical_array.rs @@ -11,7 +11,7 @@ use crate::{ impl IntoSeries for LogicalArray where L: DaftLogicalType, - ArrayWrapper>: SeriesLike, + ArrayWrapper: SeriesLike, { fn into_series(self) -> Series { Series { diff --git a/src/daft-core/src/series/mod.rs b/src/daft-core/src/series/mod.rs index 92384296d5..0aa91d281c 100644 --- a/src/daft-core/src/series/mod.rs +++ b/src/daft-core/src/series/mod.rs @@ -87,7 +87,7 @@ impl Series { pub fn field(&self) -> &Field { self.inner.field() } - pub fn as_physical(&self) -> DaftResult { + pub fn as_physical(&self) -> DaftResult { let physical_dtype = self.data_type().to_physical(); if &physical_dtype == self.data_type() { Ok(self.clone()) @@ -108,7 +108,7 @@ impl Series { ) } - pub fn with_validity(&self, validity: Option) -> DaftResult { + pub fn with_validity(&self, validity: Option) -> DaftResult { self.inner.with_validity(validity) } diff --git a/src/daft-core/src/series/ops/abs.rs b/src/daft-core/src/series/ops/abs.rs index 1ea9b47587..6a32bc65bb 100644 --- a/src/daft-core/src/series/ops/abs.rs +++ b/src/daft-core/src/series/ops/abs.rs @@ -6,7 +6,7 @@ use crate::{ }; impl Series { - pub fn abs(&self) -> DaftResult { + pub fn abs(&self) -> DaftResult { match self.data_type() { DataType::Int8 => Ok(self.i8().unwrap().abs()?.into_series()), DataType::Int16 => Ok(self.i16().unwrap().abs()?.into_series()), diff --git a/src/daft-core/src/series/ops/agg.rs b/src/daft-core/src/series/ops/agg.rs index 44a4c10348..7b01b80437 100644 --- a/src/daft-core/src/series/ops/agg.rs +++ b/src/daft-core/src/series/ops/agg.rs @@ -14,7 +14,7 @@ use crate::{ }; impl Series { - pub fn count(&self, groups: Option<&GroupIndices>, mode: CountMode) -> DaftResult { + pub fn count(&self, groups: Option<&GroupIndices>, mode: CountMode) -> DaftResult { use crate::array::ops::DaftCountAggable; let s = self.as_physical()?; with_match_physical_daft_types!(s.data_type(), |$T| { @@ -25,7 +25,7 @@ impl Series { }) } - pub fn sum(&self, groups: Option<&GroupIndices>) -> DaftResult { + pub fn sum(&self, groups: Option<&GroupIndices>) -> DaftResult { use crate::{array::ops::DaftSumAggable, datatypes::DataType::*}; match self.data_type() { @@ -94,7 +94,7 @@ impl Series { } } - pub fn approx_sketch(&self, groups: Option<&GroupIndices>) -> DaftResult { + pub fn approx_sketch(&self, groups: Option<&GroupIndices>) -> DaftResult { use crate::{array::ops::DaftApproxSketchAggable, datatypes::DataType::*}; // Upcast all numeric types to float64 and compute approx_sketch. @@ -119,7 +119,7 @@ impl Series { } } - pub fn merge_sketch(&self, groups: Option<&GroupIndices>) -> DaftResult { + pub fn merge_sketch(&self, groups: Option<&GroupIndices>) -> DaftResult { use crate::{array::ops::DaftMergeSketchAggable, datatypes::DataType::*}; match self.data_type() { @@ -138,7 +138,7 @@ impl Series { } } - pub fn hll_merge(&self, groups: Option<&GroupIndices>) -> DaftResult { + pub fn hll_merge(&self, groups: Option<&GroupIndices>) -> DaftResult { let downcasted_self = self.downcast::()?; let series = match groups { Some(groups) => downcasted_self.grouped_hll_merge(groups), @@ -148,7 +148,7 @@ impl Series { Ok(series) } - pub fn mean(&self, groups: Option<&GroupIndices>) -> DaftResult { + pub fn mean(&self, groups: Option<&GroupIndices>) -> DaftResult { use crate::{array::ops::DaftMeanAggable, datatypes::DataType::*}; // Upcast all numeric types to float64 and use f64 mean kernel. @@ -169,19 +169,15 @@ impl Series { } } - pub fn min(&self, groups: Option<&GroupIndices>) -> DaftResult { + pub fn min(&self, groups: Option<&GroupIndices>) -> DaftResult { self.inner.min(groups) } - pub fn max(&self, groups: Option<&GroupIndices>) -> DaftResult { + pub fn max(&self, groups: Option<&GroupIndices>) -> DaftResult { self.inner.max(groups) } - pub fn any_value( - &self, - groups: Option<&GroupIndices>, - ignore_nulls: bool, - ) -> DaftResult { + pub fn any_value(&self, groups: Option<&GroupIndices>, ignore_nulls: bool) -> DaftResult { let indices = match groups { Some(groups) => { if self.data_type().is_null() { @@ -212,17 +208,17 @@ impl Series { } }; - self.take(&Series::from_arrow( + self.take(&Self::from_arrow( Field::new("", DataType::UInt64).into(), indices, )?) } - pub fn agg_list(&self, groups: Option<&GroupIndices>) -> DaftResult { + pub fn agg_list(&self, groups: Option<&GroupIndices>) -> DaftResult { self.inner.agg_list(groups) } - pub fn agg_concat(&self, groups: Option<&GroupIndices>) -> DaftResult { + pub fn agg_concat(&self, groups: Option<&GroupIndices>) -> DaftResult { use crate::array::ops::DaftConcatAggable; match self.data_type() { DataType::List(..) => { diff --git a/src/daft-core/src/series/ops/between.rs b/src/daft-core/src/series/ops/between.rs index 55f9c2e283..4e3d8c89d5 100644 --- a/src/daft-core/src/series/ops/between.rs +++ b/src/daft-core/src/series/ops/between.rs @@ -10,7 +10,7 @@ use crate::{ }; impl Series { - pub fn between(&self, lower: &Series, upper: &Series) -> DaftResult { + pub fn between(&self, lower: &Self, upper: &Self) -> DaftResult { let (_output_type, _intermediate, lower_comp_type) = InferDataType::from(self.data_type()) .comparison_op(&InferDataType::from(lower.data_type()))?; let (_output_type, _intermediate, upper_comp_type) = InferDataType::from(self.data_type()) @@ -29,11 +29,7 @@ impl Series { .downcast::()? .clone() .into_series()), - DataType::Null => Ok(Series::full_null( - self.name(), - &DataType::Boolean, - self.len(), - )), + DataType::Null => Ok(Self::full_null(self.name(), &DataType::Boolean, self.len())), _ => with_match_numeric_daft_types!(comp_type, |$T| { let casted_value = it_value.cast(&comp_type)?; let casted_lower = it_lower.cast(&comp_type)?; diff --git a/src/daft-core/src/series/ops/broadcast.rs b/src/daft-core/src/series/ops/broadcast.rs index 809364c4b8..e0bd28c9ea 100644 --- a/src/daft-core/src/series/ops/broadcast.rs +++ b/src/daft-core/src/series/ops/broadcast.rs @@ -2,7 +2,7 @@ use common_error::DaftResult; use crate::series::Series; impl Series { - pub fn broadcast(&self, num: usize) -> DaftResult { + pub fn broadcast(&self, num: usize) -> DaftResult { self.inner.broadcast(num) } } diff --git a/src/daft-core/src/series/ops/cast.rs b/src/daft-core/src/series/ops/cast.rs index 3d31edddf4..c83f24b33b 100644 --- a/src/daft-core/src/series/ops/cast.rs +++ b/src/daft-core/src/series/ops/cast.rs @@ -3,7 +3,7 @@ use common_error::DaftResult; use crate::{datatypes::DataType, series::Series}; impl Series { - pub fn cast(&self, datatype: &DataType) -> DaftResult { + pub fn cast(&self, datatype: &DataType) -> DaftResult { self.inner.cast(datatype) } } diff --git a/src/daft-core/src/series/ops/cbrt.rs b/src/daft-core/src/series/ops/cbrt.rs index 3d6cb2b8de..8eb1a46758 100644 --- a/src/daft-core/src/series/ops/cbrt.rs +++ b/src/daft-core/src/series/ops/cbrt.rs @@ -6,7 +6,7 @@ use crate::{ }; impl Series { - pub fn cbrt(&self) -> DaftResult { + pub fn cbrt(&self) -> DaftResult { let casted_dtype = self.to_floating_data_type()?; let casted_self = self .cast(&casted_dtype) diff --git a/src/daft-core/src/series/ops/ceil.rs b/src/daft-core/src/series/ops/ceil.rs index 0097acc353..e65e416fe1 100644 --- a/src/daft-core/src/series/ops/ceil.rs +++ b/src/daft-core/src/series/ops/ceil.rs @@ -6,7 +6,7 @@ use crate::{ }; impl Series { - pub fn ceil(&self) -> DaftResult { + pub fn ceil(&self) -> DaftResult { match self.data_type() { DataType::Int8 | DataType::Int16 diff --git a/src/daft-core/src/series/ops/comparison.rs b/src/daft-core/src/series/ops/comparison.rs index df411a6c9f..67ac7c66ec 100644 --- a/src/daft-core/src/series/ops/comparison.rs +++ b/src/daft-core/src/series/ops/comparison.rs @@ -14,7 +14,7 @@ macro_rules! call_inner { }; } -impl DaftCompare<&Series> for Series { +impl DaftCompare<&Self> for Series { type Output = DaftResult; call_inner!(equal); @@ -25,8 +25,8 @@ impl DaftCompare<&Series> for Series { call_inner!(gte); } -impl DaftLogical<&Series> for Series { - type Output = DaftResult; +impl DaftLogical<&Self> for Series { + type Output = DaftResult; call_inner!(and); call_inner!(or); diff --git a/src/daft-core/src/series/ops/concat.rs b/src/daft-core/src/series/ops/concat.rs index c83038ef82..9103255faf 100644 --- a/src/daft-core/src/series/ops/concat.rs +++ b/src/daft-core/src/series/ops/concat.rs @@ -6,7 +6,7 @@ use crate::{ }; impl Series { - pub fn concat(series: &[&Series]) -> DaftResult { + pub fn concat(series: &[&Self]) -> DaftResult { if series.is_empty() { return Err(DaftError::ValueError( "Need at least 1 series to perform concat".to_string(), diff --git a/src/daft-core/src/series/ops/exp.rs b/src/daft-core/src/series/ops/exp.rs index 95a43fc3c2..915e8baefe 100644 --- a/src/daft-core/src/series/ops/exp.rs +++ b/src/daft-core/src/series/ops/exp.rs @@ -6,7 +6,7 @@ use crate::{ }; impl Series { - pub fn exp(&self) -> DaftResult { + pub fn exp(&self) -> DaftResult { match self.data_type() { DataType::Float32 => Ok(self.f32().unwrap().exp()?.into_series()), DataType::Float64 => Ok(self.f64().unwrap().exp()?.into_series()), diff --git a/src/daft-core/src/series/ops/filter.rs b/src/daft-core/src/series/ops/filter.rs index 06488ac92e..15847237d1 100644 --- a/src/daft-core/src/series/ops/filter.rs +++ b/src/daft-core/src/series/ops/filter.rs @@ -3,7 +3,7 @@ use common_error::{DaftError, DaftResult}; use crate::{datatypes::BooleanArray, series::Series}; impl Series { - pub fn filter(&self, mask: &BooleanArray) -> DaftResult { + pub fn filter(&self, mask: &BooleanArray) -> DaftResult { match (self.len(), mask.len()) { (_, 1) => { if Some(true) == mask.get(0) { diff --git a/src/daft-core/src/series/ops/float.rs b/src/daft-core/src/series/ops/float.rs index 92fc9c48b7..c2da199591 100644 --- a/src/daft-core/src/series/ops/float.rs +++ b/src/daft-core/src/series/ops/float.rs @@ -6,21 +6,21 @@ use crate::{ }; impl Series { - pub fn is_nan(&self) -> DaftResult { + pub fn is_nan(&self) -> DaftResult { use crate::array::ops::DaftIsNan; with_match_float_and_null_daft_types!(self.data_type(), |$T| { Ok(DaftIsNan::is_nan(self.downcast::<<$T as DaftDataType>::ArrayType>()?)?.into_series()) }) } - pub fn is_inf(&self) -> DaftResult { + pub fn is_inf(&self) -> DaftResult { use crate::array::ops::DaftIsInf; with_match_float_and_null_daft_types!(self.data_type(), |$T| { Ok(DaftIsInf::is_inf(self.downcast::<<$T as DaftDataType>::ArrayType>()?)?.into_series()) }) } - pub fn not_nan(&self) -> DaftResult { + pub fn not_nan(&self) -> DaftResult { use crate::array::ops::DaftNotNan; with_match_float_and_null_daft_types!(self.data_type(), |$T| { Ok(DaftNotNan::not_nan(self.downcast::<<$T as DaftDataType>::ArrayType>()?)?.into_series()) diff --git a/src/daft-core/src/series/ops/floor.rs b/src/daft-core/src/series/ops/floor.rs index c574c3da3e..f216c3412d 100644 --- a/src/daft-core/src/series/ops/floor.rs +++ b/src/daft-core/src/series/ops/floor.rs @@ -6,7 +6,7 @@ use crate::{ }; impl Series { - pub fn floor(&self) -> DaftResult { + pub fn floor(&self) -> DaftResult { match self.data_type() { DataType::Int8 | DataType::Int16 diff --git a/src/daft-core/src/series/ops/if_else.rs b/src/daft-core/src/series/ops/if_else.rs index 26cc553f54..fa573d4f40 100644 --- a/src/daft-core/src/series/ops/if_else.rs +++ b/src/daft-core/src/series/ops/if_else.rs @@ -4,7 +4,7 @@ use super::cast_series_to_supertype; use crate::series::Series; impl Series { - pub fn if_else(&self, other: &Series, predicate: &Series) -> DaftResult { + pub fn if_else(&self, other: &Self, predicate: &Self) -> DaftResult { let casted_series = cast_series_to_supertype(&[self, other])?; assert!(casted_series.len() == 2); diff --git a/src/daft-core/src/series/ops/is_in.rs b/src/daft-core/src/series/ops/is_in.rs index 7acc601a37..7b2386b745 100644 --- a/src/daft-core/src/series/ops/is_in.rs +++ b/src/daft-core/src/series/ops/is_in.rs @@ -14,7 +14,7 @@ fn default(name: &str, size: usize) -> DaftResult { } impl Series { - pub fn is_in(&self, items: &Self) -> DaftResult { + pub fn is_in(&self, items: &Self) -> DaftResult { if items.is_empty() { return default(self.name(), self.len()); } diff --git a/src/daft-core/src/series/ops/list.rs b/src/daft-core/src/series/ops/list.rs index d00f5440c2..d9a17dd087 100644 --- a/src/daft-core/src/series/ops/list.rs +++ b/src/daft-core/src/series/ops/list.rs @@ -7,7 +7,7 @@ use crate::{ }; impl Series { - pub fn explode(&self) -> DaftResult { + pub fn explode(&self) -> DaftResult { match self.data_type() { DataType::List(_) => self.list()?.explode(), DataType::FixedSizeList(..) => self.fixed_size_list()?.explode(), @@ -55,7 +55,7 @@ impl Series { } } - pub fn list_get(&self, idx: &Series, default: &Series) -> DaftResult { + pub fn list_get(&self, idx: &Self, default: &Self) -> DaftResult { let idx = idx.cast(&DataType::Int64)?; let idx_arr = idx.i64().unwrap(); @@ -69,7 +69,7 @@ impl Series { } } - pub fn list_slice(&self, start: &Series, end: &Series) -> DaftResult { + pub fn list_slice(&self, start: &Self, end: &Self) -> DaftResult { let start = start.cast(&DataType::Int64)?; let start_arr = start.i64().unwrap(); let end_arr = if end.data_type().is_integer() { @@ -89,7 +89,7 @@ impl Series { } } - pub fn list_chunk(&self, size: usize) -> DaftResult { + pub fn list_chunk(&self, size: usize) -> DaftResult { match self.data_type() { DataType::List(_) => self.list()?.get_chunks(size), DataType::FixedSizeList(..) => self.fixed_size_list()?.get_chunks(size), @@ -99,7 +99,7 @@ impl Series { } } - pub fn list_sum(&self) -> DaftResult { + pub fn list_sum(&self) -> DaftResult { match self.data_type() { DataType::List(_) => self.list()?.sum(), DataType::FixedSizeList(..) => self.fixed_size_list()?.sum(), @@ -110,7 +110,7 @@ impl Series { } } - pub fn list_mean(&self) -> DaftResult { + pub fn list_mean(&self) -> DaftResult { match self.data_type() { DataType::List(_) => self.list()?.mean(), DataType::FixedSizeList(..) => self.fixed_size_list()?.mean(), @@ -121,7 +121,7 @@ impl Series { } } - pub fn list_min(&self) -> DaftResult { + pub fn list_min(&self) -> DaftResult { match self.data_type() { DataType::List(_) => self.list()?.min(), DataType::FixedSizeList(..) => self.fixed_size_list()?.min(), @@ -132,7 +132,7 @@ impl Series { } } - pub fn list_max(&self) -> DaftResult { + pub fn list_max(&self) -> DaftResult { match self.data_type() { DataType::List(_) => self.list()?.max(), DataType::FixedSizeList(..) => self.fixed_size_list()?.max(), @@ -143,7 +143,7 @@ impl Series { } } - pub fn list_sort(&self, desc: &Series) -> DaftResult { + pub fn list_sort(&self, desc: &Self) -> DaftResult { let desc_arr = desc.bool()?; match self.data_type() { diff --git a/src/daft-core/src/series/ops/log.rs b/src/daft-core/src/series/ops/log.rs index 73ded27a88..842fb7cb0a 100644 --- a/src/daft-core/src/series/ops/log.rs +++ b/src/daft-core/src/series/ops/log.rs @@ -6,7 +6,7 @@ use crate::{ }; impl Series { - pub fn log2(&self) -> DaftResult { + pub fn log2(&self) -> DaftResult { match self.data_type() { DataType::Int8 | DataType::Int16 @@ -28,7 +28,7 @@ impl Series { } } - pub fn log10(&self) -> DaftResult { + pub fn log10(&self) -> DaftResult { match self.data_type() { DataType::Int8 | DataType::Int16 @@ -50,7 +50,7 @@ impl Series { } } - pub fn log(&self, base: f64) -> DaftResult { + pub fn log(&self, base: f64) -> DaftResult { match self.data_type() { DataType::Int8 | DataType::Int16 @@ -72,7 +72,7 @@ impl Series { } } - pub fn ln(&self) -> DaftResult { + pub fn ln(&self) -> DaftResult { use crate::series::array_impl::IntoSeries; match self.data_type() { DataType::Int8 diff --git a/src/daft-core/src/series/ops/map.rs b/src/daft-core/src/series/ops/map.rs index 85461b1fe0..b624cd8aac 100644 --- a/src/daft-core/src/series/ops/map.rs +++ b/src/daft-core/src/series/ops/map.rs @@ -3,7 +3,7 @@ use common_error::{DaftError, DaftResult}; use crate::{datatypes::DataType, series::Series}; impl Series { - pub fn map_get(&self, key: &Series) -> DaftResult { + pub fn map_get(&self, key: &Self) -> DaftResult { match self.data_type() { DataType::Map(_) => self.map()?.map_get(key), dt => Err(DaftError::TypeError(format!( diff --git a/src/daft-core/src/series/ops/minhash.rs b/src/daft-core/src/series/ops/minhash.rs index 527dc75fdd..a6a7bb9247 100644 --- a/src/daft-core/src/series/ops/minhash.rs +++ b/src/daft-core/src/series/ops/minhash.rs @@ -7,7 +7,7 @@ use crate::{ }; impl Series { - pub fn minhash(&self, num_hashes: usize, ngram_size: usize, seed: u32) -> DaftResult { + pub fn minhash(&self, num_hashes: usize, ngram_size: usize, seed: u32) -> DaftResult { match self.data_type() { DataType::Utf8 => Ok(self .utf8()? diff --git a/src/daft-core/src/series/ops/not.rs b/src/daft-core/src/series/ops/not.rs index c6a8216614..8372fde4be 100644 --- a/src/daft-core/src/series/ops/not.rs +++ b/src/daft-core/src/series/ops/not.rs @@ -16,7 +16,7 @@ impl Not for &Series { } impl Not for Series { - type Output = DaftResult; + type Output = DaftResult; fn not(self) -> Self::Output { (&self).not() } diff --git a/src/daft-core/src/series/ops/null.rs b/src/daft-core/src/series/ops/null.rs index 00df3b5860..d8ffeb0933 100644 --- a/src/daft-core/src/series/ops/null.rs +++ b/src/daft-core/src/series/ops/null.rs @@ -3,15 +3,15 @@ use common_error::DaftResult; use crate::series::Series; impl Series { - pub fn is_null(&self) -> DaftResult { + pub fn is_null(&self) -> DaftResult { self.inner.is_null() } - pub fn not_null(&self) -> DaftResult { + pub fn not_null(&self) -> DaftResult { self.inner.not_null() } - pub fn fill_null(&self, fill_value: &Series) -> DaftResult { + pub fn fill_null(&self, fill_value: &Self) -> DaftResult { let predicate = self.not_null()?; self.if_else(fill_value, &predicate) } diff --git a/src/daft-core/src/series/ops/repeat.rs b/src/daft-core/src/series/ops/repeat.rs index eddde7e463..1bd1e438cc 100644 --- a/src/daft-core/src/series/ops/repeat.rs +++ b/src/daft-core/src/series/ops/repeat.rs @@ -6,6 +6,6 @@ use crate::series::Series; impl Series { pub(crate) fn repeat(&self, n: usize) -> DaftResult { let many_self = std::iter::repeat(self).take(n).collect_vec(); - Series::concat(&many_self) + Self::concat(&many_self) } } diff --git a/src/daft-core/src/series/ops/round.rs b/src/daft-core/src/series/ops/round.rs index 6f968063fb..8c74bd90c5 100644 --- a/src/daft-core/src/series/ops/round.rs +++ b/src/daft-core/src/series/ops/round.rs @@ -6,7 +6,7 @@ use crate::{ }; impl Series { - pub fn round(&self, decimal: i32) -> DaftResult { + pub fn round(&self, decimal: i32) -> DaftResult { match self.data_type() { DataType::Int8 | DataType::Int16 diff --git a/src/daft-core/src/series/ops/shift.rs b/src/daft-core/src/series/ops/shift.rs index 1ba5275ae5..9f6cfcc38b 100644 --- a/src/daft-core/src/series/ops/shift.rs +++ b/src/daft-core/src/series/ops/shift.rs @@ -3,7 +3,7 @@ use common_error::{DaftError, DaftResult}; use crate::{datatypes::DataType, series::Series}; impl Series { - pub fn shift_left(&self, bits: &Self) -> DaftResult { + pub fn shift_left(&self, bits: &Self) -> DaftResult { use crate::series::array_impl::IntoSeries; if !bits.data_type().is_integer() { return Err(DaftError::TypeError(format!( @@ -52,7 +52,7 @@ impl Series { } } - pub fn shift_right(&self, bits: &Self) -> DaftResult { + pub fn shift_right(&self, bits: &Self) -> DaftResult { use crate::series::array_impl::IntoSeries; if !bits.data_type().is_integer() { return Err(DaftError::TypeError(format!( diff --git a/src/daft-core/src/series/ops/sign.rs b/src/daft-core/src/series/ops/sign.rs index 53ecb67088..aedc027502 100644 --- a/src/daft-core/src/series/ops/sign.rs +++ b/src/daft-core/src/series/ops/sign.rs @@ -6,7 +6,7 @@ use crate::{ }; impl Series { - pub fn sign(&self) -> DaftResult { + pub fn sign(&self) -> DaftResult { match self.data_type() { DataType::UInt8 => Ok(self.u8().unwrap().sign_unsigned()?.into_series()), DataType::UInt16 => Ok(self.u16().unwrap().sign_unsigned()?.into_series()), diff --git a/src/daft-core/src/series/ops/sketch_percentile.rs b/src/daft-core/src/series/ops/sketch_percentile.rs index 23e85b6d89..0cebfa739c 100644 --- a/src/daft-core/src/series/ops/sketch_percentile.rs +++ b/src/daft-core/src/series/ops/sketch_percentile.rs @@ -7,7 +7,7 @@ impl Series { &self, percentiles: &[f64], force_list_output: bool, - ) -> DaftResult { + ) -> DaftResult { use crate::datatypes::DataType::*; match self.data_type() { diff --git a/src/daft-core/src/series/ops/sort.rs b/src/daft-core/src/series/ops/sort.rs index 4c591cc744..48ad1288ba 100644 --- a/src/daft-core/src/series/ops/sort.rs +++ b/src/daft-core/src/series/ops/sort.rs @@ -6,7 +6,7 @@ use crate::{ }; impl Series { - pub fn argsort(&self, descending: bool) -> DaftResult { + pub fn argsort(&self, descending: bool) -> DaftResult { let series = self.as_physical()?; with_match_comparable_daft_types!(series.data_type(), |$T| { let downcasted = series.downcast::<<$T as DaftDataType>::ArrayType>()?; @@ -14,7 +14,7 @@ impl Series { }) } - pub fn argsort_multikey(sort_keys: &[Series], descending: &[bool]) -> DaftResult { + pub fn argsort_multikey(sort_keys: &[Self], descending: &[bool]) -> DaftResult { if sort_keys.len() != descending.len() { return Err(DaftError::ValueError(format!( "sort_keys and descending length must match, got {} vs {}", diff --git a/src/daft-core/src/series/ops/sqrt.rs b/src/daft-core/src/series/ops/sqrt.rs index 614f8f9fc2..f127b84d32 100644 --- a/src/daft-core/src/series/ops/sqrt.rs +++ b/src/daft-core/src/series/ops/sqrt.rs @@ -6,7 +6,7 @@ use crate::{ }; impl Series { - pub fn sqrt(&self) -> DaftResult { + pub fn sqrt(&self) -> DaftResult { let casted_dtype = self.to_floating_data_type()?; let casted_self = self .cast(&casted_dtype) diff --git a/src/daft-core/src/series/ops/struct_.rs b/src/daft-core/src/series/ops/struct_.rs index 4dabb8b0f3..2c5421e13a 100644 --- a/src/daft-core/src/series/ops/struct_.rs +++ b/src/daft-core/src/series/ops/struct_.rs @@ -3,7 +3,7 @@ use common_error::{DaftError, DaftResult}; use crate::{datatypes::DataType, series::Series}; impl Series { - pub fn struct_get(&self, name: &str) -> DaftResult { + pub fn struct_get(&self, name: &str) -> DaftResult { match self.data_type() { DataType::Struct(_) => self.struct_()?.get(name), dt => Err(DaftError::TypeError(format!( diff --git a/src/daft-core/src/series/ops/take.rs b/src/daft-core/src/series/ops/take.rs index 9ff218757e..80ec81f958 100644 --- a/src/daft-core/src/series/ops/take.rs +++ b/src/daft-core/src/series/ops/take.rs @@ -8,19 +8,19 @@ use crate::{ }; impl Series { - pub fn head(&self, num: usize) -> DaftResult { + pub fn head(&self, num: usize) -> DaftResult { if num >= self.len() { return Ok(self.clone()); } self.inner.head(num) } - pub fn slice(&self, start: usize, end: usize) -> DaftResult { + pub fn slice(&self, start: usize, end: usize) -> DaftResult { let l = self.len(); self.inner.slice(start.min(l), end.min(l)) } - pub fn take(&self, idx: &Series) -> DaftResult { + pub fn take(&self, idx: &Self) -> DaftResult { self.inner.take(idx) } diff --git a/src/daft-core/src/series/ops/utf8.rs b/src/daft-core/src/series/ops/utf8.rs index d4fe19bde3..64d112a984 100644 --- a/src/daft-core/src/series/ops/utf8.rs +++ b/src/daft-core/src/series/ops/utf8.rs @@ -8,10 +8,7 @@ use crate::{ }; impl Series { - pub fn with_utf8_array( - &self, - f: impl Fn(&Utf8Array) -> DaftResult, - ) -> DaftResult { + pub fn with_utf8_array(&self, f: impl Fn(&Utf8Array) -> DaftResult) -> DaftResult { match self.data_type() { DataType::Utf8 => f(self.utf8()?), DataType::Null => Ok(self.clone()), @@ -21,44 +18,44 @@ impl Series { } } - pub fn utf8_endswith(&self, pattern: &Series) -> DaftResult { + pub fn utf8_endswith(&self, pattern: &Self) -> DaftResult { self.with_utf8_array(|arr| { pattern.with_utf8_array(|pattern_arr| Ok(arr.endswith(pattern_arr)?.into_series())) }) } - pub fn utf8_startswith(&self, pattern: &Series) -> DaftResult { + pub fn utf8_startswith(&self, pattern: &Self) -> DaftResult { self.with_utf8_array(|arr| { pattern.with_utf8_array(|pattern_arr| Ok(arr.startswith(pattern_arr)?.into_series())) }) } - pub fn utf8_contains(&self, pattern: &Series) -> DaftResult { + pub fn utf8_contains(&self, pattern: &Self) -> DaftResult { self.with_utf8_array(|arr| { pattern.with_utf8_array(|pattern_arr| Ok(arr.contains(pattern_arr)?.into_series())) }) } - pub fn utf8_match(&self, pattern: &Series) -> DaftResult { + pub fn utf8_match(&self, pattern: &Self) -> DaftResult { self.with_utf8_array(|arr| { pattern.with_utf8_array(|pattern_arr| Ok(arr.match_(pattern_arr)?.into_series())) }) } - pub fn utf8_split(&self, pattern: &Series, regex: bool) -> DaftResult { + pub fn utf8_split(&self, pattern: &Self, regex: bool) -> DaftResult { self.with_utf8_array(|arr| { pattern.with_utf8_array(|pattern_arr| Ok(arr.split(pattern_arr, regex)?.into_series())) }) } - pub fn utf8_extract(&self, pattern: &Series, index: usize) -> DaftResult { + pub fn utf8_extract(&self, pattern: &Self, index: usize) -> DaftResult { self.with_utf8_array(|arr| { pattern .with_utf8_array(|pattern_arr| Ok(arr.extract(pattern_arr, index)?.into_series())) }) } - pub fn utf8_extract_all(&self, pattern: &Series, index: usize) -> DaftResult { + pub fn utf8_extract_all(&self, pattern: &Self, index: usize) -> DaftResult { self.with_utf8_array(|arr| { pattern.with_utf8_array(|pattern_arr| { Ok(arr.extract_all(pattern_arr, index)?.into_series()) @@ -68,10 +65,10 @@ impl Series { pub fn utf8_replace( &self, - pattern: &Series, - replacement: &Series, + pattern: &Self, + replacement: &Self, regex: bool, - ) -> DaftResult { + ) -> DaftResult { self.with_utf8_array(|arr| { pattern.with_utf8_array(|pattern_arr| { replacement.with_utf8_array(|replacement_arr| { @@ -83,39 +80,39 @@ impl Series { }) } - pub fn utf8_length(&self) -> DaftResult { + pub fn utf8_length(&self) -> DaftResult { self.with_utf8_array(|arr| Ok(arr.length()?.into_series())) } - pub fn utf8_length_bytes(&self) -> DaftResult { + pub fn utf8_length_bytes(&self) -> DaftResult { self.with_utf8_array(|arr| Ok(arr.length_bytes()?.into_series())) } - pub fn utf8_lower(&self) -> DaftResult { + pub fn utf8_lower(&self) -> DaftResult { self.with_utf8_array(|arr| Ok(arr.lower()?.into_series())) } - pub fn utf8_upper(&self) -> DaftResult { + pub fn utf8_upper(&self) -> DaftResult { self.with_utf8_array(|arr| Ok(arr.upper()?.into_series())) } - pub fn utf8_lstrip(&self) -> DaftResult { + pub fn utf8_lstrip(&self) -> DaftResult { self.with_utf8_array(|arr| Ok(arr.lstrip()?.into_series())) } - pub fn utf8_rstrip(&self) -> DaftResult { + pub fn utf8_rstrip(&self) -> DaftResult { self.with_utf8_array(|arr| Ok(arr.rstrip()?.into_series())) } - pub fn utf8_reverse(&self) -> DaftResult { + pub fn utf8_reverse(&self) -> DaftResult { self.with_utf8_array(|arr| Ok(arr.reverse()?.into_series())) } - pub fn utf8_capitalize(&self) -> DaftResult { + pub fn utf8_capitalize(&self) -> DaftResult { self.with_utf8_array(|arr| Ok(arr.capitalize()?.into_series())) } - pub fn utf8_left(&self, nchars: &Series) -> DaftResult { + pub fn utf8_left(&self, nchars: &Self) -> DaftResult { self.with_utf8_array(|arr| { if nchars.data_type().is_integer() { with_match_integer_daft_types!(nchars.data_type(), |$T| { @@ -132,7 +129,7 @@ impl Series { }) } - pub fn utf8_right(&self, nchars: &Series) -> DaftResult { + pub fn utf8_right(&self, nchars: &Self) -> DaftResult { self.with_utf8_array(|arr| { if nchars.data_type().is_integer() { with_match_integer_daft_types!(nchars.data_type(), |$T| { @@ -149,13 +146,13 @@ impl Series { }) } - pub fn utf8_find(&self, substr: &Series) -> DaftResult { + pub fn utf8_find(&self, substr: &Self) -> DaftResult { self.with_utf8_array(|arr| { substr.with_utf8_array(|substr_arr| Ok(arr.find(substr_arr)?.into_series())) }) } - pub fn utf8_lpad(&self, length: &Series, pad: &Series) -> DaftResult { + pub fn utf8_lpad(&self, length: &Self, pad: &Self) -> DaftResult { self.with_utf8_array(|arr| { pad.with_utf8_array(|pad_arr| { if length.data_type().is_integer() { @@ -174,7 +171,7 @@ impl Series { }) } - pub fn utf8_rpad(&self, length: &Series, pad: &Series) -> DaftResult { + pub fn utf8_rpad(&self, length: &Self, pad: &Self) -> DaftResult { self.with_utf8_array(|arr| { pad.with_utf8_array(|pad_arr| { if length.data_type().is_integer() { @@ -193,7 +190,7 @@ impl Series { }) } - pub fn utf8_repeat(&self, n: &Series) -> DaftResult { + pub fn utf8_repeat(&self, n: &Self) -> DaftResult { self.with_utf8_array(|arr| { if n.data_type().is_integer() { with_match_integer_daft_types!(n.data_type(), |$T| { @@ -210,19 +207,19 @@ impl Series { }) } - pub fn utf8_like(&self, pattern: &Series) -> DaftResult { + pub fn utf8_like(&self, pattern: &Self) -> DaftResult { self.with_utf8_array(|arr| { pattern.with_utf8_array(|pattern_arr| Ok(arr.like(pattern_arr)?.into_series())) }) } - pub fn utf8_ilike(&self, pattern: &Series) -> DaftResult { + pub fn utf8_ilike(&self, pattern: &Self) -> DaftResult { self.with_utf8_array(|arr| { pattern.with_utf8_array(|pattern_arr| Ok(arr.ilike(pattern_arr)?.into_series())) }) } - pub fn utf8_substr(&self, start: &Series, length: &Series) -> DaftResult { + pub fn utf8_substr(&self, start: &Self, length: &Self) -> DaftResult { self.with_utf8_array(|arr| { if start.data_type().is_integer() { with_match_integer_daft_types!(start.data_type(), |$T| { @@ -250,24 +247,24 @@ impl Series { }) } - pub fn utf8_to_date(&self, format: &str) -> DaftResult { + pub fn utf8_to_date(&self, format: &str) -> DaftResult { self.with_utf8_array(|arr| Ok(arr.to_date(format)?.into_series())) } - pub fn utf8_to_datetime(&self, format: &str, timezone: Option<&str>) -> DaftResult { + pub fn utf8_to_datetime(&self, format: &str, timezone: Option<&str>) -> DaftResult { self.with_utf8_array(|arr| Ok(arr.to_datetime(format, timezone)?.into_series())) } - pub fn utf8_normalize(&self, opts: Utf8NormalizeOptions) -> DaftResult { + pub fn utf8_normalize(&self, opts: Utf8NormalizeOptions) -> DaftResult { self.with_utf8_array(|arr| Ok(arr.normalize(opts)?.into_series())) } pub fn utf8_count_matches( &self, - patterns: &Series, + patterns: &Self, whole_word: bool, case_sensitive: bool, - ) -> DaftResult { + ) -> DaftResult { self.with_utf8_array(|arr| { patterns.with_utf8_array(|pattern_arr| { Ok(arr diff --git a/src/daft-csv/Cargo.toml b/src/daft-csv/Cargo.toml index d672e30f35..dde511422b 100644 --- a/src/daft-csv/Cargo.toml +++ b/src/daft-csv/Cargo.toml @@ -25,6 +25,9 @@ rstest = {workspace = true} [features] python = ["dep:pyo3", "common-error/python", "common-py-serde/python", "daft-core/python", "daft-io/python", "daft-table/python", "daft-dsl/python"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-csv" diff --git a/src/daft-csv/src/lib.rs b/src/daft-csv/src/lib.rs index 17d254d520..b49245edab 100644 --- a/src/daft-csv/src/lib.rs +++ b/src/daft-csv/src/lib.rs @@ -43,17 +43,17 @@ pub enum Error { } impl From for DaftError { - fn from(err: Error) -> DaftError { + fn from(err: Error) -> Self { match err { Error::IOError { source } => source.into(), - _ => DaftError::External(err.into()), + _ => Self::External(err.into()), } } } impl From for Error { fn from(err: daft_io::Error) -> Self { - Error::IOError { source: err } + Self::IOError { source: err } } } diff --git a/src/daft-decoding/Cargo.toml b/src/daft-decoding/Cargo.toml index 89a17866c2..7874436f04 100644 --- a/src/daft-decoding/Cargo.toml +++ b/src/daft-decoding/Cargo.toml @@ -6,6 +6,9 @@ csv-async = "1.2.6" fast-float = "0.2.0" simdutf8 = "0.1.3" +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-decoding" diff --git a/src/daft-dsl/Cargo.toml b/src/daft-dsl/Cargo.toml index e9a236d6dd..cc72281e2e 100644 --- a/src/daft-dsl/Cargo.toml +++ b/src/daft-dsl/Cargo.toml @@ -19,6 +19,9 @@ typetag = "0.2.16" python = ["dep:pyo3", "common-error/python", "daft-core/python", "common-treenode/python", "common-py-serde/python", "common-resource-request/python"] test-utils = [] +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-dsl" diff --git a/src/daft-dsl/src/expr.rs b/src/daft-dsl/src/expr.rs index affb5f08e3..0e2a3362a5 100644 --- a/src/daft-dsl/src/expr.rs +++ b/src/daft-dsl/src/expr.rs @@ -402,7 +402,7 @@ impl AggExpr { } } - pub fn from_name_and_child_expr(name: &str, child: ExprRef) -> DaftResult { + pub fn from_name_and_child_expr(name: &str, child: ExprRef) -> DaftResult { use AggExpr::*; match name { "count" => Ok(Count(child, CountMode::Valid)), @@ -421,12 +421,12 @@ impl AggExpr { impl From<&AggExpr> for ExprRef { fn from(agg_expr: &AggExpr) -> Self { - Arc::new(Expr::Agg(agg_expr.clone())) + Self::new(Expr::Agg(agg_expr.clone())) } } -impl AsRef for Expr { - fn as_ref(&self) -> &Expr { +impl AsRef for Expr { + fn as_ref(&self) -> &Self { self } } @@ -437,11 +437,11 @@ impl Expr { } pub fn alias>>(self: &ExprRef, name: S) -> ExprRef { - Expr::Alias(self.clone(), name.into()).into() + Self::Alias(self.clone(), name.into()).into() } pub fn if_else(self: ExprRef, if_true: ExprRef, if_false: ExprRef) -> ExprRef { - Expr::IfElse { + Self::IfElse { if_true, if_false, predicate: self, @@ -450,19 +450,19 @@ impl Expr { } pub fn cast(self: ExprRef, dtype: &DataType) -> ExprRef { - Expr::Cast(self, dtype.clone()).into() + Self::Cast(self, dtype.clone()).into() } pub fn count(self: ExprRef, mode: CountMode) -> ExprRef { - Expr::Agg(AggExpr::Count(self, mode)).into() + Self::Agg(AggExpr::Count(self, mode)).into() } pub fn sum(self: ExprRef) -> ExprRef { - Expr::Agg(AggExpr::Sum(self)).into() + Self::Agg(AggExpr::Sum(self)).into() } pub fn approx_count_distinct(self: ExprRef) -> ExprRef { - Expr::Agg(AggExpr::ApproxCountDistinct(self)).into() + Self::Agg(AggExpr::ApproxCountDistinct(self)).into() } pub fn approx_percentiles( @@ -470,7 +470,7 @@ impl Expr { percentiles: &[f64], force_list_output: bool, ) -> ExprRef { - Expr::Agg(AggExpr::ApproxPercentile(ApproxPercentileParams { + Self::Agg(AggExpr::ApproxPercentile(ApproxPercentileParams { child: self, percentiles: percentiles.iter().map(|f| FloatWrapper(*f)).collect(), force_list_output, @@ -483,7 +483,7 @@ impl Expr { percentiles: &[f64], force_list_output: bool, ) -> ExprRef { - Expr::Function { + Self::Function { func: FunctionExpr::Sketch(SketchExpr::Percentile { percentiles: HashableVecPercentiles(percentiles.to_vec()), force_list_output, @@ -494,52 +494,52 @@ impl Expr { } pub fn mean(self: ExprRef) -> ExprRef { - Expr::Agg(AggExpr::Mean(self)).into() + Self::Agg(AggExpr::Mean(self)).into() } pub fn min(self: ExprRef) -> ExprRef { - Expr::Agg(AggExpr::Min(self)).into() + Self::Agg(AggExpr::Min(self)).into() } pub fn max(self: ExprRef) -> ExprRef { - Expr::Agg(AggExpr::Max(self)).into() + Self::Agg(AggExpr::Max(self)).into() } pub fn any_value(self: ExprRef, ignore_nulls: bool) -> ExprRef { - Expr::Agg(AggExpr::AnyValue(self, ignore_nulls)).into() + Self::Agg(AggExpr::AnyValue(self, ignore_nulls)).into() } pub fn agg_list(self: ExprRef) -> ExprRef { - Expr::Agg(AggExpr::List(self)).into() + Self::Agg(AggExpr::List(self)).into() } pub fn agg_concat(self: ExprRef) -> ExprRef { - Expr::Agg(AggExpr::Concat(self)).into() + Self::Agg(AggExpr::Concat(self)).into() } #[allow(clippy::should_implement_trait)] pub fn not(self: ExprRef) -> ExprRef { - Expr::Not(self).into() + Self::Not(self).into() } pub fn is_null(self: ExprRef) -> ExprRef { - Expr::IsNull(self).into() + Self::IsNull(self).into() } pub fn not_null(self: ExprRef) -> ExprRef { - Expr::NotNull(self).into() + Self::NotNull(self).into() } pub fn fill_null(self: ExprRef, fill_value: ExprRef) -> ExprRef { - Expr::FillNull(self, fill_value).into() + Self::FillNull(self, fill_value).into() } pub fn is_in(self: ExprRef, items: ExprRef) -> ExprRef { - Expr::IsIn(self, items).into() + Self::IsIn(self, items).into() } pub fn between(self: ExprRef, lower: ExprRef, upper: ExprRef) -> ExprRef { - Expr::Between(self, lower, upper).into() + Self::Between(self, lower, upper).into() } pub fn eq(self: ExprRef, other: ExprRef) -> ExprRef { @@ -677,7 +677,7 @@ impl Expr { } } - pub fn with_new_children(&self, children: Vec) -> Expr { + pub fn with_new_children(&self, children: Vec) -> Self { use Expr::*; match self { // no children @@ -884,8 +884,8 @@ impl Expr { ))); } match predicate.as_ref() { - Expr::Literal(lit::LiteralValue::Boolean(true)) => if_true.to_field(schema), - Expr::Literal(lit::LiteralValue::Boolean(false)) => { + Self::Literal(lit::LiteralValue::Boolean(true)) => if_true.to_field(schema), + Self::Literal(lit::LiteralValue::Boolean(false)) => { Ok(if_false.to_field(schema)?.rename(if_true.name())) } _ => { @@ -1029,7 +1029,7 @@ impl Expr { /// If the expression is a literal, return it. Otherwise, return None. pub fn as_literal(&self) -> Option<&lit::LiteralValue> { match self { - Expr::Literal(lit) => Some(lit), + Self::Literal(lit) => Some(lit), _ => None, } } diff --git a/src/daft-dsl/src/functions/python/mod.rs b/src/daft-dsl/src/functions/python/mod.rs index d4690e3ec6..378611851a 100644 --- a/src/daft-dsl/src/functions/python/mod.rs +++ b/src/daft-dsl/src/functions/python/mod.rs @@ -26,8 +26,8 @@ impl PythonUDF { #[inline] pub fn get_evaluator(&self) -> &dyn FunctionEvaluator { match self { - PythonUDF::Stateless(stateless_python_udf) => stateless_python_udf, - PythonUDF::Stateful(stateful_python_udf) => stateful_python_udf, + Self::Stateless(stateless_python_udf) => stateless_python_udf, + Self::Stateful(stateful_python_udf) => stateful_python_udf, } } } diff --git a/src/daft-dsl/src/lit.rs b/src/daft-dsl/src/lit.rs index 065c2a4d4e..5db0f05a3d 100644 --- a/src/daft-dsl/src/lit.rs +++ b/src/daft-dsl/src/lit.rs @@ -241,7 +241,7 @@ impl LiteralValue { /// If the liter is a boolean, return it. Otherwise, return None. pub fn as_bool(&self) -> Option { match self { - LiteralValue::Boolean(b) => Some(*b), + Self::Boolean(b) => Some(*b), _ => None, } } @@ -249,56 +249,56 @@ impl LiteralValue { /// If the literal is a string, return it. Otherwise, return None. pub fn as_str(&self) -> Option<&str> { match self { - LiteralValue::Utf8(s) => Some(s), + Self::Utf8(s) => Some(s), _ => None, } } /// If the literal is `Binary`, return it. Otherwise, return None. pub fn as_binary(&self) -> Option<&[u8]> { match self { - LiteralValue::Binary(b) => Some(b), + Self::Binary(b) => Some(b), _ => None, } } /// If the literal is `Int32`, return it. Otherwise, return None. pub fn as_i32(&self) -> Option { match self { - LiteralValue::Int32(i) => Some(*i), + Self::Int32(i) => Some(*i), _ => None, } } /// If the literal is `UInt32`, return it. Otherwise, return None. pub fn as_u32(&self) -> Option { match self { - LiteralValue::UInt32(i) => Some(*i), + Self::UInt32(i) => Some(*i), _ => None, } } /// If the literal is `Int64`, return it. Otherwise, return None. pub fn as_i64(&self) -> Option { match self { - LiteralValue::Int64(i) => Some(*i), + Self::Int64(i) => Some(*i), _ => None, } } /// If the literal is `UInt64`, return it. Otherwise, return None. pub fn as_u64(&self) -> Option { match self { - LiteralValue::UInt64(i) => Some(*i), + Self::UInt64(i) => Some(*i), _ => None, } } /// If the literal is `Float64`, return it. Otherwise, return None. pub fn as_f64(&self) -> Option { match self { - LiteralValue::Float64(f) => Some(*f), + Self::Float64(f) => Some(*f), _ => None, } } /// If the literal is a series, return it. Otherwise, return None. pub fn as_series(&self) -> Option<&Series> { match self { - LiteralValue::Series(series) => Some(series), + Self::Series(series) => Some(series), _ => None, } } diff --git a/src/daft-dsl/src/python.rs b/src/daft-dsl/src/python.rs index af56dc68d8..a4a54e74c9 100644 --- a/src/daft-dsl/src/python.rs +++ b/src/daft-dsl/src/python.rs @@ -651,13 +651,13 @@ impl_bincode_py_state_serialization!(PyExpr); impl From for PyExpr { fn from(value: crate::ExprRef) -> Self { - PyExpr { expr: value } + Self { expr: value } } } impl From for PyExpr { fn from(value: crate::Expr) -> Self { - PyExpr { + Self { expr: Arc::new(value), } } diff --git a/src/daft-functions-json/Cargo.toml b/src/daft-functions-json/Cargo.toml index e27077b497..1f7547bcc3 100644 --- a/src/daft-functions-json/Cargo.toml +++ b/src/daft-functions-json/Cargo.toml @@ -21,6 +21,9 @@ python = [ "daft-dsl/python" ] +[lints] +workspace = true + [package] name = "daft-functions-json" edition.workspace = true diff --git a/src/daft-functions/Cargo.toml b/src/daft-functions/Cargo.toml index 9be2a86dc2..febb241e13 100644 --- a/src/daft-functions/Cargo.toml +++ b/src/daft-functions/Cargo.toml @@ -30,6 +30,9 @@ python = [ "common-io-config/python" ] +[lints] +workspace = true + [package] name = "daft-functions" edition.workspace = true diff --git a/src/daft-functions/src/distance/cosine.rs b/src/daft-functions/src/distance/cosine.rs index c7065ff655..170587c1bb 100644 --- a/src/daft-functions/src/distance/cosine.rs +++ b/src/daft-functions/src/distance/cosine.rs @@ -14,9 +14,9 @@ trait SpatialSimilarity { impl SpatialSimilarity for f64 { fn cosine(a: &[Self], b: &[Self]) -> Option { - let xy = a.iter().zip(b).map(|(a, b)| a * b).sum::(); - let x_sq = a.iter().map(|x| x.powi(2)).sum::().sqrt(); - let y_sq = b.iter().map(|x| x.powi(2)).sum::().sqrt(); + let xy = a.iter().zip(b).map(|(a, b)| a * b).sum::(); + let x_sq = a.iter().map(|x| x.powi(2)).sum::().sqrt(); + let y_sq = b.iter().map(|x| x.powi(2)).sum::().sqrt(); Some(1.0 - xy / (x_sq * y_sq)) } } diff --git a/src/daft-functions/src/lib.rs b/src/daft-functions/src/lib.rs index 0a8486864e..a8b1c8d0cd 100644 --- a/src/daft-functions/src/lib.rs +++ b/src/daft-functions/src/lib.rs @@ -60,13 +60,13 @@ pub enum Error { } impl From for std::io::Error { - fn from(err: Error) -> std::io::Error { - std::io::Error::new(std::io::ErrorKind::Other, err) + fn from(err: Error) -> Self { + Self::new(std::io::ErrorKind::Other, err) } } impl From for DaftError { - fn from(err: Error) -> DaftError { - DaftError::External(err.into()) + fn from(err: Error) -> Self { + Self::External(err.into()) } } diff --git a/src/daft-functions/src/tokenize/bpe.rs b/src/daft-functions/src/tokenize/bpe.rs index 18307092b4..c35e41771e 100644 --- a/src/daft-functions/src/tokenize/bpe.rs +++ b/src/daft-functions/src/tokenize/bpe.rs @@ -62,16 +62,16 @@ impl From for DaftError { fn from(err: Error) -> Self { use Error::*; match err { - Base64Decode { .. } => DaftError::ValueError(err.to_string()), - RankNumberParse { .. } => DaftError::ValueError(err.to_string()), - InvalidUtf8Sequence { .. } => DaftError::ValueError(err.to_string()), - InvalidTokenLine { .. } => DaftError::ValueError(err.to_string()), - EmptyTokenFile {} => DaftError::ValueError(err.to_string()), - BPECreation { .. } => DaftError::ComputeError(err.to_string()), - BadToken { .. } => DaftError::ValueError(err.to_string()), - Decode { .. } => DaftError::ComputeError(err.to_string()), - MissingPattern {} => DaftError::ValueError(err.to_string()), - UnsupportedSpecialTokens { .. } => DaftError::ValueError(err.to_string()), + Base64Decode { .. } => Self::ValueError(err.to_string()), + RankNumberParse { .. } => Self::ValueError(err.to_string()), + InvalidUtf8Sequence { .. } => Self::ValueError(err.to_string()), + InvalidTokenLine { .. } => Self::ValueError(err.to_string()), + EmptyTokenFile {} => Self::ValueError(err.to_string()), + BPECreation { .. } => Self::ComputeError(err.to_string()), + BadToken { .. } => Self::ValueError(err.to_string()), + Decode { .. } => Self::ComputeError(err.to_string()), + MissingPattern {} => Self::ValueError(err.to_string()), + UnsupportedSpecialTokens { .. } => Self::ValueError(err.to_string()), } } } diff --git a/src/daft-functions/src/uri/download.rs b/src/daft-functions/src/uri/download.rs index 4bacbdb3a9..9f107e95c1 100644 --- a/src/daft-functions/src/uri/download.rs +++ b/src/daft-functions/src/uri/download.rs @@ -29,7 +29,7 @@ impl ScalarUDF for DownloadFunction { } fn evaluate(&self, inputs: &[Series]) -> DaftResult { - let DownloadFunction { + let Self { max_connections, raise_error_on_failure, multi_thread, diff --git a/src/daft-functions/src/uri/upload.rs b/src/daft-functions/src/uri/upload.rs index 14f7a3721c..d4c606955f 100644 --- a/src/daft-functions/src/uri/upload.rs +++ b/src/daft-functions/src/uri/upload.rs @@ -26,7 +26,7 @@ impl ScalarUDF for UploadFunction { } fn evaluate(&self, inputs: &[Series]) -> DaftResult { - let UploadFunction { + let Self { location, config, max_connections, diff --git a/src/daft-image/Cargo.toml b/src/daft-image/Cargo.toml index 6dab7b3b8f..205ad8b9a0 100644 --- a/src/daft-image/Cargo.toml +++ b/src/daft-image/Cargo.toml @@ -18,6 +18,9 @@ python = [ "common-error/python" ] +[lints] +workspace = true + [package] name = "daft-image" edition.workspace = true diff --git a/src/daft-image/src/image_buffer.rs b/src/daft-image/src/image_buffer.rs index 18db67e0df..f1595aaf1f 100644 --- a/src/daft-image/src/image_buffer.rs +++ b/src/daft-image/src/image_buffer.rs @@ -45,12 +45,7 @@ macro_rules! with_method_on_image_buffer { } impl<'a> DaftImageBuffer<'a> { - pub fn from_raw( - mode: &ImageMode, - width: u32, - height: u32, - data: Cow<'a, [u8]>, - ) -> DaftImageBuffer<'a> { + pub fn from_raw(mode: &ImageMode, width: u32, height: u32, data: Cow<'a, [u8]>) -> Self { use DaftImageBuffer::*; match mode { ImageMode::L => L(ImageBuffer::from_raw(width, height, data).unwrap()), diff --git a/src/daft-io/Cargo.toml b/src/daft-io/Cargo.toml index 42cc984204..433090370e 100644 --- a/src/daft-io/Cargo.toml +++ b/src/daft-io/Cargo.toml @@ -54,6 +54,9 @@ python = [ "common-file-formats/python" ] +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-io" diff --git a/src/daft-io/src/azure_blob.rs b/src/daft-io/src/azure_blob.rs index de2d05bd5d..a52092bd4e 100644 --- a/src/daft-io/src/azure_blob.rs +++ b/src/daft-io/src/azure_blob.rs @@ -110,27 +110,27 @@ impl From for super::Error { match error { UnableToReadBytes { path, source } | UnableToOpenFile { path, source } => { match source.as_http_error().map(|v| v.status().into()) { - Some(404) | Some(410) => super::Error::NotFound { + Some(404) | Some(410) => Self::NotFound { path, source: source.into(), }, - Some(401) => super::Error::Unauthorized { + Some(401) => Self::Unauthorized { store: super::SourceType::AzureBlob, path, source: source.into(), }, - None | Some(_) => super::Error::UnableToOpenFile { + None | Some(_) => Self::UnableToOpenFile { path, source: source.into(), }, } } - NotFound { ref path } => super::Error::NotFound { + NotFound { ref path } => Self::NotFound { path: path.into(), source: error.into(), }, - NotAFile { path } => super::Error::NotAFile { path }, - _ => super::Error::Generic { + NotAFile { path } => Self::NotAFile { path }, + _ => Self::Generic { store: super::SourceType::AzureBlob, source: error.into(), }, @@ -225,7 +225,7 @@ impl AzureBlobSource { BlobServiceClient::new(storage_account, storage_credentials) }; - Ok(AzureBlobSource { + Ok(Self { blob_client: blob_client.into(), } .into()) diff --git a/src/daft-io/src/google_cloud.rs b/src/daft-io/src/google_cloud.rs index 5435fedeb2..fe399ab3ec 100644 --- a/src/daft-io/src/google_cloud.rs +++ b/src/daft-io/src/google_cloud.rs @@ -58,50 +58,50 @@ impl From for super::Error { | UnableToOpenFile { path, source } | UnableToListObjects { path, source } => match source { GError::HttpClient(err) => match err.status().map(|s| s.as_u16()) { - Some(404) | Some(410) => super::Error::NotFound { + Some(404) | Some(410) => Self::NotFound { path, source: err.into(), }, - Some(401) => super::Error::Unauthorized { + Some(401) => Self::Unauthorized { store: super::SourceType::GCS, path, source: err.into(), }, - _ => super::Error::UnableToOpenFile { + _ => Self::UnableToOpenFile { path, source: err.into(), }, }, GError::Response(err) => match err.code { - 404 | 410 => super::Error::NotFound { + 404 | 410 => Self::NotFound { path, source: err.into(), }, - 401 => super::Error::Unauthorized { + 401 => Self::Unauthorized { store: super::SourceType::GCS, path, source: err.into(), }, - _ => super::Error::UnableToOpenFile { + _ => Self::UnableToOpenFile { path, source: err.into(), }, }, - GError::TokenSource(err) => super::Error::UnableToLoadCredentials { + GError::TokenSource(err) => Self::UnableToLoadCredentials { store: super::SourceType::GCS, source: err, }, }, - NotFound { ref path } => super::Error::NotFound { + NotFound { ref path } => Self::NotFound { path: path.into(), source: error.into(), }, - InvalidUrl { path, source } => super::Error::InvalidUrl { path, source }, - UnableToLoadCredentials { source } => super::Error::UnableToLoadCredentials { + InvalidUrl { path, source } => Self::InvalidUrl { path, source }, + UnableToLoadCredentials { source } => Self::UnableToLoadCredentials { store: super::SourceType::GCS, source: source.into(), }, - NotAFile { path } => super::Error::NotAFile { path }, + NotAFile { path } => Self::NotAFile { path }, } } } @@ -392,7 +392,7 @@ impl GCSSource { } let client = Client::new(client_config); - Ok(GCSSource { + Ok(Self { client: GCSClientWrapper(client), } .into()) diff --git a/src/daft-io/src/http.rs b/src/daft-io/src/http.rs index 1f3a3bef11..14571fd79f 100644 --- a/src/daft-io/src/http.rs +++ b/src/daft-io/src/http.rs @@ -147,17 +147,17 @@ impl From for super::Error { use Error::*; match error { UnableToOpenFile { path, source } => match source.status().map(|v| v.as_u16()) { - Some(404) | Some(410) => super::Error::NotFound { + Some(404) | Some(410) => Self::NotFound { path, source: source.into(), }, - None | Some(_) => super::Error::UnableToOpenFile { + None | Some(_) => Self::UnableToOpenFile { path, source: source.into(), }, }, - UnableToDetermineSize { path } => super::Error::UnableToDetermineSize { path }, - _ => super::Error::Generic { + UnableToDetermineSize { path } => Self::UnableToDetermineSize { path }, + _ => Self::Generic { store: super::SourceType::Http, source: error.into(), }, @@ -174,7 +174,7 @@ impl HttpSource { .context(UnableToCreateHeaderSnafu)?, ); - Ok(HttpSource { + Ok(Self { client: reqwest::ClientBuilder::default() .pool_max_idle_per_host(70) .default_headers(default_headers) diff --git a/src/daft-io/src/huggingface.rs b/src/daft-io/src/huggingface.rs index 3aacea186d..f10f2d8de3 100644 --- a/src/daft-io/src/huggingface.rs +++ b/src/daft-io/src/huggingface.rs @@ -128,7 +128,7 @@ impl FromStr for HFPathParts { let (repository, uri) = if let Some((repo, uri)) = uri.split_once('/') { (repo, uri) } else { - return Some(HFPathParts { + return Some(Self { bucket: bucket.to_string(), repository: format!("{}/{}", username, uri), revision: "main".to_string(), @@ -150,7 +150,7 @@ impl FromStr for HFPathParts { // ^--------------^ let path = uri.to_string().trim_end_matches('/').to_string(); - Some(HFPathParts { + Some(Self { bucket: bucket.to_string(), repository, revision, @@ -221,17 +221,17 @@ impl From for super::Error { use Error::*; match error { UnableToOpenFile { path, source } => match source.status().map(|v| v.as_u16()) { - Some(404) | Some(410) => super::Error::NotFound { + Some(404) | Some(410) => Self::NotFound { path, source: source.into(), }, - None | Some(_) => super::Error::UnableToOpenFile { + None | Some(_) => Self::UnableToOpenFile { path, source: source.into(), }, }, - UnableToDetermineSize { path } => super::Error::UnableToDetermineSize { path }, - _ => super::Error::Generic { + UnableToDetermineSize { path } => Self::UnableToDetermineSize { path }, + _ => Self::Generic { store: super::SourceType::Http, source: error.into(), }, @@ -256,7 +256,7 @@ impl HFSource { ); } - Ok(HFSource { + Ok(Self { http_source: HttpSource { client: reqwest::ClientBuilder::default() .pool_max_idle_per_host(70) diff --git a/src/daft-io/src/lib.rs b/src/daft-io/src/lib.rs index f5c5834541..6fdaac2368 100644 --- a/src/daft-io/src/lib.rs +++ b/src/daft-io/src/lib.rs @@ -142,34 +142,34 @@ pub enum Error { } impl From for DaftError { - fn from(err: Error) -> DaftError { + fn from(err: Error) -> Self { use Error::*; match err { - NotFound { path, source } => DaftError::FileNotFound { path, source }, - ConnectTimeout { .. } => DaftError::ConnectTimeout(err.into()), - ReadTimeout { .. } => DaftError::ReadTimeout(err.into()), - UnableToReadBytes { .. } => DaftError::ByteStreamError(err.into()), - SocketError { .. } => DaftError::SocketError(err.into()), + NotFound { path, source } => Self::FileNotFound { path, source }, + ConnectTimeout { .. } => Self::ConnectTimeout(err.into()), + ReadTimeout { .. } => Self::ReadTimeout(err.into()), + UnableToReadBytes { .. } => Self::ByteStreamError(err.into()), + SocketError { .. } => Self::SocketError(err.into()), // We have to repeat everything above for the case we have an Arc since we can't move the error. CachedError { ref source } => match source.as_ref() { - NotFound { path, source: _ } => DaftError::FileNotFound { + NotFound { path, source: _ } => Self::FileNotFound { path: path.clone(), source: err.into(), }, - ConnectTimeout { .. } => DaftError::ConnectTimeout(err.into()), - ReadTimeout { .. } => DaftError::ReadTimeout(err.into()), - UnableToReadBytes { .. } => DaftError::ByteStreamError(err.into()), - SocketError { .. } => DaftError::SocketError(err.into()), - _ => DaftError::External(err.into()), + ConnectTimeout { .. } => Self::ConnectTimeout(err.into()), + ReadTimeout { .. } => Self::ReadTimeout(err.into()), + UnableToReadBytes { .. } => Self::ByteStreamError(err.into()), + SocketError { .. } => Self::SocketError(err.into()), + _ => Self::External(err.into()), }, - _ => DaftError::External(err.into()), + _ => Self::External(err.into()), } } } impl From for std::io::Error { - fn from(err: Error) -> std::io::Error { - std::io::Error::new(std::io::ErrorKind::Other, err) + fn from(err: Error) -> Self { + Self::new(std::io::ErrorKind::Other, err) } } @@ -183,7 +183,7 @@ pub struct IOClient { impl IOClient { pub fn new(config: Arc) -> Result { - Ok(IOClient { + Ok(Self { source_type_to_store: tokio::sync::RwLock::new(HashMap::new()), config, }) @@ -361,12 +361,12 @@ pub enum SourceType { impl std::fmt::Display for SourceType { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { - SourceType::File => write!(f, "file"), - SourceType::Http => write!(f, "http"), - SourceType::S3 => write!(f, "s3"), - SourceType::AzureBlob => write!(f, "AzureBlob"), - SourceType::GCS => write!(f, "gcs"), - SourceType::HF => write!(f, "hf"), + Self::File => write!(f, "file"), + Self::Http => write!(f, "http"), + Self::S3 => write!(f, "s3"), + Self::AzureBlob => write!(f, "AzureBlob"), + Self::GCS => write!(f, "gcs"), + Self::HF => write!(f, "hf"), } } } diff --git a/src/daft-io/src/local.rs b/src/daft-io/src/local.rs index fb32d65b27..4ed9eaa54b 100644 --- a/src/daft-io/src/local.rs +++ b/src/daft-io/src/local.rs @@ -87,11 +87,11 @@ impl From for super::Error { UnableToOpenFile { path, source } | UnableToFetchDirectoryEntries { path, source } => { use std::io::ErrorKind::*; match source.kind() { - NotFound => super::Error::NotFound { + NotFound => Self::NotFound { path, source: source.into(), }, - _ => super::Error::UnableToOpenFile { + _ => Self::UnableToOpenFile { path, source: source.into(), }, @@ -100,21 +100,21 @@ impl From for super::Error { UnableToFetchFileMetadata { path, source } => { use std::io::ErrorKind::*; match source.kind() { - NotFound | IsADirectory => super::Error::NotFound { + NotFound | IsADirectory => Self::NotFound { path, source: source.into(), }, - _ => super::Error::UnableToOpenFile { + _ => Self::UnableToOpenFile { path, source: source.into(), }, } } - UnableToReadBytes { path, source } => super::Error::UnableToReadBytes { path, source }, + UnableToReadBytes { path, source } => Self::UnableToReadBytes { path, source }, UnableToWriteToFile { path, source } | UnableToOpenFileForWriting { path, source } => { - super::Error::UnableToWriteToFile { path, source } + Self::UnableToWriteToFile { path, source } } - _ => super::Error::Generic { + _ => Self::Generic { store: super::SourceType::File, source: error.into(), }, @@ -124,7 +124,7 @@ impl From for super::Error { impl LocalSource { pub async fn get_client() -> super::Result> { - Ok(LocalSource {}.into()) + Ok(Self {}.into()) } } diff --git a/src/daft-io/src/object_io.rs b/src/daft-io/src/object_io.rs index d3fa97601a..32bf328f17 100644 --- a/src/daft-io/src/object_io.rs +++ b/src/daft-io/src/object_io.rs @@ -109,7 +109,7 @@ impl GetResult { .source .get(&rp.input, rp.range.clone(), rp.io_stats.clone()) .await?; - if let GetResult::Stream(stream, size, permit, _) = get_result { + if let Self::Stream(stream, size, permit, _) = get_result { result = collect_bytes(stream, size, permit).await; } else { unreachable!("Retrying a stream should always be a stream"); @@ -125,9 +125,9 @@ impl GetResult { pub fn with_retry(self, params: StreamingRetryParams) -> Self { match self { - GetResult::File(..) => self, - GetResult::Stream(s, size, permit, _) => { - GetResult::Stream(s, size, permit, Some(Box::new(params))) + Self::File(..) => self, + Self::Stream(s, size, permit, _) => { + Self::Stream(s, size, permit, Some(Box::new(params))) } } } diff --git a/src/daft-io/src/object_store_glob.rs b/src/daft-io/src/object_store_glob.rs index 58261d5fbf..13b43f773c 100644 --- a/src/daft-io/src/object_store_glob.rs +++ b/src/daft-io/src/object_store_glob.rs @@ -58,7 +58,7 @@ impl GlobState { } pub fn advance(self, path: String, idx: usize, fanout_factor: usize) -> Self { - GlobState { + Self { current_path: path, current_fragment_idx: idx, current_fanout: self.current_fanout * fanout_factor, @@ -67,7 +67,7 @@ impl GlobState { } pub fn with_wildcard_mode(self) -> Self { - GlobState { + Self { wildcard_mode: true, ..self } @@ -126,7 +126,7 @@ impl GlobFragment { } } - GlobFragment { + Self { data: data.to_string(), first_wildcard_idx, escaped_data, @@ -139,11 +139,11 @@ impl GlobFragment { } /// Joins a slice of GlobFragments together with a separator - pub fn join(fragments: &[GlobFragment], sep: &str) -> Self { - GlobFragment::new( + pub fn join(fragments: &[Self], sep: &str) -> Self { + Self::new( fragments .iter() - .map(|frag: &GlobFragment| frag.data.as_str()) + .map(|frag: &Self| frag.data.as_str()) .join(sep) .as_str(), ) diff --git a/src/daft-io/src/s3_like.rs b/src/daft-io/src/s3_like.rs index 97c9641e58..2766011ae7 100644 --- a/src/daft-io/src/s3_like.rs +++ b/src/daft-io/src/s3_like.rs @@ -124,120 +124,120 @@ impl From for super::Error { match error { UnableToOpenFile { path, source } => match source { - SdkError::TimeoutError(_) => super::Error::ReadTimeout { + SdkError::TimeoutError(_) => Self::ReadTimeout { path, source: source.into(), }, SdkError::DispatchFailure(ref dispatch) => { if dispatch.is_timeout() { - super::Error::ConnectTimeout { + Self::ConnectTimeout { path, source: source.into(), } } else if dispatch.is_io() { - super::Error::SocketError { + Self::SocketError { path, source: source.into(), } } else { - super::Error::UnableToOpenFile { + Self::UnableToOpenFile { path, source: source.into(), } } } _ => match source.into_service_error() { - GetObjectError::NoSuchKey(no_such_key) => super::Error::NotFound { + GetObjectError::NoSuchKey(no_such_key) => Self::NotFound { path, source: no_such_key.into(), }, - GetObjectError::Unhandled(v) => super::Error::Unhandled { + GetObjectError::Unhandled(v) => Self::Unhandled { path, msg: DisplayErrorContext(v).to_string(), }, - err => super::Error::UnableToOpenFile { + err => Self::UnableToOpenFile { path, source: err.into(), }, }, }, UnableToHeadFile { path, source } => match source { - SdkError::TimeoutError(_) => super::Error::ReadTimeout { + SdkError::TimeoutError(_) => Self::ReadTimeout { path, source: source.into(), }, SdkError::DispatchFailure(ref dispatch) => { if dispatch.is_timeout() { - super::Error::ConnectTimeout { + Self::ConnectTimeout { path, source: source.into(), } } else if dispatch.is_io() { - super::Error::SocketError { + Self::SocketError { path, source: source.into(), } } else { - super::Error::UnableToOpenFile { + Self::UnableToOpenFile { path, source: source.into(), } } } _ => match source.into_service_error() { - HeadObjectError::NotFound(no_such_key) => super::Error::NotFound { + HeadObjectError::NotFound(no_such_key) => Self::NotFound { path, source: no_such_key.into(), }, - HeadObjectError::Unhandled(v) => super::Error::Unhandled { + HeadObjectError::Unhandled(v) => Self::Unhandled { path, msg: DisplayErrorContext(v).to_string(), }, - err => super::Error::UnableToOpenFile { + err => Self::UnableToOpenFile { path, source: err.into(), }, }, }, UnableToListObjects { path, source } => match source { - SdkError::TimeoutError(_) => super::Error::ReadTimeout { + SdkError::TimeoutError(_) => Self::ReadTimeout { path, source: source.into(), }, SdkError::DispatchFailure(ref dispatch) => { if dispatch.is_timeout() { - super::Error::ConnectTimeout { + Self::ConnectTimeout { path, source: source.into(), } } else if dispatch.is_io() { - super::Error::SocketError { + Self::SocketError { path, source: source.into(), } } else { - super::Error::UnableToOpenFile { + Self::UnableToOpenFile { path, source: source.into(), } } } _ => match source.into_service_error() { - ListObjectsV2Error::NoSuchBucket(no_such_key) => super::Error::NotFound { + ListObjectsV2Error::NoSuchBucket(no_such_key) => Self::NotFound { path, source: no_such_key.into(), }, - ListObjectsV2Error::Unhandled(v) => super::Error::Unhandled { + ListObjectsV2Error::Unhandled(v) => Self::Unhandled { path, msg: DisplayErrorContext(v).to_string(), }, - err => super::Error::UnableToOpenFile { + err => Self::UnableToOpenFile { path, source: err.into(), }, }, }, - InvalidUrl { path, source } => super::Error::InvalidUrl { path, source }, + InvalidUrl { path, source } => Self::InvalidUrl { path, source }, UnableToReadBytes { path, source } => { use std::error::Error; let io_error = if let Some(source) = source.source() { @@ -247,21 +247,21 @@ impl From for super::Error { } else { std::io::Error::new(io::ErrorKind::Other, source) }; - super::Error::UnableToReadBytes { + Self::UnableToReadBytes { path, source: io_error, } } - NotAFile { path } => super::Error::NotAFile { path }, - UnableToLoadCredentials { source } => super::Error::UnableToLoadCredentials { + NotAFile { path } => Self::NotAFile { path }, + UnableToLoadCredentials { source } => Self::UnableToLoadCredentials { store: SourceType::S3, source: source.into(), }, - NotFound { ref path } => super::Error::NotFound { + NotFound { ref path } => Self::NotFound { path: path.into(), source: error.into(), }, - err => super::Error::Generic { + err => Self::Generic { store: SourceType::S3, source: err.into(), }, @@ -553,7 +553,7 @@ async fn build_client(config: &S3Config) -> super::Result { const REGION_HEADER: &str = "x-amz-bucket-region"; impl S3LikeSource { - pub async fn get_client(config: &S3Config) -> super::Result> { + pub async fn get_client(config: &S3Config) -> super::Result> { Ok(build_client(config).await?.into()) } diff --git a/src/daft-io/src/stats.rs b/src/daft-io/src/stats.rs index 0fc15be4d3..32aabd1b90 100644 --- a/src/daft-io/src/stats.rs +++ b/src/daft-io/src/stats.rs @@ -49,7 +49,7 @@ pub(crate) struct IOStatsByteStreamContextHandle { impl IOStatsContext { pub fn new>>(name: S) -> IOStatsRef { - Arc::new(IOStatsContext { + Arc::new(Self { name: name.into(), num_get_requests: atomic::AtomicUsize::new(0), num_head_requests: atomic::AtomicUsize::new(0), diff --git a/src/daft-json/Cargo.toml b/src/daft-json/Cargo.toml index 241156403c..1cf8308a7a 100644 --- a/src/daft-json/Cargo.toml +++ b/src/daft-json/Cargo.toml @@ -38,6 +38,9 @@ python = [ "daft-dsl/python" ] +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-json" diff --git a/src/daft-json/src/lib.rs b/src/daft-json/src/lib.rs index 1c56ba7167..6f935b8e4c 100644 --- a/src/daft-json/src/lib.rs +++ b/src/daft-json/src/lib.rs @@ -50,17 +50,17 @@ pub enum Error { } impl From for DaftError { - fn from(err: Error) -> DaftError { + fn from(err: Error) -> Self { match err { Error::IOError { source } => source.into(), - _ => DaftError::External(err.into()), + _ => Self::External(err.into()), } } } impl From for Error { fn from(err: daft_io::Error) -> Self { - Error::IOError { source: err } + Self::IOError { source: err } } } diff --git a/src/daft-local-execution/Cargo.toml b/src/daft-local-execution/Cargo.toml index 2dac516672..cd061c1c35 100644 --- a/src/daft-local-execution/Cargo.toml +++ b/src/daft-local-execution/Cargo.toml @@ -29,6 +29,9 @@ tracing = {workspace = true} [features] python = ["dep:pyo3", "common-daft-config/python", "common-file-formats/python", "common-error/python", "daft-dsl/python", "daft-io/python", "daft-micropartition/python", "daft-plan/python", "daft-scan/python", "common-display/python"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-local-execution" diff --git a/src/daft-local-execution/src/channel.rs b/src/daft-local-execution/src/channel.rs index bb22b9d4ea..4bc6fd1f5c 100644 --- a/src/daft-local-execution/src/channel.rs +++ b/src/daft-local-execution/src/channel.rs @@ -91,8 +91,8 @@ pub enum PipelineReceiver { impl PipelineReceiver { pub async fn recv(&mut self) -> Option { match self { - PipelineReceiver::InOrder(rr) => rr.recv().await, - PipelineReceiver::OutOfOrder(r) => r.recv().await, + Self::InOrder(rr) => rr.recv().await, + Self::OutOfOrder(r) => r.recv().await, } } } diff --git a/src/daft-local-execution/src/intermediate_ops/anti_semi_hash_join_probe.rs b/src/daft-local-execution/src/intermediate_ops/anti_semi_hash_join_probe.rs index 14bc949ff0..13e79b5ede 100644 --- a/src/daft-local-execution/src/intermediate_ops/anti_semi_hash_join_probe.rs +++ b/src/daft-local-execution/src/intermediate_ops/anti_semi_hash_join_probe.rs @@ -19,15 +19,15 @@ enum AntiSemiProbeState { impl AntiSemiProbeState { fn set_table(&mut self, table: &Arc) { - if let AntiSemiProbeState::Building = self { - *self = AntiSemiProbeState::ReadyToProbe(table.clone()); + if let Self::Building = self { + *self = Self::ReadyToProbe(table.clone()); } else { panic!("AntiSemiProbeState should only be in Building state when setting table") } } fn get_probeable(&self) -> &Arc { - if let AntiSemiProbeState::ReadyToProbe(probeable) = self { + if let Self::ReadyToProbe(probeable) = self { probeable } else { panic!("AntiSemiProbeState should only be in ReadyToProbe state when getting probeable") diff --git a/src/daft-local-execution/src/intermediate_ops/hash_join_probe.rs b/src/daft-local-execution/src/intermediate_ops/hash_join_probe.rs index f849064964..0a037dc6bb 100644 --- a/src/daft-local-execution/src/intermediate_ops/hash_join_probe.rs +++ b/src/daft-local-execution/src/intermediate_ops/hash_join_probe.rs @@ -21,15 +21,15 @@ enum HashJoinProbeState { impl HashJoinProbeState { fn set_table(&mut self, table: &Arc, tables: &Arc>) { - if let HashJoinProbeState::Building = self { - *self = HashJoinProbeState::ReadyToProbe(table.clone(), tables.clone()); + if let Self::Building = self { + *self = Self::ReadyToProbe(table.clone(), tables.clone()); } else { panic!("HashJoinProbeState should only be in Building state when setting table") } } fn get_probeable_and_table(&self) -> (&Arc, &Arc>) { - if let HashJoinProbeState::ReadyToProbe(probe_table, tables) = self { + if let Self::ReadyToProbe(probe_table, tables) = self { (probe_table, tables) } else { panic!("get_probeable_and_table can only be used during the ReadyToProbe Phase") diff --git a/src/daft-local-execution/src/lib.rs b/src/daft-local-execution/src/lib.rs index 732f306768..1356689a08 100644 --- a/src/daft-local-execution/src/lib.rs +++ b/src/daft-local-execution/src/lib.rs @@ -76,7 +76,7 @@ pub enum Error { } impl From for DaftError { - fn from(err: Error) -> DaftError { + fn from(err: Error) -> Self { match err { Error::PipelineCreationError { source, plan_name } => { log::error!("Error creating pipeline from {}", plan_name); @@ -86,7 +86,7 @@ impl From for DaftError { log::error!("Error when running pipeline node {}", node_name); source } - _ => DaftError::External(err.into()), + _ => Self::External(err.into()), } } } diff --git a/src/daft-local-execution/src/pipeline.rs b/src/daft-local-execution/src/pipeline.rs index 62935dfff0..9f7da9b915 100644 --- a/src/daft-local-execution/src/pipeline.rs +++ b/src/daft-local-execution/src/pipeline.rs @@ -42,33 +42,33 @@ pub enum PipelineResultType { impl From> for PipelineResultType { fn from(data: Arc) -> Self { - PipelineResultType::Data(data) + Self::Data(data) } } impl From<(Arc, Arc>)> for PipelineResultType { fn from((probe_table, tables): (Arc, Arc>)) -> Self { - PipelineResultType::ProbeTable(probe_table, tables) + Self::ProbeTable(probe_table, tables) } } impl PipelineResultType { pub fn as_data(&self) -> &Arc { match self { - PipelineResultType::Data(data) => data, + Self::Data(data) => data, _ => panic!("Expected data"), } } pub fn as_probe_table(&self) -> (&Arc, &Arc>) { match self { - PipelineResultType::ProbeTable(probe_table, tables) => (probe_table, tables), + Self::ProbeTable(probe_table, tables) => (probe_table, tables), _ => panic!("Expected probe table"), } } pub fn should_broadcast(&self) -> bool { - matches!(self, PipelineResultType::ProbeTable(_, _)) + matches!(self, Self::ProbeTable(_, _)) } } diff --git a/src/daft-local-execution/src/sinks/blocking_sink.rs b/src/daft-local-execution/src/sinks/blocking_sink.rs index 09e42ae81f..8894db503d 100644 --- a/src/daft-local-execution/src/sinks/blocking_sink.rs +++ b/src/daft-local-execution/src/sinks/blocking_sink.rs @@ -34,7 +34,7 @@ pub(crate) struct BlockingSinkNode { impl BlockingSinkNode { pub(crate) fn new(op: Box, child: Box) -> Self { let name = op.name(); - BlockingSinkNode { + Self { op: Arc::new(tokio::sync::Mutex::new(op)), name, child, diff --git a/src/daft-local-execution/src/sinks/streaming_sink.rs b/src/daft-local-execution/src/sinks/streaming_sink.rs index 1804a3e07e..5b188c4ad8 100644 --- a/src/daft-local-execution/src/sinks/streaming_sink.rs +++ b/src/daft-local-execution/src/sinks/streaming_sink.rs @@ -38,7 +38,7 @@ pub(crate) struct StreamingSinkNode { impl StreamingSinkNode { pub(crate) fn new(op: Box, children: Vec>) -> Self { let name = op.name(); - StreamingSinkNode { + Self { op: Arc::new(tokio::sync::Mutex::new(op)), name, children, diff --git a/src/daft-micropartition/Cargo.toml b/src/daft-micropartition/Cargo.toml index ee2322d8c9..2b8a405ef8 100644 --- a/src/daft-micropartition/Cargo.toml +++ b/src/daft-micropartition/Cargo.toml @@ -19,6 +19,9 @@ snafu = {workspace = true} [features] python = ["dep:pyo3", "common-error/python", "common-file-formats/python", "daft-core/python", "daft-dsl/python", "daft-table/python", "daft-io/python", "daft-parquet/python", "daft-scan/python", "daft-stats/python"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-micropartition" diff --git a/src/daft-micropartition/src/lib.rs b/src/daft-micropartition/src/lib.rs index 1933ab3b78..1a01f4e933 100644 --- a/src/daft-micropartition/src/lib.rs +++ b/src/daft-micropartition/src/lib.rs @@ -47,7 +47,7 @@ impl From for DaftError { fn from(value: Error) -> Self { match value { Error::DaftCoreCompute { source } => source, - _ => DaftError::External(value.into()), + _ => Self::External(value.into()), } } } diff --git a/src/daft-micropartition/src/micropartition.rs b/src/daft-micropartition/src/micropartition.rs index cc8439583c..c55a31ff64 100644 --- a/src/daft-micropartition/src/micropartition.rs +++ b/src/daft-micropartition/src/micropartition.rs @@ -38,7 +38,7 @@ pub(crate) enum TableState { impl Display for TableState { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - TableState::Unloaded(scan_task) => { + Self::Unloaded(scan_task) => { write!( f, "TableState: Unloaded. To load from: {:#?}", @@ -49,7 +49,7 @@ impl Display for TableState { .collect::>() ) } - TableState::Loaded(tables) => { + Self::Loaded(tables) => { writeln!(f, "TableState: Loaded. {} tables", tables.len())?; for tab in tables.iter() { writeln!(f, "{}", tab)?; @@ -524,7 +524,7 @@ impl MicroPartition { let statistics = statistics .cast_to_schema_with_fill(schema.clone(), fill_map.as_ref()) .expect("Statistics cannot be casted to schema"); - MicroPartition { + Self { schema, state: Mutex::new(TableState::Unloaded(scan_task)), metadata, @@ -557,7 +557,7 @@ impl MicroPartition { }); let tables_len_sum = tables.iter().map(|t| t.len()).sum(); - MicroPartition { + Self { schema, state: Mutex::new(TableState::Loaded(tables)), metadata: TableMetadata { diff --git a/src/daft-micropartition/src/ops/agg.rs b/src/daft-micropartition/src/ops/agg.rs index 108ea3e09d..18f265c3eb 100644 --- a/src/daft-micropartition/src/ops/agg.rs +++ b/src/daft-micropartition/src/ops/agg.rs @@ -15,7 +15,7 @@ impl MicroPartition { [] => { let empty_table = Table::empty(Some(self.schema.clone()))?; let agged = empty_table.agg(to_agg, group_by)?; - Ok(MicroPartition::new_loaded( + Ok(Self::new_loaded( agged.schema.clone(), vec![agged].into(), None, @@ -23,7 +23,7 @@ impl MicroPartition { } [t] => { let agged = t.agg(to_agg, group_by)?; - Ok(MicroPartition::new_loaded( + Ok(Self::new_loaded( agged.schema.clone(), vec![agged].into(), None, diff --git a/src/daft-micropartition/src/ops/cast_to_schema.rs b/src/daft-micropartition/src/ops/cast_to_schema.rs index e83c0774e1..1612a83eae 100644 --- a/src/daft-micropartition/src/ops/cast_to_schema.rs +++ b/src/daft-micropartition/src/ops/cast_to_schema.rs @@ -30,14 +30,14 @@ impl MicroPartition { scan_task.pushdowns.clone(), )) }; - Ok(MicroPartition::new_unloaded( + Ok(Self::new_unloaded( maybe_new_scan_task, self.metadata.clone(), pruned_statistics.expect("Unloaded MicroPartition should have statistics"), )) } // If Tables are already loaded, we map `Table::cast_to_schema` on each Table - TableState::Loaded(tables) => Ok(MicroPartition::new_loaded( + TableState::Loaded(tables) => Ok(Self::new_loaded( schema.clone(), Arc::new( tables diff --git a/src/daft-micropartition/src/ops/concat.rs b/src/daft-micropartition/src/ops/concat.rs index 904e68324a..682f75f4ce 100644 --- a/src/daft-micropartition/src/ops/concat.rs +++ b/src/daft-micropartition/src/ops/concat.rs @@ -47,7 +47,7 @@ impl MicroPartition { } let new_len = all_tables.iter().map(|t| t.len()).sum(); - Ok(MicroPartition { + Ok(Self { schema: mps.first().unwrap().schema.clone(), state: Mutex::new(TableState::Loaded(all_tables.into())), metadata: TableMetadata { length: new_len }, diff --git a/src/daft-micropartition/src/ops/eval_expressions.rs b/src/daft-micropartition/src/ops/eval_expressions.rs index 7a9b8bed0e..8ac5966a2e 100644 --- a/src/daft-micropartition/src/ops/eval_expressions.rs +++ b/src/daft-micropartition/src/ops/eval_expressions.rs @@ -45,7 +45,7 @@ impl MicroPartition { .map(|s| s.eval_expression_list(exprs, &expected_schema)) .transpose()?; - Ok(MicroPartition::new_loaded( + Ok(Self::new_loaded( expected_schema.into(), Arc::new(evaluated_tables), eval_stats, @@ -85,7 +85,7 @@ impl MicroPartition { } } - Ok(MicroPartition::new_loaded( + Ok(Self::new_loaded( Arc::new(expected_schema), Arc::new(evaluated_tables), eval_stats, diff --git a/src/daft-micropartition/src/ops/join.rs b/src/daft-micropartition/src/ops/join.rs index aef268d669..bac67f12db 100644 --- a/src/daft-micropartition/src/ops/join.rs +++ b/src/daft-micropartition/src/ops/join.rs @@ -70,7 +70,7 @@ impl MicroPartition { ([], _) | (_, []) => Ok(Self::empty(Some(join_schema))), ([lt], [rt]) => { let joined_table = table_join(lt, rt, left_on, right_on, how)?; - Ok(MicroPartition::new_loaded( + Ok(Self::new_loaded( join_schema, vec![joined_table].into(), None, diff --git a/src/daft-micropartition/src/ops/partition.rs b/src/daft-micropartition/src/ops/partition.rs index d0358ff178..8ca24e276f 100644 --- a/src/daft-micropartition/src/ops/partition.rs +++ b/src/daft-micropartition/src/ops/partition.rs @@ -24,20 +24,11 @@ fn transpose2(v: Vec>) -> Vec> { } impl MicroPartition { - fn vec_part_tables_to_mps( - &self, - part_tables: Vec>, - ) -> DaftResult> { + fn vec_part_tables_to_mps(&self, part_tables: Vec>) -> DaftResult> { let part_tables = transpose2(part_tables); Ok(part_tables .into_iter() - .map(|v| { - MicroPartition::new_loaded( - self.schema.clone(), - Arc::new(v), - self.statistics.clone(), - ) - }) + .map(|v| Self::new_loaded(self.schema.clone(), Arc::new(v), self.statistics.clone())) .collect()) } @@ -127,11 +118,10 @@ impl MicroPartition { let mps = tables .into_iter() - .map(|t| MicroPartition::new_loaded(self.schema.clone(), Arc::new(vec![t]), None)) + .map(|t| Self::new_loaded(self.schema.clone(), Arc::new(vec![t]), None)) .collect::>(); - let values = - MicroPartition::new_loaded(values.schema.clone(), Arc::new(vec![values]), None); + let values = Self::new_loaded(values.schema.clone(), Arc::new(vec![values]), None); Ok((mps, values)) } diff --git a/src/daft-micropartition/src/ops/pivot.rs b/src/daft-micropartition/src/ops/pivot.rs index 90e6fa110e..3a4ad964b9 100644 --- a/src/daft-micropartition/src/ops/pivot.rs +++ b/src/daft-micropartition/src/ops/pivot.rs @@ -25,7 +25,7 @@ impl MicroPartition { } [t] => { let pivoted = t.pivot(group_by, pivot_col, values_col, names)?; - Ok(MicroPartition::new_loaded( + Ok(Self::new_loaded( pivoted.schema.clone(), vec![pivoted].into(), None, diff --git a/src/daft-micropartition/src/ops/slice.rs b/src/daft-micropartition/src/ops/slice.rs index fa6cb858f1..11c5f37dec 100644 --- a/src/daft-micropartition/src/ops/slice.rs +++ b/src/daft-micropartition/src/ops/slice.rs @@ -44,7 +44,7 @@ impl MicroPartition { } } - Ok(MicroPartition::new_loaded( + Ok(Self::new_loaded( self.schema.clone(), slices_tables.into(), self.statistics.clone(), diff --git a/src/daft-micropartition/src/python.rs b/src/daft-micropartition/src/python.rs index 03423d1a37..2060b3de30 100644 --- a/src/daft-micropartition/src/python.rs +++ b/src/daft-micropartition/src/python.rs @@ -890,7 +890,7 @@ impl From for PyMicroPartition { impl From> for PyMicroPartition { fn from(value: Arc) -> Self { - PyMicroPartition { inner: value } + Self { inner: value } } } diff --git a/src/daft-minhash/Cargo.toml b/src/daft-minhash/Cargo.toml index d058339686..b902171b03 100644 --- a/src/daft-minhash/Cargo.toml +++ b/src/daft-minhash/Cargo.toml @@ -3,6 +3,9 @@ common-error = {path = "../common/error", default-features = false} fastrand = "2.1.0" mur3 = "0.1.0" +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-minhash" diff --git a/src/daft-parquet/Cargo.toml b/src/daft-parquet/Cargo.toml index 3e1a4876b8..f03726b12c 100644 --- a/src/daft-parquet/Cargo.toml +++ b/src/daft-parquet/Cargo.toml @@ -30,6 +30,9 @@ bincode = {workspace = true} [features] python = ["dep:pyo3", "common-error/python", "daft-core/python", "daft-io/python", "daft-table/python", "daft-stats/python", "daft-dsl/python", "common-arrow-ffi/python"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-parquet" diff --git a/src/daft-parquet/src/file.rs b/src/daft-parquet/src/file.rs index 3b84579b6d..818a8f5de1 100644 --- a/src/daft-parquet/src/file.rs +++ b/src/daft-parquet/src/file.rs @@ -67,7 +67,7 @@ where S: futures::Stream> + std::marker::Unpin, { pub fn new(src: S, handle: tokio::runtime::Handle) -> Self { - StreamIterator { + Self { curr: None, src: tokio::sync::Mutex::new(src), handle, @@ -204,7 +204,7 @@ impl ParquetReaderBuilder { .await?; let metadata = read_parquet_metadata(uri, size, io_client, io_stats, field_id_mapping).await?; - Ok(ParquetReaderBuilder { + Ok(Self { uri: uri.into(), metadata, selected_columns: None, @@ -324,7 +324,7 @@ impl ParquetFileReader { row_ranges: Vec, chunk_size: Option, ) -> super::Result { - Ok(ParquetFileReader { + Ok(Self { uri, metadata: Arc::new(metadata), arrow_schema: arrow_schema.into(), diff --git a/src/daft-parquet/src/lib.rs b/src/daft-parquet/src/lib.rs index d1057e95f7..db1567d02e 100644 --- a/src/daft-parquet/src/lib.rs +++ b/src/daft-parquet/src/lib.rs @@ -203,18 +203,18 @@ pub enum Error { } impl From for DaftError { - fn from(err: Error) -> DaftError { + fn from(err: Error) -> Self { match err { Error::DaftIOError { source } => source.into(), - Error::FileReadTimeout { .. } => DaftError::ReadTimeout(err.into()), - _ => DaftError::External(err.into()), + Error::FileReadTimeout { .. } => Self::ReadTimeout(err.into()), + _ => Self::External(err.into()), } } } impl From for Error { fn from(err: daft_io::Error) -> Self { - Error::DaftIOError { source: err } + Self::DaftIOError { source: err } } } diff --git a/src/daft-parquet/src/metadata.rs b/src/daft-parquet/src/metadata.rs index dd7c186145..c769262d8e 100644 --- a/src/daft-parquet/src/metadata.rs +++ b/src/daft-parquet/src/metadata.rs @@ -26,7 +26,7 @@ impl TreeNode for ParquetTypeWrapper { ParquetType::GroupType { fields, .. } => { for child in fields.iter() { // TODO: Expensive clone here because of ParquetTypeWrapper type, can we get rid of this? - match op(&ParquetTypeWrapper(child.clone()))? { + match op(&Self(child.clone()))? { TreeNodeRecursion::Continue => {} TreeNodeRecursion::Jump => return Ok(TreeNodeRecursion::Continue), TreeNodeRecursion::Stop => return Ok(TreeNodeRecursion::Stop), @@ -50,19 +50,15 @@ impl TreeNode for ParquetTypeWrapper { logical_type, converted_type, fields, - } => Ok(Transformed::yes(ParquetTypeWrapper( - ParquetType::GroupType { - fields: fields - .into_iter() - .map(|child| { - transform(ParquetTypeWrapper(child)).map(|wrapper| wrapper.data.0) - }) - .collect::>>()?, - field_info, - logical_type, - converted_type, - }, - ))), + } => Ok(Transformed::yes(Self(ParquetType::GroupType { + fields: fields + .into_iter() + .map(|child| transform(Self(child)).map(|wrapper| wrapper.data.0)) + .collect::>>()?, + field_info, + logical_type, + converted_type, + }))), } } } diff --git a/src/daft-parquet/src/read.rs b/src/daft-parquet/src/read.rs index 38974e2b01..8e9cda9d9b 100644 --- a/src/daft-parquet/src/read.rs +++ b/src/daft-parquet/src/read.rs @@ -29,10 +29,10 @@ pub struct ParquetSchemaInferenceOptions { impl ParquetSchemaInferenceOptions { pub fn new(coerce_int96_timestamp_unit: Option) -> Self { - let default: ParquetSchemaInferenceOptions = Default::default(); + let default: Self = Default::default(); let coerce_int96_timestamp_unit = coerce_int96_timestamp_unit.unwrap_or(default.coerce_int96_timestamp_unit); - ParquetSchemaInferenceOptions { + Self { coerce_int96_timestamp_unit, } } @@ -40,7 +40,7 @@ impl ParquetSchemaInferenceOptions { impl Default for ParquetSchemaInferenceOptions { fn default() -> Self { - ParquetSchemaInferenceOptions { + Self { coerce_int96_timestamp_unit: TimeUnit::Nanoseconds, } } @@ -50,7 +50,7 @@ impl From for arrow2::io::parquet::read::schema::SchemaInferenceOptions { fn from(value: ParquetSchemaInferenceOptions) -> Self { - arrow2::io::parquet::read::schema::SchemaInferenceOptions { + Self { int96_coerce_to_timeunit: value.coerce_int96_timestamp_unit.to_arrow(), } } diff --git a/src/daft-parquet/src/read_planner.rs b/src/daft-parquet/src/read_planner.rs index 6337cd3330..aca3b3c870 100644 --- a/src/daft-parquet/src/read_planner.rs +++ b/src/daft-parquet/src/read_planner.rs @@ -132,7 +132,7 @@ pub(crate) struct ReadPlanner { impl ReadPlanner { pub fn new(source: &str) -> Self { - ReadPlanner { + Self { source: source.into(), ranges: vec![], passes: vec![], diff --git a/src/daft-parquet/src/statistics/mod.rs b/src/daft-parquet/src/statistics/mod.rs index 82dce1a5d8..2827c84355 100644 --- a/src/daft-parquet/src/statistics/mod.rs +++ b/src/daft-parquet/src/statistics/mod.rs @@ -26,7 +26,7 @@ pub(super) enum Error { impl From for Error { fn from(value: daft_stats::Error) -> Self { match value { - daft_stats::Error::DaftCoreCompute { source } => Error::DaftCoreCompute { source }, + daft_stats::Error::DaftCoreCompute { source } => Self::DaftCoreCompute { source }, _ => Self::DaftStats { source: value }, } } @@ -38,7 +38,7 @@ impl From for DaftError { fn from(value: Error) -> Self { match value { Error::DaftCoreCompute { source } => source, - _ => DaftError::External(value.into()), + _ => Self::External(value.into()), } } } @@ -47,6 +47,6 @@ pub(super) struct Wrap(T); impl From for Wrap { fn from(value: T) -> Self { - Wrap(value) + Self(value) } } diff --git a/src/daft-physical-plan/Cargo.toml b/src/daft-physical-plan/Cargo.toml index 9ba603ab52..778b8b8560 100644 --- a/src/daft-physical-plan/Cargo.toml +++ b/src/daft-physical-plan/Cargo.toml @@ -8,6 +8,9 @@ daft-scan = {path = "../daft-scan", default-features = false} log = {workspace = true} strum = {version = "0.26", features = ["derive"]} +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-physical-plan" diff --git a/src/daft-physical-plan/src/local_plan.rs b/src/daft-physical-plan/src/local_plan.rs index 30bc879a2f..548e6505d8 100644 --- a/src/daft-physical-plan/src/local_plan.rs +++ b/src/daft-physical-plan/src/local_plan.rs @@ -56,7 +56,7 @@ impl LocalPhysicalPlan { } pub(crate) fn in_memory_scan(in_memory_info: InMemoryInfo) -> LocalPhysicalPlanRef { - LocalPhysicalPlan::InMemoryScan(InMemoryScan { + Self::InMemoryScan(InMemoryScan { info: in_memory_info, plan_stats: PlanStats {}, }) @@ -67,7 +67,7 @@ impl LocalPhysicalPlan { scan_tasks: Vec, schema: SchemaRef, ) -> LocalPhysicalPlanRef { - LocalPhysicalPlan::PhysicalScan(PhysicalScan { + Self::PhysicalScan(PhysicalScan { scan_tasks, schema, plan_stats: PlanStats {}, @@ -77,7 +77,7 @@ impl LocalPhysicalPlan { pub(crate) fn filter(input: LocalPhysicalPlanRef, predicate: ExprRef) -> LocalPhysicalPlanRef { let schema = input.schema().clone(); - LocalPhysicalPlan::Filter(Filter { + Self::Filter(Filter { input, predicate, schema, @@ -88,7 +88,7 @@ impl LocalPhysicalPlan { pub(crate) fn limit(input: LocalPhysicalPlanRef, num_rows: i64) -> LocalPhysicalPlanRef { let schema = input.schema().clone(); - LocalPhysicalPlan::Limit(Limit { + Self::Limit(Limit { input, num_rows, schema, @@ -102,7 +102,7 @@ impl LocalPhysicalPlan { projection: Vec, schema: SchemaRef, ) -> LocalPhysicalPlanRef { - LocalPhysicalPlan::Project(Project { + Self::Project(Project { input, projection, schema, @@ -116,7 +116,7 @@ impl LocalPhysicalPlan { aggregations: Vec, schema: SchemaRef, ) -> LocalPhysicalPlanRef { - LocalPhysicalPlan::UnGroupedAggregate(UnGroupedAggregate { + Self::UnGroupedAggregate(UnGroupedAggregate { input, aggregations, schema, @@ -131,7 +131,7 @@ impl LocalPhysicalPlan { group_by: Vec, schema: SchemaRef, ) -> LocalPhysicalPlanRef { - LocalPhysicalPlan::HashAggregate(HashAggregate { + Self::HashAggregate(HashAggregate { input, aggregations, group_by, @@ -147,7 +147,7 @@ impl LocalPhysicalPlan { descending: Vec, ) -> LocalPhysicalPlanRef { let schema = input.schema().clone(); - LocalPhysicalPlan::Sort(Sort { + Self::Sort(Sort { input, sort_by, descending, @@ -165,7 +165,7 @@ impl LocalPhysicalPlan { join_type: JoinType, schema: SchemaRef, ) -> LocalPhysicalPlanRef { - LocalPhysicalPlan::HashJoin(HashJoin { + Self::HashJoin(HashJoin { left, right, left_on, @@ -181,7 +181,7 @@ impl LocalPhysicalPlan { other: LocalPhysicalPlanRef, ) -> LocalPhysicalPlanRef { let schema = input.schema().clone(); - LocalPhysicalPlan::Concat(Concat { + Self::Concat(Concat { input, other, schema, @@ -192,16 +192,16 @@ impl LocalPhysicalPlan { pub fn schema(&self) -> &SchemaRef { match self { - LocalPhysicalPlan::PhysicalScan(PhysicalScan { schema, .. }) - | LocalPhysicalPlan::Filter(Filter { schema, .. }) - | LocalPhysicalPlan::Limit(Limit { schema, .. }) - | LocalPhysicalPlan::Project(Project { schema, .. }) - | LocalPhysicalPlan::UnGroupedAggregate(UnGroupedAggregate { schema, .. }) - | LocalPhysicalPlan::HashAggregate(HashAggregate { schema, .. }) - | LocalPhysicalPlan::Sort(Sort { schema, .. }) - | LocalPhysicalPlan::HashJoin(HashJoin { schema, .. }) - | LocalPhysicalPlan::Concat(Concat { schema, .. }) => schema, - LocalPhysicalPlan::InMemoryScan(InMemoryScan { info, .. }) => &info.source_schema, + Self::PhysicalScan(PhysicalScan { schema, .. }) + | Self::Filter(Filter { schema, .. }) + | Self::Limit(Limit { schema, .. }) + | Self::Project(Project { schema, .. }) + | Self::UnGroupedAggregate(UnGroupedAggregate { schema, .. }) + | Self::HashAggregate(HashAggregate { schema, .. }) + | Self::Sort(Sort { schema, .. }) + | Self::HashJoin(HashJoin { schema, .. }) + | Self::Concat(Concat { schema, .. }) => schema, + Self::InMemoryScan(InMemoryScan { info, .. }) => &info.source_schema, _ => todo!("{:?}", self), } } diff --git a/src/daft-plan/Cargo.toml b/src/daft-plan/Cargo.toml index d2cd422dba..62c764616f 100644 --- a/src/daft-plan/Cargo.toml +++ b/src/daft-plan/Cargo.toml @@ -57,6 +57,9 @@ python = [ "daft-schema/python" ] +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-plan" diff --git a/src/daft-plan/src/builder.rs b/src/daft-plan/src/builder.rs index 0098d72405..a10b11953b 100644 --- a/src/daft-plan/src/builder.rs +++ b/src/daft-plan/src/builder.rs @@ -53,8 +53,8 @@ impl LogicalPlanBuilder { } } -impl From<&LogicalPlanBuilder> for LogicalPlanBuilder { - fn from(builder: &LogicalPlanBuilder) -> Self { +impl From<&Self> for LogicalPlanBuilder { + fn from(builder: &Self) -> Self { Self { plan: builder.plan.clone(), config: builder.config.clone(), @@ -105,7 +105,7 @@ impl LogicalPlanBuilder { )); let logical_plan: LogicalPlan = logical_ops::Source::new(schema.clone(), source_info.into()).into(); - Ok(LogicalPlanBuilder::new(logical_plan.into(), None)) + Ok(Self::new(logical_plan.into(), None)) } pub fn table_scan( @@ -139,7 +139,7 @@ impl LogicalPlanBuilder { }; let logical_plan: LogicalPlan = logical_ops::Source::new(output_schema, source_info.into()).into(); - Ok(LogicalPlanBuilder::new(logical_plan.into(), None)) + Ok(Self::new(logical_plan.into(), None)) } pub fn select(&self, to_select: Vec) -> DaftResult { diff --git a/src/daft-plan/src/display.rs b/src/daft-plan/src/display.rs index a89276cc04..76ba0e599a 100644 --- a/src/daft-plan/src/display.rs +++ b/src/daft-plan/src/display.rs @@ -22,40 +22,40 @@ impl TreeDisplay for crate::LogicalPlan { impl TreeDisplay for crate::physical_plan::PhysicalPlan { fn display_as(&self, level: DisplayLevel) -> String { match self { - crate::PhysicalPlan::InMemoryScan(scan) => scan.display_as(level), - crate::PhysicalPlan::TabularScan(scan) => scan.display_as(level), - crate::PhysicalPlan::EmptyScan(scan) => scan.display_as(level), - crate::PhysicalPlan::Project(p) => p.display_as(level), - crate::PhysicalPlan::ActorPoolProject(p) => p.display_as(level), - crate::PhysicalPlan::Filter(f) => f.display_as(level), - crate::PhysicalPlan::Limit(limit) => limit.display_as(level), - crate::PhysicalPlan::Explode(explode) => explode.display_as(level), - crate::PhysicalPlan::Unpivot(unpivot) => unpivot.display_as(level), - crate::PhysicalPlan::Sort(sort) => sort.display_as(level), - crate::PhysicalPlan::Split(split) => split.display_as(level), - crate::PhysicalPlan::Sample(sample) => sample.display_as(level), - crate::PhysicalPlan::MonotonicallyIncreasingId(id) => id.display_as(level), - crate::PhysicalPlan::Coalesce(coalesce) => coalesce.display_as(level), - crate::PhysicalPlan::Flatten(flatten) => flatten.display_as(level), - crate::PhysicalPlan::FanoutRandom(fanout) => fanout.display_as(level), - crate::PhysicalPlan::FanoutByHash(fanout) => fanout.display_as(level), - crate::PhysicalPlan::FanoutByRange(fanout) => fanout.display_as(level), - crate::PhysicalPlan::ReduceMerge(reduce) => reduce.display_as(level), - crate::PhysicalPlan::Aggregate(aggr) => aggr.display_as(level), - crate::PhysicalPlan::Pivot(pivot) => pivot.display_as(level), - crate::PhysicalPlan::Concat(concat) => concat.display_as(level), - crate::PhysicalPlan::HashJoin(join) => join.display_as(level), - crate::PhysicalPlan::SortMergeJoin(join) => join.display_as(level), - crate::PhysicalPlan::BroadcastJoin(join) => join.display_as(level), - crate::PhysicalPlan::TabularWriteParquet(write) => write.display_as(level), - crate::PhysicalPlan::TabularWriteJson(write) => write.display_as(level), - crate::PhysicalPlan::TabularWriteCsv(write) => write.display_as(level), + Self::InMemoryScan(scan) => scan.display_as(level), + Self::TabularScan(scan) => scan.display_as(level), + Self::EmptyScan(scan) => scan.display_as(level), + Self::Project(p) => p.display_as(level), + Self::ActorPoolProject(p) => p.display_as(level), + Self::Filter(f) => f.display_as(level), + Self::Limit(limit) => limit.display_as(level), + Self::Explode(explode) => explode.display_as(level), + Self::Unpivot(unpivot) => unpivot.display_as(level), + Self::Sort(sort) => sort.display_as(level), + Self::Split(split) => split.display_as(level), + Self::Sample(sample) => sample.display_as(level), + Self::MonotonicallyIncreasingId(id) => id.display_as(level), + Self::Coalesce(coalesce) => coalesce.display_as(level), + Self::Flatten(flatten) => flatten.display_as(level), + Self::FanoutRandom(fanout) => fanout.display_as(level), + Self::FanoutByHash(fanout) => fanout.display_as(level), + Self::FanoutByRange(fanout) => fanout.display_as(level), + Self::ReduceMerge(reduce) => reduce.display_as(level), + Self::Aggregate(aggr) => aggr.display_as(level), + Self::Pivot(pivot) => pivot.display_as(level), + Self::Concat(concat) => concat.display_as(level), + Self::HashJoin(join) => join.display_as(level), + Self::SortMergeJoin(join) => join.display_as(level), + Self::BroadcastJoin(join) => join.display_as(level), + Self::TabularWriteParquet(write) => write.display_as(level), + Self::TabularWriteJson(write) => write.display_as(level), + Self::TabularWriteCsv(write) => write.display_as(level), #[cfg(feature = "python")] - crate::PhysicalPlan::IcebergWrite(write) => write.display_as(level), + Self::IcebergWrite(write) => write.display_as(level), #[cfg(feature = "python")] - crate::PhysicalPlan::DeltaLakeWrite(write) => write.display_as(level), + Self::DeltaLakeWrite(write) => write.display_as(level), #[cfg(feature = "python")] - crate::PhysicalPlan::LanceWrite(write) => write.display_as(level), + Self::LanceWrite(write) => write.display_as(level), } } diff --git a/src/daft-plan/src/logical_ops/actor_pool_project.rs b/src/daft-plan/src/logical_ops/actor_pool_project.rs index b353636240..97b511b238 100644 --- a/src/daft-plan/src/logical_ops/actor_pool_project.rs +++ b/src/daft-plan/src/logical_ops/actor_pool_project.rs @@ -58,7 +58,7 @@ impl ActorPoolProject { let projected_schema = Schema::new(fields).context(CreationSnafu)?.into(); - Ok(ActorPoolProject { + Ok(Self { input, projection, projected_schema, diff --git a/src/daft-plan/src/logical_optimization/optimizer.rs b/src/daft-plan/src/logical_optimization/optimizer.rs index 5e065e9213..535eb16448 100644 --- a/src/daft-plan/src/logical_optimization/optimizer.rs +++ b/src/daft-plan/src/logical_optimization/optimizer.rs @@ -23,7 +23,7 @@ pub struct OptimizerConfig { impl OptimizerConfig { fn new(max_optimizer_passes: usize, enable_actor_pool_projections: bool) -> Self { - OptimizerConfig { + Self { default_max_optimizer_passes: max_optimizer_passes, enable_actor_pool_projections, } @@ -33,7 +33,7 @@ impl OptimizerConfig { impl Default for OptimizerConfig { fn default() -> Self { // Default to a max of 5 optimizer passes for a given batch. - OptimizerConfig::new(5, false) + Self::new(5, false) } } diff --git a/src/daft-plan/src/logical_plan.rs b/src/daft-plan/src/logical_plan.rs index 2cb2ca4834..849a0402f8 100644 --- a/src/daft-plan/src/logical_plan.rs +++ b/src/daft-plan/src/logical_plan.rs @@ -214,7 +214,7 @@ impl LogicalPlan { } } - pub fn children(&self) -> Vec<&LogicalPlan> { + pub fn children(&self) -> Vec<&Self> { match self { Self::Source(..) => vec![], Self::Project(Project { input, .. }) => vec![input], @@ -238,7 +238,7 @@ impl LogicalPlan { } } - pub fn with_new_children(&self, children: &[Arc]) -> LogicalPlan { + pub fn with_new_children(&self, children: &[Arc]) -> Self { match children { [input] => match self { Self::Source(_) => panic!("Source nodes don't have children, with_new_children() should never be called for Source ops"), @@ -309,8 +309,8 @@ pub(crate) enum Error { pub(crate) type Result = std::result::Result; impl From for DaftError { - fn from(err: Error) -> DaftError { - DaftError::External(err.into()) + fn from(err: Error) -> Self { + Self::External(err.into()) } } diff --git a/src/daft-plan/src/partitioning.rs b/src/daft-plan/src/partitioning.rs index a460437a5a..9f5be356be 100644 --- a/src/daft-plan/src/partitioning.rs +++ b/src/daft-plan/src/partitioning.rs @@ -428,6 +428,6 @@ impl UnknownClusteringConfig { impl Default for UnknownClusteringConfig { fn default() -> Self { - UnknownClusteringConfig::new(1) + Self::new(1) } } diff --git a/src/daft-plan/src/physical_ops/actor_pool_project.rs b/src/daft-plan/src/physical_ops/actor_pool_project.rs index ae55af05b6..bca86ebf08 100644 --- a/src/daft-plan/src/physical_ops/actor_pool_project.rs +++ b/src/daft-plan/src/physical_ops/actor_pool_project.rs @@ -53,7 +53,7 @@ impl ActorPoolProject { return Err(DaftError::InternalError(format!("Expected ActorPoolProject to have exactly 1 stateful UDF expression but found: {num_stateful_udf_exprs}"))); } - Ok(ActorPoolProject { + Ok(Self { input, projection, clustering_spec, diff --git a/src/daft-plan/src/physical_optimization/optimizer.rs b/src/daft-plan/src/physical_optimization/optimizer.rs index 51e2b7ade3..58e2d43e53 100644 --- a/src/daft-plan/src/physical_optimization/optimizer.rs +++ b/src/daft-plan/src/physical_optimization/optimizer.rs @@ -16,13 +16,13 @@ pub struct PhysicalOptimizerConfig { impl PhysicalOptimizerConfig { #[allow(dead_code)] // used in test pub fn new(max_passes: usize) -> Self { - PhysicalOptimizerConfig { max_passes } + Self { max_passes } } } impl Default for PhysicalOptimizerConfig { fn default() -> Self { - PhysicalOptimizerConfig { max_passes: 5 } + Self { max_passes: 5 } } } @@ -37,7 +37,7 @@ impl PhysicalOptimizer { rule_batches: Vec, config: PhysicalOptimizerConfig, ) -> Self { - PhysicalOptimizer { + Self { rule_batches, config, } @@ -53,7 +53,7 @@ impl PhysicalOptimizer { impl Default for PhysicalOptimizer { fn default() -> Self { - PhysicalOptimizer { + Self { rule_batches: vec![PhysicalOptimizerRuleBatch::new( vec![ Box::new(ReorderPartitionKeys {}), diff --git a/src/daft-plan/src/physical_optimization/rules/rule.rs b/src/daft-plan/src/physical_optimization/rules/rule.rs index 8d20891424..ec49563d04 100644 --- a/src/daft-plan/src/physical_optimization/rules/rule.rs +++ b/src/daft-plan/src/physical_optimization/rules/rule.rs @@ -30,7 +30,7 @@ impl PhysicalOptimizerRuleBatch { rules: Vec>, strategy: PhysicalRuleExecutionStrategy, ) -> Self { - PhysicalOptimizerRuleBatch { rules, strategy } + Self { rules, strategy } } fn optimize_once(&self, plan: PhysicalPlanRef) -> DaftResult> { diff --git a/src/daft-plan/src/physical_plan.rs b/src/daft-plan/src/physical_plan.rs index 6302612e3d..615d656b92 100644 --- a/src/daft-plan/src/physical_plan.rs +++ b/src/daft-plan/src/physical_plan.rs @@ -62,7 +62,7 @@ pub struct ApproxStats { impl ApproxStats { fn empty() -> Self { - ApproxStats { + Self { lower_bound_rows: 0, upper_bound_rows: None, lower_bound_bytes: 0, @@ -70,7 +70,7 @@ impl ApproxStats { } } fn apply usize>(&self, f: F) -> Self { - ApproxStats { + Self { lower_bound_rows: f(self.lower_bound_rows), upper_bound_rows: self.upper_bound_rows.map(&f), lower_bound_bytes: f(self.lower_bound_rows), @@ -411,7 +411,7 @@ impl PhysicalPlan { } } - pub fn children(&self) -> Vec<&PhysicalPlan> { + pub fn children(&self) -> Vec<&Self> { match self { Self::InMemoryScan(..) => vec![], Self::TabularScan(..) | Self::EmptyScan(..) => vec![], @@ -457,7 +457,7 @@ impl PhysicalPlan { } } - pub fn with_new_children(&self, children: &[PhysicalPlanRef]) -> PhysicalPlan { + pub fn with_new_children(&self, children: &[PhysicalPlanRef]) -> Self { match children { [input] => match self { Self::InMemoryScan(..) => panic!("Source nodes don't have children, with_new_children() should never be called for source ops"), diff --git a/src/daft-plan/src/physical_planner/planner.rs b/src/daft-plan/src/physical_planner/planner.rs index 685f8dd028..5071c1bce2 100644 --- a/src/daft-plan/src/physical_planner/planner.rs +++ b/src/daft-plan/src/physical_planner/planner.rs @@ -258,18 +258,18 @@ pub enum QueryStageOutput { impl QueryStageOutput { pub fn unwrap(self) -> (Option, PhysicalPlanRef) { match self { - QueryStageOutput::Partial { + Self::Partial { physical_plan, source_id, } => (Some(source_id), physical_plan), - QueryStageOutput::Final { physical_plan } => (None, physical_plan), + Self::Final { physical_plan } => (None, physical_plan), } } pub fn source_id(&self) -> Option { match self { - QueryStageOutput::Partial { source_id, .. } => Some(*source_id), - QueryStageOutput::Final { .. } => None, + Self::Partial { source_id, .. } => Some(*source_id), + Self::Final { .. } => None, } } } @@ -293,7 +293,7 @@ pub struct AdaptivePlanner { impl AdaptivePlanner { pub fn new(logical_plan: LogicalPlanRef, cfg: Arc) -> Self { - AdaptivePlanner { + Self { logical_plan, cfg, status: AdaptivePlannerStatus::Ready, diff --git a/src/daft-plan/src/source_info/mod.rs b/src/daft-plan/src/source_info/mod.rs index ef5582ca3a..360b4f7d7c 100644 --- a/src/daft-plan/src/source_info/mod.rs +++ b/src/daft-plan/src/source_info/mod.rs @@ -87,7 +87,7 @@ pub struct PlaceHolderInfo { impl PlaceHolderInfo { pub fn new(source_schema: SchemaRef, clustering_spec: ClusteringSpecRef) -> Self { - PlaceHolderInfo { + Self { source_schema, clustering_spec, source_id: PLACEHOLDER_ID_COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst), diff --git a/src/daft-scan/Cargo.toml b/src/daft-scan/Cargo.toml index 7f9added6d..d4c5e5a230 100644 --- a/src/daft-scan/Cargo.toml +++ b/src/daft-scan/Cargo.toml @@ -24,6 +24,9 @@ snafu = {workspace = true} [features] python = ["dep:pyo3", "common-error/python", "daft-core/python", "daft-dsl/python", "daft-table/python", "daft-stats/python", "common-file-formats/python", "common-io-config/python", "common-daft-config/python", "daft-schema/python"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-scan" diff --git a/src/daft-scan/src/glob.rs b/src/daft-scan/src/glob.rs index 5383235c67..6563898c4d 100644 --- a/src/daft-scan/src/glob.rs +++ b/src/daft-scan/src/glob.rs @@ -49,7 +49,7 @@ enum Error { impl From for DaftError { fn from(value: Error) -> Self { match &value { - Error::GlobNoMatch { glob_path } => DaftError::FileNotFound { + Error::GlobNoMatch { glob_path } => Self::FileNotFound { path: glob_path.clone(), source: Box::new(value), }, diff --git a/src/daft-scan/src/lib.rs b/src/daft-scan/src/lib.rs index f8c9781be4..10cc0c6804 100644 --- a/src/daft-scan/src/lib.rs +++ b/src/daft-scan/src/lib.rs @@ -85,7 +85,7 @@ pub enum Error { impl From for DaftError { fn from(value: Error) -> Self { - DaftError::External(value.into()) + Self::External(value.into()) } } @@ -409,7 +409,7 @@ impl ScanTask { } } - pub fn merge(sc1: &ScanTask, sc2: &ScanTask) -> Result { + pub fn merge(sc1: &Self, sc2: &Self) -> Result { if sc1.partition_spec() != sc2.partition_spec() { return Err(Error::DifferingPartitionSpecsInScanTaskMerge { ps1: sc1.partition_spec().cloned(), @@ -440,7 +440,7 @@ impl ScanTask { p2: sc2.pushdowns.clone(), }); } - Ok(ScanTask::new( + Ok(Self::new( sc1.sources .clone() .into_iter() @@ -676,7 +676,7 @@ impl PartitionField { match (&source_field, &transform) { (Some(_), Some(_)) => { // TODO ADD VALIDATION OF TRANSFORM based on types - Ok(PartitionField { + Ok(Self { field, source_field, transform, @@ -686,7 +686,7 @@ impl PartitionField { "transform set in PartitionField: {} but source_field not set", tfm ))), - _ => Ok(PartitionField { + _ => Ok(Self { field, source_field, transform, @@ -787,8 +787,8 @@ impl Hash for ScanOperatorRef { } } -impl PartialEq for ScanOperatorRef { - fn eq(&self, other: &ScanOperatorRef) -> bool { +impl PartialEq for ScanOperatorRef { + fn eq(&self, other: &Self) -> bool { Arc::ptr_eq(&self.0, &other.0) } } @@ -1014,7 +1014,7 @@ mod test { let mut sources: Vec = Vec::new(); for _ in 0..num_sources { - sources.push(format!("../../tests/assets/parquet-data/mvp.parquet")); + sources.push("../../tests/assets/parquet-data/mvp.parquet".to_string()); } let glob_scan_operator: GlobScanOperator = GlobScanOperator::try_new( diff --git a/src/daft-scan/src/python.rs b/src/daft-scan/src/python.rs index af5b23a1db..fac37ccb48 100644 --- a/src/daft-scan/src/python.rs +++ b/src/daft-scan/src/python.rs @@ -93,7 +93,7 @@ pub mod pylib { file_format_config.into(), storage_config.into(), )); - Ok(ScanOperatorHandle { + Ok(Self { scan_op: ScanOperatorRef(operator), }) }) @@ -116,7 +116,7 @@ pub mod pylib { infer_schema, schema.map(|s| s.schema), )?); - Ok(ScanOperatorHandle { + Ok(Self { scan_op: ScanOperatorRef(operator), }) }) @@ -127,7 +127,7 @@ pub mod pylib { let scan_op = ScanOperatorRef(Arc::new(PythonScanOperatorBridge::from_python_abc( py_scan, py, )?)); - Ok(ScanOperatorHandle { scan_op }) + Ok(Self { scan_op }) } } #[pyclass(module = "daft.daft")] @@ -349,7 +349,7 @@ pub mod pylib { storage_config.into(), pushdowns.map(|p| p.0.as_ref().clone()).unwrap_or_default(), ); - Ok(Some(PyScanTask(scan_task.into()))) + Ok(Some(Self(scan_task.into()))) } #[allow(clippy::too_many_arguments)] @@ -381,7 +381,7 @@ pub mod pylib { storage_config.into(), pushdowns.map(|p| p.0.as_ref().clone()).unwrap_or_default(), ); - Ok(PyScanTask(scan_task.into())) + Ok(Self(scan_task.into())) } #[allow(clippy::too_many_arguments)] @@ -425,7 +425,7 @@ pub mod pylib { ))), pushdowns.map(|p| p.0.as_ref().clone()).unwrap_or_default(), ); - Ok(PyScanTask(scan_task.into())) + Ok(Self(scan_task.into())) } pub fn __repr__(&self) -> PyResult { @@ -464,7 +464,7 @@ pub mod pylib { source_field.map(|f| f.into()), transform.map(|e| e.0), )?; - Ok(PyPartitionField(Arc::new(p_field))) + Ok(Self(Arc::new(p_field))) } pub fn __repr__(&self) -> PyResult { diff --git a/src/daft-scan/src/storage_config.rs b/src/daft-scan/src/storage_config.rs index ced4c95315..d169e06510 100644 --- a/src/daft-scan/src/storage_config.rs +++ b/src/daft-scan/src/storage_config.rs @@ -26,7 +26,7 @@ impl StorageConfig { // Grab an IOClient and Runtime // TODO: This should be cleaned up and hidden behind a better API from daft-io match self { - StorageConfig::Native(cfg) => { + Self::Native(cfg) => { let multithreaded_io = cfg.multithreaded_io; Ok(( get_runtime(multithreaded_io)?, @@ -37,7 +37,7 @@ impl StorageConfig { )) } #[cfg(feature = "python")] - StorageConfig::Python(cfg) => { + Self::Python(cfg) => { let multithreaded_io = true; // Hardcode to use multithreaded IO if Python storage config is used for data fetches Ok(( get_runtime(multithreaded_io)?, diff --git a/src/daft-scheduler/Cargo.toml b/src/daft-scheduler/Cargo.toml index bfd4ca7985..14228f79b9 100644 --- a/src/daft-scheduler/Cargo.toml +++ b/src/daft-scheduler/Cargo.toml @@ -29,6 +29,9 @@ python = [ "daft-dsl/python" ] +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-scheduler" diff --git a/src/daft-scheduler/src/adaptive.rs b/src/daft-scheduler/src/adaptive.rs index 0e4a2bbc77..e701bd0e73 100644 --- a/src/daft-scheduler/src/adaptive.rs +++ b/src/daft-scheduler/src/adaptive.rs @@ -17,7 +17,7 @@ pub struct AdaptivePhysicalPlanScheduler { impl AdaptivePhysicalPlanScheduler { pub fn new(logical_plan: Arc, cfg: Arc) -> Self { - AdaptivePhysicalPlanScheduler { + Self { planner: AdaptivePlanner::new(logical_plan, cfg), } } @@ -34,10 +34,7 @@ impl AdaptivePhysicalPlanScheduler { ) -> PyResult { py.allow_threads(|| { let logical_plan = logical_plan_builder.builder.build(); - Ok(AdaptivePhysicalPlanScheduler::new( - logical_plan, - cfg.config.clone(), - )) + Ok(Self::new(logical_plan, cfg.config.clone())) }) } pub fn next(&mut self, py: Python) -> PyResult<(Option, PhysicalPlanScheduler)> { diff --git a/src/daft-schema/Cargo.toml b/src/daft-schema/Cargo.toml index df6db1718c..ed6ecde2b7 100644 --- a/src/daft-schema/Cargo.toml +++ b/src/daft-schema/Cargo.toml @@ -23,6 +23,9 @@ python = [ "common-arrow-ffi/python" ] +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-schema" diff --git a/src/daft-schema/src/dtype.rs b/src/daft-schema/src/dtype.rs index 48d9414aab..c17f1d5760 100644 --- a/src/daft-schema/src/dtype.rs +++ b/src/daft-schema/src/dtype.rs @@ -171,7 +171,7 @@ struct DataTypePayload { impl DataTypePayload { pub fn new(datatype: &DataType) -> Self { - DataTypePayload { + Self { datatype: datatype.clone(), daft_version: common_version::VERSION.into(), daft_build_type: common_version::DAFT_BUILD_TYPE.into(), @@ -181,48 +181,48 @@ impl DataTypePayload { const DAFT_SUPER_EXTENSION_NAME: &str = "daft.super_extension"; impl DataType { - pub fn new_null() -> DataType { - DataType::Null + pub fn new_null() -> Self { + Self::Null } - pub fn new_list(datatype: DataType) -> DataType { - DataType::List(Box::new(datatype)) + pub fn new_list(datatype: Self) -> Self { + Self::List(Box::new(datatype)) } - pub fn new_fixed_size_list(datatype: DataType, size: usize) -> DataType { - DataType::FixedSizeList(Box::new(datatype), size) + pub fn new_fixed_size_list(datatype: Self, size: usize) -> Self { + Self::FixedSizeList(Box::new(datatype), size) } pub fn to_arrow(&self) -> DaftResult { match self { - DataType::Null => Ok(ArrowType::Null), - DataType::Boolean => Ok(ArrowType::Boolean), - DataType::Int8 => Ok(ArrowType::Int8), - DataType::Int16 => Ok(ArrowType::Int16), - DataType::Int32 => Ok(ArrowType::Int32), - DataType::Int64 => Ok(ArrowType::Int64), + Self::Null => Ok(ArrowType::Null), + Self::Boolean => Ok(ArrowType::Boolean), + Self::Int8 => Ok(ArrowType::Int8), + Self::Int16 => Ok(ArrowType::Int16), + Self::Int32 => Ok(ArrowType::Int32), + Self::Int64 => Ok(ArrowType::Int64), // Must maintain same default mapping as Arrow2, otherwise this will throw errors in // DataArray::new() which makes strong assumptions about the arrow/Daft types // https://github.com/jorgecarleitao/arrow2/blob/b0734542c2fef5d2d0c7b6ffce5d094de371168a/src/datatypes/mod.rs#L493 - DataType::Int128 => Ok(ArrowType::Decimal(32, 32)), - DataType::UInt8 => Ok(ArrowType::UInt8), - DataType::UInt16 => Ok(ArrowType::UInt16), - DataType::UInt32 => Ok(ArrowType::UInt32), - DataType::UInt64 => Ok(ArrowType::UInt64), + Self::Int128 => Ok(ArrowType::Decimal(32, 32)), + Self::UInt8 => Ok(ArrowType::UInt8), + Self::UInt16 => Ok(ArrowType::UInt16), + Self::UInt32 => Ok(ArrowType::UInt32), + Self::UInt64 => Ok(ArrowType::UInt64), // DataType::Float16 => Ok(ArrowType::Float16), - DataType::Float32 => Ok(ArrowType::Float32), - DataType::Float64 => Ok(ArrowType::Float64), - DataType::Decimal128(precision, scale) => Ok(ArrowType::Decimal(*precision, *scale)), - DataType::Timestamp(unit, timezone) => { + Self::Float32 => Ok(ArrowType::Float32), + Self::Float64 => Ok(ArrowType::Float64), + Self::Decimal128(precision, scale) => Ok(ArrowType::Decimal(*precision, *scale)), + Self::Timestamp(unit, timezone) => { Ok(ArrowType::Timestamp(unit.to_arrow(), timezone.clone())) } - DataType::Date => Ok(ArrowType::Date32), - DataType::Time(unit) => Ok(ArrowType::Time64(unit.to_arrow())), - DataType::Duration(unit) => Ok(ArrowType::Duration(unit.to_arrow())), - DataType::Binary => Ok(ArrowType::LargeBinary), - DataType::FixedSizeBinary(size) => Ok(ArrowType::FixedSizeBinary(*size)), - DataType::Utf8 => Ok(ArrowType::LargeUtf8), - DataType::FixedSizeList(child_dtype, size) => Ok(ArrowType::FixedSizeList( + Self::Date => Ok(ArrowType::Date32), + Self::Time(unit) => Ok(ArrowType::Time64(unit.to_arrow())), + Self::Duration(unit) => Ok(ArrowType::Duration(unit.to_arrow())), + Self::Binary => Ok(ArrowType::LargeBinary), + Self::FixedSizeBinary(size) => Ok(ArrowType::FixedSizeBinary(*size)), + Self::Utf8 => Ok(ArrowType::LargeUtf8), + Self::FixedSizeList(child_dtype, size) => Ok(ArrowType::FixedSizeList( Box::new(arrow2::datatypes::Field::new( "item", child_dtype.to_arrow()?, @@ -230,10 +230,10 @@ impl DataType { )), *size, )), - DataType::List(field) => Ok(ArrowType::LargeList(Box::new( + Self::List(field) => Ok(ArrowType::LargeList(Box::new( arrow2::datatypes::Field::new("item", field.to_arrow()?, true), ))), - DataType::Map(field) => Ok(ArrowType::Map( + Self::Map(field) => Ok(ArrowType::Map( Box::new(arrow2::datatypes::Field::new( "item", field.to_arrow()?, @@ -241,27 +241,27 @@ impl DataType { )), false, )), - DataType::Struct(fields) => Ok({ + Self::Struct(fields) => Ok({ let fields = fields .iter() .map(|f| f.to_arrow()) .collect::>>()?; ArrowType::Struct(fields) }), - DataType::Extension(name, dtype, metadata) => Ok(ArrowType::Extension( + Self::Extension(name, dtype, metadata) => Ok(ArrowType::Extension( name.clone(), Box::new(dtype.to_arrow()?), metadata.clone(), )), - DataType::Embedding(..) - | DataType::Image(..) - | DataType::FixedShapeImage(..) - | DataType::Tensor(..) - | DataType::FixedShapeTensor(..) - | DataType::SparseTensor(..) - | DataType::FixedShapeSparseTensor(..) => { + Self::Embedding(..) + | Self::Image(..) + | Self::FixedShapeImage(..) + | Self::Tensor(..) + | Self::FixedShapeTensor(..) + | Self::SparseTensor(..) + | Self::FixedShapeSparseTensor(..) => { let physical = Box::new(self.to_physical()); - let logical_extension = DataType::Extension( + let logical_extension = Self::Extension( DAFT_SUPER_EXTENSION_NAME.into(), physical, Some(self.to_json()?), @@ -269,16 +269,16 @@ impl DataType { logical_extension.to_arrow() } #[cfg(feature = "python")] - DataType::Python => Err(DaftError::TypeError(format!( + Self::Python => Err(DaftError::TypeError(format!( "Can not convert {self:?} into arrow type" ))), - DataType::Unknown => Err(DaftError::TypeError(format!( + Self::Unknown => Err(DaftError::TypeError(format!( "Can not convert {self:?} into arrow type" ))), } } - pub fn to_physical(&self) -> DataType { + pub fn to_physical(&self) -> Self { use DataType::*; match self { Decimal128(..) => Int128, @@ -293,7 +293,7 @@ impl DataType { Image(mode) => Struct(vec![ Field::new( "data", - List(Box::new(mode.map_or(DataType::UInt8, |m| m.get_dtype()))), + List(Box::new(mode.map_or(Self::UInt8, |m| m.get_dtype()))), ), Field::new("channel", UInt16), Field::new("height", UInt32), @@ -306,7 +306,7 @@ impl DataType { ), Tensor(dtype) => Struct(vec![ Field::new("data", List(Box::new(*dtype.clone()))), - Field::new("shape", List(Box::new(DataType::UInt64))), + Field::new("shape", List(Box::new(Self::UInt64))), ]), FixedShapeTensor(dtype, shape) => FixedSizeList( Box::new(*dtype.clone()), @@ -314,12 +314,12 @@ impl DataType { ), SparseTensor(dtype) => Struct(vec![ Field::new("values", List(Box::new(*dtype.clone()))), - Field::new("indices", List(Box::new(DataType::UInt64))), - Field::new("shape", List(Box::new(DataType::UInt64))), + Field::new("indices", List(Box::new(Self::UInt64))), + Field::new("shape", List(Box::new(Self::UInt64))), ]), FixedShapeSparseTensor(dtype, _) => Struct(vec![ Field::new("values", List(Box::new(*dtype.clone()))), - Field::new("indices", List(Box::new(DataType::UInt64))), + Field::new("indices", List(Box::new(Self::UInt64))), ]), _ => { assert!(self.is_physical()); @@ -329,15 +329,15 @@ impl DataType { } #[inline] - pub fn nested_dtype(&self) -> Option<&DataType> { + pub fn nested_dtype(&self) -> Option<&Self> { match self { - DataType::Map(dtype) - | DataType::List(dtype) - | DataType::FixedSizeList(dtype, _) - | DataType::FixedShapeTensor(dtype, _) - | DataType::SparseTensor(dtype) - | DataType::FixedShapeSparseTensor(dtype, _) - | DataType::Tensor(dtype) => Some(dtype), + Self::Map(dtype) + | Self::List(dtype) + | Self::FixedSizeList(dtype, _) + | Self::FixedShapeTensor(dtype, _) + | Self::SparseTensor(dtype) + | Self::FixedShapeSparseTensor(dtype, _) + | Self::Tensor(dtype) => Some(dtype), _ => None, } } @@ -350,19 +350,19 @@ impl DataType { #[inline] pub fn is_numeric(&self) -> bool { match self { - DataType::Int8 - | DataType::Int16 - | DataType::Int32 - | DataType::Int64 - | DataType::Int128 - | DataType::UInt8 - | DataType::UInt16 - | DataType::UInt32 - | DataType::UInt64 + Self::Int8 + | Self::Int16 + | Self::Int32 + | Self::Int64 + | Self::Int128 + | Self::UInt8 + | Self::UInt16 + | Self::UInt32 + | Self::UInt64 // DataType::Float16 - | DataType::Float32 - | DataType::Float64 => true, - DataType::Extension(_, inner, _) => inner.is_numeric(), + | Self::Float32 + | Self::Float64 => true, + Self::Extension(_, inner, _) => inner.is_numeric(), _ => false } } @@ -370,10 +370,10 @@ impl DataType { #[inline] pub fn is_fixed_size_numeric(&self) -> bool { match self { - DataType::FixedSizeList(dtype, ..) - | DataType::Embedding(dtype, ..) - | DataType::FixedShapeTensor(dtype, ..) - | DataType::FixedShapeSparseTensor(dtype, ..) => dtype.is_numeric(), + Self::FixedSizeList(dtype, ..) + | Self::Embedding(dtype, ..) + | Self::FixedShapeTensor(dtype, ..) + | Self::FixedShapeSparseTensor(dtype, ..) => dtype.is_numeric(), _ => false, } } @@ -381,8 +381,8 @@ impl DataType { #[inline] pub fn fixed_size(&self) -> Option { match self { - DataType::FixedSizeList(_, size) => Some(*size), - DataType::Embedding(_, size) => Some(*size), + Self::FixedSizeList(_, size) => Some(*size), + Self::Embedding(_, size) => Some(*size), _ => None, } } @@ -391,15 +391,15 @@ impl DataType { pub fn is_integer(&self) -> bool { matches!( self, - DataType::Int8 - | DataType::Int16 - | DataType::Int32 - | DataType::Int64 - | DataType::Int128 - | DataType::UInt8 - | DataType::UInt16 - | DataType::UInt32 - | DataType::UInt64 + Self::Int8 + | Self::Int16 + | Self::Int32 + | Self::Int64 + | Self::Int128 + | Self::UInt8 + | Self::UInt16 + | Self::UInt32 + | Self::UInt64 ) } @@ -408,89 +408,89 @@ impl DataType { matches!( self, // DataType::Float16 | - DataType::Float32 | DataType::Float64 + Self::Float32 | Self::Float64 ) } #[inline] pub fn is_temporal(&self) -> bool { match self { - DataType::Date | DataType::Timestamp(..) => true, - DataType::Extension(_, inner, _) => inner.is_temporal(), + Self::Date | Self::Timestamp(..) => true, + Self::Extension(_, inner, _) => inner.is_temporal(), _ => false, } } #[inline] pub fn is_tensor(&self) -> bool { - matches!(self, DataType::Tensor(..)) + matches!(self, Self::Tensor(..)) } #[inline] pub fn is_sparse_tensor(&self) -> bool { - matches!(self, DataType::SparseTensor(..)) + matches!(self, Self::SparseTensor(..)) } #[inline] pub fn is_fixed_shape_tensor(&self) -> bool { - matches!(self, DataType::FixedShapeTensor(..)) + matches!(self, Self::FixedShapeTensor(..)) } #[inline] pub fn is_fixed_shape_sparse_tensor(&self) -> bool { - matches!(self, DataType::FixedShapeSparseTensor(..)) + matches!(self, Self::FixedShapeSparseTensor(..)) } #[inline] pub fn is_image(&self) -> bool { - matches!(self, DataType::Image(..)) + matches!(self, Self::Image(..)) } #[inline] pub fn is_fixed_shape_image(&self) -> bool { - matches!(self, DataType::FixedShapeImage(..)) + matches!(self, Self::FixedShapeImage(..)) } #[inline] pub fn is_map(&self) -> bool { - matches!(self, DataType::Map(..)) + matches!(self, Self::Map(..)) } #[inline] pub fn is_list(&self) -> bool { - matches!(self, DataType::List(..)) + matches!(self, Self::List(..)) } #[inline] pub fn is_string(&self) -> bool { - matches!(self, DataType::Utf8) + matches!(self, Self::Utf8) } #[inline] pub fn is_boolean(&self) -> bool { - matches!(self, DataType::Boolean) + matches!(self, Self::Boolean) } #[inline] pub fn is_null(&self) -> bool { match self { - DataType::Null => true, - DataType::Extension(_, inner, _) => inner.is_null(), + Self::Null => true, + Self::Extension(_, inner, _) => inner.is_null(), _ => false, } } #[inline] pub fn is_extension(&self) -> bool { - matches!(self, DataType::Extension(..)) + matches!(self, Self::Extension(..)) } #[inline] pub fn is_python(&self) -> bool { match self { #[cfg(feature = "python")] - DataType::Python => true, - DataType::Extension(_, inner, _) => inner.is_python(), + Self::Python => true, + Self::Extension(_, inner, _) => inner.is_python(), _ => false, } } @@ -499,18 +499,18 @@ impl DataType { pub fn to_floating_representation(&self) -> DaftResult { let data_type = match self { // All numeric types that coerce to `f32` - DataType::Int8 => DataType::Float32, - DataType::Int16 => DataType::Float32, - DataType::UInt8 => DataType::Float32, - DataType::UInt16 => DataType::Float32, - DataType::Float32 => DataType::Float32, + Self::Int8 => Self::Float32, + Self::Int16 => Self::Float32, + Self::UInt8 => Self::Float32, + Self::UInt16 => Self::Float32, + Self::Float32 => Self::Float32, // All numeric types that coerce to `f64` - DataType::Int32 => DataType::Float64, - DataType::Int64 => DataType::Float64, - DataType::UInt32 => DataType::Float64, - DataType::UInt64 => DataType::Float64, - DataType::Float64 => DataType::Float64, + Self::Int32 => Self::Float64, + Self::Int64 => Self::Float64, + Self::UInt32 => Self::Float64, + Self::UInt64 => Self::Float64, + Self::Float64 => Self::Float64, _ => { return Err(DaftError::TypeError(format!( @@ -527,33 +527,33 @@ impl DataType { const DEFAULT_LIST_LEN: f64 = 4.; let elem_size = match self.to_physical() { - DataType::Null => Some(0.), - DataType::Boolean => Some(0.125), - DataType::Int8 => Some(1.), - DataType::Int16 => Some(2.), - DataType::Int32 => Some(4.), - DataType::Int64 => Some(8.), - DataType::Int128 => Some(16.), - DataType::UInt8 => Some(1.), - DataType::UInt16 => Some(2.), - DataType::UInt32 => Some(4.), - DataType::UInt64 => Some(8.), - DataType::Float32 => Some(4.), - DataType::Float64 => Some(8.), - DataType::Utf8 => Some(VARIABLE_TYPE_SIZE), - DataType::Binary => Some(VARIABLE_TYPE_SIZE), - DataType::FixedSizeBinary(size) => Some(size as f64), - DataType::FixedSizeList(dtype, len) => { + Self::Null => Some(0.), + Self::Boolean => Some(0.125), + Self::Int8 => Some(1.), + Self::Int16 => Some(2.), + Self::Int32 => Some(4.), + Self::Int64 => Some(8.), + Self::Int128 => Some(16.), + Self::UInt8 => Some(1.), + Self::UInt16 => Some(2.), + Self::UInt32 => Some(4.), + Self::UInt64 => Some(8.), + Self::Float32 => Some(4.), + Self::Float64 => Some(8.), + Self::Utf8 => Some(VARIABLE_TYPE_SIZE), + Self::Binary => Some(VARIABLE_TYPE_SIZE), + Self::FixedSizeBinary(size) => Some(size as f64), + Self::FixedSizeList(dtype, len) => { dtype.estimate_size_bytes().map(|b| b * (len as f64)) } - DataType::List(dtype) => dtype.estimate_size_bytes().map(|b| b * DEFAULT_LIST_LEN), - DataType::Struct(fields) => Some( + Self::List(dtype) => dtype.estimate_size_bytes().map(|b| b * DEFAULT_LIST_LEN), + Self::Struct(fields) => Some( fields .iter() .map(|f| f.dtype.estimate_size_bytes().unwrap_or(0f64)) .sum(), ), - DataType::Extension(_, dtype, _) => dtype.estimate_size_bytes(), + Self::Extension(_, dtype, _) => dtype.estimate_size_bytes(), _ => None, }; // add bitmap @@ -564,19 +564,19 @@ impl DataType { pub fn is_logical(&self) -> bool { matches!( self, - DataType::Decimal128(..) - | DataType::Date - | DataType::Time(..) - | DataType::Timestamp(..) - | DataType::Duration(..) - | DataType::Embedding(..) - | DataType::Image(..) - | DataType::FixedShapeImage(..) - | DataType::Tensor(..) - | DataType::FixedShapeTensor(..) - | DataType::SparseTensor(..) - | DataType::FixedShapeSparseTensor(..) - | DataType::Map(..) + Self::Decimal128(..) + | Self::Date + | Self::Time(..) + | Self::Timestamp(..) + | Self::Duration(..) + | Self::Embedding(..) + | Self::Image(..) + | Self::FixedShapeImage(..) + | Self::Tensor(..) + | Self::FixedShapeTensor(..) + | Self::SparseTensor(..) + | Self::FixedShapeSparseTensor(..) + | Self::Map(..) ) } @@ -587,13 +587,10 @@ impl DataType { #[inline] pub fn is_nested(&self) -> bool { - let p: DataType = self.to_physical(); + let p: Self = self.to_physical(); matches!( p, - DataType::List(..) - | DataType::FixedSizeList(..) - | DataType::Struct(..) - | DataType::Map(..) + Self::List(..) | Self::FixedSizeList(..) | Self::Struct(..) | Self::Map(..) ) } @@ -611,42 +608,40 @@ impl DataType { impl From<&ArrowType> for DataType { fn from(item: &ArrowType) -> Self { match item { - ArrowType::Null => DataType::Null, - ArrowType::Boolean => DataType::Boolean, - ArrowType::Int8 => DataType::Int8, - ArrowType::Int16 => DataType::Int16, - ArrowType::Int32 => DataType::Int32, - ArrowType::Int64 => DataType::Int64, - ArrowType::UInt8 => DataType::UInt8, - ArrowType::UInt16 => DataType::UInt16, - ArrowType::UInt32 => DataType::UInt32, - ArrowType::UInt64 => DataType::UInt64, + ArrowType::Null => Self::Null, + ArrowType::Boolean => Self::Boolean, + ArrowType::Int8 => Self::Int8, + ArrowType::Int16 => Self::Int16, + ArrowType::Int32 => Self::Int32, + ArrowType::Int64 => Self::Int64, + ArrowType::UInt8 => Self::UInt8, + ArrowType::UInt16 => Self::UInt16, + ArrowType::UInt32 => Self::UInt32, + ArrowType::UInt64 => Self::UInt64, // ArrowType::Float16 => DataType::Float16, - ArrowType::Float32 => DataType::Float32, - ArrowType::Float64 => DataType::Float64, - ArrowType::Timestamp(unit, timezone) => { - DataType::Timestamp(unit.into(), timezone.clone()) - } - ArrowType::Date32 => DataType::Date, - ArrowType::Date64 => DataType::Timestamp(TimeUnit::Milliseconds, None), + ArrowType::Float32 => Self::Float32, + ArrowType::Float64 => Self::Float64, + ArrowType::Timestamp(unit, timezone) => Self::Timestamp(unit.into(), timezone.clone()), + ArrowType::Date32 => Self::Date, + ArrowType::Date64 => Self::Timestamp(TimeUnit::Milliseconds, None), ArrowType::Time32(timeunit) | ArrowType::Time64(timeunit) => { - DataType::Time(timeunit.into()) + Self::Time(timeunit.into()) } - ArrowType::Duration(timeunit) => DataType::Duration(timeunit.into()), - ArrowType::FixedSizeBinary(size) => DataType::FixedSizeBinary(*size), - ArrowType::Binary | ArrowType::LargeBinary => DataType::Binary, - ArrowType::Utf8 | ArrowType::LargeUtf8 => DataType::Utf8, - ArrowType::Decimal(precision, scale) => DataType::Decimal128(*precision, *scale), + ArrowType::Duration(timeunit) => Self::Duration(timeunit.into()), + ArrowType::FixedSizeBinary(size) => Self::FixedSizeBinary(*size), + ArrowType::Binary | ArrowType::LargeBinary => Self::Binary, + ArrowType::Utf8 | ArrowType::LargeUtf8 => Self::Utf8, + ArrowType::Decimal(precision, scale) => Self::Decimal128(*precision, *scale), ArrowType::List(field) | ArrowType::LargeList(field) => { - DataType::List(Box::new(field.as_ref().data_type().into())) + Self::List(Box::new(field.as_ref().data_type().into())) } ArrowType::FixedSizeList(field, size) => { - DataType::FixedSizeList(Box::new(field.as_ref().data_type().into()), *size) + Self::FixedSizeList(Box::new(field.as_ref().data_type().into()), *size) } - ArrowType::Map(field, ..) => DataType::Map(Box::new(field.as_ref().data_type().into())), + ArrowType::Map(field, ..) => Self::Map(Box::new(field.as_ref().data_type().into())), ArrowType::Struct(fields) => { let fields: Vec = fields.iter().map(|fld| fld.into()).collect(); - DataType::Struct(fields) + Self::Struct(fields) } ArrowType::Extension(name, dtype, metadata) => { if name == DAFT_SUPER_EXTENSION_NAME { @@ -656,7 +651,7 @@ impl From<&ArrowType> for DataType { } } } - DataType::Extension( + Self::Extension( name.clone(), Box::new(dtype.as_ref().into()), metadata.clone(), @@ -673,9 +668,9 @@ impl From<&ImageMode> for DataType { use ImageMode::*; match mode { - L16 | LA16 | RGB16 | RGBA16 => DataType::UInt16, - RGB32F | RGBA32F => DataType::Float32, - _ => DataType::UInt8, + L16 | LA16 | RGB16 | RGBA16 => Self::UInt16, + RGB32F | RGBA32F => Self::Float32, + _ => Self::UInt8, } } } diff --git a/src/daft-schema/src/image_format.rs b/src/daft-schema/src/image_format.rs index f7f41a516f..93ec40963e 100644 --- a/src/daft-schema/src/image_format.rs +++ b/src/daft-schema/src/image_format.rs @@ -38,7 +38,7 @@ impl ImageFormat { } impl ImageFormat { - pub fn iterator() -> std::slice::Iter<'static, ImageFormat> { + pub fn iterator() -> std::slice::Iter<'static, Self> { use ImageFormat::*; static FORMATS: [ImageFormat; 5] = [PNG, JPEG, TIFF, GIF, BMP]; @@ -61,7 +61,7 @@ impl FromStr for ImageFormat { _ => Err(DaftError::TypeError(format!( "Image format {} is not supported; only the following formats are supported: {:?}", format, - ImageFormat::iterator().as_slice() + Self::iterator().as_slice() ))), } } diff --git a/src/daft-schema/src/image_mode.rs b/src/daft-schema/src/image_mode.rs index be2eebd8d3..9b41875ff0 100644 --- a/src/daft-schema/src/image_mode.rs +++ b/src/daft-schema/src/image_mode.rs @@ -75,12 +75,12 @@ impl ImageMode { "1" | "P" | "CMYK" | "YCbCr" | "LAB" | "HSV" | "I" | "F" | "PA" | "RGBX" | "RGBa" | "La" | "I;16" | "I;16L" | "I;16B" | "I;16N" | "BGR;15" | "BGR;16" | "BGR;24" => Err(DaftError::TypeError(format!( "PIL image mode {} is not supported; only the following modes are supported: {:?}", mode, - ImageMode::iterator().as_slice() + Self::iterator().as_slice() ))), _ => Err(DaftError::TypeError(format!( "Image mode {} is not a valid PIL image mode; see https://pillow.readthedocs.io/en/stable/handbook/concepts.html#modes for valid PIL image modes. Of these, only the following modes are supported by Daft: {:?}", mode, - ImageMode::iterator().as_slice() + Self::iterator().as_slice() ))), } } @@ -114,7 +114,7 @@ impl ImageMode { RGBA | RGBA16 | RGBA32F => 4, } } - pub fn iterator() -> std::slice::Iter<'static, ImageMode> { + pub fn iterator() -> std::slice::Iter<'static, Self> { use ImageMode::*; static MODES: [ImageMode; 10] = @@ -146,7 +146,7 @@ impl FromStr for ImageMode { _ => Err(DaftError::TypeError(format!( "Image mode {} is not supported; only the following modes are supported: {:?}", mode, - ImageMode::iterator().as_slice() + Self::iterator().as_slice() ))), } } diff --git a/src/daft-schema/src/python/datatype.rs b/src/daft-schema/src/python/datatype.rs index 9128eceec2..ceff5e18f3 100644 --- a/src/daft-schema/src/python/datatype.rs +++ b/src/daft-schema/src/python/datatype.rs @@ -14,7 +14,7 @@ pub struct PyTimeUnit { impl From for PyTimeUnit { fn from(value: TimeUnit) -> Self { - PyTimeUnit { timeunit: value } + Self { timeunit: value } } } @@ -217,7 +217,7 @@ impl PyDataType { } #[staticmethod] - pub fn r#struct(fields: IndexMap) -> Self { + pub fn r#struct(fields: IndexMap) -> Self { DataType::Struct( fields .into_iter() @@ -395,8 +395,8 @@ impl PyDataType { } pub fn is_equal(&self, other: Bound) -> PyResult { - if other.is_instance_of::() { - let other = other.extract::()?; + if other.is_instance_of::() { + let other = other.extract::()?; Ok(self.dtype == other.dtype) } else { Ok(false) @@ -423,7 +423,7 @@ impl_bincode_py_state_serialization!(PyDataType); impl From for PyDataType { fn from(value: DataType) -> Self { - PyDataType { dtype: value } + Self { dtype: value } } } diff --git a/src/daft-schema/src/python/field.rs b/src/daft-schema/src/python/field.rs index 2e39915843..8360d233dc 100644 --- a/src/daft-schema/src/python/field.rs +++ b/src/daft-schema/src/python/field.rs @@ -26,7 +26,7 @@ impl PyField { Ok(self.field.dtype.clone().into()) } - pub fn eq(&self, other: &PyField) -> PyResult { + pub fn eq(&self, other: &Self) -> PyResult { Ok(self.field.eq(&other.field)) } } @@ -35,7 +35,7 @@ impl_bincode_py_state_serialization!(PyField); impl From for PyField { fn from(field: Field) -> Self { - PyField { field } + Self { field } } } diff --git a/src/daft-schema/src/python/schema.rs b/src/daft-schema/src/python/schema.rs index 2aa39c9abc..3a13583ba8 100644 --- a/src/daft-schema/src/python/schema.rs +++ b/src/daft-schema/src/python/schema.rs @@ -46,12 +46,12 @@ impl PySchema { self.schema.names() } - pub fn union(&self, other: &PySchema) -> PyResult { + pub fn union(&self, other: &Self) -> PyResult { let new_schema = Arc::new(self.schema.union(&other.schema)?); Ok(new_schema.into()) } - pub fn eq(&self, other: &PySchema) -> PyResult { + pub fn eq(&self, other: &Self) -> PyResult { Ok(self.schema.fields.eq(&other.schema.fields)) } @@ -60,22 +60,20 @@ impl PySchema { } #[staticmethod] - pub fn from_field_name_and_types( - names_and_types: Vec<(String, PyDataType)>, - ) -> PyResult { + pub fn from_field_name_and_types(names_and_types: Vec<(String, PyDataType)>) -> PyResult { let fields = names_and_types .iter() .map(|(name, pydtype)| Field::new(name, pydtype.clone().into())) .collect(); let schema = schema::Schema::new(fields)?; - Ok(PySchema { + Ok(Self { schema: schema.into(), }) } #[staticmethod] - pub fn from_fields(fields: Vec) -> PyResult { - Ok(PySchema { + pub fn from_fields(fields: Vec) -> PyResult { + Ok(Self { schema: schema::Schema::new(fields.iter().map(|f| f.field.clone()).collect())?.into(), }) } @@ -96,7 +94,7 @@ impl PySchema { Ok(self.schema.truncated_table_string()) } - pub fn apply_hints(&self, hints: &PySchema) -> PyResult { + pub fn apply_hints(&self, hints: &Self) -> PyResult { let new_schema = Arc::new(self.schema.apply_hints(&hints.schema)?); Ok(new_schema.into()) } @@ -106,7 +104,7 @@ impl_bincode_py_state_serialization!(PySchema); impl From for PySchema { fn from(schema: schema::SchemaRef) -> Self { - PySchema { schema } + Self { schema } } } diff --git a/src/daft-schema/src/schema.rs b/src/daft-schema/src/schema.rs index 7b0328be15..04c0d88c71 100644 --- a/src/daft-schema/src/schema.rs +++ b/src/daft-schema/src/schema.rs @@ -41,10 +41,10 @@ impl Schema { } } - Ok(Schema { fields: map }) + Ok(Self { fields: map }) } - pub fn exclude>(&self, names: &[S]) -> DaftResult { + pub fn exclude>(&self, names: &[S]) -> DaftResult { let mut fields = IndexMap::new(); let names = names.iter().map(|s| s.as_ref()).collect::>(); for (name, field) in self.fields.iter() { @@ -53,11 +53,11 @@ impl Schema { } } - Ok(Schema { fields }) + Ok(Self { fields }) } pub fn empty() -> Self { - Schema { + Self { fields: indexmap::IndexMap::new(), } } @@ -96,7 +96,7 @@ impl Schema { self.fields.is_empty() } - pub fn union(&self, other: &Schema) -> DaftResult { + pub fn union(&self, other: &Self) -> DaftResult { let self_keys: HashSet<&String> = HashSet::from_iter(self.fields.keys()); let other_keys: HashSet<&String> = HashSet::from_iter(self.fields.keys()); match self_keys.difference(&other_keys).count() { @@ -105,7 +105,7 @@ impl Schema { for (k, v) in self.fields.iter().chain(other.fields.iter()) { fields.insert(k.clone(), v.clone()); } - Ok(Schema { fields }) + Ok(Self { fields }) } _ => Err(DaftError::ValueError( "Cannot union two schemas with overlapping keys".to_string(), @@ -113,7 +113,7 @@ impl Schema { } } - pub fn apply_hints(&self, hints: &Schema) -> DaftResult { + pub fn apply_hints(&self, hints: &Self) -> DaftResult { let applied_fields = self .fields .iter() @@ -123,7 +123,7 @@ impl Schema { }) .collect::>(); - Ok(Schema { + Ok(Self { fields: applied_fields, }) } @@ -238,7 +238,7 @@ impl Schema { } /// Returns a new schema with only the specified columns in the new schema - pub fn project>(self: Arc, columns: &[S]) -> DaftResult { + pub fn project>(self: Arc, columns: &[S]) -> DaftResult { let new_fields = columns .iter() .map(|i| { diff --git a/src/daft-schema/src/time_unit.rs b/src/daft-schema/src/time_unit.rs index 50cdcb1e57..d4b17b0e7c 100644 --- a/src/daft-schema/src/time_unit.rs +++ b/src/daft-schema/src/time_unit.rs @@ -16,19 +16,19 @@ impl TimeUnit { #![allow(clippy::wrong_self_convention)] pub fn to_arrow(&self) -> ArrowTimeUnit { match self { - TimeUnit::Nanoseconds => ArrowTimeUnit::Nanosecond, - TimeUnit::Microseconds => ArrowTimeUnit::Microsecond, - TimeUnit::Milliseconds => ArrowTimeUnit::Millisecond, - TimeUnit::Seconds => ArrowTimeUnit::Second, + Self::Nanoseconds => ArrowTimeUnit::Nanosecond, + Self::Microseconds => ArrowTimeUnit::Microsecond, + Self::Milliseconds => ArrowTimeUnit::Millisecond, + Self::Seconds => ArrowTimeUnit::Second, } } pub fn to_scale_factor(&self) -> i64 { match self { - TimeUnit::Seconds => 1, - TimeUnit::Milliseconds => 1000, - TimeUnit::Microseconds => 1_000_000, - TimeUnit::Nanoseconds => 1_000_000_000, + Self::Seconds => 1, + Self::Milliseconds => 1000, + Self::Microseconds => 1_000_000, + Self::Nanoseconds => 1_000_000_000, } } } @@ -36,10 +36,10 @@ impl TimeUnit { impl From<&ArrowTimeUnit> for TimeUnit { fn from(tu: &ArrowTimeUnit) -> Self { match tu { - ArrowTimeUnit::Nanosecond => TimeUnit::Nanoseconds, - ArrowTimeUnit::Microsecond => TimeUnit::Microseconds, - ArrowTimeUnit::Millisecond => TimeUnit::Milliseconds, - ArrowTimeUnit::Second => TimeUnit::Seconds, + ArrowTimeUnit::Nanosecond => Self::Nanoseconds, + ArrowTimeUnit::Microsecond => Self::Microseconds, + ArrowTimeUnit::Millisecond => Self::Milliseconds, + ArrowTimeUnit::Second => Self::Seconds, } } } diff --git a/src/daft-sketch/Cargo.toml b/src/daft-sketch/Cargo.toml index a1cb65528a..5612624cf3 100644 --- a/src/daft-sketch/Cargo.toml +++ b/src/daft-sketch/Cargo.toml @@ -8,6 +8,9 @@ serde_arrow = {version = "0.11.0", features = ["arrow2-0-17"]} sketches-ddsketch = {workspace = true} snafu = {workspace = true} +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-sketch" diff --git a/src/daft-sketch/src/arrow2_serde.rs b/src/daft-sketch/src/arrow2_serde.rs index cb36653e36..4213d0180c 100644 --- a/src/daft-sketch/src/arrow2_serde.rs +++ b/src/daft-sketch/src/arrow2_serde.rs @@ -19,7 +19,7 @@ impl From for DaftError { use Error::*; match value { DeserializationError { source } => { - DaftError::ComputeError(format!("Deserialization error: {}", source)) + Self::ComputeError(format!("Deserialization error: {}", source)) } } } diff --git a/src/daft-sql/Cargo.toml b/src/daft-sql/Cargo.toml index c15a71f948..9e29995415 100644 --- a/src/daft-sql/Cargo.toml +++ b/src/daft-sql/Cargo.toml @@ -16,6 +16,9 @@ rstest = {workspace = true} [features] python = ["dep:pyo3", "common-error/python", "daft-functions/python"] +[lints] +workspace = true + [package] name = "daft-sql" edition.workspace = true diff --git a/src/daft-sql/src/catalog.rs b/src/daft-sql/src/catalog.rs index 4da8ca6c8a..3495d32703 100644 --- a/src/daft-sql/src/catalog.rs +++ b/src/daft-sql/src/catalog.rs @@ -11,7 +11,7 @@ pub struct SQLCatalog { impl SQLCatalog { /// Create an empty catalog pub fn new() -> Self { - SQLCatalog { + Self { tables: HashMap::new(), } } @@ -27,7 +27,7 @@ impl SQLCatalog { } /// Copy from another catalog, using tables from other in case of conflict - pub fn copy_from(&mut self, other: &SQLCatalog) { + pub fn copy_from(&mut self, other: &Self) { for (name, plan) in other.tables.iter() { self.tables.insert(name.clone(), plan.clone()); } diff --git a/src/daft-sql/src/error.rs b/src/daft-sql/src/error.rs index d948c2bdb3..31f8a400ed 100644 --- a/src/daft-sql/src/error.rs +++ b/src/daft-sql/src/error.rs @@ -27,42 +27,42 @@ pub enum PlannerError { impl From for PlannerError { fn from(value: DaftError) -> Self { - PlannerError::DaftError { source: value } + Self::DaftError { source: value } } } impl From for PlannerError { fn from(value: TokenizerError) -> Self { - PlannerError::TokenizeError { source: value } + Self::TokenizeError { source: value } } } impl From for PlannerError { fn from(value: ParserError) -> Self { - PlannerError::SQLParserError { source: value } + Self::SQLParserError { source: value } } } impl PlannerError { pub fn column_not_found, B: Into>(column_name: A, relation: B) -> Self { - PlannerError::ColumnNotFound { + Self::ColumnNotFound { column_name: column_name.into(), relation: relation.into(), } } pub fn table_not_found>(table_name: S) -> Self { - PlannerError::TableNotFound { + Self::TableNotFound { message: table_name.into(), } } pub fn unsupported_sql(sql: String) -> Self { - PlannerError::UnsupportedSQL { message: sql } + Self::UnsupportedSQL { message: sql } } pub fn invalid_operation>(message: S) -> Self { - PlannerError::InvalidOperation { + Self::InvalidOperation { message: message.into(), } } @@ -112,7 +112,7 @@ impl From for DaftError { if let PlannerError::DaftError { source } = value { source } else { - DaftError::External(Box::new(value)) + Self::External(Box::new(value)) } } } diff --git a/src/daft-sql/src/modules/aggs.rs b/src/daft-sql/src/modules/aggs.rs index 74ee294fbc..695d3c9c79 100644 --- a/src/daft-sql/src/modules/aggs.rs +++ b/src/daft-sql/src/modules/aggs.rs @@ -34,7 +34,7 @@ impl SQLModule for SQLModuleAggs { impl SQLFunction for AggExpr { fn to_expr(&self, inputs: &[FunctionArg], planner: &SQLPlanner) -> SQLPlannerResult { // COUNT(*) needs a bit of extra handling, so we process that outside of `to_expr` - if let AggExpr::Count(_, _) = self { + if let Self::Count(_, _) = self { handle_count(inputs, planner) } else { let inputs = self.args_to_expr_unnamed(inputs, planner)?; diff --git a/src/daft-sql/src/planner.rs b/src/daft-sql/src/planner.rs index af111b1532..92d856cc4a 100644 --- a/src/daft-sql/src/planner.rs +++ b/src/daft-sql/src/planner.rs @@ -36,7 +36,7 @@ pub(crate) struct Relation { impl Relation { pub fn new(inner: LogicalPlanBuilder, name: String) -> Self { - Relation { inner, name } + Self { inner, name } } pub(crate) fn schema(&self) -> SchemaRef { self.inner.schema() @@ -50,7 +50,7 @@ pub struct SQLPlanner { impl Default for SQLPlanner { fn default() -> Self { - SQLPlanner { + Self { catalog: SQLCatalog::new(), current_relation: None, } @@ -59,7 +59,7 @@ impl Default for SQLPlanner { impl SQLPlanner { pub fn new(context: SQLCatalog) -> Self { - SQLPlanner { + Self { catalog: context, current_relation: None, } diff --git a/src/daft-sql/src/python.rs b/src/daft-sql/src/python.rs index 216aaba3d8..9201b7fccc 100644 --- a/src/daft-sql/src/python.rs +++ b/src/daft-sql/src/python.rs @@ -34,7 +34,7 @@ impl PyCatalog { /// Construct an empty PyCatalog. #[staticmethod] pub fn new() -> Self { - PyCatalog { + Self { catalog: SQLCatalog::new(), } } @@ -46,7 +46,7 @@ impl PyCatalog { } /// Copy from another catalog, using tables from other in case of conflict - pub fn copy_from(&mut self, other: &PyCatalog) { + pub fn copy_from(&mut self, other: &Self) { self.catalog.copy_from(&other.catalog); } diff --git a/src/daft-stats/Cargo.toml b/src/daft-stats/Cargo.toml index f3de4bbec1..7cea2fe37d 100644 --- a/src/daft-stats/Cargo.toml +++ b/src/daft-stats/Cargo.toml @@ -10,6 +10,9 @@ snafu = {workspace = true} [features] python = ["common-error/python", "daft-core/python", "daft-dsl/python", "daft-table/python"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-stats" diff --git a/src/daft-stats/src/column_stats/comparison.rs b/src/daft-stats/src/column_stats/comparison.rs index 1d3d923666..7e2021744c 100644 --- a/src/daft-stats/src/column_stats/comparison.rs +++ b/src/daft-stats/src/column_stats/comparison.rs @@ -6,20 +6,15 @@ use snafu::ResultExt; use super::ColumnRangeStatistics; use crate::DaftCoreComputeSnafu; -impl DaftCompare<&ColumnRangeStatistics> for ColumnRangeStatistics { - type Output = crate::Result; - fn equal(&self, rhs: &ColumnRangeStatistics) -> Self::Output { +impl DaftCompare<&Self> for ColumnRangeStatistics { + type Output = crate::Result; + fn equal(&self, rhs: &Self) -> Self::Output { // lower_bound: do they exactly overlap // upper_bound: is there any overlap match (self, rhs) { - (ColumnRangeStatistics::Missing, _) | (_, ColumnRangeStatistics::Missing) => { - Ok(ColumnRangeStatistics::Missing) - } - ( - ColumnRangeStatistics::Loaded(s_lower, s_upper), - ColumnRangeStatistics::Loaded(r_lower, r_upper), - ) => { + (Self::Missing, _) | (_, Self::Missing) => Ok(Self::Missing), + (Self::Loaded(s_lower, s_upper), Self::Loaded(r_lower, r_upper)) => { let exactly_overlap = (s_lower.equal(r_lower).context(DaftCoreComputeSnafu)?) .and(&s_upper.equal(r_upper).context(DaftCoreComputeSnafu)?) .context(DaftCoreComputeSnafu)? @@ -40,27 +35,22 @@ impl DaftCompare<&ColumnRangeStatistics> for ColumnRangeStatistics { .or(&rhs_lower_in_self_bounds) .context(DaftCoreComputeSnafu)? .into_series(); - Ok(ColumnRangeStatistics::Loaded(exactly_overlap, any_overlap)) + Ok(Self::Loaded(exactly_overlap, any_overlap)) } } } - fn not_equal(&self, rhs: &ColumnRangeStatistics) -> Self::Output { + fn not_equal(&self, rhs: &Self) -> Self::Output { // invert of equal self.equal(rhs)?.not() } - fn gt(&self, rhs: &ColumnRangeStatistics) -> Self::Output { + fn gt(&self, rhs: &Self) -> Self::Output { // lower_bound: True greater (self.lower > rhs.upper) // upper_bound: some value that can be greater (self.upper > rhs.lower) match (self, rhs) { - (ColumnRangeStatistics::Missing, _) | (_, ColumnRangeStatistics::Missing) => { - Ok(ColumnRangeStatistics::Missing) - } - ( - ColumnRangeStatistics::Loaded(s_lower, s_upper), - ColumnRangeStatistics::Loaded(r_lower, r_upper), - ) => { + (Self::Missing, _) | (_, Self::Missing) => Ok(Self::Missing), + (Self::Loaded(s_lower, s_upper), Self::Loaded(r_lower, r_upper)) => { let maybe_greater = s_upper .gt(r_lower) .context(DaftCoreComputeSnafu)? @@ -69,20 +59,15 @@ impl DaftCompare<&ColumnRangeStatistics> for ColumnRangeStatistics { .gt(r_upper) .context(DaftCoreComputeSnafu)? .into_series(); - Ok(ColumnRangeStatistics::Loaded(always_greater, maybe_greater)) + Ok(Self::Loaded(always_greater, maybe_greater)) } } } - fn gte(&self, rhs: &ColumnRangeStatistics) -> Self::Output { + fn gte(&self, rhs: &Self) -> Self::Output { match (self, rhs) { - (ColumnRangeStatistics::Missing, _) | (_, ColumnRangeStatistics::Missing) => { - Ok(ColumnRangeStatistics::Missing) - } - ( - ColumnRangeStatistics::Loaded(s_lower, s_upper), - ColumnRangeStatistics::Loaded(r_lower, r_upper), - ) => { + (Self::Missing, _) | (_, Self::Missing) => Ok(Self::Missing), + (Self::Loaded(s_lower, s_upper), Self::Loaded(r_lower, r_upper)) => { let maybe_gte = s_upper .gte(r_lower) .context(DaftCoreComputeSnafu)? @@ -91,23 +76,18 @@ impl DaftCompare<&ColumnRangeStatistics> for ColumnRangeStatistics { .gte(r_upper) .context(DaftCoreComputeSnafu)? .into_series(); - Ok(ColumnRangeStatistics::Loaded(always_gte, maybe_gte)) + Ok(Self::Loaded(always_gte, maybe_gte)) } } } - fn lt(&self, rhs: &ColumnRangeStatistics) -> Self::Output { + fn lt(&self, rhs: &Self) -> Self::Output { // lower_bound: True less than (self.upper < rhs.lower) // upper_bound: some value that can be less than (self.lower < rhs.upper) match (self, rhs) { - (ColumnRangeStatistics::Missing, _) | (_, ColumnRangeStatistics::Missing) => { - Ok(ColumnRangeStatistics::Missing) - } - ( - ColumnRangeStatistics::Loaded(s_lower, s_upper), - ColumnRangeStatistics::Loaded(r_lower, r_upper), - ) => { + (Self::Missing, _) | (_, Self::Missing) => Ok(Self::Missing), + (Self::Loaded(s_lower, s_upper), Self::Loaded(r_lower, r_upper)) => { let maybe_lt = s_lower .lt(r_upper) .context(DaftCoreComputeSnafu)? @@ -116,20 +96,15 @@ impl DaftCompare<&ColumnRangeStatistics> for ColumnRangeStatistics { .lt(r_lower) .context(DaftCoreComputeSnafu)? .into_series(); - Ok(ColumnRangeStatistics::Loaded(always_lt, maybe_lt)) + Ok(Self::Loaded(always_lt, maybe_lt)) } } } - fn lte(&self, rhs: &ColumnRangeStatistics) -> Self::Output { + fn lte(&self, rhs: &Self) -> Self::Output { match (self, rhs) { - (ColumnRangeStatistics::Missing, _) | (_, ColumnRangeStatistics::Missing) => { - Ok(ColumnRangeStatistics::Missing) - } - ( - ColumnRangeStatistics::Loaded(s_lower, s_upper), - ColumnRangeStatistics::Loaded(r_lower, r_upper), - ) => { + (Self::Missing, _) | (_, Self::Missing) => Ok(Self::Missing), + (Self::Loaded(s_lower, s_upper), Self::Loaded(r_lower, r_upper)) => { let maybe_lte = s_lower .lte(r_upper) .context(DaftCoreComputeSnafu)? @@ -138,7 +113,7 @@ impl DaftCompare<&ColumnRangeStatistics> for ColumnRangeStatistics { .lte(r_lower) .context(DaftCoreComputeSnafu)? .into_series(); - Ok(ColumnRangeStatistics::Loaded(always_lte, maybe_lte)) + Ok(Self::Loaded(always_lte, maybe_lte)) } } } @@ -147,13 +122,8 @@ impl DaftCompare<&ColumnRangeStatistics> for ColumnRangeStatistics { impl ColumnRangeStatistics { pub fn union(&self, rhs: &Self) -> crate::Result { match (self, rhs) { - (ColumnRangeStatistics::Missing, _) | (_, ColumnRangeStatistics::Missing) => { - Ok(ColumnRangeStatistics::Missing) - } - ( - ColumnRangeStatistics::Loaded(s_lower, s_upper), - ColumnRangeStatistics::Loaded(r_lower, r_upper), - ) => { + (Self::Missing, _) | (_, Self::Missing) => Ok(Self::Missing), + (Self::Loaded(s_lower, s_upper), Self::Loaded(r_lower, r_upper)) => { let new_min = s_lower.if_else( r_lower, &(s_lower.lt(r_lower)) @@ -167,7 +137,7 @@ impl ColumnRangeStatistics { .into_series(), ); - Ok(ColumnRangeStatistics::Loaded( + Ok(Self::Loaded( new_min.context(DaftCoreComputeSnafu)?, new_max.context(DaftCoreComputeSnafu)?, )) diff --git a/src/daft-stats/src/column_stats/mod.rs b/src/daft-stats/src/column_stats/mod.rs index e8dc82f2f8..df96daa373 100644 --- a/src/daft-stats/src/column_stats/mod.rs +++ b/src/daft-stats/src/column_stats/mod.rs @@ -42,13 +42,13 @@ impl ColumnRangeStatistics { assert_eq!(l.data_type(), u.data_type(), ""); // If creating on incompatible types, default to `Missing` - if !ColumnRangeStatistics::supports_dtype(l.data_type()) { - return Ok(ColumnRangeStatistics::Missing); + if !Self::supports_dtype(l.data_type()) { + return Ok(Self::Missing); } - Ok(ColumnRangeStatistics::Loaded(l, u)) + Ok(Self::Loaded(l, u)) } - _ => Ok(ColumnRangeStatistics::Missing), + _ => Ok(Self::Missing), } } @@ -148,18 +148,16 @@ impl ColumnRangeStatistics { pub fn cast(&self, dtype: &DataType) -> crate::Result { match self { // `Missing` is casted to `Missing` - ColumnRangeStatistics::Missing => Ok(ColumnRangeStatistics::Missing), + Self::Missing => Ok(Self::Missing), // If the type to cast to matches the current type exactly, short-circuit the logic here. This should be the // most common case (e.g. parsing a Parquet file with the same types as the inferred types) - ColumnRangeStatistics::Loaded(l, r) if l.data_type() == dtype => { - Ok(ColumnRangeStatistics::Loaded(l.clone(), r.clone())) - } + Self::Loaded(l, r) if l.data_type() == dtype => Ok(Self::Loaded(l.clone(), r.clone())), // Only certain types are allowed to be casted in the context of ColumnRangeStatistics // as casting may not correctly preserve ordering of elements. We allow-list some type combinations // but for most combinations, we will default to `ColumnRangeStatistics::Missing`. - ColumnRangeStatistics::Loaded(l, r) => { + Self::Loaded(l, r) => { match (l.data_type(), dtype) { // Int casting to higher bitwidths (DataType::Int8, DataType::Int16) | @@ -187,11 +185,11 @@ impl ColumnRangeStatistics { (DataType::Int64, DataType::Timestamp(..)) | // Binary to Utf8 (DataType::Binary, DataType::Utf8) - => Ok(ColumnRangeStatistics::Loaded( + => Ok(Self::Loaded( l.cast(dtype).context(DaftCoreComputeSnafu)?, r.cast(dtype).context(DaftCoreComputeSnafu)?, )), - _ => Ok(ColumnRangeStatistics::Missing) + _ => Ok(Self::Missing) } } } @@ -240,7 +238,7 @@ pub enum Error { impl From for crate::Error { fn from(value: Error) -> Self { - crate::Error::MissingStatistics { source: value } + Self::MissingStatistics { source: value } } } diff --git a/src/daft-stats/src/lib.rs b/src/daft-stats/src/lib.rs index 3bf362782f..f73a05101a 100644 --- a/src/daft-stats/src/lib.rs +++ b/src/daft-stats/src/lib.rs @@ -40,7 +40,7 @@ impl From for DaftError { fn from(value: Error) -> Self { match value { Error::DaftCoreCompute { source } => source, - _ => DaftError::External(value.into()), + _ => Self::External(value.into()), } } } diff --git a/src/daft-stats/src/table_stats.rs b/src/daft-stats/src/table_stats.rs index 40b2e220c2..0fff747c98 100644 --- a/src/daft-stats/src/table_stats.rs +++ b/src/daft-stats/src/table_stats.rs @@ -31,7 +31,7 @@ impl TableStatistics { let stats = ColumnRangeStatistics::new(Some(col.slice(0, 1)?), Some(col.slice(1, 2)?))?; columns.insert(name, stats); } - Ok(TableStatistics { columns }) + Ok(Self { columns }) } pub fn from_table(table: &Table) -> Self { @@ -41,7 +41,7 @@ impl TableStatistics { let stats = ColumnRangeStatistics::from_series(col); columns.insert(name, stats); } - TableStatistics { columns } + Self { columns } } pub fn union(&self, other: &Self) -> crate::Result { @@ -61,7 +61,7 @@ impl TableStatistics { }?; columns.insert(col.clone(), res_col); } - Ok(TableStatistics { columns }) + Ok(Self { columns }) } pub fn eval_expression_list( @@ -151,7 +151,7 @@ impl TableStatistics { } } - pub fn cast_to_schema(&self, schema: SchemaRef) -> crate::Result { + pub fn cast_to_schema(&self, schema: SchemaRef) -> crate::Result { self.cast_to_schema_with_fill(schema, None) } @@ -159,7 +159,7 @@ impl TableStatistics { &self, schema: SchemaRef, fill_map: Option<&HashMap<&str, ExprRef>>, - ) -> crate::Result { + ) -> crate::Result { let mut columns = IndexMap::new(); for (field_name, field) in schema.fields.iter() { let crs = match self.columns.get(field_name) { @@ -175,7 +175,7 @@ impl TableStatistics { }; columns.insert(field_name.clone(), crs); } - Ok(TableStatistics { columns }) + Ok(Self { columns }) } } diff --git a/src/daft-table/Cargo.toml b/src/daft-table/Cargo.toml index 56931e04d5..5682fa41a2 100644 --- a/src/daft-table/Cargo.toml +++ b/src/daft-table/Cargo.toml @@ -17,6 +17,9 @@ serde = {workspace = true} [features] python = ["dep:pyo3", "common-error/python", "daft-core/python", "daft-dsl/python", "common-arrow-ffi/python", "common-display/python", "daft-image/python"] +[lints] +workspace = true + [package] edition = {workspace = true} name = "daft-table" diff --git a/src/daft-table/src/lib.rs b/src/daft-table/src/lib.rs index 100ca31726..3669fda3f5 100644 --- a/src/daft-table/src/lib.rs +++ b/src/daft-table/src/lib.rs @@ -94,7 +94,7 @@ impl Table { }) .collect(); - Ok(Table::new_unchecked(schema, columns?, num_rows)) + Ok(Self::new_unchecked(schema, columns?, num_rows)) } /// Create a new [`Table`] and validate against `num_rows` @@ -121,7 +121,7 @@ impl Table { } } - Ok(Table::new_unchecked(schema, columns, num_rows)) + Ok(Self::new_unchecked(schema, columns, num_rows)) } /// Create a new [`Table`] without any validations @@ -135,7 +135,7 @@ impl Table { columns: Vec, num_rows: usize, ) -> Self { - Table { + Self { schema: schema.into(), columns, num_rows, @@ -149,7 +149,7 @@ impl Table { let series = Series::empty(field_name, &field.dtype); columns.push(series) } - Ok(Table::new_unchecked(schema, columns, 0)) + Ok(Self::new_unchecked(schema, columns, 0)) } /// Create a Table from a set of columns. @@ -179,7 +179,7 @@ impl Table { } } - Ok(Table::new_unchecked(schema, columns, num_rows)) + Ok(Self::new_unchecked(schema, columns, num_rows)) } pub fn num_columns(&self) -> usize { @@ -202,12 +202,12 @@ impl Table { let new_series: DaftResult> = self.columns.iter().map(|s| s.slice(start, end)).collect(); let new_num_rows = self.len().min(end - start); - Table::new_with_size(self.schema.clone(), new_series?, new_num_rows) + Self::new_with_size(self.schema.clone(), new_series?, new_num_rows) } pub fn head(&self, num: usize) -> DaftResult { if num >= self.len() { - return Ok(Table::new_unchecked( + return Ok(Self::new_unchecked( self.schema.clone(), self.columns.clone(), self.len(), @@ -346,15 +346,15 @@ impl Table { mask.len() - num_filtered }; - Table::new_with_size(self.schema.clone(), new_series?, num_rows) + Self::new_with_size(self.schema.clone(), new_series?, num_rows) } pub fn take(&self, idx: &Series) -> DaftResult { let new_series: DaftResult> = self.columns.iter().map(|s| s.take(idx)).collect(); - Table::new_with_size(self.schema.clone(), new_series?, idx.len()) + Self::new_with_size(self.schema.clone(), new_series?, idx.len()) } - pub fn concat>(tables: &[T]) -> DaftResult { + pub fn concat>(tables: &[T]) -> DaftResult { if tables.is_empty() { return Err(DaftError::ValueError( "Need at least 1 Table to perform concat".to_string(), @@ -384,14 +384,14 @@ impl Table { new_series.push(Series::concat(series_to_cat.as_slice())?); } - Table::new_with_size( + Self::new_with_size( first_table.schema.clone(), new_series, tables.iter().map(|t| t.as_ref().len()).sum(), ) } - pub fn union(&self, other: &Table) -> DaftResult { + pub fn union(&self, other: &Self) -> DaftResult { if self.num_rows != other.num_rows { return Err(DaftError::ValueError(format!( "Cannot union tables of length {} and {}", @@ -625,7 +625,7 @@ impl Table { (true, _) => result_series.iter().map(|s| s.len()).max().unwrap(), }; - Table::new_with_broadcast(new_schema, result_series, num_rows) + Self::new_with_broadcast(new_schema, result_series, num_rows) } pub fn as_physical(&self) -> DaftResult { @@ -635,7 +635,7 @@ impl Table { .map(|s| s.as_physical()) .collect::>>()?; let new_schema = Schema::new(new_series.iter().map(|s| s.field().clone()).collect())?; - Table::new_with_size(new_schema, new_series, self.len()) + Self::new_with_size(new_schema, new_series, self.len()) } pub fn cast_to_schema(&self, schema: &Schema) -> DaftResult { @@ -781,8 +781,8 @@ impl Display for Table { } } -impl AsRef for Table { - fn as_ref(&self) -> &Table { +impl AsRef for Table { + fn as_ref(&self) -> &Self { self } } diff --git a/src/daft-table/src/ops/agg.rs b/src/daft-table/src/ops/agg.rs index 33bbf635b6..70abdf69f4 100644 --- a/src/daft-table/src/ops/agg.rs +++ b/src/daft-table/src/ops/agg.rs @@ -5,7 +5,7 @@ use daft_dsl::{functions::FunctionExpr, AggExpr, Expr, ExprRef}; use crate::Table; impl Table { - pub fn agg(&self, to_agg: &[ExprRef], group_by: &[ExprRef]) -> DaftResult
{ + pub fn agg(&self, to_agg: &[ExprRef], group_by: &[ExprRef]) -> DaftResult { // Dispatch depending on whether we're doing groupby or just a global agg. match group_by.len() { 0 => self.agg_global(to_agg), @@ -13,11 +13,11 @@ impl Table { } } - pub fn agg_global(&self, to_agg: &[ExprRef]) -> DaftResult
{ + pub fn agg_global(&self, to_agg: &[ExprRef]) -> DaftResult { self.eval_expression_list(to_agg) } - pub fn agg_groupby(&self, to_agg: &[ExprRef], group_by: &[ExprRef]) -> DaftResult
{ + pub fn agg_groupby(&self, to_agg: &[ExprRef], group_by: &[ExprRef]) -> DaftResult { let agg_exprs = to_agg .iter() .map(|e| match e.as_ref() { @@ -68,7 +68,7 @@ impl Table { func: &FunctionExpr, inputs: &[ExprRef], group_by: &[ExprRef], - ) -> DaftResult
{ + ) -> DaftResult { use daft_core::array::ops::IntoGroups; use daft_dsl::functions::python::PythonUDF; @@ -100,7 +100,7 @@ impl Table { // Take fast path short circuit if there is only 1 group let (groupkeys_table, grouped_col) = if groupvals_indices.is_empty() { - let empty_groupkeys_table = Table::empty(Some(groupby_table.schema.clone()))?; + let empty_groupkeys_table = Self::empty(Some(groupby_table.schema.clone()))?; let empty_udf_output_col = Series::empty( evaluated_inputs .first() @@ -151,7 +151,7 @@ impl Table { .collect::>>()?; // Combine the broadcasted group keys into a Table - Table::from_nonempty_columns(broadcasted_groupkeys)? + Self::from_nonempty_columns(broadcasted_groupkeys)? }; Ok((broadcasted_groupkeys_table, evaluated_grouped_col)) @@ -162,7 +162,7 @@ impl Table { let concatenated_grouped_col = Series::concat(series_refs.as_slice())?; let table_refs = grouped_results.iter().map(|(t, _)| t).collect::>(); - let concatenated_groupkeys_table = Table::concat(table_refs.as_slice())?; + let concatenated_groupkeys_table = Self::concat(table_refs.as_slice())?; (concatenated_groupkeys_table, concatenated_grouped_col) }; diff --git a/src/daft-table/src/ops/joins/mod.rs b/src/daft-table/src/ops/joins/mod.rs index 2d5e80dae5..5bb4f77d4b 100644 --- a/src/daft-table/src/ops/joins/mod.rs +++ b/src/daft-table/src/ops/joins/mod.rs @@ -178,6 +178,6 @@ impl Table { let num_rows = lidx.len(); join_series = add_non_join_key_columns(self, right, lidx, ridx, join_series)?; - Table::new_with_size(join_schema, join_series, num_rows) + Self::new_with_size(join_schema, join_series, num_rows) } } diff --git a/src/daft-table/src/ops/pivot.rs b/src/daft-table/src/ops/pivot.rs index a0b07d20cd..4418d4365e 100644 --- a/src/daft-table/src/ops/pivot.rs +++ b/src/daft-table/src/ops/pivot.rs @@ -73,7 +73,7 @@ impl Table { pivot_col: ExprRef, values_col: ExprRef, names: Vec, - ) -> DaftResult
{ + ) -> DaftResult { // This function pivots the table based on the given group_by, pivot, and values column. // // At a high level this function does two things: diff --git a/src/daft-table/src/ops/sort.rs b/src/daft-table/src/ops/sort.rs index de082b1970..20e56c3b0b 100644 --- a/src/daft-table/src/ops/sort.rs +++ b/src/daft-table/src/ops/sort.rs @@ -5,7 +5,7 @@ use daft_dsl::ExprRef; use crate::Table; impl Table { - pub fn sort(&self, sort_keys: &[ExprRef], descending: &[bool]) -> DaftResult
{ + pub fn sort(&self, sort_keys: &[ExprRef], descending: &[bool]) -> DaftResult { let argsort = self.argsort(sort_keys, descending)?; self.take(&argsort) } diff --git a/src/daft-table/src/ops/unpivot.rs b/src/daft-table/src/ops/unpivot.rs index 37fba415fc..5b43777417 100644 --- a/src/daft-table/src/ops/unpivot.rs +++ b/src/daft-table/src/ops/unpivot.rs @@ -54,6 +54,6 @@ impl Table { ])?)?; let unpivot_series = [ids_series, vec![variable_series, value_series]].concat(); - Table::new_with_size(unpivot_schema, unpivot_series, unpivoted_len) + Self::new_with_size(unpivot_schema, unpivot_series, unpivoted_len) } } diff --git a/src/daft-table/src/python.rs b/src/daft-table/src/python.rs index 728383d23c..3bacbcf019 100644 --- a/src/daft-table/src/python.rs +++ b/src/daft-table/src/python.rs @@ -261,7 +261,7 @@ impl PyTable { .partition_by_hash(exprs.as_slice(), num_partitions as usize)? .into_iter() .map(|t| t.into()) - .collect::>()) + .collect::>()) }) } @@ -288,7 +288,7 @@ impl PyTable { .partition_by_random(num_partitions as usize, seed as u64)? .into_iter() .map(|t| t.into()) - .collect::>()) + .collect::>()) }) } @@ -306,7 +306,7 @@ impl PyTable { .partition_by_range(exprs.as_slice(), &boundaries.table, descending.as_slice())? .into_iter() .map(|t| t.into()) - .collect::>()) + .collect::>()) }) } @@ -318,10 +318,7 @@ impl PyTable { let exprs: Vec = partition_keys.into_iter().map(|e| e.into()).collect(); py.allow_threads(|| { let (tables, values) = self.table.partition_by_value(exprs.as_slice())?; - let pytables = tables - .into_iter() - .map(|t| t.into()) - .collect::>(); + let pytables = tables.into_iter().map(|t| t.into()).collect::>(); let values = values.into(); Ok((pytables, values)) }) @@ -407,7 +404,7 @@ impl PyTable { ) -> PyResult { let table = ffi::record_batches_to_table(py, record_batches.as_slice(), schema.schema.clone())?; - Ok(PyTable { table }) + Ok(Self { table }) } #[staticmethod] @@ -438,7 +435,7 @@ impl PyTable { } } - Ok(PyTable { + Ok(Self { table: Table::new_with_broadcast(Schema::new(fields)?, columns, num_rows)?, }) } @@ -462,7 +459,7 @@ impl PyTable { impl From
for PyTable { fn from(value: Table) -> Self { - PyTable { table: value } + Self { table: value } } } diff --git a/src/hyperloglog/Cargo.toml b/src/hyperloglog/Cargo.toml index fa430673f0..fa299abdea 100644 --- a/src/hyperloglog/Cargo.toml +++ b/src/hyperloglog/Cargo.toml @@ -1,5 +1,8 @@ [dependencies] +[lints] +workspace = true + [package] name = "hyperloglog" edition.workspace = true diff --git a/src/parquet2/Cargo.toml b/src/parquet2/Cargo.toml index 6d437e1578..b62be487f9 100644 --- a/src/parquet2/Cargo.toml +++ b/src/parquet2/Cargo.toml @@ -33,6 +33,9 @@ snappy = ["snap"] bench = false name = "parquet2" +[lints] +workspace = true + [package] authors = [ "Jorge C. Leitao for Error { - fn from(e: parquet_format_safe::thrift::Error) -> Error { + fn from(e: parquet_format_safe::thrift::Error) -> Self { match e { parquet_format_safe::thrift::Error::Transport(msg) => { - Error::Transport(format!("io error occurred when decoding thrift: {}", msg)) + Self::Transport(format!("io error occurred when decoding thrift: {}", msg)) } - _ => Error::OutOfSpec(format!("Invalid thrift: {}", e)), + _ => Self::OutOfSpec(format!("Invalid thrift: {}", e)), } } } diff --git a/src/parquet2/src/metadata/column_order.rs b/src/parquet2/src/metadata/column_order.rs index 1e8a258830..ea530f316c 100644 --- a/src/parquet2/src/metadata/column_order.rs +++ b/src/parquet2/src/metadata/column_order.rs @@ -20,8 +20,8 @@ impl ColumnOrder { /// Returns sort order associated with this column order. pub fn sort_order(&self) -> SortOrder { match *self { - ColumnOrder::TypeDefinedOrder(order) => order, - ColumnOrder::Undefined => SortOrder::Signed, + Self::TypeDefinedOrder(order) => order, + Self::Undefined => SortOrder::Signed, } } } diff --git a/src/parquet2/src/metadata/file_metadata.rs b/src/parquet2/src/metadata/file_metadata.rs index 0e2e110bd6..56dedcf020 100644 --- a/src/parquet2/src/metadata/file_metadata.rs +++ b/src/parquet2/src/metadata/file_metadata.rs @@ -143,7 +143,7 @@ impl FileMetaData { .column_orders .map(|orders| parse_column_orders(&orders, &schema_descr)); - Ok(FileMetaData { + Ok(Self { version: metadata.version, num_rows: metadata.num_rows.try_into()?, created_by: metadata.created_by, diff --git a/src/parquet2/src/metadata/row_metadata.rs b/src/parquet2/src/metadata/row_metadata.rs index 22c1c0e26d..d44e415689 100644 --- a/src/parquet2/src/metadata/row_metadata.rs +++ b/src/parquet2/src/metadata/row_metadata.rs @@ -21,7 +21,7 @@ impl RowGroupMetaData { columns: Vec, num_rows: usize, total_byte_size: usize, - ) -> RowGroupMetaData { + ) -> Self { Self { columns, num_rows, @@ -56,7 +56,7 @@ impl RowGroupMetaData { pub(crate) fn try_from_thrift( schema_descr: &SchemaDescriptor, rg: RowGroup, - ) -> Result { + ) -> Result { if schema_descr.columns().len() != rg.columns.len() { return Err(Error::oos(format!("The number of columns in the row group ({}) must be equal to the number of columns in the schema ({})", rg.columns.len(), schema_descr.columns().len()))); } @@ -71,7 +71,7 @@ impl RowGroupMetaData { }) .collect::>>()?; - Ok(RowGroupMetaData { + Ok(Self { columns, num_rows, total_byte_size, diff --git a/src/parquet2/src/page/mod.rs b/src/parquet2/src/page/mod.rs index 03b5425e47..2b1e4fc49b 100644 --- a/src/parquet2/src/page/mod.rs +++ b/src/parquet2/src/page/mod.rs @@ -125,8 +125,8 @@ pub enum DataPageHeader { impl DataPageHeader { pub fn num_values(&self) -> usize { match &self { - DataPageHeader::V1(d) => d.num_values as usize, - DataPageHeader::V2(d) => d.num_values as usize, + Self::V1(d) => d.num_values as usize, + Self::V2(d) => d.num_values as usize, } } } @@ -262,36 +262,36 @@ pub enum CompressedPage { impl CompressedPage { pub(crate) fn buffer(&mut self) -> &mut Vec { match self { - CompressedPage::Data(page) => &mut page.buffer, - CompressedPage::Dict(page) => &mut page.buffer, + Self::Data(page) => &mut page.buffer, + Self::Dict(page) => &mut page.buffer, } } pub(crate) fn compression(&self) -> Compression { match self { - CompressedPage::Data(page) => page.compression(), - CompressedPage::Dict(page) => page.compression(), + Self::Data(page) => page.compression(), + Self::Dict(page) => page.compression(), } } pub(crate) fn num_values(&self) -> usize { match self { - CompressedPage::Data(page) => page.num_values(), - CompressedPage::Dict(_) => 0, + Self::Data(page) => page.num_values(), + Self::Dict(_) => 0, } } pub(crate) fn selected_rows(&self) -> Option<&[Interval]> { match self { - CompressedPage::Data(page) => page.selected_rows(), - CompressedPage::Dict(_) => None, + Self::Data(page) => page.selected_rows(), + Self::Dict(_) => None, } } pub(crate) fn uncompressed_size(&self) -> usize { match self { - CompressedPage::Data(page) => page.uncompressed_page_size, - CompressedPage::Dict(page) => page.uncompressed_page_size, + Self::Data(page) => page.uncompressed_page_size, + Self::Dict(page) => page.uncompressed_page_size, } } } diff --git a/src/parquet2/src/parquet_bridge.rs b/src/parquet2/src/parquet_bridge.rs index 0ccb1f3544..428bfa6dc5 100644 --- a/src/parquet2/src/parquet_bridge.rs +++ b/src/parquet2/src/parquet_bridge.rs @@ -27,9 +27,9 @@ impl TryFrom for Repetition { fn try_from(repetition: FieldRepetitionType) -> Result { Ok(match repetition { - FieldRepetitionType::REQUIRED => Repetition::Required, - FieldRepetitionType::OPTIONAL => Repetition::Optional, - FieldRepetitionType::REPEATED => Repetition::Repeated, + FieldRepetitionType::REQUIRED => Self::Required, + FieldRepetitionType::OPTIONAL => Self::Optional, + FieldRepetitionType::REPEATED => Self::Repeated, _ => return Err(Error::oos("Thrift out of range")), }) } @@ -38,9 +38,9 @@ impl TryFrom for Repetition { impl From for FieldRepetitionType { fn from(repetition: Repetition) -> Self { match repetition { - Repetition::Required => FieldRepetitionType::REQUIRED, - Repetition::Optional => FieldRepetitionType::OPTIONAL, - Repetition::Repeated => FieldRepetitionType::REPEATED, + Repetition::Required => Self::REQUIRED, + Repetition::Optional => Self::OPTIONAL, + Repetition::Repeated => Self::REPEATED, } } } @@ -62,14 +62,14 @@ impl TryFrom for Compression { fn try_from(codec: CompressionCodec) -> Result { Ok(match codec { - CompressionCodec::UNCOMPRESSED => Compression::Uncompressed, - CompressionCodec::SNAPPY => Compression::Snappy, - CompressionCodec::GZIP => Compression::Gzip, - CompressionCodec::LZO => Compression::Lzo, - CompressionCodec::BROTLI => Compression::Brotli, - CompressionCodec::LZ4 => Compression::Lz4, - CompressionCodec::ZSTD => Compression::Zstd, - CompressionCodec::LZ4_RAW => Compression::Lz4Raw, + CompressionCodec::UNCOMPRESSED => Self::Uncompressed, + CompressionCodec::SNAPPY => Self::Snappy, + CompressionCodec::GZIP => Self::Gzip, + CompressionCodec::LZO => Self::Lzo, + CompressionCodec::BROTLI => Self::Brotli, + CompressionCodec::LZ4 => Self::Lz4, + CompressionCodec::ZSTD => Self::Zstd, + CompressionCodec::LZ4_RAW => Self::Lz4Raw, _ => return Err(Error::oos("Thrift out of range")), }) } @@ -78,14 +78,14 @@ impl TryFrom for Compression { impl From for CompressionCodec { fn from(codec: Compression) -> Self { match codec { - Compression::Uncompressed => CompressionCodec::UNCOMPRESSED, - Compression::Snappy => CompressionCodec::SNAPPY, - Compression::Gzip => CompressionCodec::GZIP, - Compression::Lzo => CompressionCodec::LZO, - Compression::Brotli => CompressionCodec::BROTLI, - Compression::Lz4 => CompressionCodec::LZ4, - Compression::Zstd => CompressionCodec::ZSTD, - Compression::Lz4Raw => CompressionCodec::LZ4_RAW, + Compression::Uncompressed => Self::UNCOMPRESSED, + Compression::Snappy => Self::SNAPPY, + Compression::Gzip => Self::GZIP, + Compression::Lzo => Self::LZO, + Compression::Brotli => Self::BROTLI, + Compression::Lz4 => Self::LZ4, + Compression::Zstd => Self::ZSTD, + Compression::Lz4Raw => Self::LZ4_RAW, } } } @@ -108,14 +108,14 @@ pub enum CompressionOptions { impl From for Compression { fn from(value: CompressionOptions) -> Self { match value { - CompressionOptions::Uncompressed => Compression::Uncompressed, - CompressionOptions::Snappy => Compression::Snappy, - CompressionOptions::Gzip(_) => Compression::Gzip, - CompressionOptions::Lzo => Compression::Lzo, - CompressionOptions::Brotli(_) => Compression::Brotli, - CompressionOptions::Lz4 => Compression::Lz4, - CompressionOptions::Zstd(_) => Compression::Zstd, - CompressionOptions::Lz4Raw => Compression::Lz4Raw, + CompressionOptions::Uncompressed => Self::Uncompressed, + CompressionOptions::Snappy => Self::Snappy, + CompressionOptions::Gzip(_) => Self::Gzip, + CompressionOptions::Lzo => Self::Lzo, + CompressionOptions::Brotli(_) => Self::Brotli, + CompressionOptions::Lz4 => Self::Lz4, + CompressionOptions::Zstd(_) => Self::Zstd, + CompressionOptions::Lz4Raw => Self::Lz4Raw, } } } @@ -123,14 +123,14 @@ impl From for Compression { impl From for CompressionCodec { fn from(codec: CompressionOptions) -> Self { match codec { - CompressionOptions::Uncompressed => CompressionCodec::UNCOMPRESSED, - CompressionOptions::Snappy => CompressionCodec::SNAPPY, - CompressionOptions::Gzip(_) => CompressionCodec::GZIP, - CompressionOptions::Lzo => CompressionCodec::LZO, - CompressionOptions::Brotli(_) => CompressionCodec::BROTLI, - CompressionOptions::Lz4 => CompressionCodec::LZ4, - CompressionOptions::Zstd(_) => CompressionCodec::ZSTD, - CompressionOptions::Lz4Raw => CompressionCodec::LZ4_RAW, + CompressionOptions::Uncompressed => Self::UNCOMPRESSED, + CompressionOptions::Snappy => Self::SNAPPY, + CompressionOptions::Gzip(_) => Self::GZIP, + CompressionOptions::Lzo => Self::LZO, + CompressionOptions::Brotli(_) => Self::BROTLI, + CompressionOptions::Lz4 => Self::LZ4, + CompressionOptions::Zstd(_) => Self::ZSTD, + CompressionOptions::Lz4Raw => Self::LZ4_RAW, } } } @@ -266,9 +266,9 @@ impl TryFrom for PageType { fn try_from(type_: ParquetPageType) -> Result { Ok(match type_ { - ParquetPageType::DATA_PAGE => PageType::DataPage, - ParquetPageType::DATA_PAGE_V2 => PageType::DataPageV2, - ParquetPageType::DICTIONARY_PAGE => PageType::DictionaryPage, + ParquetPageType::DATA_PAGE => Self::DataPage, + ParquetPageType::DATA_PAGE_V2 => Self::DataPageV2, + ParquetPageType::DICTIONARY_PAGE => Self::DictionaryPage, _ => return Err(Error::oos("Thrift out of range")), }) } @@ -277,9 +277,9 @@ impl TryFrom for PageType { impl From for ParquetPageType { fn from(type_: PageType) -> Self { match type_ { - PageType::DataPage => ParquetPageType::DATA_PAGE, - PageType::DataPageV2 => ParquetPageType::DATA_PAGE_V2, - PageType::DictionaryPage => ParquetPageType::DICTIONARY_PAGE, + PageType::DataPage => Self::DATA_PAGE, + PageType::DataPageV2 => Self::DATA_PAGE_V2, + PageType::DictionaryPage => Self::DICTIONARY_PAGE, } } } @@ -331,15 +331,15 @@ impl TryFrom for Encoding { fn try_from(encoding: ParquetEncoding) -> Result { Ok(match encoding { - ParquetEncoding::PLAIN => Encoding::Plain, - ParquetEncoding::PLAIN_DICTIONARY => Encoding::PlainDictionary, - ParquetEncoding::RLE => Encoding::Rle, - ParquetEncoding::BIT_PACKED => Encoding::BitPacked, - ParquetEncoding::DELTA_BINARY_PACKED => Encoding::DeltaBinaryPacked, - ParquetEncoding::DELTA_LENGTH_BYTE_ARRAY => Encoding::DeltaLengthByteArray, - ParquetEncoding::DELTA_BYTE_ARRAY => Encoding::DeltaByteArray, - ParquetEncoding::RLE_DICTIONARY => Encoding::RleDictionary, - ParquetEncoding::BYTE_STREAM_SPLIT => Encoding::ByteStreamSplit, + ParquetEncoding::PLAIN => Self::Plain, + ParquetEncoding::PLAIN_DICTIONARY => Self::PlainDictionary, + ParquetEncoding::RLE => Self::Rle, + ParquetEncoding::BIT_PACKED => Self::BitPacked, + ParquetEncoding::DELTA_BINARY_PACKED => Self::DeltaBinaryPacked, + ParquetEncoding::DELTA_LENGTH_BYTE_ARRAY => Self::DeltaLengthByteArray, + ParquetEncoding::DELTA_BYTE_ARRAY => Self::DeltaByteArray, + ParquetEncoding::RLE_DICTIONARY => Self::RleDictionary, + ParquetEncoding::BYTE_STREAM_SPLIT => Self::ByteStreamSplit, _ => return Err(Error::oos("Thrift out of range")), }) } @@ -348,15 +348,15 @@ impl TryFrom for Encoding { impl From for ParquetEncoding { fn from(encoding: Encoding) -> Self { match encoding { - Encoding::Plain => ParquetEncoding::PLAIN, - Encoding::PlainDictionary => ParquetEncoding::PLAIN_DICTIONARY, - Encoding::Rle => ParquetEncoding::RLE, - Encoding::BitPacked => ParquetEncoding::BIT_PACKED, - Encoding::DeltaBinaryPacked => ParquetEncoding::DELTA_BINARY_PACKED, - Encoding::DeltaLengthByteArray => ParquetEncoding::DELTA_LENGTH_BYTE_ARRAY, - Encoding::DeltaByteArray => ParquetEncoding::DELTA_BYTE_ARRAY, - Encoding::RleDictionary => ParquetEncoding::RLE_DICTIONARY, - Encoding::ByteStreamSplit => ParquetEncoding::BYTE_STREAM_SPLIT, + Encoding::Plain => Self::PLAIN, + Encoding::PlainDictionary => Self::PLAIN_DICTIONARY, + Encoding::Rle => Self::RLE, + Encoding::BitPacked => Self::BIT_PACKED, + Encoding::DeltaBinaryPacked => Self::DELTA_BINARY_PACKED, + Encoding::DeltaLengthByteArray => Self::DELTA_LENGTH_BYTE_ARRAY, + Encoding::DeltaByteArray => Self::DELTA_BYTE_ARRAY, + Encoding::RleDictionary => Self::RLE_DICTIONARY, + Encoding::ByteStreamSplit => Self::BYTE_STREAM_SPLIT, } } } @@ -381,9 +381,9 @@ impl TryFrom for BoundaryOrder { fn try_from(encoding: ParquetBoundaryOrder) -> Result { Ok(match encoding { - ParquetBoundaryOrder::UNORDERED => BoundaryOrder::Unordered, - ParquetBoundaryOrder::ASCENDING => BoundaryOrder::Ascending, - ParquetBoundaryOrder::DESCENDING => BoundaryOrder::Descending, + ParquetBoundaryOrder::UNORDERED => Self::Unordered, + ParquetBoundaryOrder::ASCENDING => Self::Ascending, + ParquetBoundaryOrder::DESCENDING => Self::Descending, _ => return Err(Error::oos("BoundaryOrder Thrift value out of range")), }) } @@ -392,9 +392,9 @@ impl TryFrom for BoundaryOrder { impl From for ParquetBoundaryOrder { fn from(encoding: BoundaryOrder) -> Self { match encoding { - BoundaryOrder::Unordered => ParquetBoundaryOrder::UNORDERED, - BoundaryOrder::Ascending => ParquetBoundaryOrder::ASCENDING, - BoundaryOrder::Descending => ParquetBoundaryOrder::DESCENDING, + BoundaryOrder::Unordered => Self::UNORDERED, + BoundaryOrder::Ascending => Self::ASCENDING, + BoundaryOrder::Descending => Self::DESCENDING, } } } @@ -443,9 +443,9 @@ pub enum TimeUnit { impl From for TimeUnit { fn from(encoding: ParquetTimeUnit) -> Self { match encoding { - ParquetTimeUnit::MILLIS(_) => TimeUnit::Milliseconds, - ParquetTimeUnit::MICROS(_) => TimeUnit::Microseconds, - ParquetTimeUnit::NANOS(_) => TimeUnit::Nanoseconds, + ParquetTimeUnit::MILLIS(_) => Self::Milliseconds, + ParquetTimeUnit::MICROS(_) => Self::Microseconds, + ParquetTimeUnit::NANOS(_) => Self::Nanoseconds, } } } @@ -453,9 +453,9 @@ impl From for TimeUnit { impl From for ParquetTimeUnit { fn from(unit: TimeUnit) -> Self { match unit { - TimeUnit::Milliseconds => ParquetTimeUnit::MILLIS(Default::default()), - TimeUnit::Microseconds => ParquetTimeUnit::MICROS(Default::default()), - TimeUnit::Nanoseconds => ParquetTimeUnit::NANOS(Default::default()), + TimeUnit::Milliseconds => Self::MILLIS(Default::default()), + TimeUnit::Microseconds => Self::MICROS(Default::default()), + TimeUnit::Nanoseconds => Self::NANOS(Default::default()), } } } @@ -503,8 +503,8 @@ pub enum GroupLogicalType { impl From for ParquetLogicalType { fn from(type_: GroupLogicalType) -> Self { match type_ { - GroupLogicalType::Map => ParquetLogicalType::MAP(Default::default()), - GroupLogicalType::List => ParquetLogicalType::LIST(Default::default()), + GroupLogicalType::Map => Self::MAP(Default::default()), + GroupLogicalType::List => Self::LIST(Default::default()), } } } @@ -512,17 +512,17 @@ impl From for ParquetLogicalType { impl From<(i32, bool)> for IntegerType { fn from((bit_width, is_signed): (i32, bool)) -> Self { match (bit_width, is_signed) { - (8, true) => IntegerType::Int8, - (16, true) => IntegerType::Int16, - (32, true) => IntegerType::Int32, - (64, true) => IntegerType::Int64, - (8, false) => IntegerType::UInt8, - (16, false) => IntegerType::UInt16, - (32, false) => IntegerType::UInt32, - (64, false) => IntegerType::UInt64, + (8, true) => Self::Int8, + (16, true) => Self::Int16, + (32, true) => Self::Int32, + (64, true) => Self::Int64, + (8, false) => Self::UInt8, + (16, false) => Self::UInt16, + (32, false) => Self::UInt32, + (64, false) => Self::UInt64, // The above are the only possible annotations for parquet's int32. Anything else // is a deviation to the parquet specification and we ignore - _ => IntegerType::Int32, + _ => Self::Int32, } } } @@ -547,28 +547,28 @@ impl TryFrom for PrimitiveLogicalType { fn try_from(type_: ParquetLogicalType) -> Result { Ok(match type_ { - ParquetLogicalType::STRING(_) => PrimitiveLogicalType::String, - ParquetLogicalType::ENUM(_) => PrimitiveLogicalType::Enum, - ParquetLogicalType::DECIMAL(decimal) => PrimitiveLogicalType::Decimal( + ParquetLogicalType::STRING(_) => Self::String, + ParquetLogicalType::ENUM(_) => Self::Enum, + ParquetLogicalType::DECIMAL(decimal) => Self::Decimal( decimal.precision.try_into()?, decimal.scale.try_into()?, ), - ParquetLogicalType::DATE(_) => PrimitiveLogicalType::Date, - ParquetLogicalType::TIME(time) => PrimitiveLogicalType::Time { + ParquetLogicalType::DATE(_) => Self::Date, + ParquetLogicalType::TIME(time) => Self::Time { unit: time.unit.into(), is_adjusted_to_utc: time.is_adjusted_to_u_t_c, }, - ParquetLogicalType::TIMESTAMP(time) => PrimitiveLogicalType::Timestamp { + ParquetLogicalType::TIMESTAMP(time) => Self::Timestamp { unit: time.unit.into(), is_adjusted_to_utc: time.is_adjusted_to_u_t_c, }, ParquetLogicalType::INTEGER(int) => { - PrimitiveLogicalType::Integer((int.bit_width as i32, int.is_signed).into()) + Self::Integer((int.bit_width as i32, int.is_signed).into()) } - ParquetLogicalType::UNKNOWN(_) => PrimitiveLogicalType::Unknown, - ParquetLogicalType::JSON(_) => PrimitiveLogicalType::Json, - ParquetLogicalType::BSON(_) => PrimitiveLogicalType::Bson, - ParquetLogicalType::UUID(_) => PrimitiveLogicalType::Uuid, + ParquetLogicalType::UNKNOWN(_) => Self::Unknown, + ParquetLogicalType::JSON(_) => Self::Json, + ParquetLogicalType::BSON(_) => Self::Bson, + ParquetLogicalType::UUID(_) => Self::Uuid, _ => return Err(Error::oos("LogicalType value out of range")), }) } @@ -579,8 +579,8 @@ impl TryFrom for GroupLogicalType { fn try_from(type_: ParquetLogicalType) -> Result { Ok(match type_ { - ParquetLogicalType::LIST(_) => GroupLogicalType::List, - ParquetLogicalType::MAP(_) => GroupLogicalType::Map, + ParquetLogicalType::LIST(_) => Self::List, + ParquetLogicalType::MAP(_) => Self::Map, _ => return Err(Error::oos("LogicalType value out of range")), }) } @@ -589,40 +589,40 @@ impl TryFrom for GroupLogicalType { impl From for ParquetLogicalType { fn from(type_: PrimitiveLogicalType) -> Self { match type_ { - PrimitiveLogicalType::String => ParquetLogicalType::STRING(Default::default()), - PrimitiveLogicalType::Enum => ParquetLogicalType::ENUM(Default::default()), + PrimitiveLogicalType::String => Self::STRING(Default::default()), + PrimitiveLogicalType::Enum => Self::ENUM(Default::default()), PrimitiveLogicalType::Decimal(precision, scale) => { - ParquetLogicalType::DECIMAL(DecimalType { + Self::DECIMAL(DecimalType { precision: precision as i32, scale: scale as i32, }) } - PrimitiveLogicalType::Date => ParquetLogicalType::DATE(Default::default()), + PrimitiveLogicalType::Date => Self::DATE(Default::default()), PrimitiveLogicalType::Time { unit, is_adjusted_to_utc, - } => ParquetLogicalType::TIME(TimeType { + } => Self::TIME(TimeType { unit: unit.into(), is_adjusted_to_u_t_c: is_adjusted_to_utc, }), PrimitiveLogicalType::Timestamp { unit, is_adjusted_to_utc, - } => ParquetLogicalType::TIMESTAMP(TimestampType { + } => Self::TIMESTAMP(TimestampType { unit: unit.into(), is_adjusted_to_u_t_c: is_adjusted_to_utc, }), PrimitiveLogicalType::Integer(integer) => { let (bit_width, is_signed) = integer.into(); - ParquetLogicalType::INTEGER(IntType { + Self::INTEGER(IntType { bit_width: bit_width as i8, is_signed, }) } - PrimitiveLogicalType::Unknown => ParquetLogicalType::UNKNOWN(Default::default()), - PrimitiveLogicalType::Json => ParquetLogicalType::JSON(Default::default()), - PrimitiveLogicalType::Bson => ParquetLogicalType::BSON(Default::default()), - PrimitiveLogicalType::Uuid => ParquetLogicalType::UUID(Default::default()), + PrimitiveLogicalType::Unknown => Self::UNKNOWN(Default::default()), + PrimitiveLogicalType::Json => Self::JSON(Default::default()), + PrimitiveLogicalType::Bson => Self::BSON(Default::default()), + PrimitiveLogicalType::Uuid => Self::UUID(Default::default()), } } } diff --git a/src/parquet2/src/schema/io_thrift/from_thrift.rs b/src/parquet2/src/schema/io_thrift/from_thrift.rs index 68add24fd6..222401eff6 100644 --- a/src/parquet2/src/schema/io_thrift/from_thrift.rs +++ b/src/parquet2/src/schema/io_thrift/from_thrift.rs @@ -9,7 +9,7 @@ use super::super::types::ParquetType; impl ParquetType { /// Method to convert from Thrift. - pub fn try_from_thrift(elements: &[SchemaElement]) -> Result { + pub fn try_from_thrift(elements: &[SchemaElement]) -> Result { let mut index = 0; let mut schema_nodes = Vec::new(); while index < elements.len() { diff --git a/src/parquet2/src/schema/types/converted_type.rs b/src/parquet2/src/schema/types/converted_type.rs index b7db8847b7..51b9eb3136 100644 --- a/src/parquet2/src/schema/types/converted_type.rs +++ b/src/parquet2/src/schema/types/converted_type.rs @@ -149,9 +149,9 @@ impl TryFrom for GroupConvertedType { fn try_from(type_: ConvertedType) -> Result { Ok(match type_ { - ConvertedType::LIST => GroupConvertedType::List, - ConvertedType::MAP => GroupConvertedType::Map, - ConvertedType::MAP_KEY_VALUE => GroupConvertedType::MapKeyValue, + ConvertedType::LIST => Self::List, + ConvertedType::MAP => Self::Map, + ConvertedType::MAP_KEY_VALUE => Self::MapKeyValue, _ => return Err(Error::oos("LogicalType value out of range")), }) } @@ -160,9 +160,9 @@ impl TryFrom for GroupConvertedType { impl From for ConvertedType { fn from(type_: GroupConvertedType) -> Self { match type_ { - GroupConvertedType::Map => ConvertedType::MAP, - GroupConvertedType::List => ConvertedType::LIST, - GroupConvertedType::MapKeyValue => ConvertedType::MAP_KEY_VALUE, + GroupConvertedType::Map => Self::MAP, + GroupConvertedType::List => Self::LIST, + GroupConvertedType::MapKeyValue => Self::MAP_KEY_VALUE, } } } diff --git a/src/parquet2/src/schema/types/parquet_type.rs b/src/parquet2/src/schema/types/parquet_type.rs index c0fc739b44..32b45c7df1 100644 --- a/src/parquet2/src/schema/types/parquet_type.rs +++ b/src/parquet2/src/schema/types/parquet_type.rs @@ -70,7 +70,7 @@ impl ParquetType { /// Checks if `sub_type` schema is part of current schema. /// This method can be used to check if projected columns are part of the root schema. - pub fn check_contains(&self, sub_type: &ParquetType) -> bool { + pub fn check_contains(&self, sub_type: &Self) -> bool { let basic_match = self.get_field_info() == sub_type.get_field_info(); match (self, sub_type) { @@ -112,13 +112,13 @@ impl ParquetType { /// Constructors impl ParquetType { - pub(crate) fn new_root(name: String, fields: Vec) -> Self { + pub(crate) fn new_root(name: String, fields: Vec) -> Self { let field_info = FieldInfo { name, repetition: Repetition::Optional, id: None, }; - ParquetType::GroupType { + Self::GroupType { field_info, fields, logical_type: None, @@ -128,7 +128,7 @@ impl ParquetType { pub fn from_converted( name: String, - fields: Vec, + fields: Vec, repetition: Repetition, converted_type: Option, id: Option, @@ -139,7 +139,7 @@ impl ParquetType { id, }; - ParquetType::GroupType { + Self::GroupType { field_info, fields, converted_type, @@ -166,7 +166,7 @@ impl ParquetType { id, }; - Ok(ParquetType::PrimitiveType(PrimitiveType { + Ok(Self::PrimitiveType(PrimitiveType { field_info, converted_type, logical_type, @@ -177,7 +177,7 @@ impl ParquetType { /// Helper method to create a [`ParquetType::PrimitiveType`] optional field /// with no logical or converted types. pub fn from_physical(name: String, physical_type: PhysicalType) -> Self { - ParquetType::PrimitiveType(PrimitiveType::from_physical(name, physical_type)) + Self::PrimitiveType(PrimitiveType::from_physical(name, physical_type)) } pub fn from_group( @@ -185,7 +185,7 @@ impl ParquetType { repetition: Repetition, converted_type: Option, logical_type: Option, - fields: Vec, + fields: Vec, id: Option, ) -> Self { let field_info = FieldInfo { @@ -194,7 +194,7 @@ impl ParquetType { id, }; - ParquetType::GroupType { + Self::GroupType { field_info, logical_type, converted_type, diff --git a/src/parquet2/src/schema/types/physical_type.rs b/src/parquet2/src/schema/types/physical_type.rs index b114f51dad..985265eab4 100644 --- a/src/parquet2/src/schema/types/physical_type.rs +++ b/src/parquet2/src/schema/types/physical_type.rs @@ -22,17 +22,17 @@ impl TryFrom<(Type, Option)> for PhysicalType { fn try_from((type_, length): (Type, Option)) -> Result { Ok(match type_ { - Type::BOOLEAN => PhysicalType::Boolean, - Type::INT32 => PhysicalType::Int32, - Type::INT64 => PhysicalType::Int64, - Type::INT96 => PhysicalType::Int96, - Type::FLOAT => PhysicalType::Float, - Type::DOUBLE => PhysicalType::Double, - Type::BYTE_ARRAY => PhysicalType::ByteArray, + Type::BOOLEAN => Self::Boolean, + Type::INT32 => Self::Int32, + Type::INT64 => Self::Int64, + Type::INT96 => Self::Int96, + Type::FLOAT => Self::Float, + Type::DOUBLE => Self::Double, + Type::BYTE_ARRAY => Self::ByteArray, Type::FIXED_LEN_BYTE_ARRAY => { let length = length .ok_or_else(|| Error::oos("Length must be defined for FixedLenByteArray"))?; - PhysicalType::FixedLenByteArray(length.try_into()?) + Self::FixedLenByteArray(length.try_into()?) } _ => return Err(Error::oos("Unknown type")), }) diff --git a/src/parquet2/src/write/row_group.rs b/src/parquet2/src/write/row_group.rs index a39b1bc842..ff0396f8fc 100644 --- a/src/parquet2/src/write/row_group.rs +++ b/src/parquet2/src/write/row_group.rs @@ -26,8 +26,8 @@ pub struct ColumnOffsetsMetadata { } impl ColumnOffsetsMetadata { - pub fn from_column_chunk(column_chunk: &ColumnChunk) -> ColumnOffsetsMetadata { - ColumnOffsetsMetadata { + pub fn from_column_chunk(column_chunk: &ColumnChunk) -> Self { + Self { dictionary_page_offset: column_chunk .meta_data .as_ref() @@ -42,8 +42,8 @@ impl ColumnOffsetsMetadata { pub fn from_column_chunk_metadata( column_chunk_metadata: &ColumnChunkMetaData, - ) -> ColumnOffsetsMetadata { - ColumnOffsetsMetadata { + ) -> Self { + Self { dictionary_page_offset: column_chunk_metadata.dictionary_page_offset(), data_page_offset: Some(column_chunk_metadata.data_page_offset()), } From 33c6b8759b2255f2a7d6ecf0132af560544e9caa Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Wed, 25 Sep 2024 11:28:58 -0700 Subject: [PATCH 2/3] reset arrow2, parquet to main --- src/arrow2/Cargo.toml | 3 - src/arrow2/src/array/binary/mod.rs | 14 +- src/arrow2/src/array/binary/mutable.rs | 2 +- src/arrow2/src/array/binary/mutable_values.rs | 4 +- src/arrow2/src/array/boolean/mod.rs | 6 +- src/arrow2/src/array/boolean/mutable.rs | 6 +- src/arrow2/src/array/dictionary/ffi.rs | 2 +- src/arrow2/src/array/dictionary/mutable.rs | 2 +- src/arrow2/src/array/equal/mod.rs | 24 +- .../src/array/fixed_size_binary/mutable.rs | 2 +- .../src/array/fixed_size_list/mutable.rs | 2 +- src/arrow2/src/array/growable/binary.rs | 2 +- src/arrow2/src/array/growable/boolean.rs | 2 +- src/arrow2/src/array/growable/fixed_binary.rs | 2 +- src/arrow2/src/array/growable/null.rs | 2 +- src/arrow2/src/array/growable/primitive.rs | 2 +- src/arrow2/src/array/growable/structure.rs | 2 +- src/arrow2/src/array/growable/union.rs | 2 +- src/arrow2/src/array/indexable.rs | 2 +- src/arrow2/src/array/list/mutable.rs | 4 +- src/arrow2/src/array/primitive/mod.rs | 8 +- src/arrow2/src/array/primitive/mutable.rs | 2 +- src/arrow2/src/array/struct_/mutable.rs | 2 +- src/arrow2/src/array/utf8/mod.rs | 16 +- src/arrow2/src/array/utf8/mutable.rs | 2 +- src/arrow2/src/array/utf8/mutable_values.rs | 4 +- src/arrow2/src/bitmap/immutable.rs | 8 +- src/arrow2/src/bitmap/mutable.rs | 4 +- src/arrow2/src/bitmap/utils/zip_validity.rs | 4 +- src/arrow2/src/buffer/immutable.rs | 2 +- src/arrow2/src/buffer/mod.rs | 2 +- .../src/compute/arithmetics/basic/add.rs | 20 +- .../src/compute/arithmetics/basic/div.rs | 8 +- .../src/compute/arithmetics/basic/mul.rs | 20 +- .../src/compute/arithmetics/basic/rem.rs | 8 +- .../src/compute/arithmetics/basic/sub.rs | 20 +- .../src/compute/arithmetics/decimal/add.rs | 12 +- .../src/compute/arithmetics/decimal/div.rs | 8 +- .../src/compute/arithmetics/decimal/mul.rs | 12 +- .../src/compute/arithmetics/decimal/sub.rs | 12 +- src/arrow2/src/datatypes/field.rs | 2 +- src/arrow2/src/datatypes/mod.rs | 48 ++-- src/arrow2/src/datatypes/schema.rs | 2 +- src/arrow2/src/ffi/array.rs | 4 +- src/arrow2/src/ffi/schema.rs | 2 +- src/arrow2/src/io/csv/mod.rs | 4 +- src/arrow2/src/io/csv/read_async/mod.rs | 2 +- src/arrow2/src/io/csv/write/serialize.rs | 2 +- src/arrow2/src/io/flight/mod.rs | 2 +- src/arrow2/src/io/ipc/append/mod.rs | 4 +- src/arrow2/src/io/ipc/read/error.rs | 4 +- src/arrow2/src/io/ipc/read/stream.rs | 2 +- src/arrow2/src/io/json/read/deserialize.rs | 12 +- src/arrow2/src/io/json/read/mod.rs | 2 +- src/arrow2/src/io/json/write/utf8.rs | 18 +- src/arrow2/src/io/json_integration/mod.rs | 2 +- src/arrow2/src/io/orc/mod.rs | 2 +- src/arrow2/src/io/parquet/mod.rs | 8 +- .../src/io/parquet/read/deserialize/utils.rs | 4 +- src/arrow2/src/io/parquet/read/schema/mod.rs | 2 +- src/arrow2/src/io/parquet/write/pages.rs | 8 +- src/arrow2/src/types/native.rs | 20 +- src/parquet2/Cargo.toml | 3 - src/parquet2/src/encoding/bitpacked/mod.rs | 10 +- src/parquet2/src/error.rs | 6 +- src/parquet2/src/metadata/column_order.rs | 4 +- src/parquet2/src/metadata/file_metadata.rs | 2 +- src/parquet2/src/metadata/row_metadata.rs | 6 +- src/parquet2/src/page/mod.rs | 24 +- src/parquet2/src/parquet_bridge.rs | 218 +++++++++--------- .../src/schema/io_thrift/from_thrift.rs | 2 +- .../src/schema/types/converted_type.rs | 12 +- src/parquet2/src/schema/types/parquet_type.rs | 18 +- .../src/schema/types/physical_type.rs | 16 +- src/parquet2/src/write/row_group.rs | 8 +- 75 files changed, 370 insertions(+), 376 deletions(-) diff --git a/src/arrow2/Cargo.toml b/src/arrow2/Cargo.toml index ddee4f616e..0664947831 100644 --- a/src/arrow2/Cargo.toml +++ b/src/arrow2/Cargo.toml @@ -232,9 +232,6 @@ simd = [] bench = false name = "arrow2" -[lints] -workspace = true - [package] authors = [ "Jorge C. Leitao ", diff --git a/src/arrow2/src/array/binary/mod.rs b/src/arrow2/src/array/binary/mod.rs index 3a89456e96..0dbe71704d 100644 --- a/src/arrow2/src/array/binary/mod.rs +++ b/src/arrow2/src/array/binary/mod.rs @@ -237,26 +237,26 @@ impl BinaryArray { if let Some(bitmap) = self.validity { match bitmap.into_mut() { // Safety: invariants are preserved - Left(bitmap) => Left(Self::new( + Left(bitmap) => Left(BinaryArray::new( self.data_type, self.offsets, self.values, Some(bitmap), )), Right(mutable_bitmap) => match (self.values.into_mut(), self.offsets.into_mut()) { - (Left(values), Left(offsets)) => Left(Self::new( + (Left(values), Left(offsets)) => Left(BinaryArray::new( self.data_type, offsets, values, Some(mutable_bitmap.into()), )), - (Left(values), Right(offsets)) => Left(Self::new( + (Left(values), Right(offsets)) => Left(BinaryArray::new( self.data_type, offsets.into(), values, Some(mutable_bitmap.into()), )), - (Right(values), Left(offsets)) => Left(Self::new( + (Right(values), Left(offsets)) => Left(BinaryArray::new( self.data_type, offsets, values.into(), @@ -276,15 +276,15 @@ impl BinaryArray { } else { match (self.values.into_mut(), self.offsets.into_mut()) { (Left(values), Left(offsets)) => { - Left(Self::new(self.data_type, offsets, values, None)) + Left(BinaryArray::new(self.data_type, offsets, values, None)) } - (Left(values), Right(offsets)) => Left(Self::new( + (Left(values), Right(offsets)) => Left(BinaryArray::new( self.data_type, offsets.into(), values, None, )), - (Right(values), Left(offsets)) => Left(Self::new( + (Right(values), Left(offsets)) => Left(BinaryArray::new( self.data_type, offsets, values.into(), diff --git a/src/arrow2/src/array/binary/mutable.rs b/src/arrow2/src/array/binary/mutable.rs index 7068fc84ef..13aed8704f 100644 --- a/src/arrow2/src/array/binary/mutable.rs +++ b/src/arrow2/src/array/binary/mutable.rs @@ -31,7 +31,7 @@ impl From> for BinaryArray { let validity: Option = x.into(); validity }); - let array: Self = other.values.into(); + let array: BinaryArray = other.values.into(); array.with_validity(validity) } } diff --git a/src/arrow2/src/array/binary/mutable_values.rs b/src/arrow2/src/array/binary/mutable_values.rs index 557171b4d3..ad6b09f368 100644 --- a/src/arrow2/src/array/binary/mutable_values.rs +++ b/src/arrow2/src/array/binary/mutable_values.rs @@ -26,7 +26,7 @@ pub struct MutableBinaryValuesArray { impl From> for BinaryArray { fn from(other: MutableBinaryValuesArray) -> Self { - Self::new( + BinaryArray::::new( other.data_type, other.offsets.into(), other.values.into(), @@ -37,7 +37,7 @@ impl From> for BinaryArray { impl From> for MutableBinaryArray { fn from(other: MutableBinaryValuesArray) -> Self { - Self::try_new(other.data_type, other.offsets, other.values, None) + MutableBinaryArray::::try_new(other.data_type, other.offsets, other.values, None) .expect("MutableBinaryValuesArray is consistent with MutableBinaryArray") } } diff --git a/src/arrow2/src/array/boolean/mod.rs b/src/arrow2/src/array/boolean/mod.rs index 4ad296d4d9..b817dab283 100644 --- a/src/arrow2/src/array/boolean/mod.rs +++ b/src/arrow2/src/array/boolean/mod.rs @@ -237,9 +237,9 @@ impl BooleanArray { if let Some(bitmap) = self.validity { match bitmap.into_mut() { - Left(bitmap) => Left(Self::new(self.data_type, self.values, Some(bitmap))), + Left(bitmap) => Left(BooleanArray::new(self.data_type, self.values, Some(bitmap))), Right(mutable_bitmap) => match self.values.into_mut() { - Left(immutable) => Left(Self::new( + Left(immutable) => Left(BooleanArray::new( self.data_type, immutable, Some(mutable_bitmap.into()), @@ -252,7 +252,7 @@ impl BooleanArray { } } else { match self.values.into_mut() { - Left(immutable) => Left(Self::new(self.data_type, immutable, None)), + Left(immutable) => Left(BooleanArray::new(self.data_type, immutable, None)), Right(mutable) => { Right(MutableBooleanArray::try_new(self.data_type, mutable, None).unwrap()) } diff --git a/src/arrow2/src/array/boolean/mutable.rs b/src/arrow2/src/array/boolean/mutable.rs index b2162523dd..213db18e92 100644 --- a/src/arrow2/src/array/boolean/mutable.rs +++ b/src/arrow2/src/array/boolean/mutable.rs @@ -26,7 +26,7 @@ pub struct MutableBooleanArray { impl From for BooleanArray { fn from(other: MutableBooleanArray) -> Self { - Self::new( + BooleanArray::new( other.data_type, other.values.into(), other.validity.map(|x| x.into()), @@ -267,7 +267,7 @@ impl MutableBooleanArray { ) -> Self { let mut mutable = MutableBitmap::new(); mutable.extend_from_trusted_len_iter_unchecked(iterator); - Self::try_new(DataType::Boolean, mutable, None).unwrap() + MutableBooleanArray::try_new(DataType::Boolean, mutable, None).unwrap() } /// Creates a new [`MutableBooleanArray`] from a slice of `bool`. @@ -474,7 +474,7 @@ impl>> FromIterator for MutableBoolea None }; - Self::try_new(DataType::Boolean, values, validity).unwrap() + MutableBooleanArray::try_new(DataType::Boolean, values, validity).unwrap() } } diff --git a/src/arrow2/src/array/dictionary/ffi.rs b/src/arrow2/src/array/dictionary/ffi.rs index af17fafc21..fb84665276 100644 --- a/src/arrow2/src/array/dictionary/ffi.rs +++ b/src/arrow2/src/array/dictionary/ffi.rs @@ -39,6 +39,6 @@ impl FromFfi for DictionaryArray let values = ffi::try_from(values)?; // the assumption of this trait - Self::try_new_unchecked(data_type, keys, values) + DictionaryArray::::try_new_unchecked(data_type, keys, values) } } diff --git a/src/arrow2/src/array/dictionary/mutable.rs b/src/arrow2/src/array/dictionary/mutable.rs index 2e02f958ef..b48a57a945 100644 --- a/src/arrow2/src/array/dictionary/mutable.rs +++ b/src/arrow2/src/array/dictionary/mutable.rs @@ -39,7 +39,7 @@ impl From> for D fn from(other: MutableDictionaryArray) -> Self { // Safety - the invariant of this struct ensures that this is up-held unsafe { - Self::try_new_unchecked( + DictionaryArray::::try_new_unchecked( other.data_type, other.keys.into(), other.map.into_values().as_box(), diff --git a/src/arrow2/src/array/equal/mod.rs b/src/arrow2/src/array/equal/mod.rs index 4e0a70664e..2bb3ba77f1 100644 --- a/src/arrow2/src/array/equal/mod.rs +++ b/src/arrow2/src/array/equal/mod.rs @@ -34,7 +34,7 @@ impl PartialEq for Box { } } -impl PartialEq for NullArray { +impl PartialEq for NullArray { fn eq(&self, other: &Self) -> bool { null::equal(self, other) } @@ -58,13 +58,13 @@ impl PartialEq> for &dyn Array { } } -impl PartialEq for PrimitiveArray { +impl PartialEq> for PrimitiveArray { fn eq(&self, other: &Self) -> bool { primitive::equal::(self, other) } } -impl PartialEq for BooleanArray { +impl PartialEq for BooleanArray { fn eq(&self, other: &Self) -> bool { equal(self, other) } @@ -76,7 +76,7 @@ impl PartialEq<&dyn Array> for BooleanArray { } } -impl PartialEq for Utf8Array { +impl PartialEq> for Utf8Array { fn eq(&self, other: &Self) -> bool { utf8::equal(self, other) } @@ -94,7 +94,7 @@ impl PartialEq> for &dyn Array { } } -impl PartialEq for BinaryArray { +impl PartialEq> for BinaryArray { fn eq(&self, other: &Self) -> bool { binary::equal(self, other) } @@ -112,7 +112,7 @@ impl PartialEq> for &dyn Array { } } -impl PartialEq for FixedSizeBinaryArray { +impl PartialEq for FixedSizeBinaryArray { fn eq(&self, other: &Self) -> bool { fixed_size_binary::equal(self, other) } @@ -124,7 +124,7 @@ impl PartialEq<&dyn Array> for FixedSizeBinaryArray { } } -impl PartialEq for ListArray { +impl PartialEq> for ListArray { fn eq(&self, other: &Self) -> bool { list::equal(self, other) } @@ -136,7 +136,7 @@ impl PartialEq<&dyn Array> for ListArray { } } -impl PartialEq for FixedSizeListArray { +impl PartialEq for FixedSizeListArray { fn eq(&self, other: &Self) -> bool { fixed_size_list::equal(self, other) } @@ -148,7 +148,7 @@ impl PartialEq<&dyn Array> for FixedSizeListArray { } } -impl PartialEq for StructArray { +impl PartialEq for StructArray { fn eq(&self, other: &Self) -> bool { struct_::equal(self, other) } @@ -160,7 +160,7 @@ impl PartialEq<&dyn Array> for StructArray { } } -impl PartialEq for DictionaryArray { +impl PartialEq> for DictionaryArray { fn eq(&self, other: &Self) -> bool { dictionary::equal(self, other) } @@ -172,7 +172,7 @@ impl PartialEq<&dyn Array> for DictionaryArray { } } -impl PartialEq for UnionArray { +impl PartialEq for UnionArray { fn eq(&self, other: &Self) -> bool { union::equal(self, other) } @@ -184,7 +184,7 @@ impl PartialEq<&dyn Array> for UnionArray { } } -impl PartialEq for MapArray { +impl PartialEq for MapArray { fn eq(&self, other: &Self) -> bool { map::equal(self, other) } diff --git a/src/arrow2/src/array/fixed_size_binary/mutable.rs b/src/arrow2/src/array/fixed_size_binary/mutable.rs index e0a73611be..9009f2702d 100644 --- a/src/arrow2/src/array/fixed_size_binary/mutable.rs +++ b/src/arrow2/src/array/fixed_size_binary/mutable.rs @@ -23,7 +23,7 @@ pub struct MutableFixedSizeBinaryArray { impl From for FixedSizeBinaryArray { fn from(other: MutableFixedSizeBinaryArray) -> Self { - Self::new( + FixedSizeBinaryArray::new( other.data_type, other.values.into(), other.validity.map(|x| x.into()), diff --git a/src/arrow2/src/array/fixed_size_list/mutable.rs b/src/arrow2/src/array/fixed_size_list/mutable.rs index 718c3222e2..1e387a2f70 100644 --- a/src/arrow2/src/array/fixed_size_list/mutable.rs +++ b/src/arrow2/src/array/fixed_size_list/mutable.rs @@ -24,7 +24,7 @@ pub struct MutableFixedSizeListArray { impl From> for FixedSizeListArray { fn from(mut other: MutableFixedSizeListArray) -> Self { - Self::new( + FixedSizeListArray::new( other.data_type, other.values.as_box(), other.validity.map(|x| x.into()), diff --git a/src/arrow2/src/array/growable/binary.rs b/src/arrow2/src/array/growable/binary.rs index ca2f9d0156..53ff0ae4fe 100644 --- a/src/arrow2/src/array/growable/binary.rs +++ b/src/arrow2/src/array/growable/binary.rs @@ -97,7 +97,7 @@ impl<'a, O: Offset> Growable<'a> for GrowableBinary<'a, O> { impl<'a, O: Offset> From> for BinaryArray { fn from(val: GrowableBinary<'a, O>) -> Self { - Self::new( + BinaryArray::::new( val.data_type, val.offsets.into(), val.values.into(), diff --git a/src/arrow2/src/array/growable/boolean.rs b/src/arrow2/src/array/growable/boolean.rs index 1f9a193d1d..0cb1213403 100644 --- a/src/arrow2/src/array/growable/boolean.rs +++ b/src/arrow2/src/array/growable/boolean.rs @@ -91,6 +91,6 @@ impl<'a> Growable<'a> for GrowableBoolean<'a> { impl<'a> From> for BooleanArray { fn from(val: GrowableBoolean<'a>) -> Self { - Self::new(val.data_type, val.values.into(), val.validity.into()) + BooleanArray::new(val.data_type, val.values.into(), val.validity.into()) } } diff --git a/src/arrow2/src/array/growable/fixed_binary.rs b/src/arrow2/src/array/growable/fixed_binary.rs index b51125612f..763bd59c81 100644 --- a/src/arrow2/src/array/growable/fixed_binary.rs +++ b/src/arrow2/src/array/growable/fixed_binary.rs @@ -94,7 +94,7 @@ impl<'a> Growable<'a> for GrowableFixedSizeBinary<'a> { impl<'a> From> for FixedSizeBinaryArray { fn from(val: GrowableFixedSizeBinary<'a>) -> Self { - Self::new( + FixedSizeBinaryArray::new( val.arrays[0].data_type().clone(), val.values.into(), val.validity.into(), diff --git a/src/arrow2/src/array/growable/null.rs b/src/arrow2/src/array/growable/null.rs index 20a9d308b4..ac97c47828 100644 --- a/src/arrow2/src/array/growable/null.rs +++ b/src/arrow2/src/array/growable/null.rs @@ -54,6 +54,6 @@ impl<'a> Growable<'a> for GrowableNull { impl From for NullArray { fn from(val: GrowableNull) -> Self { - Self::new(val.data_type, val.length) + NullArray::new(val.data_type, val.length) } } diff --git a/src/arrow2/src/array/growable/primitive.rs b/src/arrow2/src/array/growable/primitive.rs index 441c6d0642..e443756cb9 100644 --- a/src/arrow2/src/array/growable/primitive.rs +++ b/src/arrow2/src/array/growable/primitive.rs @@ -101,6 +101,6 @@ impl<'a, T: NativeType> Growable<'a> for GrowablePrimitive<'a, T> { impl<'a, T: NativeType> From> for PrimitiveArray { #[inline] fn from(val: GrowablePrimitive<'a, T>) -> Self { - Self::new(val.data_type, val.values.into(), val.validity.into()) + PrimitiveArray::::new(val.data_type, val.values.into(), val.validity.into()) } } diff --git a/src/arrow2/src/array/growable/structure.rs b/src/arrow2/src/array/growable/structure.rs index 3e4e5f27df..ddf5899422 100644 --- a/src/arrow2/src/array/growable/structure.rs +++ b/src/arrow2/src/array/growable/structure.rs @@ -129,7 +129,7 @@ impl<'a> From> for StructArray { fn from(val: GrowableStruct<'a>) -> Self { let values = val.values.into_iter().map(|mut x| x.as_box()).collect(); - Self::new( + StructArray::new( val.arrays[0].data_type().clone(), values, val.validity.into(), diff --git a/src/arrow2/src/array/growable/union.rs b/src/arrow2/src/array/growable/union.rs index 2b93936833..cccde2ee96 100644 --- a/src/arrow2/src/array/growable/union.rs +++ b/src/arrow2/src/array/growable/union.rs @@ -111,7 +111,7 @@ impl<'a> From> for UnionArray { fn from(val: GrowableUnion<'a>) -> Self { let fields = val.fields.into_iter().map(|mut x| x.as_box()).collect(); - Self::new( + UnionArray::new( val.arrays[0].data_type().clone(), val.types.into(), fields, diff --git a/src/arrow2/src/array/indexable.rs b/src/arrow2/src/array/indexable.rs index 7ef7b28155..76001bfcf5 100644 --- a/src/arrow2/src/array/indexable.rs +++ b/src/arrow2/src/array/indexable.rs @@ -49,7 +49,7 @@ impl Indexable for MutableBooleanArray { impl AsIndexed for bool { #[inline] - fn as_indexed(&self) -> &Self { + fn as_indexed(&self) -> &bool { self } } diff --git a/src/arrow2/src/array/list/mutable.rs b/src/arrow2/src/array/list/mutable.rs index 1f4003f25c..d24475e86d 100644 --- a/src/arrow2/src/array/list/mutable.rs +++ b/src/arrow2/src/array/list/mutable.rs @@ -54,7 +54,7 @@ impl Default for MutableListArray { impl From> for ListArray { fn from(mut other: MutableListArray) -> Self { - Self::new( + ListArray::new( other.data_type, other.offsets.into(), other.values.as_box(), @@ -266,7 +266,7 @@ impl MutableListArray { impl MutableArray for MutableListArray { fn len(&self) -> usize { - Self::len(self) + MutableListArray::len(self) } fn validity(&self) -> Option<&MutableBitmap> { diff --git a/src/arrow2/src/array/primitive/mod.rs b/src/arrow2/src/array/primitive/mod.rs index a9dbd2bc02..15058d1130 100644 --- a/src/arrow2/src/array/primitive/mod.rs +++ b/src/arrow2/src/array/primitive/mod.rs @@ -330,7 +330,7 @@ impl PrimitiveArray { if let Some(bitmap) = self.validity { match bitmap.into_mut() { - Left(bitmap) => Left(Self::new( + Left(bitmap) => Left(PrimitiveArray::new( self.data_type, self.values, Some(bitmap), @@ -344,7 +344,7 @@ impl PrimitiveArray { ) .unwrap(), ), - Left(values) => Left(Self::new( + Left(values) => Left(PrimitiveArray::new( self.data_type, values, Some(mutable_bitmap.into()), @@ -356,7 +356,7 @@ impl PrimitiveArray { Right(values) => { Right(MutablePrimitiveArray::try_new(self.data_type, values, None).unwrap()) } - Left(values) => Left(Self::new(self.data_type, values, None)), + Left(values) => Left(PrimitiveArray::new(self.data_type, values, None)), } } } @@ -509,6 +509,6 @@ pub type UInt64Vec = MutablePrimitiveArray; impl Default for PrimitiveArray { fn default() -> Self { - Self::new(T::PRIMITIVE.into(), Default::default(), None) + PrimitiveArray::new(T::PRIMITIVE.into(), Default::default(), None) } } diff --git a/src/arrow2/src/array/primitive/mutable.rs b/src/arrow2/src/array/primitive/mutable.rs index 7548abc3cc..1d320ebf0c 100644 --- a/src/arrow2/src/array/primitive/mutable.rs +++ b/src/arrow2/src/array/primitive/mutable.rs @@ -34,7 +34,7 @@ impl From> for PrimitiveArray { } }); - Self::new(other.data_type, other.values.into(), validity) + PrimitiveArray::::new(other.data_type, other.values.into(), validity) } } diff --git a/src/arrow2/src/array/struct_/mutable.rs b/src/arrow2/src/array/struct_/mutable.rs index e8ab673b40..4754474fb5 100644 --- a/src/arrow2/src/array/struct_/mutable.rs +++ b/src/arrow2/src/array/struct_/mutable.rs @@ -79,7 +79,7 @@ impl From for StructArray { None }; - Self::new( + StructArray::new( other.data_type, other.values.into_iter().map(|mut v| v.as_box()).collect(), validity, diff --git a/src/arrow2/src/array/utf8/mod.rs b/src/arrow2/src/array/utf8/mod.rs index 69a060c80a..6a64505a7b 100644 --- a/src/arrow2/src/array/utf8/mod.rs +++ b/src/arrow2/src/array/utf8/mod.rs @@ -259,7 +259,7 @@ impl Utf8Array { match bitmap.into_mut() { // Safety: invariants are preserved Left(bitmap) => Left(unsafe { - Self::new_unchecked( + Utf8Array::new_unchecked( self.data_type, self.offsets, self.values, @@ -270,7 +270,7 @@ impl Utf8Array { (Left(values), Left(offsets)) => { // Safety: invariants are preserved Left(unsafe { - Self::new_unchecked( + Utf8Array::new_unchecked( self.data_type, offsets, values, @@ -281,7 +281,7 @@ impl Utf8Array { (Left(values), Right(offsets)) => { // Safety: invariants are preserved Left(unsafe { - Self::new_unchecked( + Utf8Array::new_unchecked( self.data_type, offsets.into(), values, @@ -292,7 +292,7 @@ impl Utf8Array { (Right(values), Left(offsets)) => { // Safety: invariants are preserved Left(unsafe { - Self::new_unchecked( + Utf8Array::new_unchecked( self.data_type, offsets, values.into(), @@ -313,13 +313,13 @@ impl Utf8Array { } else { match (self.values.into_mut(), self.offsets.into_mut()) { (Left(values), Left(offsets)) => { - Left(unsafe { Self::new_unchecked(self.data_type, offsets, values, None) }) + Left(unsafe { Utf8Array::new_unchecked(self.data_type, offsets, values, None) }) } (Left(values), Right(offsets)) => Left(unsafe { - Self::new_unchecked(self.data_type, offsets.into(), values, None) + Utf8Array::new_unchecked(self.data_type, offsets.into(), values, None) }), (Right(values), Left(offsets)) => Left(unsafe { - Self::new_unchecked(self.data_type, offsets, values.into(), None) + Utf8Array::new_unchecked(self.data_type, offsets, values.into(), None) }), (Right(values), Right(offsets)) => Right(unsafe { MutableUtf8Array::new_unchecked(self.data_type, offsets, values, None) @@ -546,6 +546,6 @@ impl Default for Utf8Array { } else { DataType::Utf8 }; - Self::new(data_type, Default::default(), Default::default(), None) + Utf8Array::new(data_type, Default::default(), Default::default(), None) } } diff --git a/src/arrow2/src/array/utf8/mutable.rs b/src/arrow2/src/array/utf8/mutable.rs index bad87fcd61..355dff2ae1 100644 --- a/src/arrow2/src/array/utf8/mutable.rs +++ b/src/arrow2/src/array/utf8/mutable.rs @@ -29,7 +29,7 @@ impl From> for Utf8Array { let validity: Option = x.into(); validity }); - let array: Self = other.values.into(); + let array: Utf8Array = other.values.into(); array.with_validity(validity) } } diff --git a/src/arrow2/src/array/utf8/mutable_values.rs b/src/arrow2/src/array/utf8/mutable_values.rs index 0b4d3bcdbf..aad32a7fd0 100644 --- a/src/arrow2/src/array/utf8/mutable_values.rs +++ b/src/arrow2/src/array/utf8/mutable_values.rs @@ -30,7 +30,7 @@ impl From> for Utf8Array { // `MutableUtf8ValuesArray` has the same invariants as `Utf8Array` and thus // `Utf8Array` can be safely created from `MutableUtf8ValuesArray` without checks. unsafe { - Self::new_unchecked( + Utf8Array::::new_unchecked( other.data_type, other.offsets.into(), other.values.into(), @@ -45,7 +45,7 @@ impl From> for MutableUtf8Array { // Safety: // `MutableUtf8ValuesArray` has the same invariants as `MutableUtf8Array` unsafe { - Self::new_unchecked(other.data_type, other.offsets, other.values, None) + MutableUtf8Array::::new_unchecked(other.data_type, other.offsets, other.values, None) } } } diff --git a/src/arrow2/src/bitmap/immutable.rs b/src/arrow2/src/bitmap/immutable.rs index 498d9bdc43..b7e0069a15 100644 --- a/src/arrow2/src/bitmap/immutable.rs +++ b/src/arrow2/src/bitmap/immutable.rs @@ -298,7 +298,7 @@ impl Bitmap { // don't use `MutableBitmap::from_len_zeroed().into()` // it triggers a bitcount let bytes = vec![0; length.saturating_add(7) / 8]; - unsafe { Self::from_inner_unchecked(Arc::new(bytes.into()), 0, length, length) } + unsafe { Bitmap::from_inner_unchecked(Arc::new(bytes.into()), 0, length, length) } } /// Initializes an new [`Bitmap`] filled with set values. @@ -307,7 +307,7 @@ impl Bitmap { // just set each byte to u8::MAX // we will not access data with index >= length let bytes = vec![0b11111111u8; length.saturating_add(7) / 8]; - unsafe { Self::from_inner_unchecked(Arc::new(bytes.into()), 0, length, 0) } + unsafe { Bitmap::from_inner_unchecked(Arc::new(bytes.into()), 0, length, 0) } } /// Counts the nulls (unset bits) starting from `offset` bits and for `length` bits. @@ -321,7 +321,7 @@ impl Bitmap { /// Panics iff `length <= bytes.len() * 8` #[inline] pub fn from_u8_slice>(slice: T, length: usize) -> Self { - Self::try_new(slice.as_ref().to_vec(), length).unwrap() + Bitmap::try_new(slice.as_ref().to_vec(), length).unwrap() } /// Alias for `Bitmap::try_new().unwrap()` @@ -330,7 +330,7 @@ impl Bitmap { /// This function panics iff `length <= bytes.len() * 8` #[inline] pub fn from_u8_vec(vec: Vec, length: usize) -> Self { - Self::try_new(vec, length).unwrap() + Bitmap::try_new(vec, length).unwrap() } /// Returns whether the bit at position `i` is set. diff --git a/src/arrow2/src/bitmap/mutable.rs b/src/arrow2/src/bitmap/mutable.rs index 8f0fd30d1b..cb77decd84 100644 --- a/src/arrow2/src/bitmap/mutable.rs +++ b/src/arrow2/src/bitmap/mutable.rs @@ -330,7 +330,7 @@ impl MutableBitmap { impl From for Bitmap { #[inline] fn from(buffer: MutableBitmap) -> Self { - Self::try_new(buffer.buffer, buffer.length).unwrap() + Bitmap::try_new(buffer.buffer, buffer.length).unwrap() } } @@ -359,7 +359,7 @@ impl From for Option { impl> From

for MutableBitmap { #[inline] fn from(slice: P) -> Self { - Self::from_trusted_len_iter(slice.as_ref().iter().copied()) + MutableBitmap::from_trusted_len_iter(slice.as_ref().iter().copied()) } } diff --git a/src/arrow2/src/bitmap/utils/zip_validity.rs b/src/arrow2/src/bitmap/utils/zip_validity.rs index dc85757d23..abcccfa489 100644 --- a/src/arrow2/src/bitmap/utils/zip_validity.rs +++ b/src/arrow2/src/bitmap/utils/zip_validity.rs @@ -202,7 +202,7 @@ where /// Unwrap into an iterator that has no null values. pub fn unwrap_required(self) -> I { match self { - Self::Required(i) => i, + ZipValidity::Required(i) => i, _ => panic!("Could not 'unwrap_required'. 'ZipValidity' iterator has nulls."), } } @@ -210,7 +210,7 @@ where /// Unwrap into an iterator that has null values. pub fn unwrap_optional(self) -> ZipValidityIter { match self { - Self::Optional(i) => i, + ZipValidity::Optional(i) => i, _ => panic!("Could not 'unwrap_optional'. 'ZipValidity' iterator has no nulls."), } } diff --git a/src/arrow2/src/buffer/immutable.rs b/src/arrow2/src/buffer/immutable.rs index d318a9d76e..2a2a0d312a 100644 --- a/src/arrow2/src/buffer/immutable.rs +++ b/src/arrow2/src/buffer/immutable.rs @@ -77,7 +77,7 @@ impl Buffer { /// Auxiliary method to create a new Buffer pub(crate) fn from_bytes(bytes: Bytes) -> Self { let length = bytes.len(); - Self { + Buffer { data: Arc::new(bytes), offset: 0, length, diff --git a/src/arrow2/src/buffer/mod.rs b/src/arrow2/src/buffer/mod.rs index 7b54c6395d..9d93a9df24 100644 --- a/src/arrow2/src/buffer/mod.rs +++ b/src/arrow2/src/buffer/mod.rs @@ -51,7 +51,7 @@ impl From> for Bytes { #[inline] fn from(data: Vec) -> Self { let inner: BytesInner = data.into(); - Self(inner) + Bytes(inner) } } diff --git a/src/arrow2/src/compute/arithmetics/basic/add.rs b/src/arrow2/src/compute/arithmetics/basic/add.rs index 31635b125e..c7a80f2c70 100644 --- a/src/arrow2/src/compute/arithmetics/basic/add.rs +++ b/src/arrow2/src/compute/arithmetics/basic/add.rs @@ -138,50 +138,50 @@ where } // Implementation of ArrayAdd trait for PrimitiveArrays -impl ArrayAdd for PrimitiveArray +impl ArrayAdd> for PrimitiveArray where T: NativeArithmetics + Add, { - fn add(&self, rhs: &Self) -> Self { + fn add(&self, rhs: &PrimitiveArray) -> Self { add(self, rhs) } } -impl ArrayWrappingAdd for PrimitiveArray +impl ArrayWrappingAdd> for PrimitiveArray where T: NativeArithmetics + WrappingAdd, { - fn wrapping_add(&self, rhs: &Self) -> Self { + fn wrapping_add(&self, rhs: &PrimitiveArray) -> Self { wrapping_add(self, rhs) } } // Implementation of ArrayCheckedAdd trait for PrimitiveArrays -impl ArrayCheckedAdd for PrimitiveArray +impl ArrayCheckedAdd> for PrimitiveArray where T: NativeArithmetics + CheckedAdd, { - fn checked_add(&self, rhs: &Self) -> Self { + fn checked_add(&self, rhs: &PrimitiveArray) -> Self { checked_add(self, rhs) } } // Implementation of ArraySaturatingAdd trait for PrimitiveArrays -impl ArraySaturatingAdd for PrimitiveArray +impl ArraySaturatingAdd> for PrimitiveArray where T: NativeArithmetics + SaturatingAdd, { - fn saturating_add(&self, rhs: &Self) -> Self { + fn saturating_add(&self, rhs: &PrimitiveArray) -> Self { saturating_add(self, rhs) } } // Implementation of ArraySaturatingAdd trait for PrimitiveArrays -impl ArrayOverflowingAdd for PrimitiveArray +impl ArrayOverflowingAdd> for PrimitiveArray where T: NativeArithmetics + OverflowingAdd, { - fn overflowing_add(&self, rhs: &Self) -> (Self, Bitmap) { + fn overflowing_add(&self, rhs: &PrimitiveArray) -> (Self, Bitmap) { overflowing_add(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/basic/div.rs b/src/arrow2/src/compute/arithmetics/basic/div.rs index 88a5d1180b..b3fbd8e2f8 100644 --- a/src/arrow2/src/compute/arithmetics/basic/div.rs +++ b/src/arrow2/src/compute/arithmetics/basic/div.rs @@ -79,21 +79,21 @@ where } // Implementation of ArrayDiv trait for PrimitiveArrays -impl ArrayDiv for PrimitiveArray +impl ArrayDiv> for PrimitiveArray where T: NativeArithmetics + Div, { - fn div(&self, rhs: &Self) -> Self { + fn div(&self, rhs: &PrimitiveArray) -> Self { div(self, rhs) } } // Implementation of ArrayCheckedDiv trait for PrimitiveArrays -impl ArrayCheckedDiv for PrimitiveArray +impl ArrayCheckedDiv> for PrimitiveArray where T: NativeArithmetics + CheckedDiv, { - fn checked_div(&self, rhs: &Self) -> Self { + fn checked_div(&self, rhs: &PrimitiveArray) -> Self { checked_div(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/basic/mul.rs b/src/arrow2/src/compute/arithmetics/basic/mul.rs index 64a95aa362..95861c54eb 100644 --- a/src/arrow2/src/compute/arithmetics/basic/mul.rs +++ b/src/arrow2/src/compute/arithmetics/basic/mul.rs @@ -139,50 +139,50 @@ where } // Implementation of ArrayMul trait for PrimitiveArrays -impl ArrayMul for PrimitiveArray +impl ArrayMul> for PrimitiveArray where T: NativeArithmetics + Mul, { - fn mul(&self, rhs: &Self) -> Self { + fn mul(&self, rhs: &PrimitiveArray) -> Self { mul(self, rhs) } } -impl ArrayWrappingMul for PrimitiveArray +impl ArrayWrappingMul> for PrimitiveArray where T: NativeArithmetics + WrappingMul, { - fn wrapping_mul(&self, rhs: &Self) -> Self { + fn wrapping_mul(&self, rhs: &PrimitiveArray) -> Self { wrapping_mul(self, rhs) } } // Implementation of ArrayCheckedMul trait for PrimitiveArrays -impl ArrayCheckedMul for PrimitiveArray +impl ArrayCheckedMul> for PrimitiveArray where T: NativeArithmetics + CheckedMul, { - fn checked_mul(&self, rhs: &Self) -> Self { + fn checked_mul(&self, rhs: &PrimitiveArray) -> Self { checked_mul(self, rhs) } } // Implementation of ArraySaturatingMul trait for PrimitiveArrays -impl ArraySaturatingMul for PrimitiveArray +impl ArraySaturatingMul> for PrimitiveArray where T: NativeArithmetics + SaturatingMul, { - fn saturating_mul(&self, rhs: &Self) -> Self { + fn saturating_mul(&self, rhs: &PrimitiveArray) -> Self { saturating_mul(self, rhs) } } // Implementation of ArraySaturatingMul trait for PrimitiveArrays -impl ArrayOverflowingMul for PrimitiveArray +impl ArrayOverflowingMul> for PrimitiveArray where T: NativeArithmetics + OverflowingMul, { - fn overflowing_mul(&self, rhs: &Self) -> (Self, Bitmap) { + fn overflowing_mul(&self, rhs: &PrimitiveArray) -> (Self, Bitmap) { overflowing_mul(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/basic/rem.rs b/src/arrow2/src/compute/arithmetics/basic/rem.rs index cba93d5d12..b2035ecfd1 100644 --- a/src/arrow2/src/compute/arithmetics/basic/rem.rs +++ b/src/arrow2/src/compute/arithmetics/basic/rem.rs @@ -61,20 +61,20 @@ where binary_checked(lhs, rhs, lhs.data_type().clone(), op) } -impl ArrayRem for PrimitiveArray +impl ArrayRem> for PrimitiveArray where T: NativeArithmetics + Rem, { - fn rem(&self, rhs: &Self) -> Self { + fn rem(&self, rhs: &PrimitiveArray) -> Self { rem(self, rhs) } } -impl ArrayCheckedRem for PrimitiveArray +impl ArrayCheckedRem> for PrimitiveArray where T: NativeArithmetics + CheckedRem, { - fn checked_rem(&self, rhs: &Self) -> Self { + fn checked_rem(&self, rhs: &PrimitiveArray) -> Self { checked_rem(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/basic/sub.rs b/src/arrow2/src/compute/arithmetics/basic/sub.rs index 327a9836fe..09c4d610df 100644 --- a/src/arrow2/src/compute/arithmetics/basic/sub.rs +++ b/src/arrow2/src/compute/arithmetics/basic/sub.rs @@ -138,50 +138,50 @@ where } // Implementation of ArraySub trait for PrimitiveArrays -impl ArraySub for PrimitiveArray +impl ArraySub> for PrimitiveArray where T: NativeArithmetics + Sub, { - fn sub(&self, rhs: &Self) -> Self { + fn sub(&self, rhs: &PrimitiveArray) -> Self { sub(self, rhs) } } -impl ArrayWrappingSub for PrimitiveArray +impl ArrayWrappingSub> for PrimitiveArray where T: NativeArithmetics + WrappingSub, { - fn wrapping_sub(&self, rhs: &Self) -> Self { + fn wrapping_sub(&self, rhs: &PrimitiveArray) -> Self { wrapping_sub(self, rhs) } } // Implementation of ArrayCheckedSub trait for PrimitiveArrays -impl ArrayCheckedSub for PrimitiveArray +impl ArrayCheckedSub> for PrimitiveArray where T: NativeArithmetics + CheckedSub, { - fn checked_sub(&self, rhs: &Self) -> Self { + fn checked_sub(&self, rhs: &PrimitiveArray) -> Self { checked_sub(self, rhs) } } // Implementation of ArraySaturatingSub trait for PrimitiveArrays -impl ArraySaturatingSub for PrimitiveArray +impl ArraySaturatingSub> for PrimitiveArray where T: NativeArithmetics + SaturatingSub, { - fn saturating_sub(&self, rhs: &Self) -> Self { + fn saturating_sub(&self, rhs: &PrimitiveArray) -> Self { saturating_sub(self, rhs) } } // Implementation of ArraySaturatingSub trait for PrimitiveArrays -impl ArrayOverflowingSub for PrimitiveArray +impl ArrayOverflowingSub> for PrimitiveArray where T: NativeArithmetics + OverflowingSub, { - fn overflowing_sub(&self, rhs: &Self) -> (Self, Bitmap) { + fn overflowing_sub(&self, rhs: &PrimitiveArray) -> (Self, Bitmap) { overflowing_sub(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/decimal/add.rs b/src/arrow2/src/compute/arithmetics/decimal/add.rs index 98f05e4bd0..2e956c2318 100644 --- a/src/arrow2/src/compute/arithmetics/decimal/add.rs +++ b/src/arrow2/src/compute/arithmetics/decimal/add.rs @@ -135,22 +135,22 @@ pub fn checked_add(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> Pr } // Implementation of ArrayAdd trait for PrimitiveArrays -impl ArrayAdd for PrimitiveArray { - fn add(&self, rhs: &Self) -> Self { +impl ArrayAdd> for PrimitiveArray { + fn add(&self, rhs: &PrimitiveArray) -> Self { add(self, rhs) } } // Implementation of ArrayCheckedAdd trait for PrimitiveArrays -impl ArrayCheckedAdd for PrimitiveArray { - fn checked_add(&self, rhs: &Self) -> Self { +impl ArrayCheckedAdd> for PrimitiveArray { + fn checked_add(&self, rhs: &PrimitiveArray) -> Self { checked_add(self, rhs) } } // Implementation of ArraySaturatingAdd trait for PrimitiveArrays -impl ArraySaturatingAdd for PrimitiveArray { - fn saturating_add(&self, rhs: &Self) -> Self { +impl ArraySaturatingAdd> for PrimitiveArray { + fn saturating_add(&self, rhs: &PrimitiveArray) -> Self { saturating_add(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/decimal/div.rs b/src/arrow2/src/compute/arithmetics/decimal/div.rs index df1468bb8b..f120c7b3e2 100644 --- a/src/arrow2/src/compute/arithmetics/decimal/div.rs +++ b/src/arrow2/src/compute/arithmetics/decimal/div.rs @@ -200,15 +200,15 @@ pub fn checked_div(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> Pr } // Implementation of ArrayDiv trait for PrimitiveArrays -impl ArrayDiv for PrimitiveArray { - fn div(&self, rhs: &Self) -> Self { +impl ArrayDiv> for PrimitiveArray { + fn div(&self, rhs: &PrimitiveArray) -> Self { div(self, rhs) } } // Implementation of ArrayCheckedDiv trait for PrimitiveArrays -impl ArrayCheckedDiv for PrimitiveArray { - fn checked_div(&self, rhs: &Self) -> Self { +impl ArrayCheckedDiv> for PrimitiveArray { + fn checked_div(&self, rhs: &PrimitiveArray) -> Self { checked_div(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/decimal/mul.rs b/src/arrow2/src/compute/arithmetics/decimal/mul.rs index 021564916f..f050952e95 100644 --- a/src/arrow2/src/compute/arithmetics/decimal/mul.rs +++ b/src/arrow2/src/compute/arithmetics/decimal/mul.rs @@ -205,22 +205,22 @@ pub fn checked_mul(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> Pr } // Implementation of ArrayMul trait for PrimitiveArrays -impl ArrayMul for PrimitiveArray { - fn mul(&self, rhs: &Self) -> Self { +impl ArrayMul> for PrimitiveArray { + fn mul(&self, rhs: &PrimitiveArray) -> Self { mul(self, rhs) } } // Implementation of ArrayCheckedMul trait for PrimitiveArrays -impl ArrayCheckedMul for PrimitiveArray { - fn checked_mul(&self, rhs: &Self) -> Self { +impl ArrayCheckedMul> for PrimitiveArray { + fn checked_mul(&self, rhs: &PrimitiveArray) -> Self { checked_mul(self, rhs) } } // Implementation of ArraySaturatingMul trait for PrimitiveArrays -impl ArraySaturatingMul for PrimitiveArray { - fn saturating_mul(&self, rhs: &Self) -> Self { +impl ArraySaturatingMul> for PrimitiveArray { + fn saturating_mul(&self, rhs: &PrimitiveArray) -> Self { saturating_mul(self, rhs) } } diff --git a/src/arrow2/src/compute/arithmetics/decimal/sub.rs b/src/arrow2/src/compute/arithmetics/decimal/sub.rs index ac69b5b09e..ad7d2020b8 100644 --- a/src/arrow2/src/compute/arithmetics/decimal/sub.rs +++ b/src/arrow2/src/compute/arithmetics/decimal/sub.rs @@ -98,22 +98,22 @@ pub fn saturating_sub( } // Implementation of ArraySub trait for PrimitiveArrays -impl ArraySub for PrimitiveArray { - fn sub(&self, rhs: &Self) -> Self { +impl ArraySub> for PrimitiveArray { + fn sub(&self, rhs: &PrimitiveArray) -> Self { sub(self, rhs) } } // Implementation of ArrayCheckedSub trait for PrimitiveArrays -impl ArrayCheckedSub for PrimitiveArray { - fn checked_sub(&self, rhs: &Self) -> Self { +impl ArrayCheckedSub> for PrimitiveArray { + fn checked_sub(&self, rhs: &PrimitiveArray) -> Self { checked_sub(self, rhs) } } // Implementation of ArraySaturatingSub trait for PrimitiveArrays -impl ArraySaturatingSub for PrimitiveArray { - fn saturating_sub(&self, rhs: &Self) -> Self { +impl ArraySaturatingSub> for PrimitiveArray { + fn saturating_sub(&self, rhs: &PrimitiveArray) -> Self { saturating_sub(self, rhs) } } diff --git a/src/arrow2/src/datatypes/field.rs b/src/arrow2/src/datatypes/field.rs index 0df48e37b9..59eb894a3e 100644 --- a/src/arrow2/src/datatypes/field.rs +++ b/src/arrow2/src/datatypes/field.rs @@ -25,7 +25,7 @@ pub struct Field { impl Field { /// Creates a new [`Field`]. pub fn new>(name: T, data_type: DataType, is_nullable: bool) -> Self { - Self { + Field { name: name.into(), data_type, is_nullable, diff --git a/src/arrow2/src/datatypes/mod.rs b/src/arrow2/src/datatypes/mod.rs index 655293f495..2debc5a4f2 100644 --- a/src/arrow2/src/datatypes/mod.rs +++ b/src/arrow2/src/datatypes/mod.rs @@ -262,7 +262,7 @@ impl DataType { /// Returns `&self` for all but [`DataType::Extension`]. For [`DataType::Extension`], /// (recursively) returns the inner [`DataType`]. /// Never returns the variant [`DataType::Extension`]. - pub fn to_logical_type(&self) -> &Self { + pub fn to_logical_type(&self) -> &DataType { use DataType::*; match self { Extension(_, key, _) => key.to_logical_type(), @@ -274,14 +274,14 @@ impl DataType { impl From for DataType { fn from(item: IntegerType) -> Self { match item { - IntegerType::Int8 => Self::Int8, - IntegerType::Int16 => Self::Int16, - IntegerType::Int32 => Self::Int32, - IntegerType::Int64 => Self::Int64, - IntegerType::UInt8 => Self::UInt8, - IntegerType::UInt16 => Self::UInt16, - IntegerType::UInt32 => Self::UInt32, - IntegerType::UInt64 => Self::UInt64, + IntegerType::Int8 => DataType::Int8, + IntegerType::Int16 => DataType::Int16, + IntegerType::Int32 => DataType::Int32, + IntegerType::Int64 => DataType::Int64, + IntegerType::UInt8 => DataType::UInt8, + IntegerType::UInt16 => DataType::UInt16, + IntegerType::UInt32 => DataType::UInt32, + IntegerType::UInt64 => DataType::UInt64, } } } @@ -289,21 +289,21 @@ impl From for DataType { impl From for DataType { fn from(item: PrimitiveType) -> Self { match item { - PrimitiveType::Int8 => Self::Int8, - PrimitiveType::Int16 => Self::Int16, - PrimitiveType::Int32 => Self::Int32, - PrimitiveType::Int64 => Self::Int64, - PrimitiveType::UInt8 => Self::UInt8, - PrimitiveType::UInt16 => Self::UInt16, - PrimitiveType::UInt32 => Self::UInt32, - PrimitiveType::UInt64 => Self::UInt64, - PrimitiveType::Int128 => Self::Decimal(32, 32), - PrimitiveType::Int256 => Self::Decimal256(32, 32), - PrimitiveType::Float16 => Self::Float16, - PrimitiveType::Float32 => Self::Float32, - PrimitiveType::Float64 => Self::Float64, - PrimitiveType::DaysMs => Self::Interval(IntervalUnit::DayTime), - PrimitiveType::MonthDayNano => Self::Interval(IntervalUnit::MonthDayNano), + PrimitiveType::Int8 => DataType::Int8, + PrimitiveType::Int16 => DataType::Int16, + PrimitiveType::Int32 => DataType::Int32, + PrimitiveType::Int64 => DataType::Int64, + PrimitiveType::UInt8 => DataType::UInt8, + PrimitiveType::UInt16 => DataType::UInt16, + PrimitiveType::UInt32 => DataType::UInt32, + PrimitiveType::UInt64 => DataType::UInt64, + PrimitiveType::Int128 => DataType::Decimal(32, 32), + PrimitiveType::Int256 => DataType::Decimal256(32, 32), + PrimitiveType::Float16 => DataType::Float16, + PrimitiveType::Float32 => DataType::Float32, + PrimitiveType::Float64 => DataType::Float64, + PrimitiveType::DaysMs => DataType::Interval(IntervalUnit::DayTime), + PrimitiveType::MonthDayNano => DataType::Interval(IntervalUnit::MonthDayNano), } } } diff --git a/src/arrow2/src/datatypes/schema.rs b/src/arrow2/src/datatypes/schema.rs index 020c75cc53..d90f9d88c8 100644 --- a/src/arrow2/src/datatypes/schema.rs +++ b/src/arrow2/src/datatypes/schema.rs @@ -41,7 +41,7 @@ impl Schema { }) .collect(); - Self { + Schema { fields, metadata: self.metadata, } diff --git a/src/arrow2/src/ffi/array.rs b/src/arrow2/src/ffi/array.rs index d6bc00017a..6b859e0f65 100644 --- a/src/arrow2/src/ffi/array.rs +++ b/src/arrow2/src/ffi/array.rs @@ -110,12 +110,12 @@ impl ArrowArray { let children_ptr = children .into_iter() - .map(|child| Box::into_raw(Box::new(Self::new(child)))) + .map(|child| Box::into_raw(Box::new(ArrowArray::new(child)))) .collect::>(); let n_children = children_ptr.len() as i64; let dictionary_ptr = - dictionary.map(|array| Box::into_raw(Box::new(Self::new(array)))); + dictionary.map(|array| Box::into_raw(Box::new(ArrowArray::new(array)))); let length = array.len() as i64; let null_count = array.null_count() as i64; diff --git a/src/arrow2/src/ffi/schema.rs b/src/arrow2/src/ffi/schema.rs index 453d2ba27b..28e13f884a 100644 --- a/src/arrow2/src/ffi/schema.rs +++ b/src/arrow2/src/ffi/schema.rs @@ -72,7 +72,7 @@ impl ArrowSchema { flags += *is_ordered as i64; // we do not store field info in the dict values, so can't recover it all :( let field = Field::new("", values.as_ref().clone(), true); - Some(Box::new(Self::new(&field))) + Some(Box::new(ArrowSchema::new(&field))) } else { None }; diff --git a/src/arrow2/src/io/csv/mod.rs b/src/arrow2/src/io/csv/mod.rs index 1f7be08fee..a53c1231d5 100644 --- a/src/arrow2/src/io/csv/mod.rs +++ b/src/arrow2/src/io/csv/mod.rs @@ -10,13 +10,13 @@ mod utils; #[cfg(feature = "io_csv_read")] impl From for Error { fn from(error: csv::Error) -> Self { - Self::External("".to_string(), Box::new(error)) + Error::External("".to_string(), Box::new(error)) } } impl From for Error { fn from(error: chrono::ParseError) -> Self { - Self::External("".to_string(), Box::new(error)) + Error::External("".to_string(), Box::new(error)) } } diff --git a/src/arrow2/src/io/csv/read_async/mod.rs b/src/arrow2/src/io/csv/read_async/mod.rs index 04b97702f4..a912366f21 100644 --- a/src/arrow2/src/io/csv/read_async/mod.rs +++ b/src/arrow2/src/io/csv/read_async/mod.rs @@ -16,6 +16,6 @@ pub use csv_async::Error as CSVError; impl From for crate::error::Error { fn from(error: CSVError) -> Self { - Self::External("".to_string(), Box::new(error)) + crate::error::Error::External("".to_string(), Box::new(error)) } } diff --git a/src/arrow2/src/io/csv/write/serialize.rs b/src/arrow2/src/io/csv/write/serialize.rs index a669b055e3..67a3d998e0 100644 --- a/src/arrow2/src/io/csv/write/serialize.rs +++ b/src/arrow2/src/io/csv/write/serialize.rs @@ -40,7 +40,7 @@ pub struct SerializeOptions { impl Default for SerializeOptions { fn default() -> Self { - Self { + SerializeOptions { date32_format: None, date64_format: None, time32_format: None, diff --git a/src/arrow2/src/io/flight/mod.rs b/src/arrow2/src/io/flight/mod.rs index 0849853382..943f148730 100644 --- a/src/arrow2/src/io/flight/mod.rs +++ b/src/arrow2/src/io/flight/mod.rs @@ -50,7 +50,7 @@ pub fn serialize_batch( impl From for FlightData { fn from(data: EncodedData) -> Self { - Self { + FlightData { data_header: data.ipc_message, data_body: data.arrow_data, ..Default::default() diff --git a/src/arrow2/src/io/ipc/append/mod.rs b/src/arrow2/src/io/ipc/append/mod.rs index 8a00598325..1fc066845d 100644 --- a/src/arrow2/src/io/ipc/append/mod.rs +++ b/src/arrow2/src/io/ipc/append/mod.rs @@ -25,7 +25,7 @@ impl FileWriter { mut writer: R, metadata: FileMetadata, options: WriteOptions, - ) -> Result { + ) -> Result> { if metadata.ipc_schema.is_little_endian != is_native_little_endian() { return Err(Error::nyi( "Appending to a file of a non-native endianness is still not supported", @@ -54,7 +54,7 @@ impl FileWriter { writer.seek(SeekFrom::Start(offset))?; - Ok(Self { + Ok(FileWriter { writer, options, schema: metadata.schema, diff --git a/src/arrow2/src/io/ipc/read/error.rs b/src/arrow2/src/io/ipc/read/error.rs index fd11b0ba0a..cbac69aef2 100644 --- a/src/arrow2/src/io/ipc/read/error.rs +++ b/src/arrow2/src/io/ipc/read/error.rs @@ -101,12 +101,12 @@ pub enum OutOfSpecKind { impl From for Error { fn from(kind: OutOfSpecKind) -> Self { - Self::OutOfSpec(format!("{kind:?}")) + Error::OutOfSpec(format!("{kind:?}")) } } impl From for Error { fn from(error: arrow_format::ipc::planus::Error) -> Self { - Self::OutOfSpec(error.to_string()) + Error::OutOfSpec(error.to_string()) } } diff --git a/src/arrow2/src/io/ipc/read/stream.rs b/src/arrow2/src/io/ipc/read/stream.rs index a534f2103f..e3b834a3f7 100644 --- a/src/arrow2/src/io/ipc/read/stream.rs +++ b/src/arrow2/src/io/ipc/read/stream.rs @@ -79,7 +79,7 @@ impl StreamState { /// /// If the `StreamState` was `Waiting`. pub fn unwrap(self) -> Chunk> { - if let Self::Some(batch) = self { + if let StreamState::Some(batch) = self { batch } else { panic!("The batch is not available") diff --git a/src/arrow2/src/io/json/read/deserialize.rs b/src/arrow2/src/io/json/read/deserialize.rs index e80131d53e..4331b2e3a7 100644 --- a/src/arrow2/src/io/json/read/deserialize.rs +++ b/src/arrow2/src/io/json/read/deserialize.rs @@ -460,37 +460,37 @@ pub(crate) trait Container { impl Container for MutableBinaryArray { fn with_capacity(capacity: usize) -> Self { - Self::with_capacity(capacity) + MutableBinaryArray::with_capacity(capacity) } } impl Container for MutableBooleanArray { fn with_capacity(capacity: usize) -> Self { - Self::with_capacity(capacity) + MutableBooleanArray::with_capacity(capacity) } } impl Container for MutableFixedSizeBinaryArray { fn with_capacity(capacity: usize) -> Self { - Self::with_capacity(capacity, 0) + MutableFixedSizeBinaryArray::with_capacity(capacity, 0) } } impl Container for MutableListArray { fn with_capacity(capacity: usize) -> Self { - Self::with_capacity(capacity) + MutableListArray::with_capacity(capacity) } } impl Container for MutablePrimitiveArray { fn with_capacity(capacity: usize) -> Self { - Self::with_capacity(capacity) + MutablePrimitiveArray::with_capacity(capacity) } } impl Container for MutableUtf8Array { fn with_capacity(capacity: usize) -> Self { - Self::with_capacity(capacity) + MutableUtf8Array::with_capacity(capacity) } } diff --git a/src/arrow2/src/io/json/read/mod.rs b/src/arrow2/src/io/json/read/mod.rs index 2cac510dd5..087da38d50 100644 --- a/src/arrow2/src/io/json/read/mod.rs +++ b/src/arrow2/src/io/json/read/mod.rs @@ -13,6 +13,6 @@ use crate::error::Error; impl From for Error { fn from(error: json_deserializer::Error) -> Self { - Self::ExternalFormat(error.to_string()) + Error::ExternalFormat(error.to_string()) } } diff --git a/src/arrow2/src/io/json/write/utf8.rs b/src/arrow2/src/io/json/write/utf8.rs index 8cb1eb434c..b8c9852217 100644 --- a/src/arrow2/src/io/json/write/utf8.rs +++ b/src/arrow2/src/io/json/write/utf8.rs @@ -89,16 +89,16 @@ pub enum CharEscape { impl CharEscape { #[inline] - fn from_escape_table(escape: u8, byte: u8) -> Self { + fn from_escape_table(escape: u8, byte: u8) -> CharEscape { match escape { - self::BB => Self::Backspace, - self::TT => Self::Tab, - self::NN => Self::LineFeed, - self::FF => Self::FormFeed, - self::RR => Self::CarriageReturn, - self::QU => Self::Quote, - self::BS => Self::ReverseSolidus, - self::UU => Self::AsciiControl(byte), + self::BB => CharEscape::Backspace, + self::TT => CharEscape::Tab, + self::NN => CharEscape::LineFeed, + self::FF => CharEscape::FormFeed, + self::RR => CharEscape::CarriageReturn, + self::QU => CharEscape::Quote, + self::BS => CharEscape::ReverseSolidus, + self::UU => CharEscape::AsciiControl(byte), _ => unreachable!(), } } diff --git a/src/arrow2/src/io/json_integration/mod.rs b/src/arrow2/src/io/json_integration/mod.rs index cfdae5faca..ada2441646 100644 --- a/src/arrow2/src/io/json_integration/mod.rs +++ b/src/arrow2/src/io/json_integration/mod.rs @@ -122,6 +122,6 @@ pub struct ArrowJsonColumn { impl From for Error { fn from(error: serde_json::Error) -> Self { - Self::ExternalFormat(error.to_string()) + Error::ExternalFormat(error.to_string()) } } diff --git a/src/arrow2/src/io/orc/mod.rs b/src/arrow2/src/io/orc/mod.rs index 06fce9dd6d..9b982a3ca7 100644 --- a/src/arrow2/src/io/orc/mod.rs +++ b/src/arrow2/src/io/orc/mod.rs @@ -7,6 +7,6 @@ use crate::error::Error; impl From for Error { fn from(error: format::error::Error) -> Self { - Self::ExternalFormat(format!("{error:?}")) + Error::ExternalFormat(format!("{error:?}")) } } diff --git a/src/arrow2/src/io/parquet/mod.rs b/src/arrow2/src/io/parquet/mod.rs index e656097853..7fe33f8564 100644 --- a/src/arrow2/src/io/parquet/mod.rs +++ b/src/arrow2/src/io/parquet/mod.rs @@ -17,18 +17,18 @@ impl From for Error { let message = "Failed to read a compressed parquet file. \ Use the cargo feature \"io_parquet_compression\" to read compressed parquet files." .to_string(); - Self::ExternalFormat(message) + Error::ExternalFormat(message) } parquet2::error::Error::Transport(msg) => { - Self::Io(std::io::Error::new(std::io::ErrorKind::Other, msg)) + Error::Io(std::io::Error::new(std::io::ErrorKind::Other, msg)) } - _ => Self::ExternalFormat(error.to_string()), + _ => Error::ExternalFormat(error.to_string()), } } } impl From for parquet2::error::Error { fn from(error: Error) -> Self { - Self::OutOfSpec(error.to_string()) + parquet2::error::Error::OutOfSpec(error.to_string()) } } diff --git a/src/arrow2/src/io/parquet/read/deserialize/utils.rs b/src/arrow2/src/io/parquet/read/deserialize/utils.rs index 822d6ec6e4..9c4855813f 100644 --- a/src/arrow2/src/io/parquet/read/deserialize/utils.rs +++ b/src/arrow2/src/io/parquet/read/deserialize/utils.rs @@ -40,7 +40,7 @@ pub(super) trait Pushable: Sized { impl Pushable for MutableBitmap { #[inline] fn reserve(&mut self, additional: usize) { - Self::reserve(self, additional) + MutableBitmap::reserve(self, additional) } #[inline] fn len(&self) -> usize { @@ -66,7 +66,7 @@ impl Pushable for MutableBitmap { impl Pushable for Vec { #[inline] fn reserve(&mut self, additional: usize) { - Self::reserve(self, additional) + Vec::reserve(self, additional) } #[inline] fn len(&self) -> usize { diff --git a/src/arrow2/src/io/parquet/read/schema/mod.rs b/src/arrow2/src/io/parquet/read/schema/mod.rs index 07f8a4876c..adb27b2fd9 100644 --- a/src/arrow2/src/io/parquet/read/schema/mod.rs +++ b/src/arrow2/src/io/parquet/read/schema/mod.rs @@ -66,7 +66,7 @@ pub struct SchemaInferenceOptions { impl Default for SchemaInferenceOptions { fn default() -> Self { - Self { + SchemaInferenceOptions { int96_coerce_to_timeunit: TimeUnit::Nanosecond, string_encoding: StringEncoding::default(), } diff --git a/src/arrow2/src/io/parquet/write/pages.rs b/src/arrow2/src/io/parquet/write/pages.rs index 98dc8c2812..cf7654fe15 100644 --- a/src/arrow2/src/io/parquet/write/pages.rs +++ b/src/arrow2/src/io/parquet/write/pages.rs @@ -49,10 +49,10 @@ impl Nested { /// Returns the length (number of rows) of the element pub fn len(&self) -> usize { match self { - Self::Primitive(_, _, length) => *length, - Self::List(nested) => nested.offsets.len_proxy(), - Self::LargeList(nested) => nested.offsets.len_proxy(), - Self::Struct(_, _, len) => *len, + Nested::Primitive(_, _, length) => *length, + Nested::List(nested) => nested.offsets.len_proxy(), + Nested::LargeList(nested) => nested.offsets.len_proxy(), + Nested::Struct(_, _, len) => *len, } } } diff --git a/src/arrow2/src/types/native.rs b/src/arrow2/src/types/native.rs index 666c05be23..1d5c0ada72 100644 --- a/src/arrow2/src/types/native.rs +++ b/src/arrow2/src/types/native.rs @@ -338,7 +338,7 @@ pub struct f16(pub u16); impl PartialEq for f16 { #[inline] - fn eq(&self, other: &Self) -> bool { + fn eq(&self, other: &f16) -> bool { if self.is_nan() || other.is_nan() { false } else { @@ -350,7 +350,7 @@ impl PartialEq for f16 { // see https://github.com/starkat99/half-rs/blob/main/src/binary16.rs impl f16 { /// The difference between 1.0 and the next largest representable number. - pub const EPSILON: Self = Self(0x1400u16); + pub const EPSILON: f16 = f16(0x1400u16); #[inline] #[must_use] @@ -360,8 +360,8 @@ impl f16 { /// Casts from u16. #[inline] - pub const fn from_bits(bits: u16) -> Self { - Self(bits) + pub const fn from_bits(bits: u16) -> f16 { + f16(bits) } /// Casts to u16. @@ -430,7 +430,7 @@ impl f16 { if exp == 0x7F80_0000u32 { // Set mantissa MSB for NaN (and also keep shifted mantissa bits) let nan_bit = if man == 0 { 0 } else { 0x0200u32 }; - return Self(((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 13)) as u16); + return f16(((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 13)) as u16); } // The number is normalized, start assembling half precision version @@ -441,7 +441,7 @@ impl f16 { // Check for exponent overflow, return +infinity if half_exp >= 0x1F { - return Self((half_sign | 0x7C00u32) as u16); + return f16((half_sign | 0x7C00u32) as u16); } // Check for underflow @@ -449,7 +449,7 @@ impl f16 { // Check mantissa for what we can do if 14 - half_exp > 24 { // No rounding possibility, so this is a full underflow, return signed zero - return Self(half_sign as u16); + return f16(half_sign as u16); } // Don't forget about hidden leading mantissa bit when assembling mantissa let man = man | 0x0080_0000u32; @@ -460,7 +460,7 @@ impl f16 { half_man += 1; } // No exponent for subnormals - return Self((half_sign | half_man) as u16); + return f16((half_sign | half_man) as u16); } // Rebias the exponent @@ -470,9 +470,9 @@ impl f16 { let round_bit = 0x0000_1000u32; if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { // Round it - Self(((half_sign | half_exp | half_man) + 1) as u16) + f16(((half_sign | half_exp | half_man) + 1) as u16) } else { - Self((half_sign | half_exp | half_man) as u16) + f16((half_sign | half_exp | half_man) as u16) } } } diff --git a/src/parquet2/Cargo.toml b/src/parquet2/Cargo.toml index b62be487f9..6d437e1578 100644 --- a/src/parquet2/Cargo.toml +++ b/src/parquet2/Cargo.toml @@ -33,9 +33,6 @@ snappy = ["snap"] bench = false name = "parquet2" -[lints] -workspace = true - [package] authors = [ "Jorge C. Leitao for Error { - fn from(e: parquet_format_safe::thrift::Error) -> Self { + fn from(e: parquet_format_safe::thrift::Error) -> Error { match e { parquet_format_safe::thrift::Error::Transport(msg) => { - Self::Transport(format!("io error occurred when decoding thrift: {}", msg)) + Error::Transport(format!("io error occurred when decoding thrift: {}", msg)) } - _ => Self::OutOfSpec(format!("Invalid thrift: {}", e)), + _ => Error::OutOfSpec(format!("Invalid thrift: {}", e)), } } } diff --git a/src/parquet2/src/metadata/column_order.rs b/src/parquet2/src/metadata/column_order.rs index ea530f316c..1e8a258830 100644 --- a/src/parquet2/src/metadata/column_order.rs +++ b/src/parquet2/src/metadata/column_order.rs @@ -20,8 +20,8 @@ impl ColumnOrder { /// Returns sort order associated with this column order. pub fn sort_order(&self) -> SortOrder { match *self { - Self::TypeDefinedOrder(order) => order, - Self::Undefined => SortOrder::Signed, + ColumnOrder::TypeDefinedOrder(order) => order, + ColumnOrder::Undefined => SortOrder::Signed, } } } diff --git a/src/parquet2/src/metadata/file_metadata.rs b/src/parquet2/src/metadata/file_metadata.rs index 56dedcf020..0e2e110bd6 100644 --- a/src/parquet2/src/metadata/file_metadata.rs +++ b/src/parquet2/src/metadata/file_metadata.rs @@ -143,7 +143,7 @@ impl FileMetaData { .column_orders .map(|orders| parse_column_orders(&orders, &schema_descr)); - Ok(Self { + Ok(FileMetaData { version: metadata.version, num_rows: metadata.num_rows.try_into()?, created_by: metadata.created_by, diff --git a/src/parquet2/src/metadata/row_metadata.rs b/src/parquet2/src/metadata/row_metadata.rs index d44e415689..22c1c0e26d 100644 --- a/src/parquet2/src/metadata/row_metadata.rs +++ b/src/parquet2/src/metadata/row_metadata.rs @@ -21,7 +21,7 @@ impl RowGroupMetaData { columns: Vec, num_rows: usize, total_byte_size: usize, - ) -> Self { + ) -> RowGroupMetaData { Self { columns, num_rows, @@ -56,7 +56,7 @@ impl RowGroupMetaData { pub(crate) fn try_from_thrift( schema_descr: &SchemaDescriptor, rg: RowGroup, - ) -> Result { + ) -> Result { if schema_descr.columns().len() != rg.columns.len() { return Err(Error::oos(format!("The number of columns in the row group ({}) must be equal to the number of columns in the schema ({})", rg.columns.len(), schema_descr.columns().len()))); } @@ -71,7 +71,7 @@ impl RowGroupMetaData { }) .collect::>>()?; - Ok(Self { + Ok(RowGroupMetaData { columns, num_rows, total_byte_size, diff --git a/src/parquet2/src/page/mod.rs b/src/parquet2/src/page/mod.rs index 2b1e4fc49b..03b5425e47 100644 --- a/src/parquet2/src/page/mod.rs +++ b/src/parquet2/src/page/mod.rs @@ -125,8 +125,8 @@ pub enum DataPageHeader { impl DataPageHeader { pub fn num_values(&self) -> usize { match &self { - Self::V1(d) => d.num_values as usize, - Self::V2(d) => d.num_values as usize, + DataPageHeader::V1(d) => d.num_values as usize, + DataPageHeader::V2(d) => d.num_values as usize, } } } @@ -262,36 +262,36 @@ pub enum CompressedPage { impl CompressedPage { pub(crate) fn buffer(&mut self) -> &mut Vec { match self { - Self::Data(page) => &mut page.buffer, - Self::Dict(page) => &mut page.buffer, + CompressedPage::Data(page) => &mut page.buffer, + CompressedPage::Dict(page) => &mut page.buffer, } } pub(crate) fn compression(&self) -> Compression { match self { - Self::Data(page) => page.compression(), - Self::Dict(page) => page.compression(), + CompressedPage::Data(page) => page.compression(), + CompressedPage::Dict(page) => page.compression(), } } pub(crate) fn num_values(&self) -> usize { match self { - Self::Data(page) => page.num_values(), - Self::Dict(_) => 0, + CompressedPage::Data(page) => page.num_values(), + CompressedPage::Dict(_) => 0, } } pub(crate) fn selected_rows(&self) -> Option<&[Interval]> { match self { - Self::Data(page) => page.selected_rows(), - Self::Dict(_) => None, + CompressedPage::Data(page) => page.selected_rows(), + CompressedPage::Dict(_) => None, } } pub(crate) fn uncompressed_size(&self) -> usize { match self { - Self::Data(page) => page.uncompressed_page_size, - Self::Dict(page) => page.uncompressed_page_size, + CompressedPage::Data(page) => page.uncompressed_page_size, + CompressedPage::Dict(page) => page.uncompressed_page_size, } } } diff --git a/src/parquet2/src/parquet_bridge.rs b/src/parquet2/src/parquet_bridge.rs index 428bfa6dc5..0ccb1f3544 100644 --- a/src/parquet2/src/parquet_bridge.rs +++ b/src/parquet2/src/parquet_bridge.rs @@ -27,9 +27,9 @@ impl TryFrom for Repetition { fn try_from(repetition: FieldRepetitionType) -> Result { Ok(match repetition { - FieldRepetitionType::REQUIRED => Self::Required, - FieldRepetitionType::OPTIONAL => Self::Optional, - FieldRepetitionType::REPEATED => Self::Repeated, + FieldRepetitionType::REQUIRED => Repetition::Required, + FieldRepetitionType::OPTIONAL => Repetition::Optional, + FieldRepetitionType::REPEATED => Repetition::Repeated, _ => return Err(Error::oos("Thrift out of range")), }) } @@ -38,9 +38,9 @@ impl TryFrom for Repetition { impl From for FieldRepetitionType { fn from(repetition: Repetition) -> Self { match repetition { - Repetition::Required => Self::REQUIRED, - Repetition::Optional => Self::OPTIONAL, - Repetition::Repeated => Self::REPEATED, + Repetition::Required => FieldRepetitionType::REQUIRED, + Repetition::Optional => FieldRepetitionType::OPTIONAL, + Repetition::Repeated => FieldRepetitionType::REPEATED, } } } @@ -62,14 +62,14 @@ impl TryFrom for Compression { fn try_from(codec: CompressionCodec) -> Result { Ok(match codec { - CompressionCodec::UNCOMPRESSED => Self::Uncompressed, - CompressionCodec::SNAPPY => Self::Snappy, - CompressionCodec::GZIP => Self::Gzip, - CompressionCodec::LZO => Self::Lzo, - CompressionCodec::BROTLI => Self::Brotli, - CompressionCodec::LZ4 => Self::Lz4, - CompressionCodec::ZSTD => Self::Zstd, - CompressionCodec::LZ4_RAW => Self::Lz4Raw, + CompressionCodec::UNCOMPRESSED => Compression::Uncompressed, + CompressionCodec::SNAPPY => Compression::Snappy, + CompressionCodec::GZIP => Compression::Gzip, + CompressionCodec::LZO => Compression::Lzo, + CompressionCodec::BROTLI => Compression::Brotli, + CompressionCodec::LZ4 => Compression::Lz4, + CompressionCodec::ZSTD => Compression::Zstd, + CompressionCodec::LZ4_RAW => Compression::Lz4Raw, _ => return Err(Error::oos("Thrift out of range")), }) } @@ -78,14 +78,14 @@ impl TryFrom for Compression { impl From for CompressionCodec { fn from(codec: Compression) -> Self { match codec { - Compression::Uncompressed => Self::UNCOMPRESSED, - Compression::Snappy => Self::SNAPPY, - Compression::Gzip => Self::GZIP, - Compression::Lzo => Self::LZO, - Compression::Brotli => Self::BROTLI, - Compression::Lz4 => Self::LZ4, - Compression::Zstd => Self::ZSTD, - Compression::Lz4Raw => Self::LZ4_RAW, + Compression::Uncompressed => CompressionCodec::UNCOMPRESSED, + Compression::Snappy => CompressionCodec::SNAPPY, + Compression::Gzip => CompressionCodec::GZIP, + Compression::Lzo => CompressionCodec::LZO, + Compression::Brotli => CompressionCodec::BROTLI, + Compression::Lz4 => CompressionCodec::LZ4, + Compression::Zstd => CompressionCodec::ZSTD, + Compression::Lz4Raw => CompressionCodec::LZ4_RAW, } } } @@ -108,14 +108,14 @@ pub enum CompressionOptions { impl From for Compression { fn from(value: CompressionOptions) -> Self { match value { - CompressionOptions::Uncompressed => Self::Uncompressed, - CompressionOptions::Snappy => Self::Snappy, - CompressionOptions::Gzip(_) => Self::Gzip, - CompressionOptions::Lzo => Self::Lzo, - CompressionOptions::Brotli(_) => Self::Brotli, - CompressionOptions::Lz4 => Self::Lz4, - CompressionOptions::Zstd(_) => Self::Zstd, - CompressionOptions::Lz4Raw => Self::Lz4Raw, + CompressionOptions::Uncompressed => Compression::Uncompressed, + CompressionOptions::Snappy => Compression::Snappy, + CompressionOptions::Gzip(_) => Compression::Gzip, + CompressionOptions::Lzo => Compression::Lzo, + CompressionOptions::Brotli(_) => Compression::Brotli, + CompressionOptions::Lz4 => Compression::Lz4, + CompressionOptions::Zstd(_) => Compression::Zstd, + CompressionOptions::Lz4Raw => Compression::Lz4Raw, } } } @@ -123,14 +123,14 @@ impl From for Compression { impl From for CompressionCodec { fn from(codec: CompressionOptions) -> Self { match codec { - CompressionOptions::Uncompressed => Self::UNCOMPRESSED, - CompressionOptions::Snappy => Self::SNAPPY, - CompressionOptions::Gzip(_) => Self::GZIP, - CompressionOptions::Lzo => Self::LZO, - CompressionOptions::Brotli(_) => Self::BROTLI, - CompressionOptions::Lz4 => Self::LZ4, - CompressionOptions::Zstd(_) => Self::ZSTD, - CompressionOptions::Lz4Raw => Self::LZ4_RAW, + CompressionOptions::Uncompressed => CompressionCodec::UNCOMPRESSED, + CompressionOptions::Snappy => CompressionCodec::SNAPPY, + CompressionOptions::Gzip(_) => CompressionCodec::GZIP, + CompressionOptions::Lzo => CompressionCodec::LZO, + CompressionOptions::Brotli(_) => CompressionCodec::BROTLI, + CompressionOptions::Lz4 => CompressionCodec::LZ4, + CompressionOptions::Zstd(_) => CompressionCodec::ZSTD, + CompressionOptions::Lz4Raw => CompressionCodec::LZ4_RAW, } } } @@ -266,9 +266,9 @@ impl TryFrom for PageType { fn try_from(type_: ParquetPageType) -> Result { Ok(match type_ { - ParquetPageType::DATA_PAGE => Self::DataPage, - ParquetPageType::DATA_PAGE_V2 => Self::DataPageV2, - ParquetPageType::DICTIONARY_PAGE => Self::DictionaryPage, + ParquetPageType::DATA_PAGE => PageType::DataPage, + ParquetPageType::DATA_PAGE_V2 => PageType::DataPageV2, + ParquetPageType::DICTIONARY_PAGE => PageType::DictionaryPage, _ => return Err(Error::oos("Thrift out of range")), }) } @@ -277,9 +277,9 @@ impl TryFrom for PageType { impl From for ParquetPageType { fn from(type_: PageType) -> Self { match type_ { - PageType::DataPage => Self::DATA_PAGE, - PageType::DataPageV2 => Self::DATA_PAGE_V2, - PageType::DictionaryPage => Self::DICTIONARY_PAGE, + PageType::DataPage => ParquetPageType::DATA_PAGE, + PageType::DataPageV2 => ParquetPageType::DATA_PAGE_V2, + PageType::DictionaryPage => ParquetPageType::DICTIONARY_PAGE, } } } @@ -331,15 +331,15 @@ impl TryFrom for Encoding { fn try_from(encoding: ParquetEncoding) -> Result { Ok(match encoding { - ParquetEncoding::PLAIN => Self::Plain, - ParquetEncoding::PLAIN_DICTIONARY => Self::PlainDictionary, - ParquetEncoding::RLE => Self::Rle, - ParquetEncoding::BIT_PACKED => Self::BitPacked, - ParquetEncoding::DELTA_BINARY_PACKED => Self::DeltaBinaryPacked, - ParquetEncoding::DELTA_LENGTH_BYTE_ARRAY => Self::DeltaLengthByteArray, - ParquetEncoding::DELTA_BYTE_ARRAY => Self::DeltaByteArray, - ParquetEncoding::RLE_DICTIONARY => Self::RleDictionary, - ParquetEncoding::BYTE_STREAM_SPLIT => Self::ByteStreamSplit, + ParquetEncoding::PLAIN => Encoding::Plain, + ParquetEncoding::PLAIN_DICTIONARY => Encoding::PlainDictionary, + ParquetEncoding::RLE => Encoding::Rle, + ParquetEncoding::BIT_PACKED => Encoding::BitPacked, + ParquetEncoding::DELTA_BINARY_PACKED => Encoding::DeltaBinaryPacked, + ParquetEncoding::DELTA_LENGTH_BYTE_ARRAY => Encoding::DeltaLengthByteArray, + ParquetEncoding::DELTA_BYTE_ARRAY => Encoding::DeltaByteArray, + ParquetEncoding::RLE_DICTIONARY => Encoding::RleDictionary, + ParquetEncoding::BYTE_STREAM_SPLIT => Encoding::ByteStreamSplit, _ => return Err(Error::oos("Thrift out of range")), }) } @@ -348,15 +348,15 @@ impl TryFrom for Encoding { impl From for ParquetEncoding { fn from(encoding: Encoding) -> Self { match encoding { - Encoding::Plain => Self::PLAIN, - Encoding::PlainDictionary => Self::PLAIN_DICTIONARY, - Encoding::Rle => Self::RLE, - Encoding::BitPacked => Self::BIT_PACKED, - Encoding::DeltaBinaryPacked => Self::DELTA_BINARY_PACKED, - Encoding::DeltaLengthByteArray => Self::DELTA_LENGTH_BYTE_ARRAY, - Encoding::DeltaByteArray => Self::DELTA_BYTE_ARRAY, - Encoding::RleDictionary => Self::RLE_DICTIONARY, - Encoding::ByteStreamSplit => Self::BYTE_STREAM_SPLIT, + Encoding::Plain => ParquetEncoding::PLAIN, + Encoding::PlainDictionary => ParquetEncoding::PLAIN_DICTIONARY, + Encoding::Rle => ParquetEncoding::RLE, + Encoding::BitPacked => ParquetEncoding::BIT_PACKED, + Encoding::DeltaBinaryPacked => ParquetEncoding::DELTA_BINARY_PACKED, + Encoding::DeltaLengthByteArray => ParquetEncoding::DELTA_LENGTH_BYTE_ARRAY, + Encoding::DeltaByteArray => ParquetEncoding::DELTA_BYTE_ARRAY, + Encoding::RleDictionary => ParquetEncoding::RLE_DICTIONARY, + Encoding::ByteStreamSplit => ParquetEncoding::BYTE_STREAM_SPLIT, } } } @@ -381,9 +381,9 @@ impl TryFrom for BoundaryOrder { fn try_from(encoding: ParquetBoundaryOrder) -> Result { Ok(match encoding { - ParquetBoundaryOrder::UNORDERED => Self::Unordered, - ParquetBoundaryOrder::ASCENDING => Self::Ascending, - ParquetBoundaryOrder::DESCENDING => Self::Descending, + ParquetBoundaryOrder::UNORDERED => BoundaryOrder::Unordered, + ParquetBoundaryOrder::ASCENDING => BoundaryOrder::Ascending, + ParquetBoundaryOrder::DESCENDING => BoundaryOrder::Descending, _ => return Err(Error::oos("BoundaryOrder Thrift value out of range")), }) } @@ -392,9 +392,9 @@ impl TryFrom for BoundaryOrder { impl From for ParquetBoundaryOrder { fn from(encoding: BoundaryOrder) -> Self { match encoding { - BoundaryOrder::Unordered => Self::UNORDERED, - BoundaryOrder::Ascending => Self::ASCENDING, - BoundaryOrder::Descending => Self::DESCENDING, + BoundaryOrder::Unordered => ParquetBoundaryOrder::UNORDERED, + BoundaryOrder::Ascending => ParquetBoundaryOrder::ASCENDING, + BoundaryOrder::Descending => ParquetBoundaryOrder::DESCENDING, } } } @@ -443,9 +443,9 @@ pub enum TimeUnit { impl From for TimeUnit { fn from(encoding: ParquetTimeUnit) -> Self { match encoding { - ParquetTimeUnit::MILLIS(_) => Self::Milliseconds, - ParquetTimeUnit::MICROS(_) => Self::Microseconds, - ParquetTimeUnit::NANOS(_) => Self::Nanoseconds, + ParquetTimeUnit::MILLIS(_) => TimeUnit::Milliseconds, + ParquetTimeUnit::MICROS(_) => TimeUnit::Microseconds, + ParquetTimeUnit::NANOS(_) => TimeUnit::Nanoseconds, } } } @@ -453,9 +453,9 @@ impl From for TimeUnit { impl From for ParquetTimeUnit { fn from(unit: TimeUnit) -> Self { match unit { - TimeUnit::Milliseconds => Self::MILLIS(Default::default()), - TimeUnit::Microseconds => Self::MICROS(Default::default()), - TimeUnit::Nanoseconds => Self::NANOS(Default::default()), + TimeUnit::Milliseconds => ParquetTimeUnit::MILLIS(Default::default()), + TimeUnit::Microseconds => ParquetTimeUnit::MICROS(Default::default()), + TimeUnit::Nanoseconds => ParquetTimeUnit::NANOS(Default::default()), } } } @@ -503,8 +503,8 @@ pub enum GroupLogicalType { impl From for ParquetLogicalType { fn from(type_: GroupLogicalType) -> Self { match type_ { - GroupLogicalType::Map => Self::MAP(Default::default()), - GroupLogicalType::List => Self::LIST(Default::default()), + GroupLogicalType::Map => ParquetLogicalType::MAP(Default::default()), + GroupLogicalType::List => ParquetLogicalType::LIST(Default::default()), } } } @@ -512,17 +512,17 @@ impl From for ParquetLogicalType { impl From<(i32, bool)> for IntegerType { fn from((bit_width, is_signed): (i32, bool)) -> Self { match (bit_width, is_signed) { - (8, true) => Self::Int8, - (16, true) => Self::Int16, - (32, true) => Self::Int32, - (64, true) => Self::Int64, - (8, false) => Self::UInt8, - (16, false) => Self::UInt16, - (32, false) => Self::UInt32, - (64, false) => Self::UInt64, + (8, true) => IntegerType::Int8, + (16, true) => IntegerType::Int16, + (32, true) => IntegerType::Int32, + (64, true) => IntegerType::Int64, + (8, false) => IntegerType::UInt8, + (16, false) => IntegerType::UInt16, + (32, false) => IntegerType::UInt32, + (64, false) => IntegerType::UInt64, // The above are the only possible annotations for parquet's int32. Anything else // is a deviation to the parquet specification and we ignore - _ => Self::Int32, + _ => IntegerType::Int32, } } } @@ -547,28 +547,28 @@ impl TryFrom for PrimitiveLogicalType { fn try_from(type_: ParquetLogicalType) -> Result { Ok(match type_ { - ParquetLogicalType::STRING(_) => Self::String, - ParquetLogicalType::ENUM(_) => Self::Enum, - ParquetLogicalType::DECIMAL(decimal) => Self::Decimal( + ParquetLogicalType::STRING(_) => PrimitiveLogicalType::String, + ParquetLogicalType::ENUM(_) => PrimitiveLogicalType::Enum, + ParquetLogicalType::DECIMAL(decimal) => PrimitiveLogicalType::Decimal( decimal.precision.try_into()?, decimal.scale.try_into()?, ), - ParquetLogicalType::DATE(_) => Self::Date, - ParquetLogicalType::TIME(time) => Self::Time { + ParquetLogicalType::DATE(_) => PrimitiveLogicalType::Date, + ParquetLogicalType::TIME(time) => PrimitiveLogicalType::Time { unit: time.unit.into(), is_adjusted_to_utc: time.is_adjusted_to_u_t_c, }, - ParquetLogicalType::TIMESTAMP(time) => Self::Timestamp { + ParquetLogicalType::TIMESTAMP(time) => PrimitiveLogicalType::Timestamp { unit: time.unit.into(), is_adjusted_to_utc: time.is_adjusted_to_u_t_c, }, ParquetLogicalType::INTEGER(int) => { - Self::Integer((int.bit_width as i32, int.is_signed).into()) + PrimitiveLogicalType::Integer((int.bit_width as i32, int.is_signed).into()) } - ParquetLogicalType::UNKNOWN(_) => Self::Unknown, - ParquetLogicalType::JSON(_) => Self::Json, - ParquetLogicalType::BSON(_) => Self::Bson, - ParquetLogicalType::UUID(_) => Self::Uuid, + ParquetLogicalType::UNKNOWN(_) => PrimitiveLogicalType::Unknown, + ParquetLogicalType::JSON(_) => PrimitiveLogicalType::Json, + ParquetLogicalType::BSON(_) => PrimitiveLogicalType::Bson, + ParquetLogicalType::UUID(_) => PrimitiveLogicalType::Uuid, _ => return Err(Error::oos("LogicalType value out of range")), }) } @@ -579,8 +579,8 @@ impl TryFrom for GroupLogicalType { fn try_from(type_: ParquetLogicalType) -> Result { Ok(match type_ { - ParquetLogicalType::LIST(_) => Self::List, - ParquetLogicalType::MAP(_) => Self::Map, + ParquetLogicalType::LIST(_) => GroupLogicalType::List, + ParquetLogicalType::MAP(_) => GroupLogicalType::Map, _ => return Err(Error::oos("LogicalType value out of range")), }) } @@ -589,40 +589,40 @@ impl TryFrom for GroupLogicalType { impl From for ParquetLogicalType { fn from(type_: PrimitiveLogicalType) -> Self { match type_ { - PrimitiveLogicalType::String => Self::STRING(Default::default()), - PrimitiveLogicalType::Enum => Self::ENUM(Default::default()), + PrimitiveLogicalType::String => ParquetLogicalType::STRING(Default::default()), + PrimitiveLogicalType::Enum => ParquetLogicalType::ENUM(Default::default()), PrimitiveLogicalType::Decimal(precision, scale) => { - Self::DECIMAL(DecimalType { + ParquetLogicalType::DECIMAL(DecimalType { precision: precision as i32, scale: scale as i32, }) } - PrimitiveLogicalType::Date => Self::DATE(Default::default()), + PrimitiveLogicalType::Date => ParquetLogicalType::DATE(Default::default()), PrimitiveLogicalType::Time { unit, is_adjusted_to_utc, - } => Self::TIME(TimeType { + } => ParquetLogicalType::TIME(TimeType { unit: unit.into(), is_adjusted_to_u_t_c: is_adjusted_to_utc, }), PrimitiveLogicalType::Timestamp { unit, is_adjusted_to_utc, - } => Self::TIMESTAMP(TimestampType { + } => ParquetLogicalType::TIMESTAMP(TimestampType { unit: unit.into(), is_adjusted_to_u_t_c: is_adjusted_to_utc, }), PrimitiveLogicalType::Integer(integer) => { let (bit_width, is_signed) = integer.into(); - Self::INTEGER(IntType { + ParquetLogicalType::INTEGER(IntType { bit_width: bit_width as i8, is_signed, }) } - PrimitiveLogicalType::Unknown => Self::UNKNOWN(Default::default()), - PrimitiveLogicalType::Json => Self::JSON(Default::default()), - PrimitiveLogicalType::Bson => Self::BSON(Default::default()), - PrimitiveLogicalType::Uuid => Self::UUID(Default::default()), + PrimitiveLogicalType::Unknown => ParquetLogicalType::UNKNOWN(Default::default()), + PrimitiveLogicalType::Json => ParquetLogicalType::JSON(Default::default()), + PrimitiveLogicalType::Bson => ParquetLogicalType::BSON(Default::default()), + PrimitiveLogicalType::Uuid => ParquetLogicalType::UUID(Default::default()), } } } diff --git a/src/parquet2/src/schema/io_thrift/from_thrift.rs b/src/parquet2/src/schema/io_thrift/from_thrift.rs index 222401eff6..68add24fd6 100644 --- a/src/parquet2/src/schema/io_thrift/from_thrift.rs +++ b/src/parquet2/src/schema/io_thrift/from_thrift.rs @@ -9,7 +9,7 @@ use super::super::types::ParquetType; impl ParquetType { /// Method to convert from Thrift. - pub fn try_from_thrift(elements: &[SchemaElement]) -> Result { + pub fn try_from_thrift(elements: &[SchemaElement]) -> Result { let mut index = 0; let mut schema_nodes = Vec::new(); while index < elements.len() { diff --git a/src/parquet2/src/schema/types/converted_type.rs b/src/parquet2/src/schema/types/converted_type.rs index 51b9eb3136..b7db8847b7 100644 --- a/src/parquet2/src/schema/types/converted_type.rs +++ b/src/parquet2/src/schema/types/converted_type.rs @@ -149,9 +149,9 @@ impl TryFrom for GroupConvertedType { fn try_from(type_: ConvertedType) -> Result { Ok(match type_ { - ConvertedType::LIST => Self::List, - ConvertedType::MAP => Self::Map, - ConvertedType::MAP_KEY_VALUE => Self::MapKeyValue, + ConvertedType::LIST => GroupConvertedType::List, + ConvertedType::MAP => GroupConvertedType::Map, + ConvertedType::MAP_KEY_VALUE => GroupConvertedType::MapKeyValue, _ => return Err(Error::oos("LogicalType value out of range")), }) } @@ -160,9 +160,9 @@ impl TryFrom for GroupConvertedType { impl From for ConvertedType { fn from(type_: GroupConvertedType) -> Self { match type_ { - GroupConvertedType::Map => Self::MAP, - GroupConvertedType::List => Self::LIST, - GroupConvertedType::MapKeyValue => Self::MAP_KEY_VALUE, + GroupConvertedType::Map => ConvertedType::MAP, + GroupConvertedType::List => ConvertedType::LIST, + GroupConvertedType::MapKeyValue => ConvertedType::MAP_KEY_VALUE, } } } diff --git a/src/parquet2/src/schema/types/parquet_type.rs b/src/parquet2/src/schema/types/parquet_type.rs index 32b45c7df1..c0fc739b44 100644 --- a/src/parquet2/src/schema/types/parquet_type.rs +++ b/src/parquet2/src/schema/types/parquet_type.rs @@ -70,7 +70,7 @@ impl ParquetType { /// Checks if `sub_type` schema is part of current schema. /// This method can be used to check if projected columns are part of the root schema. - pub fn check_contains(&self, sub_type: &Self) -> bool { + pub fn check_contains(&self, sub_type: &ParquetType) -> bool { let basic_match = self.get_field_info() == sub_type.get_field_info(); match (self, sub_type) { @@ -112,13 +112,13 @@ impl ParquetType { /// Constructors impl ParquetType { - pub(crate) fn new_root(name: String, fields: Vec) -> Self { + pub(crate) fn new_root(name: String, fields: Vec) -> Self { let field_info = FieldInfo { name, repetition: Repetition::Optional, id: None, }; - Self::GroupType { + ParquetType::GroupType { field_info, fields, logical_type: None, @@ -128,7 +128,7 @@ impl ParquetType { pub fn from_converted( name: String, - fields: Vec, + fields: Vec, repetition: Repetition, converted_type: Option, id: Option, @@ -139,7 +139,7 @@ impl ParquetType { id, }; - Self::GroupType { + ParquetType::GroupType { field_info, fields, converted_type, @@ -166,7 +166,7 @@ impl ParquetType { id, }; - Ok(Self::PrimitiveType(PrimitiveType { + Ok(ParquetType::PrimitiveType(PrimitiveType { field_info, converted_type, logical_type, @@ -177,7 +177,7 @@ impl ParquetType { /// Helper method to create a [`ParquetType::PrimitiveType`] optional field /// with no logical or converted types. pub fn from_physical(name: String, physical_type: PhysicalType) -> Self { - Self::PrimitiveType(PrimitiveType::from_physical(name, physical_type)) + ParquetType::PrimitiveType(PrimitiveType::from_physical(name, physical_type)) } pub fn from_group( @@ -185,7 +185,7 @@ impl ParquetType { repetition: Repetition, converted_type: Option, logical_type: Option, - fields: Vec, + fields: Vec, id: Option, ) -> Self { let field_info = FieldInfo { @@ -194,7 +194,7 @@ impl ParquetType { id, }; - Self::GroupType { + ParquetType::GroupType { field_info, logical_type, converted_type, diff --git a/src/parquet2/src/schema/types/physical_type.rs b/src/parquet2/src/schema/types/physical_type.rs index 985265eab4..b114f51dad 100644 --- a/src/parquet2/src/schema/types/physical_type.rs +++ b/src/parquet2/src/schema/types/physical_type.rs @@ -22,17 +22,17 @@ impl TryFrom<(Type, Option)> for PhysicalType { fn try_from((type_, length): (Type, Option)) -> Result { Ok(match type_ { - Type::BOOLEAN => Self::Boolean, - Type::INT32 => Self::Int32, - Type::INT64 => Self::Int64, - Type::INT96 => Self::Int96, - Type::FLOAT => Self::Float, - Type::DOUBLE => Self::Double, - Type::BYTE_ARRAY => Self::ByteArray, + Type::BOOLEAN => PhysicalType::Boolean, + Type::INT32 => PhysicalType::Int32, + Type::INT64 => PhysicalType::Int64, + Type::INT96 => PhysicalType::Int96, + Type::FLOAT => PhysicalType::Float, + Type::DOUBLE => PhysicalType::Double, + Type::BYTE_ARRAY => PhysicalType::ByteArray, Type::FIXED_LEN_BYTE_ARRAY => { let length = length .ok_or_else(|| Error::oos("Length must be defined for FixedLenByteArray"))?; - Self::FixedLenByteArray(length.try_into()?) + PhysicalType::FixedLenByteArray(length.try_into()?) } _ => return Err(Error::oos("Unknown type")), }) diff --git a/src/parquet2/src/write/row_group.rs b/src/parquet2/src/write/row_group.rs index ff0396f8fc..a39b1bc842 100644 --- a/src/parquet2/src/write/row_group.rs +++ b/src/parquet2/src/write/row_group.rs @@ -26,8 +26,8 @@ pub struct ColumnOffsetsMetadata { } impl ColumnOffsetsMetadata { - pub fn from_column_chunk(column_chunk: &ColumnChunk) -> Self { - Self { + pub fn from_column_chunk(column_chunk: &ColumnChunk) -> ColumnOffsetsMetadata { + ColumnOffsetsMetadata { dictionary_page_offset: column_chunk .meta_data .as_ref() @@ -42,8 +42,8 @@ impl ColumnOffsetsMetadata { pub fn from_column_chunk_metadata( column_chunk_metadata: &ColumnChunkMetaData, - ) -> Self { - Self { + ) -> ColumnOffsetsMetadata { + ColumnOffsetsMetadata { dictionary_page_offset: column_chunk_metadata.dictionary_page_offset(), data_page_offset: Some(column_chunk_metadata.data_page_offset()), } From 90e377e33117577c5d692f15350c33a90126966f Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Wed, 25 Sep 2024 11:29:15 -0700 Subject: [PATCH 3/3] fmt --- src/daft-core/src/array/ops/concat_agg.rs | 5 +---- src/daft-sql/src/modules/partitioning.rs | 16 ++++------------ 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/src/daft-core/src/array/ops/concat_agg.rs b/src/daft-core/src/array/ops/concat_agg.rs index 8e1a624f90..d3681ea3a5 100644 --- a/src/daft-core/src/array/ops/concat_agg.rs +++ b/src/daft-core/src/array/ops/concat_agg.rs @@ -208,10 +208,7 @@ impl DaftConcatAggable for DataArray { ))) }; - Ok(Self::from(( - self.field.name.as_ref(), - concat_per_group, - ))) + Ok(Self::from((self.field.name.as_ref(), concat_per_group))) } } diff --git a/src/daft-sql/src/modules/partitioning.rs b/src/daft-sql/src/modules/partitioning.rs index f0eef522ed..e833edd51d 100644 --- a/src/daft-sql/src/modules/partitioning.rs +++ b/src/daft-sql/src/modules/partitioning.rs @@ -32,18 +32,10 @@ impl SQLFunction for PartitioningExpr { planner: &crate::planner::SQLPlanner, ) -> crate::error::SQLPlannerResult { match self { - Self::Years => { - partitioning_helper(args, planner, "years", partitioning::years) - } - Self::Months => { - partitioning_helper(args, planner, "months", partitioning::months) - } - Self::Days => { - partitioning_helper(args, planner, "days", partitioning::days) - } - Self::Hours => { - partitioning_helper(args, planner, "hours", partitioning::hours) - } + Self::Years => partitioning_helper(args, planner, "years", partitioning::years), + Self::Months => partitioning_helper(args, planner, "months", partitioning::months), + Self::Days => partitioning_helper(args, planner, "days", partitioning::days), + Self::Hours => partitioning_helper(args, planner, "hours", partitioning::hours), Self::IcebergBucket(_) => { ensure!(args.len() == 2, "iceberg_bucket takes exactly 2 arguments"); let input = planner.plan_function_arg(&args[0])?;