[CHORE] Upgrade Rust toolchain to 2024-08-01 #2639

Merged (6 commits, Aug 12, 2024)
1,096 changes: 566 additions & 530 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion daft/expressions/expressions.py
@@ -655,7 +655,7 @@ def approx_percentiles(self, percentiles: builtins.float | builtins.list[builtin
 │ --- ┆ --- │
 │ Float64 ┆ FixedSizeList[Float64; 3] │
 ╞═════════════════════╪════════════════════════════════╡
-│ 2.9742334234767167 ┆ [1.993661701417351, 2.9742334… │
+│ 2.9742334234767163 ┆ [1.993661701417351, 2.9742334… │
 ╰─────────────────────┴────────────────────────────────╯
 <BLANKLINE>
 (Showing first 1 of 1 rows)
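A side note on the doctest change above: the expected percentile moves only in its final digit. A quick standalone Rust sketch (illustrative, not Daft code) shows the two literals are one ULP apart, i.e. adjacent f64 values, which is consistent with a last-bit floating-point difference under the new toolchain rather than an algorithmic change:

    fn main() {
        let old = 2.9742334234767167_f64;
        let new = 2.9742334234767163_f64;
        // Adjacent f64 bit patterns differ by exactly 1 (one ULP apart).
        println!("ulp distance = {}", old.to_bits().abs_diff(new.to_bits()));
    }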
2 changes: 1 addition & 1 deletion rust-toolchain.toml
@@ -1,4 +1,4 @@
 [toolchain]
-channel = "nightly-2024-04-01"
+channel = "nightly-2024-08-01"
 components = ["rustfmt", "clippy"]
 profile = "minimal"
2 changes: 2 additions & 0 deletions src/arrow2/Cargo.toml
@@ -158,6 +158,7 @@ full = [
   "arrow",
   "io_csv",
   "io_csv_async",
+  "io_flight",
   "io_json",
   "io_ipc",
   "io_ipc_write_async",
@@ -179,6 +180,7 @@ io_csv_async = ["io_csv_read_async"]
 io_csv_read = ["csv", "lexical-core"]
 io_csv_read_async = ["csv-async", "lexical-core", "futures"]
 io_csv_write = ["csv-core", "streaming-iterator", "lexical-core"]
+io_flight = ["arrow-format/flight-data"]
 io_ipc = ["arrow-format"]
 io_ipc_compression = ["lz4", "zstd"]
 io_ipc_read_async = ["io_ipc", "futures", "async-stream"]
3 changes: 1 addition & 2 deletions src/arrow2/src/array/dictionary/typed_iterator.rs
@@ -37,13 +37,12 @@ impl<O: Offset> DictValue for Utf8Array<O> {
             .ok_or(Error::InvalidArgumentError(
                 "could not convert array to dictionary value".into(),
             ))
-            .map(|arr| {
+            .inspect(|arr| {
                 assert_eq!(
                     arr.null_count(),
                     0,
                     "null values in values not supported in iteration"
                 );
-                arr
             })
     }
 }
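This `.map` to `.inspect` rewrite recurs throughout the PR: a closure that only performs a side effect and returns its argument unchanged is what `Result::inspect` and `Option::inspect` (stable since Rust 1.76) express directly, and newer clippy nightlies flag the manual form (presumably via the `manual_inspect` lint; the PR doesn't name it). A standalone sketch of the pattern:

    fn main() {
        let res: Result<Vec<i32>, String> = Ok(vec![1, 2, 3]);

        // Before: map runs a side effect and returns the value unchanged.
        let before = res.clone().map(|v| {
            assert!(!v.is_empty());
            v
        });

        // After: inspect borrows the Ok value, runs the side effect,
        // and passes the Result through untouched.
        let after = res.inspect(|v| assert!(!v.is_empty()));

        assert_eq!(before, after);
    }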
1 change: 1 addition & 0 deletions src/arrow2/src/array/mod.rs
@@ -10,6 +10,7 @@
 //! * [`BinaryArray`] and [`MutableBinaryArray`], an array of opaque variable length values
 //! * [`ListArray`] and [`MutableListArray`], an array of arrays (e.g. `[[1, 2], None, [], [None]]`)
 //! * [`StructArray`] and [`MutableStructArray`], an array of arrays identified by a string (e.g. `{"a": [1, 2], "b": [true, false]}`)
+//!
 //! All immutable arrays implement the trait object [`Array`] and that can be downcasted
 //! to a concrete struct based on [`PhysicalType`](crate::datatypes::PhysicalType) available from [`Array::data_type`].
 //! All immutable arrays are backed by [`Buffer`](crate::buffer::Buffer) and thus cloning and slicing them is `O(1)`.
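Several hunks like this one add a blank `//!`/`///` line between a doc-comment list and the paragraph after it. Without the separator, Markdown treats the paragraph as a lazy continuation of the last bullet; the newer toolchain's clippy warns about this (the `doc_lazy_continuation` lint name is my assumption, the PR doesn't cite it). A minimal sketch:

    /// Supported layouts:
    /// * flat
    /// * nested
    ///
    /// Without the blank doc line above, this paragraph would render as part
    /// of the "nested" bullet instead of starting a new paragraph.
    pub struct Layouts;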
1 change: 1 addition & 0 deletions src/arrow2/src/bitmap/utils/zip_validity.rs
@@ -91,6 +91,7 @@ where
 /// This enum can be used in two distinct ways:
 /// * as an iterator, via `Iterator::next`
 /// * as an enum of two iterators, via `match self`
+///
 /// The latter allows specializing to when there are no nulls
 #[derive(Debug, Clone)]
 pub enum ZipValidity<T, I, V>
2 changes: 1 addition & 1 deletion src/arrow2/src/buffer/immutable.rs
@@ -1,4 +1,4 @@
-use std::{ops::Deref, sync::Arc, usize};
+use std::{ops::Deref, sync::Arc};
 
 use either::Either;
 
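The dropped `usize` here refers to the legacy `std::usize` module, not the primitive type: since Rust 1.43, constants like `usize::MAX` are associated with the primitive itself, so the module import is redundant and newer toolchains lint it as unused. A sketch:

    fn main() {
        // No `use std::usize;` needed: MAX is an associated constant
        // on the primitive type since Rust 1.43.
        println!("{}", usize::MAX);
    }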
1 change: 1 addition & 0 deletions src/arrow2/src/compute/cast/mod.rs
@@ -487,6 +487,7 @@ fn cast_list_to_fixed_size_list<O: Offset>(
 /// * Time32 and Time64: precision lost when going to higher interval
 /// * Timestamp and Date{32|64}: precision lost when going to higher interval
 /// * Temporal to/from backing primitive: zero-copy with data type change
+///
 /// Unsupported Casts
 /// * To or from `StructArray`
 /// * List to primitive
1 change: 1 addition & 0 deletions src/arrow2/src/compute/comparison/mod.rs
@@ -7,6 +7,7 @@
 //! The functions are organized in two variants:
 //! * statically typed
 //! * dynamically typed
+//!
 //! The statically typed are available under each module of this module (e.g. [`primitive::eq`], [`primitive::lt_scalar`])
 //! The dynamically typed are available in this module (e.g. [`eq`] or [`lt_scalar`]).
 //!
11 changes: 1 addition & 10 deletions src/arrow2/src/compute/sort/row/interner.rs
@@ -50,16 +50,7 @@ trait HashSingle: BuildHasher {
     where
         Self: Sized,
     {
-        // Rewrite as `hasher.hash_one(&x)` after
-        // https://github.com/rust-lang/rust/issues/86161 is merged.
-        #[cfg(feature = "nightly_build")]
-        {
-            self.hash_one(x)
-        }
-        #[cfg(not(feature = "nightly_build"))]
-        {
-            self.hash_one(&x)
-        }
+        self.hash_one(&x)
     }
 }
 
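`BuildHasher::hash_one`, tracked in the issue referenced by the removed comment, stabilized in Rust 1.71, so the `nightly_build` split collapses to the single stable call. A standalone sketch of the stable API:

    use std::collections::hash_map::RandomState;
    use std::hash::BuildHasher;

    fn main() {
        let state = RandomState::new();
        // hash_one builds a fresh Hasher, hashes one value, and returns the u64.
        let hash: u64 = state.hash_one("some key");
        println!("{hash}");
    }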
1 change: 1 addition & 0 deletions src/arrow2/src/datatypes/mod.rs
@@ -66,6 +66,7 @@ pub enum DataType {
 /// * As used in the Olson time zone database (the "tz database" or
 /// "tzdata"), such as "America/New_York"
 /// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30
+///
 /// When the timezone is not specified, the timestamp is considered to have no timezone
 /// and is represented _as is_
 Timestamp(TimeUnit, Option<String>),
2 changes: 1 addition & 1 deletion src/arrow2/src/io/csv/write/mod.rs
@@ -103,6 +103,6 @@ where
             .join(std::str::from_utf8(&[options.delimiter]).unwrap())
             .as_bytes(),
     )?;
-    writer.write_all(&[b'\n'])?;
+    writer.write_all(b"\n")?;
     Ok(())
 }
6 changes: 3 additions & 3 deletions src/arrow2/src/io/json/write/mod.rs
@@ -141,15 +141,15 @@ where
     W: std::io::Write,
     I: FallibleStreamingIterator<Item = [u8], Error = Error>,
 {
-    writer.write_all(&[b'['])?;
+    writer.write_all(b"[")?;
     let mut is_first_row = true;
     while let Some(block) = blocks.next()? {
         if !is_first_row {
-            writer.write_all(&[b','])?;
+            writer.write_all(b",")?;
         }
         is_first_row = false;
         writer.write_all(block)?;
     }
-    writer.write_all(&[b']'])?;
+    writer.write_all(b"]")?;
     Ok(())
 }
1 change: 1 addition & 0 deletions src/arrow2/src/io/parquet/read/indexes/mod.rs
@@ -335,6 +335,7 @@ pub fn compute_page_row_intervals(
 /// For each field, the outermost vector corresponds to each parquet column:
 /// a primitive field contains 1 column, a struct field with 2 primitive fields contain 2 columns.
 /// The inner `Vec<Interval>` contains one [`Interval`] per page: its length equals the length of [`ColumnPageStatistics`].
+///
 /// It returns a single [`Vec<Interval>`] denoting the set of intervals that the predicate selects (over all columns).
 ///
 /// This returns one item per `field`. For each field, there is one item per column (for non-nested types it returns one column)
3 changes: 1 addition & 2 deletions src/arrow2/src/io/parquet/write/utils.rs
@@ -131,9 +131,8 @@ impl<T, I: Iterator<Item = T>> Iterator for ExactSizedIter<T, I> {
 
     #[inline]
     fn next(&mut self) -> Option<Self::Item> {
-        self.iter.next().map(|x| {
+        self.iter.next().inspect(|_| {
             self.remaining -= 1;
-            x
         })
     }
 
1 change: 0 additions & 1 deletion src/arrow2/src/lib.rs
@@ -12,7 +12,6 @@
 #![allow(clippy::type_complexity)]
 #![cfg_attr(docsrs, feature(doc_cfg))]
 #![cfg_attr(feature = "simd", feature(portable_simd))]
-#![cfg_attr(feature = "nightly_build", feature(build_hasher_simple_hash_one))]
 
 #[macro_use]
 pub mod array;
3 changes: 1 addition & 2 deletions src/arrow2/src/scalar/fixed_size_list.rs
@@ -31,10 +31,9 @@ impl FixedSizeListScalar {
     pub fn new(data_type: DataType, values: Option<Box<dyn Array>>) -> Self {
         let (field, size) = FixedSizeListArray::get_child_and_size(&data_type);
         let inner_data_type = field.data_type();
-        let values = values.map(|x| {
+        let values = values.inspect(|x| {
             assert_eq!(inner_data_type, x.data_type());
             assert_eq!(size, x.len());
-            x
         });
         Self { values, data_type }
     }
1 change: 1 addition & 0 deletions src/arrow2/src/temporal_conversions.rs
@@ -444,6 +444,7 @@ fn chrono_tz_utf_to_timestamp_ns<O: Offset>(
 /// * parsed values with timezone other than `timezone` are converted to `timezone`.
 /// * parsed values without timezone are null. Use [`utf8_to_naive_timestamp_ns`] to parse naive timezones.
 /// * Null elements remain null; non-parsable elements are null.
+///
 /// The feature `"chrono-tz"` enables IANA and zoneinfo formats for `timezone`.
 /// # Error
 /// This function errors iff `timezone` is not parsable to an offset.
8 changes: 4 additions & 4 deletions src/common/treenode/src/lib.rs
@@ -49,14 +49,14 @@ macro_rules! handle_transform_recursion {
 /// There are three categories of TreeNode APIs:
 ///
 /// 1. "Inspecting" APIs to traverse a tree of `&TreeNodes`:
-///   [`apply`], [`visit`], [`exists`].
+///    [`apply`], [`visit`], [`exists`].
 ///
 /// 2. "Transforming" APIs that traverse and consume a tree of `TreeNode`s
-///   producing possibly changed `TreeNode`s: [`transform`], [`transform_up`],
-///   [`transform_down`], [`transform_down_up`], and [`rewrite`].
+///    producing possibly changed `TreeNode`s: [`transform`], [`transform_up`],
+///    [`transform_down`], [`transform_down_up`], and [`rewrite`].
 ///
 /// 3. Internal APIs used to implement the `TreeNode` API: [`apply_children`],
-///   and [`map_children`].
+///    and [`map_children`].
 ///
 /// | Traversal Order | Inspecting | Transforming |
 /// | --- | --- | --- |
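Here the fix is indentation rather than a blank line (the flattened extraction hides it, so the three-versus-four-space reconstruction above is my inference): continuation lines of a numbered doc list must be indented to the item's content, or rustdoc treats them as a new paragraph, the same lazy-continuation lint family as the other doc fixes. Sketch:

    /// 1. "Inspecting" APIs to traverse a tree:
    ///    a four-space continuation stays inside item 1;
    ///   a three-space one triggers the lint on newer toolchains.
    pub struct TreeNodeDocs;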
1 change: 1 addition & 0 deletions src/daft-core/src/datatypes/dtype.rs
@@ -55,6 +55,7 @@ pub enum DataType {
 /// * As used in the Olson time zone database (the "tz database" or
 /// "tzdata"), such as "America/New_York"
 /// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30
+///
 /// When the timezone is not specified, the timestamp is considered to have no timezone
 /// and is represented _as is_
 Timestamp(TimeUnit, Option<String>),
1 change: 1 addition & 0 deletions src/daft-dsl/src/lit.rs
@@ -54,6 +54,7 @@ pub enum LiteralValue {
 /// * As used in the Olson time zone database (the "tz database" or
 /// "tzdata"), such as "America/New_York"
 /// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30
+///
 /// When the timezone is not specified, the timestamp is considered to have no timezone
 /// and is represented _as is_
 Timestamp(i64, TimeUnit, Option<String>),
3 changes: 3 additions & 0 deletions src/parquet2/src/read/compression.rs
@@ -141,6 +141,7 @@ fn decompress_reuse<P: PageIterator>(
 /// ### un-compressed pages:
 /// > page iter: `a` is swapped with `b`
 /// > decompress iter: `b` is swapped with `d`, `b` is swapped with `a`
+///
 /// therefore:
 /// * `PageReader` has its buffer back
 /// * `Decompressor`'s buffer is un-used
@@ -152,10 +153,12 @@
 /// > * `b` is swapped with `a`
 /// > * `c` is moved to `d`
 /// > * (next iteration): `d` is moved to `c`
+///
 /// therefore, while the page is available:
 /// * `PageReader` has its buffer back
 /// * `Decompressor`'s buffer empty
 /// * `DecompressedPage` has the decompressed buffer
+///
 /// after the page is used:
 /// * `PageReader` has its buffer back
 /// * `Decompressor` has its buffer back
53 changes: 39 additions & 14 deletions tests/table/numeric/test_numeric.py
@@ -27,6 +27,19 @@
 ]
 
 
+def lists_close_with_nones(a, b):
+    if len(a) != len(b):
+        return False
+    for x, y in zip(a, b):
+        if x is None and y is None:
+            continue
+        if x is not None and y is not None:
+            np.testing.assert_allclose([x], [y])
+        else:
+            return False
+    return True
+
+
 @pytest.mark.parametrize("data_dtype, op", itertools.product(daft_numeric_types, OPS))
 def test_table_numeric_expressions(data_dtype, op) -> None:
     a, b = [5, 6, 7, 8], [1, 2, 3, 4]
@@ -392,12 +405,16 @@ def test_table_log10_bad_input() -> None:
 def test_table_numeric_log(base: float) -> None:
     table = MicroPartition.from_pydict({"a": [0.1, 0.01, 1.5, None], "b": [1, 10, None, None]})
     log_table = table.eval_expression_list([col("a").log(base), col("b").log(base)])
-    assert [
-        math.log(v, base) if v is not None else v for v in table.get_column("a").to_pylist()
-    ] == log_table.get_column("a").to_pylist()
-    assert [
-        math.log(v, base) if v is not None else v for v in table.get_column("b").to_pylist()
-    ] == log_table.get_column("b").to_pylist()
+
+    assert lists_close_with_nones(
+        log_table.get_column("a").to_pylist(),
+        [math.log(v, base) if v is not None else None for v in table.get_column("a").to_pylist()],
+    )
+
+    assert lists_close_with_nones(
+        log_table.get_column("b").to_pylist(),
+        [math.log(v, base) if v is not None else None for v in table.get_column("b").to_pylist()],
+    )
 
 
 def test_table_log_bad_input() -> None:
@@ -410,12 +427,14 @@ def test_table_log_bad_input() -> None:
 def test_table_numeric_ln() -> None:
     table = MicroPartition.from_pydict({"a": [0.1, 0.01, 1.5, None], "b": [1, 10, None, None]})
     ln_table = table.eval_expression_list([col("a").ln(), col("b").ln()])
-    assert [math.log(v) if v is not None else v for v in table.get_column("a").to_pylist()] == ln_table.get_column(
-        "a"
-    ).to_pylist()
-    assert [math.log(v) if v is not None else v for v in table.get_column("b").to_pylist()] == ln_table.get_column(
-        "b"
-    ).to_pylist()
+    assert lists_close_with_nones(
+        [math.log(v) if v is not None else v for v in table.get_column("a").to_pylist()],
+        ln_table.get_column("a").to_pylist(),
+    )
+    assert lists_close_with_nones(
+        [math.log(v) if v is not None else v for v in table.get_column("b").to_pylist()],
+        ln_table.get_column("b").to_pylist(),
+    )
 
 
 def test_table_ln_bad_input() -> None:
@@ -428,8 +447,14 @@ def test_table_ln_bad_input() -> None:
 def test_table_exp() -> None:
     table = MicroPartition.from_pydict({"a": [0.1, 0.01, None], "b": [1, 10, None]})
     exp_table = table.eval_expression_list([col("a").exp(), col("b").exp()])
-    assert [1.1051709180756477, 1.010050167084168, None] == exp_table.get_column("a").to_pylist()
-    assert [2.718281828459045, 22026.465794806718, None] == exp_table.get_column("b").to_pylist()
+    assert lists_close_with_nones(
+        [1.1051709180756477, 1.010050167084168, None],
+        exp_table.get_column("a").to_pylist(),
+    )
+    assert lists_close_with_nones(
+        [2.718281828459045, 22026.465794806718, None],
+        exp_table.get_column("b").to_pylist(),
+    )
 
 
 def test_table_numeric_sqrt() -> None:
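The thread through all four test hunks: exact `==` comparisons on floats are replaced by the new `lists_close_with_nones` helper, which tolerates last-digit differences (via `np.testing.assert_allclose`) while still requiring the `None`s to line up, matching the one-ULP doctest change earlier in this PR.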
2 changes: 1 addition & 1 deletion tools/check_for_rustls.sh
@@ -1,2 +1,2 @@
 #!/bin/bash
-cargo tree --workspace --all-features | grep -vzq rustls
+cargo tree --workspace --all-features | grep -v 'rustls-pemfile' | grep -vzq 'rustls'
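For context on the script: `grep -z` reads the whole input as one record and `-vq` inverts the match quietly, so the command exits non-zero (failing CI) whenever `rustls` appears anywhere in the dependency tree. The new `grep -v 'rustls-pemfile'` stage drops that crate's lines first, so it alone no longer trips the check.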