43 changes: 12 additions & 31 deletions .github/workflows/pr-test-suite.yml
@@ -71,30 +71,32 @@ jobs:
unit-test:
needs: skipcheck
if: ${{ needs.skipcheck.outputs.skip == 'false' }}
runs-on: ${{ matrix.os }}
timeout-minutes: 45
runs-on: ubuntu-latest
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
python-version: ['3.9', '3.12']
daft-runner: [ray, native]
pyarrow-version: [8.0.0, 19.0.1]
flotilla: [1, 0]
os: [ubuntu-latest, macos-latest]
exclude:
- daft-runner: native
flotilla: 0
- daft-runner: ray
pyarrow-version: 8.0.0
- python-version: '3.9'
os: macos-latest
- pyarrow-version: 8.0.0
os: macos-latest
- python-version: '3.12'
pyarrow-version: 8.0.0
- os: macos-latest
flotilla: 0
steps:
- name: Free Disk Space (Ubuntu) # only run on ubuntu
uses: jlumbroso/free-disk-space@main
with:
tool-cache: false

- run: |
sudo apt-get update
sudo apt-get install ffmpeg libsm6 libxext6 -y

- uses: actions/checkout@v4
- uses: moonrepo/setup-rust@v1
with:
@@ -135,27 +137,7 @@ jobs:
source .venv/bin/activate
uv pip install pyarrow==${{ matrix.pyarrow-version }}

# Rust code coverage does not work on ubuntu-latest, so we only run it on macOS
# For more info: https://github.com/Eventual-Inc/Daft/issues/3801
- name: Build library and Test with pytest (Linux)
if: ${{ (runner.os == 'Linux') }}
run: |
source .venv/bin/activate
maturin develop --uv
pytest --ignore tests/integration --collect-only -qq # run this to ensure no weird imports that result in `Collector` errors
pytest --cov=daft --ignore tests/integration --durations=0 | ./tools/capture-durations.sh "pytest_output.txt"
python tools/aggregate_test_durations.py pytest_output.txt
coverage combine -a --data-file='.coverage' || true
mkdir -p report-output
coverage xml -o ./report-output/coverage-${{ join(matrix.*, '-') }}.xml
env:
CARGO_TARGET_DIR: ./target

DAFT_RUNNER: ${{ matrix.daft-runner }}
DAFT_FLOTILLA: ${{ matrix.flotilla }}

- name: Build library and Test with pytest with code coverage (macOS)
if: ${{ (runner.os == 'macOS') }}
- name: Build library and Test with pytest
run: |
source .venv/bin/activate
cargo llvm-cov clean --workspace
@@ -180,7 +162,6 @@
DAFT_FLOTILLA: ${{ matrix.flotilla }}

- name: Upload coverage report
if: ${{ (runner.os == 'macOS') }}
uses: actions/upload-artifact@v4
with:
name: coverage-reports-unit-tests-${{ join(matrix.*, '-') }}
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -73,7 +73,7 @@ repos:
- id: fmt
name: Rust code Formatting
description: Format files with cargo fmt.
entry: cargo fmt
entry: cargo fmt --manifest-path Cargo.toml
language: system
types: [rust]
args: [--]
23 changes: 21 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default.

5 changes: 3 additions & 2 deletions Cargo.toml
@@ -118,7 +118,7 @@ crate-type = ["cdylib"]
name = "daft"

[package]
edition = "2021"
edition = "2024"
name = "daft"
publish = false
version = "0.3.0-dev0"
@@ -278,6 +278,7 @@ daft-sql = {path = "src/daft-sql"}
derive_builder = "0.20.2"
educe = "0.6.0"
futures = "0.3.30"
hashbrown = "0.16"
html-escape = "0.2.13"
image = {version = "0.25.5", default-features = false}
indexmap = "2.9.0"
@@ -454,5 +455,5 @@ wildcard_imports = "allow"
zero_sized_map_values = "allow"

[workspace.package]
edition = "2021"
edition = "2024"
version = "0.3.0-dev0"
2 changes: 1 addition & 1 deletion rust-toolchain.toml
@@ -1,4 +1,4 @@
[toolchain]
channel = "nightly-2025-01-19"
channel = "nightly-2025-09-03"
components = ["rustfmt", "clippy"]
profile = "minimal"
2 changes: 1 addition & 1 deletion src/arrow2/src/bitmap/immutable.rs
@@ -125,7 +125,7 @@ impl Bitmap {
///
/// The returned tuple contains:
/// * `.1`: The byte slice, truncated to the start of the first bit. So the start of the slice
/// is within the first 8 bits.
/// is within the first 8 bits.
/// * `.2`: The start offset in bits on a range `0 <= offsets < 8`.
/// * `.3`: The length in number of bits.
#[inline]
1 change: 1 addition & 0 deletions src/arrow2/src/compute/sort/row/fixed.rs
@@ -48,6 +48,7 @@ pub trait FixedLengthEncoding: Copy {

fn encode(self) -> Self::Encoded;

#[allow(dead_code)]
fn decode(encoded: Self::Encoded) -> Self;
}

15 changes: 7 additions & 8 deletions src/arrow2/src/compute/sort/row/mod.rs
@@ -36,17 +36,16 @@ use std::{
sync::Arc,
};

use crate::{
array::{Array, BinaryArray, BooleanArray, DictionaryArray, PrimitiveArray, Utf8Array},
datatypes::PhysicalType,
error::*,
};
use crate::{compute::sort::SortOptions, datatypes::DataType};

use self::{
dictionary::{compute_dictionary_mapping, encode_dictionary},
interner::OrderPreservingInterner,
};
use crate::{
array::{Array, BinaryArray, BooleanArray, DictionaryArray, PrimitiveArray, Utf8Array},
compute::sort::SortOptions,
datatypes::{DataType, PhysicalType},
error::*,
};

mod dictionary;
mod fixed;
@@ -378,7 +377,7 @@ impl<'a> Eq for Row<'a> {}
impl<'a> PartialOrd for Row<'a> {
#[inline]
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.data.cmp(other.data))
Some(self.cmp(other))
}
}

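The `partial_cmp` change above follows the canonical pattern for a type that also implements `Ord`: delegate to `cmp` so the two implementations can never disagree (the shape clippy's `non_canonical_partial_ord_impl` lint asks for). A minimal sketch, using a simplified `Row` that only carries the encoded bytes:

use std::cmp::Ordering;

#[derive(PartialEq, Eq)]
struct Row<'a> {
    data: &'a [u8],
}

impl<'a> Ord for Row<'a> {
    fn cmp(&self, other: &Self) -> Ordering {
        // The total order comes from the encoded bytes.
        self.data.cmp(other.data)
    }
}

impl<'a> PartialOrd for Row<'a> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        // Delegate to Ord::cmp instead of comparing fields a second time.
        Some(self.cmp(other))
    }
}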
8 changes: 2 additions & 6 deletions src/arrow2/src/io/parquet/mod.rs
@@ -19,12 +19,8 @@ impl From<parquet2::error::Error> for Error {
.to_string();
Error::ExternalFormat(message)
}
parquet2::error::Error::Transport(msg) => {
Error::Io(std::io::Error::new(std::io::ErrorKind::Other, msg))
}
parquet2::error::Error::IoError(msg) => {
Error::Io(std::io::Error::new(std::io::ErrorKind::Other, msg))
}
parquet2::error::Error::Transport(msg) => Error::Io(std::io::Error::other(msg)),
parquet2::error::Error::IoError(msg) => Error::Io(std::io::Error::other(msg)),
_ => Error::ExternalFormat(error.to_string()),
}
}
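The parquet error-mapping change above swaps `std::io::Error::new(std::io::ErrorKind::Other, msg)` for the shorter `std::io::Error::other(msg)` constructor, stable since Rust 1.74. A small standalone sketch with a hypothetical error message:

use std::io;

fn transport_failure(msg: &str) -> io::Error {
    // Equivalent to io::Error::new(io::ErrorKind::Other, msg), just shorter.
    io::Error::other(msg.to_string())
}

fn main() {
    let err = transport_failure("connection reset");
    assert_eq!(err.kind(), io::ErrorKind::Other);
}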
10 changes: 5 additions & 5 deletions src/arrow2/src/io/parquet/read/deserialize/nested_utils.rs
@@ -6,11 +6,12 @@ use parquet2::{
read::levels::get_bit_width,
};

use super::{
super::Pages,
utils::{DecodedState, MaybeNext, PageState},
};
use crate::{array::Array, bitmap::MutableBitmap, error::Result};

use super::utils::{DecodedState, MaybeNext};
use super::{super::Pages, utils::PageState};

/// trait describing deserialized repetition and definition levels
pub trait Nested: std::fmt::Debug + Send + Sync {
fn inner(&mut self) -> (Vec<i64>, Option<MutableBitmap>);
@@ -455,8 +456,7 @@ pub(super) fn extend<'a, D: NestedDecoder<'a>>(
/// * `decoded` - The state of our decoded values.
/// * `decoder` - The decoder for the leaf-level type.
/// * `additional` - The number of top-level rows to read for the current chunk. This is the
/// min of `chunk size - number of rows existing in the current chunk` and
/// `rows_remaining`.
/// min of `chunk size - number of rows existing in the current chunk` and `rows_remaining`.
#[allow(clippy::too_many_arguments)]
fn extend_offsets2<'a, D: NestedDecoder<'a>>(
page: &mut NestedPage<'a>,
4 changes: 4 additions & 0 deletions src/arrow2/src/lib.rs
@@ -2,6 +2,8 @@
#![allow(unused_unsafe)]
// don't want to deal with this right now
#![allow(deprecated)]
#![allow(mismatched_lifetime_syntaxes)]
#![allow(unused_parens)]
//
#![allow(clippy::len_without_is_empty)]
// this landed on 1.60. Let's not force everyone to bump just yet
@@ -17,6 +19,8 @@
#![allow(clippy::unnecessary_map_or)]
#![allow(clippy::manual_div_ceil)]
#![allow(clippy::map_all_any_identity)]
#![allow(clippy::manual_repeat_n)]
#![allow(clippy::manual_is_multiple_of)]
#![allow(unexpected_cfgs)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![cfg_attr(feature = "simd", feature(portable_simd))]
32 changes: 17 additions & 15 deletions src/arrow2/src/mmap/mod.rs
@@ -1,21 +1,23 @@
//! Memory maps regions defined on the IPC format into [`Array`].
use std::collections::VecDeque;
use std::sync::Arc;
use std::{collections::VecDeque, sync::Arc};

mod array;

use crate::array::Array;
use crate::chunk::Chunk;
use crate::datatypes::{DataType, Field};
use crate::error::Error;

use crate::io::ipc::read::file::{get_dictionary_batch, get_record_batch};
use crate::io::ipc::read::{first_dict_field, Dictionaries, FileMetadata};
use crate::io::ipc::read::{IpcBuffer, Node, OutOfSpecKind};
use crate::io::ipc::{IpcField, CONTINUATION_MARKER};

use arrow_format::ipc::planus::ReadAsRoot;
use arrow_format::ipc::{Block, MessageRef, RecordBatchRef};
use arrow_format::ipc::{planus::ReadAsRoot, Block, MessageRef, RecordBatchRef};

use crate::{
array::Array,
chunk::Chunk,
datatypes::{DataType, Field},
error::Error,
io::ipc::{
read::{
file::{get_dictionary_batch, get_record_batch},
first_dict_field, Dictionaries, FileMetadata, IpcBuffer, Node, OutOfSpecKind,
},
IpcField, CONTINUATION_MARKER,
},
};

fn read_message(
mut bytes: &[u8],
@@ -190,7 +192,7 @@ unsafe fn mmap_dictionary<T: AsRef<[u8]>>(

let chunk = _mmap_record(
&[field],
&[first_ipc_field.clone()],
std::slice::from_ref(first_ipc_field),
data.clone(),
batch,
offset,
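In the `mmap_dictionary` hunk above, `std::slice::from_ref(first_ipc_field)` replaces `&[first_ipc_field.clone()]`: it borrows a one-element slice directly from the existing value instead of cloning it into a temporary array. A minimal sketch with a hypothetical helper that only needs a slice:

fn total_len(fields: &[String]) -> usize {
    fields.iter().map(|f| f.len()).sum()
}

fn main() {
    let field = String::from("ipc_field");
    // One-element slice borrowed from `field`; no clone required.
    let n = total_len(std::slice::from_ref(&field));
    assert_eq!(n, 9);
}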
1 change: 0 additions & 1 deletion src/arrow2/src/types/simd/native.rs
@@ -8,6 +8,5 @@ native_simd!(i8x64, i8, 64, u64);
native_simd!(i16x32, i16, 32, u32);
native_simd!(i32x16, i32, 16, u16);
native_simd!(i64x8, i64, 8, u8);
native_simd!(f16x32, f16, 32, u32);
native_simd!(f32x16, f32, 16, u16);
native_simd!(f64x8, f64, 8, u8);
8 changes: 4 additions & 4 deletions src/common/arrow-ffi/src/lib.rs
@@ -50,8 +50,8 @@ pub fn to_py_array<'py>(
Box::new(ffi::export_array_to_c(fixed_array))
});

let schema_ptr: *const ffi::ArrowSchema = &*schema;
let array_ptr: *const ffi::ArrowArray = &*arrow_arr;
let schema_ptr: *const ffi::ArrowSchema = &raw const *schema;
let array_ptr: *const ffi::ArrowArray = &raw const *arrow_arr;

let array = pyarrow.getattr(pyo3::intern!(py, "Array"))?.call_method1(
pyo3::intern!(py, "_import_from_c"),
@@ -72,7 +72,7 @@ pub fn field_to_py(
pyarrow: &Bound<PyModule>,
) -> PyResult<PyObject> {
let schema = Box::new(ffi::export_field_to_c(field));
let schema_ptr: *const ffi::ArrowSchema = &*schema;
let schema_ptr: *const ffi::ArrowSchema = &raw const *schema;

let field = pyarrow.getattr(pyo3::intern!(py, "Field"))?.call_method1(
pyo3::intern!(py, "_import_from_c"),
@@ -89,7 +89,7 @@ pub fn dtype_to_py<'py>(
pyarrow: Bound<'py, PyModule>,
) -> PyResult<Bound<'py, PyAny>> {
let schema = Box::new(ffi::export_field_to_c(&Field::new("", dtype.clone(), true)));
let schema_ptr: *const ffi::ArrowSchema = &*schema;
let schema_ptr: *const ffi::ArrowSchema = &raw const *schema;

let field = pyarrow.getattr(pyo3::intern!(py, "Field"))?.call_method1(
pyo3::intern!(py, "_import_from_c"),
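The arrow-ffi changes above switch from `&*schema` (a reference immediately coerced to a raw pointer) to `&raw const *schema`, the raw-borrow syntax stabilized in Rust 1.82 that yields a `*const T` without creating an intermediate `&T`. A minimal sketch:

fn main() {
    let schema = Box::new(42u8);
    // Take a raw pointer to the boxed value without materializing a `&u8` first.
    let ptr: *const u8 = &raw const *schema;
    // Reading is fine here because `schema` is still alive and initialized.
    unsafe { assert_eq!(*ptr, 42) };
}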
1 change: 0 additions & 1 deletion src/common/daft-config/src/lib.rs
@@ -1,4 +1,3 @@
#![feature(let_chains)]
pub use common_io_config::IOConfig;
use serde::{Deserialize, Serialize};

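The dropped `#![feature(let_chains)]` attribute above works because `if let` chains are stable on the 2024 edition (Rust 1.88+), which this PR moves the workspace to. A minimal sketch of the now-stable syntax, using a made-up example:

fn first_even(values: &[Option<i32>]) -> Option<i32> {
    for v in values {
        // Chain a pattern match and a boolean condition in a single `if`.
        if let Some(x) = v
            && x % 2 == 0
        {
            return Some(*x);
        }
    }
    None
}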
4 changes: 2 additions & 2 deletions src/common/display/src/ascii.rs
@@ -1,6 +1,6 @@
use std::fmt;

use crate::{tree::TreeDisplay, DisplayLevel};
use crate::{DisplayLevel, tree::TreeDisplay};

// Print the tree recursively, and illustrate the tree structure with a single line per node + indentation.
fn fmt_tree_indent_style<'a, W: fmt::Write + 'a>(
@@ -42,7 +42,7 @@ pub fn fmt_tree_gitstyle<'a, W: fmt::Write + 'a>(
s: &'a mut W,
level: crate::DisplayLevel,
) -> fmt::Result {
use terminal_size::{terminal_size, Width};
use terminal_size::{Width, terminal_size};

// Print the current node.
// e.g. | | * <node contents line 1>
1 change: 0 additions & 1 deletion src/common/display/src/lib.rs
@@ -1,4 +1,3 @@
#![feature(let_chains)]
pub mod ascii;
pub mod mermaid;
pub mod table_display;