From 3fe5030948796196e97786f87a97e46eab1b5cbf Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 20 Nov 2025 09:23:19 -0800 Subject: [PATCH 01/58] infra: add docs for cleaning up testpypi artifacts (#1855) ## Which issue does this PR close? - Closes #1837 ## What changes are included in this PR? Used the [`pypi-cleanup`](https://github.com/arcivanov/pypi-cleanup) utility to bulk cleanup old artifacts of pyiceberg-core from testpypi for #1837. The alternative is to do this manually in the UI for each artifact. Adding this as reminder / instruction when the nightly pipeline fails again ## Are these changes tested? Co-authored-by: Fokko Driesprong --- .github/workflows/release_python_nightly.yml | 27 ++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/.github/workflows/release_python_nightly.yml b/.github/workflows/release_python_nightly.yml index 9c27554f9a..5ed6b1ed5f 100644 --- a/.github/workflows/release_python_nightly.yml +++ b/.github/workflows/release_python_nightly.yml @@ -128,9 +128,36 @@ jobs: - name: List downloaded artifacts run: ls -R bindings/python/dist - name: Publish to TestPyPI + id: publish-testpypi + continue-on-error: true uses: pypa/gh-action-pypi-publish@release/v1 with: repository-url: https://test.pypi.org/legacy/ skip-existing: true packages-dir: bindings/python/dist verbose: true + - name: Display error message on publish failure + if: steps.publish-testpypi.outcome == 'failure' + run: | + echo "::error::Failed to publish to TestPyPI" + echo "" + echo "⚠️ TestPyPI Publish Failed" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "" + echo "This may be due to TestPyPI storage limits." 
+ echo "See: https://docs.pypi.org/project-management/storage-limits" + echo "" + echo "To resolve this issue, use the pypi-cleanup utility to clean up old TestPyPI artifacts:" + echo "https://pypi.org/project/pypi-cleanup/" + echo "" + echo " uvx pypi-cleanup --package pyiceberg-core --host https://test.pypi.org/ \\" + echo " --verbose -d 10 --do-it --username " + echo "" + echo "Requirements:" + echo " • Must be a maintainer for pyiceberg-core on TestPyPI" + echo " (https://test.pypi.org/project/pyiceberg-core)" + echo " • Requires TestPyPI password and 2FA" + echo " • ⚠️ ONLY do this for TestPyPI, NOT for production PyPI!" + echo "" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + exit 1 From 24932ba6cf3f72973a566e87c973e0ca92663a25 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 14:54:20 +0800 Subject: [PATCH 02/58] chore(deps): Bump actions/checkout from 5 to 6 (#1883) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/checkout](https://github.com/actions/checkout) from 5 to 6.
Release notes

Sourced from actions/checkout's releases.

v6.0.0

What's Changed

Full Changelog: https://github.com/actions/checkout/compare/v5.0.0...v6.0.0

v6-beta

What's Changed

Updated persist-credentials to store the credentials under $RUNNER_TEMP instead of directly in the local git config.

This requires a minimum Actions Runner version of v2.329.0 to access the persisted credentials for Docker container action scenarios.

v5.0.1

What's Changed

Full Changelog: https://github.com/actions/checkout/compare/v5...v5.0.1

Changelog

Sourced from actions/checkout's changelog.

Changelog

V6.0.0

V5.0.1

V5.0.0

V4.3.1

V4.3.0

v4.2.2

v4.2.1

v4.2.0

v4.1.7

v4.1.6

v4.1.5

... (truncated)

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/checkout&package-manager=github_actions&previous-version=5&new-version=6)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/audit.yml | 2 +- .github/workflows/bindings_python_ci.yml | 6 +++--- .github/workflows/ci.yml | 10 +++++----- .github/workflows/ci_typos.yml | 2 +- .github/workflows/publish.yml | 2 +- .github/workflows/release_python.yml | 4 ++-- .github/workflows/release_python_nightly.yml | 4 ++-- .github/workflows/website.yml | 2 +- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index 47b0c47874..28c8a3db75 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -40,7 +40,7 @@ jobs: security_audit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: diff --git a/.github/workflows/bindings_python_ci.yml b/.github/workflows/bindings_python_ci.yml index e9eabda2cb..ed2c03b763 100644 --- a/.github/workflows/bindings_python_ci.yml +++ b/.github/workflows/bindings_python_ci.yml @@ -44,7 +44,7 @@ jobs: check-rust: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Check format working-directory: "bindings/python" run: cargo fmt --all -- --check @@ -55,7 +55,7 @@ jobs: check-python: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: astral-sh/setup-uv@v7 with: version: "0.9.3" @@ -81,7 +81,7 @@ jobs: - macos-latest - windows-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: actions/setup-python@v6 with: python-version: 3.12 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c0f360b982..7b7e81837e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,7 @@ jobs: - ubuntu-latest - macos-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust 
toolchain uses: ./.github/actions/setup-builder @@ -101,7 +101,7 @@ jobs: root-reserve-mb: 10240 temp-reserve-mb: 10240 - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder @@ -126,7 +126,7 @@ jobs: - macos-latest - windows-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder @@ -158,7 +158,7 @@ jobs: root-reserve-mb: 10240 temp-reserve-mb: 10240 - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder @@ -180,7 +180,7 @@ jobs: name: Verify MSRV runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Install protoc uses: arduino/setup-protoc@v3 with: diff --git a/.github/workflows/ci_typos.yml b/.github/workflows/ci_typos.yml index 4c60369482..3e63a6926c 100644 --- a/.github/workflows/ci_typos.yml +++ b/.github/workflows/ci_typos.yml @@ -40,6 +40,6 @@ jobs: env: FORCE_COLOR: 1 steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Check typos uses: crate-ci/typos@v1.39.2 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 71d35001da..634cd73519 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -45,7 +45,7 @@ jobs: - "crates/catalog/sql" - "crates/integrations/datafusion" steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder diff --git a/.github/workflows/release_python.yml b/.github/workflows/release_python.yml index e6b7021c9b..9646085c7e 100644 --- a/.github/workflows/release_python.yml +++ b/.github/workflows/release_python.yml @@ -85,7 +85,7 @@ jobs: runs-on: ubuntu-latest needs: [validate-release-tag] steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Install toml-cli if: ${{ 
needs.validate-release-tag.outputs.is-rc == 'true' }} @@ -128,7 +128,7 @@ jobs: } - { os: ubuntu-latest, target: "armv7l" } steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Install toml-cli if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }} diff --git a/.github/workflows/release_python_nightly.yml b/.github/workflows/release_python_nightly.yml index 5ed6b1ed5f..1d6faaeee9 100644 --- a/.github/workflows/release_python_nightly.yml +++ b/.github/workflows/release_python_nightly.yml @@ -43,7 +43,7 @@ jobs: if: github.repository == 'apache/iceberg-rust' # Only run for apache repo runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: ./.github/actions/overwrite-package-version # Overwrite package version with timestamp with: @@ -78,7 +78,7 @@ jobs: } - { os: ubuntu-latest, target: "armv7l" } steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: ./.github/actions/overwrite-package-version # Overwrite package version with timestamp with: diff --git a/.github/workflows/website.yml b/.github/workflows/website.yml index c5925da6ae..1a52482b08 100644 --- a/.github/workflows/website.yml +++ b/.github/workflows/website.yml @@ -36,7 +36,7 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup mdBook uses: peaceiris/actions-mdbook@v2 From 18a757768b38c2f2b017a0eb2cbcb03e7e79748a Mon Sep 17 00:00:00 2001 From: Najib Boutaib <104867447+N-Boutaib@users.noreply.github.com> Date: Tue, 25 Nov 2025 11:56:00 +0100 Subject: [PATCH 03/58] Update apache-avro to v0.21.0 (#1881) ## Which issue does this PR close? None ## What changes are included in this PR? Upgrades `apache-avro` from `0.20.0` to `0.21.0` in order to get rid of the deprecated `xz2` crate. ## Are these changes tested? 
Build & tests succeeds --- Cargo.lock | 45 +++++++++++++++++++++++++++++++++++---------- Cargo.toml | 2 +- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 62478f32a0..4c82df79ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -169,6 +169,31 @@ dependencies = [ "zstd", ] +[[package]] +name = "apache-avro" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf" +dependencies = [ + "bigdecimal", + "bon", + "digest", + "log", + "miniz_oxide", + "num-bigint", + "quad-rand", + "rand 0.9.2", + "regex-lite", + "serde", + "serde_bytes", + "serde_json", + "strum 0.27.2", + "strum_macros 0.27.2", + "thiserror 2.0.17", + "uuid", + "zstd", +] + [[package]] name = "ar_archive_writer" version = "0.2.0" @@ -1925,7 +1950,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1" dependencies = [ "ahash 0.8.12", - "apache-avro", + "apache-avro 0.20.0", "arrow", "arrow-ipc", "base64 0.22.1", @@ -1999,7 +2024,7 @@ version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "10d40b6953ebc9099b37adfd12fde97eb73ff0cee44355c6dea64b8a4537d561" dependencies = [ - "apache-avro", + "apache-avro 0.20.0", "arrow", "async-trait", "bytes", @@ -2642,7 +2667,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2784,7 +2809,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3496,7 +3521,7 @@ name = "iceberg" version = "0.7.0" dependencies = [ "anyhow", - "apache-avro", + "apache-avro 0.21.0", "array-init", "arrow-arith", "arrow-array", @@ -4524,7 +4549,7 @@ version = 
"0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5422,7 +5447,7 @@ dependencies = [ "once_cell", "socket2 0.6.1", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -5879,7 +5904,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -6951,7 +6976,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -7777,7 +7802,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index c10c01d94a..7ca365dce2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,7 +40,7 @@ rust-version = "1.87" [workspace.dependencies] anyhow = "1.0.72" -apache-avro = { version = "0.20", features = ["zstandard"] } +apache-avro = { version = "0.21", features = ["zstandard"] } array-init = "2" arrow-arith = "56.2" arrow-array = "56.2" From 7c2d4c3abd6c5496c7ecb76854b6ee0d7295369c Mon Sep 17 00:00:00 2001 From: Jonathan Chen Date: Tue, 25 Nov 2025 20:32:33 -0500 Subject: [PATCH 04/58] docs: Clarify functionality of `SnapshotProduceOperation` (#1874) ## Which issue does this PR close? - Closes #. ## What changes are included in this PR? While refreshing myself on the internals of iceberg-rust, I felt that `SnapshotproduceOperation` should have documentation to be more clear to anybody who wants to work on Iceberg-rust ## Use of LLM Clarification I did use LLM to generate these docs however I reviewed it myself. 
--- crates/iceberg/src/transaction/snapshot.rs | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/crates/iceberg/src/transaction/snapshot.rs b/crates/iceberg/src/transaction/snapshot.rs index 6b3d0e4ff4..d59828ce31 100644 --- a/crates/iceberg/src/transaction/snapshot.rs +++ b/crates/iceberg/src/transaction/snapshot.rs @@ -34,13 +34,53 @@ use crate::{Error, ErrorKind, TableRequirement, TableUpdate}; const META_ROOT_PATH: &str = "metadata"; +/// A trait that defines how different table operations produce new snapshots. +/// +/// `SnapshotProduceOperation` is used by [`SnapshotProducer`] to customize snapshot creation +/// based on the type of operation being performed (e.g., `Append`, `Overwrite`, `Delete`, etc.). +/// Each operation type implements this trait to specify: +/// - Which operation type to record in the snapshot summary +/// - Which existing manifest files should be included in the new snapshot +/// - Which manifest entries should be marked as deleted +/// +/// # When it accomplishes +/// +/// This trait is used during the snapshot creation process in [`SnapshotProducer::commit()`]: +/// +/// 1. **Operation Type Recording**: The `operation()` method determines which operation type +/// (e.g., `Operation::Append`, `Operation::Overwrite`) is recorded in the snapshot summary. +/// This metadata helps track what kind of change was made to the table. +/// +/// 2. **Manifest File Selection**: The `existing_manifest()` method determines which existing +/// manifest files from the current snapshot should be carried forward to the new snapshot. +/// For example: +/// - An `Append` operation typically includes all existing manifests plus new ones +/// - An `Overwrite` operation might exclude manifests for partitions being overwritten +/// +/// 3. **Delete Entry Processing**: The `delete_entries()` method is intended for future delete +/// operations to specify which manifest entries should be marked as deleted. 
pub(crate) trait SnapshotProduceOperation: Send + Sync { + /// Returns the operation type that will be recorded in the snapshot summary. + /// + /// This determines what kind of operation is being performed (e.g., `Append`, `Overwrite`), + /// which is stored in the snapshot metadata for tracking and auditing purposes. fn operation(&self) -> Operation; + + /// Returns manifest entries that should be marked as deleted in the new snapshot. #[allow(unused)] fn delete_entries( &self, snapshot_produce: &SnapshotProducer, ) -> impl Future>> + Send; + + /// Returns existing manifest files that should be included in the new snapshot. + /// + /// This method determines which manifest files from the current snapshot should be + /// carried forward to the new snapshot. The selection depends on the operation type: + /// + /// - **Append operations**: Typically include all existing manifests + /// - **Overwrite operations**: May exclude manifests for partitions being overwritten + /// - **Delete operations**: May exclude manifests for partitions being deleted fn existing_manifest( &self, snapshot_produce: &SnapshotProducer<'_>, From bf984c754630cd4b536853c81528b9b69a3dab4d Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Thu, 27 Nov 2025 04:50:40 -0800 Subject: [PATCH 05/58] feat(datafusion): Split IcebergTableProvider into static and non-static table provider (#1879) --- bindings/python/Cargo.lock | 1 + .../python/src/datafusion_table_provider.rs | 6 +- .../tests/shared_tests/datafusion.rs | 4 +- crates/integrations/datafusion/src/schema.rs | 6 +- .../integrations/datafusion/src/table/mod.rs | 468 ++++++++++++++---- .../src/table/table_provider_factory.rs | 8 +- .../tests/integration_datafusion_test.rs | 12 - 7 files changed, 390 insertions(+), 115 deletions(-) diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 8249414b8d..814c9afb35 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -2313,6 +2313,7 @@ dependencies = [ "chrono", 
"derive_builder", "expect-test", + "flate2", "fnv", "futures", "itertools 0.13.0", diff --git a/bindings/python/src/datafusion_table_provider.rs b/bindings/python/src/datafusion_table_provider.rs index b5e1bf952e..8db7223b34 100644 --- a/bindings/python/src/datafusion_table_provider.rs +++ b/bindings/python/src/datafusion_table_provider.rs @@ -23,7 +23,7 @@ use datafusion_ffi::table_provider::FFI_TableProvider; use iceberg::TableIdent; use iceberg::io::FileIO; use iceberg::table::StaticTable; -use iceberg_datafusion::table::IcebergTableProvider; +use iceberg_datafusion::table::IcebergStaticTableProvider; use pyo3::exceptions::PyRuntimeError; use pyo3::prelude::*; use pyo3::types::PyCapsule; @@ -32,7 +32,7 @@ use crate::runtime::runtime; #[pyclass(name = "IcebergDataFusionTable")] pub struct PyIcebergDataFusionTable { - inner: Arc, + inner: Arc, } #[pymethods] @@ -69,7 +69,7 @@ impl PyIcebergDataFusionTable { let table = static_table.into_table(); - IcebergTableProvider::try_new_from_table(table) + IcebergStaticTableProvider::try_new_from_table(table) .await .map_err(|e| { PyRuntimeError::new_err(format!("Failed to create table provider: {e}")) diff --git a/crates/integration_tests/tests/shared_tests/datafusion.rs b/crates/integration_tests/tests/shared_tests/datafusion.rs index 81bbb5f54c..60dd9f36c8 100644 --- a/crates/integration_tests/tests/shared_tests/datafusion.rs +++ b/crates/integration_tests/tests/shared_tests/datafusion.rs @@ -26,7 +26,7 @@ use datafusion::error::DataFusionError; use datafusion::prelude::SessionContext; use iceberg::{Catalog, CatalogBuilder, TableIdent}; use iceberg_catalog_rest::RestCatalogBuilder; -use iceberg_datafusion::IcebergTableProvider; +use iceberg_datafusion::IcebergStaticTableProvider; use parquet::arrow::PARQUET_FIELD_ID_META_KEY; use crate::get_shared_containers; @@ -47,7 +47,7 @@ async fn test_basic_queries() -> Result<(), DataFusionError> { let ctx = SessionContext::new(); let table_provider = Arc::new( - 
IcebergTableProvider::try_new_from_table(table) + IcebergStaticTableProvider::try_new_from_table(table) .await .unwrap(), ); diff --git a/crates/integrations/datafusion/src/schema.rs b/crates/integrations/datafusion/src/schema.rs index 3920ee73ca..31bbdbd67f 100644 --- a/crates/integrations/datafusion/src/schema.rs +++ b/crates/integrations/datafusion/src/schema.rs @@ -28,6 +28,7 @@ use iceberg::inspect::MetadataTableType; use iceberg::{Catalog, NamespaceIdent, Result}; use crate::table::IcebergTableProvider; +use crate::to_datafusion_error; /// Represents a [`SchemaProvider`] for the Iceberg [`Catalog`], managing /// access to table providers within a specific namespace. @@ -113,7 +114,10 @@ impl SchemaProvider for IcebergSchemaProvider { let metadata_table_type = MetadataTableType::try_from(metadata_table_name).map_err(DataFusionError::Plan)?; if let Some(table) = self.tables.get(table_name) { - let metadata_table = table.metadata_table(metadata_table_type); + let metadata_table = table + .metadata_table(metadata_table_type) + .await + .map_err(to_datafusion_error)?; return Ok(Some(Arc::new(metadata_table))); } else { return Ok(None); diff --git a/crates/integrations/datafusion/src/table/mod.rs b/crates/integrations/datafusion/src/table/mod.rs index 42a3baad3b..8527668d6c 100644 --- a/crates/integrations/datafusion/src/table/mod.rs +++ b/crates/integrations/datafusion/src/table/mod.rs @@ -15,6 +15,16 @@ // specific language governing permissions and limitations // under the License. +//! Iceberg table providers for DataFusion. +//! +//! This module provides two table provider implementations: +//! +//! - [`IcebergTableProvider`]: Catalog-backed provider with automatic metadata refresh. +//! Use for write operations and when you need to see the latest table state. +//! +//! - [`IcebergStaticTableProvider`]: Static provider for read-only access to a specific +//! table snapshot. Use for consistent analytical queries or time-travel scenarios. 
+ pub mod metadata_table; pub mod table_provider_factory; @@ -38,98 +48,61 @@ use iceberg::table::Table; use iceberg::{Catalog, Error, ErrorKind, NamespaceIdent, Result, TableIdent}; use metadata_table::IcebergMetadataTableProvider; +use crate::error::to_datafusion_error; use crate::physical_plan::commit::IcebergCommitExec; use crate::physical_plan::project::project_with_partition; use crate::physical_plan::repartition::repartition; use crate::physical_plan::scan::IcebergTableScan; use crate::physical_plan::write::IcebergWriteExec; -/// Represents a [`TableProvider`] for the Iceberg [`Catalog`], -/// managing access to a [`Table`]. +/// Catalog-backed table provider with automatic metadata refresh. +/// +/// This provider loads fresh table metadata from the catalog on every scan and write +/// operation, ensuring you always see the latest table state. Use this when you need +/// write operations or want to see the most up-to-date data. +/// +/// For read-only access to a specific snapshot without catalog overhead, use +/// [`IcebergStaticTableProvider`] instead. #[derive(Debug, Clone)] pub struct IcebergTableProvider { - /// A table in the catalog. - table: Table, - /// Table snapshot id that will be queried via this provider. - snapshot_id: Option, - /// A reference-counted arrow `Schema`. + /// The catalog that manages this table + catalog: Arc, + /// The table identifier (namespace + name) + table_ident: TableIdent, + /// A reference-counted arrow `Schema` (cached at construction) schema: ArrowSchemaRef, - /// The catalog that the table belongs to. - catalog: Option>, } impl IcebergTableProvider { - pub(crate) fn new(table: Table, schema: ArrowSchemaRef) -> Self { - IcebergTableProvider { - table, - snapshot_id: None, - schema, - catalog: None, - } - } - /// Asynchronously tries to construct a new [`IcebergTableProvider`] - /// using the given client and table name to fetch an actual [`Table`] - /// in the provided namespace. 
+ /// Creates a new catalog-backed table provider. + /// + /// Loads the table once to get the initial schema, then stores the catalog + /// reference for future metadata refreshes on each operation. pub(crate) async fn try_new( - client: Arc, + catalog: Arc, namespace: NamespaceIdent, name: impl Into, ) -> Result { - let ident = TableIdent::new(namespace, name.into()); - let table = client.load_table(&ident).await?; + let table_ident = TableIdent::new(namespace, name.into()); + // Load table once to get initial schema + let table = catalog.load_table(&table_ident).await?; let schema = Arc::new(schema_to_arrow_schema(table.metadata().current_schema())?); Ok(IcebergTableProvider { - table, - snapshot_id: None, - schema, - catalog: Some(client), - }) - } - - /// Asynchronously tries to construct a new [`IcebergTableProvider`] - /// using the given table. Can be used to create a table provider from an existing table regardless of the catalog implementation. - pub async fn try_new_from_table(table: Table) -> Result { - let schema = Arc::new(schema_to_arrow_schema(table.metadata().current_schema())?); - Ok(IcebergTableProvider { - table, - snapshot_id: None, - schema, - catalog: None, - }) - } - - /// Asynchronously tries to construct a new [`IcebergTableProvider`] - /// using a specific snapshot of the given table. Can be used to create a table provider from an existing table regardless of the catalog implementation. 
- pub async fn try_new_from_table_snapshot(table: Table, snapshot_id: i64) -> Result { - let snapshot = table - .metadata() - .snapshot_by_id(snapshot_id) - .ok_or_else(|| { - Error::new( - ErrorKind::Unexpected, - format!( - "snapshot id {snapshot_id} not found in table {}", - table.identifier().name() - ), - ) - })?; - let schema = snapshot.schema(table.metadata())?; - let schema = Arc::new(schema_to_arrow_schema(&schema)?); - Ok(IcebergTableProvider { - table, - snapshot_id: Some(snapshot_id), + catalog, + table_ident, schema, - catalog: None, }) } - pub(crate) fn metadata_table(&self, r#type: MetadataTableType) -> IcebergMetadataTableProvider { - IcebergMetadataTableProvider { - table: self.table.clone(), - r#type, - } + pub(crate) async fn metadata_table( + &self, + r#type: MetadataTableType, + ) -> Result { + // Load fresh table metadata for metadata table access + let table = self.catalog.load_table(&self.table_ident).await?; + Ok(IcebergMetadataTableProvider { table, r#type }) } } @@ -154,9 +127,17 @@ impl TableProvider for IcebergTableProvider { filters: &[Expr], _limit: Option, ) -> DFResult> { + // Load fresh table metadata from catalog + let table = self + .catalog + .load_table(&self.table_ident) + .await + .map_err(to_datafusion_error)?; + + // Create scan with fresh metadata (always use current snapshot) Ok(Arc::new(IcebergTableScan::new( - self.table.clone(), - self.snapshot_id, + table, + None, // Always use current snapshot for catalog-backed provider self.schema.clone(), projection, filters, @@ -177,17 +158,18 @@ impl TableProvider for IcebergTableProvider { input: Arc, _insert_op: InsertOp, ) -> DFResult> { - let Some(catalog) = self.catalog.clone() else { - return Err(DataFusionError::Execution( - "Catalog cannot be none for insert_into".to_string(), - )); - }; + // Load fresh table metadata from catalog + let table = self + .catalog + .load_table(&self.table_ident) + .await + .map_err(to_datafusion_error)?; - let partition_spec = 
self.table.metadata().default_partition_spec(); + let partition_spec = table.metadata().default_partition_spec(); // Step 1: Project partition values for partitioned tables let plan_with_partition = if !partition_spec.is_unpartitioned() { - project_with_partition(input, &self.table)? + project_with_partition(input, &table)? } else { input }; @@ -200,14 +182,11 @@ impl TableProvider for IcebergTableProvider { ) })?; - let repartitioned_plan = repartition( - plan_with_partition, - self.table.metadata_ref(), - target_partitions, - )?; + let repartitioned_plan = + repartition(plan_with_partition, table.metadata_ref(), target_partitions)?; let write_plan = Arc::new(IcebergWriteExec::new( - self.table.clone(), + table.clone(), repartitioned_plan, self.schema.clone(), )); @@ -216,21 +195,139 @@ impl TableProvider for IcebergTableProvider { let coalesce_partitions = Arc::new(CoalescePartitionsExec::new(write_plan)); Ok(Arc::new(IcebergCommitExec::new( - self.table.clone(), - catalog, + table, + self.catalog.clone(), coalesce_partitions, self.schema.clone(), ))) } } +/// Static table provider for read-only snapshot access. +/// +/// This provider holds a cached table instance and does not refresh metadata or support +/// write operations. Use this for consistent analytical queries, time-travel scenarios, +/// or when you want to avoid catalog overhead. +/// +/// For catalog-backed tables with write support and automatic refresh, use +/// [`IcebergTableProvider`] instead. +#[derive(Debug, Clone)] +pub struct IcebergStaticTableProvider { + /// The static table instance (never refreshed) + table: Table, + /// Optional snapshot ID for this static view + snapshot_id: Option, + /// A reference-counted arrow `Schema` + schema: ArrowSchemaRef, +} + +impl IcebergStaticTableProvider { + /// Creates a static provider from a table instance. + /// + /// Uses the table's current snapshot for all queries. Does not support write operations. 
+ pub async fn try_new_from_table(table: Table) -> Result { + let schema = Arc::new(schema_to_arrow_schema(table.metadata().current_schema())?); + Ok(IcebergStaticTableProvider { + table, + snapshot_id: None, + schema, + }) + } + + /// Creates a static provider for a specific table snapshot. + /// + /// Queries the specified snapshot for all operations. Useful for time-travel queries. + /// Does not support write operations. + pub async fn try_new_from_table_snapshot(table: Table, snapshot_id: i64) -> Result { + let snapshot = table + .metadata() + .snapshot_by_id(snapshot_id) + .ok_or_else(|| { + Error::new( + ErrorKind::Unexpected, + format!( + "snapshot id {snapshot_id} not found in table {}", + table.identifier().name() + ), + ) + })?; + let table_schema = snapshot.schema(table.metadata())?; + let schema = Arc::new(schema_to_arrow_schema(&table_schema)?); + Ok(IcebergStaticTableProvider { + table, + snapshot_id: Some(snapshot_id), + schema, + }) + } +} + +#[async_trait] +impl TableProvider for IcebergStaticTableProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> ArrowSchemaRef { + self.schema.clone() + } + + fn table_type(&self) -> TableType { + TableType::Base + } + + async fn scan( + &self, + _state: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + _limit: Option, + ) -> DFResult> { + // Use cached table (no refresh) + Ok(Arc::new(IcebergTableScan::new( + self.table.clone(), + self.snapshot_id, + self.schema.clone(), + projection, + filters, + ))) + } + + fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> DFResult> { + // Push down all filters, as a single source of truth, the scanner will drop the filters which couldn't be push down + Ok(vec![TableProviderFilterPushDown::Inexact; filters.len()]) + } + + async fn insert_into( + &self, + _state: &dyn Session, + _input: Arc, + _insert_op: InsertOp, + ) -> DFResult> { + Err(to_datafusion_error(Error::new( + ErrorKind::FeatureUnsupported, + "Write 
operations are not supported on IcebergStaticTableProvider. \ + Use IcebergTableProvider with a catalog for write support." + .to_string(), + ))) + } +} + #[cfg(test)] mod tests { + use std::collections::HashMap; + use std::sync::Arc; + use datafusion::common::Column; use datafusion::prelude::SessionContext; - use iceberg::TableIdent; use iceberg::io::FileIO; + use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder}; + use iceberg::spec::{NestedField, PrimitiveType, Schema, Type}; use iceberg::table::{StaticTable, Table}; + use iceberg::{Catalog, CatalogBuilder, NamespaceIdent, TableCreation, TableIdent}; + use tempfile::TempDir; use super::*; @@ -253,10 +350,59 @@ mod tests { static_table.into_table() } + async fn get_test_catalog_and_table() -> (Arc, NamespaceIdent, String, TempDir) { + let temp_dir = TempDir::new().unwrap(); + let warehouse_path = temp_dir.path().to_str().unwrap().to_string(); + + let catalog = MemoryCatalogBuilder::default() + .load( + "memory", + HashMap::from([(MEMORY_CATALOG_WAREHOUSE.to_string(), warehouse_path.clone())]), + ) + .await + .unwrap(); + + let namespace = NamespaceIdent::new("test_ns".to_string()); + catalog + .create_namespace(&namespace, HashMap::new()) + .await + .unwrap(); + + let schema = Schema::builder() + .with_schema_id(0) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build() + .unwrap(); + + let table_creation = TableCreation::builder() + .name("test_table".to_string()) + .location(format!("{}/test_table", warehouse_path)) + .schema(schema) + .properties(HashMap::new()) + .build(); + + catalog + .create_table(&namespace, table_creation) + .await + .unwrap(); + + ( + Arc::new(catalog), + namespace, + "test_table".to_string(), + temp_dir, + ) + } + + // Tests for IcebergStaticTableProvider + #[tokio::test] - async fn test_try_new_from_table() { + async fn 
test_static_provider_from_table() { let table = get_test_table_from_metadata_file().await; - let table_provider = IcebergTableProvider::try_new_from_table(table.clone()) + let table_provider = IcebergStaticTableProvider::try_new_from_table(table.clone()) .await .unwrap(); let ctx = SessionContext::new(); @@ -278,11 +424,11 @@ mod tests { } #[tokio::test] - async fn test_try_new_from_table_snapshot() { + async fn test_static_provider_from_snapshot() { let table = get_test_table_from_metadata_file().await; let snapshot_id = table.metadata().snapshots().next().unwrap().snapshot_id(); let table_provider = - IcebergTableProvider::try_new_from_table_snapshot(table.clone(), snapshot_id) + IcebergStaticTableProvider::try_new_from_table_snapshot(table.clone(), snapshot_id) .await .unwrap(); let ctx = SessionContext::new(); @@ -304,16 +450,152 @@ mod tests { } #[tokio::test] - async fn test_physical_input_schema_consistent_with_logical_input_schema() { + async fn test_static_provider_rejects_writes() { + let table = get_test_table_from_metadata_file().await; + let table_provider = IcebergStaticTableProvider::try_new_from_table(table.clone()) + .await + .unwrap(); + let ctx = SessionContext::new(); + ctx.register_table("mytable", Arc::new(table_provider)) + .unwrap(); + + // Attempt to insert into the static provider should fail + let result = ctx.sql("INSERT INTO mytable VALUES (1, 2, 3)").await; + + // The error should occur during planning or execution + // We expect an error indicating write operations are not supported + assert!( + result.is_err() || { + let df = result.unwrap(); + df.collect().await.is_err() + } + ); + } + + #[tokio::test] + async fn test_static_provider_scan() { let table = get_test_table_from_metadata_file().await; - let table_provider = IcebergTableProvider::try_new_from_table(table.clone()) + let table_provider = IcebergStaticTableProvider::try_new_from_table(table.clone()) .await .unwrap(); let ctx = SessionContext::new(); 
ctx.register_table("mytable", Arc::new(table_provider)) .unwrap(); + + // Test that scan operations work correctly let df = ctx.sql("SELECT count(*) FROM mytable").await.unwrap(); let physical_plan = df.create_physical_plan().await; - assert!(physical_plan.is_ok()) + assert!(physical_plan.is_ok()); + } + + // Tests for IcebergTableProvider + + #[tokio::test] + async fn test_catalog_backed_provider_creation() { + let (catalog, namespace, table_name, _temp_dir) = get_test_catalog_and_table().await; + + // Test creating a catalog-backed provider + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + // Verify the schema is loaded correctly + let schema = provider.schema(); + assert_eq!(schema.fields().len(), 2); + assert_eq!(schema.field(0).name(), "id"); + assert_eq!(schema.field(1).name(), "name"); + } + + #[tokio::test] + async fn test_catalog_backed_provider_scan() { + let (catalog, namespace, table_name, _temp_dir) = get_test_catalog_and_table().await; + + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + ctx.register_table("test_table", Arc::new(provider)) + .unwrap(); + + // Test that scan operations work correctly + let df = ctx.sql("SELECT * FROM test_table").await.unwrap(); + + // Verify the schema in the query result + let df_schema = df.schema(); + assert_eq!(df_schema.fields().len(), 2); + assert_eq!(df_schema.field(0).name(), "id"); + assert_eq!(df_schema.field(1).name(), "name"); + + let physical_plan = df.create_physical_plan().await; + assert!(physical_plan.is_ok()); + } + + #[tokio::test] + async fn test_catalog_backed_provider_insert() { + let (catalog, namespace, table_name, _temp_dir) = get_test_catalog_and_table().await; + + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx 
= SessionContext::new(); + ctx.register_table("test_table", Arc::new(provider)) + .unwrap(); + + // Test that insert operations work correctly + let result = ctx.sql("INSERT INTO test_table VALUES (1, 'test')").await; + + // Insert should succeed (or at least not fail during planning) + assert!(result.is_ok()); + + // Try to execute the insert plan + let df = result.unwrap(); + let execution_result = df.collect().await; + + // The execution should succeed + assert!(execution_result.is_ok()); + } + + #[tokio::test] + async fn test_physical_input_schema_consistent_with_logical_input_schema() { + let (catalog, namespace, table_name, _temp_dir) = get_test_catalog_and_table().await; + + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + ctx.register_table("test_table", Arc::new(provider)) + .unwrap(); + + // Create a query plan + let df = ctx.sql("SELECT id, name FROM test_table").await.unwrap(); + + // Get logical schema before consuming df + let logical_schema = df.schema().clone(); + + // Get physical plan (this consumes df) + let physical_plan = df.create_physical_plan().await.unwrap(); + let physical_schema = physical_plan.schema(); + + // Verify that logical and physical schemas are consistent + assert_eq!( + logical_schema.fields().len(), + physical_schema.fields().len() + ); + + for (logical_field, physical_field) in logical_schema + .fields() + .iter() + .zip(physical_schema.fields().iter()) + { + assert_eq!(logical_field.name(), physical_field.name()); + assert_eq!(logical_field.data_type(), physical_field.data_type()); + } } } diff --git a/crates/integrations/datafusion/src/table/table_provider_factory.rs b/crates/integrations/datafusion/src/table/table_provider_factory.rs index e8e87dd318..8c0c8e90de 100644 --- a/crates/integrations/datafusion/src/table/table_provider_factory.rs +++ 
b/crates/integrations/datafusion/src/table/table_provider_factory.rs @@ -24,12 +24,11 @@ use datafusion::catalog::{Session, TableProvider, TableProviderFactory}; use datafusion::error::Result as DFResult; use datafusion::logical_expr::CreateExternalTable; use datafusion::sql::TableReference; -use iceberg::arrow::schema_to_arrow_schema; use iceberg::io::FileIO; use iceberg::table::StaticTable; use iceberg::{Error, ErrorKind, Result, TableIdent}; -use super::IcebergTableProvider; +use super::IcebergStaticTableProvider; use crate::to_datafusion_error; /// A factory that implements DataFusion's `TableProviderFactory` to create `IcebergTableProvider` instances. @@ -126,10 +125,11 @@ impl TableProviderFactory for IcebergTableProviderFactory { .map_err(to_datafusion_error)? .into_table(); - let schema = schema_to_arrow_schema(table.metadata().current_schema()) + let provider = IcebergStaticTableProvider::try_new_from_table(table) + .await .map_err(to_datafusion_error)?; - Ok(Arc::new(IcebergTableProvider::new(table, Arc::new(schema)))) + Ok(Arc::new(provider)) } } diff --git a/crates/integrations/datafusion/tests/integration_datafusion_test.rs b/crates/integrations/datafusion/tests/integration_datafusion_test.rs index fdf5b17d18..3ad84f383e 100644 --- a/crates/integrations/datafusion/tests/integration_datafusion_test.rs +++ b/crates/integrations/datafusion/tests/integration_datafusion_test.rs @@ -492,10 +492,6 @@ async fn test_insert_into() -> Result<()> { .unwrap(); assert_eq!(rows_inserted.value(0), 2); - // Refresh context to avoid getting stale table - let catalog = Arc::new(IcebergCatalogProvider::try_new(client).await?); - ctx.register_catalog("catalog", catalog); - // Query the table to verify the inserted data let df = ctx .sql("SELECT * FROM catalog.test_insert_into.my_table") @@ -650,10 +646,6 @@ async fn test_insert_into_nested() -> Result<()> { .unwrap(); assert_eq!(rows_inserted.value(0), 2); - // Refresh context to avoid getting stale table - let catalog = 
Arc::new(IcebergCatalogProvider::try_new(client).await?); - ctx.register_catalog("catalog", catalog); - // Query the table to verify the inserted data let df = ctx .sql("SELECT * FROM catalog.test_insert_nested.nested_table ORDER BY id") @@ -880,10 +872,6 @@ async fn test_insert_into_partitioned() -> Result<()> { .unwrap(); assert_eq!(rows_inserted.value(0), 5); - // Refresh catalog to get updated table - let catalog = Arc::new(IcebergCatalogProvider::try_new(client.clone()).await?); - ctx.register_catalog("catalog", catalog); - // Query the table to verify data let df = ctx .sql("SELECT * FROM catalog.test_partitioned_write.partitioned_table ORDER BY id") From 617be094eed3adf04b77b36225bbda15a33527d9 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 28 Nov 2025 19:36:44 -0800 Subject: [PATCH 06/58] infra: use new `del_branch_on_merge` in .asf.yaml (#1888) ## Which issue does this PR close? - Closes #. ## What changes are included in this PR? Previous use of `del_branch_on_merge` ``` github: del_branch_on_merge: true ``` is deprecated, https://github.com/apache/infrastructure-asfyaml?tab=readme-ov-file#delete-branch-on-merge New way is to ``` github: pull_requests: del_branch_on_merge: true ``` https://github.com/apache/infrastructure-asfyaml?tab=readme-ov-file#pull_requests ## Are these changes tested? --- .asf.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.asf.yaml b/.asf.yaml index f46c437a79..b217fd7807 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -43,7 +43,9 @@ github: dismiss_stale_reviews: false required_linear_history: true - del_branch_on_merge: true + pull_requests: + # auto-delete head branches after being merged + del_branch_on_merge: true features: wiki: false issues: true From 2871b4760a42e5d8d4261c025793f6cf02bd1628 Mon Sep 17 00:00:00 2001 From: dentiny Date: Sun, 30 Nov 2025 22:49:53 -0800 Subject: [PATCH 07/58] Upgrade opendal to v0.55 (#1895) ## What changes are included in this PR? 
Dependency upgrade is supposed to perform automatically via dependabot, but somehow I didn't see it this weekend. ## Are these changes tested? Checked with local iceberg unit tests and seem fine. --- Cargo.lock | 24 +++++++++++++++++++++--- Cargo.toml | 4 ++-- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4c82df79ff..d29f6e97f4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4015,10 +4015,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" dependencies = [ "jiff-static", + "jiff-tzdb-platform", "log", "portable-atomic", "portable-atomic-util", "serde", + "windows-sys 0.59.0", ] [[package]] @@ -4032,6 +4034,21 @@ dependencies = [ "syn 2.0.108", ] +[[package]] +name = "jiff-tzdb" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1283705eb0a21404d2bfd6eef2a7593d240bc42a0bdb39db0ad6fa2ec026524" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" +dependencies = [ + "jiff-tzdb", +] + [[package]] name = "jobserver" version = "0.1.34" @@ -4742,20 +4759,20 @@ checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "opendal" -version = "0.54.1" +version = "0.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42afda58fa2cf50914402d132cc1caacff116a85d10c72ab2082bb7c50021754" +checksum = "d075ab8a203a6ab4bc1bce0a4b9fe486a72bf8b939037f4b78d95386384bc80a" dependencies = [ "anyhow", "backon", "base64 0.22.1", "bytes", - "chrono", "crc32c", "futures", "getrandom 0.2.16", "http 1.3.1", "http-body 1.0.1", + "jiff", "log", "md-5", "percent-encoding", @@ -4765,6 +4782,7 @@ dependencies = [ "serde", "serde_json", "tokio", + "url", "uuid", ] diff --git a/Cargo.toml 
b/Cargo.toml index 7ca365dce2..0512155534 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,9 +81,9 @@ http = "1.2" iceberg = { version = "0.7.0", path = "./crates/iceberg" } iceberg-catalog-glue = { version = "0.7.0", path = "./crates/catalog/glue" } iceberg-catalog-hms = { version = "0.7.0", path = "./crates/catalog/hms" } -iceberg-catalog-sql = { version = "0.7.0", path = "./crates/catalog/sql" } iceberg-catalog-rest = { version = "0.7.0", path = "./crates/catalog/rest" } iceberg-catalog-s3tables = { version = "0.7.0", path = "./crates/catalog/s3tables" } +iceberg-catalog-sql = { version = "0.7.0", path = "./crates/catalog/sql" } iceberg-datafusion = { version = "0.7.0", path = "./crates/integrations/datafusion" } indicatif = "0.18" itertools = "0.13" @@ -99,7 +99,7 @@ motore-macros = "0.4.3" murmur3 = "0.5.2" num-bigint = "0.4.6" once_cell = "1.20" -opendal = "0.54.0" +opendal = "0.55.0" ordered-float = "4" parquet = "56.2" pilota = "0.11.10" From e20910ed71fa63d4b9459a29add8698656530160 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Dec 2025 16:51:05 +0800 Subject: [PATCH 08/58] chore(deps): Bump http from 1.3.1 to 1.4.0 (#1892) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [http](https://github.com/hyperium/http) from 1.3.1 to 1.4.0.
Release notes

Sourced from http's releases.

v1.4.0

Highlights

  • Add StatusCode::EARLY_HINTS constant for 103 Early Hints.
  • Make StatusCode::from_u16 now a const fn.
  • Make Authority::from_static now a const fn.
  • Make PathAndQuery::from_static now a const fn.
  • MSRV increased to 1.57 (allows legible const fn panic messages).

What's Changed

New Contributors

Full Changelog: https://github.com/hyperium/http/compare/v1.3.1...v1.4.0

Changelog

Sourced from http's changelog.

1.4.0 (November 24, 2025)

  • Add StatusCode::EARLY_HINTS constant for 103 Early Hints.
  • Make StatusCode::from_u16 now a const fn.
  • Make Authority::from_static now a const fn.
  • Make PathAndQuery::from_static now a const fn.
  • MSRV increased to 1.57 (allows legible const fn panic messages).
Commits
  • b9625d8 v1.4.0
  • 50b009c refactor(header): inline FNV hasher to reduce dependencies (#796)
  • b370d36 feat(uri): make Authority/PathAndQuery::from_static const (#786)
  • 0d74251 chore(ci): update to actions/checkout@v5 (#800)
  • a760767 docs: remove unnecessary extern crate sentence (#799)
  • fb1d457 refactor(header): use better panic message in const HeaderName and HeaderValu...
  • 20dbd6e feat(status): Add 103 EARLY_HINTS status code (#758)
  • e7a7337 chore: bump MSRV to 1.57
  • 1888e28 tests: downgrade rand back to 0.8 for now
  • 918bbc3 chore: minor improvement for docs (#790)
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=http&package-manager=cargo&previous-version=1.3.1&new-version=1.4.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d29f6e97f4..736f2b1461 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -665,7 +665,7 @@ dependencies = [ "bytes", "fastrand", "hex", - "http 1.3.1", + "http 1.4.0", "ring", "time", "tokio", @@ -859,7 +859,7 @@ dependencies = [ "hex", "hmac", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "percent-encoding", "sha2", "time", @@ -889,7 +889,7 @@ dependencies = [ "bytes-utils", "futures-core", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "percent-encoding", "pin-project-lite", @@ -909,7 +909,7 @@ dependencies = [ "h2 0.3.27", "h2 0.4.12", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "hyper 0.14.32", "hyper 1.7.0", @@ -970,7 +970,7 @@ dependencies = [ "bytes", "fastrand", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "http-body 1.0.1", "pin-project-lite", @@ -989,7 +989,7 @@ dependencies = [ "aws-smithy-types", "bytes", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "pin-project-lite", "tokio", "tracing", @@ -1007,7 +1007,7 @@ dependencies = [ "bytes-utils", "futures-core", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "http-body 1.0.1", "http-body-util", @@ -3191,7 +3191,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http 1.3.1", + "http 1.4.0", "indexmap 2.12.0", "slab", "tokio", @@ -3326,12 +3326,11 @@ dependencies = [ [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -3353,7 +3352,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.3.1", + "http 1.4.0", ] [[package]] @@ -3364,7 +3363,7 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "pin-project-lite", ] @@ -3422,7 +3421,7 @@ dependencies = [ "futures-channel", "futures-core", "h2 0.4.12", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "httparse", "httpdate", @@ -3456,7 +3455,7 @@ version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http 1.3.1", + "http 1.4.0", "hyper 1.7.0", "hyper-util", "rustls 0.23.34", @@ -3479,7 +3478,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "hyper 1.7.0", "ipnet", @@ -3651,7 +3650,7 @@ dependencies = [ "async-trait", "chrono", "ctor", - "http 1.3.1", + "http 1.4.0", "iceberg", "iceberg_test_utils", "itertools 0.13.0", @@ -4420,7 +4419,7 @@ dependencies = [ "bytes", "colored", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "http-body-util", "hyper 1.7.0", @@ -4720,7 +4719,7 @@ dependencies = [ "chrono", "form_urlencoded", "futures", - "http 1.3.1", + "http 1.4.0", "http-body-util", "humantime", "hyper 1.7.0", @@ -4770,7 +4769,7 @@ dependencies = [ "crc32c", "futures", "getrandom 0.2.16", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "jiff", "log", @@ -5704,7 +5703,7 @@ dependencies = [ "hex", "hmac", "home", - "http 1.3.1", + "http 1.4.0", "jsonwebtoken", "log", "once_cell", @@ -5732,7 +5731,7 @@ dependencies = [ "futures-core", "futures-util", "h2 0.4.12", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "http-body-util", "hyper 1.7.0", @@ -7306,7 +7305,7 @@ dependencies = [ "bitflags", 
"bytes", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "iri-string", "pin-project-lite", From a371d82dd537bb266b52f6daa4b552f3c6f28b41 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Dec 2025 19:15:27 +0800 Subject: [PATCH 09/58] chore(deps): Bump crate-ci/typos from 1.39.2 to 1.40.0 (#1891) Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.39.2 to 1.40.0.
Release notes

Sourced from crate-ci/typos's releases.

v1.40.0

[1.40.0] - 2025-11-26

Features

Changelog

Sourced from crate-ci/typos's changelog.

[1.40.0] - 2025-11-26

Features

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=crate-ci/typos&package-manager=github_actions&previous-version=1.39.2&new-version=1.40.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Renjie Liu --- .github/workflows/ci_typos.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_typos.yml b/.github/workflows/ci_typos.yml index 3e63a6926c..6406b82142 100644 --- a/.github/workflows/ci_typos.yml +++ b/.github/workflows/ci_typos.yml @@ -42,4 +42,4 @@ jobs: steps: - uses: actions/checkout@v6 - name: Check typos - uses: crate-ci/typos@v1.39.2 + uses: crate-ci/typos@v1.40.0 From 5724fc556ed8699dfdba5fb657ea5dd9a733cbf1 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Mon, 1 Dec 2025 03:42:10 -0800 Subject: [PATCH 10/58] feat(datafusion): Add `sort_by_partition` to sort the input partitioned data (#1618) --- .../datafusion/src/physical_plan/mod.rs | 1 + .../datafusion/src/physical_plan/sort.rs | 244 ++++++++++++++++++ 2 files changed, 245 insertions(+) create mode 100644 crates/integrations/datafusion/src/physical_plan/sort.rs diff --git a/crates/integrations/datafusion/src/physical_plan/mod.rs b/crates/integrations/datafusion/src/physical_plan/mod.rs index eb58082fe5..5a9845cde0 100644 --- a/crates/integrations/datafusion/src/physical_plan/mod.rs +++ b/crates/integrations/datafusion/src/physical_plan/mod.rs @@ -21,6 +21,7 @@ pub(crate) mod metadata_scan; pub(crate) mod project; pub(crate) mod repartition; pub(crate) mod scan; +pub(crate) mod sort; pub(crate) mod write; pub(crate) const DATA_FILES_COL_NAME: &str = "data_files"; diff --git a/crates/integrations/datafusion/src/physical_plan/sort.rs b/crates/integrations/datafusion/src/physical_plan/sort.rs new file mode 100644 index 0000000000..2a57e16e43 --- /dev/null +++ b/crates/integrations/datafusion/src/physical_plan/sort.rs @@ -0,0 +1,244 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Partition-based sorting for Iceberg tables. + +use std::sync::Arc; + +use datafusion::arrow::compute::SortOptions; +use datafusion::common::Result as DFResult; +use datafusion::error::DataFusionError; +use datafusion::physical_expr::{LexOrdering, PhysicalSortExpr}; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_plan::expressions::Column; +use datafusion::physical_plan::sorts::sort::SortExec; +use iceberg::arrow::PROJECTED_PARTITION_VALUE_COLUMN; + +/// Sorts an ExecutionPlan by partition values for Iceberg tables. +/// +/// This function takes an input ExecutionPlan that has been extended with partition values +/// (via `project_with_partition`) and returns a SortExec that sorts by the partition column. +/// The partition values are expected to be in a struct column named `PROJECTED_PARTITION_VALUE_COLUMN`. +/// +/// For unpartitioned tables or plans without the partition column, returns an error. 
+/// +/// # Arguments +/// * `input` - The input ExecutionPlan with projected partition values +/// +/// # Returns +/// * `Ok(Arc)` - A SortExec that sorts by partition values +/// * `Err` - If the partition column is not found +/// +/// TODO remove dead_code mark when integrating with insert_into +#[allow(dead_code)] +pub(crate) fn sort_by_partition(input: Arc) -> DFResult> { + let schema = input.schema(); + + // Find the partition column in the schema + let (partition_column_index, _partition_field) = schema + .column_with_name(PROJECTED_PARTITION_VALUE_COLUMN) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "Partition column '{}' not found in schema. Ensure the plan has been extended with partition values using project_with_partition.", + PROJECTED_PARTITION_VALUE_COLUMN + )) + })?; + + // Create a single sort expression for the partition column + let column_expr = Arc::new(Column::new( + PROJECTED_PARTITION_VALUE_COLUMN, + partition_column_index, + )); + + let sort_expr = PhysicalSortExpr { + expr: column_expr, + options: SortOptions::default(), // Ascending, nulls last + }; + + // Create a SortExec with preserve_partitioning=true to ensure the output partitioning + // is the same as the input partitioning, and the data is sorted within each partition + let lex_ordering = LexOrdering::new(vec![sort_expr]).ok_or_else(|| { + DataFusionError::Plan("Failed to create LexOrdering from sort expression".to_string()) + })?; + + let sort_exec = SortExec::new(lex_ordering, input).with_preserve_partitioning(true); + + Ok(Arc::new(sort_exec)) +} + +#[cfg(test)] +mod tests { + use datafusion::arrow::array::{Int32Array, RecordBatch, StringArray, StructArray}; + use datafusion::arrow::datatypes::{DataType, Field, Fields, Schema as ArrowSchema}; + use datafusion::datasource::{MemTable, TableProvider}; + use datafusion::prelude::SessionContext; + + use super::*; + + #[tokio::test] + async fn test_sort_by_partition_basic() { + // Create a schema with a partition column + 
let partition_fields = + Fields::from(vec![Field::new("id_partition", DataType::Int32, false)]); + + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, false), + Field::new( + PROJECTED_PARTITION_VALUE_COLUMN, + DataType::Struct(partition_fields.clone()), + false, + ), + ])); + + // Create test data with partition values + let id_array = Arc::new(Int32Array::from(vec![3, 1, 2])); + let name_array = Arc::new(StringArray::from(vec!["c", "a", "b"])); + let partition_array = Arc::new(StructArray::from(vec![( + Arc::new(Field::new("id_partition", DataType::Int32, false)), + Arc::new(Int32Array::from(vec![3, 1, 2])) as _, + )])); + + let batch = + RecordBatch::try_new(schema.clone(), vec![id_array, name_array, partition_array]) + .unwrap(); + + let ctx = SessionContext::new(); + let mem_table = MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap(); + let input = mem_table.scan(&ctx.state(), None, &[], None).await.unwrap(); + + // Apply sort + let sorted_plan = sort_by_partition(input).unwrap(); + + // Execute and verify + let result = datafusion::physical_plan::collect(sorted_plan, ctx.task_ctx()) + .await + .unwrap(); + + assert_eq!(result.len(), 1); + let result_batch = &result[0]; + + let id_col = result_batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + + // Verify data is sorted by partition value + assert_eq!(id_col.value(0), 1); + assert_eq!(id_col.value(1), 2); + assert_eq!(id_col.value(2), 3); + } + + #[tokio::test] + async fn test_sort_by_partition_missing_column() { + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, false), + ])); + + let batch = RecordBatch::try_new(schema.clone(), vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["a", "b", "c"])), + ]) + .unwrap(); + + let ctx = SessionContext::new(); + let mem_table = MemTable::try_new(schema.clone(), 
vec![vec![batch]]).unwrap(); + let input = mem_table.scan(&ctx.state(), None, &[], None).await.unwrap(); + + let result = sort_by_partition(input); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Partition column '_partition' not found") + ); + } + + #[tokio::test] + async fn test_sort_by_partition_multi_field() { + // Test with multiple partition fields in the struct + let partition_fields = Fields::from(vec![ + Field::new("year", DataType::Int32, false), + Field::new("month", DataType::Int32, false), + ]); + + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("data", DataType::Utf8, false), + Field::new( + PROJECTED_PARTITION_VALUE_COLUMN, + DataType::Struct(partition_fields.clone()), + false, + ), + ])); + + // Create test data with partition values (year, month) + let id_array = Arc::new(Int32Array::from(vec![1, 2, 3, 4])); + let data_array = Arc::new(StringArray::from(vec!["a", "b", "c", "d"])); + + // Partition values: (2024, 2), (2024, 1), (2023, 12), (2024, 1) + let year_array = Arc::new(Int32Array::from(vec![2024, 2024, 2023, 2024])); + let month_array = Arc::new(Int32Array::from(vec![2, 1, 12, 1])); + + let partition_array = Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("year", DataType::Int32, false)), + year_array as _, + ), + ( + Arc::new(Field::new("month", DataType::Int32, false)), + month_array as _, + ), + ])); + + let batch = + RecordBatch::try_new(schema.clone(), vec![id_array, data_array, partition_array]) + .unwrap(); + + let ctx = SessionContext::new(); + let mem_table = MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap(); + let input = mem_table.scan(&ctx.state(), None, &[], None).await.unwrap(); + + // Apply sort + let sorted_plan = sort_by_partition(input).unwrap(); + + // Execute and verify + let result = datafusion::physical_plan::collect(sorted_plan, ctx.task_ctx()) + .await + .unwrap(); + + assert_eq!(result.len(), 1); 
+ let result_batch = &result[0]; + + let id_col = result_batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + + // Verify data is sorted by partition value (struct comparison) + // Expected order: (2023, 12), (2024, 1), (2024, 1), (2024, 2) + // Which corresponds to ids: 3, 2, 4, 1 + assert_eq!(id_col.value(0), 3); + assert_eq!(id_col.value(1), 2); + assert_eq!(id_col.value(2), 4); + assert_eq!(id_col.value(3), 1); + } +} From 052feaf3b6c6cd0e3310b1b8211ab3fed7d0e520 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 2 Dec 2025 18:01:47 +0800 Subject: [PATCH 11/58] rfc: Modularize `iceberg` Implementations (#1854) ## Which issue does this PR close? - Part of https://github.com/apache/iceberg-rust/issues/1819 ## What changes are included in this PR? Add RFC for iceberg-kernel ## Are these changes tested? --------- Signed-off-by: Xuanwo Co-authored-by: Kevin Liu Co-authored-by: Andrew Lamb Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- ...0001_modularize_iceberg_implementations.md | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 docs/rfcs/0001_modularize_iceberg_implementations.md diff --git a/docs/rfcs/0001_modularize_iceberg_implementations.md b/docs/rfcs/0001_modularize_iceberg_implementations.md new file mode 100644 index 0000000000..14bd478270 --- /dev/null +++ b/docs/rfcs/0001_modularize_iceberg_implementations.md @@ -0,0 +1,120 @@ + + +# RFC: Modularize `iceberg` Implementations + +## Background + +Issue #1819 highlighted that the current `iceberg` crate mixes the Iceberg protocol abstractions (catalog/table/plan/transaction) with concrete runtime, storage, and execution code (Tokio runtime wrappers, opendal-based `FileIO`, Arrow helpers, DataFusion glue, etc.). This coupling makes the crate heavy and blocks users from composing their own storage or execution stacks. + +Two principles have been agreed: +1. 
The `iceberg` crate remains the single source of truth for all protocol traits and data structures. We will not create a separate “kernel” crate or facade layer. +2. Concrete integrations (Tokio runtime, opendal `FileIO`, Arrow/DataFusion glue, catalog adapters, etc.) move out into dedicated companion crates. Users needing a ready path can depend on those crates (e.g., `iceberg-datafusion` or `integrations/local`), while custom stacks depend only on `iceberg`. + +This RFC focuses on modularizing implementations; detailed trait signatures (e.g., `FileIO`, `Runtime`) will be handled in separate RFCs. + +## Goals and Scope + +- Keep `iceberg` as the protocol crate (traits + metadata + planning), without bundling runtimes, storage adapters, or execution glue. +- Relocate concrete code into companion crates under `crates/fileio/*`, `crates/runtime/*`, and `crates/integrations/*`. +- Provide a staged plan for extracting Arrow-dependent APIs to avoid destabilizing file-format code. +- Minimize breaking surfaces: traits stay in `iceberg`; downstream crates mainly adjust dependencies. + +Out of scope: changes to the Iceberg table specification or catalog adapter external behavior; detailed trait method design (covered by follow-up RFCs). + +## Architecture Overview + +### Workspace Layout (target) + +``` +crates/ + iceberg/ # core traits, metadata, planning, transactions + fileio/ + opendal/ # e.g. `iceberg-fileio-opendal` + fs/ # other FileIO implementations + runtime/ + tokio/ # e.g. `iceberg-runtime-tokio` + smol/ + catalog/* # catalog adapters (REST, HMS, Glue, etc.) + integrations/ + local/ # simple local/arrow-based helper crate + datafusion/ # combines core + implementations for DF + cache-moka/ + playground/ +``` + +- `crates/iceberg` drops direct deps on opendal, Tokio, Arrow, and DataFusion. +- Implementation crates depend on `iceberg` to implement the traits. 
+- Higher-level crates (`integrations/local`, `iceberg-datafusion`) assemble the pieces for ready-to-use scenarios. + +### Core Trait Surfaces + +`FileIO`, `Runtime`, `Catalog`, `Table`, `Transaction`, `TableScan` (plan descriptors) all remain hosted in `iceberg`. Precise method signatures are deferred to dedicated RFCs to avoid locking details prematurely. + +### Usage Modes + +- **Custom stacks**: depend on `iceberg` and provide your own implementations. +- **Pre-built stacks**: depend on `integrations/local` or `iceberg-datafusion`, which bundle `iceberg` with selected runtime/FileIO/Arrow helpers. +- `iceberg` does not re-export companion crates; users compose explicitly. + +## Migration Plan (staged, with Arrow extraction phased) + +1. **Phase 1 – Confirm trait hosting, defer details** + - Keep all protocol traits in `iceberg`; move detailed API design (FileIO, Runtime, etc.) to separate RFCs. + - Add temporary shims/deprecations only when traits are finalized. + +2. **Phase 2 – First Arrow step: move `to_arrow()` out** + - Relocate the public `to_arrow()` API to `integrations/local` (or another higher-level crate). Core no longer exposes Arrow entry points. + - Keep internal Arrow-dependent helpers (e.g., `ArrowFileReader`) temporarily in `iceberg` to avoid breaking file-format flows. + +3. **Phase 3 – Gradual Arrow dependency removal** + - Incrementally migrate/replace Arrow-dependent internals (`ArrowFileReader`, format-specific readers) into `integrations/local` or other helper crates. + - Adjust file-format APIs as needed; expect this to be multi-release work. + +4. **Phase 4 – Dependency cleanup** + - Ensure catalog and integration crates depend only on `iceberg` plus the specific runtime/FileIO/helper crates they need. + - Verify build/test pipelines against the new dependency graph. + +5. **Phase 5 – Docs & release** + - Publish migration guides: where `to_arrow()` moved, how to assemble local/DataFusion stacks. 
+ - Schedule deprecation windows for remaining Arrow helpers; target a breaking release once Arrow is fully removed from `iceberg`. + +## Compatibility + +- Short term: users of `Table::scan().to_arrow()` must switch to `integrations/local` (or another crate that rehosts that API). Other Arrow types stay temporarily but will migrate in later phases. +- Long term: `iceberg` will be Arrow-free; companion crates provide Arrow-based helpers. +- Tests/examples move alongside the implementations they exercise. + +## Risks and Mitigations + +| Risk | Description | Mitigation | +| ---- | ----------- | ---------- | +| Arrow dependency unwinding is complex | File-format readers may rely on Arrow types | Phase the work; move `to_arrow()` first, then refactor readers; document interim state | +| Discoverability | Users may not know where Arrow helpers went | Clear docs pointing to `integrations/local` and `iceberg-datafusion`; migration guide | +| Trait churn | Future trait RFCs may break early adopters | Use deprecation shims and communicate timelines | +| Duplicate impls | Multiple helper crates could overlap | Provide recommended combinations and feature guidance | + +## Open Questions + +1. Versioning: align companion crate versions with `iceberg`, or allow independent versions plus compatibility matrix? +2. Deprecation schedule: how long do we keep interim Arrow helpers before full removal from `iceberg`? + +## Conclusion + +We will keep `iceberg` as the protocol crate while modularizing concrete implementations. Arrow removal will be phased: first relocating `to_arrow()` to `integrations/local`, then gradually moving Arrow-dependent readers and helpers. This keeps the core lean, lets users compose their preferred runtime/FileIO stacks, and still offers ready-to-use combinations via companion crates. 
From 3d47be53fbb2486055d83524a0804079cf973c27 Mon Sep 17 00:00:00 2001 From: Leonz Date: Thu, 4 Dec 2025 13:11:54 +0800 Subject: [PATCH 12/58] refactor(writer): Make writer builders non-consuming in build (#1889) ## Which issue does this PR close? - Closes #1753. - Related https://github.com/apache/iceberg-rust/pull/1735#discussion_r2428605135 ## What changes are included in this PR? This change allows users to reuse builder instances without cloning when creating multiple writers with the same configuration. Modification non-consuming self in build function: - `IcebergWriterBuilder` - `RollingFileWriterBuilder` - `FileWriterBuilder` ## Are these changes tested? --- .../src/writer/base_writer/data_file_writer.rs | 6 +++--- .../src/writer/base_writer/equality_delete_writer.rs | 12 ++++++------ .../src/writer/file_writer/location_generator.rs | 4 ++-- crates/iceberg/src/writer/file_writer/mod.rs | 4 ++-- .../iceberg/src/writer/file_writer/parquet_writer.rs | 4 ++-- .../iceberg/src/writer/file_writer/rolling_writer.rs | 12 +++++------- crates/iceberg/src/writer/mod.rs | 8 +++----- .../src/writer/partitioning/clustered_writer.rs | 1 - .../iceberg/src/writer/partitioning/fanout_writer.rs | 1 - .../src/writer/partitioning/unpartitioned_writer.rs | 2 +- 10 files changed, 24 insertions(+), 30 deletions(-) diff --git a/crates/iceberg/src/writer/base_writer/data_file_writer.rs b/crates/iceberg/src/writer/base_writer/data_file_writer.rs index dcaa56cc97..cb7bd172ea 100644 --- a/crates/iceberg/src/writer/base_writer/data_file_writer.rs +++ b/crates/iceberg/src/writer/base_writer/data_file_writer.rs @@ -27,7 +27,7 @@ use crate::writer::{CurrentFileStatus, IcebergWriter, IcebergWriterBuilder}; use crate::{Error, ErrorKind, Result}; /// Builder for `DataFileWriter`. 
-#[derive(Clone, Debug)] +#[derive(Debug)] pub struct DataFileWriterBuilder { inner: RollingFileWriterBuilder, } @@ -53,9 +53,9 @@ where { type R = DataFileWriter; - async fn build(self, partition_key: Option) -> Result { + async fn build(&self, partition_key: Option) -> Result { Ok(DataFileWriter { - inner: Some(self.inner.clone().build()), + inner: Some(self.inner.build()), partition_key, }) } diff --git a/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs b/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs index 664ea84334..cd0b19148d 100644 --- a/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs +++ b/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs @@ -34,7 +34,7 @@ use crate::writer::{IcebergWriter, IcebergWriterBuilder}; use crate::{Error, ErrorKind, Result}; /// Builder for `EqualityDeleteWriter`. -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct EqualityDeleteFileWriterBuilder< B: FileWriterBuilder, L: LocationGenerator, @@ -60,7 +60,7 @@ where } /// Config for `EqualityDeleteWriter`. -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct EqualityDeleteWriterConfig { // Field ids used to determine row equality in equality delete files. 
equality_ids: Vec, @@ -123,11 +123,11 @@ where { type R = EqualityDeleteFileWriter; - async fn build(self, partition_key: Option) -> Result { + async fn build(&self, partition_key: Option) -> Result { Ok(EqualityDeleteFileWriter { - inner: Some(self.inner.clone().build()), - projector: self.config.projector, - equality_ids: self.config.equality_ids, + inner: Some(self.inner.build()), + projector: self.config.projector.clone(), + equality_ids: self.config.equality_ids.clone(), partition_key, }) } diff --git a/crates/iceberg/src/writer/file_writer/location_generator.rs b/crates/iceberg/src/writer/file_writer/location_generator.rs index a5cfc28292..0ad4d91ac6 100644 --- a/crates/iceberg/src/writer/file_writer/location_generator.rs +++ b/crates/iceberg/src/writer/file_writer/location_generator.rs @@ -24,7 +24,7 @@ use crate::Result; use crate::spec::{DataFileFormat, PartitionKey, TableMetadata}; /// `LocationGenerator` used to generate the location of data file. -pub trait LocationGenerator: Clone + Send + 'static { +pub trait LocationGenerator: Clone + Send + Sync + 'static { /// Generate an absolute path for the given file name that includes the partition path. /// /// # Arguments @@ -94,7 +94,7 @@ impl LocationGenerator for DefaultLocationGenerator { } /// `FileNameGeneratorTrait` used to generate file name for data file. The file name can be passed to `LocationGenerator` to generate the location of the file. -pub trait FileNameGenerator: Clone + Send + 'static { +pub trait FileNameGenerator: Clone + Send + Sync + 'static { /// Generate a file name. fn generate_file_name(&self) -> String; } diff --git a/crates/iceberg/src/writer/file_writer/mod.rs b/crates/iceberg/src/writer/file_writer/mod.rs index 2ed6414ce8..101919f5b3 100644 --- a/crates/iceberg/src/writer/file_writer/mod.rs +++ b/crates/iceberg/src/writer/file_writer/mod.rs @@ -36,11 +36,11 @@ pub mod rolling_writer; type DefaultOutput = Vec; /// File writer builder trait. 
-pub trait FileWriterBuilder: Send + Clone + 'static { +pub trait FileWriterBuilder: Clone + Send + Sync + 'static { /// The associated file writer type. type R: FileWriter; /// Build file writer. - fn build(self, output_file: OutputFile) -> impl Future> + Send; + fn build(&self, output_file: OutputFile) -> impl Future> + Send; } /// File writer focus on writing record batch to different physical file format.(Such as parquet. orc) diff --git a/crates/iceberg/src/writer/file_writer/parquet_writer.rs b/crates/iceberg/src/writer/file_writer/parquet_writer.rs index 3e9d1715c9..5cf031a9fb 100644 --- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs +++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs @@ -81,11 +81,11 @@ impl ParquetWriterBuilder { impl FileWriterBuilder for ParquetWriterBuilder { type R = ParquetWriter; - async fn build(self, output_file: OutputFile) -> Result { + async fn build(&self, output_file: OutputFile) -> Result { Ok(ParquetWriter { schema: self.schema.clone(), inner_writer: None, - writer_properties: self.props, + writer_properties: self.props.clone(), current_row_num: 0, output_file, nan_value_count_visitor: NanValueCountVisitor::new_with_match_mode(self.match_mode), diff --git a/crates/iceberg/src/writer/file_writer/rolling_writer.rs b/crates/iceberg/src/writer/file_writer/rolling_writer.rs index 8f03654786..06246ab660 100644 --- a/crates/iceberg/src/writer/file_writer/rolling_writer.rs +++ b/crates/iceberg/src/writer/file_writer/rolling_writer.rs @@ -103,15 +103,15 @@ where } /// Build a new [`RollingFileWriter`]. 
- pub fn build(self) -> RollingFileWriter { + pub fn build(&self) -> RollingFileWriter { RollingFileWriter { inner: None, - inner_builder: self.inner_builder, + inner_builder: self.inner_builder.clone(), target_file_size: self.target_file_size, data_file_builders: vec![], - file_io: self.file_io, - location_generator: self.location_generator, - file_name_generator: self.file_name_generator, + file_io: self.file_io.clone(), + location_generator: self.location_generator.clone(), + file_name_generator: self.file_name_generator.clone(), } } } @@ -192,7 +192,6 @@ where // initialize inner writer self.inner = Some( self.inner_builder - .clone() .build(self.new_output_file(partition_key)?) .await?, ); @@ -206,7 +205,6 @@ where // start a new writer self.inner = Some( self.inner_builder - .clone() .build(self.new_output_file(partition_key)?) .await?, ); diff --git a/crates/iceberg/src/writer/mod.rs b/crates/iceberg/src/writer/mod.rs index a7892d49e1..d475230685 100644 --- a/crates/iceberg/src/writer/mod.rs +++ b/crates/iceberg/src/writer/mod.rs @@ -148,7 +148,7 @@ //! impl IcebergWriterBuilder for LatencyRecordWriterBuilder { //! type R = LatencyRecordWriter; //! -//! async fn build(self, partition_key: Option) -> Result { +//! async fn build(&self, partition_key: Option) -> Result { //! Ok(LatencyRecordWriter { //! inner_writer: self.inner_writer_builder.build(partition_key).await?, //! }) @@ -398,13 +398,11 @@ type DefaultOutput = Vec; /// The builder for iceberg writer. #[async_trait::async_trait] -pub trait IcebergWriterBuilder: - Send + Clone + 'static -{ +pub trait IcebergWriterBuilder: Send + Sync + 'static { /// The associated writer type. type R: IcebergWriter; /// Build the iceberg writer with an optional partition key. - async fn build(self, partition_key: Option) -> Result; + async fn build(&self, partition_key: Option) -> Result; } /// The iceberg writer used to write data to iceberg table. 
diff --git a/crates/iceberg/src/writer/partitioning/clustered_writer.rs b/crates/iceberg/src/writer/partitioning/clustered_writer.rs index 3587723965..01eb452083 100644 --- a/crates/iceberg/src/writer/partitioning/clustered_writer.rs +++ b/crates/iceberg/src/writer/partitioning/clustered_writer.rs @@ -118,7 +118,6 @@ where // Create a new writer for the new partition self.current_writer = Some( self.inner_builder - .clone() .build(Some(partition_key.clone())) .await?, ); diff --git a/crates/iceberg/src/writer/partitioning/fanout_writer.rs b/crates/iceberg/src/writer/partitioning/fanout_writer.rs index 796c1a4888..21a174b0d0 100644 --- a/crates/iceberg/src/writer/partitioning/fanout_writer.rs +++ b/crates/iceberg/src/writer/partitioning/fanout_writer.rs @@ -73,7 +73,6 @@ where if !self.partition_writers.contains_key(partition_key.data()) { let writer = self .inner_builder - .clone() .build(Some(partition_key.clone())) .await?; self.partition_writers diff --git a/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs b/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs index 0fb9cba3f1..29825a5416 100644 --- a/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs +++ b/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs @@ -75,7 +75,7 @@ where pub async fn write(&mut self, input: I) -> Result<()> { // Lazily create writer on first write if self.writer.is_none() { - self.writer = Some(self.inner_builder.clone().build(None).await?); + self.writer = Some(self.inner_builder.build(None).await?); } // Write directly to inner writer From 9fa3776cbd73159809e1c91ea1904ff9449d7a3d Mon Sep 17 00:00:00 2001 From: Christian Date: Fri, 5 Dec 2025 10:27:54 +0100 Subject: [PATCH 13/58] fix: Keep snapshot log on replace (#1896) ## Which issue does this PR close? Fixes remove_ref() to preserve snapshot log when removing MainBranch reference during CREATE OR REPLACE TABLE operations. 
Previously cleared entire snapshot history, causing testReplaceTableKeepsSnapshotLog RCK test to fail. Related Go Issue: https://github.com/apache/iceberg-go/pull/638 Java also does not clear the log: https://github.com/apache/iceberg/blob/16e84356dae1975fa04d8c3ecce30a90df18ca9f/core/src/main/java/org/apache/iceberg/TableMetadata.java#L1342-L1352 ## What changes are included in this PR? - Do not clear `snapshot_log` if ref to `main` branch is removed ## Are these changes tested? Yes --------- Signed-off-by: Xuanwo Co-authored-by: Xuanwo --- .../src/spec/table_metadata_builder.rs | 68 ++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/crates/iceberg/src/spec/table_metadata_builder.rs b/crates/iceberg/src/spec/table_metadata_builder.rs index 6b8ce1e6a5..eee4fec345 100644 --- a/crates/iceberg/src/spec/table_metadata_builder.rs +++ b/crates/iceberg/src/spec/table_metadata_builder.rs @@ -572,7 +572,6 @@ impl TableMetadataBuilder { pub fn remove_ref(mut self, ref_name: &str) -> Self { if ref_name == MAIN_BRANCH { self.metadata.current_snapshot_id = None; - self.metadata.snapshot_log.clear(); } if self.metadata.refs.remove(ref_name).is_some() || ref_name == MAIN_BRANCH { @@ -2237,6 +2236,73 @@ mod tests { assert_eq!(result.metadata.current_snapshot().unwrap().snapshot_id(), 2); } + #[test] + fn test_remove_main_ref_keeps_snapshot_log() { + let builder = builder_without_changes(FormatVersion::V2); + + let snapshot = Snapshot::builder() + .with_snapshot_id(1) + .with_timestamp_ms(builder.metadata.last_updated_ms + 1) + .with_sequence_number(0) + .with_schema_id(0) + .with_manifest_list("/snap-1.avro") + .with_summary(Summary { + operation: Operation::Append, + additional_properties: HashMap::from_iter(vec![ + ( + "spark.app.id".to_string(), + "local-1662532784305".to_string(), + ), + ("added-data-files".to_string(), "4".to_string()), + ("added-records".to_string(), "4".to_string()), + ("added-files-size".to_string(), "6001".to_string()), + ]), 
+ }) + .build(); + + let result = builder + .add_snapshot(snapshot.clone()) + .unwrap() + .set_ref(MAIN_BRANCH, SnapshotReference { + snapshot_id: 1, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: Some(10), + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, + }) + .unwrap() + .build() + .unwrap(); + + // Verify snapshot log was created + assert_eq!(result.metadata.snapshot_log.len(), 1); + assert_eq!(result.metadata.snapshot_log[0].snapshot_id, 1); + assert_eq!(result.metadata.current_snapshot_id, Some(1)); + + // Remove the main ref + let result_after_remove = result + .metadata + .into_builder(Some( + "s3://bucket/test/location/metadata/metadata2.json".to_string(), + )) + .remove_ref(MAIN_BRANCH) + .build() + .unwrap(); + + // Verify snapshot log is kept even after removing main ref + assert_eq!(result_after_remove.metadata.snapshot_log.len(), 1); + assert_eq!(result_after_remove.metadata.snapshot_log[0].snapshot_id, 1); + assert_eq!(result_after_remove.metadata.current_snapshot_id, None); + assert_eq!(result_after_remove.changes.len(), 1); + assert_eq!( + result_after_remove.changes[0], + TableUpdate::RemoveSnapshotRef { + ref_name: MAIN_BRANCH.to_string() + } + ); + } + #[test] fn test_set_branch_snapshot_creates_branch_if_not_exists() { let builder = builder_without_changes(FormatVersion::V2); From 7e66ded6555a8c445f95b0e4837fed22c75fe46c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Dec 2025 01:53:06 +0800 Subject: [PATCH 14/58] chore(deps): Bump actions/stale from 10.1.0 to 10.1.1 (#1908) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/stale](https://github.com/actions/stale) from 10.1.0 to 10.1.1.
Release notes

Sourced from actions/stale's releases.

v10.1.1

What's Changed

Bug Fix

Improvement

Dependency Upgrades

New Contributors

Full Changelog: https://github.com/actions/stale/compare/v10...v10.1.1

Changelog

Sourced from actions/stale's changelog.

Changelog

Commits
  • 9971854 build(deps): bump actions/checkout from 4 to 6 (#1306)
  • 5611b9d build(deps): bump actions/publish-action from 0.3.0 to 0.4.0 (#1291)
  • fad0de8 Improves error handling when rate limiting is disabled on GHES. (#1300)
  • 39bea7d Add Missing Input Reading for only-issue-types (#1298)
  • e46bbab build(deps-dev): bump @​types/node from 20.10.3 to 24.2.0 and document breakin...
  • 65d1d48 build(deps-dev): bump eslint-config-prettier from 8.10.0 to 10.1.8 (#1276)
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/stale&package-manager=github_actions&previous-version=10.1.0&new-version=10.1.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/stale.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 5e653cffe4..95a4fdc256 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -31,7 +31,7 @@ jobs: if: github.repository_owner == 'apache' runs-on: ubuntu-22.04 steps: - - uses: actions/stale@v10.1.0 + - uses: actions/stale@v10.1.1 with: stale-issue-label: 'stale' exempt-issue-labels: 'not-stale' From c0f9fdcd283ec650c64df7f367cd1ae473c24e62 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Mon, 8 Dec 2025 02:14:42 -0800 Subject: [PATCH 15/58] feat(datafusion): Add sqllogictest for DataFusion INSERT INTO (#1887) ## Which issue does this PR close? - Closes #1835 ## What changes are included in this PR? - Added a new schedule to run insert into sqllogic test ## Are these changes tested? It's a test itself --- crates/sqllogictest/src/engine/datafusion.rs | 71 ++++++++++- .../testdata/schedules/df_test.toml | 6 +- .../testdata/slts/df_test/insert_into.slt | 119 ++++++++++++++++++ .../testdata/slts/df_test/show_tables.slt | 6 + 4 files changed, 200 insertions(+), 2 deletions(-) create mode 100644 crates/sqllogictest/testdata/slts/df_test/insert_into.slt diff --git a/crates/sqllogictest/src/engine/datafusion.rs b/crates/sqllogictest/src/engine/datafusion.rs index b3e37d9206..e3402dfa97 100644 --- a/crates/sqllogictest/src/engine/datafusion.rs +++ b/crates/sqllogictest/src/engine/datafusion.rs @@ -22,8 +22,9 @@ use std::sync::Arc; use datafusion::catalog::CatalogProvider; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_sqllogictest::DataFusion; -use iceberg::CatalogBuilder; use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder}; +use iceberg::spec::{NestedField, PrimitiveType, Schema, Transform, Type, UnboundPartitionSpec}; +use 
iceberg::{Catalog, CatalogBuilder, NamespaceIdent, TableCreation}; use iceberg_datafusion::IcebergCatalogProvider; use indicatif::ProgressBar; use toml::Table as TomlTable; @@ -84,8 +85,76 @@ impl DataFusionEngine { ) .await?; + // Create a test namespace for INSERT INTO tests + let namespace = NamespaceIdent::new("default".to_string()); + catalog.create_namespace(&namespace, HashMap::new()).await?; + + // Create test tables + Self::create_unpartitioned_table(&catalog, &namespace).await?; + Self::create_partitioned_table(&catalog, &namespace).await?; + Ok(Arc::new( IcebergCatalogProvider::try_new(Arc::new(catalog)).await?, )) } + + /// Create an unpartitioned test table with id and name columns + /// TODO: this can be removed when we support CREATE TABLE + async fn create_unpartitioned_table( + catalog: &impl Catalog, + namespace: &NamespaceIdent, + ) -> anyhow::Result<()> { + let schema = Schema::builder() + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional(2, "name", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build()?; + + catalog + .create_table( + namespace, + TableCreation::builder() + .name("test_unpartitioned_table".to_string()) + .schema(schema) + .build(), + ) + .await?; + + Ok(()) + } + + /// Create a partitioned test table with id, category, and value columns + /// Partitioned by category using identity transform + /// TODO: this can be removed when we support CREATE TABLE + async fn create_partitioned_table( + catalog: &impl Catalog, + namespace: &NamespaceIdent, + ) -> anyhow::Result<()> { + let schema = Schema::builder() + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::required(2, "category", Type::Primitive(PrimitiveType::String)).into(), + NestedField::optional(3, "value", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build()?; + + let partition_spec = UnboundPartitionSpec::builder() + 
.with_spec_id(0) + .add_partition_field(2, "category", Transform::Identity)? + .build(); + + catalog + .create_table( + namespace, + TableCreation::builder() + .name("test_partitioned_table".to_string()) + .schema(schema) + .partition_spec(partition_spec) + .build(), + ) + .await?; + + Ok(()) + } } diff --git a/crates/sqllogictest/testdata/schedules/df_test.toml b/crates/sqllogictest/testdata/schedules/df_test.toml index 0733744951..df5e638d5a 100644 --- a/crates/sqllogictest/testdata/schedules/df_test.toml +++ b/crates/sqllogictest/testdata/schedules/df_test.toml @@ -20,4 +20,8 @@ df = { type = "datafusion" } [[steps]] engine = "df" -slt = "df_test/show_tables.slt" \ No newline at end of file +slt = "df_test/show_tables.slt" + +[[steps]] +engine = "df" +slt = "df_test/insert_into.slt" diff --git a/crates/sqllogictest/testdata/slts/df_test/insert_into.slt b/crates/sqllogictest/testdata/slts/df_test/insert_into.slt new file mode 100644 index 0000000000..2ba33afcd1 --- /dev/null +++ b/crates/sqllogictest/testdata/slts/df_test/insert_into.slt @@ -0,0 +1,119 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# Verify the table is initially empty +query IT rowsort +SELECT * FROM default.default.test_unpartitioned_table +---- + +# Insert a single row and verify the count +query I +INSERT INTO default.default.test_unpartitioned_table VALUES (1, 'Alice') +---- +1 + +# Verify the inserted row +query IT rowsort +SELECT * FROM default.default.test_unpartitioned_table +---- +1 Alice + +# Insert multiple rows and verify the count +query I +INSERT INTO default.default.test_unpartitioned_table VALUES (2, 'Bob'), (3, 'Charlie') +---- +2 + +# Verify all rows +query IT rowsort +SELECT * FROM default.default.test_unpartitioned_table +---- +1 Alice +2 Bob +3 Charlie + +# Insert with NULL value and verify the count +query I +INSERT INTO default.default.test_unpartitioned_table VALUES (4, NULL) +---- +1 + +# Verify NULL handling +query IT rowsort +SELECT * FROM default.default.test_unpartitioned_table +---- +1 Alice +2 Bob +3 Charlie +4 NULL + +# Test partitioned table - verify initially empty +query ITT rowsort +SELECT * FROM default.default.test_partitioned_table +---- + +# Insert single row into partitioned table +query I +INSERT INTO default.default.test_partitioned_table VALUES (1, 'electronics', 'laptop') +---- +1 + +# Verify the inserted row in partitioned table +query ITT rowsort +SELECT * FROM default.default.test_partitioned_table +---- +1 electronics laptop + +# Insert multiple rows with different partition values +query I +INSERT INTO default.default.test_partitioned_table VALUES (2, 'electronics', 'phone'), (3, 'books', 'novel'), (4, 'books', 'textbook'), (5, 'clothing', 'shirt') +---- +4 + +# Verify all rows in partitioned table +query ITT rowsort +SELECT * FROM default.default.test_partitioned_table +---- +1 electronics laptop +2 electronics phone +3 books novel +4 books textbook +5 clothing shirt + +# Insert with NULL value in optional column +query I +INSERT INTO default.default.test_partitioned_table VALUES (6, 'electronics', NULL) +---- +1 + +# Verify NULL handling 
in partitioned table +query ITT rowsort +SELECT * FROM default.default.test_partitioned_table +---- +1 electronics laptop +2 electronics phone +3 books novel +4 books textbook +5 clothing shirt +6 electronics NULL + +# Verify partition filtering works +query ITT rowsort +SELECT * FROM default.default.test_partitioned_table WHERE category = 'books' +---- +3 books novel +4 books textbook diff --git a/crates/sqllogictest/testdata/slts/df_test/show_tables.slt b/crates/sqllogictest/testdata/slts/df_test/show_tables.slt index 34709d7359..c5da5f6276 100644 --- a/crates/sqllogictest/testdata/slts/df_test/show_tables.slt +++ b/crates/sqllogictest/testdata/slts/df_test/show_tables.slt @@ -25,6 +25,12 @@ datafusion information_schema routines VIEW datafusion information_schema schemata VIEW datafusion information_schema tables VIEW datafusion information_schema views VIEW +default default test_partitioned_table BASE TABLE +default default test_partitioned_table$manifests BASE TABLE +default default test_partitioned_table$snapshots BASE TABLE +default default test_unpartitioned_table BASE TABLE +default default test_unpartitioned_table$manifests BASE TABLE +default default test_unpartitioned_table$snapshots BASE TABLE default information_schema columns VIEW default information_schema df_settings VIEW default information_schema parameters VIEW From 84b2ba86765b96b3d4afc8b30ab29b1ef61be510 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 8 Dec 2025 18:45:42 +0800 Subject: [PATCH 16/58] refactor: Drop smol runtime support (#1900) ## Which issue does this PR close? - Closes https://github.com/apache/iceberg-rust/issues/1866 ## What changes are included in this PR? ## Are these changes tested? 
--------- Signed-off-by: Xuanwo Co-authored-by: Renjie Liu --- .github/workflows/ci.yml | 1 - Cargo.lock | 189 +----------------------------- Cargo.toml | 1 - crates/iceberg/Cargo.toml | 5 +- crates/iceberg/src/io/storage.rs | 8 ++ crates/iceberg/src/runtime/mod.rs | 58 ++------- 6 files changed, 25 insertions(+), 237 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7b7e81837e..f393309bcb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -143,7 +143,6 @@ jobs: matrix: test-suite: - { name: "default", args: "--all-targets --all-features --workspace" } - - { name: "smol", args: "--all-targets --no-default-features --features smol --features storage-all --workspace" } - { name: "doc", args: "--doc --all-features --workspace" } name: Unit Tests (${{ matrix.test-suite.name }}) steps: diff --git a/Cargo.lock b/Cargo.lock index 736f2b1461..4d9f2b99c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -465,18 +465,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "async-channel" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "924ed96dd52d1b75e9c1a3e6275715fd320f5f9439fb5a4a11fa51f4221158d2" -dependencies = [ - "concurrent-queue", - "event-listener-strategy", - "futures-core", - "pin-project-lite", -] - [[package]] name = "async-compression" version = "0.4.19" @@ -494,49 +482,6 @@ dependencies = [ "zstd-safe", ] -[[package]] -name = "async-executor" -version = "1.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497c00e0fd83a72a79a39fcbd8e3e2f055d6f6c7e025f3b3d91f4f8e76527fb8" -dependencies = [ - "async-task", - "concurrent-queue", - "fastrand", - "futures-lite", - "pin-project-lite", - "slab", -] - -[[package]] -name = "async-fs" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8034a681df4aed8b8edbd7fbe472401ecf009251c8b40556b304567052e294c5" -dependencies = [ - "async-lock", - 
"blocking", - "futures-lite", -] - -[[package]] -name = "async-io" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "456b8a8feb6f42d237746d4b3e9a178494627745c3c56c6ea55d92ba50d026fc" -dependencies = [ - "autocfg", - "cfg-if", - "concurrent-queue", - "futures-io", - "futures-lite", - "parking", - "polling", - "rustix", - "slab", - "windows-sys 0.61.2", -] - [[package]] name = "async-lock" version = "3.4.1" @@ -548,35 +493,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "async-net" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b948000fad4873c1c9339d60f2623323a0cfd3816e5181033c6a5cb68b2accf7" -dependencies = [ - "async-io", - "blocking", - "futures-lite", -] - -[[package]] -name = "async-process" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc50921ec0055cdd8a16de48773bfeec5c972598674347252c0399676be7da75" -dependencies = [ - "async-channel", - "async-io", - "async-lock", - "async-signal", - "async-task", - "blocking", - "cfg-if", - "event-listener", - "futures-lite", - "rustix", -] - [[package]] name = "async-recursion" version = "1.1.1" @@ -588,30 +504,6 @@ dependencies = [ "syn 2.0.108", ] -[[package]] -name = "async-signal" -version = "0.2.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43c070bbf59cd3570b6b2dd54cd772527c7c3620fce8be898406dd3ed6adc64c" -dependencies = [ - "async-io", - "async-lock", - "atomic-waker", - "cfg-if", - "futures-core", - "futures-io", - "rustix", - "signal-hook-registry", - "slab", - "windows-sys 0.61.2", -] - -[[package]] -name = "async-task" -version = "4.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" - [[package]] name = "async-trait" version = "0.1.89" @@ -1185,19 +1077,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = 
"blocking" -version = "1.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e83f8d02be6967315521be875afa792a316e28d57b5a2d401897e2a7921b7f21" -dependencies = [ - "async-channel", - "async-task", - "futures-io", - "futures-lite", - "piper", -] - [[package]] name = "bon" version = "3.8.1" @@ -2667,7 +2546,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2809,7 +2688,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -3052,19 +2931,6 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" -[[package]] -name = "futures-lite" -version = "2.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f78e10609fe0e0b3f4157ffab1876319b5b0db102a2c60dc4626306dc46b44ad" -dependencies = [ - "fastrand", - "futures-core", - "futures-io", - "parking", - "pin-project-lite", -] - [[package]] name = "futures-macro" version = "0.3.31" @@ -3567,7 +3433,6 @@ dependencies = [ "serde_json", "serde_repr", "serde_with", - "smol", "strum 0.27.2", "tempfile", "thrift", @@ -4565,7 +4430,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -5077,17 +4942,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" -[[package]] -name = "piper" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"96c8c490f422ef9a4efd2cb5b42b76c8613d7e7dfc1caf667b8a3350a5acc066" -dependencies = [ - "atomic-waker", - "fastrand", - "futures-io", -] - [[package]] name = "pkcs1" version = "0.7.5" @@ -5132,20 +4986,6 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" -[[package]] -name = "polling" -version = "3.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d0e4f59085d47d8241c88ead0f274e8a0cb551f3625263c05eb8dd897c34218" -dependencies = [ - "cfg-if", - "concurrent-queue", - "hermit-abi", - "pin-project-lite", - "rustix", - "windows-sys 0.61.2", -] - [[package]] name = "port_scanner" version = "0.1.5" @@ -5921,7 +5761,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -6478,23 +6318,6 @@ dependencies = [ "serde", ] -[[package]] -name = "smol" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33bd3e260892199c3ccfc487c88b2da2265080acb316cd920da72fdfd7c599f" -dependencies = [ - "async-channel", - "async-executor", - "async-fs", - "async-io", - "async-lock", - "async-net", - "async-process", - "blocking", - "futures-lite", -] - [[package]] name = "snap" version = "1.1.1" @@ -6993,7 +6816,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -7819,7 +7642,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 0512155534..36093d92aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -116,7 +116,6 @@ serde_derive = "1.0.219" serde_json = "1.0.142" serde_repr = "0.1.16" serde_with = "3.4" -smol = "2.0.2" sqllogictest = 
"0.28.3" sqlx = { version = "0.8.1", default-features = false } stacker = "0.1.20" diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 895a5cf5e4..84c90d466f 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -29,7 +29,7 @@ license = { workspace = true } repository = { workspace = true } [features] -default = ["storage-memory", "storage-fs", "storage-s3", "tokio"] +default = ["storage-memory", "storage-fs", "storage-s3"] storage-all = ["storage-memory", "storage-fs", "storage-s3", "storage-gcs"] storage-azdls = ["opendal/services-azdls"] @@ -39,8 +39,6 @@ storage-memory = ["opendal/services-memory"] storage-oss = ["opendal/services-oss"] storage-s3 = ["opendal/services-s3", "reqsign"] -smol = ["dep:smol"] -tokio = ["tokio/rt-multi-thread"] [dependencies] anyhow = { workspace = true } @@ -85,7 +83,6 @@ serde_derive = { workspace = true } serde_json = { workspace = true } serde_repr = { workspace = true } serde_with = { workspace = true } -smol = { workspace = true, optional = true } strum = { workspace = true, features = ["derive"] } thrift = { workspace = true } tokio = { workspace = true, optional = false, features = ["sync"] } diff --git a/crates/iceberg/src/io/storage.rs b/crates/iceberg/src/io/storage.rs index d5f2ad8fab..5880ccca59 100644 --- a/crates/iceberg/src/io/storage.rs +++ b/crates/iceberg/src/io/storage.rs @@ -15,6 +15,12 @@ // specific language governing permissions and limitations // under the License. +#[cfg(any( + feature = "storage-s3", + feature = "storage-gcs", + feature = "storage-oss", + feature = "storage-azdls", +))] use std::sync::Arc; use opendal::layers::RetryLayer; @@ -71,6 +77,7 @@ impl Storage { /// Convert iceberg config to opendal config. 
pub(crate) fn build(file_io_builder: FileIOBuilder) -> crate::Result { let (scheme_str, props, extensions) = file_io_builder.into_parts(); + let _ = (&props, &extensions); let scheme = Self::parse_scheme(&scheme_str)?; match scheme { @@ -127,6 +134,7 @@ impl Storage { path: &'a impl AsRef, ) -> crate::Result<(Operator, &'a str)> { let path = path.as_ref(); + let _ = path; let (operator, relative_path): (Operator, &str) = match self { #[cfg(feature = "storage-memory")] Storage::Memory(op) => { diff --git a/crates/iceberg/src/runtime/mod.rs b/crates/iceberg/src/runtime/mod.rs index d0a3ce6602..61aa623f58 100644 --- a/crates/iceberg/src/runtime/mod.rs +++ b/crates/iceberg/src/runtime/mod.rs @@ -21,28 +21,20 @@ use std::future::Future; use std::pin::Pin; use std::task::{Context, Poll}; -pub enum JoinHandle { - #[cfg(feature = "tokio")] - Tokio(tokio::task::JoinHandle), - #[cfg(all(feature = "smol", not(feature = "tokio")))] - Smol(smol::Task), - #[cfg(all(not(feature = "smol"), not(feature = "tokio")))] - Unimplemented(Box), -} +use tokio::task; + +pub struct JoinHandle(task::JoinHandle); + +impl Unpin for JoinHandle {} impl Future for JoinHandle { type Output = T; fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { match self.get_mut() { - #[cfg(feature = "tokio")] - JoinHandle::Tokio(handle) => Pin::new(handle) + JoinHandle(handle) => Pin::new(handle) .poll(cx) - .map(|h| h.expect("tokio spawned task failed")), - #[cfg(all(feature = "smol", not(feature = "tokio")))] - JoinHandle::Smol(handle) => Pin::new(handle).poll(cx), - #[cfg(all(not(feature = "smol"), not(feature = "tokio")))] - JoinHandle::Unimplemented(_) => unimplemented!("no runtime has been enabled"), + .map(|r| r.expect("tokio spawned task failed")), } } } @@ -50,17 +42,10 @@ impl Future for JoinHandle { #[allow(dead_code)] pub fn spawn(f: F) -> JoinHandle where - F: Future + Send + 'static, + F: std::future::Future + Send + 'static, F::Output: Send + 'static, { - #[cfg(feature = "tokio")] - 
return JoinHandle::Tokio(tokio::task::spawn(f)); - - #[cfg(all(feature = "smol", not(feature = "tokio")))] - return JoinHandle::Smol(smol::spawn(f)); - - #[cfg(all(not(feature = "smol"), not(feature = "tokio")))] - unimplemented!("no runtime has been enabled") + JoinHandle(task::spawn(f)) } #[allow(dead_code)] @@ -69,45 +54,22 @@ where F: FnOnce() -> T + Send + 'static, T: Send + 'static, { - #[cfg(feature = "tokio")] - return JoinHandle::Tokio(tokio::task::spawn_blocking(f)); - - #[cfg(all(feature = "smol", not(feature = "tokio")))] - return JoinHandle::Smol(smol::unblock(f)); - - #[cfg(all(not(feature = "smol"), not(feature = "tokio")))] - unimplemented!("no runtime has been enabled") + JoinHandle(task::spawn_blocking(f)) } #[cfg(test)] mod tests { use super::*; - #[cfg(feature = "tokio")] #[tokio::test] async fn test_tokio_spawn() { let handle = spawn(async { 1 + 1 }); assert_eq!(handle.await, 2); } - #[cfg(feature = "tokio")] #[tokio::test] async fn test_tokio_spawn_blocking() { let handle = spawn_blocking(|| 1 + 1); assert_eq!(handle.await, 2); } - - #[cfg(all(feature = "smol", not(feature = "tokio")))] - #[smol::test] - async fn test_smol_spawn() { - let handle = spawn(async { 1 + 1 }); - assert_eq!(handle.await, 2); - } - - #[cfg(all(feature = "smol", not(feature = "tokio")))] - #[smo::test] - async fn test_smol_spawn_blocking() { - let handle = spawn_blocking(|| 1 + 1); - assert_eq!(handle.await, 2); - } } From 5c3c5a2800f2c1f61b6a306b66b32be35e96dd11 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Dec 2025 21:02:28 +0800 Subject: [PATCH 17/58] chore(deps): Bump minijinja from 2.12.0 to 2.13.0 (#1909) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4d9f2b99c3..7d23305002 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4214,9 +4214,9 @@ dependencies = [ [[package]] name = "minijinja" -version = "2.12.0" +version = "2.13.0" source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9f264d75233323f4b7d2f03aefe8a990690cdebfbfe26ea86bcbaec5e9ac990" +checksum = "0adbe6e92a6ce0fd6c4aac593fdfd3e3950b0f61b1a63aa9731eb6fd85776fa3" dependencies = [ "serde", ] From 26b98394015c57631566aa3ff36cd4ccd7792b1a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Dec 2025 13:20:39 +0800 Subject: [PATCH 18/58] chore(deps): Bump uuid from 1.18.1 to 1.19.0 (#1910) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [uuid](https://github.com/uuid-rs/uuid) from 1.18.1 to 1.19.0.
Release notes

Sourced from uuid's releases.

v1.19.0

What's Changed

Full Changelog: https://github.com/uuid-rs/uuid/compare/v1.18.1...v1.19.0

Commits
  • 7527cef Merge pull request #849 from uuid-rs/cargo/v1.19.0
  • d0422fa prepare for 1.19.0 release
  • f9a36e2 Merge pull request #848 from paolobarbolini/maintenance
  • 029a57e Fix most clippy warnings
  • e73bb27 Upgrade to 2021 edition
  • c597622 Merge pull request #843 from paolobarbolini/serde_core-migration
  • 9835bd6 Switch serde dependency to serde_core
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=uuid&package-manager=cargo&previous-version=1.18.1&new-version=1.19.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Renjie Liu --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7d23305002..c7c103d279 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7387,13 +7387,13 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.18.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" dependencies = [ "getrandom 0.3.4", "js-sys", - "serde", + "serde_core", "wasm-bindgen", ] From 1384a4f2d71ed16b73f3b1f139d5dbd4e5035428 Mon Sep 17 00:00:00 2001 From: Gerald Berger <59661379+gbrgr@users.noreply.github.com> Date: Tue, 9 Dec 2025 11:46:49 +0100 Subject: [PATCH 19/58] feat(core): Add support for `_file` column (#1824) ## Which issue does this PR close? - Closes #1766. ## What changes are included in this PR? Integrates virtual field handling for the `_file` metadata column into `RecordBatchTransformer` using a pre-computed constants map, eliminating post-processing and duplicate lookups. 
## Key Changes **New `metadata_columns.rs` module**: Centralized utilities for metadata columns - Constants: `RESERVED_FIELD_ID_FILE`, `RESERVED_COL_NAME_FILE` - Helper functions: `get_metadata_column_name()`, `get_metadata_field_id()`, `is_metadata_field()`, `is_metadata_column_name()` **Enhanced `RecordBatchTransformer`**: - Added `constant_fields: HashMap` - pre-computed during initialization - New `with_constant()` method - computes Arrow type once during setup - Updated to use pre-computed types and values (avoids duplicate lookups) - Handles `DataType::RunEndEncoded` for constant strings (memory efficient) **Simplified `reader.rs`**: - Pass full `project_field_ids` (including virtual) to RecordBatchTransformer - Single `with_constant()` call to register `_file` column - Removed post-processing loop **Updated `scan/mod.rs`**: - Use `is_metadata_column_name()` and `get_metadata_field_id()` instead of hardcoded checks ## Are these changes tested? Yes, comprehensive tests have been added to verify the functionality: ### New Tests (7 tests added) #### Table Scan API Tests (7 tests) 1. **`test_select_with_file_column`** - Verifies basic functionality of selecting `_file` with regular columns 2. **`test_select_file_column_position`** - Verifies column ordering is preserved 3. **`test_select_file_column_only`** - Tests selecting only the `_file` column 4. **`test_file_column_with_multiple_files`** - Tests multiple data files scenario 5. **`test_file_column_at_start`** - Tests `_file` at position 0 6. **`test_file_column_at_end`** - Tests `_file` at the last position 7. 
**`test_select_with_repeated_column_names`** - Tests repeated column selection --- crates/iceberg/src/arrow/reader.rs | 29 +- .../src/arrow/record_batch_transformer.rs | 476 ++++++++++-------- crates/iceberg/src/arrow/schema.rs | 54 ++ crates/iceberg/src/arrow/value.rs | 216 +++++++- crates/iceberg/src/lib.rs | 1 + crates/iceberg/src/metadata_columns.rs | 127 +++++ crates/iceberg/src/scan/mod.rs | 341 ++++++++++++- 7 files changed, 1031 insertions(+), 213 deletions(-) create mode 100644 crates/iceberg/src/metadata_columns.rs diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs index ab5a96f751..de8a1420e4 100644 --- a/crates/iceberg/src/arrow/reader.rs +++ b/crates/iceberg/src/arrow/reader.rs @@ -54,6 +54,7 @@ use crate::expr::visitors::page_index_evaluator::PageIndexEvaluator; use crate::expr::visitors::row_group_metrics_evaluator::RowGroupMetricsEvaluator; use crate::expr::{BoundPredicate, BoundReference}; use crate::io::{FileIO, FileMetadata, FileRead}; +use crate::metadata_columns::{RESERVED_FIELD_ID_FILE, is_metadata_field}; use crate::scan::{ArrowRecordBatchStream, FileScanTask, FileScanTaskStream}; use crate::spec::{Datum, NameMapping, NestedField, PrimitiveType, Schema, Type}; use crate::utils::available_parallelism; @@ -250,12 +251,20 @@ impl ArrowReader { initial_stream_builder }; + // Filter out metadata fields for Parquet projection (they don't exist in files) + let project_field_ids_without_metadata: Vec = task + .project_field_ids + .iter() + .filter(|&&id| !is_metadata_field(id)) + .copied() + .collect(); + // Create projection mask based on field IDs // - If file has embedded IDs: field-ID-based projection (missing_field_ids=false) // - If name mapping applied: field-ID-based projection (missing_field_ids=true but IDs now match) // - If fallback IDs: position-based projection (missing_field_ids=true) let projection_mask = Self::get_arrow_projection_mask( - &task.project_field_ids, + 
&project_field_ids_without_metadata, &task.schema, record_batch_stream_builder.parquet_schema(), record_batch_stream_builder.schema(), @@ -266,16 +275,23 @@ impl ArrowReader { record_batch_stream_builder.with_projection(projection_mask.clone()); // RecordBatchTransformer performs any transformations required on the RecordBatches - // that come back from the file, such as type promotion, default column insertion - // and column re-ordering. + // that come back from the file, such as type promotion, default column insertion, + // column re-ordering, partition constants, and virtual field addition (like _file) let mut record_batch_transformer_builder = RecordBatchTransformerBuilder::new(task.schema_ref(), task.project_field_ids()); + // Add the _file metadata column if it's in the projected fields + if task.project_field_ids().contains(&RESERVED_FIELD_ID_FILE) { + let file_datum = Datum::string(task.data_file_path.clone()); + record_batch_transformer_builder = + record_batch_transformer_builder.with_constant(RESERVED_FIELD_ID_FILE, file_datum); + } + if let (Some(partition_spec), Some(partition_data)) = (task.partition_spec.clone(), task.partition.clone()) { record_batch_transformer_builder = - record_batch_transformer_builder.with_partition(partition_spec, partition_data); + record_batch_transformer_builder.with_partition(partition_spec, partition_data)?; } let mut record_batch_transformer = record_batch_transformer_builder.build(); @@ -416,7 +432,10 @@ impl ArrowReader { record_batch_stream_builder .build()? .map(move |batch| match batch { - Ok(batch) => record_batch_transformer.process_record_batch(batch), + Ok(batch) => { + // Process the record batch (type promotion, column reordering, virtual fields, etc.) 
+ record_batch_transformer.process_record_batch(batch) + } Err(err) => Err(err.into()), }); diff --git a/crates/iceberg/src/arrow/record_batch_transformer.rs b/crates/iceberg/src/arrow/record_batch_transformer.rs index a20adb6a5a..f30d4a09c3 100644 --- a/crates/iceberg/src/arrow/record_batch_transformer.rs +++ b/crates/iceberg/src/arrow/record_batch_transformer.rs @@ -19,24 +19,23 @@ use std::collections::HashMap; use std::sync::Arc; use arrow_array::{ - Array as ArrowArray, ArrayRef, BinaryArray, BooleanArray, Date32Array, Float32Array, - Float64Array, Int32Array, Int64Array, NullArray, RecordBatch, RecordBatchOptions, StringArray, - StructArray, + Array as ArrowArray, ArrayRef, Int32Array, RecordBatch, RecordBatchOptions, RunArray, }; -use arrow_buffer::NullBuffer; use arrow_cast::cast; use arrow_schema::{ - DataType, FieldRef, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, SchemaRef, + DataType, Field, FieldRef, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, SchemaRef, }; use parquet::arrow::PARQUET_FIELD_ID_META_KEY; -use crate::arrow::schema_to_arrow_schema; +use crate::arrow::value::{create_primitive_array_repeated, create_primitive_array_single_element}; +use crate::arrow::{datum_to_arrow_type_with_ree, schema_to_arrow_schema}; +use crate::metadata_columns::get_metadata_field; use crate::spec::{ - Literal, PartitionSpec, PrimitiveLiteral, Schema as IcebergSchema, Struct, Transform, + Datum, Literal, PartitionSpec, PrimitiveLiteral, Schema as IcebergSchema, Struct, Transform, }; use crate::{Error, ErrorKind, Result}; -/// Build a map of field ID to constant value for identity-partitioned fields. +/// Build a map of field ID to constant value (as Datum) for identity-partitioned fields. /// /// Implements Iceberg spec "Column Projection" rule #1: use partition metadata constants /// only for identity-transformed fields. Non-identity transforms (bucket, truncate, year, etc.) 
@@ -53,20 +52,65 @@ use crate::{Error, ErrorKind, Result}; fn constants_map( partition_spec: &PartitionSpec, partition_data: &Struct, -) -> HashMap { + schema: &IcebergSchema, +) -> Result> { let mut constants = HashMap::new(); for (pos, field) in partition_spec.fields().iter().enumerate() { // Only identity transforms should use constant values from partition metadata if matches!(field.transform, Transform::Identity) { + // Get the field from schema to extract its type + let iceberg_field = schema.field_by_id(field.source_id).ok_or(Error::new( + ErrorKind::Unexpected, + format!("Field {} not found in schema", field.source_id), + ))?; + + // Ensure the field type is primitive + let prim_type = match &*iceberg_field.field_type { + crate::spec::Type::Primitive(prim_type) => prim_type, + _ => { + return Err(Error::new( + ErrorKind::Unexpected, + format!( + "Partition field {} has non-primitive type {:?}", + field.source_id, iceberg_field.field_type + ), + )); + } + }; + // Get the partition value for this field - if let Some(Literal::Primitive(value)) = &partition_data[pos] { - constants.insert(field.source_id, value.clone()); + // Handle both None (null) and Some(Literal::Primitive) cases + match &partition_data[pos] { + None => { + // TODO (https://github.com/apache/iceberg-rust/issues/1914): Add support for null datum values. 
+ return Err(Error::new( + ErrorKind::Unexpected, + format!( + "Partition field {} has null value for identity transform", + field.source_id + ), + )); + } + Some(Literal::Primitive(value)) => { + // Create a Datum from the primitive type and value + let datum = Datum::new(prim_type.clone(), value.clone()); + constants.insert(field.source_id, datum); + } + Some(literal) => { + return Err(Error::new( + ErrorKind::Unexpected, + format!( + "Partition field {} has non-primitive value: {:?}", + field.source_id, literal + ), + )); + } } } } - constants + Ok(constants) } /// Indicates how a particular column in a processed RecordBatch should @@ -146,13 +190,13 @@ enum SchemaComparison { /// Builder for RecordBatchTransformer to improve ergonomics when constructing with optional parameters. /// -/// See [`RecordBatchTransformer`] for details on partition spec and partition data. +/// Constant fields are pre-computed for both virtual/metadata fields (like _file) and +/// identity-partitioned fields to avoid duplicate work during batch processing. #[derive(Debug)] pub(crate) struct RecordBatchTransformerBuilder { snapshot_schema: Arc, projected_iceberg_field_ids: Vec, - partition_spec: Option>, - partition_data: Option, + constant_fields: HashMap, } impl RecordBatchTransformerBuilder { @@ -163,32 +207,48 @@ impl RecordBatchTransformerBuilder { Self { snapshot_schema, projected_iceberg_field_ids: projected_iceberg_field_ids.to_vec(), - partition_spec: None, - partition_data: None, + constant_fields: HashMap::new(), } } + /// Add a constant value for a specific field ID. + /// This is used for virtual/metadata fields like _file that have constant values per batch. 
+ /// + /// # Arguments + /// * `field_id` - The field ID to associate with the constant + /// * `datum` - The constant value (with type) for this field + pub(crate) fn with_constant(mut self, field_id: i32, datum: Datum) -> Self { + self.constant_fields.insert(field_id, datum); + self + } + /// Set partition spec and data together for identifying identity-transformed partition columns. /// /// Both partition_spec and partition_data must be provided together since the spec defines /// which fields are identity-partitioned, and the data provides their constant values. - /// One without the other cannot produce a valid constants map. + /// This method computes the partition constants and merges them into constant_fields. pub(crate) fn with_partition( mut self, partition_spec: Arc, partition_data: Struct, - ) -> Self { - self.partition_spec = Some(partition_spec); - self.partition_data = Some(partition_data); - self + ) -> Result { + // Compute partition constants for identity-transformed fields (already returns Datum) + let partition_constants = + constants_map(&partition_spec, &partition_data, &self.snapshot_schema)?; + + // Add partition constants to constant_fields + for (field_id, datum) in partition_constants { + self.constant_fields.insert(field_id, datum); + } + + Ok(self) } pub(crate) fn build(self) -> RecordBatchTransformer { RecordBatchTransformer { snapshot_schema: self.snapshot_schema, projected_iceberg_field_ids: self.projected_iceberg_field_ids, - partition_spec: self.partition_spec, - partition_data: self.partition_data, + constant_fields: self.constant_fields, batch_transform: None, } } @@ -228,16 +288,10 @@ impl RecordBatchTransformerBuilder { pub(crate) struct RecordBatchTransformer { snapshot_schema: Arc, projected_iceberg_field_ids: Vec, - - /// Partition spec for identifying identity-transformed partition columns (spec rule #1). 
- /// Only fields with identity transforms use partition data constants; non-identity transforms - /// (bucket, truncate, etc.) must read source columns from data files. - partition_spec: Option>, - - /// Partition data providing constant values for identity-transformed partition columns (spec rule #1). - /// For example, in a file at path `dept=engineering/file.parquet`, this would contain - /// the value "engineering" for the dept field. - partition_data: Option, + // Pre-computed constant field information: field_id -> Datum + // Includes both virtual/metadata fields (like _file) and identity-partitioned fields + // Datum holds both the Iceberg type and the value + constant_fields: HashMap, // BatchTransform gets lazily constructed based on the schema of // the first RecordBatch we receive from the file @@ -279,8 +333,7 @@ impl RecordBatchTransformer { record_batch.schema_ref(), self.snapshot_schema.as_ref(), &self.projected_iceberg_field_ids, - self.partition_spec.as_ref().map(|s| s.as_ref()), - self.partition_data.as_ref(), + &self.constant_fields, )?); self.process_record_batch(record_batch)? @@ -299,8 +352,7 @@ impl RecordBatchTransformer { source_schema: &ArrowSchemaRef, snapshot_schema: &IcebergSchema, projected_iceberg_field_ids: &[i32], - partition_spec: Option<&PartitionSpec>, - partition_data: Option<&Struct>, + constant_fields: &HashMap, ) -> Result { let mapped_unprojected_arrow_schema = Arc::new(schema_to_arrow_schema(snapshot_schema)?); let field_id_to_mapped_schema_map = @@ -311,22 +363,54 @@ impl RecordBatchTransformer { let fields: Result> = projected_iceberg_field_ids .iter() .map(|field_id| { - Ok(field_id_to_mapped_schema_map - .get(field_id) - .ok_or(Error::new(ErrorKind::Unexpected, "field not found"))? 
- .0 - .clone()) + // Check if this is a constant field + if constant_fields.contains_key(field_id) { + // For metadata/virtual fields (like _file), get name from metadata_columns + // For partition fields, get name from schema (they exist in schema) + if let Ok(iceberg_field) = get_metadata_field(*field_id) { + // This is a metadata/virtual field - convert Iceberg field to Arrow + let datum = constant_fields.get(field_id).ok_or(Error::new( + ErrorKind::Unexpected, + "constant field not found", + ))?; + let arrow_type = datum_to_arrow_type_with_ree(datum); + let arrow_field = + Field::new(&iceberg_field.name, arrow_type, !iceberg_field.required) + .with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + iceberg_field.id.to_string(), + )])); + Ok(Arc::new(arrow_field)) + } else { + // This is a partition constant field (exists in schema but uses constant value) + let field = &field_id_to_mapped_schema_map + .get(field_id) + .ok_or(Error::new(ErrorKind::Unexpected, "field not found"))? + .0; + let datum = constant_fields.get(field_id).ok_or(Error::new( + ErrorKind::Unexpected, + "constant field not found", + ))?; + let arrow_type = datum_to_arrow_type_with_ree(datum); + // Use the type from constant_fields (REE for constants) + let constant_field = + Field::new(field.name(), arrow_type, field.is_nullable()) + .with_metadata(field.metadata().clone()); + Ok(Arc::new(constant_field)) + } + } else { + // Regular field - use schema as-is + Ok(field_id_to_mapped_schema_map + .get(field_id) + .ok_or(Error::new(ErrorKind::Unexpected, "field not found"))? 
+ .0 + .clone()) + } }) .collect(); let target_schema = Arc::new(ArrowSchema::new(fields?)); - let constants_map = if let (Some(spec), Some(data)) = (partition_spec, partition_data) { - constants_map(spec, data) - } else { - HashMap::new() - }; - match Self::compare_schemas(source_schema, &target_schema) { SchemaComparison::Equivalent => Ok(BatchTransform::PassThrough), SchemaComparison::NameChangesOnly => Ok(BatchTransform::ModifySchema { target_schema }), @@ -336,8 +420,7 @@ impl RecordBatchTransformer { snapshot_schema, projected_iceberg_field_ids, field_id_to_mapped_schema_map, - constants_map, - partition_spec, + constant_fields, )?, target_schema, }), @@ -394,8 +477,7 @@ impl RecordBatchTransformer { snapshot_schema: &IcebergSchema, projected_iceberg_field_ids: &[i32], field_id_to_mapped_schema_map: HashMap, - constants_map: HashMap, - _partition_spec: Option<&PartitionSpec>, + constant_fields: &HashMap, ) -> Result> { let field_id_to_source_schema_map = Self::build_field_id_to_arrow_schema_map(source_schema)?; @@ -403,6 +485,18 @@ impl RecordBatchTransformer { projected_iceberg_field_ids .iter() .map(|field_id| { + // Check if this is a constant field (metadata/virtual or identity-partitioned) + // Constant fields always use their pre-computed constant values, regardless of whether + // they exist in the Parquet file. This is per Iceberg spec rule #1: partition metadata + // is authoritative and should be preferred over file data. + if let Some(datum) = constant_fields.get(field_id) { + let arrow_type = datum_to_arrow_type_with_ree(datum); + return Ok(ColumnSource::Add { + value: Some(datum.literal().clone()), + target_type: arrow_type, + }); + } + let (target_field, _) = field_id_to_mapped_schema_map .get(field_id) @@ -451,13 +545,8 @@ impl RecordBatchTransformer { ); // Apply spec's fallback steps for "not present" fields. 
- let column_source = if let Some(constant_value) = constants_map.get(field_id) { - // Rule #1: Identity partition constant - ColumnSource::Add { - value: Some(constant_value.clone()), - target_type: target_type.clone(), - } - } else if let Some(source) = field_by_id { + // Rule #1 (constants) is handled at the beginning of this function + let column_source = if let Some(source) = field_by_id { source } else { // Rules #2, #3 and #4: @@ -471,6 +560,7 @@ impl RecordBatchTransformer { None } }); + ColumnSource::Add { value: default_value, target_type: target_type.clone(), @@ -539,86 +629,36 @@ impl RecordBatchTransformer { prim_lit: &Option, num_rows: usize, ) -> Result { - Ok(match (target_type, prim_lit) { - (DataType::Boolean, Some(PrimitiveLiteral::Boolean(value))) => { - Arc::new(BooleanArray::from(vec![*value; num_rows])) - } - (DataType::Boolean, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(BooleanArray::from(vals)) - } - (DataType::Int32, Some(PrimitiveLiteral::Int(value))) => { - Arc::new(Int32Array::from(vec![*value; num_rows])) - } - (DataType::Int32, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Int32Array::from(vals)) - } - (DataType::Date32, Some(PrimitiveLiteral::Int(value))) => { - Arc::new(Date32Array::from(vec![*value; num_rows])) - } - (DataType::Date32, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Date32Array::from(vals)) - } - (DataType::Int64, Some(PrimitiveLiteral::Long(value))) => { - Arc::new(Int64Array::from(vec![*value; num_rows])) - } - (DataType::Int64, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Int64Array::from(vals)) - } - (DataType::Float32, Some(PrimitiveLiteral::Float(value))) => { - Arc::new(Float32Array::from(vec![value.0; num_rows])) - } - (DataType::Float32, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Float32Array::from(vals)) - } - (DataType::Float64, Some(PrimitiveLiteral::Double(value))) => { - Arc::new(Float64Array::from(vec![value.0; 
num_rows])) - } - (DataType::Float64, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Float64Array::from(vals)) - } - (DataType::Utf8, Some(PrimitiveLiteral::String(value))) => { - Arc::new(StringArray::from(vec![value.clone(); num_rows])) - } - (DataType::Utf8, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(StringArray::from(vals)) - } - (DataType::Binary, Some(PrimitiveLiteral::Binary(value))) => { - Arc::new(BinaryArray::from_vec(vec![value; num_rows])) - } - (DataType::Binary, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(BinaryArray::from_opt_vec(vals)) - } - (DataType::Struct(fields), None) => { - // Create a StructArray filled with nulls. Per Iceberg spec, optional struct fields - // default to null when added to the schema. We defer non-null default struct values - // and leave them as not implemented yet. - let null_arrays: Vec = fields - .iter() - .map(|field| Self::create_column(field.data_type(), &None, num_rows)) - .collect::>>()?; - - Arc::new(StructArray::new( - fields.clone(), - null_arrays, - Some(NullBuffer::new_null(num_rows)), + // Check if this is a RunEndEncoded type (for constant fields) + if let DataType::RunEndEncoded(_, values_field) = target_type { + // Helper to create a Run-End Encoded array + let create_ree_array = |values_array: ArrayRef| -> Result { + let run_ends = if num_rows == 0 { + Int32Array::from(Vec::::new()) + } else { + Int32Array::from(vec![num_rows as i32]) + }; + Ok(Arc::new( + RunArray::try_new(&run_ends, &values_array).map_err(|e| { + Error::new( + ErrorKind::Unexpected, + "Failed to create RunArray for constant value", + ) + .with_source(e) + })?, )) - } - (DataType::Null, _) => Arc::new(NullArray::new(num_rows)), - (dt, _) => { - return Err(Error::new( - ErrorKind::Unexpected, - format!("unexpected target column type {}", dt), - )); - } - }) + }; + + // Create the values array using the helper function + let values_array = + 
create_primitive_array_single_element(values_field.data_type(), prim_lit)?; + + // Wrap in Run-End Encoding + create_ree_array(values_array) + } else { + // Non-REE type (simple arrays for non-constant fields) + create_primitive_array_repeated(target_type, prim_lit, num_rows) + } } } @@ -639,6 +679,54 @@ mod test { }; use crate::spec::{Literal, NestedField, PrimitiveType, Schema, Struct, Type}; + /// Helper to extract string values from either StringArray or RunEndEncoded + /// Returns empty string for null values + fn get_string_value(array: &dyn Array, index: usize) -> String { + if let Some(string_array) = array.as_any().downcast_ref::() { + if string_array.is_null(index) { + String::new() + } else { + string_array.value(index).to_string() + } + } else if let Some(run_array) = array + .as_any() + .downcast_ref::>() + { + let values = run_array.values(); + let string_values = values + .as_any() + .downcast_ref::() + .expect("REE values should be StringArray"); + // For REE, all rows have the same value (index 0 in the values array) + if string_values.is_null(0) { + String::new() + } else { + string_values.value(0).to_string() + } + } else { + panic!("Expected StringArray or RunEndEncoded"); + } + } + + /// Helper to extract int values from either Int32Array or RunEndEncoded + fn get_int_value(array: &dyn Array, index: usize) -> i32 { + if let Some(int_array) = array.as_any().downcast_ref::() { + int_array.value(index) + } else if let Some(run_array) = array + .as_any() + .downcast_ref::>() + { + let values = run_array.values(); + let int_values = values + .as_any() + .downcast_ref::() + .expect("REE values should be Int32Array"); + int_values.value(0) + } else { + panic!("Expected Int32Array or RunEndEncoded"); + } + } + #[test] fn build_field_id_to_source_schema_map_works() { let arrow_schema = arrow_schema_already_same_as_target(); @@ -1137,6 +1225,7 @@ mod test { let mut transformer = RecordBatchTransformerBuilder::new(snapshot_schema, &projected_field_ids) 
.with_partition(partition_spec, partition_data) + .expect("Failed to add partition constants") .build(); // Create a Parquet RecordBatch with actual data @@ -1257,6 +1346,7 @@ mod test { let mut transformer = RecordBatchTransformerBuilder::new(snapshot_schema, &projected_field_ids) .with_partition(partition_spec, partition_data) + .expect("Failed to add partition constants") .build(); let parquet_batch = RecordBatch::try_new(parquet_schema, vec![ @@ -1271,30 +1361,23 @@ mod test { assert_eq!(result.num_columns(), 3); assert_eq!(result.num_rows(), 2); - let id_column = result - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(id_column.value(0), 100); - assert_eq!(id_column.value(1), 200); + // Use helpers to handle both simple and REE arrays + assert_eq!(get_int_value(result.column(0).as_ref(), 0), 100); + assert_eq!(get_int_value(result.column(0).as_ref(), 1), 200); - let dept_column = result - .column(1) - .as_any() - .downcast_ref::() - .unwrap(); - // This value MUST come from partition metadata (constant) - assert_eq!(dept_column.value(0), "engineering"); - assert_eq!(dept_column.value(1), "engineering"); + // dept column comes from partition metadata (constant) - will be REE + assert_eq!( + get_string_value(result.column(1).as_ref(), 0), + "engineering" + ); + assert_eq!( + get_string_value(result.column(1).as_ref(), 1), + "engineering" + ); - let name_column = result - .column(2) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(name_column.value(0), "Alice"); - assert_eq!(name_column.value(1), "Bob"); + // name column comes from file + assert_eq!(get_string_value(result.column(2).as_ref(), 0), "Alice"); + assert_eq!(get_string_value(result.column(2).as_ref(), 1), "Bob"); } /// Test bucket partitioning with renamed source column. 
@@ -1372,6 +1455,7 @@ mod test { let mut transformer = RecordBatchTransformerBuilder::new(snapshot_schema, &projected_field_ids) .with_partition(partition_spec, partition_data) + .expect("Failed to add partition constants") .build(); // Create a Parquet RecordBatch with actual data @@ -1476,6 +1560,7 @@ mod test { let mut transformer = RecordBatchTransformerBuilder::new(snapshot_schema, &projected_field_ids) .with_partition(partition_spec, partition_data) + .expect("Failed to add partition constants") .build(); let parquet_batch = RecordBatch::try_new(parquet_schema, vec![ @@ -1492,48 +1577,37 @@ mod test { // Verify each column demonstrates the correct spec rule: // Normal case: id from Parquet by field ID - let id_column = result - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(id_column.value(0), 100); - assert_eq!(id_column.value(1), 200); + // Use helpers to handle both simple and REE arrays + assert_eq!(get_int_value(result.column(0).as_ref(), 0), 100); + assert_eq!(get_int_value(result.column(0).as_ref(), 1), 200); + + // Rule #1: dept from partition metadata (identity transform) - will be REE + assert_eq!( + get_string_value(result.column(1).as_ref(), 0), + "engineering" + ); + assert_eq!( + get_string_value(result.column(1).as_ref(), 1), + "engineering" + ); - // Rule #1: dept from partition metadata (identity transform) - let dept_column = result - .column(1) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(dept_column.value(0), "engineering"); - assert_eq!(dept_column.value(1), "engineering"); + // Rule #2: data from Parquet via name mapping - will be regular array + assert_eq!(get_string_value(result.column(2).as_ref(), 0), "value1"); + assert_eq!(get_string_value(result.column(2).as_ref(), 1), "value2"); - // Rule #2: data from Parquet via name mapping - let data_column = result - .column(2) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(data_column.value(0), "value1"); - assert_eq!(data_column.value(1), 
"value2"); - - // Rule #3: category from initial_default - let category_column = result - .column(3) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(category_column.value(0), "default_category"); - assert_eq!(category_column.value(1), "default_category"); + // Rule #3: category from initial_default - will be REE + assert_eq!( + get_string_value(result.column(3).as_ref(), 0), + "default_category" + ); + assert_eq!( + get_string_value(result.column(3).as_ref(), 1), + "default_category" + ); - // Rule #4: notes is null (no default, not in Parquet, not in partition) - let notes_column = result - .column(4) - .as_any() - .downcast_ref::() - .unwrap(); - assert!(notes_column.is_null(0)); - assert!(notes_column.is_null(1)); + // Rule #4: notes is null (no default, not in Parquet, not in partition) - will be REE with null + // For null REE arrays, we still use the helper which handles extraction + assert_eq!(get_string_value(result.column(4).as_ref(), 0), ""); + assert_eq!(get_string_value(result.column(4).as_ref(), 1), ""); } } diff --git a/crates/iceberg/src/arrow/schema.rs b/crates/iceberg/src/arrow/schema.rs index ec0135bd77..4f4f083c73 100644 --- a/crates/iceberg/src/arrow/schema.rs +++ b/crates/iceberg/src/arrow/schema.rs @@ -1019,6 +1019,60 @@ impl TryFrom<&crate::spec::Schema> for ArrowSchema { } } +/// Converts a Datum (Iceberg type + primitive literal) to its corresponding Arrow DataType +/// with Run-End Encoding (REE). +/// +/// This function is used for constant fields in record batches, where all values are the same. +/// Run-End Encoding provides efficient storage for such constant columns. 
+/// +/// # Arguments +/// * `datum` - The Datum to convert, which contains both type and value information +/// +/// # Returns +/// Arrow DataType with Run-End Encoding applied +/// +/// # Example +/// ``` +/// use iceberg::arrow::datum_to_arrow_type_with_ree; +/// use iceberg::spec::Datum; +/// +/// let datum = Datum::string("test_file.parquet"); +/// let ree_type = datum_to_arrow_type_with_ree(&datum); +/// // Returns: RunEndEncoded(Int32, Utf8) +/// ``` +pub fn datum_to_arrow_type_with_ree(datum: &Datum) -> DataType { + // Helper to create REE type with the given values type. + // Note: values field is nullable as Arrow expects this when building the + // final Arrow schema with `RunArray::try_new`. + let make_ree = |values_type: DataType| -> DataType { + let run_ends_field = Arc::new(Field::new("run_ends", DataType::Int32, false)); + let values_field = Arc::new(Field::new("values", values_type, true)); + DataType::RunEndEncoded(run_ends_field, values_field) + }; + + // Match on the PrimitiveType from the Datum to determine the Arrow type + match datum.data_type() { + PrimitiveType::Boolean => make_ree(DataType::Boolean), + PrimitiveType::Int => make_ree(DataType::Int32), + PrimitiveType::Long => make_ree(DataType::Int64), + PrimitiveType::Float => make_ree(DataType::Float32), + PrimitiveType::Double => make_ree(DataType::Float64), + PrimitiveType::Date => make_ree(DataType::Date32), + PrimitiveType::Time => make_ree(DataType::Int64), + PrimitiveType::Timestamp => make_ree(DataType::Int64), + PrimitiveType::Timestamptz => make_ree(DataType::Int64), + PrimitiveType::TimestampNs => make_ree(DataType::Int64), + PrimitiveType::TimestamptzNs => make_ree(DataType::Int64), + PrimitiveType::String => make_ree(DataType::Utf8), + PrimitiveType::Uuid => make_ree(DataType::Binary), + PrimitiveType::Fixed(_) => make_ree(DataType::Binary), + PrimitiveType::Binary => make_ree(DataType::Binary), + PrimitiveType::Decimal { precision, scale } => { + 
make_ree(DataType::Decimal128(*precision as u8, *scale as i8)) + } + } +} + #[cfg(test)] mod tests { use std::collections::HashMap; diff --git a/crates/iceberg/src/arrow/value.rs b/crates/iceberg/src/arrow/value.rs index f1cf225bb4..0e0b85f073 100644 --- a/crates/iceberg/src/arrow/value.rs +++ b/crates/iceberg/src/arrow/value.rs @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // under the License. +use std::sync::Arc; + use arrow_array::{ Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Decimal128Array, FixedSizeBinaryArray, FixedSizeListArray, Float32Array, Float64Array, Int32Array, Int64Array, LargeBinaryArray, LargeListArray, LargeStringArray, ListArray, MapArray, StringArray, StructArray, Time64MicrosecondArray, TimestampMicrosecondArray, TimestampNanosecondArray, }; +use arrow_buffer::NullBuffer; use arrow_schema::{DataType, FieldRef}; use uuid::Uuid; use super::get_field_id; use crate::spec::{ - ListType, Literal, Map, MapType, NestedField, PartnerAccessor, PrimitiveType, + ListType, Literal, Map, MapType, NestedField, PartnerAccessor, PrimitiveLiteral, PrimitiveType, SchemaWithPartnerVisitor, Struct, StructType, Type, visit_struct_with_partner, visit_type_with_partner, }; @@ -617,6 +620,217 @@ pub fn arrow_primitive_to_literal( ) } +/// Create a single-element array from a primitive literal. +/// +/// This is used for creating constant arrays (Run-End Encoded arrays) where we need +/// a single value that represents all rows. 
+pub(crate) fn create_primitive_array_single_element( + data_type: &DataType, + prim_lit: &Option, +) -> Result { + match (data_type, prim_lit) { + (DataType::Boolean, Some(PrimitiveLiteral::Boolean(v))) => { + Ok(Arc::new(BooleanArray::from(vec![*v]))) + } + (DataType::Boolean, None) => Ok(Arc::new(BooleanArray::from(vec![Option::::None]))), + (DataType::Int32, Some(PrimitiveLiteral::Int(v))) => { + Ok(Arc::new(Int32Array::from(vec![*v]))) + } + (DataType::Int32, None) => Ok(Arc::new(Int32Array::from(vec![Option::::None]))), + (DataType::Date32, Some(PrimitiveLiteral::Int(v))) => { + Ok(Arc::new(Date32Array::from(vec![*v]))) + } + (DataType::Date32, None) => Ok(Arc::new(Date32Array::from(vec![Option::::None]))), + (DataType::Int64, Some(PrimitiveLiteral::Long(v))) => { + Ok(Arc::new(Int64Array::from(vec![*v]))) + } + (DataType::Int64, None) => Ok(Arc::new(Int64Array::from(vec![Option::::None]))), + (DataType::Float32, Some(PrimitiveLiteral::Float(v))) => { + Ok(Arc::new(Float32Array::from(vec![v.0]))) + } + (DataType::Float32, None) => Ok(Arc::new(Float32Array::from(vec![Option::::None]))), + (DataType::Float64, Some(PrimitiveLiteral::Double(v))) => { + Ok(Arc::new(Float64Array::from(vec![v.0]))) + } + (DataType::Float64, None) => Ok(Arc::new(Float64Array::from(vec![Option::::None]))), + (DataType::Utf8, Some(PrimitiveLiteral::String(v))) => { + Ok(Arc::new(StringArray::from(vec![v.as_str()]))) + } + (DataType::Utf8, None) => Ok(Arc::new(StringArray::from(vec![Option::<&str>::None]))), + (DataType::Binary, Some(PrimitiveLiteral::Binary(v))) => { + Ok(Arc::new(BinaryArray::from_vec(vec![v.as_slice()]))) + } + (DataType::Binary, None) => Ok(Arc::new(BinaryArray::from_opt_vec(vec![ + Option::<&[u8]>::None, + ]))), + (DataType::Decimal128(_, _), Some(PrimitiveLiteral::Int128(v))) => { + Ok(Arc::new(arrow_array::Decimal128Array::from(vec![{ *v }]))) + } + (DataType::Decimal128(_, _), Some(PrimitiveLiteral::UInt128(v))) => { + 
Ok(Arc::new(arrow_array::Decimal128Array::from(vec![ + *v as i128, + ]))) + } + (DataType::Decimal128(_, _), None) => { + Ok(Arc::new(arrow_array::Decimal128Array::from(vec![ + Option::::None, + ]))) + } + (DataType::Struct(fields), None) => { + // Create a single-element StructArray with nulls + let null_arrays: Vec = fields + .iter() + .map(|f| { + // Recursively create null arrays for struct fields + // For primitive fields in structs, use simple null arrays (not REE within struct) + match f.data_type() { + DataType::Boolean => { + Ok(Arc::new(BooleanArray::from(vec![Option::::None])) + as ArrayRef) + } + DataType::Int32 | DataType::Date32 => { + Ok(Arc::new(Int32Array::from(vec![Option::::None])) as ArrayRef) + } + DataType::Int64 => { + Ok(Arc::new(Int64Array::from(vec![Option::::None])) as ArrayRef) + } + DataType::Float32 => { + Ok(Arc::new(Float32Array::from(vec![Option::::None])) as ArrayRef) + } + DataType::Float64 => { + Ok(Arc::new(Float64Array::from(vec![Option::::None])) as ArrayRef) + } + DataType::Utf8 => { + Ok(Arc::new(StringArray::from(vec![Option::<&str>::None])) as ArrayRef) + } + DataType::Binary => { + Ok( + Arc::new(BinaryArray::from_opt_vec(vec![Option::<&[u8]>::None])) + as ArrayRef, + ) + } + _ => Err(Error::new( + ErrorKind::Unexpected, + format!("Unsupported struct field type: {:?}", f.data_type()), + )), + } + }) + .collect::>>()?; + Ok(Arc::new(arrow_array::StructArray::new( + fields.clone(), + null_arrays, + Some(arrow_buffer::NullBuffer::new_null(1)), + ))) + } + _ => Err(Error::new( + ErrorKind::Unexpected, + format!( + "Unsupported constant type combination: {:?} with {:?}", + data_type, prim_lit + ), + )), + } +} + +/// Create a repeated array from a primitive literal for a given number of rows. +/// +/// This is used for creating non-constant arrays where we need the same value +/// repeated for each row. 
+pub(crate) fn create_primitive_array_repeated( + data_type: &DataType, + prim_lit: &Option, + num_rows: usize, +) -> Result { + Ok(match (data_type, prim_lit) { + (DataType::Boolean, Some(PrimitiveLiteral::Boolean(value))) => { + Arc::new(BooleanArray::from(vec![*value; num_rows])) + } + (DataType::Boolean, None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(BooleanArray::from(vals)) + } + (DataType::Int32, Some(PrimitiveLiteral::Int(value))) => { + Arc::new(Int32Array::from(vec![*value; num_rows])) + } + (DataType::Int32, None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(Int32Array::from(vals)) + } + (DataType::Date32, Some(PrimitiveLiteral::Int(value))) => { + Arc::new(Date32Array::from(vec![*value; num_rows])) + } + (DataType::Date32, None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(Date32Array::from(vals)) + } + (DataType::Int64, Some(PrimitiveLiteral::Long(value))) => { + Arc::new(Int64Array::from(vec![*value; num_rows])) + } + (DataType::Int64, None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(Int64Array::from(vals)) + } + (DataType::Float32, Some(PrimitiveLiteral::Float(value))) => { + Arc::new(Float32Array::from(vec![value.0; num_rows])) + } + (DataType::Float32, None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(Float32Array::from(vals)) + } + (DataType::Float64, Some(PrimitiveLiteral::Double(value))) => { + Arc::new(Float64Array::from(vec![value.0; num_rows])) + } + (DataType::Float64, None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(Float64Array::from(vals)) + } + (DataType::Utf8, Some(PrimitiveLiteral::String(value))) => { + Arc::new(StringArray::from(vec![value.clone(); num_rows])) + } + (DataType::Utf8, None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(StringArray::from(vals)) + } + (DataType::Binary, Some(PrimitiveLiteral::Binary(value))) => { + Arc::new(BinaryArray::from_vec(vec![value; num_rows])) + } + (DataType::Binary, None) => { + let vals: Vec> = 
vec![None; num_rows]; + Arc::new(BinaryArray::from_opt_vec(vals)) + } + (DataType::Decimal128(_, _), Some(PrimitiveLiteral::Int128(value))) => { + Arc::new(Decimal128Array::from(vec![*value; num_rows])) + } + (DataType::Decimal128(_, _), Some(PrimitiveLiteral::UInt128(value))) => { + Arc::new(Decimal128Array::from(vec![*value as i128; num_rows])) + } + (DataType::Decimal128(_, _), None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(Decimal128Array::from(vals)) + } + (DataType::Struct(fields), None) => { + // Create a StructArray filled with nulls + let null_arrays: Vec = fields + .iter() + .map(|field| create_primitive_array_repeated(field.data_type(), &None, num_rows)) + .collect::>>()?; + + Arc::new(StructArray::new( + fields.clone(), + null_arrays, + Some(NullBuffer::new_null(num_rows)), + )) + } + (DataType::Null, _) => Arc::new(arrow_array::NullArray::new(num_rows)), + (dt, _) => { + return Err(Error::new( + ErrorKind::Unexpected, + format!("unexpected target column type {}", dt), + )); + } + }) +} + #[cfg(test)] mod test { use std::collections::HashMap; diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/lib.rs index aae8efed74..8d8f40f72d 100644 --- a/crates/iceberg/src/lib.rs +++ b/crates/iceberg/src/lib.rs @@ -96,4 +96,5 @@ mod utils; pub mod writer; mod delete_vector; +pub mod metadata_columns; pub mod puffin; diff --git a/crates/iceberg/src/metadata_columns.rs b/crates/iceberg/src/metadata_columns.rs new file mode 100644 index 0000000000..b11b5cadb2 --- /dev/null +++ b/crates/iceberg/src/metadata_columns.rs @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Metadata columns (virtual/reserved fields) for Iceberg tables. +//! +//! This module defines metadata columns that can be requested in projections +//! but are not stored in data files. Instead, they are computed on-the-fly +//! during reading. Examples include the _file column (file path) and future +//! columns like partition values or row numbers. + +use std::sync::Arc; + +use once_cell::sync::Lazy; + +use crate::spec::{NestedField, NestedFieldRef, PrimitiveType, Type}; +use crate::{Error, ErrorKind, Result}; + +/// Reserved field ID for the file path (_file) column per Iceberg spec +pub const RESERVED_FIELD_ID_FILE: i32 = i32::MAX - 1; + +/// Reserved column name for the file path metadata column +pub const RESERVED_COL_NAME_FILE: &str = "_file"; + +/// Documentation for the _file metadata column +pub const RESERVED_COL_DOC_FILE: &str = "Path of the file in which a row is stored"; + +/// Lazy-initialized Iceberg field definition for the _file metadata column. +/// This field represents the file path as a required string field. +static FILE_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_FILE, + RESERVED_COL_NAME_FILE, + Type::Primitive(PrimitiveType::String), + ) + .with_doc(RESERVED_COL_DOC_FILE), + ) +}); + +/// Returns the Iceberg field definition for the _file metadata column. 
+/// +/// # Returns +/// A reference to the _file field definition as an Iceberg NestedField +pub fn file_field() -> &'static NestedFieldRef { + &FILE_FIELD +} + +/// Returns the Iceberg field definition for a metadata field ID. +/// +/// # Arguments +/// * `field_id` - The metadata field ID +/// +/// # Returns +/// The Iceberg field definition for the metadata column, or an error if not a metadata field +pub fn get_metadata_field(field_id: i32) -> Result { + match field_id { + RESERVED_FIELD_ID_FILE => Ok(Arc::clone(file_field())), + _ if is_metadata_field(field_id) => { + // Future metadata fields can be added here + Err(Error::new( + ErrorKind::Unexpected, + format!( + "Metadata field ID {} recognized but field definition not implemented", + field_id + ), + )) + } + _ => Err(Error::new( + ErrorKind::Unexpected, + format!("Field ID {} is not a metadata field", field_id), + )), + } +} + +/// Returns the field ID for a metadata column name. +/// +/// # Arguments +/// * `column_name` - The metadata column name +/// +/// # Returns +/// The field ID of the metadata column, or an error if the column name is not recognized +pub fn get_metadata_field_id(column_name: &str) -> Result { + match column_name { + RESERVED_COL_NAME_FILE => Ok(RESERVED_FIELD_ID_FILE), + _ => Err(Error::new( + ErrorKind::Unexpected, + format!("Unknown/unsupported metadata column name: {column_name}"), + )), + } +} + +/// Checks if a field ID is a metadata field. +/// +/// # Arguments +/// * `field_id` - The field ID to check +/// +/// # Returns +/// `true` if the field ID is a (currently supported) metadata field, `false` otherwise +pub fn is_metadata_field(field_id: i32) -> bool { + field_id == RESERVED_FIELD_ID_FILE + // Additional metadata fields can be checked here in the future +} + +/// Checks if a column name is a metadata column. 
+/// +/// # Arguments +/// * `column_name` - The column name to check +/// +/// # Returns +/// `true` if the column name is a metadata column, `false` otherwise +pub fn is_metadata_column_name(column_name: &str) -> bool { + get_metadata_field_id(column_name).is_ok() +} diff --git a/crates/iceberg/src/scan/mod.rs b/crates/iceberg/src/scan/mod.rs index 3e319ca062..24c03b0b2c 100644 --- a/crates/iceberg/src/scan/mod.rs +++ b/crates/iceberg/src/scan/mod.rs @@ -36,6 +36,7 @@ use crate::delete_file_index::DeleteFileIndex; use crate::expr::visitors::inclusive_metrics_evaluator::InclusiveMetricsEvaluator; use crate::expr::{Bind, BoundPredicate, Predicate}; use crate::io::FileIO; +use crate::metadata_columns::{get_metadata_field_id, is_metadata_column_name}; use crate::runtime::spawn; use crate::spec::{DataContentType, SnapshotRef}; use crate::table::Table; @@ -217,9 +218,13 @@ impl<'a> TableScanBuilder<'a> { let schema = snapshot.schema(self.table.metadata())?; - // Check that all column names exist in the schema. + // Check that all column names exist in the schema (skip reserved columns). if let Some(column_names) = self.column_names.as_ref() { for column_name in column_names { + // Skip reserved columns that don't exist in the schema + if is_metadata_column_name(column_name) { + continue; + } if schema.field_by_name(column_name).is_none() { return Err(Error::new( ErrorKind::DataInvalid, @@ -240,6 +245,12 @@ impl<'a> TableScanBuilder<'a> { }); for column_name in column_names.iter() { + // Handle metadata columns (like "_file") + if is_metadata_column_name(column_name) { + field_ids.push(get_metadata_field_id(column_name)?); + continue; + } + let field_id = schema.field_id_by_name(column_name).ok_or_else(|| { Error::new( ErrorKind::DataInvalid, @@ -254,10 +265,10 @@ impl<'a> TableScanBuilder<'a> { Error::new( ErrorKind::FeatureUnsupported, format!( - "Column {column_name} is not a direct child of schema but a nested field, which is not supported now. 
Schema: {schema}" - ), - ) - })?; + "Column {column_name} is not a direct child of schema but a nested field, which is not supported now. Schema: {schema}" + ), + ) + })?; field_ids.push(field_id); } @@ -559,8 +570,10 @@ pub mod tests { use std::fs::File; use std::sync::Arc; + use arrow_array::cast::AsArray; use arrow_array::{ - ArrayRef, BooleanArray, Float64Array, Int32Array, Int64Array, RecordBatch, StringArray, + Array, ArrayRef, BooleanArray, Float64Array, Int32Array, Int64Array, RecordBatch, + StringArray, }; use futures::{TryStreamExt, stream}; use minijinja::value::Value; @@ -575,6 +588,7 @@ pub mod tests { use crate::arrow::ArrowReaderBuilder; use crate::expr::{BoundPredicate, Reference}; use crate::io::{FileIO, OutputFile}; + use crate::metadata_columns::RESERVED_COL_NAME_FILE; use crate::scan::FileScanTask; use crate::spec::{ DataContentType, DataFileBuilder, DataFileFormat, Datum, Literal, ManifestEntry, @@ -1800,4 +1814,319 @@ pub mod tests { }; test_fn(task); } + + #[tokio::test] + async fn test_select_with_file_column() { + use arrow_array::cast::AsArray; + + let mut fixture = TableTestFixture::new(); + fixture.setup_manifest_files().await; + + // Select regular columns plus the _file column + let table_scan = fixture + .table + .scan() + .select(["x", RESERVED_COL_NAME_FILE]) + .with_row_selection_enabled(true) + .build() + .unwrap(); + + let batch_stream = table_scan.to_arrow().await.unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + + // Verify we have 2 columns: x and _file + assert_eq!(batches[0].num_columns(), 2); + + // Verify the x column exists and has correct data + let x_col = batches[0].column_by_name("x").unwrap(); + let x_arr = x_col.as_primitive::(); + assert_eq!(x_arr.value(0), 1); + + // Verify the _file column exists + let file_col = batches[0].column_by_name(RESERVED_COL_NAME_FILE); + assert!( + file_col.is_some(), + "_file column should be present in the batch" + ); + + // Verify the _file column 
contains a file path + let file_col = file_col.unwrap(); + assert!( + matches!( + file_col.data_type(), + arrow_schema::DataType::RunEndEncoded(_, _) + ), + "_file column should use RunEndEncoded type" + ); + + // Decode the RunArray to verify it contains the file path + let run_array = file_col + .as_any() + .downcast_ref::>() + .expect("_file column should be a RunArray"); + + let values = run_array.values(); + let string_values = values.as_string::(); + assert_eq!(string_values.len(), 1, "Should have a single file path"); + + let file_path = string_values.value(0); + assert!( + file_path.ends_with(".parquet"), + "File path should end with .parquet, got: {}", + file_path + ); + } + + #[tokio::test] + async fn test_select_file_column_position() { + let mut fixture = TableTestFixture::new(); + fixture.setup_manifest_files().await; + + // Select columns in specific order: x, _file, z + let table_scan = fixture + .table + .scan() + .select(["x", RESERVED_COL_NAME_FILE, "z"]) + .with_row_selection_enabled(true) + .build() + .unwrap(); + + let batch_stream = table_scan.to_arrow().await.unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + + assert_eq!(batches[0].num_columns(), 3); + + // Verify column order: x at position 0, _file at position 1, z at position 2 + let schema = batches[0].schema(); + assert_eq!(schema.field(0).name(), "x"); + assert_eq!(schema.field(1).name(), RESERVED_COL_NAME_FILE); + assert_eq!(schema.field(2).name(), "z"); + + // Verify columns by name also works + assert!(batches[0].column_by_name("x").is_some()); + assert!(batches[0].column_by_name(RESERVED_COL_NAME_FILE).is_some()); + assert!(batches[0].column_by_name("z").is_some()); + } + + #[tokio::test] + async fn test_select_file_column_only() { + let mut fixture = TableTestFixture::new(); + fixture.setup_manifest_files().await; + + // Select only the _file column + let table_scan = fixture + .table + .scan() + .select([RESERVED_COL_NAME_FILE]) + 
.with_row_selection_enabled(true) + .build() + .unwrap(); + + let batch_stream = table_scan.to_arrow().await.unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + + // Should have exactly 1 column + assert_eq!(batches[0].num_columns(), 1); + + // Verify it's the _file column + let schema = batches[0].schema(); + assert_eq!(schema.field(0).name(), RESERVED_COL_NAME_FILE); + + // Verify the batch has the correct number of rows + // The scan reads files 1.parquet and 3.parquet (2.parquet is deleted) + // Each file has 1024 rows, so total is 2048 rows + let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); + assert_eq!(total_rows, 2048); + } + + #[tokio::test] + async fn test_file_column_with_multiple_files() { + use std::collections::HashSet; + + let mut fixture = TableTestFixture::new(); + fixture.setup_manifest_files().await; + + // Select x and _file columns + let table_scan = fixture + .table + .scan() + .select(["x", RESERVED_COL_NAME_FILE]) + .with_row_selection_enabled(true) + .build() + .unwrap(); + + let batch_stream = table_scan.to_arrow().await.unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + + // Collect all unique file paths from the batches + let mut file_paths = HashSet::new(); + for batch in &batches { + let file_col = batch.column_by_name(RESERVED_COL_NAME_FILE).unwrap(); + let run_array = file_col + .as_any() + .downcast_ref::>() + .expect("_file column should be a RunArray"); + + let values = run_array.values(); + let string_values = values.as_string::(); + for i in 0..string_values.len() { + file_paths.insert(string_values.value(i).to_string()); + } + } + + // We should have multiple files (the test creates 1.parquet and 3.parquet) + assert!(!file_paths.is_empty(), "Should have at least one file path"); + + // All paths should end with .parquet + for path in &file_paths { + assert!( + path.ends_with(".parquet"), + "All file paths should end with .parquet, got: {}", + path + ); + } 
+ } + + #[tokio::test] + async fn test_file_column_at_start() { + let mut fixture = TableTestFixture::new(); + fixture.setup_manifest_files().await; + + // Select _file at the start + let table_scan = fixture + .table + .scan() + .select([RESERVED_COL_NAME_FILE, "x", "y"]) + .with_row_selection_enabled(true) + .build() + .unwrap(); + + let batch_stream = table_scan.to_arrow().await.unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + + assert_eq!(batches[0].num_columns(), 3); + + // Verify _file is at position 0 + let schema = batches[0].schema(); + assert_eq!(schema.field(0).name(), RESERVED_COL_NAME_FILE); + assert_eq!(schema.field(1).name(), "x"); + assert_eq!(schema.field(2).name(), "y"); + } + + #[tokio::test] + async fn test_file_column_at_end() { + let mut fixture = TableTestFixture::new(); + fixture.setup_manifest_files().await; + + // Select _file at the end + let table_scan = fixture + .table + .scan() + .select(["x", "y", RESERVED_COL_NAME_FILE]) + .with_row_selection_enabled(true) + .build() + .unwrap(); + + let batch_stream = table_scan.to_arrow().await.unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + + assert_eq!(batches[0].num_columns(), 3); + + // Verify _file is at position 2 (the end) + let schema = batches[0].schema(); + assert_eq!(schema.field(0).name(), "x"); + assert_eq!(schema.field(1).name(), "y"); + assert_eq!(schema.field(2).name(), RESERVED_COL_NAME_FILE); + } + + #[tokio::test] + async fn test_select_with_repeated_column_names() { + let mut fixture = TableTestFixture::new(); + fixture.setup_manifest_files().await; + + // Select with repeated column names - both regular columns and virtual columns + // Repeated columns should appear multiple times in the result (duplicates are allowed) + let table_scan = fixture + .table + .scan() + .select([ + "x", + RESERVED_COL_NAME_FILE, + "x", // x repeated + "y", + RESERVED_COL_NAME_FILE, // _file repeated + "y", // y repeated + ]) + 
.with_row_selection_enabled(true) + .build() + .unwrap(); + + let batch_stream = table_scan.to_arrow().await.unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + + // Verify we have exactly 6 columns (duplicates are allowed and preserved) + assert_eq!( + batches[0].num_columns(), + 6, + "Should have exactly 6 columns with duplicates" + ); + + let schema = batches[0].schema(); + + // Verify columns appear in the exact order requested: x, _file, x, y, _file, y + assert_eq!(schema.field(0).name(), "x", "Column 0 should be x"); + assert_eq!( + schema.field(1).name(), + RESERVED_COL_NAME_FILE, + "Column 1 should be _file" + ); + assert_eq!( + schema.field(2).name(), + "x", + "Column 2 should be x (duplicate)" + ); + assert_eq!(schema.field(3).name(), "y", "Column 3 should be y"); + assert_eq!( + schema.field(4).name(), + RESERVED_COL_NAME_FILE, + "Column 4 should be _file (duplicate)" + ); + assert_eq!( + schema.field(5).name(), + "y", + "Column 5 should be y (duplicate)" + ); + + // Verify all columns have correct data types + assert!( + matches!(schema.field(0).data_type(), arrow_schema::DataType::Int64), + "Column x should be Int64" + ); + assert!( + matches!(schema.field(2).data_type(), arrow_schema::DataType::Int64), + "Column x (duplicate) should be Int64" + ); + assert!( + matches!(schema.field(3).data_type(), arrow_schema::DataType::Int64), + "Column y should be Int64" + ); + assert!( + matches!(schema.field(5).data_type(), arrow_schema::DataType::Int64), + "Column y (duplicate) should be Int64" + ); + assert!( + matches!( + schema.field(1).data_type(), + arrow_schema::DataType::RunEndEncoded(_, _) + ), + "_file column should use RunEndEncoded type" + ); + assert!( + matches!( + schema.field(4).data_type(), + arrow_schema::DataType::RunEndEncoded(_, _) + ), + "_file column (duplicate) should use RunEndEncoded type" + ); + } } From d973fefb06bfb3555890492fad853dabd85fb186 Mon Sep 17 00:00:00 2001 From: Christian Date: Tue, 9 Dec 2025 
14:15:01 +0100 Subject: [PATCH 20/58] feat: Make `rest` types public, add documentation (#1901) --- crates/catalog/rest/src/catalog.rs | 52 ++--- crates/catalog/rest/src/lib.rs | 1 + crates/catalog/rest/src/types.rs | 307 ++++++++++++++++++++++------- 3 files changed, 253 insertions(+), 107 deletions(-) diff --git a/crates/catalog/rest/src/catalog.rs b/crates/catalog/rest/src/catalog.rs index 39553f7554..ddbf6a4e01 100644 --- a/crates/catalog/rest/src/catalog.rs +++ b/crates/catalog/rest/src/catalog.rs @@ -41,9 +41,9 @@ use crate::client::{ HttpClient, deserialize_catalog_response, deserialize_unexpected_catalog_error, }; use crate::types::{ - CatalogConfig, CommitTableRequest, CommitTableResponse, CreateTableRequest, - ListNamespaceResponse, ListTableResponse, LoadTableResponse, NamespaceSerde, - RegisterTableRequest, RenameTableRequest, + CatalogConfig, CommitTableRequest, CommitTableResponse, CreateNamespaceRequest, + CreateTableRequest, ListNamespaceResponse, ListTablesResponse, LoadTableResult, + NamespaceResponse, RegisterTableRequest, RenameTableRequest, }; /// REST catalog URI @@ -466,13 +466,7 @@ impl Catalog for RestCatalog { deserialize_catalog_response::(http_response) .await?; - let ns_identifiers = response - .namespaces - .into_iter() - .map(NamespaceIdent::from_vec) - .collect::>>()?; - - namespaces.extend(ns_identifiers); + namespaces.extend(response.namespaces); match response.next_page_token { Some(token) => next_token = Some(token), @@ -502,9 +496,9 @@ impl Catalog for RestCatalog { let request = context .client .request(Method::POST, context.config.namespaces_endpoint()) - .json(&NamespaceSerde { - namespace: namespace.as_ref().clone(), - properties: Some(properties), + .json(&CreateNamespaceRequest { + namespace: namespace.clone(), + properties, }) .build()?; @@ -513,8 +507,8 @@ impl Catalog for RestCatalog { match http_response.status() { StatusCode::OK => { let response = - deserialize_catalog_response::(http_response).await?; - 
Namespace::try_from(response) + deserialize_catalog_response::(http_response).await?; + Ok(Namespace::from(response)) } StatusCode::CONFLICT => Err(Error::new( ErrorKind::Unexpected, @@ -537,8 +531,8 @@ impl Catalog for RestCatalog { match http_response.status() { StatusCode::OK => { let response = - deserialize_catalog_response::(http_response).await?; - Namespace::try_from(response) + deserialize_catalog_response::(http_response).await?; + Ok(Namespace::from(response)) } StatusCode::NOT_FOUND => Err(Error::new( ErrorKind::Unexpected, @@ -614,7 +608,7 @@ impl Catalog for RestCatalog { match http_response.status() { StatusCode::OK => { let response = - deserialize_catalog_response::(http_response).await?; + deserialize_catalog_response::(http_response).await?; identifiers.extend(response.identifiers); @@ -661,11 +655,7 @@ impl Catalog for RestCatalog { partition_spec: creation.partition_spec, write_order: creation.sort_order, stage_create: Some(false), - properties: if creation.properties.is_empty() { - None - } else { - Some(creation.properties) - }, + properties: creation.properties, }) .build()?; @@ -673,7 +663,7 @@ impl Catalog for RestCatalog { let response = match http_response.status() { StatusCode::OK => { - deserialize_catalog_response::(http_response).await? + deserialize_catalog_response::(http_response).await? } StatusCode::NOT_FOUND => { return Err(Error::new( @@ -697,7 +687,6 @@ impl Catalog for RestCatalog { let config = response .config - .unwrap_or_default() .into_iter() .chain(self.user_config.props.clone()) .collect(); @@ -735,7 +724,7 @@ impl Catalog for RestCatalog { let response = match http_response.status() { StatusCode::OK | StatusCode::NOT_MODIFIED => { - deserialize_catalog_response::(http_response).await? + deserialize_catalog_response::(http_response).await? 
} StatusCode::NOT_FOUND => { return Err(Error::new( @@ -748,7 +737,6 @@ impl Catalog for RestCatalog { let config = response .config - .unwrap_or_default() .into_iter() .chain(self.user_config.props.clone()) .collect(); @@ -861,9 +849,9 @@ impl Catalog for RestCatalog { let http_response = context.client.query_catalog(request).await?; - let response: LoadTableResponse = match http_response.status() { + let response: LoadTableResult = match http_response.status() { StatusCode::OK => { - deserialize_catalog_response::(http_response).await? + deserialize_catalog_response::(http_response).await? } StatusCode::NOT_FOUND => { return Err(Error::new( @@ -905,7 +893,7 @@ impl Catalog for RestCatalog { context.config.table_endpoint(commit.identifier()), ) .json(&CommitTableRequest { - identifier: commit.identifier().clone(), + identifier: Some(commit.identifier().clone()), requirements: commit.take_requirements(), updates: commit.take_updates(), }) @@ -2428,7 +2416,7 @@ mod tests { )) .unwrap(); let reader = BufReader::new(file); - let resp = serde_json::from_reader::<_, LoadTableResponse>(reader).unwrap(); + let resp = serde_json::from_reader::<_, LoadTableResult>(reader).unwrap(); Table::builder() .metadata(resp.metadata) @@ -2568,7 +2556,7 @@ mod tests { )) .unwrap(); let reader = BufReader::new(file); - let resp = serde_json::from_reader::<_, LoadTableResponse>(reader).unwrap(); + let resp = serde_json::from_reader::<_, LoadTableResult>(reader).unwrap(); Table::builder() .metadata(resp.metadata) diff --git a/crates/catalog/rest/src/lib.rs b/crates/catalog/rest/src/lib.rs index 70cdeaabd0..6bee950970 100644 --- a/crates/catalog/rest/src/lib.rs +++ b/crates/catalog/rest/src/lib.rs @@ -56,3 +56,4 @@ mod client; mod types; pub use catalog::*; +pub use types::*; diff --git a/crates/catalog/rest/src/types.rs b/crates/catalog/rest/src/types.rs index 70ed72051a..ab44c40ee3 100644 --- a/crates/catalog/rest/src/types.rs +++ b/crates/catalog/rest/src/types.rs @@ -15,6 +15,8 @@ // 
specific language governing permissions and limitations // under the License. +//! Request and response types for the Iceberg REST API. + use std::collections::HashMap; use iceberg::spec::{Schema, SortOrder, TableMetadata, UnboundPartitionSpec}; @@ -30,7 +32,8 @@ pub(super) struct CatalogConfig { } #[derive(Debug, Serialize, Deserialize)] -pub(super) struct ErrorResponse { +/// Wrapper for all non-2xx error responses from the REST API +pub struct ErrorResponse { error: ErrorModel, } @@ -41,11 +44,16 @@ impl From for Error { } #[derive(Debug, Serialize, Deserialize)] -pub(super) struct ErrorModel { - pub(super) message: String, - pub(super) r#type: String, - pub(super) code: u16, - pub(super) stack: Option>, +/// Error payload returned in a response with further details on the error +pub struct ErrorModel { + /// Human-readable error message + pub message: String, + /// Internal type definition of the error + pub r#type: String, + /// HTTP response code + pub code: u16, + /// Optional error stack / context + pub stack: Option>, } impl From for Error { @@ -96,106 +104,255 @@ pub(super) struct TokenResponse { pub(super) issued_token_type: Option, } -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct NamespaceSerde { - pub(super) namespace: Vec, - pub(super) properties: Option>, -} - -impl TryFrom for Namespace { - type Error = Error; - fn try_from(value: NamespaceSerde) -> std::result::Result { - Ok(Namespace::with_properties( - NamespaceIdent::from_vec(value.namespace)?, - value.properties.unwrap_or_default(), - )) - } +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Namespace response +pub struct NamespaceResponse { + /// Namespace identifier + pub namespace: NamespaceIdent, + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + /// Properties stored on the namespace, if supported by the server. 
+ pub properties: HashMap, } -impl From<&Namespace> for NamespaceSerde { +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Create namespace request +pub struct CreateNamespaceRequest { + /// Name of the namespace to create + pub namespace: NamespaceIdent, + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + /// Properties to set on the namespace + pub properties: HashMap, +} + +impl From<&Namespace> for NamespaceResponse { fn from(value: &Namespace) -> Self { Self { - namespace: value.name().as_ref().clone(), - properties: Some(value.properties().clone()), + namespace: value.name().clone(), + properties: value.properties().clone(), } } } -#[derive(Debug, Serialize, Deserialize)] +impl From for Namespace { + fn from(value: NamespaceResponse) -> Self { + Namespace::with_properties(value.namespace, value.properties) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] -pub(super) struct ListNamespaceResponse { - pub(super) namespaces: Vec>, - #[serde(default)] - pub(super) next_page_token: Option, +/// Response containing a list of namespace identifiers, with optional pagination support. +pub struct ListNamespaceResponse { + /// List of namespace identifiers returned by the server + pub namespaces: Vec, + /// Opaque token for pagination. If present, indicates there are more results available. + /// Use this value in subsequent requests to retrieve the next page. + pub next_page_token: Option, } -#[allow(dead_code)] -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct UpdateNamespacePropsRequest { - removals: Option>, - updates: Option>, +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Request to update properties on a namespace. +/// +/// Properties that are not in the request are not modified or removed by this call. +/// Server implementations are not required to support namespace properties. 
+pub struct UpdateNamespacePropertiesRequest { + /// List of property keys to remove from the namespace + pub removals: Option>, + /// Map of property keys to values to set or update on the namespace + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub updates: HashMap, } -#[allow(dead_code)] -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct UpdateNamespacePropsResponse { - updated: Vec, - removed: Vec, - missing: Option>, +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Response from updating namespace properties, indicating which properties were changed. +pub struct UpdateNamespacePropertiesResponse { + /// List of property keys that were added or updated + pub updated: Vec, + /// List of properties that were removed + pub removed: Vec, + /// List of properties requested for removal that were not found in the namespace's properties. + /// Represents a partial success response. Servers do not need to implement this. + #[serde(skip_serializing_if = "Option::is_none")] + pub missing: Option>, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] -pub(super) struct ListTableResponse { - pub(super) identifiers: Vec, +/// Response containing a list of table identifiers, with optional pagination support. +pub struct ListTablesResponse { + /// List of table identifiers under the requested namespace + pub identifiers: Vec, + /// Opaque token for pagination. If present, indicates there are more results available. + /// Use this value in subsequent requests to retrieve the next page. 
#[serde(default)] - pub(super) next_page_token: Option, + pub next_page_token: Option, } -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct RenameTableRequest { - pub(super) source: TableIdent, - pub(super) destination: TableIdent, +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Request to rename a table from one identifier to another. +/// +/// It's valid to move a table across namespaces, but the server implementation +/// is not required to support it. +pub struct RenameTableRequest { + /// Current table identifier to rename + pub source: TableIdent, + /// New table identifier to rename to + pub destination: TableIdent, } -#[derive(Debug, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] -pub(super) struct LoadTableResponse { - pub(super) metadata_location: Option, - pub(super) metadata: TableMetadata, - pub(super) config: Option>, +/// Result returned when a table is successfully loaded or created. +/// +/// The table metadata JSON is returned in the `metadata` field. The corresponding file location +/// of table metadata should be returned in the `metadata_location` field, unless the metadata +/// is not yet committed. For example, a create transaction may return metadata that is staged +/// but not committed. +/// +/// The `config` map returns table-specific configuration for the table's resources, including +/// its HTTP client and FileIO. For example, config may contain a specific FileIO implementation +/// class for the table depending on its underlying storage. +pub struct LoadTableResult { + /// May be null if the table is staged as part of a transaction + pub metadata_location: Option, + /// The table's full metadata + pub metadata: TableMetadata, + /// Table-specific configuration overriding catalog configuration + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub config: HashMap, + /// Storage credentials for accessing table data. 
Clients should check this field + /// before falling back to credentials in the `config` field. + #[serde(skip_serializing_if = "Option::is_none")] + pub storage_credentials: Option>, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Storage credential for a specific location prefix. +/// +/// Indicates a storage location prefix where the credential is relevant. Clients should +/// choose the most specific prefix (by selecting the longest prefix) if several credentials +/// of the same type are available. +pub struct StorageCredential { + /// Storage location prefix where this credential is relevant + pub prefix: String, + /// Configuration map containing credential information + pub config: HashMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] -pub(super) struct CreateTableRequest { - pub(super) name: String, - pub(super) location: Option, - pub(super) schema: Schema, - pub(super) partition_spec: Option, - pub(super) write_order: Option, - pub(super) stage_create: Option, - pub(super) properties: Option>, +/// Request to create a new table in a namespace. +/// +/// If `stage_create` is false, the table is created immediately. +/// If `stage_create` is true, the table is not created, but table metadata is initialized +/// and returned. The service should prepare as needed for a commit to the table commit +/// endpoint to complete the create transaction. +pub struct CreateTableRequest { + /// Name of the table to create + pub name: String, + /// Optional table location. If not provided, the server will choose a location. + pub location: Option, + /// Table schema + pub schema: Schema, + /// Optional partition specification. If not provided, the table will be unpartitioned. 
+ pub partition_spec: Option, + /// Optional sort order for the table + pub write_order: Option, + /// Whether to stage the create for a transaction (true) or create immediately (false) + pub stage_create: Option, + /// Optional properties to set on the table + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub properties: HashMap, } -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct CommitTableRequest { - pub(super) identifier: TableIdent, - pub(super) requirements: Vec, - pub(super) updates: Vec, +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +/// Request to commit updates to a table. +/// +/// Commits have two parts: requirements and updates. Requirements are assertions that will +/// be validated before attempting to make and commit changes. Updates are changes to make +/// to table metadata. +/// +/// Create table transactions that are started by createTable with `stage-create` set to true +/// are committed using this request. Transactions should include all changes to the table, +/// including table initialization, like AddSchemaUpdate and SetCurrentSchemaUpdate. +pub struct CommitTableRequest { + /// Table identifier to update; must be present for CommitTransactionRequest + #[serde(skip_serializing_if = "Option::is_none")] + pub identifier: Option, + /// List of requirements that must be satisfied before committing changes + pub requirements: Vec, + /// List of updates to apply to the table metadata + pub updates: Vec, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] -pub(super) struct CommitTableResponse { - pub(super) metadata_location: String, - pub(super) metadata: TableMetadata, +/// Response returned when a table is successfully updated. +/// +/// The table metadata JSON is returned in the metadata field. The corresponding file location +/// of table metadata must be returned in the metadata-location field. 
Clients can check whether +/// metadata has changed by comparing metadata locations. +pub struct CommitTableResponse { + /// Location of the updated table metadata file + pub metadata_location: String, + /// The table's updated metadata + pub metadata: TableMetadata, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] -pub(super) struct RegisterTableRequest { - pub(super) name: String, - pub(super) metadata_location: String, - pub(super) overwrite: Option, +/// Request to register a table using an existing metadata file location. +pub struct RegisterTableRequest { + /// Name of the table to register + pub name: String, + /// Location of the metadata file for the table + pub metadata_location: String, + /// Whether to overwrite table metadata if the table already exists + pub overwrite: Option, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_namespace_response_serde() { + let json = serde_json::json!({ + "namespace": ["nested", "ns"], + "properties": { + "key1": "value1", + "key2": "value2" + } + }); + let ns_response: NamespaceResponse = + serde_json::from_value(json.clone()).expect("Deserialization failed"); + assert_eq!(ns_response, NamespaceResponse { + namespace: NamespaceIdent::from_vec(vec!["nested".to_string(), "ns".to_string()]) + .unwrap(), + properties: HashMap::from([ + ("key1".to_string(), "value1".to_string()), + ("key2".to_string(), "value2".to_string()), + ]), + }); + assert_eq!( + serde_json::to_value(&ns_response).expect("Serialization failed"), + json + ); + + // Without properties + let json_no_props = serde_json::json!({ + "namespace": ["db", "schema"] + }); + let ns_response_no_props: NamespaceResponse = + serde_json::from_value(json_no_props.clone()).expect("Deserialization failed"); + assert_eq!(ns_response_no_props, NamespaceResponse { + namespace: NamespaceIdent::from_vec(vec!["db".to_string(), "schema".to_string()]) + 
.unwrap(), + properties: HashMap::new(), + }); + assert_eq!( + serde_json::to_value(&ns_response_no_props).expect("Serialization failed"), + json_no_props + ); + } } From d78f6567cabe917d75ae9884144a5a0f70e08ace Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 9 Dec 2025 09:57:36 -0500 Subject: [PATCH 21/58] chore: bump MSRV to 1.88, fix warnings and clippy errors (#1902) https://github.com/apache/iceberg-rust/pull/1899 requires a bump to MSRV 1.88. This version is within the policy of this project, and since the README mentions `MSRV is updated when we release iceberg-rust` and we're preparing 0.8, here's a PR for just MSRV 1.88. ## Which issue does this PR close? N/A. ## What changes are included in this PR? - Bump MSRV to 1.88 - Fix warnings - Fix errors found by `make check-clippy` - Format ## Are these changes tested? Existing tests --- Cargo.toml | 2 +- crates/catalog/glue/src/catalog.rs | 46 +++++------ crates/iceberg/src/arrow/reader.rs | 68 ++++++++-------- .../src/arrow/record_batch_projector.rs | 35 ++++----- .../src/arrow/record_batch_transformer.rs | 2 +- crates/iceberg/src/arrow/value.rs | 33 ++++---- crates/iceberg/src/catalog/mod.rs | 14 ++-- crates/iceberg/src/delete_vector.rs | 10 +-- .../src/expr/visitors/manifest_evaluator.rs | 26 +++---- .../src/expr/visitors/page_index_evaluator.rs | 16 ++-- .../expr/visitors/strict_metrics_evaluator.rs | 24 +++--- crates/iceberg/src/inspect/metadata_table.rs | 4 +- crates/iceberg/src/io/storage.rs | 4 +- crates/iceberg/src/io/storage_azdls.rs | 20 ++--- crates/iceberg/src/io/storage_gcs.rs | 26 +++---- crates/iceberg/src/io/storage_oss.rs | 2 +- crates/iceberg/src/io/storage_s3.rs | 24 +++--- crates/iceberg/src/metadata_columns.rs | 5 +- crates/iceberg/src/scan/mod.rs | 6 +- crates/iceberg/src/spec/datatypes.rs | 3 +- crates/iceberg/src/spec/manifest/writer.rs | 8 +- .../iceberg/src/spec/schema/prune_columns.rs | 34 ++++---- crates/iceberg/src/spec/table_metadata.rs | 71 +++++++++-------- 
crates/iceberg/src/spec/transform.rs | 78 +++++++++---------- crates/iceberg/src/spec/values/tests.rs | 2 +- .../iceberg/src/spec/view_metadata_builder.rs | 8 +- crates/iceberg/src/transaction/mod.rs | 2 +- crates/iceberg/src/transaction/snapshot.rs | 14 ++-- .../src/writer/file_writer/rolling_writer.rs | 24 +++--- crates/iceberg/tests/file_io_gcs_test.rs | 6 +- .../src/physical_plan/repartition.rs | 11 +-- .../datafusion/src/physical_plan/sort.rs | 3 +- .../integrations/datafusion/src/table/mod.rs | 2 +- .../tests/integration_datafusion_test.rs | 15 ++-- rust-toolchain.toml | 2 +- 35 files changed, 315 insertions(+), 335 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 36093d92aa..9904820dea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ version = "0.7.0" license = "Apache-2.0" repository = "https://github.com/apache/iceberg-rust" # Check the MSRV policy in README.md before changing this -rust-version = "1.87" +rust-version = "1.88" [workspace.dependencies] anyhow = "1.0.72" diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index dce287ed6e..37a7996f80 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -151,33 +151,33 @@ impl GlueCatalog { async fn new(config: GlueCatalogConfig) -> Result { let sdk_config = create_sdk_config(&config.props, config.uri.as_ref()).await; let mut file_io_props = config.props.clone(); - if !file_io_props.contains_key(S3_ACCESS_KEY_ID) { - if let Some(access_key_id) = file_io_props.get(AWS_ACCESS_KEY_ID) { - file_io_props.insert(S3_ACCESS_KEY_ID.to_string(), access_key_id.to_string()); - } + if !file_io_props.contains_key(S3_ACCESS_KEY_ID) + && let Some(access_key_id) = file_io_props.get(AWS_ACCESS_KEY_ID) + { + file_io_props.insert(S3_ACCESS_KEY_ID.to_string(), access_key_id.to_string()); } - if !file_io_props.contains_key(S3_SECRET_ACCESS_KEY) { - if let Some(secret_access_key) = file_io_props.get(AWS_SECRET_ACCESS_KEY) { - 
file_io_props.insert( - S3_SECRET_ACCESS_KEY.to_string(), - secret_access_key.to_string(), - ); - } + if !file_io_props.contains_key(S3_SECRET_ACCESS_KEY) + && let Some(secret_access_key) = file_io_props.get(AWS_SECRET_ACCESS_KEY) + { + file_io_props.insert( + S3_SECRET_ACCESS_KEY.to_string(), + secret_access_key.to_string(), + ); } - if !file_io_props.contains_key(S3_REGION) { - if let Some(region) = file_io_props.get(AWS_REGION_NAME) { - file_io_props.insert(S3_REGION.to_string(), region.to_string()); - } + if !file_io_props.contains_key(S3_REGION) + && let Some(region) = file_io_props.get(AWS_REGION_NAME) + { + file_io_props.insert(S3_REGION.to_string(), region.to_string()); } - if !file_io_props.contains_key(S3_SESSION_TOKEN) { - if let Some(session_token) = file_io_props.get(AWS_SESSION_TOKEN) { - file_io_props.insert(S3_SESSION_TOKEN.to_string(), session_token.to_string()); - } + if !file_io_props.contains_key(S3_SESSION_TOKEN) + && let Some(session_token) = file_io_props.get(AWS_SESSION_TOKEN) + { + file_io_props.insert(S3_SESSION_TOKEN.to_string(), session_token.to_string()); } - if !file_io_props.contains_key(S3_ENDPOINT) { - if let Some(aws_endpoint) = config.uri.as_ref() { - file_io_props.insert(S3_ENDPOINT.to_string(), aws_endpoint.to_string()); - } + if !file_io_props.contains_key(S3_ENDPOINT) + && let Some(aws_endpoint) = config.uri.as_ref() + { + file_io_props.insert(S3_ENDPOINT.to_string(), aws_endpoint.to_string()); } let client = aws_sdk_glue::Client::new(&sdk_config); diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs index de8a1420e4..380d48530c 100644 --- a/crates/iceberg/src/arrow/reader.rs +++ b/crates/iceberg/src/arrow/reader.rs @@ -504,10 +504,10 @@ impl ArrowReader { // we need to call next() to update the cache with the newly positioned value. 
delete_vector_iter.advance_to(next_row_group_base_idx); // Only update the cache if the cached value is stale (in the skipped range) - if let Some(cached_idx) = next_deleted_row_idx_opt { - if cached_idx < next_row_group_base_idx { - next_deleted_row_idx_opt = delete_vector_iter.next(); - } + if let Some(cached_idx) = next_deleted_row_idx_opt + && cached_idx < next_row_group_base_idx + { + next_deleted_row_idx_opt = delete_vector_iter.next(); } // still increment the current page base index but then skip to the next row group @@ -861,10 +861,10 @@ impl ArrowReader { }; // If all row groups were filtered out, return an empty RowSelection (select no rows) - if let Some(selected_row_groups) = selected_row_groups { - if selected_row_groups.is_empty() { - return Ok(RowSelection::from(Vec::new())); - } + if let Some(selected_row_groups) = selected_row_groups + && selected_row_groups.is_empty() + { + return Ok(RowSelection::from(Vec::new())); } let mut selected_row_groups_idx = 0; @@ -897,10 +897,10 @@ impl ArrowReader { results.push(selections_for_page); - if let Some(selected_row_groups) = selected_row_groups { - if selected_row_groups_idx == selected_row_groups.len() { - break; - } + if let Some(selected_row_groups) = selected_row_groups + && selected_row_groups_idx == selected_row_groups.len() + { + break; } } @@ -1031,13 +1031,13 @@ fn apply_name_mapping_to_arrow_schema( let mut metadata = field.metadata().clone(); - if let Some(mapped_field) = mapped_field_opt { - if let Some(field_id) = mapped_field.field_id() { - // Field found in mapping with a field_id → assign it - metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field_id.to_string()); - } - // If field_id is None, leave the field without an ID (will be filtered by projection) + if let Some(mapped_field) = mapped_field_opt + && let Some(field_id) = mapped_field.field_id() + { + // Field found in mapping with a field_id → assign it + metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), 
field_id.to_string()); } + // If field_id is None, leave the field without an ID (will be filtered by projection) // If field not found in mapping, leave it without an ID (will be filtered by projection) Field::new(field.name(), field.data_type().clone(), field.is_nullable()) @@ -2731,15 +2731,14 @@ message schema { // Step 4: Verify we got 199 rows (not 200) let total_rows: usize = result.iter().map(|b| b.num_rows()).sum(); - println!("Total rows read: {}", total_rows); + println!("Total rows read: {total_rows}"); println!("Expected: 199 rows (deleted row 199 which had id=200)"); // This assertion will FAIL before the fix and PASS after the fix assert_eq!( total_rows, 199, - "Expected 199 rows after deleting row 199, but got {} rows. \ - The bug causes position deletes in later row groups to be ignored.", - total_rows + "Expected 199 rows after deleting row 199, but got {total_rows} rows. \ + The bug causes position deletes in later row groups to be ignored." ); // Verify the deleted row (id=200) is not present @@ -2950,16 +2949,15 @@ message schema { // Row group 1 has 100 rows (ids 101-200), minus 1 delete (id=200) = 99 rows let total_rows: usize = result.iter().map(|b| b.num_rows()).sum(); - println!("Total rows read from row group 1: {}", total_rows); + println!("Total rows read from row group 1: {total_rows}"); println!("Expected: 99 rows (row group 1 has 100 rows, 1 delete at position 199)"); // This assertion will FAIL before the fix and PASS after the fix assert_eq!( total_rows, 99, - "Expected 99 rows from row group 1 after deleting position 199, but got {} rows. \ + "Expected 99 rows from row group 1 after deleting position 199, but got {total_rows} rows. \ The bug causes position deletes to be lost when advance_to() is followed by next() \ - when skipping unselected row groups.", - total_rows + when skipping unselected row groups." 
); // Verify the deleted row (id=200) is not present @@ -3241,7 +3239,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], @@ -3338,7 +3336,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 3], @@ -3424,7 +3422,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2, 3], @@ -3524,7 +3522,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], @@ -3565,7 +3563,7 @@ message schema { assert_eq!(all_values.len(), 6); for i in 0..6 { - assert_eq!(all_names[i], format!("name_{}", i)); + assert_eq!(all_names[i], format!("name_{i}")); assert_eq!(all_values[i], i as i32); } } @@ -3653,7 +3651,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], @@ -3749,7 +3747,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, 
schema: schema.clone(), project_field_ids: vec![1, 5, 2], @@ -3858,7 +3856,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2, 3], @@ -3997,7 +3995,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/data.parquet", table_location), + data_file_path: format!("{table_location}/data.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], diff --git a/crates/iceberg/src/arrow/record_batch_projector.rs b/crates/iceberg/src/arrow/record_batch_projector.rs index 45de0212e8..7028eee961 100644 --- a/crates/iceberg/src/arrow/record_batch_projector.rs +++ b/crates/iceberg/src/arrow/record_batch_projector.rs @@ -133,25 +133,24 @@ impl RecordBatchProjector { { for (pos, field) in fields.iter().enumerate() { let id = field_id_fetch_func(field)?; - if let Some(id) = id { - if target_field_id == id { - index_vec.push(pos); - return Ok(Some(field.clone())); - } + if let Some(id) = id + && target_field_id == id + { + index_vec.push(pos); + return Ok(Some(field.clone())); } - if let DataType::Struct(inner) = field.data_type() { - if searchable_field_func(field) { - if let Some(res) = Self::fetch_field_index( - inner, - index_vec, - target_field_id, - field_id_fetch_func, - searchable_field_func, - )? { - index_vec.push(pos); - return Ok(Some(res)); - } - } + if let DataType::Struct(inner) = field.data_type() + && searchable_field_func(field) + && let Some(res) = Self::fetch_field_index( + inner, + index_vec, + target_field_id, + field_id_fetch_func, + searchable_field_func, + )? 
+ { + index_vec.push(pos); + return Ok(Some(res)); } } Ok(None) diff --git a/crates/iceberg/src/arrow/record_batch_transformer.rs b/crates/iceberg/src/arrow/record_batch_transformer.rs index f30d4a09c3..c4782464c1 100644 --- a/crates/iceberg/src/arrow/record_batch_transformer.rs +++ b/crates/iceberg/src/arrow/record_batch_transformer.rs @@ -582,7 +582,7 @@ impl RecordBatchTransformer { let this_field_id = field_id_str.parse().map_err(|e| { Error::new( ErrorKind::DataInvalid, - format!("field id not parseable as an i32: {}", e), + format!("field id not parseable as an i32: {e}"), ) })?; diff --git a/crates/iceberg/src/arrow/value.rs b/crates/iceberg/src/arrow/value.rs index 0e0b85f073..bc123d99e8 100644 --- a/crates/iceberg/src/arrow/value.rs +++ b/crates/iceberg/src/arrow/value.rs @@ -261,15 +261,15 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { "The partner is not a decimal128 array", ) })?; - if let DataType::Decimal128(arrow_precision, arrow_scale) = array.data_type() { - if *arrow_precision as u32 != *precision || *arrow_scale as u32 != *scale { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "The precision or scale ({arrow_precision},{arrow_scale}) of arrow decimal128 array is not compatible with iceberg decimal type ({precision},{scale})" - ), - )); - } + if let DataType::Decimal128(arrow_precision, arrow_scale) = array.data_type() + && (*arrow_precision as u32 != *precision || *arrow_scale as u32 != *scale) + { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "The precision or scale ({arrow_precision},{arrow_scale}) of arrow decimal128 array is not compatible with iceberg decimal type ({precision},{scale})" + ), + )); } Ok(array.iter().map(|v| v.map(Literal::decimal)).collect()) } @@ -351,10 +351,10 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { } else if let Some(array) = partner.as_any().downcast_ref::() { Ok(array.iter().map(|v| v.map(Literal::string)).collect()) } else { 
- return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, "The partner is not a string array", - )); + )) } } PrimitiveType::Uuid => { @@ -418,10 +418,10 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { .map(|v| v.map(|v| Literal::binary(v.to_vec()))) .collect()) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, "The partner is not a binary array", - )); + )) } } } @@ -724,10 +724,7 @@ pub(crate) fn create_primitive_array_single_element( } _ => Err(Error::new( ErrorKind::Unexpected, - format!( - "Unsupported constant type combination: {:?} with {:?}", - data_type, prim_lit - ), + format!("Unsupported constant type combination: {data_type:?} with {prim_lit:?}"), )), } } @@ -825,7 +822,7 @@ pub(crate) fn create_primitive_array_repeated( (dt, _) => { return Err(Error::new( ErrorKind::Unexpected, - format!("unexpected target column type {}", dt), + format!("unexpected target column type {dt}"), )); } }) diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index 27d5edaedb..f3a521379e 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -1000,13 +1000,13 @@ mod _serde_set_statistics { snapshot_id, statistics, } = SetStatistics::deserialize(deserializer)?; - if let Some(snapshot_id) = snapshot_id { - if snapshot_id != statistics.snapshot_id { - return Err(serde::de::Error::custom(format!( - "Snapshot id to set {snapshot_id} does not match the statistics file snapshot id {}", - statistics.snapshot_id - ))); - } + if let Some(snapshot_id) = snapshot_id + && snapshot_id != statistics.snapshot_id + { + return Err(serde::de::Error::custom(format!( + "Snapshot id to set {snapshot_id} does not match the statistics file snapshot id {}", + statistics.snapshot_id + ))); } Ok(statistics) diff --git a/crates/iceberg/src/delete_vector.rs b/crates/iceberg/src/delete_vector.rs index f382bf079e..df8a10193c 100644 --- a/crates/iceberg/src/delete_vector.rs +++ 
b/crates/iceberg/src/delete_vector.rs @@ -36,7 +36,7 @@ impl DeleteVector { } } - pub fn iter(&self) -> DeleteVectorIterator { + pub fn iter(&self) -> DeleteVectorIterator<'_> { let outer = self.inner.bitmaps(); DeleteVectorIterator { outer, inner: None } } @@ -93,10 +93,10 @@ impl Iterator for DeleteVectorIterator<'_> { type Item = u64; fn next(&mut self) -> Option { - if let Some(inner) = &mut self.inner { - if let Some(inner_next) = inner.bitmap_iter.next() { - return Some(u64::from(inner.high_bits) << 32 | u64::from(inner_next)); - } + if let Some(inner) = &mut self.inner + && let Some(inner_next) = inner.bitmap_iter.next() + { + return Some(u64::from(inner.high_bits) << 32 | u64::from(inner_next)); } if let Some((high_bits, next_bitmap)) = self.outer.next() { diff --git a/crates/iceberg/src/expr/visitors/manifest_evaluator.rs b/crates/iceberg/src/expr/visitors/manifest_evaluator.rs index abbd136cb1..770163ae95 100644 --- a/crates/iceberg/src/expr/visitors/manifest_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/manifest_evaluator.rs @@ -161,10 +161,10 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> { _predicate: &BoundPredicate, ) -> crate::Result { let field = self.field_summary_for_reference(reference); - if let Some(contains_nan) = field.contains_nan { - if !contains_nan { - return ROWS_CANNOT_MATCH; - } + if let Some(contains_nan) = field.contains_nan + && !contains_nan + { + return ROWS_CANNOT_MATCH; } if ManifestFilterVisitor::are_all_null(field, &reference.field().field_type) { @@ -389,16 +389,16 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> { return ROWS_MIGHT_MATCH; } - if prefix.as_bytes().eq(&lower_bound[..prefix_len]) { - if let Some(upper_bound) = &field.upper_bound { - // if upper is shorter than the prefix then upper can't start with the prefix - if prefix_len > upper_bound.len() { - return ROWS_MIGHT_MATCH; - } + if prefix.as_bytes().eq(&lower_bound[..prefix_len]) + && let Some(upper_bound) = &field.upper_bound + { 
+ // if upper is shorter than the prefix then upper can't start with the prefix + if prefix_len > upper_bound.len() { + return ROWS_MIGHT_MATCH; + } - if prefix.as_bytes().eq(&upper_bound[..prefix_len]) { - return ROWS_CANNOT_MATCH; - } + if prefix.as_bytes().eq(&upper_bound[..prefix_len]) { + return ROWS_CANNOT_MATCH; } } } diff --git a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs index 3745d94d18..ae3a906275 100644 --- a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs @@ -547,16 +547,16 @@ impl BoundPredicateVisitor for PageIndexEvaluator<'_> { return Ok(false); } - if let Some(min) = min { - if min.gt(datum) { - return Ok(false); - } + if let Some(min) = min + && min.gt(datum) + { + return Ok(false); } - if let Some(max) = max { - if max.lt(datum) { - return Ok(false); - } + if let Some(max) = max + && max.lt(datum) + { + return Ok(false); } Ok(true) diff --git a/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs index e9bed775ef..7c652e2068 100644 --- a/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs @@ -129,10 +129,10 @@ impl<'a> StrictMetricsEvaluator<'a> { self.upper_bound(field_id) }; - if let Some(bound) = bound { - if cmp_fn(bound, datum) { - return ROWS_MUST_MATCH; - } + if let Some(bound) = bound + && cmp_fn(bound, datum) + { + return ROWS_MUST_MATCH; } ROWS_MIGHT_NOT_MATCH @@ -219,10 +219,10 @@ impl BoundPredicateVisitor for StrictMetricsEvaluator<'_> { ) -> crate::Result { let field_id = reference.field().id; - if let Some(&nan_count) = self.nan_count(field_id) { - if nan_count == 0 { - return ROWS_MUST_MATCH; - } + if let Some(&nan_count) = self.nan_count(field_id) + && nan_count == 0 + { + return ROWS_MUST_MATCH; } if self.contains_nulls_only(field_id) { @@ 
-258,10 +258,10 @@ impl BoundPredicateVisitor for StrictMetricsEvaluator<'_> { ) -> crate::Result { let field_id = reference.field().id; - if let Some(lower) = self.lower_bound(field_id) { - if lower.is_nan() { - return ROWS_MIGHT_NOT_MATCH; - } + if let Some(lower) = self.lower_bound(field_id) + && lower.is_nan() + { + return ROWS_MIGHT_NOT_MATCH; } self.visit_inequality(reference, datum, PartialOrd::gt, true) diff --git a/crates/iceberg/src/inspect/metadata_table.rs b/crates/iceberg/src/inspect/metadata_table.rs index 92571db181..d5e9d60869 100644 --- a/crates/iceberg/src/inspect/metadata_table.rs +++ b/crates/iceberg/src/inspect/metadata_table.rs @@ -71,12 +71,12 @@ impl<'a> MetadataTable<'a> { } /// Get the snapshots table. - pub fn snapshots(&self) -> SnapshotsTable { + pub fn snapshots(&self) -> SnapshotsTable<'_> { SnapshotsTable::new(self.0) } /// Get the manifests table. - pub fn manifests(&self) -> ManifestsTable { + pub fn manifests(&self) -> ManifestsTable<'_> { ManifestsTable::new(self.0) } } diff --git a/crates/iceberg/src/io/storage.rs b/crates/iceberg/src/io/storage.rs index 5880ccca59..03e43600dd 100644 --- a/crates/iceberg/src/io/storage.rs +++ b/crates/iceberg/src/io/storage.rs @@ -183,7 +183,7 @@ impl Storage { } else { Err(Error::new( ErrorKind::DataInvalid, - format!("Invalid gcs url: {}, should start with {}", path, prefix), + format!("Invalid gcs url: {path}, should start with {prefix}"), )) } } @@ -198,7 +198,7 @@ impl Storage { } else { Err(Error::new( ErrorKind::DataInvalid, - format!("Invalid oss url: {}, should start with {}", path, prefix), + format!("Invalid oss url: {path}, should start with {prefix}"), )) } } diff --git a/crates/iceberg/src/io/storage_azdls.rs b/crates/iceberg/src/io/storage_azdls.rs index fe12167f6f..5abb0cd6e0 100644 --- a/crates/iceberg/src/io/storage_azdls.rs +++ b/crates/iceberg/src/io/storage_azdls.rs @@ -165,7 +165,7 @@ impl FromStr for AzureStorageScheme { "wasbs" => Ok(AzureStorageScheme::Wasbs), _ => 
Err(Error::new( ErrorKind::DataInvalid, - format!("Unexpected Azure Storage scheme: {}", s), + format!("Unexpected Azure Storage scheme: {s}"), )), } } @@ -397,11 +397,11 @@ mod tests { let config = azdls_config_parse(properties); match expected { Some(expected_config) => { - assert!(config.is_ok(), "Test case {} failed: {:?}", name, config); - assert_eq!(config.unwrap(), expected_config, "Test case: {}", name); + assert!(config.is_ok(), "Test case {name} failed: {config:?}"); + assert_eq!(config.unwrap(), expected_config, "Test case: {name}"); } None => { - assert!(config.is_err(), "Test case {} expected error.", name); + assert!(config.is_err(), "Test case {name} expected error."); } } } @@ -495,14 +495,14 @@ mod tests { let result = azdls_create_operator(input.0, &input.1, &input.2); match expected { Some((expected_filesystem, expected_path)) => { - assert!(result.is_ok(), "Test case {} failed: {:?}", name, result); + assert!(result.is_ok(), "Test case {name} failed: {result:?}"); let (op, relative_path) = result.unwrap(); assert_eq!(op.info().name(), expected_filesystem); assert_eq!(relative_path, expected_path); } None => { - assert!(result.is_err(), "Test case {} expected error.", name); + assert!(result.is_err(), "Test case {name} expected error."); } } } @@ -543,11 +543,11 @@ mod tests { let result = input.parse::(); match expected { Some(expected_path) => { - assert!(result.is_ok(), "Test case {} failed: {:?}", name, result); - assert_eq!(result.unwrap(), expected_path, "Test case: {}", name); + assert!(result.is_ok(), "Test case {name} failed: {result:?}"); + assert_eq!(result.unwrap(), expected_path, "Test case: {name}"); } None => { - assert!(result.is_err(), "Test case {} expected error.", name); + assert!(result.is_err(), "Test case {name} expected error."); } } } @@ -593,7 +593,7 @@ mod tests { for (name, path, expected) in test_cases { let endpoint = path.as_endpoint(); - assert_eq!(endpoint, expected, "Test case: {}", name); + assert_eq!(endpoint, 
expected, "Test case: {name}"); } } } diff --git a/crates/iceberg/src/io/storage_gcs.rs b/crates/iceberg/src/io/storage_gcs.rs index 8c3d914c86..7718df603f 100644 --- a/crates/iceberg/src/io/storage_gcs.rs +++ b/crates/iceberg/src/io/storage_gcs.rs @@ -71,20 +71,20 @@ pub(crate) fn gcs_config_parse(mut m: HashMap) -> Result Result let bucket = url.host_str().ok_or_else(|| { Error::new( ErrorKind::DataInvalid, - format!("Invalid gcs url: {}, bucket is required", path), + format!("Invalid gcs url: {path}, bucket is required"), ) })?; diff --git a/crates/iceberg/src/io/storage_oss.rs b/crates/iceberg/src/io/storage_oss.rs index 8bfffc6ca8..e82dda23a5 100644 --- a/crates/iceberg/src/io/storage_oss.rs +++ b/crates/iceberg/src/io/storage_oss.rs @@ -56,7 +56,7 @@ pub(crate) fn oss_config_build(cfg: &OssConfig, path: &str) -> Result let bucket = url.host_str().ok_or_else(|| { Error::new( ErrorKind::DataInvalid, - format!("Invalid oss url: {}, missing bucket", path), + format!("Invalid oss url: {path}, missing bucket"), ) })?; diff --git a/crates/iceberg/src/io/storage_s3.rs b/crates/iceberg/src/io/storage_s3.rs index fcf9afed1f..f069e0e2f9 100644 --- a/crates/iceberg/src/io/storage_s3.rs +++ b/crates/iceberg/src/io/storage_s3.rs @@ -134,20 +134,20 @@ pub(crate) fn s3_config_parse(mut m: HashMap) -> Result Result { Err(Error::new( ErrorKind::Unexpected, format!( - "Metadata field ID {} recognized but field definition not implemented", - field_id + "Metadata field ID {field_id} recognized but field definition not implemented" ), )) } _ => Err(Error::new( ErrorKind::Unexpected, - format!("Field ID {} is not a metadata field", field_id), + format!("Field ID {field_id} is not a metadata field"), )), } } diff --git a/crates/iceberg/src/scan/mod.rs b/crates/iceberg/src/scan/mod.rs index 24c03b0b2c..d83da8a879 100644 --- a/crates/iceberg/src/scan/mod.rs +++ b/crates/iceberg/src/scan/mod.rs @@ -1872,8 +1872,7 @@ pub mod tests { let file_path = string_values.value(0); assert!( 
file_path.ends_with(".parquet"), - "File path should end with .parquet, got: {}", - file_path + "File path should end with .parquet, got: {file_path}" ); } @@ -1981,8 +1980,7 @@ pub mod tests { for path in &file_paths { assert!( path.ends_with(".parquet"), - "All file paths should end with .parquet, got: {}", - path + "All file paths should end with .parquet, got: {path}" ); } } diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs index 456b754408..0379465584 100644 --- a/crates/iceberg/src/spec/datatypes.rs +++ b/crates/iceberg/src/spec/datatypes.rs @@ -427,8 +427,7 @@ impl<'de> Deserialize<'de> for StructType { let type_val: String = map.next_value()?; if type_val != "struct" { return Err(serde::de::Error::custom(format!( - "expected type 'struct', got '{}'", - type_val + "expected type 'struct', got '{type_val}'" ))); } } diff --git a/crates/iceberg/src/spec/manifest/writer.rs b/crates/iceberg/src/spec/manifest/writer.rs index ebb0590bcf..389ac7a1fd 100644 --- a/crates/iceberg/src/spec/manifest/writer.rs +++ b/crates/iceberg/src/spec/manifest/writer.rs @@ -388,10 +388,10 @@ impl ManifestWriter { self.existing_rows += entry.data_file.record_count; } } - if entry.is_alive() { - if let Some(seq_num) = entry.sequence_number { - self.min_seq_num = Some(self.min_seq_num.map_or(seq_num, |v| min(v, seq_num))); - } + if entry.is_alive() + && let Some(seq_num) = entry.sequence_number + { + self.min_seq_num = Some(self.min_seq_num.map_or(seq_num, |v| min(v, seq_num))); } self.manifest_entries.push(entry); Ok(()) diff --git a/crates/iceberg/src/spec/schema/prune_columns.rs b/crates/iceberg/src/spec/schema/prune_columns.rs index 5a2f0b50fc..14f1bfd25f 100644 --- a/crates/iceberg/src/spec/schema/prune_columns.rs +++ b/crates/iceberg/src/spec/schema/prune_columns.rs @@ -110,19 +110,19 @@ impl SchemaVisitor for PruneColumn { if self.select_full_types { Ok(Some(*field.field_type.clone())) } else if field.field_type.is_struct() { - return 
Ok(Some(Type::Struct(PruneColumn::project_selected_struct( + Ok(Some(Type::Struct(PruneColumn::project_selected_struct( value, - )?))); + )?))) } else if !field.field_type.is_nested() { - return Ok(Some(*field.field_type.clone())); + Ok(Some(*field.field_type.clone())) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, "Can't project list or map field directly when not selecting full type." .to_string(), ) .with_context("field_id", field.id.to_string()) - .with_context("field_type", field.field_type.to_string())); + .with_context("field_type", field.field_type.to_string())) } } else { Ok(value) @@ -174,20 +174,20 @@ impl SchemaVisitor for PruneColumn { Ok(Some(Type::List(list.clone()))) } else if list.element_field.field_type.is_struct() { let projected_struct = PruneColumn::project_selected_struct(value).unwrap(); - return Ok(Some(Type::List(PruneColumn::project_list( + Ok(Some(Type::List(PruneColumn::project_list( list, Type::Struct(projected_struct), - )?))); + )?))) } else if list.element_field.field_type.is_primitive() { - return Ok(Some(Type::List(list.clone()))); + Ok(Some(Type::List(list.clone()))) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, format!( "Cannot explicitly project List or Map types, List element {} of type {} was selected", list.element_field.id, list.element_field.field_type ), - )); + )) } } else if let Some(result) = value { Ok(Some(Type::List(PruneColumn::project_list(list, result)?))) @@ -208,26 +208,26 @@ impl SchemaVisitor for PruneColumn { } else if map.value_field.field_type.is_struct() { let projected_struct = PruneColumn::project_selected_struct(Some(value.unwrap())).unwrap(); - return Ok(Some(Type::Map(PruneColumn::project_map( + Ok(Some(Type::Map(PruneColumn::project_map( map, Type::Struct(projected_struct), - )?))); + )?))) } else if map.value_field.field_type.is_primitive() { - return Ok(Some(Type::Map(map.clone()))); + Ok(Some(Type::Map(map.clone()))) } else { - return 
Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, format!( "Cannot explicitly project List or Map types, Map value {} of type {} was selected", map.value_field.id, map.value_field.field_type ), - )); + )) } } else if let Some(value_result) = value { - return Ok(Some(Type::Map(PruneColumn::project_map( + Ok(Some(Type::Map(PruneColumn::project_map( map, value_result, - )?))); + )?))) } else if self.selected.contains(&map.key_field.id) { Ok(Some(Type::Map(map.clone()))) } else { diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index 06b32cc847..48b715da59 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -390,18 +390,18 @@ impl TableMetadata { } fn construct_refs(&mut self) { - if let Some(current_snapshot_id) = self.current_snapshot_id { - if !self.refs.contains_key(MAIN_BRANCH) { - self.refs - .insert(MAIN_BRANCH.to_string(), SnapshotReference { - snapshot_id: current_snapshot_id, - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: None, - max_snapshot_age_ms: None, - max_ref_age_ms: None, - }, - }); - } + if let Some(current_snapshot_id) = self.current_snapshot_id + && !self.refs.contains_key(MAIN_BRANCH) + { + self.refs + .insert(MAIN_BRANCH.to_string(), SnapshotReference { + snapshot_id: current_snapshot_id, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: None, + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, + }); } } @@ -572,17 +572,17 @@ impl TableMetadata { let main_ref = self.refs.get(MAIN_BRANCH); if self.current_snapshot_id.is_some() { - if let Some(main_ref) = main_ref { - if main_ref.snapshot_id != self.current_snapshot_id.unwrap_or_default() { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Current snapshot id does not match main branch ({:?} != {:?})", - self.current_snapshot_id.unwrap_or_default(), - main_ref.snapshot_id - ), - )); - } + if let Some(main_ref) = main_ref + && 
main_ref.snapshot_id != self.current_snapshot_id.unwrap_or_default() + { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Current snapshot id does not match main branch ({:?} != {:?})", + self.current_snapshot_id.unwrap_or_default(), + main_ref.snapshot_id + ), + )); } } else if main_ref.is_some() { return Err(Error::new( @@ -606,22 +606,21 @@ impl TableMetadata { )); } - if self.format_version >= FormatVersion::V2 { - if let Some(snapshot) = self + if self.format_version >= FormatVersion::V2 + && let Some(snapshot) = self .snapshots .values() .find(|snapshot| snapshot.sequence_number() > self.last_sequence_number) - { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Invalid snapshot with id {} and sequence number {} greater than last sequence number {}", - snapshot.snapshot_id(), - snapshot.sequence_number(), - self.last_sequence_number - ), - )); - } + { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid snapshot with id {} and sequence number {} greater than last sequence number {}", + snapshot.snapshot_id(), + snapshot.sequence_number(), + self.last_sequence_number + ), + )); } Ok(()) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 6068716eff..354dc1889c 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -711,10 +711,10 @@ impl Transform { PredicateOperator::GreaterThan => Some(PredicateOperator::GreaterThanOrEq), PredicateOperator::StartsWith => match datum.literal() { PrimitiveLiteral::String(s) => { - if let Some(w) = width { - if s.len() == w as usize { - return Some(PredicateOperator::Eq); - }; + if let Some(w) = width + && s.len() == w as usize + { + return Some(PredicateOperator::Eq); }; Some(*op) } @@ -757,47 +757,45 @@ impl Transform { _ => false, }; - if should_adjust { - if let &PrimitiveLiteral::Int(v) = transformed.literal() { - match op { - PredicateOperator::LessThan - | PredicateOperator::LessThanOrEq 
- | PredicateOperator::In => { - if v < 0 { + if should_adjust && let &PrimitiveLiteral::Int(v) = transformed.literal() { + match op { + PredicateOperator::LessThan + | PredicateOperator::LessThanOrEq + | PredicateOperator::In => { + if v < 0 { + // # TODO + // An ugly hack to fix. Refine the increment and decrement logic later. + match self { + Transform::Day => { + return Some(AdjustedProjection::Single(Datum::date(v + 1))); + } + _ => { + return Some(AdjustedProjection::Single(Datum::int(v + 1))); + } + } + }; + } + PredicateOperator::Eq => { + if v < 0 { + let new_set = FnvHashSet::from_iter(vec![ + transformed.to_owned(), // # TODO // An ugly hack to fix. Refine the increment and decrement logic later. - match self { - Transform::Day => { - return Some(AdjustedProjection::Single(Datum::date(v + 1))); + { + match self { + Transform::Day => Datum::date(v + 1), + _ => Datum::int(v + 1), } - _ => { - return Some(AdjustedProjection::Single(Datum::int(v + 1))); - } - } - }; - } - PredicateOperator::Eq => { - if v < 0 { - let new_set = FnvHashSet::from_iter(vec![ - transformed.to_owned(), - // # TODO - // An ugly hack to fix. Refine the increment and decrement logic later. 
- { - match self { - Transform::Day => Datum::date(v + 1), - _ => Datum::int(v + 1), - } - }, - ]); - return Some(AdjustedProjection::Set(new_set)); - } - } - _ => { - return None; + }, + ]); + return Some(AdjustedProjection::Set(new_set)); } } - }; - } + _ => { + return None; + } + } + }; None } diff --git a/crates/iceberg/src/spec/values/tests.rs b/crates/iceberg/src/spec/values/tests.rs index 0e99d44dfe..73343a9a1a 100644 --- a/crates/iceberg/src/spec/values/tests.rs +++ b/crates/iceberg/src/spec/values/tests.rs @@ -447,7 +447,7 @@ fn check_raw_literal_bytes_error_via_avro(input_bytes: Vec, expected_type: & let avro_value = Value::Bytes(input_bytes); let raw_literal: _serde::RawLiteral = apache_avro::from_value(&avro_value).unwrap(); let result = raw_literal.try_into(expected_type); - assert!(result.is_err(), "Expected error but got: {:?}", result); + assert!(result.is_err(), "Expected error but got: {result:?}"); } #[test] diff --git a/crates/iceberg/src/spec/view_metadata_builder.rs b/crates/iceberg/src/spec/view_metadata_builder.rs index 9f542a7c61..38041ca625 100644 --- a/crates/iceberg/src/spec/view_metadata_builder.rs +++ b/crates/iceberg/src/spec/view_metadata_builder.rs @@ -478,10 +478,10 @@ impl ViewMetadataBuilder { // as it might panic if the metadata is invalid. 
self.metadata.validate()?; - if let Some(previous) = self.previous_view_version.take() { - if !allow_replace_drop_dialects(&self.metadata.properties) { - require_no_dialect_dropped(&previous, self.metadata.current_version())?; - } + if let Some(previous) = self.previous_view_version.take() + && !allow_replace_drop_dialects(&self.metadata.properties) + { + require_no_dialect_dropped(&previous, self.metadata.current_version())?; } let _expired_versions = self.expire_versions(); diff --git a/crates/iceberg/src/transaction/mod.rs b/crates/iceberg/src/transaction/mod.rs index 4116264a14..8ddaa26698 100644 --- a/crates/iceberg/src/transaction/mod.rs +++ b/crates/iceberg/src/transaction/mod.rs @@ -518,7 +518,7 @@ mod test_row_lineage { fn file_with_rows(record_count: u64) -> DataFile { DataFileBuilder::default() .content(DataContentType::Data) - .file_path(format!("test/{}.parquet", record_count)) + .file_path(format!("test/{record_count}.parquet")) .file_format(DataFileFormat::Parquet) .file_size_in_bytes(100) .record_count(record_count) diff --git a/crates/iceberg/src/transaction/snapshot.rs b/crates/iceberg/src/transaction/snapshot.rs index d59828ce31..c8bf26a174 100644 --- a/crates/iceberg/src/transaction/snapshot.rs +++ b/crates/iceberg/src/transaction/snapshot.rs @@ -276,13 +276,13 @@ impl<'a> SnapshotProducer<'a> { "Partition field should only be primitive type.", ) })?; - if let Some(value) = value { - if !field.compatible(&value.as_primitive_literal().unwrap()) { - return Err(Error::new( - ErrorKind::DataInvalid, - "Partition value is not compatible partition type", - )); - } + if let Some(value) = value + && !field.compatible(&value.as_primitive_literal().unwrap()) + { + return Err(Error::new( + ErrorKind::DataInvalid, + "Partition value is not compatible partition type", + )); } } Ok(()) diff --git a/crates/iceberg/src/writer/file_writer/rolling_writer.rs b/crates/iceberg/src/writer/file_writer/rolling_writer.rs index 06246ab660..a93e494d48 100644 --- 
a/crates/iceberg/src/writer/file_writer/rolling_writer.rs +++ b/crates/iceberg/src/writer/file_writer/rolling_writer.rs @@ -197,18 +197,18 @@ where ); } - if self.should_roll() { - if let Some(inner) = self.inner.take() { - // close the current writer, roll to a new file - self.data_file_builders.extend(inner.close().await?); - - // start a new writer - self.inner = Some( - self.inner_builder - .build(self.new_output_file(partition_key)?) - .await?, - ); - } + if self.should_roll() + && let Some(inner) = self.inner.take() + { + // close the current writer, roll to a new file + self.data_file_builders.extend(inner.close().await?); + + // start a new writer + self.inner = Some( + self.inner_builder + .build(self.new_output_file(partition_key)?) + .await?, + ); } // write the input diff --git a/crates/iceberg/tests/file_io_gcs_test.rs b/crates/iceberg/tests/file_io_gcs_test.rs index 161285ae6f..9fbcdadd0e 100644 --- a/crates/iceberg/tests/file_io_gcs_test.rs +++ b/crates/iceberg/tests/file_io_gcs_test.rs @@ -68,7 +68,7 @@ mod tests { FileIOBuilder::new("gcs") .with_props(vec![ - (GCS_SERVICE_PATH, format!("http://{}", addr)), + (GCS_SERVICE_PATH, format!("http://{addr}")), (GCS_NO_AUTH, "true".to_string()), ]) .build() @@ -81,13 +81,13 @@ mod tests { bucket_data.insert("name", name); let client = reqwest::Client::new(); - let endpoint = format!("http://{}/storage/v1/b", server_addr); + let endpoint = format!("http://{server_addr}/storage/v1/b"); client.post(endpoint).json(&bucket_data).send().await?; Ok(()) } fn get_gs_path() -> String { - format!("gs://{}", FAKE_GCS_BUCKET) + format!("gs://{FAKE_GCS_BUCKET}") } #[tokio::test] diff --git a/crates/integrations/datafusion/src/physical_plan/repartition.rs b/crates/integrations/datafusion/src/physical_plan/repartition.rs index 8ad87fd1cc..2d1d7f862c 100644 --- a/crates/integrations/datafusion/src/physical_plan/repartition.rs +++ b/crates/integrations/datafusion/src/physical_plan/repartition.rs @@ -159,9 +159,8 @@ fn 
determine_partitioning_strategy( // Case 2: Partitioned table missing _partition column (normally this should not happen) (true, Err(_)) => Err(DataFusionError::Plan(format!( - "Partitioned table input missing {} column. \ - Ensure projection happens before repartitioning.", - PROJECTED_PARTITION_VALUE_COLUMN + "Partitioned table input missing {PROJECTED_PARTITION_VALUE_COLUMN} column. \ + Ensure projection happens before repartitioning." ))), // Case 3: Unpartitioned table, always use RoundRobinBatch @@ -508,8 +507,7 @@ mod tests { assert!( column_names.contains(&PROJECTED_PARTITION_VALUE_COLUMN.to_string()), - "Should use _partition column, got: {:?}", - column_names + "Should use _partition column, got: {column_names:?}" ); } _ => panic!("Expected Hash partitioning with Identity transform"), @@ -733,8 +731,7 @@ mod tests { .collect(); assert!( column_names.contains(&PROJECTED_PARTITION_VALUE_COLUMN.to_string()), - "Should use _partition column for mixed transforms with Identity, got: {:?}", - column_names + "Should use _partition column for mixed transforms with Identity, got: {column_names:?}" ); } _ => panic!("Expected Hash partitioning for table with identity transforms"), diff --git a/crates/integrations/datafusion/src/physical_plan/sort.rs b/crates/integrations/datafusion/src/physical_plan/sort.rs index 2a57e16e43..ede2547535 100644 --- a/crates/integrations/datafusion/src/physical_plan/sort.rs +++ b/crates/integrations/datafusion/src/physical_plan/sort.rs @@ -53,8 +53,7 @@ pub(crate) fn sort_by_partition(input: Arc) -> DFResult Result<()> { let file_io = table.file_io(); // List files under each expected partition path - let electronics_path = format!("{}/data/category=electronics", table_location); - let books_path = format!("{}/data/category=books", table_location); - let clothing_path = format!("{}/data/category=clothing", table_location); + let electronics_path = format!("{table_location}/data/category=electronics"); + let books_path = 
format!("{table_location}/data/category=books"); + let clothing_path = format!("{table_location}/data/category=clothing"); // Verify partition directories exist and contain data files assert!( file_io.exists(&electronics_path).await?, - "Expected partition directory: {}", - electronics_path + "Expected partition directory: {electronics_path}" ); assert!( file_io.exists(&books_path).await?, - "Expected partition directory: {}", - books_path + "Expected partition directory: {books_path}" ); assert!( file_io.exists(&clothing_path).await?, - "Expected partition directory: {}", - clothing_path + "Expected partition directory: {clothing_path}" ); Ok(()) diff --git a/rust-toolchain.toml b/rust-toolchain.toml index ff7d1f7fbb..4b20d68e44 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -20,5 +20,5 @@ # # The channel is exactly same day for our MSRV. [toolchain] -channel = "nightly-2025-03-28" +channel = "nightly-2025-06-23" components = ["rustfmt", "clippy"] From 5f631a5e2830eaa22457495f77db612068dc587c Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 10 Dec 2025 14:34:11 +0800 Subject: [PATCH 22/58] ci: Make s3tables ready for publish (#1916) ## Which issue does this PR close? - Closes https://github.com/apache/iceberg-rust/issues/1886 ## What changes are included in this PR? Add missing files needed for s3tables. ## Are these changes tested? 
Signed-off-by: Xuanwo --- .github/workflows/publish.yml | 1 + crates/catalog/s3tables/Cargo.toml | 1 + crates/catalog/s3tables/LICENSE | 201 +++++++++++++++++++++++++++++ crates/catalog/s3tables/NOTICE | 5 + crates/catalog/s3tables/README.md | 56 ++++++++ 5 files changed, 264 insertions(+) create mode 100644 crates/catalog/s3tables/LICENSE create mode 100644 crates/catalog/s3tables/NOTICE create mode 100644 crates/catalog/s3tables/README.md diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 634cd73519..66c17a668d 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -42,6 +42,7 @@ jobs: - "crates/catalog/glue" - "crates/catalog/hms" - "crates/catalog/rest" + - "crates/catalog/s3tables" - "crates/catalog/sql" - "crates/integrations/datafusion" steps: diff --git a/crates/catalog/s3tables/Cargo.toml b/crates/catalog/s3tables/Cargo.toml index 66fb70fefc..fde08b9a49 100644 --- a/crates/catalog/s3tables/Cargo.toml +++ b/crates/catalog/s3tables/Cargo.toml @@ -21,6 +21,7 @@ homepage = { workspace = true } name = "iceberg-catalog-s3tables" rust-version = { workspace = true } version = { workspace = true } +readme = "README.md" categories = ["database"] description = "Apache Iceberg Rust S3Tables Catalog" diff --git a/crates/catalog/s3tables/LICENSE b/crates/catalog/s3tables/LICENSE new file mode 100644 index 0000000000..261eeb9e9f --- /dev/null +++ b/crates/catalog/s3tables/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/crates/catalog/s3tables/NOTICE b/crates/catalog/s3tables/NOTICE new file mode 100644 index 0000000000..9340680cbd --- /dev/null +++ b/crates/catalog/s3tables/NOTICE @@ -0,0 +1,5 @@ +Apache Iceberg Rust +Copyright 2023-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). 
diff --git a/crates/catalog/s3tables/README.md b/crates/catalog/s3tables/README.md new file mode 100644 index 0000000000..d9784d5231 --- /dev/null +++ b/crates/catalog/s3tables/README.md @@ -0,0 +1,56 @@ + + +# Apache Iceberg S3Tables Catalog (Rust) + +[![crates.io](https://img.shields.io/crates/v/iceberg.svg)](https://crates.io/crates/iceberg-catalog-s3tables) +[![docs.rs](https://img.shields.io/docsrs/iceberg.svg)](https://docs.rs/iceberg/latest/iceberg_catalog_s3tables/) + +Official Native Rust implementation of the Apache Iceberg S3Tables catalog. + +## Quick start + +```rust,no_run +use std::collections::HashMap; + +use iceberg::CatalogBuilder; +use iceberg_catalog_s3tables::{ + S3TABLES_CATALOG_PROP_ENDPOINT_URL, S3TABLES_CATALOG_PROP_TABLE_BUCKET_ARN, + S3TablesCatalogBuilder, +}; + +#[tokio::main] +async fn main() { + let catalog = S3TablesCatalogBuilder::default() + .with_endpoint_url("http://localhost:4566") + .load( + "s3tables", + HashMap::from([( + S3TABLES_CATALOG_PROP_TABLE_BUCKET_ARN.to_string(), + "arn:aws:s3tables:us-east-1:123456789012:bucket/my-bucket".to_string(), + )]), + ) + .await + .unwrap(); + + // use `catalog` as any Iceberg Catalog +} +``` + +See the [API documentation](https://docs.rs/iceberg_catalog_s3tables/latest) for the full API surface. 
From aaa700d4af6c9e86a0a7169000b3788c6d54430e Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 10 Dec 2025 02:07:35 -0500 Subject: [PATCH 23/58] deps: bump DataFusion to 51, Arrow to 57, pyo to 0.26 (#1899) --- Cargo.lock | 1239 ++++++++--------- Cargo.toml | 24 +- bindings/python/Cargo.lock | 379 +++-- bindings/python/Cargo.toml | 8 +- bindings/python/src/transform.rs | 20 +- crates/iceberg/Cargo.toml | 1 - crates/iceberg/src/arrow/reader.rs | 35 +- .../src/expr/visitors/page_index_evaluator.rs | 613 ++++---- crates/iceberg/src/inspect/manifests.rs | 24 +- crates/iceberg/src/inspect/snapshots.rs | 14 +- .../src/writer/file_writer/parquet_writer.rs | 37 +- .../src/table/table_provider_factory.rs | 1 + .../tests/integration_datafusion_test.rs | 72 +- crates/integrations/playground/src/main.rs | 2 + 14 files changed, 1124 insertions(+), 1345 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c7c103d279..2d464709fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -116,22 +116,22 @@ dependencies = [ [[package]] name = "anstyle-query" -version = "1.1.4" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "anstyle-wincon" -version = "3.0.10" +version = "3.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -161,8 +161,8 @@ dependencies = [ "serde_bytes", "serde_json", "snap", - "strum 0.27.2", - "strum_macros 0.27.2", + "strum", + "strum_macros", "thiserror 2.0.17", "uuid", "xz2", @@ -187,8 +187,8 @@ 
dependencies = [ "serde", "serde_bytes", "serde_json", - "strum 0.27.2", - "strum_macros 0.27.2", + "strum", + "strum_macros", "thiserror 2.0.17", "uuid", "zstd", @@ -223,9 +223,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3" dependencies = [ "arrow-arith", "arrow-array", @@ -244,23 +244,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num", + "num-traits", ] [[package]] name = "arrow-array" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -269,47 +269,51 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.16.0", - "num", + "hashbrown 0.16.1", + "num-complex", + "num-integer", + "num-traits", ] [[package]] name = "arrow-buffer" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626" dependencies = [ "bytes", "half", - "num", + "num-bigint", + "num-traits", ] [[package]] name = "arrow-cast" -version = "56.2.0" 
+version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", + "arrow-ord", "arrow-schema", "arrow-select", "atoi", - "base64 0.22.1", + "base64", "chrono", "comfy-table", "half", "lexical-core", - "num", + "num-traits", "ryu", ] [[package]] name = "arrow-csv" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" +checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b" dependencies = [ "arrow-array", "arrow-cast", @@ -322,21 +326,22 @@ dependencies = [ [[package]] name = "arrow-data" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num", + "num-integer", + "num-traits", ] [[package]] name = "arrow-ipc" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3" dependencies = [ "arrow-array", "arrow-buffer", @@ -350,9 +355,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" +checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0" dependencies = [ "arrow-array", "arrow-buffer", @@ -361,20 +366,22 @@ 
dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.12.0", + "indexmap 2.12.1", + "itoa", "lexical-core", "memchr", - "num", - "serde", + "num-traits", + "ryu", + "serde_core", "serde_json", "simdutf8", ] [[package]] name = "arrow-ord" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521" dependencies = [ "arrow-array", "arrow-buffer", @@ -385,9 +392,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f" dependencies = [ "arrow-array", "arrow-buffer", @@ -398,33 +405,33 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0" dependencies = [ - "serde", + "serde_core", "serde_json", ] [[package]] name = "arrow-select" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" dependencies = [ "ahash 0.8.12", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num", + "num-traits", ] [[package]] name = "arrow-string" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +checksum = 
"cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534" dependencies = [ "arrow-array", "arrow-buffer", @@ -432,7 +439,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num", + "num-traits", "regex", "regex-syntax", ] @@ -501,7 +508,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -512,7 +519,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -538,9 +545,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.8" +version = "1.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37cf2b6af2a95a20e266782b4f76f1a5e12bf412a9db2de9c1e9123b9d8c0ad8" +checksum = "a0149602eeaf915158e14029ba0c78dedb8c08d554b024d54c8f239aab46511d" dependencies = [ "aws-credential-types", "aws-runtime", @@ -568,9 +575,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.8" +version = "1.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faf26925f4a5b59eb76722b63c2892b1d70d06fa053c72e4a100ec308c1d47bc" +checksum = "b01c9521fa01558f750d183c8c68c81b0155b9d193a4ba7f84c36bd1b6d04a06" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -580,9 +587,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.14.1" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "879b6c89592deb404ba4dc0ae6b58ffd1795c78991cbb5b8bc441c48a070440d" +checksum = "6b5ce75405893cd713f9ab8e297d8e438f624dde7d706108285f7e17a25a180f" dependencies = [ "aws-lc-sys", "zeroize", @@ -590,11 +597,10 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.32.3" +version = "0.34.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "107a4e9d9cab9963e04e84bb8dee0e25f2a987f9a8bad5ed054abd439caa8f8c" +checksum = "179c3777a8b5e70e90ea426114ffc565b2c1a9f82f6c4a0c5a34aa6ef5e781b6" dependencies = [ - "bindgen", "cc", "cmake", "dunce", @@ -603,9 +609,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.12" +version = "1.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa006bb32360ed90ac51203feafb9d02e3d21046e1fd3a450a404b90ea73e5d" +checksum = "7ce527fb7e53ba9626fc47824f25e256250556c40d8f81d27dd92aa38239d632" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -627,9 +633,9 @@ dependencies = [ [[package]] name = "aws-sdk-glue" -version = "1.126.0" +version = "1.132.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd9c10a11584c0b619c9e478143072c4028c39017f98534e206156a7e94188be" +checksum = "35638d8e6ef97adb7f3154ffc618bbe1d631a503b6d8328b94af77b7615fbeb2" dependencies = [ "aws-credential-types", "aws-runtime", @@ -649,9 +655,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3tables" -version = "1.41.0" +version = "1.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "761f176da526badb4c3dbd67ee1da2faf3dc1e537ed229355f7590d80595ae35" +checksum = "ce68b5d4652e6248827e472c67df8773ae6ab3946ff176de8d3ee7c295299efd" dependencies = [ "aws-credential-types", "aws-runtime", @@ -671,9 +677,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.86.0" +version = "1.90.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a0abbfab841446cce6e87af853a3ba2cc1bc9afcd3f3550dd556c43d434c86d" +checksum = "4f18e53542c522459e757f81e274783a78f8c81acdfc8d1522ee8a18b5fb1c66" dependencies = [ "aws-credential-types", "aws-runtime", @@ -693,9 +699,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.89.0" +version = "1.92.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "695dc67bb861ccb8426c9129b91c30e266a0e3d85650cafdf62fcca14c8fd338" +checksum = "532f4d866012ffa724a4385c82e8dd0e59f0ca0e600f3f22d4c03b6824b34e4a" dependencies = [ "aws-credential-types", "aws-runtime", @@ -715,9 +721,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.88.0" +version = "1.94.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d30990923f4f675523c51eb1c0dec9b752fb267b36a61e83cbc219c9d86da715" +checksum = "1be6fbbfa1a57724788853a623378223fe828fc4c09b146c992f0c95b6256174" dependencies = [ "aws-credential-types", "aws-runtime", @@ -738,9 +744,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.3.5" +version = "1.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bffc03068fbb9c8dd5ce1c6fb240678a5cffb86fb2b7b1985c999c4b83c8df68" +checksum = "c35452ec3f001e1f2f6db107b6373f1f48f05ec63ba2c5c9fa91f07dad32af11" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -760,9 +766,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.6" +version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "127fcfad33b7dfc531141fda7e1c402ac65f88aca5511a4d31e2e3d2cd01ce9c" +checksum = "9ee19095c7c4dda59f1697d028ce704c24b2d33c6718790c7f1d5a3015b4107c" dependencies = [ "futures-util", "pin-project-lite", @@ -771,15 +777,16 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.62.4" +version = "0.62.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3feafd437c763db26aa04e0cc7591185d0961e64c61885bece0fb9d50ceac671" +checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", "bytes-utils", "futures-core", + "futures-util", "http 0.2.12", "http 1.4.0", "http-body 0.4.6", @@ -791,9 +798,9 @@ dependencies = [ [[package]] name 
= "aws-smithy-http-client" -version = "1.1.3" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1053b5e587e6fa40ce5a79ea27957b04ba660baa02b28b7436f64850152234f1" +checksum = "59e62db736db19c488966c8d787f52e6270be565727236fd5579eaa301e7bc4a" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -804,14 +811,14 @@ dependencies = [ "http 1.4.0", "http-body 0.4.6", "hyper 0.14.32", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-rustls 0.24.2", "hyper-rustls 0.27.7", "hyper-util", "pin-project-lite", "rustls 0.21.12", - "rustls 0.23.34", - "rustls-native-certs 0.8.2", + "rustls 0.23.35", + "rustls-native-certs", "rustls-pki-types", "tokio", "tokio-rustls 0.26.4", @@ -821,27 +828,27 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.61.6" +version = "0.61.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff418fc8ec5cadf8173b10125f05c2e7e1d46771406187b2c878557d4503390" +checksum = "a6864c190cbb8e30cf4b77b2c8f3b6dfffa697a09b7218d2f7cd3d4c4065a9f7" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-observability" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d1881b1ea6d313f9890710d65c158bdab6fb08c91ea825f74c1c8c357baf4cc" +checksum = "17f616c3f2260612fe44cede278bafa18e73e6479c4e393e2c4518cf2a9a228a" dependencies = [ "aws-smithy-runtime-api", ] [[package]] name = "aws-smithy-query" -version = "0.60.8" +version = "0.60.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d28a63441360c477465f80c7abac3b9c4d075ca638f982e605b7dc2a2c7156c9" +checksum = "ae5d689cf437eae90460e944a58b5668530d433b4ff85789e69d2f2a556e057d" dependencies = [ "aws-smithy-types", "urlencoding", @@ -849,9 +856,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.3" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"40ab99739082da5347660c556689256438defae3bcefd66c52b095905730e404" +checksum = "a392db6c583ea4a912538afb86b7be7c5d8887d91604f50eb55c262ee1b4a5f5" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -873,9 +880,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.9.1" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3683c5b152d2ad753607179ed71988e8cfd52964443b4f74fd8e552d0bbfeb46" +checksum = "ab0d43d899f9e508300e587bf582ba54c27a452dd0a9ea294690669138ae14a2" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -890,9 +897,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.3.3" +version = "1.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f5b3a7486f6690ba25952cabf1e7d75e34d69eaff5081904a47bc79074d6457" +checksum = "905cb13a9895626d49cf2ced759b062d913834c7482c38e49557eac4e6193f01" dependencies = [ "base64-simd", "bytes", @@ -916,18 +923,18 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.11" +version = "0.60.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9c34127e8c624bc2999f3b657e749c1393bedc9cd97b92a804db8ced4d2e163" +checksum = "11b2f670422ff42bf7065031e72b45bc52a3508bd089f743ea90731ca2b6ea57" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "1.3.9" +version = "1.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2fd329bf0e901ff3f60425691410c69094dc2a1f34b331f37bfc4e9ac1565a1" +checksum = "d79fb68e3d7fe5d4833ea34dc87d2e97d26d3086cb3da660bb6b1f76d98680b6" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -948,12 +955,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - [[package]] name = "base64" version = "0.22.1" @@ 
-972,9 +973,9 @@ dependencies = [ [[package]] name = "base64ct" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" +checksum = "0e050f626429857a27ddccb31e0aca21356bfa709c04041aefddac081a8f068a" [[package]] name = "bigdecimal" @@ -996,26 +997,6 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" -[[package]] -name = "bindgen" -version = "0.72.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" -dependencies = [ - "bitflags", - "cexpr", - "clang-sys", - "itertools 0.13.0", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", - "syn 2.0.108", -] - [[package]] name = "bitflags" version = "2.10.0" @@ -1099,14 +1080,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "borsh" -version = "1.5.7" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad8646f98db542e39fc66e68a20b2144f6a732636df7c2354e74645faaa433ce" +checksum = "d1da5ab77c1437701eeff7c88d968729e7766172279eab0676857b3d63af7a6f" dependencies = [ "borsh-derive", "cfg_aliases", @@ -1114,15 +1095,15 @@ dependencies = [ [[package]] name = "borsh-derive" -version = "1.5.7" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd1d3c0c2f5833f22386f252fe8ed005c7f59fdcddeef025c01b4c3b9fd9ac3" +checksum = "0686c856aa6aac0c4498f936d7d6a02df690f614c03e4d906d1018062b5c5e2c" dependencies = [ "once_cell", "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1244,9 +1225,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.43" +version = "1.2.49" source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "739eb0f94557554b3ca9a86d2d37bebd49c5e6d0c1d2bda35ba5bdac830befc2" +checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215" dependencies = [ "find-msvc-tools", "jobserver", @@ -1254,15 +1235,6 @@ dependencies = [ "shlex", ] -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] - [[package]] name = "cfg-if" version = "1.0.4" @@ -1309,22 +1281,11 @@ dependencies = [ "inout", ] -[[package]] -name = "clang-sys" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" -dependencies = [ - "glob", - "libc", - "libloading", -] - [[package]] name = "clap" -version = "4.5.50" +version = "4.5.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623" +checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" dependencies = [ "clap_builder", "clap_derive", @@ -1332,9 +1293,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.50" +version = "4.5.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0" +checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" dependencies = [ "anstream", "anstyle", @@ -1351,7 +1312,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1395,12 +1356,11 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.1.2" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" +checksum = 
"b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" dependencies = [ - "strum 0.26.3", - "strum_macros 0.26.4", + "unicode-segmentation", "unicode-width 0.2.2", ] @@ -1458,16 +1418,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" -[[package]] -name = "core-foundation" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "core-foundation" version = "0.10.1" @@ -1495,9 +1445,9 @@ dependencies = [ [[package]] name = "crc" -version = "3.3.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" dependencies = [ "crc-catalog", ] @@ -1567,9 +1517,9 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "typenum", @@ -1603,7 +1553,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501" dependencies = [ "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1637,7 +1587,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1651,7 +1601,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1662,7 +1612,7 @@ checksum = 
"fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1673,7 +1623,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core 0.21.3", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1692,12 +1642,11 @@ dependencies = [ [[package]] name = "datafusion" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a" +checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" dependencies = [ "arrow", - "arrow-ipc", "arrow-schema", "async-trait", "bytes", @@ -1708,6 +1657,7 @@ dependencies = [ "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", + "datafusion-datasource-arrow", "datafusion-datasource-avro", "datafusion-datasource-csv", "datafusion-datasource-json", @@ -1730,7 +1680,6 @@ dependencies = [ "datafusion-sql", "flate2", "futures", - "hex", "itertools 0.14.0", "log", "object_store", @@ -1738,6 +1687,7 @@ dependencies = [ "parquet", "rand 0.9.2", "regex", + "rstest", "sqlparser", "tempfile", "tokio", @@ -1749,9 +1699,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "187622262ad8f7d16d3be9202b4c1e0116f1c9aa387e5074245538b755261621" +checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" dependencies = [ "arrow", "async-trait", @@ -1764,7 +1714,6 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-session", - "datafusion-sql", "futures", "itertools 0.14.0", "log", @@ -1775,9 +1724,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "50.3.0" +version = "51.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9657314f0a32efd0382b9a46fdeb2d233273ece64baa68a7c45f5a192daf0f83" +checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" dependencies = [ "arrow", "async-trait", @@ -1787,10 +1736,11 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-plan", - "datafusion-session", "futures", + "itertools 0.14.0", "log", "object_store", "tokio", @@ -1798,16 +1748,18 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a0b9c821d14e79070f42ea3a6d6618ced04d94277f0a32301918d7a022c250f" +checksum = "fab982df44f818a749cb5200504ccb919f4608cb9808daf8b3fb98aa7955fd1e" dependencies = [ "arrow", "async-trait", "aws-config", "aws-credential-types", + "chrono", "clap", "datafusion", + "datafusion-common", "dirs", "env_logger", "futures", @@ -1824,20 +1776,19 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1" +checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" dependencies = [ "ahash 0.8.12", "apache-avro 0.20.0", "arrow", "arrow-ipc", - "base64 0.22.1", "chrono", "half", "hashbrown 0.14.5", "hex", - "indexmap 2.12.0", + "indexmap 2.12.1", "libc", "log", "object_store", @@ -1851,9 +1802,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e" +checksum = "b92065bbc6532c6651e2f7dd30b55cba0c7a14f860c7e1d15f165c41a1868d95" dependencies = [ "futures", 
"log", @@ -1862,9 +1813,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86" +checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" dependencies = [ "arrow", "async-compression", @@ -1887,9 +1838,7 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "parquet", "rand 0.9.2", - "tempfile", "tokio", "tokio-util", "url", @@ -1897,47 +1846,64 @@ dependencies = [ "zstd", ] +[[package]] +name = "datafusion-datasource-arrow" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" +dependencies = [ + "arrow", + "arrow-ipc", + "async-trait", + "bytes", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools 0.14.0", + "object_store", + "tokio", +] + [[package]] name = "datafusion-datasource-avro" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10d40b6953ebc9099b37adfd12fde97eb73ff0cee44355c6dea64b8a4537d561" +checksum = "388ed8be535f562cc655b9c3d22edbfb0f1a50a25c242647a98b6d92a75b55a1" dependencies = [ "apache-avro 0.20.0", "arrow", "async-trait", "bytes", - "chrono", - "datafusion-catalog", "datafusion-common", "datafusion-datasource", - "datafusion-execution", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", "num-traits", "object_store", - "tokio", ] [[package]] name = "datafusion-datasource-csv" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "64533a90f78e1684bfb113d200b540f18f268134622d7c96bbebc91354d04825" +checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", @@ -1949,74 +1915,67 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d7ebeb12c77df0aacad26f21b0d033aeede423a64b2b352f53048a75bf1d6e6" +checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", "object_store", - "serde_json", "tokio", ] [[package]] name = "datafusion-datasource-parquet" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09e783c4c7d7faa1199af2df4761c68530634521b176a8d1331ddbc5a5c75133" +checksum = "43d0b60ffd66f28bfb026565d62b0a6cbc416da09814766a3797bba7d85a3cd9" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", - "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-pruning", "datafusion-session", "futures", - "hex", "itertools 
0.14.0", "log", "object_store", "parking_lot", "parquet", - "rand 0.9.2", "tokio", ] [[package]] name = "datafusion-doc" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429" +checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" [[package]] name = "datafusion-execution" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531" +checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" dependencies = [ "arrow", "async-trait", @@ -2035,9 +1994,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe" +checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" dependencies = [ "arrow", "async-trait", @@ -2048,7 +2007,8 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.12.0", + "indexmap 2.12.1", + "itertools 0.14.0", "paste", "recursive", "serde_json", @@ -2057,26 +2017,26 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6" +checksum = "5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" dependencies = [ "arrow", "datafusion-common", - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-functions" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" +checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" dependencies = [ "arrow", "arrow-buffer", - "base64 0.22.1", + "base64", "blake2", "blake3", "chrono", @@ -2090,6 +2050,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "num-traits", "rand 0.9.2", "regex", "sha2", @@ -2099,9 +2060,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf" +checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" dependencies = [ "ahash 0.8.12", "arrow", @@ -2120,9 +2081,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17" +checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" dependencies = [ "ahash 0.8.12", "arrow", @@ -2133,9 +2094,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3" +checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" dependencies = [ "arrow", "arrow-ord", @@ -2143,6 +2104,7 @@ dependencies = [ "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", @@ -2155,9 +2117,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8c37ff8a99434fbbad604a7e0669717c58c7c4f14c472d45067c4b016621d981" +checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" dependencies = [ "arrow", "async-trait", @@ -2171,9 +2133,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b" +checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" dependencies = [ "arrow", "datafusion-common", @@ -2189,9 +2151,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f" +checksum = "53ae9bcc39800820d53a22d758b3b8726ff84a5a3e24cecef04ef4e5fdf1c7cc" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2199,20 +2161,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730" +checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = [ - "datafusion-expr", + "datafusion-doc", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "datafusion-optimizer" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a" +checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" dependencies = [ "arrow", "chrono", @@ -2220,7 +2182,7 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.14.0", "log", "recursive", @@ 
-2230,9 +2192,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6" +checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" dependencies = [ "ahash 0.8.12", "arrow", @@ -2243,9 +2205,8 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.14.0", - "log", "parking_lot", "paste", "petgraph 0.8.3", @@ -2253,9 +2214,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "815acced725d30601b397e39958e0e55630e0a10d66ef7769c14ae6597298bb0" +checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" dependencies = [ "arrow", "datafusion-common", @@ -2268,9 +2229,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3" +checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" dependencies = [ "ahash 0.8.12", "arrow", @@ -2282,9 +2243,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b7d623eb6162a3332b564a0907ba00895c505d101b99af78345f1acf929b5c" +checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" dependencies = [ "arrow", "datafusion-common", @@ -2296,15 +2257,14 @@ dependencies = [ "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", - "log", "recursive", ] [[package]] name = "datafusion-physical-plan" -version = 
"50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c" +checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" dependencies = [ "ahash 0.8.12", "arrow", @@ -2323,7 +2283,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.14.0", "log", "parking_lot", @@ -2333,12 +2293,11 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65" +checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-datasource", "datafusion-expr-common", @@ -2351,35 +2310,26 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d" +checksum = "8f96eebd17555386f459037c65ab73aae8df09f464524c709d6a3134ad4f4776" dependencies = [ - "arrow", "async-trait", - "dashmap", "datafusion-common", - "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-plan", - "datafusion-sql", - "futures", - "itertools 0.14.0", - "log", - "object_store", "parking_lot", - "tokio", ] [[package]] name = "datafusion-spark" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613efb6666a7d42fcb922b90cd0daa2b25ea486d141350e5d3e86e46df28309a" +checksum = "97a8d6fed24c80dd403dcc6afec33766a599d1b72575f222237f01429b2e58ba" dependencies = [ "arrow", + "bigdecimal", "chrono", "crc32fast", "datafusion-catalog", @@ -2387,24 +2337,24 @@ 
dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-functions", - "datafusion-macros", "log", + "rand 0.9.2", "sha1", "url", - "xxhash-rust", ] [[package]] name = "datafusion-sql" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b" +checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" dependencies = [ "arrow", "bigdecimal", + "chrono", "datafusion-common", "datafusion-expr", - "indexmap 2.12.0", + "indexmap 2.12.1", "log", "recursive", "regex", @@ -2413,9 +2363,9 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17598193dd875ca895400c51ccab1c30fceb1855220dc60aa415a4db7c95a2d7" +checksum = "a6830e357705e0d54fda6e3ce70a87c2b255197563c6463d668520cbfc1e0b7c" dependencies = [ "arrow", "async-trait", @@ -2440,14 +2390,15 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaa011a3814d91a03ab655ad41bbe5e57b203b2859281af8fe2c30aebbbcc5d9" +checksum = "2505af06d103a55b4e8ded0c6aeb6c72a771948da939c0bd3f8eee67af475a9c" dependencies = [ "async-recursion", "async-trait", "chrono", "datafusion", + "half", "itertools 0.14.0", "object_store", "pbjson-types", @@ -2497,7 +2448,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2507,7 +2458,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2546,7 +2497,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 
0.61.2", ] [[package]] @@ -2557,7 +2508,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2608,7 +2559,7 @@ dependencies = [ "enum-ordinalize", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2649,7 +2600,7 @@ checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2688,7 +2639,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -2782,9 +2733,9 @@ dependencies = [ [[package]] name = "find-msvc-tools" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" [[package]] name = "fixedbitset" @@ -2836,6 +2787,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -2939,7 +2896,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2954,6 +2911,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +[[package]] +name = 
"futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.31" @@ -2974,9 +2937,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.9" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", @@ -3039,7 +3002,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.12.0", + "indexmap 2.12.1", "slab", "tokio", "tokio-util", @@ -3058,7 +3021,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.4.0", - "indexmap 2.12.0", + "indexmap 2.12.1", "slab", "tokio", "tokio-util", @@ -3104,14 +3067,19 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ "allocator-api2", "equivalent", - "foldhash", + "foldhash 0.1.5", ] [[package]] name = "hashbrown" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] [[package]] name = "hashlink" @@ -3128,12 +3096,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" - [[package]] name = "hex" version = "0.4.3" @@ -3278,9 +3240,9 @@ dependencies = [ [[package]] 
name = "hyper" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" dependencies = [ "atomic-waker", "bytes", @@ -3310,7 +3272,6 @@ dependencies = [ "hyper 0.14.32", "log", "rustls 0.21.12", - "rustls-native-certs 0.6.3", "tokio", "tokio-rustls 0.24.1", ] @@ -3322,31 +3283,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ "http 1.4.0", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-util", - "rustls 0.23.34", - "rustls-native-certs 0.8.2", + "rustls 0.23.35", + "rustls-native-certs", "rustls-pki-types", "tokio", "tokio-rustls 0.26.4", "tower-service", - "webpki-roots 1.0.3", + "webpki-roots 1.0.4", ] [[package]] name = "hyper-util" -version = "0.1.17" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" +checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "futures-channel", "futures-core", "futures-util", "http 1.4.0", "http-body 1.0.1", - "hyper 1.7.0", + "hyper 1.8.1", "ipnet", "libc", "percent-encoding", @@ -3399,7 +3360,7 @@ dependencies = [ "as-any", "async-trait", "backon", - "base64 0.22.1", + "base64", "bimap", "bytes", "chrono", @@ -3433,9 +3394,8 @@ dependencies = [ "serde_json", "serde_repr", "serde_with", - "strum 0.27.2", + "strum", "tempfile", - "thrift", "tokio", "typed-builder", "url", @@ -3554,7 +3514,7 @@ dependencies = [ "itertools 0.13.0", "regex", "sqlx", - "strum 0.27.2", + "strum", "tempfile", "tokio", ] @@ -3777,21 +3737,21 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.0" +version = "2.12.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "serde", "serde_core", ] [[package]] name = "indicatif" -version = "0.18.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade6dfcba0dfb62ad59e59e7241ec8912af34fd29e0e743e3db992bd278e8b65" +checksum = "9375e112e4b463ec1b1c6c011953545c65a30164fbab5b581df32b3abf0dcb88" dependencies = [ "console", "portable-atomic", @@ -3834,9 +3794,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "iri-string" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" dependencies = [ "memchr", "serde", @@ -3874,28 +3834,28 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jiff" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" dependencies = [ "jiff-static", "jiff-tzdb-platform", "log", "portable-atomic", "portable-atomic-util", - "serde", - "windows-sys 0.59.0", + "serde_core", + "windows-sys 0.61.2", ] [[package]] name = "jiff-static" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" dependencies = [ "proc-macro2", "quote", - "syn 
2.0.108", + "syn 2.0.111", ] [[package]] @@ -3925,9 +3885,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.82" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" dependencies = [ "once_cell", "wasm-bindgen", @@ -3939,7 +3899,7 @@ version = "9.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" dependencies = [ - "base64 0.22.1", + "base64", "js-sys", "pem", "ring", @@ -4022,19 +3982,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.177" +version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" - -[[package]] -name = "libloading" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" -dependencies = [ - "cfg-if", - "windows-link", -] +checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" [[package]] name = "libm" @@ -4088,9 +4038,9 @@ dependencies = [ [[package]] name = "libz-rs-sys" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "840db8cf39d9ec4dd794376f38acc40d0fc65eec2a8f484f7fd375b84602becd" +checksum = "8b484ba8d4f775eeca644c452a56650e544bf7e617f1d170fe7298122ead5222" dependencies = [ "zlib-rs", ] @@ -4135,9 +4085,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = 
"5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "lru-slab" @@ -4147,9 +4097,9 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" -version = "0.11.5" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" dependencies = [ "twox-hash", ] @@ -4221,12 +4171,6 @@ dependencies = [ "serde", ] -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "miniz_oxide" version = "0.8.9" @@ -4239,9 +4183,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", "wasi", @@ -4271,25 +4215,26 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "mockito" -version = "1.7.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7760e0e418d9b7e5777c0374009ca4c93861b9066f18cb334a20ce50ab63aa48" +checksum = "7e0603425789b4a70fcc4ac4f5a46a566c116ee3e2a6b768dc623f7719c611de" dependencies = [ "assert-json-diff", "bytes", "colored", - "futures-util", + "futures-core", "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-util", "log", + "pin-project-lite", "rand 0.9.2", "regex", "serde_json", @@ -4339,7 +4284,7 @@ checksum = "b40e46c845ac234bcba19db7ab252bc2778cbadd516a466d2f12b1580852d136" dependencies = [ "proc-macro2", "quote", - "syn 
2.0.108", + "syn 2.0.111", ] [[package]] @@ -4365,7 +4310,7 @@ checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -4414,37 +4359,13 @@ dependencies = [ "libc", ] -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "nu-ansi-term" version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", -] - -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", + "windows-sys 0.61.2", ] [[package]] @@ -4460,11 +4381,10 @@ dependencies = [ [[package]] name = "num-bigint-dig" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" +checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" dependencies = [ - "byteorder", "lazy_static", "libm", "num-integer", @@ -4510,17 +4430,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -4531,16 +4440,6 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = 
"1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "num_enum" version = "0.7.5" @@ -4560,7 +4459,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -4579,7 +4478,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" dependencies = [ "async-trait", - "base64 0.22.1", + "base64", "bytes", "chrono", "form_urlencoded", @@ -4587,16 +4486,16 @@ dependencies = [ "http 1.4.0", "http-body-util", "humantime", - "hyper 1.7.0", + "hyper 1.8.1", "itertools 0.14.0", "md-5", "parking_lot", "percent-encoding", - "quick-xml 0.38.3", + "quick-xml 0.38.4", "rand 0.9.2", "reqwest", "ring", - "rustls-pemfile 2.2.0", + "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", @@ -4629,7 +4528,7 @@ checksum = "d075ab8a203a6ab4bc1bce0a4b9fe486a72bf8b939037f4b78d95386384bc80a" dependencies = [ "anyhow", "backon", - "base64 0.22.1", + "base64", "bytes", "crc32c", "futures", @@ -4640,7 +4539,7 @@ dependencies = [ "log", "md-5", "percent-encoding", - "quick-xml 0.38.3", + "quick-xml 0.38.4", "reqsign", "reqwest", "serde", @@ -4735,9 +4634,9 @@ dependencies = [ [[package]] name = "parquet" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" +checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -4747,17 +4646,18 @@ dependencies = [ "arrow-ipc", "arrow-schema", "arrow-select", - "base64 0.22.1", + "base64", "brotli", "bytes", "chrono", "flate2", "futures", "half", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "lz4_flex", - "num", "num-bigint", + "num-integer", + 
"num-traits", "object_store", "paste", "ring", @@ -4778,31 +4678,31 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "pbjson" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7e6349fa080353f4a597daffd05cb81572a9c031a6d4fff7e504947496fcc68" +checksum = "898bac3fa00d0ba57a4e8289837e965baa2dee8c3749f3b11d45a64b4223d9c3" dependencies = [ - "base64 0.21.7", + "base64", "serde", ] [[package]] name = "pbjson-build" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" +checksum = "af22d08a625a2213a78dbb0ffa253318c5c79ce3133d32d296655a7bdfb02095" dependencies = [ "heck", - "itertools 0.13.0", + "itertools 0.14.0", "prost", "prost-types", ] [[package]] name = "pbjson-types" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e54e5e7bfb1652f95bc361d76f3c780d8e526b134b85417e774166ee941f0887" +checksum = "8e748e28374f10a330ee3bb9f29b828c0ac79831a32bab65015ad9b661ead526" dependencies = [ "bytes", "chrono", @@ -4829,7 +4729,7 @@ version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" dependencies = [ - "base64 0.22.1", + "base64", "serde_core", ] @@ -4855,7 +4755,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", - "indexmap 2.12.0", + "indexmap 2.12.1", ] [[package]] @@ -4866,7 +4766,7 @@ checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.12.0", + "indexmap 2.12.1", "serde", ] @@ -4927,7 +4827,7 @@ checksum = 
"6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5013,7 +4913,7 @@ version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbef655056b916eb868048276cfd5d6a7dea4f81560dfd047f97c8c6fe3fcfd4" dependencies = [ - "base64 0.22.1", + "base64", "byteorder", "bytes", "fallible-iterator", @@ -5103,7 +5003,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5112,7 +5012,7 @@ version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" dependencies = [ - "toml_edit 0.23.7", + "toml_edit 0.23.9", ] [[package]] @@ -5126,9 +5026,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" dependencies = [ "bytes", "prost-derive", @@ -5136,9 +5036,9 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" +checksum = "ac6c3320f9abac597dcbc668774ef006702672474aad53c6d596b62e487b40b1" dependencies = [ "heck", "itertools 0.14.0", @@ -5150,28 +5050,28 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.108", + "syn 2.0.111", "tempfile", ] [[package]] name = "prost-derive" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" dependencies = [ "anyhow", "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "prost-types" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" dependencies = [ "prost", ] @@ -5223,7 +5123,7 @@ checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5244,9 +5144,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.38.3" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89" +checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" dependencies = [ "memchr", "serde", @@ -5264,7 +5164,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.34", + "rustls 0.23.35", "socket2 0.6.1", "thiserror 2.0.17", "tokio", @@ -5284,7 +5184,7 @@ dependencies = [ "rand 0.9.2", "ring", "rustc-hash", - "rustls 0.23.34", + "rustls 0.23.35", "rustls-pki-types", "slab", "thiserror 2.0.17", @@ -5309,9 +5209,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.41" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] @@ -5425,7 +5325,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" 
dependencies = [ "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5465,7 +5365,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5505,14 +5405,20 @@ checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "regress" -version = "0.10.4" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145bb27393fe455dd64d6cbc8d059adfa392590a45eadf079c01b11857e7b010" +checksum = "2057b2325e68a893284d1538021ab90279adac1139957ca2a74426c6f118fb48" dependencies = [ - "hashbrown 0.15.5", + "hashbrown 0.16.1", "memchr", ] +[[package]] +name = "relative-path" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" + [[package]] name = "rend" version = "0.4.2" @@ -5536,7 +5442,7 @@ checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" dependencies = [ "anyhow", "async-trait", - "base64 0.22.1", + "base64", "chrono", "form_urlencoded", "getrandom 0.2.16", @@ -5562,11 +5468,11 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.24" +version = "0.12.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" +checksum = "b6eff9328d40131d43bd911d42d79eb6a47312002a4daefc9e37f17e74a7701a" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "futures-core", "futures-util", @@ -5574,7 +5480,7 @@ dependencies = [ "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-rustls 0.27.7", "hyper-util", "js-sys", @@ -5582,8 +5488,8 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.34", - "rustls-native-certs 0.8.2", + "rustls 0.23.35", + "rustls-native-certs", "rustls-pki-types", 
"serde", "serde_json", @@ -5600,7 +5506,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 1.0.3", + "webpki-roots 1.0.4", ] [[package]] @@ -5643,7 +5549,7 @@ checksum = "35a640b26f007713818e9a9b65d34da1cf58538207b052916a83d80e43f3ffa4" dependencies = [ "bytes", "hashbrown 0.15.5", - "indexmap 2.12.0", + "indexmap 2.12.1", "munge", "ptr_meta 0.3.1", "rancor", @@ -5672,7 +5578,7 @@ checksum = "bd83f5f173ff41e00337d97f6572e416d022ef8a19f371817259ae960324c482" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5687,9 +5593,9 @@ dependencies = [ [[package]] name = "rsa" -version = "0.9.8" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78928ac1ed176a5ca1d17e578a1825f3d81ca54cf41053a592584b020cfd691b" +checksum = "40a0376c50d0358279d9d643e4bf7b7be212f1f4ff1da9070a7b54d22ef75c88" dependencies = [ "const-oid", "digest", @@ -5706,6 +5612,35 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rstest" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5a3193c063baaa2a95a33f03035c8a72b83d97a54916055ba22d35ed3839d49" +dependencies = [ + "futures-timer", + "futures-util", + "rstest_macros", +] + +[[package]] +name = "rstest_macros" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0" +dependencies = [ + "cfg-if", + "glob", + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "relative-path", + "rustc_version", + "syn 2.0.111", + "unicode-ident", +] + [[package]] name = "rust-ini" version = "0.21.3" @@ -5761,7 +5696,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -5778,9 +5713,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.34" +version = "0.23.35" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9586e9ee2b4f8fab52a0048ca7334d7024eef48e2cb9407e3497bb7cab7fa7" +checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ "aws-lc-rs", "once_cell", @@ -5791,18 +5726,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "rustls-native-certs" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" -dependencies = [ - "openssl-probe", - "rustls-pemfile 1.0.4", - "schannel", - "security-framework 2.11.1", -] - [[package]] name = "rustls-native-certs" version = "0.8.2" @@ -5812,16 +5735,7 @@ dependencies = [ "openssl-probe", "rustls-pki-types", "schannel", - "security-framework 3.5.1", -] - -[[package]] -name = "rustls-pemfile" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" -dependencies = [ - "base64 0.21.7", + "security-framework", ] [[package]] @@ -5835,9 +5749,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94182ad936a0c91c324cd46c6511b9510ed16af436d7b5bab34beab0afd55f7a" +checksum = "708c0f9d5f54ba0272468c1d306a52c495b31fa155e91bc25371e6df7996908c" dependencies = [ "web-time", "zeroize", @@ -5952,9 +5866,9 @@ dependencies = [ [[package]] name = "schemars" -version = "1.0.4" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" +checksum = "9558e172d4e8533736ba97870c4b2cd63f84b382a3d6eb063da41b91cce17289" dependencies = [ "dyn-clone", "ref-cast", @@ -5971,7 +5885,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6007,19 +5921,6 
@@ version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" -[[package]] -name = "security-framework" -version = "2.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" -dependencies = [ - "bitflags", - "core-foundation 0.9.4", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - [[package]] name = "security-framework" version = "3.5.1" @@ -6027,7 +5928,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" dependencies = [ "bitflags", - "core-foundation 0.10.1", + "core-foundation", "core-foundation-sys", "libc", "security-framework-sys", @@ -6096,7 +5997,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6107,7 +6008,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6131,7 +6032,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6152,7 +6053,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6169,17 +6070,17 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.16.0" +version = "3.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10574371d41b0d9b2cff89418eda27da52bcaff2cc8741db26382a77c29131f1" +checksum = "4fa237f2807440d238e0364a218270b98f767a00d3dada77b1c53ae88940e2e7" dependencies = [ - "base64 0.22.1", + "base64", "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.12.0", + "indexmap 
2.12.1", "schemars 0.9.0", - "schemars 1.0.4", + "schemars 1.1.0", "serde_core", "serde_json", "serde_with_macros", @@ -6188,14 +6089,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.16.0" +version = "3.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08a72d8216842fdd57820dc78d840bef99248e35fb2554ff923319e60f2d686b" +checksum = "52a8e3ca0ca629121f70ab50f95249e5a6f925cc0f6ffe8256c45b728875706c" dependencies = [ "darling 0.21.3", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6204,7 +6105,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "itoa", "ryu", "serde", @@ -6250,9 +6151,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.6" +version = "1.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad" dependencies = [ "libc", ] @@ -6269,9 +6170,9 @@ dependencies = [ [[package]] name = "simd-adler32" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" [[package]] name = "simdutf8" @@ -6375,9 +6276,9 @@ dependencies = [ [[package]] name = "sonic-simd" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b421f7b6aa4a5de8f685aaf398dfaa828346ee639d2b1c1061ab43d40baa6223" +checksum = "5707edbfb34a40c9f2a55fa09a49101d9fec4e0cc171ce386086bd9616f34257" dependencies = [ "cfg-if", ] 
@@ -6428,9 +6329,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.58.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", "recursive", @@ -6445,7 +6346,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6467,7 +6368,7 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "crc", "crossbeam-queue", @@ -6479,12 +6380,12 @@ dependencies = [ "futures-util", "hashbrown 0.15.5", "hashlink", - "indexmap 2.12.0", + "indexmap 2.12.1", "log", "memchr", "once_cell", "percent-encoding", - "rustls 0.23.34", + "rustls 0.23.35", "serde", "serde_json", "sha2", @@ -6507,7 +6408,7 @@ dependencies = [ "quote", "sqlx-core", "sqlx-macros-core", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6528,7 +6429,7 @@ dependencies = [ "sha2", "sqlx-core", "sqlx-sqlite", - "syn 2.0.108", + "syn 2.0.111", "tokio", "url", ] @@ -6540,7 +6441,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" dependencies = [ "atoi", - "base64 0.22.1", + "base64", "bitflags", "byteorder", "bytes", @@ -6581,7 +6482,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" dependencies = [ "atoi", - "base64 0.22.1", + "base64", "bitflags", "byteorder", "crc", @@ -6671,32 +6572,13 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" - [[package]] name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" dependencies = [ - "strum_macros 0.27.2", -] - -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.108", + "strum_macros", ] [[package]] @@ -6708,7 +6590,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6723,9 +6605,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.58.0" +version = "0.62.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de6d24c270c6c672a86c183c3a8439ba46c1936f93cf7296aa692de3b0ff0228" +checksum = "21f1cb6d0bcd097a39fc25f7236236be29881fe122e282e4173d6d007a929927" dependencies = [ "heck", "pbjson", @@ -6741,7 +6623,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.108", + "syn 2.0.111", "typify", "walkdir", ] @@ -6765,9 +6647,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.108" +version = "2.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" dependencies = [ "proc-macro2", "quote", @@ -6791,7 +6673,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] 
[[package]] @@ -6816,7 +6698,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -6851,7 +6733,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6862,7 +6744,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6874,15 +6756,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "threadpool" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" -dependencies = [ - "num_cpus", -] - [[package]] name = "thrift" version = "0.17.0" @@ -6891,9 +6764,7 @@ checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" dependencies = [ "byteorder", "integer-encoding 3.0.4", - "log", "ordered-float 2.10.1", - "threadpool", ] [[package]] @@ -6986,7 +6857,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7005,7 +6876,7 @@ version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls 0.23.34", + "rustls 0.23.35", "tokio", ] @@ -7022,9 +6893,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.16" +version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" dependencies = [ "bytes", "futures-core", @@ -7069,7 +6940,7 @@ version = "0.22.27" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "serde", "serde_spanned", "toml_datetime 0.6.11", @@ -7079,11 +6950,11 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.23.7" +version = "0.23.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" +checksum = "5d7cbc3b4b49633d57a0509303158ca50de80ae32c265093b24c414705807832" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "toml_datetime 0.7.3", "toml_parser", "winnow", @@ -7121,9 +6992,9 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ "bitflags", "bytes", @@ -7151,9 +7022,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647" dependencies = [ "log", "pin-project-lite", @@ -7163,20 +7034,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.35" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" dependencies = [ "once_cell", "valuable", @@ -7195,9 +7066,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.20" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" dependencies = [ "nu-ansi-term", "sharded-slab", @@ -7236,7 +7107,7 @@ checksum = "3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7247,9 +7118,9 @@ checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "typify" -version = "0.4.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7144144e97e987c94758a3017c920a027feac0799df325d6df4fc8f08d02068e" +checksum = "e6d5bcc6f62eb1fa8aa4098f39b29f93dcb914e17158b76c50360911257aa629" dependencies = [ "typify-impl", "typify-macro", @@ -7257,9 +7128,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.4.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "062879d46aa4c9dfe0d33b035bbaf512da192131645d05deacb7033ec8581a09" +checksum = "a1eb359f7ffa4f9ebe947fa11a1b2da054564502968db5f317b7e37693cb2240" dependencies = [ "heck", "log", @@ -7270,16 +7141,16 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.108", + "syn 2.0.111", "thiserror 2.0.17", "unicode-ident", ] [[package]] name = "typify-macro" -version = "0.4.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9708a3ceb6660ba3f8d2b8f0567e7d4b8b198e2b94d093b8a6077a751425de9e" +checksum = "911c32f3c8514b048c1b228361bebb5e6d73aeec01696e8cc0e82e2ffef8ab7a" dependencies = [ "proc-macro2", "quote", @@ -7288,7 +7159,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.108", + "syn 2.0.111", "typify-impl", ] @@ -7300,24 +7171,24 @@ checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] name = "unicode-ident" -version = "1.0.20" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "unicode-normalization" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" dependencies = [ "tinyvec", ] [[package]] name = "unicode-properties" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" [[package]] name = "unicode-segmentation" @@ -7339,9 +7210,9 @@ checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] name = "unit-prefix" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "323402cff2dd658f39ca17c789b502021b3f18707c91cdf22e3838e1b4023817" +checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" [[package]] name = "unsafe-libyaml" @@ -7519,9 +7390,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.105" +version = 
"0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" dependencies = [ "cfg-if", "once_cell", @@ -7532,9 +7403,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.55" +version = "0.4.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" dependencies = [ "cfg-if", "js-sys", @@ -7545,9 +7416,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -7555,22 +7426,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" dependencies = [ "unicode-ident", ] @@ -7590,9 +7461,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.82" +version = "0.3.83" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" dependencies = [ "js-sys", "wasm-bindgen", @@ -7614,14 +7485,14 @@ version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "webpki-roots 1.0.3", + "webpki-roots 1.0.4", ] [[package]] name = "webpki-roots" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b130c0d2d49f8b6889abc456e795e82525204f27c42cf767cf0d7734e089b8" +checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" dependencies = [ "rustls-pki-types", ] @@ -7642,7 +7513,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] @@ -7666,7 +7537,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7677,7 +7548,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7937,9 +7808,9 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" -version = "0.7.13" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" dependencies = [ "memchr", ] @@ -7971,12 +7842,6 @@ version = "0.13.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" -[[package]] -name = "xxhash-rust" -version = "0.8.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" - [[package]] name = "xz2" version = "0.1.7" @@ -8011,28 +7876,28 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.27" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.27" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -8052,7 +7917,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", "synstructure", ] @@ -8092,14 +7957,14 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "zlib-rs" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f06ae92f42f5e5c42443fd094f245eb656abf56dd7cce9b8b263236565e00f2" +checksum = "36134c44663532e6519d7a6dfdbbe06f6f8192bde8ae9ed076e9b213f0e31df7" [[package]] name = "zstd" diff --git 
a/Cargo.toml b/Cargo.toml index 9904820dea..ded3aedecb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,14 +42,14 @@ rust-version = "1.88" anyhow = "1.0.72" apache-avro = { version = "0.21", features = ["zstandard"] } array-init = "2" -arrow-arith = "56.2" -arrow-array = "56.2" -arrow-buffer = "56.2" -arrow-cast = "56.2" -arrow-ord = "56.2" -arrow-schema = "56.2" -arrow-select = "56.2" -arrow-string = "56.2" +arrow-arith = "57.0" +arrow-array = "57.0" +arrow-buffer = "57.0" +arrow-cast = "57.0" +arrow-ord = "57.0" +arrow-schema = "57.0" +arrow-select = "57.0" +arrow-string = "57.0" as-any = "0.3.2" async-trait = "0.1.89" aws-config = "1.8.7" @@ -62,9 +62,9 @@ bytes = "1.10" chrono = "0.4.41" clap = { version = "4.5.48", features = ["derive", "cargo"] } ctor = "0.2.8" -datafusion = "50" -datafusion-cli = "50" -datafusion-sqllogictest = "50" +datafusion = "51.0" +datafusion-cli = "51.0" +datafusion-sqllogictest = "51.0" derive_builder = "0.20" dirs = "6" enum-ordinalize = "4.3.0" @@ -101,7 +101,7 @@ num-bigint = "0.4.6" once_cell = "1.20" opendal = "0.55.0" ordered-float = "4" -parquet = "56.2" +parquet = "57.0" pilota = "0.11.10" port_scanner = "0.1.5" pretty_assertions = "1.4" diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 814c9afb35..a7244cc8dd 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -180,9 +180,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +checksum = "4df8bb5b0bd64c0b9bc61317fcc480bad0f00e56d3bc32c69a4c8dada4786bae" dependencies = [ "arrow-arith", "arrow-array", @@ -202,23 +202,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +checksum = "a1a640186d3bd30a24cb42264c2dafb30e236a6f50d510e56d40b708c9582491" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num", + "num-traits", ] [[package]] name = "arrow-array" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +checksum = "219fe420e6800979744c8393b687afb0252b3f8a89b91027d27887b72aa36d31" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -228,25 +228,28 @@ dependencies = [ "chrono-tz", "half", "hashbrown 0.16.0", - "num", + "num-complex", + "num-integer", + "num-traits", ] [[package]] name = "arrow-buffer" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +checksum = "76885a2697a7edf6b59577f568b456afc94ce0e2edc15b784ce3685b6c3c5c27" dependencies = [ "bytes", "half", - "num", + "num-bigint", + "num-traits", ] [[package]] name = "arrow-cast" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +checksum = "9c9ebb4c987e6b3b236fb4a14b20b34835abfdd80acead3ccf1f9bf399e1f168" dependencies = [ "arrow-array", "arrow-buffer", @@ -259,15 +262,15 @@ dependencies = [ "comfy-table", "half", "lexical-core", - "num", + "num-traits", "ryu", ] [[package]] name = "arrow-csv" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" +checksum = "92386159c8d4bce96f8bd396b0642a0d544d471bdc2ef34d631aec80db40a09c" dependencies = [ "arrow-array", "arrow-cast", @@ -280,21 +283,22 @@ dependencies = [ [[package]] name = "arrow-data" -version = 
"56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +checksum = "727681b95de313b600eddc2a37e736dcb21980a40f640314dcf360e2f36bc89b" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num", + "num-integer", + "num-traits", ] [[package]] name = "arrow-ipc" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +checksum = "da9ba92e3de170295c98a84e5af22e2b037f0c7b32449445e6c493b5fca27f27" dependencies = [ "arrow-array", "arrow-buffer", @@ -308,9 +312,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" +checksum = "b969b4a421ae83828591c6bf5450bd52e6d489584142845ad6a861f42fe35df8" dependencies = [ "arrow-array", "arrow-buffer", @@ -320,19 +324,21 @@ dependencies = [ "chrono", "half", "indexmap 2.12.0", + "itoa", "lexical-core", "memchr", - "num", - "serde", + "num-traits", + "ryu", + "serde_core", "serde_json", "simdutf8", ] [[package]] name = "arrow-ord" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +checksum = "141c05298b21d03e88062317a1f1a73f5ba7b6eb041b350015b1cd6aabc0519b" dependencies = [ "arrow-array", "arrow-buffer", @@ -343,9 +349,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d924b32e96f8bb74d94cd82bd97b313c432fcb0ea331689ef9e7c6b8be4b258" +checksum = "cfcfb2be2e9096236f449c11f425cddde18c4cc540f516d90f066f10a29ed515" dependencies = [ "arrow-array", "arrow-data", 
@@ -355,9 +361,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +checksum = "c5f3c06a6abad6164508ed283c7a02151515cef3de4b4ff2cebbcaeb85533db2" dependencies = [ "arrow-array", "arrow-buffer", @@ -368,34 +374,34 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +checksum = "9cfa7a03d1eee2a4d061476e1840ad5c9867a544ca6c4c59256496af5d0a8be5" dependencies = [ "bitflags", - "serde", + "serde_core", "serde_json", ] [[package]] name = "arrow-select" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +checksum = "bafa595babaad59f2455f4957d0f26448fb472722c186739f4fac0823a1bdb47" dependencies = [ "ahash 0.8.12", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num", + "num-traits", ] [[package]] name = "arrow-string" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +checksum = "32f46457dbbb99f2650ff3ac23e46a929e0ab81db809b02aa5511c258348bef2" dependencies = [ "arrow-array", "arrow-buffer", @@ -403,7 +409,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num", + "num-traits", "regex", "regex-syntax", ] @@ -1029,11 +1035,9 @@ dependencies = [ [[package]] name = "datafusion" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a" +source = 
"git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", - "arrow-ipc", "arrow-schema", "async-trait", "bytes", @@ -1044,6 +1048,7 @@ dependencies = [ "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", + "datafusion-datasource-arrow", "datafusion-datasource-csv", "datafusion-datasource-json", "datafusion-datasource-parquet", @@ -1072,6 +1077,7 @@ dependencies = [ "parquet", "rand 0.9.2", "regex", + "rstest", "sqlparser", "tempfile", "tokio", @@ -1084,8 +1090,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "187622262ad8f7d16d3be9202b4c1e0116f1c9aa387e5074245538b755261621" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", @@ -1098,7 +1103,6 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-session", - "datafusion-sql", "futures", "itertools 0.14.0", "log", @@ -1110,8 +1114,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9657314f0a32efd0382b9a46fdeb2d233273ece64baa68a7c45f5a192daf0f83" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", @@ -1121,10 +1124,11 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-plan", - "datafusion-session", "futures", + "itertools 0.14.0", "log", "object_store", "tokio", @@ -1133,13 +1137,11 @@ dependencies = [ [[package]] name = "datafusion-common" version 
= "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "ahash 0.8.12", "arrow", "arrow-ipc", - "base64", "chrono", "half", "hashbrown 0.14.5", @@ -1158,8 +1160,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "futures", "log", @@ -1169,8 +1170,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-compression", @@ -1193,9 +1193,7 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "parquet", "rand 0.9.2", - "tempfile", "tokio", "tokio-util", "url", @@ -1203,22 +1201,42 @@ dependencies = [ "zstd", ] +[[package]] +name = "datafusion-datasource-arrow" +version = "50.3.0" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +dependencies = [ + "arrow", + "arrow-ipc", + "async-trait", + "bytes", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + 
"itertools 0.14.0", + "object_store", + "tokio", +] + [[package]] name = "datafusion-datasource-csv" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64533a90f78e1684bfb113d200b540f18f268134622d7c96bbebc91354d04825" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", @@ -1231,48 +1249,41 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d7ebeb12c77df0aacad26f21b0d033aeede423a64b2b352f53048a75bf1d6e6" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", "object_store", - "serde_json", "tokio", ] [[package]] name = "datafusion-datasource-parquet" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09e783c4c7d7faa1199af2df4761c68530634521b176a8d1331ddbc5a5c75133" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", 
"datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", - "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-pruning", "datafusion-session", @@ -1282,21 +1293,18 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand 0.9.2", "tokio", ] [[package]] name = "datafusion-doc" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" [[package]] name = "datafusion-execution" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", @@ -1315,8 +1323,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", @@ -1328,6 +1335,7 @@ dependencies = [ "datafusion-functions-window-common", "datafusion-physical-expr-common", "indexmap 2.12.0", + "itertools 0.14.0", "paste", "recursive", "serde_json", @@ -1337,8 +1345,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "datafusion-common", @@ -1350,8 +1357,7 @@ dependencies = [ [[package]] name = "datafusion-ffi" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25ddb7c4e645df080c27dad13a198d191da328dd1c98e198664a7a0f64b335cc" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "abi_stable", "arrow", @@ -1359,6 +1365,7 @@ dependencies = [ "async-ffi", "async-trait", "datafusion", + "datafusion-common", "datafusion-functions-aggregate-common", "datafusion-proto", "datafusion-proto-common", @@ -1372,8 +1379,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "arrow-buffer", @@ -1391,6 +1397,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "num-traits", "rand 0.9.2", "regex", "sha2", @@ -1401,8 +1408,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "ahash 0.8.12", "arrow", @@ -1422,8 +1428,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "50.3.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "ahash 0.8.12", "arrow", @@ -1435,8 +1440,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "arrow-ord", @@ -1444,6 +1448,7 @@ dependencies = [ "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", @@ -1457,8 +1462,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37ff8a99434fbbad604a7e0669717c58c7c4f14c472d45067c4b016621d981" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "async-trait", @@ -1473,8 +1477,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "datafusion-common", @@ -1491,8 +1494,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "50.3.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1501,10 +1503,9 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ - "datafusion-expr", + "datafusion-doc", "quote", "syn 2.0.108", ] @@ -1512,8 +1513,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "chrono", @@ -1532,8 +1532,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "ahash 0.8.12", "arrow", @@ -1546,7 +1545,6 @@ dependencies = [ "hashbrown 0.14.5", "indexmap 2.12.0", "itertools 0.14.0", - "log", "parking_lot", "paste", "petgraph", @@ -1555,8 +1553,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "815acced725d30601b397e39958e0e55630e0a10d66ef7769c14ae6597298bb0" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "datafusion-common", @@ -1570,8 +1567,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "ahash 0.8.12", "arrow", @@ -1584,8 +1580,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b7d623eb6162a3332b564a0907ba00895c505d101b99af78345f1acf929b5c" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "datafusion-common", @@ -1597,15 +1592,13 @@ dependencies = [ "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", - "log", "recursive", ] [[package]] name = "datafusion-physical-plan" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "ahash 0.8.12", "arrow", @@ -1635,14 +1628,24 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7df9f606892e6af45763d94d210634eec69b9bb6ced5353381682ff090028a3" +source = 
"git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "chrono", - "datafusion", + "datafusion-catalog", + "datafusion-catalog-listing", "datafusion-common", + "datafusion-datasource", + "datafusion-datasource-arrow", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", + "datafusion-execution", "datafusion-expr", + "datafusion-functions-table", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", "datafusion-proto-common", "object_store", "prost", @@ -1651,8 +1654,7 @@ dependencies = [ [[package]] name = "datafusion-proto-common" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4b14f288ca4ef77743d9672cafecf3adfffff0b9b04af9af79ecbeaaf736901" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "datafusion-common", @@ -1662,11 +1664,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-datasource", "datafusion-expr-common", @@ -1680,35 +1680,24 @@ dependencies = [ [[package]] name = "datafusion-session" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ - 
"arrow", "async-trait", - "dashmap", "datafusion-common", - "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-plan", - "datafusion-sql", - "futures", - "itertools 0.14.0", - "log", - "object_store", "parking_lot", - "tokio", ] [[package]] name = "datafusion-sql" version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b" +source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" dependencies = [ "arrow", "bigdecimal", + "chrono", "datafusion-common", "datafusion-expr", "indexmap 2.12.0", @@ -1993,6 +1982,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.31" @@ -2784,20 +2779,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b" -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - [[package]] name = "num-bigint" version = "0.4.6" @@ -2833,28 +2814,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" 
-dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -3001,9 +2960,9 @@ dependencies = [ [[package]] name = "parquet" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" +checksum = "7a0f31027ef1af7549f7cec603a9a21dce706d3f8d7c2060a68f43c1773be95a" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -3022,11 +2981,11 @@ dependencies = [ "half", "hashbrown 0.16.0", "lz4_flex", - "num", "num-bigint", + "num-integer", + "num-traits", "object_store", "paste", - "ring", "seq-macro", "simdutf8", "snap", @@ -3156,9 +3115,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" dependencies = [ "bytes", "prost-derive", @@ -3166,9 +3125,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" dependencies = [ "anyhow", "itertools 0.14.0", @@ -3221,9 +3180,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8970a78afe0628a3e3430376fc5fd76b6b45c4d43360ffd6cdd40bdde72b682a" +checksum = 
"7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" dependencies = [ "indoc", "libc", @@ -3238,19 +3197,18 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "458eb0c55e7ece017adeba38f2248ff3ac615e53660d7c71a238d7d2a01c7598" +checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" dependencies = [ - "once_cell", "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7114fe5457c61b276ab77c5055f206295b812608083644a5c5b2640c3102565c" +checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" dependencies = [ "libc", "pyo3-build-config", @@ -3258,9 +3216,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8725c0a622b374d6cb051d11a0983786448f7785336139c3c94f5aa6bef7e50" +checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -3270,9 +3228,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4109984c22491085343c05b0dbc54ddc405c3cf7b4374fc533f5c3313a572ccc" +checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" dependencies = [ "heck", "proc-macro2", @@ -3526,6 +3484,12 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "relative-path" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" + 
[[package]] name = "rend" version = "0.4.2" @@ -3667,6 +3631,35 @@ dependencies = [ "byteorder", ] +[[package]] +name = "rstest" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5a3193c063baaa2a95a33f03035c8a72b83d97a54916055ba22d35ed3839d49" +dependencies = [ + "futures-timer", + "futures-util", + "rstest_macros", +] + +[[package]] +name = "rstest_macros" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0" +dependencies = [ + "cfg-if", + "glob", + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "relative-path", + "rustc_version", + "syn 2.0.108", + "unicode-ident", +] + [[package]] name = "rust-ini" version = "0.21.3" @@ -4008,9 +4001,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.58.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", "recursive", diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 953d4a98fc..d8b8444195 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -19,7 +19,7 @@ edition = "2024" homepage = "https://rust.iceberg.apache.org" name = "pyiceberg_core_rust" -rust-version = "1.87" +rust-version = "1.88" version = "0.7.0" # This crate is used to build python bindings, we don't want to publish it publish = false @@ -31,11 +31,11 @@ license = "Apache-2.0" crate-type = ["cdylib"] [dependencies] -arrow = { version = "56", features = ["pyarrow", "chrono-tz"] } +arrow = { version = "57.0", features = ["pyarrow", "chrono-tz"] } iceberg = { path = "../../crates/iceberg" } -pyo3 = { version = "0.25", features = ["extension-module", "abi3-py310"] } +pyo3 = { version = "0.26", features = 
["extension-module", "abi3-py310"] } iceberg-datafusion = { path = "../../crates/integrations/datafusion" } -datafusion-ffi = { version = "50" } +datafusion-ffi = { version = "51.0" } tokio = { version = "1.46.1", default-features = false } [profile.release] diff --git a/bindings/python/src/transform.rs b/bindings/python/src/transform.rs index 24e9f061dd..c159d573fc 100644 --- a/bindings/python/src/transform.rs +++ b/bindings/python/src/transform.rs @@ -24,46 +24,46 @@ use pyo3::prelude::*; use crate::error::to_py_err; #[pyfunction] -pub fn identity(py: Python, array: PyObject) -> PyResult { +pub fn identity(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Identity) } #[pyfunction] -pub fn void(py: Python, array: PyObject) -> PyResult { +pub fn void(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Void) } #[pyfunction] -pub fn year(py: Python, array: PyObject) -> PyResult { +pub fn year(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Year) } #[pyfunction] -pub fn month(py: Python, array: PyObject) -> PyResult { +pub fn month(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Month) } #[pyfunction] -pub fn day(py: Python, array: PyObject) -> PyResult { +pub fn day(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Day) } #[pyfunction] -pub fn hour(py: Python, array: PyObject) -> PyResult { +pub fn hour(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Hour) } #[pyfunction] -pub fn bucket(py: Python, array: PyObject, num_buckets: u32) -> PyResult { +pub fn bucket(py: Python, array: Py, num_buckets: u32) -> PyResult> { apply(py, array, Transform::Bucket(num_buckets)) } #[pyfunction] -pub fn truncate(py: Python, array: PyObject, width: u32) -> PyResult { +pub fn truncate(py: Python, array: Py, width: u32) -> PyResult> { apply(py, array, Transform::Truncate(width)) } -fn apply(py: Python, array: PyObject, transform: Transform) -> PyResult { +fn apply(py: Python, array: Py, 
transform: Transform) -> PyResult> { // import let array = ArrayData::from_pyarrow_bound(array.bind(py))?; let array = make_array(array); @@ -71,7 +71,7 @@ fn apply(py: Python, array: PyObject, transform: Transform) -> PyResult, m: &Bound<'_, PyModule>) -> PyResult<()> { diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 84c90d466f..6f1332a444 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -84,7 +84,6 @@ serde_json = { workspace = true } serde_repr = { workspace = true } serde_with = { workspace = true } strum = { workspace = true, features = ["derive"] } -thrift = { workspace = true } tokio = { workspace = true, optional = false, features = ["sync"] } typed-builder = { workspace = true } url = { workspace = true } diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs index 380d48530c..6209c1e261 100644 --- a/crates/iceberg/src/arrow/reader.rs +++ b/crates/iceberg/src/arrow/reader.rs @@ -1038,7 +1038,6 @@ fn apply_name_mapping_to_arrow_schema( metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field_id.to_string()); } // If field_id is None, leave the field without an ID (will be filtered by projection) - // If field not found in mapping, leave it without an ID (will be filtered by projection) Field::new(field.name(), field.data_type().clone(), field.is_nullable()) .with_metadata(metadata) @@ -1915,7 +1914,7 @@ message schema { assert_eq!(err.kind(), ErrorKind::DataInvalid); assert_eq!( err.to_string(), - "DataInvalid => Unsupported Arrow data type: Duration(Microsecond)".to_string() + "DataInvalid => Unsupported Arrow data type: Duration(µs)".to_string() ); // Omitting field c2, we still get an error due to c3 being selected @@ -2141,7 +2140,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = 
ArrowWriter::try_new(file, to_write.schema(), Some(props.clone())).unwrap(); @@ -2322,7 +2321,7 @@ message schema { let tmp_dir = TempDir::new().unwrap(); let table_location = tmp_dir.path().to_str().unwrap().to_string(); - let file_path = format!("{}/multi_row_group.parquet", &table_location); + let file_path = format!("{table_location}/multi_row_group.parquet"); // Force each batch into its own row group for testing byte range filtering. let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from( @@ -2526,7 +2525,7 @@ message schema { let props = WriterProperties::builder() .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/old_file.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/old_file.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); writer.close().unwrap(); @@ -2632,7 +2631,7 @@ message schema { // Step 1: Create data file with 200 rows in 2 row groups // Row group 0: rows 0-99 (ids 1-100) // Row group 1: rows 100-199 (ids 101-200) - let data_file_path = format!("{}/data.parquet", &table_location); + let data_file_path = format!("{table_location}/data.parquet"); let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( Int32Array::from_iter_values(1..=100), @@ -2666,7 +2665,7 @@ message schema { ); // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1) - let delete_file_path = format!("{}/deletes.parquet", &table_location); + let delete_file_path = format!("{table_location}/deletes.parquet"); let delete_schema = Arc::new(ArrowSchema::new(vec![ Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( @@ -2825,7 +2824,7 @@ message schema { // Step 1: Create data file with 200 rows in 2 row groups // Row group 0: rows 0-99 (ids 1-100) // Row group 1: rows 100-199 (ids 101-200) - let 
data_file_path = format!("{}/data.parquet", &table_location); + let data_file_path = format!("{table_location}/data.parquet"); let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( Int32Array::from_iter_values(1..=100), @@ -2859,7 +2858,7 @@ message schema { ); // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1) - let delete_file_path = format!("{}/deletes.parquet", &table_location); + let delete_file_path = format!("{table_location}/deletes.parquet"); let delete_schema = Arc::new(ArrowSchema::new(vec![ Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( @@ -3046,7 +3045,7 @@ message schema { // Step 1: Create data file with 200 rows in 2 row groups // Row group 0: rows 0-99 (ids 1-100) // Row group 1: rows 100-199 (ids 101-200) - let data_file_path = format!("{}/data.parquet", &table_location); + let data_file_path = format!("{table_location}/data.parquet"); let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( Int32Array::from_iter_values(1..=100), @@ -3080,7 +3079,7 @@ message schema { ); // Step 2: Create position delete file that deletes row 0 (id=1, first row in row group 0) - let delete_file_path = format!("{}/deletes.parquet", &table_location); + let delete_file_path = format!("{table_location}/deletes.parquet"); let delete_schema = Arc::new(ArrowSchema::new(vec![ Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( @@ -3226,7 +3225,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3323,7 +3322,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", 
&table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3409,7 +3408,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3497,7 +3496,7 @@ message schema { .set_max_row_group_size(2) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); // Write 6 rows in 3 batches (will create 3 row groups) @@ -3638,7 +3637,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3735,7 +3734,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); writer.close().unwrap(); @@ -3837,7 +3836,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = 
File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); writer.close().unwrap(); diff --git a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs index ae3a906275..66e2898532 100644 --- a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs @@ -23,7 +23,7 @@ use fnv::FnvHashSet; use ordered_float::OrderedFloat; use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; use parquet::file::metadata::RowGroupMetaData; -use parquet::file::page_index::index::Index; +use parquet::file::page_index::column_index::ColumnIndexMetaData; use parquet::file::page_index::offset_index::OffsetIndexMetaData; use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit}; @@ -59,7 +59,7 @@ impl PageNullCount { } pub(crate) struct PageIndexEvaluator<'a> { - column_index: &'a [Index], + column_index: &'a [ColumnIndexMetaData], offset_index: &'a OffsetIndex, row_group_metadata: &'a RowGroupMetaData, iceberg_field_id_to_parquet_column_index: &'a HashMap, @@ -69,7 +69,7 @@ pub(crate) struct PageIndexEvaluator<'a> { impl<'a> PageIndexEvaluator<'a> { pub(crate) fn new( - column_index: &'a [Index], + column_index: &'a [ColumnIndexMetaData], offset_index: &'a OffsetIndex, row_group_metadata: &'a RowGroupMetaData, field_id_map: &'a HashMap, @@ -92,7 +92,7 @@ impl<'a> PageIndexEvaluator<'a> { /// matching the filter predicate. 
pub(crate) fn eval( filter: &'a BoundPredicate, - column_index: &'a [Index], + column_index: &'a [ColumnIndexMetaData], offset_index: &'a OffsetIndex, row_group_metadata: &'a RowGroupMetaData, field_id_map: &'a HashMap, @@ -240,137 +240,135 @@ impl<'a> PageIndexEvaluator<'a> { fn apply_predicate_to_column_index( predicate: F, field_type: &PrimitiveType, - column_index: &Index, + column_index: &ColumnIndexMetaData, row_counts: &[usize], ) -> Result>> where F: Fn(Option, Option, PageNullCount) -> Result, { let result: Result> = match column_index { - Index::NONE => { + ColumnIndexMetaData::NONE => { return Ok(None); } - Index::BOOLEAN(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::BOOLEAN(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min.map(|val| { + min.map(|&val| { Datum::new(field_type.clone(), PrimitiveLiteral::Boolean(val)) }), - item.max.map(|val| { + max.map(|&val| { Datum::new(field_type.clone(), PrimitiveLiteral::Boolean(val)) }), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::INT32(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::INT32(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), - item.max - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + min.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), + max.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), 
) }) .collect(), - Index::INT64(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::INT64(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), - item.max - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + min.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), + max.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::FLOAT(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::FLOAT(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min.map(|val| { + min.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Float(OrderedFloat::from(val)), ) }), - item.max.map(|val| { + max.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Float(OrderedFloat::from(val)), ) }), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::DOUBLE(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::DOUBLE(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min.map(|val| { + min.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Double(OrderedFloat::from(val)), ) }), - item.max.map(|val| { + max.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Double(OrderedFloat::from(val)), ) }), - 
PageNullCount::from_row_and_null_counts(row_count, item.null_count), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::BYTE_ARRAY(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::BYTE_ARRAY(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min.clone().map(|val| { + min.map(|val| { Datum::new( field_type.clone(), - PrimitiveLiteral::String( - String::from_utf8(val.data().to_vec()).unwrap(), - ), + PrimitiveLiteral::String(String::from_utf8(val.to_vec()).unwrap()), ) }), - item.max.clone().map(|val| { + max.map(|val| { Datum::new( field_type.clone(), - PrimitiveLiteral::String( - String::from_utf8(val.data().to_vec()).unwrap(), - ), + PrimitiveLiteral::String(String::from_utf8(val.to_vec()).unwrap()), ) }), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::FIXED_LEN_BYTE_ARRAY(_) => { + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(_) => { return Err(Error::new( ErrorKind::FeatureUnsupported, "unsupported 'FIXED_LEN_BYTE_ARRAY' index type in column_index", )); } - Index::INT96(_) => { + ColumnIndexMetaData::INT96(_) => { return Err(Error::new( ErrorKind::FeatureUnsupported, "unsupported 'INT96' index type in column_index", @@ -787,28 +785,164 @@ mod tests { use std::collections::HashMap; use std::sync::Arc; - use parquet::arrow::arrow_reader::RowSelector; - use parquet::basic::{LogicalType as ParquetLogicalType, Type as ParquetPhysicalType}; - use parquet::data_type::ByteArray; - use parquet::file::metadata::{ColumnChunkMetaData, RowGroupMetaData}; - use parquet::file::page_index::index::{Index, NativeIndex, PageIndex}; - use parquet::file::page_index::offset_index::OffsetIndexMetaData; - use parquet::file::statistics::Statistics; - use 
parquet::format::{BoundaryOrder, PageLocation}; - use parquet::schema::types::{ - ColumnDescriptor, ColumnPath, SchemaDescriptor, Type as parquetSchemaType, + use arrow_array::{ArrayRef, Float32Array, RecordBatch, StringArray}; + use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use parquet::arrow::ArrowWriter; + use parquet::arrow::arrow_reader::{ + ArrowReaderOptions, ParquetRecordBatchReaderBuilder, RowSelector, }; + use parquet::file::metadata::ParquetMetaData; + use parquet::file::properties::WriterProperties; use rand::{Rng, thread_rng}; + use tempfile::NamedTempFile; use super::PageIndexEvaluator; use crate::expr::{Bind, Reference}; use crate::spec::{Datum, NestedField, PrimitiveType, Schema, Type}; use crate::{ErrorKind, Result}; + /// Helper function to create a test parquet file with page indexes + /// and return the metadata needed for testing + fn create_test_parquet_file() -> Result<(Arc, NamedTempFile)> { + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("col_float", DataType::Float32, true), + Field::new("col_string", DataType::Utf8, true), + ])); + + let temp_file = NamedTempFile::new().unwrap(); + let file = temp_file.reopen().unwrap(); + + let props = WriterProperties::builder() + .set_data_page_row_count_limit(1024) + .set_write_batch_size(512) + .build(); + + let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); + + let mut batches = vec![]; + + // Batch 1: 1024 rows - strings with AARDVARK, BEAR, BISON + let float_vals: Vec> = vec![None; 1024]; + let mut string_vals = vec![]; + string_vals.push(Some("AARDVARK".to_string())); + for _ in 1..1023 { + string_vals.push(Some("BEAR".to_string())); + } + string_vals.push(Some("BISON".to_string())); + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Batch 2: 1024 rows - all DEER + let float_vals: Vec> = 
vec![None; 1024]; + let string_vals = vec![Some("DEER".to_string()); 1024]; + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Batch 3: 1024 rows - float 0-10 + let mut float_vals = vec![]; + for i in 0..1024 { + float_vals.push(Some(i as f32 * 10.0 / 1024.0)); + } + let mut string_vals = vec![]; + string_vals.push(Some("GIRAFFE".to_string())); + string_vals.push(None); + for _ in 2..1024 { + string_vals.push(Some("HIPPO".to_string())); + } + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Batch 4: 1024 rows - float 10-20 + let mut float_vals = vec![None]; + for i in 1..1024 { + float_vals.push(Some(10.0 + i as f32 * 10.0 / 1024.0)); + } + let string_vals = vec![Some("HIPPO".to_string()); 1024]; + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Write rows one at a time to give the writer a chance to split into pages + for batch in &batches { + for i in 0..batch.num_rows() { + writer.write(&batch.slice(i, 1)).unwrap(); + } + } + + writer.close().unwrap(); + + let file = temp_file.reopen().unwrap(); + let options = ArrowReaderOptions::new().with_page_index(true); + let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); + let metadata = reader.metadata().clone(); + + Ok((metadata, temp_file)) + } + + /// Get the test metadata components for testing + fn get_test_metadata( + metadata: &ParquetMetaData, + ) -> ( + Vec, + Vec, + &parquet::file::metadata::RowGroupMetaData, + ) { + let row_group_metadata = metadata.row_group(0); + let column_index = metadata.column_index().unwrap()[0].to_vec(); + let offset_index = 
metadata.offset_index().unwrap()[0].to_vec(); + (column_index, offset_index, row_group_metadata) + } + #[test] fn eval_matches_no_rows_for_empty_row_group() -> Result<()> { - let row_group_metadata = create_row_group_metadata(0, 0, None, 0, None)?; - let (column_index, offset_index) = create_page_index()?; + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("col_float", DataType::Float32, true), + Field::new("col_string", DataType::Utf8, true), + ])); + + let empty_float: ArrayRef = Arc::new(Float32Array::from(Vec::>::new())); + let empty_string: ArrayRef = Arc::new(StringArray::from(Vec::>::new())); + let empty_batch = + RecordBatch::try_new(arrow_schema.clone(), vec![empty_float, empty_string]).unwrap(); + + let temp_file = NamedTempFile::new().unwrap(); + let file = temp_file.reopen().unwrap(); + + let mut writer = ArrowWriter::try_new(file, arrow_schema, None).unwrap(); + writer.write(&empty_batch).unwrap(); + writer.close().unwrap(); + + let file = temp_file.reopen().unwrap(); + let options = ArrowReaderOptions::new().with_page_index(true); + let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); + let metadata = reader.metadata(); + + if metadata.num_row_groups() == 0 || metadata.row_group(0).num_rows() == 0 { + return Ok(()); + } let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; @@ -816,27 +950,28 @@ mod tests { .greater_than(Datum::float(1.0)) .bind(iceberg_schema_ref.clone(), false)?; + let row_group_metadata = metadata.row_group(0); + let column_index = metadata.column_index().unwrap()[0].to_vec(); + let offset_index = metadata.offset_index().unwrap()[0].to_vec(); + let result = PageIndexEvaluator::eval( &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![]; - - assert_eq!(result, expected); + assert_eq!(result.len(), 0); Ok(()) } #[test] fn 
eval_is_null_select_only_pages_with_nulls() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -847,15 +982,15 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; let expected = vec![ - RowSelector::select(1024), - RowSelector::skip(1024), RowSelector::select(2048), + RowSelector::skip(1024), + RowSelector::select(1024), ]; assert_eq!(result, expected); @@ -865,9 +1000,8 @@ mod tests { #[test] fn eval_is_not_null_dont_select_pages_with_all_nulls() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -878,12 +1012,12 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![RowSelector::skip(1024), RowSelector::select(3072)]; + let expected = vec![RowSelector::skip(2048), RowSelector::select(2048)]; assert_eq!(result, expected); @@ -892,9 +1026,8 @@ mod tests { #[test] fn eval_is_nan_select_all() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + 
let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -905,7 +1038,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -919,9 +1052,8 @@ mod tests { #[test] fn eval_not_nan_select_all() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -932,7 +1064,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -946,9 +1078,8 @@ mod tests { #[test] fn eval_inequality_nan_datum_all_rows_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -959,12 +1090,12 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![RowSelector::skip(1024), RowSelector::select(3072)]; + let expected = vec![RowSelector::skip(2048), RowSelector::select(2048)]; assert_eq!(result, expected); @@ -973,9 +1104,8 @@ mod tests { #[test] fn 
eval_inequality_pages_containing_value_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -986,16 +1116,15 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; let expected = vec![ - RowSelector::skip(1024), + RowSelector::skip(2048), RowSelector::select(1024), RowSelector::skip(1024), - RowSelector::select(1024), ]; assert_eq!(result, expected); @@ -1005,9 +1134,8 @@ mod tests { #[test] fn eval_eq_pages_containing_value_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1018,16 +1146,18 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; + // Pages 0-1: all null (skip) + // Page 2: 0-10 (select, might contain 5.0) + // Page 3: 10-20 (skip, min > 5.0) let expected = vec![ - RowSelector::skip(1024), + RowSelector::skip(2048), RowSelector::select(1024), RowSelector::skip(1024), - RowSelector::select(1024), ]; assert_eq!(result, expected); @@ -1037,9 +1167,8 @@ mod tests { #[test] fn eval_not_eq_all_rows() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, 
None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1050,7 +1179,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -1064,9 +1193,8 @@ mod tests { #[test] fn eval_starts_with_error_float_col() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1077,7 +1205,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), ); @@ -1089,11 +1217,13 @@ mod tests { #[test] fn eval_starts_with_pages_containing_value_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; + // Test starts_with on string column where only some pages match + // Our file has 4 pages: ["AARDVARK".."BISON"], ["DEER"], ["GIRAFFE".."HIPPO"], ["HIPPO"] + // Testing starts_with("B") should select only page 0 let filter = Reference::new("col_string") .starts_with(Datum::string("B")) 
.bind(iceberg_schema_ref.clone(), false)?; @@ -1102,16 +1232,13 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![ - RowSelector::select(512), - RowSelector::skip(3536), - RowSelector::select(48), - ]; + // Page 0 has "BEAR" and "BISON" (starts with B), rest don't + let expected = vec![RowSelector::select(1024), RowSelector::skip(3072)]; assert_eq!(result, expected); @@ -1121,11 +1248,13 @@ mod tests { #[test] fn eval_not_starts_with_pages_containing_value_except_pages_with_min_and_max_equal_to_prefix_and_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; + // Test not_starts_with where one page has ALL values starting with prefix + // Our file has page 1 with all "DEER" (min="DEER", max="DEER") + // Testing not_starts_with("DE") should skip page 1 where all values start with "DE" let filter = Reference::new("col_string") .not_starts_with(Datum::string("DE")) .bind(iceberg_schema_ref.clone(), false)?; @@ -1134,15 +1263,18 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; + // Page 0: mixed values (select) + // Page 1: all "DEER" starting with "DE" (skip) + // Pages 2-3: other values not all starting with "DE" (select) let expected = vec![ - RowSelector::select(512), - RowSelector::skip(512), - RowSelector::select(3072), + RowSelector::select(1024), + RowSelector::skip(1024), + RowSelector::select(2048), ]; assert_eq!(result, expected); @@ -1153,10 +1285,8 @@ mod tests { #[test] fn 
eval_in_length_of_set_above_limit_all_rows() -> Result<()> { let mut rng = thread_rng(); - - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1167,7 +1297,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -1181,30 +1311,32 @@ mod tests { #[test] fn eval_in_valid_set_size_some_rows() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; + // Test is_in with multiple values using min/max bounds + // Our file has 4 pages: ["AARDVARK".."BISON"], ["DEER"], ["GIRAFFE".."HIPPO"], ["HIPPO"] + // Testing is_in(["AARDVARK", "GIRAFFE"]) - both are in different pages let filter = Reference::new("col_string") - .is_in([Datum::string("AARDVARK"), Datum::string("ICEBERG")]) + .is_in([Datum::string("AARDVARK"), Datum::string("GIRAFFE")]) .bind(iceberg_schema_ref.clone(), false)?; let result = PageIndexEvaluator::eval( &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; + // Page 0 contains "AARDVARK", page 1 doesn't contain either, page 2 contains "GIRAFFE", page 3 doesn't let expected = vec![ - RowSelector::select(512), - RowSelector::skip(512), - RowSelector::select(2976), - RowSelector::skip(48), - 
RowSelector::select(48), + RowSelector::select(1024), + RowSelector::skip(1024), + RowSelector::select(1024), + RowSelector::skip(1024), ]; assert_eq!(result, expected); @@ -1235,183 +1367,4 @@ mod tests { Ok((iceberg_schema_ref, field_id_map)) } - - fn build_parquet_schema_descriptor() -> Result> { - let field_1 = Arc::new( - parquetSchemaType::primitive_type_builder("col_float", ParquetPhysicalType::FLOAT) - .with_id(Some(1)) - .build()?, - ); - - let field_2 = Arc::new( - parquetSchemaType::primitive_type_builder( - "col_string", - ParquetPhysicalType::BYTE_ARRAY, - ) - .with_id(Some(2)) - .with_logical_type(Some(ParquetLogicalType::String)) - .build()?, - ); - - let group_type = Arc::new( - parquetSchemaType::group_type_builder("all") - .with_id(Some(1000)) - .with_fields(vec![field_1, field_2]) - .build()?, - ); - - let schema_descriptor = SchemaDescriptor::new(group_type); - let schema_descriptor_arc = Arc::new(schema_descriptor); - Ok(schema_descriptor_arc) - } - - fn create_row_group_metadata( - num_rows: i64, - col_1_num_vals: i64, - col_1_stats: Option, - col_2_num_vals: i64, - col_2_stats: Option, - ) -> Result { - let schema_descriptor_arc = build_parquet_schema_descriptor()?; - - let column_1_desc_ptr = Arc::new(ColumnDescriptor::new( - schema_descriptor_arc.column(0).self_type_ptr(), - 1, - 1, - ColumnPath::new(vec!["col_float".to_string()]), - )); - - let column_2_desc_ptr = Arc::new(ColumnDescriptor::new( - schema_descriptor_arc.column(1).self_type_ptr(), - 1, - 1, - ColumnPath::new(vec!["col_string".to_string()]), - )); - - let mut col_1_meta = - ColumnChunkMetaData::builder(column_1_desc_ptr).set_num_values(col_1_num_vals); - if let Some(stats1) = col_1_stats { - col_1_meta = col_1_meta.set_statistics(stats1) - } - - let mut col_2_meta = - ColumnChunkMetaData::builder(column_2_desc_ptr).set_num_values(col_2_num_vals); - if let Some(stats2) = col_2_stats { - col_2_meta = col_2_meta.set_statistics(stats2) - } - - let row_group_metadata = 
RowGroupMetaData::builder(schema_descriptor_arc) - .set_num_rows(num_rows) - .set_column_metadata(vec![ - col_1_meta.build()?, - // .set_statistics(Statistics::float(None, None, None, 1, false)) - col_2_meta.build()?, - ]) - .build(); - - Ok(row_group_metadata?) - } - - fn create_page_index() -> Result<(Vec, Vec)> { - let idx_float = Index::FLOAT(NativeIndex:: { - indexes: vec![ - PageIndex { - min: None, - max: None, - null_count: Some(1024), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: Some(0.0), - max: Some(10.0), - null_count: Some(0), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: Some(10.0), - max: Some(20.0), - null_count: Some(1), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: None, - max: None, - null_count: None, - repetition_level_histogram: None, - definition_level_histogram: None, - }, - ], - boundary_order: BoundaryOrder(0), // UNORDERED - }); - - let idx_string = Index::BYTE_ARRAY(NativeIndex:: { - indexes: vec![ - PageIndex { - min: Some("AA".into()), - max: Some("DD".into()), - null_count: Some(0), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: Some("DE".into()), - max: Some("DE".into()), - null_count: Some(0), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: Some("DF".into()), - max: Some("UJ".into()), - null_count: Some(1), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: None, - max: None, - null_count: Some(48), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: None, - max: None, - null_count: None, - repetition_level_histogram: None, - definition_level_histogram: None, - }, - ], - boundary_order: BoundaryOrder(0), // UNORDERED - }); - - let page_locs_float = vec![ - PageLocation::new(0, 1024, 0), - 
PageLocation::new(1024, 1024, 1024), - PageLocation::new(2048, 1024, 2048), - PageLocation::new(3072, 1024, 3072), - ]; - - let page_locs_string = vec![ - PageLocation::new(0, 512, 0), - PageLocation::new(512, 512, 512), - PageLocation::new(1024, 2976, 1024), - PageLocation::new(4000, 48, 4000), - PageLocation::new(4048, 48, 4048), - ]; - - Ok((vec![idx_float, idx_string], vec![ - OffsetIndexMetaData { - page_locations: page_locs_float, - unencoded_byte_array_data_bytes: None, - }, - OffsetIndexMetaData { - page_locations: page_locs_string, - unencoded_byte_array_data_bytes: None, - }, - ])) - } } diff --git a/crates/iceberg/src/inspect/manifests.rs b/crates/iceberg/src/inspect/manifests.rs index 60854b8bae..4c30ca2ec5 100644 --- a/crates/iceberg/src/inspect/manifests.rs +++ b/crates/iceberg/src/inspect/manifests.rs @@ -296,18 +296,18 @@ mod tests { check_record_batches( record_batch.try_collect::>().await.unwrap(), expect![[r#" - Field { name: "content", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "14"} }, - Field { name: "path", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "length", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "partition_spec_id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }, - Field { name: "added_snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, - Field { name: "added_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }, - Field { name: "existing_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, - Field { name: "deleted_data_files_count", 
data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, - Field { name: "added_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "15"} }, - Field { name: "existing_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "16"} }, - Field { name: "deleted_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "17"} }, - Field { name: "partition_summaries", data_type: List(Field { name: "item", data_type: Struct([Field { name: "contains_null", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }, Field { name: "contains_nan", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "11"} }, Field { name: "lower_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "12"} }, Field { name: "upper_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "13"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }"#]], + Field { "content": Int32, metadata: {"PARQUET:field_id": "14"} }, + Field { "path": Utf8, metadata: {"PARQUET:field_id": "1"} }, + Field { "length": Int64, metadata: {"PARQUET:field_id": "2"} }, + Field { "partition_spec_id": Int32, metadata: {"PARQUET:field_id": "3"} }, + Field { "added_snapshot_id": Int64, metadata: {"PARQUET:field_id": "4"} }, + Field { "added_data_files_count": Int32, metadata: {"PARQUET:field_id": "5"} }, + Field { "existing_data_files_count": Int32, metadata: {"PARQUET:field_id": "6"} }, + Field { "deleted_data_files_count": Int32, 
metadata: {"PARQUET:field_id": "7"} }, + Field { "added_delete_files_count": Int32, metadata: {"PARQUET:field_id": "15"} }, + Field { "existing_delete_files_count": Int32, metadata: {"PARQUET:field_id": "16"} }, + Field { "deleted_delete_files_count": Int32, metadata: {"PARQUET:field_id": "17"} }, + Field { "partition_summaries": List(non-null Struct("contains_null": non-null Boolean, metadata: {"PARQUET:field_id": "10"}, "contains_nan": Boolean, metadata: {"PARQUET:field_id": "11"}, "lower_bound": Utf8, metadata: {"PARQUET:field_id": "12"}, "upper_bound": Utf8, metadata: {"PARQUET:field_id": "13"}), metadata: {"PARQUET:field_id": "9"}), metadata: {"PARQUET:field_id": "8"} }"#]], expect![[r#" content: PrimitiveArray [ diff --git a/crates/iceberg/src/inspect/snapshots.rs b/crates/iceberg/src/inspect/snapshots.rs index 6081ec165b..fbed7ec11e 100644 --- a/crates/iceberg/src/inspect/snapshots.rs +++ b/crates/iceberg/src/inspect/snapshots.rs @@ -151,14 +151,14 @@ mod tests { check_record_batches( batch_stream.try_collect::>().await.unwrap(), expect![[r#" - Field { name: "committed_at", data_type: Timestamp(Microsecond, Some("+00:00")), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "parent_id", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }, - Field { name: "operation", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, - Field { name: "manifest_list", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }, - Field { name: "summary", data_type: Map(Field { name: "key_value", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: 
{"PARQUET:field_id": "7"} }, Field { name: "value", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }"#]], + Field { "committed_at": Timestamp(µs, "+00:00"), metadata: {"PARQUET:field_id": "1"} }, + Field { "snapshot_id": Int64, metadata: {"PARQUET:field_id": "2"} }, + Field { "parent_id": nullable Int64, metadata: {"PARQUET:field_id": "3"} }, + Field { "operation": nullable Utf8, metadata: {"PARQUET:field_id": "4"} }, + Field { "manifest_list": nullable Utf8, metadata: {"PARQUET:field_id": "5"} }, + Field { "summary": nullable Map("key_value": non-null Struct("key": non-null Utf8, metadata: {"PARQUET:field_id": "7"}, "value": Utf8, metadata: {"PARQUET:field_id": "8"}), unsorted), metadata: {"PARQUET:field_id": "6"} }"#]], expect![[r#" - committed_at: PrimitiveArray + committed_at: PrimitiveArray [ 2018-01-04T21:22:35.770+00:00, 2019-04-12T20:29:15.770+00:00, diff --git a/crates/iceberg/src/writer/file_writer/parquet_writer.rs b/crates/iceberg/src/writer/file_writer/parquet_writer.rs index 5cf031a9fb..356c2cb43d 100644 --- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs +++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs @@ -27,12 +27,9 @@ use itertools::Itertools; use parquet::arrow::AsyncArrowWriter; use parquet::arrow::async_reader::AsyncFileReader; use parquet::arrow::async_writer::AsyncFileWriter as ArrowAsyncFileWriter; -use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader}; +use parquet::file::metadata::ParquetMetaData; use parquet::file::properties::WriterProperties; use parquet::file::statistics::Statistics; -use parquet::format::FileMetaData; -use parquet::thrift::{TCompactOutputProtocol, TSerializable}; -use thrift::protocol::TOutputProtocol; use super::{FileWriter, FileWriterBuilder}; use 
crate::arrow::{ @@ -349,29 +346,6 @@ impl ParquetWriter { Ok(data_files) } - fn thrift_to_parquet_metadata(&self, file_metadata: FileMetaData) -> Result { - let mut buffer = Vec::new(); - { - let mut protocol = TCompactOutputProtocol::new(&mut buffer); - file_metadata - .write_to_out_protocol(&mut protocol) - .map_err(|err| { - Error::new(ErrorKind::Unexpected, "Failed to write parquet metadata") - .with_source(err) - })?; - - protocol.flush().map_err(|err| { - Error::new(ErrorKind::Unexpected, "Failed to flush protocol").with_source(err) - })?; - } - - let parquet_metadata = ParquetMetaDataReader::decode_metadata(&buffer).map_err(|err| { - Error::new(ErrorKind::Unexpected, "Failed to decode parquet metadata").with_source(err) - })?; - - Ok(parquet_metadata) - } - /// `ParquetMetadata` to data file builder pub(crate) fn parquet_to_data_file_builder( schema: SchemaRef, @@ -564,14 +538,7 @@ impl FileWriter for ParquetWriter { })?; Ok(vec![]) } else { - let parquet_metadata = - Arc::new(self.thrift_to_parquet_metadata(metadata).map_err(|err| { - Error::new( - ErrorKind::Unexpected, - "Failed to convert metadata from thrift to parquet.", - ) - .with_source(err) - })?); + let parquet_metadata = Arc::new(metadata); Ok(vec![Self::parquet_to_data_file_builder( self.schema, diff --git a/crates/integrations/datafusion/src/table/table_provider_factory.rs b/crates/integrations/datafusion/src/table/table_provider_factory.rs index 8c0c8e90de..8cae597b7b 100644 --- a/crates/integrations/datafusion/src/table/table_provider_factory.rs +++ b/crates/integrations/datafusion/src/table/table_provider_factory.rs @@ -244,6 +244,7 @@ mod tests { constraints: Constraints::default(), column_defaults: Default::default(), if_not_exists: Default::default(), + or_replace: false, temporary: false, definition: Default::default(), unbounded: Default::default(), diff --git a/crates/integrations/datafusion/tests/integration_datafusion_test.rs 
b/crates/integrations/datafusion/tests/integration_datafusion_test.rs index 06d9cab03a..6f8898abb8 100644 --- a/crates/integrations/datafusion/tests/integration_datafusion_test.rs +++ b/crates/integrations/datafusion/tests/integration_datafusion_test.rs @@ -347,14 +347,14 @@ async fn test_metadata_table() -> Result<()> { check_record_batches( snapshots, expect![[r#" - Field { name: "committed_at", data_type: Timestamp(Microsecond, Some("+00:00")), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "parent_id", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }, - Field { name: "operation", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, - Field { name: "manifest_list", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }, - Field { name: "summary", data_type: Map(Field { name: "key_value", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, Field { name: "value", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }"#]], + Field { "committed_at": Timestamp(µs, "+00:00"), metadata: {"PARQUET:field_id": "1"} }, + Field { "snapshot_id": Int64, metadata: {"PARQUET:field_id": "2"} }, + Field { "parent_id": nullable Int64, metadata: {"PARQUET:field_id": "3"} }, + Field { "operation": nullable Utf8, metadata: {"PARQUET:field_id": "4"} }, + Field { "manifest_list": nullable Utf8, metadata: 
{"PARQUET:field_id": "5"} }, + Field { "summary": nullable Map("key_value": non-null Struct("key": non-null Utf8, metadata: {"PARQUET:field_id": "7"}, "value": Utf8, metadata: {"PARQUET:field_id": "8"}), unsorted), metadata: {"PARQUET:field_id": "6"} }"#]], expect![[r#" - committed_at: PrimitiveArray + committed_at: PrimitiveArray [ ], snapshot_id: PrimitiveArray @@ -386,18 +386,18 @@ async fn test_metadata_table() -> Result<()> { check_record_batches( manifests, expect![[r#" - Field { name: "content", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "14"} }, - Field { name: "path", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "length", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "partition_spec_id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }, - Field { name: "added_snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, - Field { name: "added_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }, - Field { name: "existing_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, - Field { name: "deleted_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, - Field { name: "added_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "15"} }, - Field { name: "existing_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "16"} }, - Field { name: 
"deleted_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "17"} }, - Field { name: "partition_summaries", data_type: List(Field { name: "item", data_type: Struct([Field { name: "contains_null", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }, Field { name: "contains_nan", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "11"} }, Field { name: "lower_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "12"} }, Field { name: "upper_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "13"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }"#]], + Field { "content": Int32, metadata: {"PARQUET:field_id": "14"} }, + Field { "path": Utf8, metadata: {"PARQUET:field_id": "1"} }, + Field { "length": Int64, metadata: {"PARQUET:field_id": "2"} }, + Field { "partition_spec_id": Int32, metadata: {"PARQUET:field_id": "3"} }, + Field { "added_snapshot_id": Int64, metadata: {"PARQUET:field_id": "4"} }, + Field { "added_data_files_count": Int32, metadata: {"PARQUET:field_id": "5"} }, + Field { "existing_data_files_count": Int32, metadata: {"PARQUET:field_id": "6"} }, + Field { "deleted_data_files_count": Int32, metadata: {"PARQUET:field_id": "7"} }, + Field { "added_delete_files_count": Int32, metadata: {"PARQUET:field_id": "15"} }, + Field { "existing_delete_files_count": Int32, metadata: {"PARQUET:field_id": "16"} }, + Field { "deleted_delete_files_count": Int32, metadata: {"PARQUET:field_id": "17"} }, + Field { "partition_summaries": List(non-null Struct("contains_null": non-null Boolean, metadata: {"PARQUET:field_id": "10"}, "contains_nan": 
Boolean, metadata: {"PARQUET:field_id": "11"}, "lower_bound": Utf8, metadata: {"PARQUET:field_id": "12"}, "upper_bound": Utf8, metadata: {"PARQUET:field_id": "13"}), metadata: {"PARQUET:field_id": "9"}), metadata: {"PARQUET:field_id": "8"} }"#]], expect![[r#" content: PrimitiveArray [ @@ -504,8 +504,8 @@ async fn test_insert_into() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "foo1", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "foo2", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }"#]], + Field { "foo1": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "foo2": Utf8, metadata: {"PARQUET:field_id": "2"} }"#]], expect![[r#" foo1: PrimitiveArray [ @@ -658,9 +658,9 @@ async fn test_insert_into_nested() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "name", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "profile", data_type: Struct([Field { name: "address", data_type: Struct([Field { name: "street", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, Field { name: "city", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, Field { name: "zip", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, Field { name: "contact", data_type: Struct([Field { name: "email", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }, Field { name: "phone", data_type: 
Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }"#]], + Field { "id": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "name": Utf8, metadata: {"PARQUET:field_id": "2"} }, + Field { "profile": nullable Struct("address": Struct("street": non-null Utf8, metadata: {"PARQUET:field_id": "6"}, "city": non-null Utf8, metadata: {"PARQUET:field_id": "7"}, "zip": non-null Int32, metadata: {"PARQUET:field_id": "8"}), metadata: {"PARQUET:field_id": "4"}, "contact": Struct("email": Utf8, metadata: {"PARQUET:field_id": "9"}, "phone": Utf8, metadata: {"PARQUET:field_id": "10"}), metadata: {"PARQUET:field_id": "5"}), metadata: {"PARQUET:field_id": "3"} }"#]], expect![[r#" id: PrimitiveArray [ @@ -679,7 +679,7 @@ async fn test_insert_into_nested() -> Result<()> { valid, ] [ - -- child 0: "address" (Struct([Field { name: "street", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, Field { name: "city", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, Field { name: "zip", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }])) + -- child 0: "address" (Struct([Field { name: "street", data_type: Utf8, metadata: {"PARQUET:field_id": "6"} }, Field { name: "city", data_type: Utf8, metadata: {"PARQUET:field_id": "7"} }, Field { name: "zip", data_type: Int32, metadata: {"PARQUET:field_id": "8"} }])) StructArray -- validity: [ @@ -706,7 +706,7 @@ async fn test_insert_into_nested() -> Result<()> { 95113, ] ] - -- child 1: "contact" (Struct([Field { name: "email", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }, Field { name: 
"phone", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }])) + -- child 1: "contact" (Struct([Field { name: "email", data_type: Utf8, nullable: true, metadata: {"PARQUET:field_id": "9"} }, Field { name: "phone", data_type: Utf8, nullable: true, metadata: {"PARQUET:field_id": "10"} }])) StructArray -- validity: [ @@ -757,13 +757,13 @@ async fn test_insert_into_nested() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "name", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[address][street]", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[address][city]", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[address][zip]", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[contact][email]", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[contact][phone]", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }"#]], + Field { "id": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "name": Utf8, metadata: {"PARQUET:field_id": "2"} }, + Field { "catalog.test_insert_nested.nested_table.profile[address][street]": nullable Utf8, metadata: {"PARQUET:field_id": "6"} }, + Field { 
"catalog.test_insert_nested.nested_table.profile[address][city]": nullable Utf8, metadata: {"PARQUET:field_id": "7"} }, + Field { "catalog.test_insert_nested.nested_table.profile[address][zip]": nullable Int32, metadata: {"PARQUET:field_id": "8"} }, + Field { "catalog.test_insert_nested.nested_table.profile[contact][email]": nullable Utf8, metadata: {"PARQUET:field_id": "9"} }, + Field { "catalog.test_insert_nested.nested_table.profile[contact][phone]": nullable Utf8, metadata: {"PARQUET:field_id": "10"} }"#]], expect![[r#" id: PrimitiveArray [ @@ -884,9 +884,9 @@ async fn test_insert_into_partitioned() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "category", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "value", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }"#]], + Field { "id": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "category": Utf8, metadata: {"PARQUET:field_id": "2"} }, + Field { "value": Utf8, metadata: {"PARQUET:field_id": "3"} }"#]], expect![[r#" id: PrimitiveArray [ diff --git a/crates/integrations/playground/src/main.rs b/crates/integrations/playground/src/main.rs index c522209957..94068bb558 100644 --- a/crates/integrations/playground/src/main.rs +++ b/crates/integrations/playground/src/main.rs @@ -24,6 +24,7 @@ use clap::Parser; use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_cli::exec; +use datafusion_cli::object_storage::instrumented::InstrumentedObjectStoreRegistry; use datafusion_cli::print_format::PrintFormat; use datafusion_cli::print_options::{MaxRows, PrintOptions}; use iceberg_playground::{ICEBERG_PLAYGROUND_VERSION, IcebergCatalogList}; @@ -94,6 
+95,7 @@ async fn main_inner() -> anyhow::Result<()> { quiet: args.quiet, maxrows: args.maxrows, color: args.color, + instrumented_registry: Arc::new(InstrumentedObjectStoreRegistry::new()), }; let rc = match args.rc { From 2944ccb551ec7c07ccc802a8edc01d5eb2ab6b87 Mon Sep 17 00:00:00 2001 From: Andrea Bozzo Date: Wed, 10 Dec 2025 08:11:33 +0100 Subject: [PATCH 24/58] fix: Serialize `split_offsets` as null when empty (#1906) - Change `split_offsets` in `DataFile` from `Vec` to `Option>` - Empty values now serialize as `null` instead of `[]` - Aligns with Iceberg spec (field is optional) Closes #1897 --------- Co-authored-by: Renjie Liu --- bindings/python/src/data_file.rs | 2 +- .../src/expr/visitors/expression_evaluator.rs | 4 ++-- .../visitors/inclusive_metrics_evaluator.rs | 12 +++++----- .../expr/visitors/strict_metrics_evaluator.rs | 8 +++---- crates/iceberg/src/spec/manifest/_serde.rs | 6 ++--- crates/iceberg/src/spec/manifest/data_file.rs | 10 ++++---- crates/iceberg/src/spec/manifest/mod.rs | 24 +++++++++---------- crates/iceberg/src/spec/manifest/writer.rs | 6 ++--- crates/iceberg/src/spec/snapshot_summary.rs | 10 ++++---- .../base_writer/equality_delete_writer.rs | 16 ++++++------- .../src/writer/file_writer/parquet_writer.rs | 4 ++-- 11 files changed, 52 insertions(+), 50 deletions(-) diff --git a/bindings/python/src/data_file.rs b/bindings/python/src/data_file.rs index 900d6c6014..b0e42e7d73 100644 --- a/bindings/python/src/data_file.rs +++ b/bindings/python/src/data_file.rs @@ -143,7 +143,7 @@ impl PyDataFile { } #[getter] - fn split_offsets(&self) -> &[i64] { + fn split_offsets(&self) -> Option<&[i64]> { self.inner.split_offsets() } diff --git a/crates/iceberg/src/expr/visitors/expression_evaluator.rs b/crates/iceberg/src/expr/visitors/expression_evaluator.rs index 3675ce355f..570c409502 100644 --- a/crates/iceberg/src/expr/visitors/expression_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/expression_evaluator.rs @@ -346,7 +346,7 @@ mod tests { 
lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -374,7 +374,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, diff --git a/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs index 2b65cf12aa..06c92ab3e8 100644 --- a/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs @@ -1995,7 +1995,7 @@ mod test { lower_bounds: Default::default(), upper_bounds: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -2021,7 +2021,7 @@ mod test { lower_bounds: Default::default(), upper_bounds: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -2083,7 +2083,7 @@ mod test { column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -2114,7 +2114,7 @@ mod test { column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -2146,7 +2146,7 @@ mod test { column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -2178,7 +2178,7 @@ mod test { column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, diff --git 
a/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs index 7c652e2068..a6af2990c8 100644 --- a/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs @@ -578,7 +578,7 @@ mod test { ]), column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -604,7 +604,7 @@ mod test { lower_bounds: Default::default(), upper_bounds: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -630,7 +630,7 @@ mod test { upper_bounds: HashMap::from([(1, Datum::int(42))]), column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -657,7 +657,7 @@ mod test { upper_bounds: HashMap::from([(3, Datum::string("dC"))]), column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, diff --git a/crates/iceberg/src/spec/manifest/_serde.rs b/crates/iceberg/src/spec/manifest/_serde.rs index 7738af46d4..07306be2b9 100644 --- a/crates/iceberg/src/spec/manifest/_serde.rs +++ b/crates/iceberg/src/spec/manifest/_serde.rs @@ -153,7 +153,7 @@ impl DataFileSerde { lower_bounds: Some(to_bytes_entry(value.lower_bounds)?), upper_bounds: Some(to_bytes_entry(value.upper_bounds)?), key_metadata: value.key_metadata.map(serde_bytes::ByteBuf::from), - split_offsets: Some(value.split_offsets), + split_offsets: value.split_offsets, equality_ids: value.equality_ids, sort_order_id: value.sort_order_id, first_row_id: value.first_row_id, @@ -222,7 +222,7 @@ impl DataFileSerde { .transpose()? 
.unwrap_or_default(), key_metadata: self.key_metadata.map(|v| v.to_vec()), - split_offsets: self.split_offsets.unwrap_or_default(), + split_offsets: self.split_offsets, equality_ids: self.equality_ids, sort_order_id: self.sort_order_id, partition_spec_id, @@ -380,7 +380,7 @@ mod tests { lower_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]), upper_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: Some(0), partition_spec_id: 0, diff --git a/crates/iceberg/src/spec/manifest/data_file.rs b/crates/iceberg/src/spec/manifest/data_file.rs index a9c041f540..77bd046f8a 100644 --- a/crates/iceberg/src/spec/manifest/data_file.rs +++ b/crates/iceberg/src/spec/manifest/data_file.rs @@ -127,9 +127,10 @@ pub struct DataFile { /// element field id: 133 /// /// Split offsets for the data file. For example, all row group offsets - /// in a Parquet file. Must be sorted ascending + /// in a Parquet file. Must be sorted ascending. Optional field that + /// should be serialized as null when not present. #[builder(default)] - pub(crate) split_offsets: Vec, + pub(crate) split_offsets: Option>, /// field id: 135 /// element field id: 136 /// @@ -247,8 +248,9 @@ impl DataFile { } /// Get the split offsets of the data file. /// For example, all row group offsets in a Parquet file. - pub fn split_offsets(&self) -> &[i64] { - &self.split_offsets + /// Returns `None` if no split offsets are present. + pub fn split_offsets(&self) -> Option<&[i64]> { + self.split_offsets.as_deref() } /// Get the equality ids of the data file. /// Field ids used to determine row equality in equality delete files. 
diff --git a/crates/iceberg/src/spec/manifest/mod.rs b/crates/iceberg/src/spec/manifest/mod.rs index 51219bfdb7..b126396e3c 100644 --- a/crates/iceberg/src/spec/manifest/mod.rs +++ b/crates/iceberg/src/spec/manifest/mod.rs @@ -257,7 +257,7 @@ mod tests { snapshot_id: None, sequence_number: None, file_sequence_number: None, - data_file: DataFile {content:DataContentType::Data,file_path:"s3a://icebergdata/demo/s1/t1/data/00000-0-ba56fbfa-f2ff-40c9-bb27-565ad6dc2be8-00000.parquet".to_string(),file_format:DataFileFormat::Parquet,partition:Struct::empty(),record_count:1,file_size_in_bytes:5442,column_sizes:HashMap::from([(0,73),(6,34),(2,73),(7,61),(3,61),(5,62),(9,79),(10,73),(1,61),(4,73),(8,73)]),value_counts:HashMap::from([(4,1),(5,1),(2,1),(0,1),(3,1),(6,1),(8,1),(1,1),(10,1),(7,1),(9,1)]),null_value_counts:HashMap::from([(1,0),(6,0),(2,0),(8,0),(0,0),(3,0),(5,0),(9,0),(7,0),(4,0),(10,0)]),nan_value_counts:HashMap::new(),lower_bounds:HashMap::new(),upper_bounds:HashMap::new(),key_metadata:None,split_offsets:vec![4],equality_ids:Some(Vec::new()),sort_order_id:None, partition_spec_id: 0,first_row_id: None,referenced_data_file: None,content_offset: None,content_size_in_bytes: None } + data_file: DataFile {content:DataContentType::Data,file_path:"s3a://icebergdata/demo/s1/t1/data/00000-0-ba56fbfa-f2ff-40c9-bb27-565ad6dc2be8-00000.parquet".to_string(),file_format:DataFileFormat::Parquet,partition:Struct::empty(),record_count:1,file_size_in_bytes:5442,column_sizes:HashMap::from([(0,73),(6,34),(2,73),(7,61),(3,61),(5,62),(9,79),(10,73),(1,61),(4,73),(8,73)]),value_counts:HashMap::from([(4,1),(5,1),(2,1),(0,1),(3,1),(6,1),(8,1),(1,1),(10,1),(7,1),(9,1)]),null_value_counts:HashMap::from([(1,0),(6,0),(2,0),(8,0),(0,0),(3,0),(5,0),(9,0),(7,0),(4,0),(10,0)]),nan_value_counts:HashMap::new(),lower_bounds:HashMap::new(),upper_bounds:HashMap::new(),key_metadata:None,split_offsets:Some(vec![4]),equality_ids:Some(Vec::new()),sort_order_id:None, partition_spec_id: 0,first_row_id: 
None,referenced_data_file: None,content_offset: None,content_size_in_bytes: None } } ]; @@ -435,7 +435,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: Some(Vec::new()), sort_order_id: None, partition_spec_id: 0, @@ -532,7 +532,7 @@ mod tests { lower_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]), upper_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: Some(0), partition_spec_id: 0, @@ -640,7 +640,7 @@ mod tests { (3, Datum::string("x")) ]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: Some(0), partition_spec_id: 0, @@ -749,7 +749,7 @@ mod tests { (3, Datum::string("x")) ]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -840,7 +840,7 @@ mod tests { (2, Datum::int(2)), ]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -922,7 +922,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -957,7 +957,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -992,7 +992,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, 
partition_spec_id: 0, @@ -1027,7 +1027,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -1182,7 +1182,7 @@ mod tests { "lower_bounds": [], "upper_bounds": [], "key_metadata": null, - "split_offsets": [], + "split_offsets": null, "equality_ids": null, "sort_order_id": null, "first_row_id": null, @@ -1213,7 +1213,7 @@ mod tests { "lower_bounds": [], "upper_bounds": [], "key_metadata": null, - "split_offsets": [], + "split_offsets": null, "equality_ids": null, "sort_order_id": null, "first_row_id": null, diff --git a/crates/iceberg/src/spec/manifest/writer.rs b/crates/iceberg/src/spec/manifest/writer.rs index 389ac7a1fd..2fb6a42062 100644 --- a/crates/iceberg/src/spec/manifest/writer.rs +++ b/crates/iceberg/src/spec/manifest/writer.rs @@ -608,7 +608,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: Some(Vec::new()), - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -637,7 +637,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: Some(Vec::new()), - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -666,7 +666,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: Some(Vec::new()), - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, diff --git a/crates/iceberg/src/spec/snapshot_summary.rs b/crates/iceberg/src/spec/snapshot_summary.rs index 4cd3715e06..c67ee37d3e 100644 --- a/crates/iceberg/src/spec/snapshot_summary.rs +++ b/crates/iceberg/src/spec/snapshot_summary.rs @@ -767,7 +767,7 @@ mod tests { (3, Datum::string("x")), ]), key_metadata: None, - split_offsets: vec![4], + 
split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: Some(0), partition_spec_id: 0, @@ -799,7 +799,7 @@ mod tests { (3, Datum::string("x")), ]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: Some(0), partition_spec_id: 0, @@ -910,7 +910,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -938,7 +938,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -993,7 +993,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, diff --git a/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs b/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs index cd0b19148d..dd8487f9cc 100644 --- a/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs +++ b/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs @@ -293,15 +293,15 @@ mod test { assert_eq!(*data_file.null_value_counts.get(id).unwrap(), expect); } - assert_eq!(data_file.split_offsets.len(), metadata.num_row_groups()); - data_file + let split_offsets = data_file .split_offsets - .iter() - .enumerate() - .for_each(|(i, &v)| { - let expect = metadata.row_groups()[i].file_offset().unwrap(); - assert_eq!(v, expect); - }); + .as_ref() + .expect("split_offsets should be set"); + assert_eq!(split_offsets.len(), metadata.num_row_groups()); + split_offsets.iter().enumerate().for_each(|(i, &v)| { + let expect = metadata.row_groups()[i].file_offset().unwrap(); + assert_eq!(v, expect); + }); } #[tokio::test] diff --git 
a/crates/iceberg/src/writer/file_writer/parquet_writer.rs b/crates/iceberg/src/writer/file_writer/parquet_writer.rs index 356c2cb43d..8fe40df71c 100644 --- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs +++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs @@ -412,13 +412,13 @@ impl ParquetWriter { // - We can ignore implementing distinct_counts due to this: https://lists.apache.org/thread/j52tsojv0x4bopxyzsp7m7bqt23n5fnd .lower_bounds(lower_bounds) .upper_bounds(upper_bounds) - .split_offsets( + .split_offsets(Some( metadata .row_groups() .iter() .filter_map(|group| group.file_offset()) .collect(), - ); + )); Ok(builder) } From bdb44ae58a45ec12a34af28037901900d6020e08 Mon Sep 17 00:00:00 2001 From: Landon Gingerich Date: Wed, 10 Dec 2025 04:21:38 -0600 Subject: [PATCH 25/58] feat(catalog): Implement update_table for SqlCatalog (#1911) ## Which issue does this PR close? - Closes the SQL catalog part of https://github.com/apache/iceberg-rust/issues/1389 ## What changes are included in this PR? - Implement `update_table()` for SQL catalog - Add corresponding `test_update_table` test ## Are these changes tested? Yes. Covered by new `test_update_table` test. --- crates/catalog/sql/src/catalog.rs | 107 ++++++++++++++++++++++++++++-- 1 file changed, 102 insertions(+), 5 deletions(-) diff --git a/crates/catalog/sql/src/catalog.rs b/crates/catalog/sql/src/catalog.rs index 77b35a228f..8209cd04c1 100644 --- a/crates/catalog/sql/src/catalog.rs +++ b/crates/catalog/sql/src/catalog.rs @@ -917,11 +917,55 @@ impl Catalog for SqlCatalog { .build()?) } - async fn update_table(&self, _commit: TableCommit) -> Result { - Err(Error::new( - ErrorKind::FeatureUnsupported, - "Updating a table is not supported yet", - )) + /// Updates an existing table within the SQL catalog. + async fn update_table(&self, commit: TableCommit) -> Result
{ + let table_ident = commit.identifier().clone(); + let current_table = self.load_table(&table_ident).await?; + let current_metadata_location = current_table.metadata_location_result()?.to_string(); + + let staged_table = commit.apply(current_table)?; + let staged_metadata_location = staged_table.metadata_location_result()?; + + staged_table + .metadata() + .write_to(staged_table.file_io(), &staged_metadata_location) + .await?; + + let update_result = self + .execute( + &format!( + "UPDATE {CATALOG_TABLE_NAME} + SET {CATALOG_FIELD_METADATA_LOCATION_PROP} = ?, {CATALOG_FIELD_PREVIOUS_METADATA_LOCATION_PROP} = ? + WHERE {CATALOG_FIELD_CATALOG_NAME} = ? + AND {CATALOG_FIELD_TABLE_NAME} = ? + AND {CATALOG_FIELD_TABLE_NAMESPACE} = ? + AND ( + {CATALOG_FIELD_RECORD_TYPE} = '{CATALOG_FIELD_TABLE_RECORD_TYPE}' + OR {CATALOG_FIELD_RECORD_TYPE} IS NULL + ) + AND {CATALOG_FIELD_METADATA_LOCATION_PROP} = ?" + ), + vec![ + Some(staged_metadata_location), + Some(current_metadata_location.as_str()), + Some(&self.name), + Some(table_ident.name()), + Some(&table_ident.namespace().join(".")), + Some(current_metadata_location.as_str()), + ], + None, + ) + .await?; + + if update_result.rows_affected() == 0 { + return Err(Error::new( + ErrorKind::CatalogCommitConflicts, + format!("Commit conflicted for table: {table_ident}"), + ) + .with_retryable(true)); + } + + Ok(staged_table) } } @@ -932,6 +976,7 @@ mod tests { use iceberg::spec::{NestedField, PartitionSpec, PrimitiveType, Schema, SortOrder, Type}; use iceberg::table::Table; + use iceberg::transaction::{ApplyTransactionAction, Transaction}; use iceberg::{Catalog, CatalogBuilder, Namespace, NamespaceIdent, TableCreation, TableIdent}; use itertools::Itertools; use regex::Regex; @@ -2293,4 +2338,56 @@ mod tests { assert_eq!(table.identifier(), expected_table.identifier()); assert_eq!(table.metadata_location(), Some(metadata_location.as_str())); } + + #[tokio::test] + async fn test_update_table() { + let warehouse_loc = temp_path(); + 
let catalog = new_sql_catalog(warehouse_loc).await; + + // Create a test namespace and table + let namespace_ident = NamespaceIdent::new("ns1".into()); + create_namespace(&catalog, &namespace_ident).await; + let table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into()); + create_table(&catalog, &table_ident).await; + + let table = catalog.load_table(&table_ident).await.unwrap(); + + // Store the original metadata location for comparison + let original_metadata_location = table.metadata_location().unwrap().to_string(); + + // Create a transaction to update the table + let tx = Transaction::new(&table); + let tx = tx + .update_table_properties() + .set("test_property".to_string(), "test_value".to_string()) + .apply(tx) + .unwrap(); + + // Commit the transaction to the catalog + let updated_table = tx.commit(&catalog).await.unwrap(); + + // Verify the update was successful + assert_eq!( + updated_table.metadata().properties().get("test_property"), + Some(&"test_value".to_string()) + ); + // Verify the metadata location has been updated + assert_ne!( + updated_table.metadata_location().unwrap(), + original_metadata_location.as_str() + ); + + // Load the table again from the catalog to verify changes were persisted + let reloaded = catalog.load_table(&table_ident).await.unwrap(); + + // Verify the reloaded table matches the updated table + assert_eq!( + reloaded.metadata().properties().get("test_property"), + Some(&"test_value".to_string()) + ); + assert_eq!( + reloaded.metadata_location(), + updated_table.metadata_location() + ); + } } From 2ed0a6f509a5c6e7bdb143a74fa79c59a9df1bf2 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 10 Dec 2025 13:54:49 -0500 Subject: [PATCH 26/58] fix: Respect precision and scale for Decimal128 in value.rs (#1921) --- crates/iceberg/src/arrow/value.rs | 135 ++++++++++++++++++++++++++---- 1 file changed, 119 insertions(+), 16 deletions(-) diff --git a/crates/iceberg/src/arrow/value.rs b/crates/iceberg/src/arrow/value.rs 
index bc123d99e8..190aba08e8 100644 --- a/crates/iceberg/src/arrow/value.rs +++ b/crates/iceberg/src/arrow/value.rs @@ -663,18 +663,44 @@ pub(crate) fn create_primitive_array_single_element( (DataType::Binary, None) => Ok(Arc::new(BinaryArray::from_opt_vec(vec![ Option::<&[u8]>::None, ]))), - (DataType::Decimal128(_, _), Some(PrimitiveLiteral::Int128(v))) => { - Ok(Arc::new(arrow_array::Decimal128Array::from(vec![{ *v }]))) + (DataType::Decimal128(precision, scale), Some(PrimitiveLiteral::Int128(v))) => { + let array = Decimal128Array::from(vec![{ *v }]) + .with_precision_and_scale(*precision, *scale) + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}" + ), + ) + })?; + Ok(Arc::new(array)) } - (DataType::Decimal128(_, _), Some(PrimitiveLiteral::UInt128(v))) => { - Ok(Arc::new(arrow_array::Decimal128Array::from(vec![ - *v as i128, - ]))) + (DataType::Decimal128(precision, scale), Some(PrimitiveLiteral::UInt128(v))) => { + let array = Decimal128Array::from(vec![*v as i128]) + .with_precision_and_scale(*precision, *scale) + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}" + ), + ) + })?; + Ok(Arc::new(array)) } - (DataType::Decimal128(_, _), None) => { - Ok(Arc::new(arrow_array::Decimal128Array::from(vec![ - Option::::None, - ]))) + (DataType::Decimal128(precision, scale), None) => { + let array = Decimal128Array::from(vec![Option::::None]) + .with_precision_and_scale(*precision, *scale) + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}" + ), + ) + })?; + Ok(Arc::new(array)) } (DataType::Struct(fields), None) => { // Create a single-element StructArray with nulls @@ -795,15 +821,48 @@ pub(crate) fn create_primitive_array_repeated( let vals: Vec> = vec![None; 
num_rows]; Arc::new(BinaryArray::from_opt_vec(vals)) } - (DataType::Decimal128(_, _), Some(PrimitiveLiteral::Int128(value))) => { - Arc::new(Decimal128Array::from(vec![*value; num_rows])) + (DataType::Decimal128(precision, scale), Some(PrimitiveLiteral::Int128(value))) => { + Arc::new( + Decimal128Array::from(vec![*value; num_rows]) + .with_precision_and_scale(*precision, *scale) + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}" + ), + ) + })?, + ) } - (DataType::Decimal128(_, _), Some(PrimitiveLiteral::UInt128(value))) => { - Arc::new(Decimal128Array::from(vec![*value as i128; num_rows])) + (DataType::Decimal128(precision, scale), Some(PrimitiveLiteral::UInt128(value))) => { + Arc::new( + Decimal128Array::from(vec![*value as i128; num_rows]) + .with_precision_and_scale(*precision, *scale) + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}" + ), + ) + })?, + ) } - (DataType::Decimal128(_, _), None) => { + (DataType::Decimal128(precision, scale), None) => { let vals: Vec> = vec![None; num_rows]; - Arc::new(Decimal128Array::from(vals)) + Arc::new( + Decimal128Array::from(vals) + .with_precision_and_scale(*precision, *scale) + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}" + ), + ) + })?, + ) } (DataType::Struct(fields), None) => { // Create a StructArray filled with nulls @@ -1678,4 +1737,48 @@ mod test { ]))), ]); } + + #[test] + fn test_create_decimal_array_respects_precision() { + // Decimal128Array::from() uses Arrow's default precision (38) instead of the + // target precision, causing RecordBatch construction to fail when schemas don't match. 
+ let target_precision = 18u8; + let target_scale = 10i8; + let target_type = DataType::Decimal128(target_precision, target_scale); + let value = PrimitiveLiteral::Int128(10000000000); + + let array = create_primitive_array_single_element(&target_type, &Some(value)) + .expect("Failed to create decimal array"); + + match array.data_type() { + DataType::Decimal128(precision, scale) => { + assert_eq!(*precision, target_precision); + assert_eq!(*scale, target_scale); + } + other => panic!("Expected Decimal128, got {other:?}"), + } + } + + #[test] + fn test_create_decimal_array_repeated_respects_precision() { + // Ensure repeated arrays also respect target precision, not Arrow's default. + let target_precision = 18u8; + let target_scale = 10i8; + let target_type = DataType::Decimal128(target_precision, target_scale); + let value = PrimitiveLiteral::Int128(10000000000); + let num_rows = 5; + + let array = create_primitive_array_repeated(&target_type, &Some(value), num_rows) + .expect("Failed to create repeated decimal array"); + + match array.data_type() { + DataType::Decimal128(precision, scale) => { + assert_eq!(*precision, target_precision); + assert_eq!(*scale, target_scale); + } + other => panic!("Expected Decimal128, got {other:?}"), + } + + assert_eq!(array.len(), num_rows); + } } From 58bdb9fd94c9c5d847abffbdcb36e2273bb8bf14 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Thu, 11 Dec 2025 05:30:07 -0500 Subject: [PATCH 27/58] fix: restore no-op logic in constants_map for NULL identity-partitioned columns (#1922) ## Which issue does this PR close? See https://github.com/apache/iceberg-rust/pull/1824#discussion_r2584486989 and https://github.com/apache/iceberg-rust/issues/1914#issuecomment-3634315005 ## What changes are included in this PR? This restores the behavior in `record_batch_transformer.rs`'s `constants_map` function to pre-#1824 behavior where `NULL`s are not inserted into the constants map, and instead are just skipped. 
This allows the column projection rules for missing partition values to default to `NULL`. ## Are these changes tested? New test, and running the entire Iceberg Java suite via DataFusion Comet in https://github.com/apache/datafusion-comet/pull/2729. --- .../src/arrow/record_batch_transformer.rs | 81 +++++++++++++++++-- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/crates/iceberg/src/arrow/record_batch_transformer.rs b/crates/iceberg/src/arrow/record_batch_transformer.rs index c4782464c1..439358435c 100644 --- a/crates/iceberg/src/arrow/record_batch_transformer.rs +++ b/crates/iceberg/src/arrow/record_batch_transformer.rs @@ -83,14 +83,10 @@ fn constants_map( // Handle both None (null) and Some(Literal::Primitive) cases match &partition_data[pos] { None => { - // TODO (https://github.com/apache/iceberg-rust/issues/1914): Add support for null datum values. - return Err(Error::new( - ErrorKind::Unexpected, - format!( - "Partition field {} has null value for identity transform", - field.source_id - ), - )); + // Skip null partition values - they will be resolved as null per Iceberg spec rule #4. + // When a partition value is null, we don't add it to the constants map, + // allowing downstream column resolution to handle it correctly. + continue; } Some(Literal::Primitive(value)) => { // Create a Datum from the primitive type and value @@ -1610,4 +1606,73 @@ mod test { assert_eq!(get_string_value(result.column(4).as_ref(), 0), ""); assert_eq!(get_string_value(result.column(4).as_ref(), 1), ""); } + + /// Test handling of null values in identity-partitioned columns. + /// + /// Reproduces TestPartitionValues.testNullPartitionValue() from iceberg-java, which + /// writes records where the partition column has null values. Before the fix in #1922, + /// this would error with "Partition field X has null value for identity transform". 
+ #[test] + fn null_identity_partition_value() { + use crate::spec::{Struct, Transform}; + + let schema = Arc::new( + Schema::builder() + .with_schema_id(0) + .with_fields(vec![ + NestedField::optional(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional(2, "data", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build() + .unwrap(), + ); + + let partition_spec = Arc::new( + crate::spec::PartitionSpec::builder(schema.clone()) + .with_spec_id(0) + .add_partition_field("data", "data", Transform::Identity) + .unwrap() + .build() + .unwrap(), + ); + + // Partition has null value for the data column + let partition_data = Struct::from_iter(vec![None]); + + let file_schema = Arc::new(ArrowSchema::new(vec![simple_field( + "id", + DataType::Int32, + true, + "1", + )])); + + let projected_field_ids = [1, 2]; + + let mut transformer = RecordBatchTransformerBuilder::new(schema, &projected_field_ids) + .with_partition(partition_spec, partition_data) + .expect("Should handle null partition values") + .build(); + + let file_batch = + RecordBatch::try_new(file_schema, vec![Arc::new(Int32Array::from(vec![1, 2, 3]))]) + .unwrap(); + + let result = transformer.process_record_batch(file_batch).unwrap(); + + assert_eq!(result.num_columns(), 2); + assert_eq!(result.num_rows(), 3); + + let id_col = result + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(id_col.values(), &[1, 2, 3]); + + // Partition column with null value should produce nulls + let data_col = result.column(1); + assert!(data_col.is_null(0)); + assert!(data_col.is_null(1)); + assert!(data_col.is_null(2)); + } } From 16906c127d521395a789a9019350e467cc34d063 Mon Sep 17 00:00:00 2001 From: Lo <980321+dojiong@users.noreply.github.com> Date: Thu, 11 Dec 2025 18:32:34 +0800 Subject: [PATCH 28/58] fix: stack overflow when loading large equality deletes (#1915) ## Which issue does this PR close? - Closes #. ## What changes are included in this PR? 
A stack overflow occurs when processing data files containing a large number of equality deletes (e.g., > 6000 rows). This happens because parse_equality_deletes_record_batch_stream previously constructed the final predicate by linearly calling .and() in a loop: ```rust result_predicate = result_predicate.and(row_predicate.not()); ``` This resulted in a deeply nested, left-skewed tree structure with a depth equal to the number of rows (N). When rewrite_not() (which uses a recursive visitor pattern) was subsequently called on this structure, or when the structure was dropped, the call stack limit was exceeded. Changes 1. Balanced Tree Construction: Refactored the predicate combination logic. Instead of linear accumulation, row predicates are collected and combined using a pairwise combination approach to build a balanced tree. This reduces the tree depth from O(N) to O(log N). 2. Early Rewrite: rewrite_not() is now called immediately on each individual row predicate before they are combined. This ensures we are combining simplified predicates and avoids traversing a massive unoptimized tree later. 3. Regression Test: Added test_large_equality_delete_batch_stack_overflow, which processes 20,000 equality delete rows to verify the fix. ## Are these changes tested? - [x] New regression test test_large_equality_delete_batch_stack_overflow passed. - [x] All existing tests in arrow::caching_delete_file_loader passed. 
Co-authored-by: Renjie Liu --- .../src/arrow/caching_delete_file_loader.rs | 72 ++++++++++++++++++- 1 file changed, 69 insertions(+), 3 deletions(-) diff --git a/crates/iceberg/src/arrow/caching_delete_file_loader.rs b/crates/iceberg/src/arrow/caching_delete_file_loader.rs index 192ca390a8..250fc5e8d9 100644 --- a/crates/iceberg/src/arrow/caching_delete_file_loader.rs +++ b/crates/iceberg/src/arrow/caching_delete_file_loader.rs @@ -330,7 +330,7 @@ impl CachingDeleteFileLoader { mut stream: ArrowRecordBatchStream, equality_ids: HashSet, ) -> Result { - let mut result_predicate = AlwaysTrue; + let mut row_predicates = Vec::new(); let mut batch_schema_iceberg: Option = None; let accessor = EqDelRecordBatchPartnerAccessor; @@ -374,10 +374,29 @@ impl CachingDeleteFileLoader { row_predicate = row_predicate.and(cell_predicate) } } - result_predicate = result_predicate.and(row_predicate.not()); + row_predicates.push(row_predicate.not().rewrite_not()); } } - Ok(result_predicate.rewrite_not()) + + // All row predicates are combined to a single predicate by creating a balanced binary tree. + // Using a simple fold would result in a deeply nested predicate that can cause a stack overflow. 
+ while row_predicates.len() > 1 { + let mut next_level = Vec::with_capacity(row_predicates.len().div_ceil(2)); + let mut iter = row_predicates.into_iter(); + while let Some(p1) = iter.next() { + if let Some(p2) = iter.next() { + next_level.push(p1.and(p2)); + } else { + next_level.push(p1); + } + } + row_predicates = next_level; + } + + match row_predicates.pop() { + Some(p) => Ok(p), + None => Ok(AlwaysTrue), + } } } @@ -912,4 +931,51 @@ mod tests { result.err() ); } + + #[tokio::test] + async fn test_large_equality_delete_batch_stack_overflow() { + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().as_os_str().to_str().unwrap(); + let file_io = FileIO::from_path(table_location).unwrap().build().unwrap(); + + // Create a large batch of equality deletes + let num_rows = 20_000; + let col_y_vals: Vec = (0..num_rows).collect(); + let col_y = Arc::new(Int64Array::from(col_y_vals)) as ArrayRef; + + let schema = Arc::new(arrow_schema::Schema::new(vec![ + Field::new("y", arrow_schema::DataType::Int64, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "2".to_string(), + )])), + ])); + + let record_batch = RecordBatch::try_new(schema.clone(), vec![col_y]).unwrap(); + + // Write to file + let path = format!("{}/large-eq-deletes.parquet", &table_location); + let file = File::create(&path).unwrap(); + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + let mut writer = ArrowWriter::try_new(file, schema, Some(props)).unwrap(); + writer.write(&record_batch).unwrap(); + writer.close().unwrap(); + + let basic_delete_file_loader = BasicDeleteFileLoader::new(file_io.clone()); + let record_batch_stream = basic_delete_file_loader + .parquet_to_batch_stream(&path) + .await + .expect("could not get batch stream"); + + let eq_ids = HashSet::from_iter(vec![2]); + + let result = CachingDeleteFileLoader::parse_equality_deletes_record_batch_stream( + record_batch_stream, + eq_ids, + ) + 
.await; + + assert!(result.is_ok()); + } } From c25619df3f8f24f729d75d635b53efbead48a3ad Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Dec 2025 09:53:21 +0800 Subject: [PATCH 29/58] chore(deps): Bump actions/upload-artifact from 5 to 6 (#1931) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 5 to 6.
Release notes

Sourced from actions/upload-artifact's releases.

v6.0.0

v6 - What's new

[!IMPORTANT] actions/upload-artifact@v6 now runs on Node.js 24 (runs.using: node24) and requires a minimum Actions Runner version of 2.327.1. If you are using self-hosted runners, ensure they are updated before upgrading.

Node.js 24

This release updates the runtime to Node.js 24. v5 had preliminary support for Node.js 24, however this action was by default still running on Node.js 20. Now this action by default will run on Node.js 24.

What's Changed

Full Changelog: https://github.com/actions/upload-artifact/compare/v5.0.0...v6.0.0

Commits
  • b7c566a Merge pull request #745 from actions/upload-artifact-v6-release
  • e516bc8 docs: correct description of Node.js 24 support in README
  • ddc45ed docs: update README to correct action name for Node.js 24 support
  • 615b319 chore: release v6.0.0 for Node.js 24 support
  • 017748b Merge pull request #744 from actions/fix-storage-blob
  • 38d4c79 chore: rebuild dist
  • 7d27270 chore: add missing license cache files for @​actions/core, @​actions/io, and mi...
  • 5f643d3 chore: update license files for @​actions/artifact@​5.0.1 dependencies
  • 1df1684 chore: update package-lock.json with @​actions/artifact@​5.0.1
  • b5b1a91 fix: update @​actions/artifact to ^5.0.0 for Node.js 24 punycode fix
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/upload-artifact&package-manager=github_actions&previous-version=5&new-version=6)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release_python.yml | 4 ++-- .github/workflows/release_python_nightly.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/release_python.yml b/.github/workflows/release_python.yml index 9646085c7e..48f5a4d47a 100644 --- a/.github/workflows/release_python.yml +++ b/.github/workflows/release_python.yml @@ -107,7 +107,7 @@ jobs: command: sdist args: -o dist - name: Upload sdist - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v6 with: name: wheels-sdist path: bindings/python/dist @@ -159,7 +159,7 @@ jobs: command: build args: --release -o dist - name: Upload wheels - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v6 with: name: wheels-${{ matrix.os }}-${{ matrix.target }} path: bindings/python/dist diff --git a/.github/workflows/release_python_nightly.yml b/.github/workflows/release_python_nightly.yml index 1d6faaeee9..cda2fd6b2c 100644 --- a/.github/workflows/release_python_nightly.yml +++ b/.github/workflows/release_python_nightly.yml @@ -56,7 +56,7 @@ jobs: args: -o dist - name: Upload sdist - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v6 with: name: wheels-sdist path: bindings/python/dist @@ -102,7 +102,7 @@ jobs: args: --release -o dist - name: Upload wheels - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v6 with: name: wheels-${{ matrix.os }}-${{ matrix.target }} path: bindings/python/dist From 939617116aeeccd77b8705e5d0cfbb356491d4ea Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Dec 2025 10:09:25 +0800 Subject: [PATCH 30/58] chore(deps): Bump actions/download-artifact from 6 to 7 (#1932) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps 
[actions/download-artifact](https://github.com/actions/download-artifact) from 6 to 7.
Release notes

Sourced from actions/download-artifact's releases.

v7.0.0

v7 - What's new

[!IMPORTANT] actions/download-artifact@v7 now runs on Node.js 24 (runs.using: node24) and requires a minimum Actions Runner version of 2.327.1. If you are using self-hosted runners, ensure they are updated before upgrading.

Node.js 24

This release updates the runtime to Node.js 24. v6 had preliminary support for Node 24; however, that action still ran on Node.js 20 by default. This action now runs on Node.js 24 by default.

What's Changed

New Contributors

Full Changelog: https://github.com/actions/download-artifact/compare/v6.0.0...v7.0.0

Commits
  • 37930b1 Merge pull request #452 from actions/download-artifact-v7-release
  • 72582b9 doc: update readme
  • 0d2ec9d chore: release v7.0.0 for Node.js 24 support
  • fd7ae8f Merge pull request #451 from actions/fix-storage-blob
  • d484700 chore: restore minimatch.dep.yml license file
  • 03a8080 chore: remove obsolete dependency license files
  • 56fe6d9 chore: update @actions/artifact license file to 5.0.1
  • 8e3ebc4 chore: update package-lock.json with @actions/artifact@5.0.1
  • 1e3c4b4 fix: update @actions/artifact to ^5.0.0 for Node.js 24 punycode fix
  • 458627d chore: use local @actions/artifact package for Node.js 24 testing
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/download-artifact&package-manager=github_actions&previous-version=6&new-version=7)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release_python.yml | 2 +- .github/workflows/release_python_nightly.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release_python.yml b/.github/workflows/release_python.yml index 48f5a4d47a..85663fc75f 100644 --- a/.github/workflows/release_python.yml +++ b/.github/workflows/release_python.yml @@ -178,7 +178,7 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v6 + uses: actions/download-artifact@v7 with: pattern: wheels-* merge-multiple: true diff --git a/.github/workflows/release_python_nightly.yml b/.github/workflows/release_python_nightly.yml index cda2fd6b2c..833b8ee6a6 100644 --- a/.github/workflows/release_python_nightly.yml +++ b/.github/workflows/release_python_nightly.yml @@ -120,7 +120,7 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v6 + uses: actions/download-artifact@v7 with: pattern: wheels-* merge-multiple: true From 36aedc6fbccec5a25d649d1792923d140ed83cbd Mon Sep 17 00:00:00 2001 From: Landon Gingerich Date: Mon, 15 Dec 2025 04:16:15 -0600 Subject: [PATCH 31/58] Remove wildcard pattern in exhaustive enums (#1925) ## Which issue does this PR close? - Closes https://github.com/apache/iceberg-rust/issues/1924. ## What changes are included in this PR? - For match expression on `PrimitiveType`, removes wildcard and explicitly handles all enum variants. - Removes support of `PrimititveType::TimestampzNs` for HMS and Glue. - Hive/HMS does not support timezone aware timestamps, per [Hive docs](https://cwiki.apache.org/confluence/display/hive/languagemanual+types#LanguageManualTypes-TimestampstimestampTimestamps). - Glue uses the Hive type system so it should also be disabled for Glue. ## Are these changes tested? 
Tested with existing test suite --- crates/catalog/glue/src/schema.rs | 13 ++++++------- crates/catalog/hms/src/schema.rs | 13 ++++++------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/crates/catalog/glue/src/schema.rs b/crates/catalog/glue/src/schema.rs index cfd7487973..864320dae4 100644 --- a/crates/catalog/glue/src/schema.rs +++ b/crates/catalog/glue/src/schema.rs @@ -165,7 +165,12 @@ impl SchemaVisitor for GlueSchemaBuilder { PrimitiveType::Date => "date".to_string(), PrimitiveType::Timestamp => "timestamp".to_string(), PrimitiveType::TimestampNs => "timestamp_ns".to_string(), - PrimitiveType::TimestamptzNs => "timestamptz_ns".to_string(), + PrimitiveType::Timestamptz | PrimitiveType::TimestamptzNs => { + return Err(Error::new( + ErrorKind::FeatureUnsupported, + format!("Conversion from {p:?} is not supported"), + )); + } PrimitiveType::Time | PrimitiveType::String | PrimitiveType::Uuid => { "string".to_string() } @@ -173,12 +178,6 @@ impl SchemaVisitor for GlueSchemaBuilder { PrimitiveType::Decimal { precision, scale } => { format!("decimal({precision},{scale})") } - _ => { - return Err(Error::new( - ErrorKind::FeatureUnsupported, - "Conversion from 'Timestamptz' is not supported", - )); - } }; Ok(glue_type) diff --git a/crates/catalog/hms/src/schema.rs b/crates/catalog/hms/src/schema.rs index 8893a80521..c23b48719d 100644 --- a/crates/catalog/hms/src/schema.rs +++ b/crates/catalog/hms/src/schema.rs @@ -122,7 +122,12 @@ impl SchemaVisitor for HiveSchemaBuilder { PrimitiveType::Date => "date".to_string(), PrimitiveType::Timestamp => "timestamp".to_string(), PrimitiveType::TimestampNs => "timestamp_ns".to_string(), - PrimitiveType::TimestamptzNs => "timestamptz_ns".to_string(), + PrimitiveType::Timestamptz | PrimitiveType::TimestamptzNs => { + return Err(Error::new( + ErrorKind::FeatureUnsupported, + format!("Conversion from {p:?} is not supported"), + )); + } PrimitiveType::Time | PrimitiveType::String | PrimitiveType::Uuid => { 
"string".to_string() } @@ -130,12 +135,6 @@ impl SchemaVisitor for HiveSchemaBuilder { PrimitiveType::Decimal { precision, scale } => { format!("decimal({precision},{scale})") } - _ => { - return Err(Error::new( - ErrorKind::FeatureUnsupported, - "Conversion from 'Timestamptz' is not supported", - )); - } }; Ok(hive_type) From 26a7606e0f04d5175e452a809c7bdceb47ba2c07 Mon Sep 17 00:00:00 2001 From: Lo <980321+dojiong@users.noreply.github.com> Date: Mon, 15 Dec 2025 18:30:10 +0800 Subject: [PATCH 32/58] fix: prioritize delete manifests to prevent scan deadlock (#1937) ## Which issue does this PR close? - Closes #. ## What changes are included in this PR? This change ensures that delete manifests are processed before data manifests during the table scan planning phase. Previously, if data manifests were processed first and produced enough entries to fill the channel, the producer would block. Since the delete manifest consumer might still be waiting for its entries (which hadn't been produced yet), this could lead to a deadlock. Prioritizing delete manifests ensures the delete consumer can proceed, allowing the data consumer to eventually drain the channel. ## Are these changes tested? Added a reproduction test case `test_scan_deadlock` to verify the fix. --- crates/iceberg/src/scan/context.rs | 12 ++- crates/iceberg/src/scan/mod.rs | 125 +++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 1 deletion(-) diff --git a/crates/iceberg/src/scan/context.rs b/crates/iceberg/src/scan/context.rs index fe3f5c8f7e..f28b6b0901 100644 --- a/crates/iceberg/src/scan/context.rs +++ b/crates/iceberg/src/scan/context.rs @@ -194,7 +194,17 @@ impl PlanContext { delete_file_idx: DeleteFileIndex, delete_file_tx: Sender, ) -> Result> + 'static>> { - let manifest_files = manifest_list.entries().iter(); + let mut manifest_files = manifest_list.entries().iter().collect::>(); + // Sort manifest files to process delete manifests first. 
+ // This avoids a deadlock where the producer blocks on sending data manifest entries + // (because the data channel is full) while the delete manifest consumer is waiting + // for delete manifest entries (which haven't been produced yet). + // By processing delete manifests first, we ensure the delete consumer can finish, + // which then allows the data consumer to start draining the data channel. + manifest_files.sort_by_key(|m| match m.content { + ManifestContentType::Deletes => 0, + ManifestContentType::Data => 1, + }); // TODO: Ideally we could ditch this intermediate Vec as we return an iterator. let mut filtered_mfcs = vec![]; diff --git a/crates/iceberg/src/scan/mod.rs b/crates/iceberg/src/scan/mod.rs index d83da8a879..1f7fa50df8 100644 --- a/crates/iceberg/src/scan/mod.rs +++ b/crates/iceberg/src/scan/mod.rs @@ -1170,6 +1170,97 @@ pub mod tests { writer.close().unwrap(); } } + + pub async fn setup_deadlock_manifests(&mut self) { + let current_snapshot = self.table.metadata().current_snapshot().unwrap(); + let _parent_snapshot = current_snapshot + .parent_snapshot(self.table.metadata()) + .unwrap(); + let current_schema = current_snapshot.schema(self.table.metadata()).unwrap(); + let current_partition_spec = self.table.metadata().default_partition_spec(); + + // 1. 
Write DATA manifest with MULTIPLE entries to fill buffer + let mut writer = ManifestWriterBuilder::new( + self.next_manifest_file(), + Some(current_snapshot.snapshot_id()), + None, + current_schema.clone(), + current_partition_spec.as_ref().clone(), + ) + .build_v2_data(); + + // Add 10 data entries + for i in 0..10 { + writer + .add_entry( + ManifestEntry::builder() + .status(ManifestStatus::Added) + .data_file( + DataFileBuilder::default() + .partition_spec_id(0) + .content(DataContentType::Data) + .file_path(format!("{}/{}.parquet", &self.table_location, i)) + .file_format(DataFileFormat::Parquet) + .file_size_in_bytes(100) + .record_count(1) + .partition(Struct::from_iter([Some(Literal::long(100))])) + .key_metadata(None) + .build() + .unwrap(), + ) + .build(), + ) + .unwrap(); + } + let data_manifest = writer.write_manifest_file().await.unwrap(); + + // 2. Write DELETE manifest + let mut writer = ManifestWriterBuilder::new( + self.next_manifest_file(), + Some(current_snapshot.snapshot_id()), + None, + current_schema.clone(), + current_partition_spec.as_ref().clone(), + ) + .build_v2_deletes(); + + writer + .add_entry( + ManifestEntry::builder() + .status(ManifestStatus::Added) + .data_file( + DataFileBuilder::default() + .partition_spec_id(0) + .content(DataContentType::PositionDeletes) + .file_path(format!("{}/del.parquet", &self.table_location)) + .file_format(DataFileFormat::Parquet) + .file_size_in_bytes(100) + .record_count(1) + .partition(Struct::from_iter([Some(Literal::long(100))])) + .build() + .unwrap(), + ) + .build(), + ) + .unwrap(); + let delete_manifest = writer.write_manifest_file().await.unwrap(); + + // Write to manifest list - DATA FIRST then DELETE + // This order is crucial for reproduction + let mut manifest_list_write = ManifestListWriter::v2( + self.table + .file_io() + .new_output(current_snapshot.manifest_list()) + .unwrap(), + current_snapshot.snapshot_id(), + current_snapshot.parent_snapshot_id(), + 
current_snapshot.sequence_number(), + ); + manifest_list_write + .add_manifests(vec![data_manifest, delete_manifest].into_iter()) + .unwrap(); + manifest_list_write.close().await.unwrap(); + } } #[test] @@ -2127,4 +2218,38 @@ pub mod tests { "_file column (duplicate) should use RunEndEncoded type" ); } + + #[tokio::test] + async fn test_scan_deadlock() { + let mut fixture = TableTestFixture::new(); + fixture.setup_deadlock_manifests().await; + + // Create table scan with concurrency limit 1 + // This sets channel size to 1. + // Data manifest has 10 entries -> will block producer. + // Delete manifest is 2nd in list -> won't be processed. + // Consumer 2 (Data) not started -> blocked. + // Consumer 1 (Delete) waiting -> blocked. + let table_scan = fixture + .table + .scan() + .with_concurrency_limit(1) + .build() + .unwrap(); + + // This should timeout/hang if deadlock exists + // We can use tokio::time::timeout + let result = tokio::time::timeout(std::time::Duration::from_secs(5), async { + table_scan + .plan_files() + .await + .unwrap() + .try_collect::>() + .await + }) + .await; + + // Assert it finished (didn't timeout) + assert!(result.is_ok(), "Scan timed out - deadlock detected"); + } } From b047baa476a46dd83e8d5fcd2b586a8b95f46d92 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 15 Dec 2025 12:01:56 +0100 Subject: [PATCH 33/58] feat: Include statistics for Reserved Fields (#1849) This is a behavioral change. In Iceberg-Rust we require upper/lower bounds to be part of the schema. But in some cases, this isn't the case, for example when you use reserved fields. 
In PyIceberg we expect these values in some tests: ``` FAILED tests/integration/test_inspect_table.py::test_inspect_files[2] - AssertionError: Difference in column lower_bounds: {} != {2147483546: b's3://warehouse/default/table_metadata_files/data/00000-0-8d621c18-079b-4217-afd8-559ce216e875.parquet', 2147483545: b'\x00\x00\x00\x00\x00\x00\x00\x00'} assert {} == {2147483545: ...e875.parquet'} Right contains 2 more items: {2147483545: b'\x00\x00\x00\x00\x00\x00\x00\x00', 2147483546: b's3://warehouse/default/table_metadata_files/data/00000-0-8d621c1' b'8-079b-4217-afd8-559ce216e875.parquet'} Full diff: { + , - 2147483545: b'\x00\x00\x00\x00\x00\x00\x00\x00', - 2147483546: b's3://warehouse/default/table_metadata_files/data/00000-0-8d621c1' - b'8-079b-4217-afd8-559ce216e875.parquet', } !!!!!!!!!!!!!!!!!!!!!!!!!! stopping after 1 failures !!!!!!!!!!!!!!!!!!!!!!!!!!! ==== 1 failed, 238 passed, 32 skipped, 3123 deselected in 61.56s (0:01:01) ===== ``` This is a positional delete where the field-IDs are constant, but never part of a schema (they are reserved). ## Which issue does this PR close? - Closes #. ## What changes are included in this PR? ## Are these changes tested? 
--------- Co-authored-by: Kevin Liu Co-authored-by: Renjie Liu --- crates/iceberg/src/metadata_columns.rs | 460 ++++++++++++++++++++- crates/iceberg/src/spec/manifest/_serde.rs | 42 +- 2 files changed, 492 insertions(+), 10 deletions(-) diff --git a/crates/iceberg/src/metadata_columns.rs b/crates/iceberg/src/metadata_columns.rs index b388e40c4a..b622a76edc 100644 --- a/crates/iceberg/src/metadata_columns.rs +++ b/crates/iceberg/src/metadata_columns.rs @@ -32,11 +32,74 @@ use crate::{Error, ErrorKind, Result}; /// Reserved field ID for the file path (_file) column per Iceberg spec pub const RESERVED_FIELD_ID_FILE: i32 = i32::MAX - 1; +/// Reserved field ID for the position (_pos) column per Iceberg spec +pub const RESERVED_FIELD_ID_POS: i32 = i32::MAX - 2; + +/// Reserved field ID for the deleted (_deleted) column per Iceberg spec +pub const RESERVED_FIELD_ID_DELETED: i32 = i32::MAX - 3; + +/// Reserved field ID for the spec ID (_spec_id) column per Iceberg spec +pub const RESERVED_FIELD_ID_SPEC_ID: i32 = i32::MAX - 4; + +/// Reserved field ID for the partition (_partition) column per Iceberg spec +pub const RESERVED_FIELD_ID_PARTITION: i32 = i32::MAX - 5; + +/// Reserved field ID for the file path in position delete files +pub const RESERVED_FIELD_ID_DELETE_FILE_PATH: i32 = i32::MAX - 101; + +/// Reserved field ID for the position in position delete files +pub const RESERVED_FIELD_ID_DELETE_FILE_POS: i32 = i32::MAX - 102; + +/// Reserved field ID for the change type (_change_type) column per Iceberg spec +pub const RESERVED_FIELD_ID_CHANGE_TYPE: i32 = i32::MAX - 104; + +/// Reserved field ID for the change ordinal (_change_ordinal) column per Iceberg spec +pub const RESERVED_FIELD_ID_CHANGE_ORDINAL: i32 = i32::MAX - 105; + +/// Reserved field ID for the commit snapshot ID (_commit_snapshot_id) column per Iceberg spec +pub const RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID: i32 = i32::MAX - 106; + +/// Reserved field ID for the row ID (_row_id) column per Iceberg spec +pub 
const RESERVED_FIELD_ID_ROW_ID: i32 = i32::MAX - 107; + +/// Reserved field ID for the last updated sequence number (_last_updated_sequence_number) column per Iceberg spec +pub const RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER: i32 = i32::MAX - 108; + /// Reserved column name for the file path metadata column pub const RESERVED_COL_NAME_FILE: &str = "_file"; -/// Documentation for the _file metadata column -pub const RESERVED_COL_DOC_FILE: &str = "Path of the file in which a row is stored"; +/// Reserved column name for the position metadata column +pub const RESERVED_COL_NAME_POS: &str = "_pos"; + +/// Reserved column name for the deleted metadata column +pub const RESERVED_COL_NAME_DELETED: &str = "_deleted"; + +/// Reserved column name for the spec ID metadata column +pub const RESERVED_COL_NAME_SPEC_ID: &str = "_spec_id"; + +/// Reserved column name for the partition metadata column +pub const RESERVED_COL_NAME_PARTITION: &str = "_partition"; + +/// Reserved column name for the file path in position delete files +pub const RESERVED_COL_NAME_DELETE_FILE_PATH: &str = "file_path"; + +/// Reserved column name for the position in position delete files +pub const RESERVED_COL_NAME_DELETE_FILE_POS: &str = "pos"; + +/// Reserved column name for the change type metadata column +pub const RESERVED_COL_NAME_CHANGE_TYPE: &str = "_change_type"; + +/// Reserved column name for the change ordinal metadata column +pub const RESERVED_COL_NAME_CHANGE_ORDINAL: &str = "_change_ordinal"; + +/// Reserved column name for the commit snapshot ID metadata column +pub const RESERVED_COL_NAME_COMMIT_SNAPSHOT_ID: &str = "_commit_snapshot_id"; + +/// Reserved column name for the row ID metadata column +pub const RESERVED_COL_NAME_ROW_ID: &str = "_row_id"; + +/// Reserved column name for the last updated sequence number metadata column +pub const RESERVED_COL_NAME_LAST_UPDATED_SEQUENCE_NUMBER: &str = "_last_updated_sequence_number"; /// Lazy-initialized Iceberg field definition for the 
_file metadata column. /// This field represents the file path as a required string field. @@ -47,7 +110,139 @@ static FILE_FIELD: Lazy = Lazy::new(|| { RESERVED_COL_NAME_FILE, Type::Primitive(PrimitiveType::String), ) - .with_doc(RESERVED_COL_DOC_FILE), + .with_doc("Path of the file in which a row is stored"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _pos metadata column. +/// This field represents the ordinal position of a row in the source data file. +static POS_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_POS, + RESERVED_COL_NAME_POS, + Type::Primitive(PrimitiveType::Long), + ) + .with_doc("Ordinal position of a row in the source data file"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _deleted metadata column. +/// This field indicates whether a row has been deleted. +static DELETED_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_DELETED, + RESERVED_COL_NAME_DELETED, + Type::Primitive(PrimitiveType::Boolean), + ) + .with_doc("Whether the row has been deleted"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _spec_id metadata column. +/// This field represents the spec ID used to track the file containing a row. +static SPEC_ID_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_SPEC_ID, + RESERVED_COL_NAME_SPEC_ID, + Type::Primitive(PrimitiveType::Int), + ) + .with_doc("Spec ID used to track the file containing a row"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the file_path column in position delete files. +/// This field represents the path of a file in position-based delete files. 
+static DELETE_FILE_PATH_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_DELETE_FILE_PATH, + RESERVED_COL_NAME_DELETE_FILE_PATH, + Type::Primitive(PrimitiveType::String), + ) + .with_doc("Path of a file, used in position-based delete files"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the pos column in position delete files. +/// This field represents the ordinal position of a row in position-based delete files. +static DELETE_FILE_POS_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_DELETE_FILE_POS, + RESERVED_COL_NAME_DELETE_FILE_POS, + Type::Primitive(PrimitiveType::Long), + ) + .with_doc("Ordinal position of a row, used in position-based delete files"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _change_type metadata column. +/// This field represents the record type in the changelog. +static CHANGE_TYPE_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_CHANGE_TYPE, + RESERVED_COL_NAME_CHANGE_TYPE, + Type::Primitive(PrimitiveType::String), + ) + .with_doc( + "The record type in the changelog (INSERT, DELETE, UPDATE_BEFORE, or UPDATE_AFTER)", + ), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _change_ordinal metadata column. +/// This field represents the order of the change. +static CHANGE_ORDINAL_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_CHANGE_ORDINAL, + RESERVED_COL_NAME_CHANGE_ORDINAL, + Type::Primitive(PrimitiveType::Int), + ) + .with_doc("The order of the change"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _commit_snapshot_id metadata column. +/// This field represents the snapshot ID in which the change occurred. 
+static COMMIT_SNAPSHOT_ID_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID, + RESERVED_COL_NAME_COMMIT_SNAPSHOT_ID, + Type::Primitive(PrimitiveType::Long), + ) + .with_doc("The snapshot ID in which the change occurred"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _row_id metadata column. +/// This field represents a unique long assigned for row lineage. +static ROW_ID_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_ROW_ID, + RESERVED_COL_NAME_ROW_ID, + Type::Primitive(PrimitiveType::Long), + ) + .with_doc("A unique long assigned for row lineage"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _last_updated_sequence_number metadata column. +/// This field represents the sequence number which last updated this row. +static LAST_UPDATED_SEQUENCE_NUMBER_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER, + RESERVED_COL_NAME_LAST_UPDATED_SEQUENCE_NUMBER, + Type::Primitive(PrimitiveType::Long), + ) + .with_doc("The sequence number which last updated this row"), ) }); @@ -59,16 +254,159 @@ pub fn file_field() -> &'static NestedFieldRef { &FILE_FIELD } +/// Returns the Iceberg field definition for the _pos metadata column. +/// +/// # Returns +/// A reference to the _pos field definition as an Iceberg NestedField +pub fn pos_field() -> &'static NestedFieldRef { + &POS_FIELD +} + +/// Returns the Iceberg field definition for the _deleted metadata column. +/// +/// # Returns +/// A reference to the _deleted field definition as an Iceberg NestedField +pub fn deleted_field() -> &'static NestedFieldRef { + &DELETED_FIELD +} + +/// Returns the Iceberg field definition for the _spec_id metadata column. 
+/// +/// # Returns +/// A reference to the _spec_id field definition as an Iceberg NestedField +pub fn spec_id_field() -> &'static NestedFieldRef { + &SPEC_ID_FIELD +} + +/// Returns the Iceberg field definition for the file_path column in position delete files. +/// +/// # Returns +/// A reference to the file_path field definition as an Iceberg NestedField +pub fn delete_file_path_field() -> &'static NestedFieldRef { + &DELETE_FILE_PATH_FIELD +} + +/// Returns the Iceberg field definition for the pos column in position delete files. +/// +/// # Returns +/// A reference to the pos field definition as an Iceberg NestedField +pub fn delete_file_pos_field() -> &'static NestedFieldRef { + &DELETE_FILE_POS_FIELD +} + +/// Returns the Iceberg field definition for the _change_type metadata column. +/// +/// # Returns +/// A reference to the _change_type field definition as an Iceberg NestedField +pub fn change_type_field() -> &'static NestedFieldRef { + &CHANGE_TYPE_FIELD +} + +/// Returns the Iceberg field definition for the _change_ordinal metadata column. +/// +/// # Returns +/// A reference to the _change_ordinal field definition as an Iceberg NestedField +pub fn change_ordinal_field() -> &'static NestedFieldRef { + &CHANGE_ORDINAL_FIELD +} + +/// Returns the Iceberg field definition for the _commit_snapshot_id metadata column. +/// +/// # Returns +/// A reference to the _commit_snapshot_id field definition as an Iceberg NestedField +pub fn commit_snapshot_id_field() -> &'static NestedFieldRef { + &COMMIT_SNAPSHOT_ID_FIELD +} + +/// Returns the Iceberg field definition for the _row_id metadata column. +/// +/// # Returns +/// A reference to the _row_id field definition as an Iceberg NestedField +pub fn row_id_field() -> &'static NestedFieldRef { + &ROW_ID_FIELD +} + +/// Returns the Iceberg field definition for the _last_updated_sequence_number metadata column. 
+/// +/// # Returns +/// A reference to the _last_updated_sequence_number field definition as an Iceberg NestedField +pub fn last_updated_sequence_number_field() -> &'static NestedFieldRef { + &LAST_UPDATED_SEQUENCE_NUMBER_FIELD +} + +/// Creates the Iceberg field definition for the _partition metadata column. +/// +/// The _partition field is a struct whose fields depend on the partition spec. +/// This function creates the field dynamically with the provided partition fields. +/// +/// # Arguments +/// * `partition_fields` - The fields that make up the partition struct +/// +/// # Returns +/// A new _partition field definition as an Iceberg NestedField +/// +/// # Example +/// ``` +/// use std::sync::Arc; +/// +/// use iceberg::metadata_columns::partition_field; +/// use iceberg::spec::{NestedField, PrimitiveType, Type}; +/// +/// let fields = vec![ +/// Arc::new(NestedField::required( +/// 1, +/// "year", +/// Type::Primitive(PrimitiveType::Int), +/// )), +/// Arc::new(NestedField::required( +/// 2, +/// "month", +/// Type::Primitive(PrimitiveType::Int), +/// )), +/// ]; +/// let partition_field = partition_field(fields); +/// ``` +pub fn partition_field(partition_fields: Vec) -> NestedFieldRef { + use crate::spec::StructType; + + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_PARTITION, + RESERVED_COL_NAME_PARTITION, + Type::Struct(StructType::new(partition_fields)), + ) + .with_doc("Partition to which a row belongs"), + ) +} + /// Returns the Iceberg field definition for a metadata field ID. /// +/// Note: This function does not support `_partition` (field ID `i32::MAX - 5`) because +/// it's a struct field that requires dynamic partition fields. Use `partition_field()` +/// instead to create the `_partition` field with the appropriate partition fields. 
+/// /// # Arguments /// * `field_id` - The metadata field ID /// /// # Returns /// The Iceberg field definition for the metadata column, or an error if not a metadata field -pub fn get_metadata_field(field_id: i32) -> Result { +pub fn get_metadata_field(field_id: i32) -> Result<&'static NestedFieldRef> { match field_id { - RESERVED_FIELD_ID_FILE => Ok(Arc::clone(file_field())), + RESERVED_FIELD_ID_FILE => Ok(file_field()), + RESERVED_FIELD_ID_POS => Ok(pos_field()), + RESERVED_FIELD_ID_DELETED => Ok(deleted_field()), + RESERVED_FIELD_ID_SPEC_ID => Ok(spec_id_field()), + RESERVED_FIELD_ID_PARTITION => Err(Error::new( + ErrorKind::Unexpected, + "The _partition field must be created using partition_field() with appropriate partition fields", + )), + RESERVED_FIELD_ID_DELETE_FILE_PATH => Ok(delete_file_path_field()), + RESERVED_FIELD_ID_DELETE_FILE_POS => Ok(delete_file_pos_field()), + RESERVED_FIELD_ID_CHANGE_TYPE => Ok(change_type_field()), + RESERVED_FIELD_ID_CHANGE_ORDINAL => Ok(change_ordinal_field()), + RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID => Ok(commit_snapshot_id_field()), + RESERVED_FIELD_ID_ROW_ID => Ok(row_id_field()), + RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER => Ok(last_updated_sequence_number_field()), _ if is_metadata_field(field_id) => { // Future metadata fields can be added here Err(Error::new( @@ -95,6 +433,19 @@ pub fn get_metadata_field(field_id: i32) -> Result { pub fn get_metadata_field_id(column_name: &str) -> Result { match column_name { RESERVED_COL_NAME_FILE => Ok(RESERVED_FIELD_ID_FILE), + RESERVED_COL_NAME_POS => Ok(RESERVED_FIELD_ID_POS), + RESERVED_COL_NAME_DELETED => Ok(RESERVED_FIELD_ID_DELETED), + RESERVED_COL_NAME_SPEC_ID => Ok(RESERVED_FIELD_ID_SPEC_ID), + RESERVED_COL_NAME_PARTITION => Ok(RESERVED_FIELD_ID_PARTITION), + RESERVED_COL_NAME_DELETE_FILE_PATH => Ok(RESERVED_FIELD_ID_DELETE_FILE_PATH), + RESERVED_COL_NAME_DELETE_FILE_POS => Ok(RESERVED_FIELD_ID_DELETE_FILE_POS), + RESERVED_COL_NAME_CHANGE_TYPE => 
Ok(RESERVED_FIELD_ID_CHANGE_TYPE), + RESERVED_COL_NAME_CHANGE_ORDINAL => Ok(RESERVED_FIELD_ID_CHANGE_ORDINAL), + RESERVED_COL_NAME_COMMIT_SNAPSHOT_ID => Ok(RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID), + RESERVED_COL_NAME_ROW_ID => Ok(RESERVED_FIELD_ID_ROW_ID), + RESERVED_COL_NAME_LAST_UPDATED_SEQUENCE_NUMBER => { + Ok(RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER) + } _ => Err(Error::new( ErrorKind::Unexpected, format!("Unknown/unsupported metadata column name: {column_name}"), @@ -110,8 +461,21 @@ pub fn get_metadata_field_id(column_name: &str) -> Result { /// # Returns /// `true` if the field ID is a (currently supported) metadata field, `false` otherwise pub fn is_metadata_field(field_id: i32) -> bool { - field_id == RESERVED_FIELD_ID_FILE - // Additional metadata fields can be checked here in the future + matches!( + field_id, + RESERVED_FIELD_ID_FILE + | RESERVED_FIELD_ID_POS + | RESERVED_FIELD_ID_DELETED + | RESERVED_FIELD_ID_SPEC_ID + | RESERVED_FIELD_ID_PARTITION + | RESERVED_FIELD_ID_DELETE_FILE_PATH + | RESERVED_FIELD_ID_DELETE_FILE_POS + | RESERVED_FIELD_ID_CHANGE_TYPE + | RESERVED_FIELD_ID_CHANGE_ORDINAL + | RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID + | RESERVED_FIELD_ID_ROW_ID + | RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER + ) } /// Checks if a column name is a metadata column. 
@@ -124,3 +488,85 @@ pub fn is_metadata_field(field_id: i32) -> bool { pub fn is_metadata_column_name(column_name: &str) -> bool { get_metadata_field_id(column_name).is_ok() } + +#[cfg(test)] +mod tests { + use super::*; + use crate::spec::PrimitiveType; + + #[test] + fn test_partition_field_creation() { + // Create partition fields for a hypothetical year/month partition + let partition_fields = vec![ + Arc::new(NestedField::required( + 1000, + "year", + Type::Primitive(PrimitiveType::Int), + )), + Arc::new(NestedField::required( + 1001, + "month", + Type::Primitive(PrimitiveType::Int), + )), + ]; + + // Create the _partition metadata field + let partition = partition_field(partition_fields); + + // Verify field properties + assert_eq!(partition.id, RESERVED_FIELD_ID_PARTITION); + assert_eq!(partition.name, RESERVED_COL_NAME_PARTITION); + assert!(partition.required); + + // Verify it's a struct type with correct fields + if let Type::Struct(struct_type) = partition.field_type.as_ref() { + assert_eq!(struct_type.fields().len(), 2); + assert_eq!(struct_type.fields()[0].name, "year"); + assert_eq!(struct_type.fields()[1].name, "month"); + } else { + panic!("Expected struct type for _partition field"); + } + } + + #[test] + fn test_partition_field_id_recognized() { + assert!(is_metadata_field(RESERVED_FIELD_ID_PARTITION)); + } + + #[test] + fn test_partition_field_name_recognized() { + assert_eq!( + get_metadata_field_id(RESERVED_COL_NAME_PARTITION).unwrap(), + RESERVED_FIELD_ID_PARTITION + ); + } + + #[test] + fn test_get_metadata_field_returns_error_for_partition() { + // partition field requires dynamic creation, so get_metadata_field should return an error + let result = get_metadata_field(RESERVED_FIELD_ID_PARTITION); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("partition_field()") + ); + } + + #[test] + fn test_all_metadata_field_ids() { + // Test that all non-partition metadata fields can be retrieved + 
assert!(get_metadata_field(RESERVED_FIELD_ID_FILE).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_POS).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_DELETED).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_SPEC_ID).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_DELETE_FILE_PATH).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_DELETE_FILE_POS).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_CHANGE_TYPE).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_CHANGE_ORDINAL).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_ROW_ID).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER).is_ok()); + } +} diff --git a/crates/iceberg/src/spec/manifest/_serde.rs b/crates/iceberg/src/spec/manifest/_serde.rs index 07306be2b9..247b6dde5f 100644 --- a/crates/iceberg/src/spec/manifest/_serde.rs +++ b/crates/iceberg/src/spec/manifest/_serde.rs @@ -22,7 +22,7 @@ use serde_with::serde_as; use super::{Datum, ManifestEntry, Schema, Struct}; use crate::spec::{FormatVersion, Literal, RawLiteral, StructType, Type}; -use crate::{Error, ErrorKind}; +use crate::{Error, ErrorKind, metadata_columns}; #[derive(Serialize, Deserialize)] pub(super) struct ManifestEntryV2 { @@ -245,8 +245,12 @@ struct BytesEntry { fn parse_bytes_entry(v: Vec, schema: &Schema) -> Result, Error> { let mut m = HashMap::with_capacity(v.len()); for entry in v { - // We ignore the entry if the field is not found in the schema, due to schema evolution. 
- if let Some(field) = schema.field_by_id(entry.key) { + // First try to find the field in the schema, or check if it's a reserved metadata field + let field = schema + .field_by_id(entry.key) + .or_else(|| metadata_columns::get_metadata_field(entry.key).ok()); + + if let Some(field) = field { let data_type = field .field_type .as_primitive_type() @@ -259,6 +263,7 @@ fn parse_bytes_entry(v: Vec, schema: &Schema) -> Result Date: Tue, 16 Dec 2025 20:22:07 +0800 Subject: [PATCH 34/58] Add case-sensitive support for equality deletes in DeleteFilter (#1930) --- .../src/arrow/caching_delete_file_loader.rs | 1 + crates/iceberg/src/arrow/delete_filter.rs | 62 ++++++++++++++++++- crates/iceberg/src/arrow/reader.rs | 15 +++++ crates/iceberg/src/scan/context.rs | 5 ++ crates/iceberg/src/scan/mod.rs | 2 + crates/iceberg/src/scan/task.rs | 3 + 6 files changed, 85 insertions(+), 3 deletions(-) diff --git a/crates/iceberg/src/arrow/caching_delete_file_loader.rs b/crates/iceberg/src/arrow/caching_delete_file_loader.rs index 250fc5e8d9..aceeae49f7 100644 --- a/crates/iceberg/src/arrow/caching_delete_file_loader.rs +++ b/crates/iceberg/src/arrow/caching_delete_file_loader.rs @@ -911,6 +911,7 @@ mod tests { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; // Load the deletes - should handle both types without error diff --git a/crates/iceberg/src/arrow/delete_filter.rs b/crates/iceberg/src/arrow/delete_filter.rs index 14b5124ee6..d05e028997 100644 --- a/crates/iceberg/src/arrow/delete_filter.rs +++ b/crates/iceberg/src/arrow/delete_filter.rs @@ -141,8 +141,8 @@ impl DeleteFilter { return Ok(None); } - // TODO: handle case-insensitive case - let bound_predicate = combined_predicate.bind(file_scan_task.schema.clone(), false)?; + let bound_predicate = combined_predicate + .bind(file_scan_task.schema.clone(), file_scan_task.case_sensitive)?; Ok(Some(bound_predicate)) } @@ -211,8 +211,9 @@ pub(crate) mod tests { use super::*; use 
crate::arrow::caching_delete_file_loader::CachingDeleteFileLoader; + use crate::expr::Reference; use crate::io::FileIO; - use crate::spec::{DataFileFormat, Schema}; + use crate::spec::{DataFileFormat, Datum, NestedField, PrimitiveType, Schema, Type}; type ArrowSchemaRef = Arc; @@ -344,6 +345,7 @@ pub(crate) mod tests { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }, FileScanTask { start: 0, @@ -358,6 +360,7 @@ pub(crate) mod tests { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }, ]; @@ -380,4 +383,57 @@ pub(crate) mod tests { ]; Arc::new(arrow_schema::Schema::new(fields)) } + + #[tokio::test] + async fn test_build_equality_delete_predicate_case_sensitive() { + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required(1, "Id", Type::Primitive(PrimitiveType::Long)).into(), + ]) + .build() + .unwrap(), + ); + + // ---------- fake FileScanTask ---------- + let task = FileScanTask { + start: 0, + length: 0, + record_count: None, + data_file_path: "data.parquet".to_string(), + data_file_format: crate::spec::DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![], + predicate: None, + deletes: vec![FileScanTaskDeleteFile { + file_path: "eq-del.parquet".to_string(), + file_type: DataContentType::EqualityDeletes, + partition_spec_id: 0, + equality_ids: None, + }], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: true, + }; + + let filter = DeleteFilter::default(); + + // ---------- insert equality delete predicate ---------- + let pred = Reference::new("id").equal_to(Datum::long(10)); + + let (tx, rx) = tokio::sync::oneshot::channel(); + filter.insert_equality_delete("eq-del.parquet", rx); + + tx.send(pred).unwrap(); + + // ---------- should FAIL ---------- + let result = filter.build_equality_delete_predicate(&task).await; + + assert!( + result.is_err(), + "case_sensitive=true should fail 
when column case mismatches" + ); + } } diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs index 6209c1e261..f7f90663a5 100644 --- a/crates/iceberg/src/arrow/reader.rs +++ b/crates/iceberg/src/arrow/reader.rs @@ -2082,6 +2082,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -2403,6 +2404,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; // Task 2: read the second and third row groups @@ -2419,6 +2421,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; let tasks1 = Box::pin(futures::stream::iter(vec![Ok(task1)])) as FileScanTaskStream; @@ -2546,6 +2549,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -2717,6 +2721,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream; @@ -2934,6 +2939,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream; @@ -3144,6 +3150,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream; @@ -3247,6 +3254,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3344,6 +3352,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3430,6 +3439,7 @@ message schema { partition: None, partition_spec: 
None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3530,6 +3540,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3659,6 +3670,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3755,6 +3767,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3864,6 +3877,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -4003,6 +4017,7 @@ message schema { partition: Some(partition_data), partition_spec: Some(partition_spec), name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; diff --git a/crates/iceberg/src/scan/context.rs b/crates/iceberg/src/scan/context.rs index f28b6b0901..169d8e6405 100644 --- a/crates/iceberg/src/scan/context.rs +++ b/crates/iceberg/src/scan/context.rs @@ -46,6 +46,7 @@ pub(crate) struct ManifestFileContext { snapshot_schema: SchemaRef, expression_evaluator_cache: Arc, delete_file_index: DeleteFileIndex, + case_sensitive: bool, } /// Wraps a [`ManifestEntryRef`] alongside the objects that are needed @@ -59,6 +60,7 @@ pub(crate) struct ManifestEntryContext { pub partition_spec_id: i32, pub snapshot_schema: SchemaRef, pub delete_file_index: DeleteFileIndex, + pub case_sensitive: bool, } impl ManifestFileContext { @@ -89,6 +91,7 @@ impl ManifestFileContext { bound_predicates: bound_predicates.clone(), snapshot_schema: snapshot_schema.clone(), delete_file_index: delete_file_index.clone(), + case_sensitive: self.case_sensitive, }; sender @@ -135,6 +138,7 @@ impl ManifestEntryContext { partition_spec: None, // TODO: Extract name_mapping from table metadata property 
"schema.name-mapping.default" name_mapping: None, + case_sensitive: self.case_sensitive, }) } } @@ -277,6 +281,7 @@ impl PlanContext { field_ids: self.field_ids.clone(), expression_evaluator_cache: self.expression_evaluator_cache.clone(), delete_file_index, + case_sensitive: self.case_sensitive, } } } diff --git a/crates/iceberg/src/scan/mod.rs b/crates/iceberg/src/scan/mod.rs index 1f7fa50df8..c055c12c9a 100644 --- a/crates/iceberg/src/scan/mod.rs +++ b/crates/iceberg/src/scan/mod.rs @@ -1885,6 +1885,7 @@ pub mod tests { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; test_fn(task); @@ -1902,6 +1903,7 @@ pub mod tests { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; test_fn(task); } diff --git a/crates/iceberg/src/scan/task.rs b/crates/iceberg/src/scan/task.rs index e1ef241a57..5349a9bdd2 100644 --- a/crates/iceberg/src/scan/task.rs +++ b/crates/iceberg/src/scan/task.rs @@ -104,6 +104,9 @@ pub struct FileScanTask { #[serde(serialize_with = "serialize_not_implemented")] #[serde(deserialize_with = "deserialize_not_implemented")] pub name_mapping: Option>, + + /// Whether this scan task should treat column names as case-sensitive when binding predicates. + pub case_sensitive: bool, } impl FileScanTask { From d4c4bd4ad3375ef012a5e943498e588bf808d14a Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 17 Dec 2025 07:57:34 +0800 Subject: [PATCH 35/58] Bump to version 0.8.0 (#1938) ## Which issue does this PR close? - Part of https://github.com/apache/iceberg-rust/issues/1850 ## What changes are included in this PR? ## Are these changes tested? --- **Parts of this PR were drafted with assistance from Codex (with `gpt-5.2`) and fully reviewed and edited by me. 
I take full responsibility for all changes.** --- CHANGELOG.md | 144 +++ Cargo.lock | 28 +- Cargo.toml | 16 +- bindings/python/Cargo.lock | 265 +++-- bindings/python/Cargo.toml | 2 +- bindings/python/DEPENDENCIES.rust.tsv | 884 ++++++++------- crates/catalog/glue/DEPENDENCIES.rust.tsv | 357 +++--- crates/catalog/hms/DEPENDENCIES.rust.tsv | 323 +++--- crates/catalog/loader/DEPENDENCIES.rust.tsv | 401 +++---- crates/catalog/rest/DEPENDENCIES.rust.tsv | 302 +++-- crates/catalog/s3tables/DEPENDENCIES.rust.tsv | 357 +++--- crates/catalog/sql/DEPENDENCIES.rust.tsv | 303 +++-- crates/examples/DEPENDENCIES.rust.tsv | 306 +++-- crates/iceberg/DEPENDENCIES.rust.tsv | 299 +++-- .../integration_tests/DEPENDENCIES.rust.tsv | 820 +++++++------- .../cache-moka/DEPENDENCIES.rust.tsv | 301 +++-- .../datafusion/DEPENDENCIES.rust.tsv | 810 ++++++------- .../playground/DEPENDENCIES.rust.tsv | 1001 ++++++++--------- crates/sqllogictest/DEPENDENCIES.rust.tsv | 880 ++++++++------- crates/test_utils/DEPENDENCIES.rust.tsv | 36 +- deny.toml | 3 +- 21 files changed, 4024 insertions(+), 3814 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f66f64f478..50d4576dfb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,150 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/) and this project adheres to [Semantic Versioning](https://semver.org/). 
+## [v0.8.0] - 2025-12-16 + +### Breaking Changes + +* **API Changes:** + * refactor: Remove redundant parameters from SnapshotProducer validation methods by @Li0k in https://github.com/apache/iceberg-rust/pull/1853 + * chore: Remove deprecated `remove_all` in FileIO by @jonathanc-n in https://github.com/apache/iceberg-rust/pull/1863 + * refactor: Drop smol runtime support by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1900 + +* **Compatibility:** + * chore: bump MSRV to 1.88, fix warnings and clippy errors by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1902 + +* **Dependency Updates:** + * Upgrade opendal to v0.55 by @dentiny in https://github.com/apache/iceberg-rust/pull/1895 + * deps: bump DataFusion to 51, Arrow to 57, pyo to 0.26 by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1899 + +* **Other:** + * Remove wildcard pattern in exhaustive enums by @lgingerich in https://github.com/apache/iceberg-rust/pull/1925 + +### All Changes + +* chore(deps): Bump tempfile from 3.22.0 to 3.23.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1717 +* chore(deps): Bump rand from 0.8.5 to 0.9.2 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1716 +* chore(deps): Bump crate-ci/typos from 1.36.2 to 1.36.3 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1715 +* refactor: Improve REST catalog's authenticate method by @imor in https://github.com/apache/iceberg-rust/pull/1712 +* chore(deps): Bump serde_with from 3.14.0 to 3.14.1 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1727 +* refactor(writer): Refactor writers for the future partitioning writers by @CTTY in https://github.com/apache/iceberg-rust/pull/1657 +* Set lock on version of Pydantic by @Fokko in https://github.com/apache/iceberg-rust/pull/1737 +* chore(deps): Bump crate-ci/typos from 1.36.3 to 1.37.2 by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1734 +* feat: support more partition 
transformations for PartitionSpec::partition_to_path by @mnpw in https://github.com/apache/iceberg-rust/pull/1730 +* chore: Update website for 0.7.0 by @CTTY in https://github.com/apache/iceberg-rust/pull/1738 +* feat(sql-catalog): implement register table for sql catalog by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1724 +* fix: ensure CoalescePartitionsExec is enabled for IcebergCommitExec by @sgrebnov in https://github.com/apache/iceberg-rust/pull/1723 +* chore(deps): Bump regex from 1.11.2 to 1.12.1 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1741 +* chore(deps): Bump crate-ci/typos from 1.37.2 to 1.38.1 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1740 +* Improve `IcebergCommitExec` to correctly populate properties/schema by @sgrebnov in https://github.com/apache/iceberg-rust/pull/1721 +* feat(spec): add `table_properties.rs` to spec by @kaushiksrini in https://github.com/apache/iceberg-rust/pull/1733 +* chore(deps): Bump actions/stale from 10.0.0 to 10.1.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1726 +* docs: remove -src suffix from artifact name by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1743 +* feat(reader): Make ArrowReaderBuilder::new public by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1748 +* feat(writer): Add clustered and fanout writer by @CTTY in https://github.com/apache/iceberg-rust/pull/1735 +* feat(catalog): impl builder for SqlCatalog by @335g in https://github.com/apache/iceberg-rust/pull/1666 +* fix: fix read parquert file when schema change by @chenzl25 in https://github.com/apache/iceberg-rust/pull/1750 +* docs: Fix broken orbstack and podman links in CONTRIBUTING.md by @petern48 in https://github.com/apache/iceberg-rust/pull/1757 +* chore(deps): Bump tokio from 1.47.1 to 1.48.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1763 +* chore(deps): Bump backon from 1.5.2 to 1.6.0 by @dependabot[bot] in 
https://github.com/apache/iceberg-rust/pull/1762 +* fix: global eq delete matching should apply to only strictly older files, and fix partition scoped matching to consider spec id by @amogh-jahagirdar in https://github.com/apache/iceberg-rust/pull/1758 +* chore(deps): Bump apache/skywalking-eyes from 0.7.0 to 0.8.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1760 +* chore(deps): Bump rust_decimal from 1.38.0 to 1.39.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1761 +* feat(datafusion): implement the project node to add the partition columns by @fvaleye in https://github.com/apache/iceberg-rust/pull/1602 +* fix: snapshot was producing empty summary by @imor in https://github.com/apache/iceberg-rust/pull/1767 +* docs: Add examples for PartitioningWriter by @CTTY in https://github.com/apache/iceberg-rust/pull/1754 +* feat(sqllogictest): Add support for iceberg datafusion sqllogictest integration by @lliangyu-lin in https://github.com/apache/iceberg-rust/pull/1764 +* fix(build): Pin home version after merging #1764 by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1783 +* minor: Update Cargo.lock to add home by @CTTY in https://github.com/apache/iceberg-rust/pull/1785 +* chore(deps): Bump aws-sdk-s3tables from 1.40.0 to 1.41.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1790 +* chore(deps): Bump rand from 0.8.5 to 0.9.2 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1789 +* chore(deps): Bump actions/download-artifact from 5 to 6 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1788 +* chore(deps): Bump actions/upload-artifact from 4 to 5 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1787 +* fix(reader): filter row groups when FileScanTask contains byte ranges by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1779 +* refactor(arrow,datafusion): Reuse PartitionValueCalculator in RecordBatchPartitionSplitter by @CTTY in 
https://github.com/apache/iceberg-rust/pull/1781 +* feat: Update Datafusion to v49 by @DerGut in https://github.com/apache/iceberg-rust/pull/1704 +* deps: unpin pydantic by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1793 +* feat(reader): Add Date32 support to RecordBatchTransformer create_column by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1792 +* feat(catalog): Implement update_table for S3TablesCatalog by @CTTY in https://github.com/apache/iceberg-rust/pull/1594 +* feat: Update Datafusion to v50 by @DerGut in https://github.com/apache/iceberg-rust/pull/1728 +* ci: Migrate to uv for python by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1796 +* ci: Relax msrv check thanks to rust 2024 by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1795 +* ci: Don't dismiss stale review to make contribution easier by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1799 +* add Makefile to bindings/python by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1800 +* chore: inline format args by @colinmarc in https://github.com/apache/iceberg-rust/pull/1805 +* refactor: Migrate from tera to minijinja by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1798 +* fix(reader): fix position delete bugs with row group skipping by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1806 +* feat(datafusion): implement the partitioning node for DataFusion to define the partitioning by @fvaleye in https://github.com/apache/iceberg-rust/pull/1620 +* feat(reader): Date32 from days since epoch for Literal:try_from_json by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1803 +* chore(deps): Bump aws-sdk-glue from 1.125.0 to 1.126.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1812 +* chore(deps): Bump astral-sh/setup-uv from 6 to 7 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1811 +* chore(deps): Bump crate-ci/typos from 1.38.1 to 1.39.0 by @dependabot[bot] in 
https://github.com/apache/iceberg-rust/pull/1810 +* feat(reader): position-based column projection for Parquet files without field IDs (migrated tables) by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1777 +* fix(reader): Equality delete files with partial schemas (containing only equality columns) by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1782 +* infra: use apache/hive:4.0.0 as hive Dockerfile base image by @geruh in https://github.com/apache/iceberg-rust/pull/1823 +* fix: StructType fails to deserialize JSON with type field by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1822 +* feat: Support for V3 Metadata by @c-thiel in https://github.com/apache/iceberg-rust/pull/1682 +* fix(reader): Support both position and equality delete files on the same FileScanTask by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1778 +* feat(datafusion): Add TaskWriter for DataFusion by @CTTY in https://github.com/apache/iceberg-rust/pull/1769 +* fix: support reading compressed metadata by @colinmarc in https://github.com/apache/iceberg-rust/pull/1802 +* Support deserializing bytes by @Fokko in https://github.com/apache/iceberg-rust/pull/1820 +* fix: Bump CI Spark version to 3.5.7 by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1832 +* infra: use python 3.12 for release by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1836 +* pyiceberg-core: create smaller artifacts by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1841 +* infra: add collaborators to .asf.yaml by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1842 +* pyiceberg-core: use pyo3 abi3-py310 by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1843 +* ci: parallelize unit test with matrix by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1833 +* pyiceberg-core: create even smaller artifacts by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1844 +* chore: Split values.rs into 
separate files by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1840 +* feat(datafusion): Support `INSERT INTO` partitioned tables by @CTTY in https://github.com/apache/iceberg-rust/pull/1827 +* docs: Add Wrappers project to README by @burmecia in https://github.com/apache/iceberg-rust/pull/1852 +* feat(reader): Add PartitionSpec support to FileScanTask and RecordBatchTransformer by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1821 +* feat(reader): null struct default values in create_column by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1847 +* refactor: Remove redundant parameters from SnapshotProducer validation methods by @Li0k in https://github.com/apache/iceberg-rust/pull/1853 +* infra: add verbose=true to pypa/gh-action-pypi-publish by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1846 +* use RecordBatchTransformerBuilder instead of RecordBatchTransformer by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1857 +* chore(deps): Bump bytes from 1.10.1 to 1.11.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1861 +* chore(deps): Bump serde_with from 3.15.1 to 3.16.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1859 +* chore(deps): Bump fs-err from 3.1.3 to 3.2.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1860 +* chore(deps): Bump crate-ci/typos from 1.39.0 to 1.39.2 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1858 +* chore: Remove deprecated `remove_all` in FileIO by @jonathanc-n in https://github.com/apache/iceberg-rust/pull/1863 +* infra: notify on github workflow failure by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1870 +* feat(reader): Add binary support to `get_arrow_datum` for equality deletes with binary type by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1848 +* Raise concurrency errors properly for glue tables by @jembishop in 
https://github.com/apache/iceberg-rust/pull/1875 +* infra: add instructions for cleaning up testpypi artifacts by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1855 +* chore(deps): Bump actions/checkout from 5 to 6 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1883 +* Update apache-avro to v0.21.0 by @N-Boutaib in https://github.com/apache/iceberg-rust/pull/1881 +* docs: Clarify functionality of `SnapshotProduceOperation` by @jonathanc-n in https://github.com/apache/iceberg-rust/pull/1874 +* feat(datafusion): Split IcebergTableProvider into static and non-static table provider by @CTTY in https://github.com/apache/iceberg-rust/pull/1879 +* infra: use new `del_branch_on_merge` in .asf.yaml by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1888 +* Upgrade opendal to v0.55 by @dentiny in https://github.com/apache/iceberg-rust/pull/1895 +* chore(deps): Bump http from 1.3.1 to 1.4.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1892 +* chore(deps): Bump crate-ci/typos from 1.39.2 to 1.40.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1891 +* feat(datafusion): Add `sort_by_partition` to sort the input partitioned data by @CTTY in https://github.com/apache/iceberg-rust/pull/1618 +* rfc: Modularize `iceberg` Implementations by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1854 +* refactor(writer): Make writer builders non-consuming in build by @leonzchang in https://github.com/apache/iceberg-rust/pull/1889 +* fix: Keep snapshot log on replace by @c-thiel in https://github.com/apache/iceberg-rust/pull/1896 +* chore(deps): Bump actions/stale from 10.1.0 to 10.1.1 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1908 +* feat(datafusion): Add sqllogictest for DataFusion INSERT INTO by @CTTY in https://github.com/apache/iceberg-rust/pull/1887 +* refactor: Drop smol runtime support by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1900 +* chore(deps): Bump 
minijinja from 2.12.0 to 2.13.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1909 +* chore(deps): Bump uuid from 1.18.1 to 1.19.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1910 +* feat(core): Add support for `_file` column by @gbrgr in https://github.com/apache/iceberg-rust/pull/1824 +* feat: Make `rest` types public, add documentation by @c-thiel in https://github.com/apache/iceberg-rust/pull/1901 +* chore: bump MSRV to 1.88, fix warnings and clippy errors by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1902 +* ci: Make s3tables ready for publish by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1916 +* deps: bump DataFusion to 51, Arrow to 57, pyo to 0.26 by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1899 +* fix: Serialize `split_offsets` as null when empty by @AndreaBozzo in https://github.com/apache/iceberg-rust/pull/1906 +* feat(catalog): Implement update_table for SqlCatalog by @lgingerich in https://github.com/apache/iceberg-rust/pull/1911 +* fix: Respect precision and scale for Decimal128 in value.rs by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1921 +* fix: restore no-op logic in constants_map for NULL identity-partitioned columns by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1922 +* fix: stack overflow when loading large equality deletes by @dojiong in https://github.com/apache/iceberg-rust/pull/1915 +* chore(deps): Bump actions/upload-artifact from 5 to 6 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1931 +* chore(deps): Bump actions/download-artifact from 6 to 7 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1932 +* Remove wildcard pattern in exhaustive enums by @lgingerich in https://github.com/apache/iceberg-rust/pull/1925 +* fix: prioritize delete manifests to prevent scan deadlock by @dojiong in https://github.com/apache/iceberg-rust/pull/1937 +* feat: Include statistics for Reserved Fields by 
@Fokko in https://github.com/apache/iceberg-rust/pull/1849 + ## [v0.7.0] - 2025-09-23 ### Breaking Changes diff --git a/Cargo.lock b/Cargo.lock index 2d464709fd..97ee25d658 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3344,7 +3344,7 @@ dependencies = [ [[package]] name = "iceberg" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "apache-avro 0.21.0", @@ -3405,7 +3405,7 @@ dependencies = [ [[package]] name = "iceberg-cache-moka" -version = "0.7.0" +version = "0.8.0" dependencies = [ "iceberg", "moka", @@ -3413,7 +3413,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-glue" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ -3430,7 +3430,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-hms" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ -3454,7 +3454,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-loader" -version = "0.7.0" +version = "0.8.0" dependencies = [ "async-trait", "iceberg", @@ -3470,7 +3470,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-rest" -version = "0.7.0" +version = "0.8.0" dependencies = [ "async-trait", "chrono", @@ -3493,7 +3493,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-s3tables" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ -3507,7 +3507,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-sql" -version = "0.7.0" +version = "0.8.0" dependencies = [ "async-trait", "iceberg", @@ -3521,7 +3521,7 @@ dependencies = [ [[package]] name = "iceberg-datafusion" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ -3537,7 +3537,7 @@ dependencies = [ [[package]] name = "iceberg-examples" -version = "0.7.0" +version = "0.8.0" dependencies = [ "futures", "iceberg", @@ -3547,7 +3547,7 @@ dependencies = [ [[package]] name = "iceberg-integration-tests" -version = "0.7.0" +version = "0.8.0" dependencies = [ "arrow-array", "arrow-schema", @@ -3566,7 +3566,7 
@@ dependencies = [ [[package]] name = "iceberg-playground" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "clap", @@ -3588,7 +3588,7 @@ dependencies = [ [[package]] name = "iceberg-sqllogictest" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ -3610,7 +3610,7 @@ dependencies = [ [[package]] name = "iceberg_test_utils" -version = "0.7.0" +version = "0.8.0" dependencies = [ "tracing", "tracing-subscriber", diff --git a/Cargo.toml b/Cargo.toml index ded3aedecb..d099398dbd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ resolver = "2" [workspace.package] edition = "2024" homepage = "https://rust.iceberg.apache.org/" -version = "0.7.0" +version = "0.8.0" license = "Apache-2.0" repository = "https://github.com/apache/iceberg-rust" @@ -78,13 +78,13 @@ futures = "0.3" hive_metastore = "0.2.0" home = "=0.5.11" http = "1.2" -iceberg = { version = "0.7.0", path = "./crates/iceberg" } -iceberg-catalog-glue = { version = "0.7.0", path = "./crates/catalog/glue" } -iceberg-catalog-hms = { version = "0.7.0", path = "./crates/catalog/hms" } -iceberg-catalog-rest = { version = "0.7.0", path = "./crates/catalog/rest" } -iceberg-catalog-s3tables = { version = "0.7.0", path = "./crates/catalog/s3tables" } -iceberg-catalog-sql = { version = "0.7.0", path = "./crates/catalog/sql" } -iceberg-datafusion = { version = "0.7.0", path = "./crates/integrations/datafusion" } +iceberg = { version = "0.8.0", path = "./crates/iceberg" } +iceberg-catalog-glue = { version = "0.8.0", path = "./crates/catalog/glue" } +iceberg-catalog-hms = { version = "0.8.0", path = "./crates/catalog/hms" } +iceberg-catalog-rest = { version = "0.8.0", path = "./crates/catalog/rest" } +iceberg-catalog-s3tables = { version = "0.8.0", path = "./crates/catalog/s3tables" } +iceberg-catalog-sql = { version = "0.8.0", path = "./crates/catalog/sql" } +iceberg-datafusion = { version = "0.8.0", path = "./crates/integrations/datafusion" } indicatif = "0.18" 
itertools = "0.13" libtest-mimic = "0.8.1" diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index a7244cc8dd..4647f9d886 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -128,9 +128,9 @@ checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "apache-avro" -version = "0.20.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a033b4ced7c585199fb78ef50fca7fe2f444369ec48080c5fd072efa1a03cc7" +checksum = "36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf" dependencies = [ "bigdecimal", "bon", @@ -1034,8 +1034,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" dependencies = [ "arrow", "arrow-schema", @@ -1089,8 +1090,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" dependencies = [ "arrow", "async-trait", @@ -1113,8 +1115,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" dependencies = [ "arrow", "async-trait", @@ 
-1136,8 +1139,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" dependencies = [ "ahash 0.8.12", "arrow", @@ -1159,8 +1163,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b92065bbc6532c6651e2f7dd30b55cba0c7a14f860c7e1d15f165c41a1868d95" dependencies = [ "futures", "log", @@ -1169,8 +1174,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" dependencies = [ "arrow", "async-compression", @@ -1203,8 +1209,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" dependencies = [ "arrow", "arrow-ipc", @@ -1226,8 +1233,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "50.3.0" -source = 
"git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" dependencies = [ "arrow", "async-trait", @@ -1248,8 +1256,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" dependencies = [ "arrow", "async-trait", @@ -1269,8 +1278,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d0b60ffd66f28bfb026565d62b0a6cbc416da09814766a3797bba7d85a3cd9" dependencies = [ "arrow", "async-trait", @@ -1298,13 +1308,15 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" [[package]] name = "datafusion-execution" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" dependencies = [ "arrow", "async-trait", @@ -1322,8 +1334,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" dependencies = [ "arrow", "async-trait", @@ -1344,8 +1357,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" dependencies = [ "arrow", "datafusion-common", @@ -1356,8 +1370,9 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec510e7787641279b0336e8b79e4b7bd1385d5976875ff9b97f4269ce5231a67" dependencies = [ "abi_stable", "arrow", @@ -1378,8 +1393,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" dependencies = [ "arrow", "arrow-buffer", @@ -1407,8 +1423,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" 
-version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" dependencies = [ "ahash 0.8.12", "arrow", @@ -1427,8 +1444,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" dependencies = [ "ahash 0.8.12", "arrow", @@ -1439,8 +1457,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" dependencies = [ "arrow", "arrow-ord", @@ -1461,8 +1480,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" dependencies = [ "arrow", "async-trait", @@ -1476,8 +1496,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" 
+version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" dependencies = [ "arrow", "datafusion-common", @@ -1493,8 +1514,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53ae9bcc39800820d53a22d758b3b8726ff84a5a3e24cecef04ef4e5fdf1c7cc" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1502,8 +1524,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = [ "datafusion-doc", "quote", @@ -1512,8 +1535,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" dependencies = [ "arrow", "chrono", @@ -1531,8 +1555,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" dependencies = [ "ahash 0.8.12", "arrow", @@ -1552,8 +1577,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" dependencies = [ "arrow", "datafusion-common", @@ -1566,8 +1592,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" dependencies = [ "ahash 0.8.12", "arrow", @@ -1579,8 +1606,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" dependencies = [ "arrow", "datafusion-common", @@ -1597,8 +1625,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" dependencies = [ "ahash 0.8.12", "arrow", @@ -1627,8 +1656,9 @@ dependencies = [ 
[[package]] name = "datafusion-proto" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d368093a98a17d1449b1083ac22ed16b7128e4c67789991869480d8c4a40ecb9" dependencies = [ "arrow", "chrono", @@ -1653,8 +1683,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b6aef3d5e5c1d2bc3114c4876730cb76a9bdc5a8df31ef1b6db48f0c1671895" dependencies = [ "arrow", "datafusion-common", @@ -1663,8 +1694,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" dependencies = [ "arrow", "datafusion-common", @@ -1679,8 +1711,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.3.0" -source = "git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f96eebd17555386f459037c65ab73aae8df09f464524c709d6a3134ad4f4776" dependencies = [ "async-trait", "datafusion-common", @@ -1692,8 +1725,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "50.3.0" -source = 
"git+https://github.com/apache/datafusion.git?rev=f32984b2dbf9e5a193c20643ce624167295fbd61#f32984b2dbf9e5a193c20643ce624167295fbd61" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" dependencies = [ "arrow", "bigdecimal", @@ -2122,12 +2156,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" - [[package]] name = "hex" version = "0.4.3" @@ -2286,7 +2314,7 @@ dependencies = [ [[package]] name = "iceberg" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "apache-avro", @@ -2331,7 +2359,6 @@ dependencies = [ "serde_repr", "serde_with", "strum 0.27.2", - "thrift", "tokio", "typed-builder", "url", @@ -2341,7 +2368,7 @@ dependencies = [ [[package]] name = "iceberg-datafusion" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ -2539,6 +2566,47 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jiff" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" +dependencies = [ + "jiff-static", + "jiff-tzdb-platform", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", + "windows-sys 0.61.2", +] + +[[package]] +name = "jiff-static" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" +dependencies = [ + "proc-macro2", + "quote", + "syn 
2.0.108", +] + +[[package]] +name = "jiff-tzdb" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" +dependencies = [ + "jiff-tzdb", +] + [[package]] name = "jobserver" version = "0.1.34" @@ -2824,16 +2892,6 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "object" version = "0.32.2" @@ -2875,20 +2933,20 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "opendal" -version = "0.54.1" +version = "0.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42afda58fa2cf50914402d132cc1caacff116a85d10c72ab2082bb7c50021754" +checksum = "d075ab8a203a6ab4bc1bce0a4b9fe486a72bf8b939037f4b78d95386384bc80a" dependencies = [ "anyhow", "backon", "base64", "bytes", - "chrono", "crc32c", "futures", "getrandom 0.2.16", "http", "http-body", + "jiff", "log", "md-5", "percent-encoding", @@ -2898,6 +2956,7 @@ dependencies = [ "serde", "serde_json", "tokio", + "url", "uuid", ] @@ -3061,6 +3120,15 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + [[package]] name = "potential_utf" version = "0.1.4" 
@@ -3168,7 +3236,7 @@ dependencies = [ [[package]] name = "pyiceberg_core_rust" -version = "0.7.0" +version = "0.8.0" dependencies = [ "arrow", "datafusion-ffi", @@ -4185,15 +4253,6 @@ dependencies = [ "syn 2.0.108", ] -[[package]] -name = "threadpool" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" -dependencies = [ - "num_cpus", -] - [[package]] name = "thrift" version = "0.17.0" @@ -4202,9 +4261,7 @@ checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" dependencies = [ "byteorder", "integer-encoding", - "log", "ordered-float 2.10.1", - "threadpool", ] [[package]] diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index d8b8444195..9ec58cf807 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -20,7 +20,7 @@ edition = "2024" homepage = "https://rust.iceberg.apache.org" name = "pyiceberg_core_rust" rust-version = "1.88" -version = "0.7.0" +version = "0.8.0" # This crate is used to build python bindings, we don't want to publish it publish = false diff --git a/bindings/python/DEPENDENCIES.rust.tsv b/bindings/python/DEPENDENCIES.rust.tsv index 7565a13e1d..5fe0da5b2a 100644 --- a/bindings/python/DEPENDENCIES.rust.tsv +++ b/bindings/python/DEPENDENCIES.rust.tsv @@ -1,445 +1,439 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 MPL-2.0 Unicode-3.0 Unlicense Zlib -abi_stable@0.11.3 X X -abi_stable_derive@0.11.3 X X -abi_stable_shared@0.11.0 X X -addr2line@0.24.2 X X -adler2@2.0.1 X X X -ahash@0.8.12 X X -aho-corasick@1.1.3 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X -array-init@2.1.0 X X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@55.2.0 X -arrow-arith@55.2.0 X -arrow-array@55.2.0 
X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-csv@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-json@55.2.0 X -arrow-ord@55.2.0 X -arrow-pyarrow@55.2.0 X -arrow-row@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X -as-any@0.3.2 X X -as_derive_utils@0.11.0 X X -async-compression@0.4.19 X X -async-ffi@0.5.0 X -async-lock@3.4.1 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -atomic-waker@1.1.2 X X -autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.22.1 X X -bigdecimal@0.4.8 X X -bimap@0.6.3 X X -bitflags@2.9.4 X X -blake2@0.10.6 X X -blake3@1.8.2 X X X -block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X -byteorder@1.5.0 X X -bytes@1.10.1 X -bzip2@0.5.2 X X -bzip2-sys@0.1.13+1.0.8 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X -chrono@0.4.42 X X -chrono-tz@0.10.4 X X -comfy-table@7.2.0 X -concurrent-queue@2.5.0 X X -const-oid@0.9.6 X X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -const_panic@0.2.14 X -constant_time_eq@0.3.1 X X X -core-foundation-sys@0.8.7 X X -core_extensions@1.5.4 X X -core_extensions_proc_macros@1.5.4 X X -cpufeatures@0.2.17 X X -crc32c@0.6.8 X X -crc32fast@1.5.0 X X -crossbeam-channel@0.5.15 X X -crossbeam-epoch@0.9.18 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.6 X X -csv@1.3.1 X X -csv-core@0.1.12 X X -darling@0.20.11 X -darling@0.21.3 X -darling_core@0.20.11 X -darling_core@0.21.3 X -darling_macro@0.20.11 X -darling_macro@0.21.3 X -dashmap@6.1.0 X -datafusion@48.0.1 X -datafusion-catalog@48.0.1 X -datafusion-catalog-listing@48.0.1 X -datafusion-common@48.0.1 X -datafusion-common-runtime@48.0.1 X -datafusion-datasource@48.0.1 X -datafusion-datasource-csv@48.0.1 X -datafusion-datasource-json@48.0.1 X -datafusion-datasource-parquet@48.0.1 X -datafusion-doc@48.0.1 X -datafusion-execution@48.0.1 X -datafusion-expr@48.0.1 X -datafusion-expr-common@48.0.1 X 
-datafusion-ffi@48.0.1 X -datafusion-functions@48.0.1 X -datafusion-functions-aggregate@48.0.1 X -datafusion-functions-aggregate-common@48.0.1 X -datafusion-functions-nested@48.0.1 X -datafusion-functions-table@48.0.1 X -datafusion-functions-window@48.0.1 X -datafusion-functions-window-common@48.0.1 X -datafusion-macros@48.0.1 X -datafusion-optimizer@48.0.1 X -datafusion-physical-expr@48.0.1 X -datafusion-physical-expr-common@48.0.1 X -datafusion-physical-optimizer@48.0.1 X -datafusion-physical-plan@48.0.1 X -datafusion-proto@48.0.1 X -datafusion-proto-common@48.0.1 X -datafusion-session@48.0.1 X -datafusion-sql@48.0.1 X -derive_builder@0.20.2 X X -derive_builder_core@0.20.2 X X -derive_builder_macro@0.20.2 X X -digest@0.10.7 X X -displaydoc@0.2.5 X X -dissimilar@1.0.10 X -either@1.15.0 X X -equivalent@1.0.2 X X -errno@0.3.13 X X -event-listener@5.4.1 X X -event-listener-strategy@0.5.4 X X -expect-test@1.5.1 X X -fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -fixedbitset@0.5.7 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X -fnv@1.0.7 X X -foldhash@0.1.5 X -form_urlencoded@1.2.2 X X -futures@0.3.31 X X -futures-channel@0.3.31 X X -futures-core@0.3.31 X X -futures-executor@0.3.31 X X -futures-io@0.3.31 X X -futures-macro@0.3.31 X X -futures-sink@0.3.31 X X -futures-task@0.3.31 X X -futures-util@0.3.31 X X -generational-arena@0.2.9 X -generator@0.8.7 X X -generic-array@0.14.7 X -getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X -glob@0.3.3 X X -gloo-timers@0.3.0 X X -half@2.6.0 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -heck@0.5.0 X X -hermit-abi@0.5.2 X X -hex@0.4.3 X X -hmac@0.12.1 X X -home@0.5.11 X X -http@1.3.1 X X -http-body@1.0.1 X -http-body-util@0.1.3 X -httparse@1.10.1 X X -humantime@2.2.0 X X -hyper@1.7.0 X -hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X -iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-datafusion@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X 
-icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X -ident_case@1.0.1 X X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.11.0 X X -indoc@2.0.6 X X -integer-encoding@3.0.4 X -io-uring@0.7.10 X X -ipnet@2.11.0 X X -iri-string@0.7.8 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.15 X X -jobserver@0.1.34 X X -js-sys@0.3.78 X X -lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X -libloading@0.7.4 X -libm@0.2.15 X -libz-rs-sys@0.5.2 X -linux-raw-sys@0.9.4 X X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -lzma-sys@0.1.20 X X -matchers@0.2.0 X -md-5@0.10.6 X X -memchr@2.7.5 X X -memoffset@0.9.1 X -miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X -murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X -num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X -object_store@0.12.3 X X -once_cell@1.21.3 X X -opendal@0.54.0 X -ordered-float@2.10.1 X -ordered-float@4.6.0 X -parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X -paste@1.0.15 X X -percent-encoding@2.3.2 X X -petgraph@0.8.2 X X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.16 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -portable-atomic@1.11.1 X X -potential_utf@0.1.3 X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X -prost@0.13.5 X -prost-derive@0.13.5 X -psm@0.1.26 X X -pyiceberg_core_rust@0.7.0 X -pyo3@0.24.2 X X -pyo3-build-config@0.24.2 X X -pyo3-ffi@0.24.2 X X -pyo3-macros@0.24.2 X X -pyo3-macros-backend@0.24.2 X X -quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X -r-efi@5.3.0 X X X -rand@0.8.5 X X -rand@0.9.2 X X -rand_chacha@0.3.1 X 
X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.3 X X -recursive@0.1.1 X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X -repr_offset@0.2.2 X -reqsign@0.16.5 X -reqwest@0.12.23 X X -ring@0.17.14 X X -roaring@0.11.2 X X -rust_decimal@1.37.2 X -rustc-demangle@0.1.26 X X -rustc_version@0.4.1 X X -rustix@1.0.8 X X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X -rustversion@1.0.22 X X -ryu@1.0.20 X X -same-file@1.0.6 X X -scoped-tls@1.0.1 X X -scopeguard@1.2.0 X X -semver@1.0.26 X X -seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X -serde_repr@0.1.20 X X -serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X -sha1@0.10.6 X X -sha2@0.10.9 X X -sharded-slab@0.1.7 X -shlex@1.3.0 X X -simdutf8@0.1.5 X X -siphasher@1.0.1 X X -slab@0.4.11 X -smallvec@1.15.1 X X -snap@1.1.1 X -socket2@0.6.0 X X -sqlparser@0.55.0 X -sqlparser_derive@0.3.0 X -stable_deref_trait@1.2.0 X X -stacker@0.1.21 X X -static_assertions@1.1.0 X X -strsim@0.11.1 X -strum@0.27.2 X -strum_macros@0.27.2 X -subtle@2.6.1 X -syn@1.0.109 X X -syn@2.0.106 X X -sync_wrapper@1.0.2 X -synstructure@0.13.2 X -tagptr@0.2.0 X X -target-lexicon@0.13.2 X -tempfile@3.21.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X -thread_local@1.1.9 X X -threadpool@1.8.1 X X -thrift@0.17.0 X -tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X -tower@0.5.2 X -tower-http@0.6.6 X -tower-layer@0.3.3 X -tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X -tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X -try-lock@0.2.5 X -tstr@0.2.4 X -tstr_proc_macros@0.2.2 X -twox-hash@2.1.2 X -typed-arena@2.0.2 X -typed-builder@0.20.1 X X 
-typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -typewit@1.14.1 X -unicode-ident@1.0.18 X X X -unicode-segmentation@1.12.0 X X -unicode-width@0.2.1 X X -unindent@0.2.4 X X -untrusted@0.9.0 X -url@2.5.7 X X -utf8_iter@1.0.4 X X -uuid@1.18.1 X X -version_check@0.9.5 X X -walkdir@2.5.0 X X -want@0.3.1 X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X -wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -web-time@1.1.0 X X -webpki-roots@1.0.2 X -winapi@0.3.9 X X -winapi-i686-pc-windows-gnu@0.4.0 X X -winapi-util@0.1.11 X X -winapi-x86_64-pc-windows-gnu@0.4.0 X X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.60.2 X X -windows-sys@0.61.0 X X -windows-targets@0.52.6 X X -windows-targets@0.53.3 X X -windows-threading@0.1.0 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_gnullvm@0.53.0 X X -windows_aarch64_msvc@0.52.6 X X -windows_aarch64_msvc@0.53.0 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnu@0.53.0 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_gnullvm@0.53.0 X X -windows_i686_msvc@0.52.6 X X -windows_i686_msvc@0.53.0 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnu@0.53.0 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_gnullvm@0.53.0 X X -windows_x86_64_msvc@0.52.6 X X -windows_x86_64_msvc@0.53.0 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -xz2@0.1.7 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X 
-zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 MPL-2.0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +abi_stable@0.11.3 X X +abi_stable_derive@0.11.3 X X +abi_stable_shared@0.11.0 X X +adler2@2.0.1 X X X +ahash@0.8.12 X X +aho-corasick@1.1.3 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X +android_system_properties@0.1.5 X X +anyhow@1.0.100 X X +apache-avro@0.21.0 X +ar_archive_writer@0.2.0 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@57.0.0 X +arrow-arith@57.0.0 X +arrow-array@57.0.0 X +arrow-buffer@57.0.0 X +arrow-cast@57.0.0 X +arrow-csv@57.0.0 X +arrow-data@57.0.0 X +arrow-ipc@57.0.0 X +arrow-json@57.0.0 X +arrow-ord@57.0.0 X +arrow-pyarrow@57.0.0 X +arrow-row@57.0.0 X +arrow-schema@57.0.0 X +arrow-select@57.0.0 X +arrow-string@57.0.0 X +as-any@0.3.2 X X +as_derive_utils@0.11.0 X X +async-compression@0.4.19 X X +async-ffi@0.5.0 X +async-lock@3.4.1 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +backon@1.6.0 X +base64@0.22.1 X X +bigdecimal@0.4.9 X X +bimap@0.6.3 X X +bitflags@2.10.0 X X +blake2@0.10.6 X X +blake3@1.8.2 X X X +block-buffer@0.10.4 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.19.0 X X +bytemuck@1.24.0 X X X +byteorder@1.5.0 X X +bytes@1.10.1 X +bzip2@0.5.2 X X +bzip2@0.6.1 X X +bzip2-sys@0.1.13+1.0.8 X X +cc@1.2.43 X X +cfg-if@1.0.4 X X +chrono@0.4.42 X X +chrono-tz@0.10.4 X X +comfy-table@7.1.2 X +concurrent-queue@2.5.0 X X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +const_panic@0.2.15 X +constant_time_eq@0.3.1 X X X +core-foundation-sys@0.8.7 X X +core_extensions@1.5.4 X X +core_extensions_proc_macros@1.5.4 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X 
+crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.6 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +darling@0.20.11 X +darling@0.21.3 X +darling_core@0.20.11 X +darling_core@0.21.3 X +darling_macro@0.20.11 X +darling_macro@0.21.3 X +dashmap@6.1.0 X +datafusion@51.0.0 X +datafusion-catalog@51.0.0 X +datafusion-catalog-listing@51.0.0 X +datafusion-common@51.0.0 X +datafusion-common-runtime@51.0.0 X +datafusion-datasource@51.0.0 X +datafusion-datasource-arrow@51.0.0 X +datafusion-datasource-csv@51.0.0 X +datafusion-datasource-json@51.0.0 X +datafusion-datasource-parquet@51.0.0 X +datafusion-doc@51.0.0 X +datafusion-execution@51.0.0 X +datafusion-expr@51.0.0 X +datafusion-expr-common@51.0.0 X +datafusion-ffi@51.0.0 X +datafusion-functions@51.0.0 X +datafusion-functions-aggregate@51.0.0 X +datafusion-functions-aggregate-common@51.0.0 X +datafusion-functions-nested@51.0.0 X +datafusion-functions-table@51.0.0 X +datafusion-functions-window@51.0.0 X +datafusion-functions-window-common@51.0.0 X +datafusion-macros@51.0.0 X +datafusion-optimizer@51.0.0 X +datafusion-physical-expr@51.0.0 X +datafusion-physical-expr-adapter@51.0.0 X +datafusion-physical-expr-common@51.0.0 X +datafusion-physical-optimizer@51.0.0 X +datafusion-physical-plan@51.0.0 X +datafusion-proto@51.0.0 X +datafusion-proto-common@51.0.0 X +datafusion-pruning@51.0.0 X +datafusion-session@51.0.0 X +datafusion-sql@51.0.0 X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +displaydoc@0.2.5 X X +dissimilar@1.0.10 X +either@1.15.0 X X +equivalent@1.0.2 X X +errno@0.3.14 X X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fastrand@2.3.0 X X +find-msvc-tools@0.1.4 X X +fixedbitset@0.5.7 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +form_urlencoded@1.2.2 X X +futures@0.3.31 X X +futures-channel@0.3.31 X 
X +futures-core@0.3.31 X X +futures-executor@0.3.31 X X +futures-io@0.3.31 X X +futures-macro@0.3.31 X X +futures-sink@0.3.31 X X +futures-task@0.3.31 X X +futures-timer@3.0.3 X X +futures-util@0.3.31 X X +generational-arena@0.2.9 X +generic-array@0.14.9 X +getrandom@0.2.16 X X +getrandom@0.3.4 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.0 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +home@0.5.11 X X +http@1.3.1 X X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +humantime@2.3.0 X X +hyper@1.7.0 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.17 X +iana-time-zone@0.1.64 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.8.0 X +iceberg-datafusion@0.8.0 X +icu_collections@2.1.0 X +icu_locale_core@2.1.0 X +icu_normalizer@2.1.0 X +icu_normalizer_data@2.1.0 X +icu_properties@2.1.0 X +icu_properties_data@2.1.0 X +icu_provider@2.1.0 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.12.0 X X +indoc@2.0.7 X X +integer-encoding@3.0.4 X +ipnet@2.11.0 X X +iri-string@0.7.8 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.5 X X +jiff-tzdb-platform@0.1.3 X X +jobserver@0.1.34 X X +js-sys@0.3.82 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.177 X X +libloading@0.7.4 X +libm@0.2.15 X +libz-rs-sys@0.5.2 X +linux-raw-sys@0.11.0 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.28 X X +lz4_flex@0.11.5 X +lzma-sys@0.1.20 X X +md-5@0.10.6 X X +memchr@2.7.6 X X +memoffset@0.9.1 X +miniz_oxide@0.8.9 X X X +mio@1.1.0 X +moka@0.12.11 X X +murmur3@0.5.2 X X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +object@0.32.2 X X +object_store@0.12.4 X X +once_cell@1.21.3 X X +opendal@0.55.0 X +ordered-float@2.10.1 X 
+ordered-float@4.6.0 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.0.0 X +paste@1.0.15 X X +percent-encoding@2.3.2 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.16 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +portable-atomic@1.11.1 X X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X +prost@0.14.1 X +prost-derive@0.14.1 X +psm@0.1.28 X X +pyiceberg_core_rust@0.8.0 X +pyo3@0.26.0 X X +pyo3-build-config@0.26.0 X X +pyo3-ffi@0.26.0 X X +pyo3-macros@0.26.0 X X +pyo3-macros-backend@0.26.0 X X +quad-rand@0.2.3 X +quick-xml@0.38.3 X +quote@1.0.41 X X +r-efi@5.3.0 X X X +rand@0.8.5 X X +rand@0.9.2 X X +rand_chacha@0.3.1 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.3 X X +recursive@0.1.1 X +recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X +relative-path@1.9.3 X X +repr_offset@0.2.2 X +reqsign@0.16.5 X +reqwest@0.12.24 X X +ring@0.17.14 X X +roaring@0.11.2 X X +rstest@0.26.1 X X +rstest_macros@0.26.1 X X +rust_decimal@1.39.0 X +rustc_version@0.4.1 X X +rustix@1.1.2 X X X +rustls@0.23.34 X X X +rustls-pki-types@1.13.0 X X +rustls-webpki@0.103.7 X +rustversion@1.0.22 X X +ryu@1.0.20 X X +same-file@1.0.6 X X +scopeguard@1.2.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X +serde_repr@0.1.20 X X +serde_urlencoded@0.7.1 X X +serde_with@3.15.1 X X +serde_with_macros@3.15.1 X X +sha1@0.10.6 X X +sha2@0.10.9 X X +shlex@1.3.0 X X +simd-adler32@0.3.7 X +simdutf8@0.1.5 X X +siphasher@1.0.1 X X +slab@0.4.11 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.1 X X +sqlparser@0.59.0 X +sqlparser_derive@0.3.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.22 X X +strsim@0.11.1 X 
+strum@0.26.3 X +strum@0.27.2 X +strum_macros@0.26.4 X +strum_macros@0.27.2 X +subtle@2.6.1 X +syn@1.0.109 X X +syn@2.0.108 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +target-lexicon@0.13.3 X +tempfile@3.23.0 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X +thrift@0.17.0 X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.16 X +toml_datetime@0.7.3 X X +toml_edit@0.23.7 X X +toml_parser@1.0.4 X X +tower@0.5.2 X +tower-http@0.6.6 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.41 X +tracing-attributes@0.1.30 X +tracing-core@0.1.34 X +try-lock@0.2.5 X +tstr@0.2.4 X +tstr_proc_macros@0.2.2 X +twox-hash@2.1.2 X +typed-arena@2.0.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typenum@1.19.0 X X +typewit@1.14.2 X +unicode-ident@1.0.20 X X X +unicode-segmentation@1.12.0 X X +unicode-width@0.2.2 X X +unindent@0.2.4 X X +untrusted@0.9.0 X +url@2.5.7 X X +utf8_iter@1.0.4 X X +uuid@1.18.1 X X +version_check@0.9.5 X X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.105 X X +wasm-bindgen-futures@0.4.55 X X +wasm-bindgen-macro@0.2.105 X X +wasm-bindgen-macro-support@0.2.105 X X +wasm-bindgen-shared@0.2.105 X X +wasm-streams@0.4.2 X X +web-sys@0.3.82 X X +web-time@1.1.0 X X +webpki-roots@1.0.3 X +winapi@0.3.9 X X +winapi-i686-pc-windows-gnu@0.4.0 X X +winapi-util@0.1.11 X X +winapi-x86_64-pc-windows-gnu@0.4.0 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.52.0 X X +windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.52.6 X X 
+windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.13 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +xz2@0.1.7 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.27 X X X +zerocopy-derive@0.8.27 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.2 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/glue/DEPENDENCIES.rust.tsv b/crates/catalog/glue/DEPENDENCIES.rust.tsv index 2d9f686262..e34ff1afc1 100644 --- a/crates/catalog/glue/DEPENDENCIES.rust.tsv +++ b/crates/catalog/glue/DEPENDENCIES.rust.tsv @@ -1,77 +1,73 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X 
X -aws-config@1.8.6 X -aws-credential-types@1.2.6 X -aws-runtime@1.5.10 X -aws-sdk-glue@1.119.0 X -aws-sdk-sso@1.83.0 X -aws-sdk-ssooidc@1.84.0 X -aws-sdk-sts@1.85.0 X -aws-sigv4@1.3.4 X -aws-smithy-async@1.2.5 X -aws-smithy-http@0.62.3 X -aws-smithy-http-client@1.1.1 X -aws-smithy-json@0.61.5 X -aws-smithy-observability@0.1.3 X -aws-smithy-query@0.60.7 X -aws-smithy-runtime@1.9.1 X -aws-smithy-runtime-api@1.9.0 X -aws-smithy-types@1.3.2 X -aws-smithy-xml@0.60.10 X -aws-types@1.3.8 X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.21.7 X X +aws-config@1.8.11 X +aws-credential-types@1.2.10 X +aws-runtime@1.5.16 X +aws-sdk-glue@1.132.0 X +aws-sdk-sso@1.90.0 X +aws-sdk-ssooidc@1.92.0 X +aws-sdk-sts@1.94.0 X +aws-sigv4@1.3.6 X +aws-smithy-async@1.2.7 X +aws-smithy-http@0.62.6 X +aws-smithy-http-client@1.1.5 X +aws-smithy-json@0.61.8 X +aws-smithy-observability@0.1.5 X +aws-smithy-query@0.60.9 X +aws-smithy-runtime@1.9.5 X +aws-smithy-runtime-api@1.9.3 X +aws-smithy-types@1.3.5 X +aws-smithy-xml@0.60.13 X +aws-types@1.3.10 X +backon@1.6.0 X base64@0.22.1 X X base64-simd@0.8.0 X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X +bytes@1.11.0 X bytes-utils@0.1.4 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X const-random@0.1.18 X X const-random-macro@0.1.16 X X core-foundation@0.10.1 X X -core-foundation@0.9.4 X X core-foundation-sys@0.8.7 X X cpufeatures@0.2.17 X X crc32c@0.6.8 X X @@ -80,14 +76,14 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X 
darling_core@0.20.11 X darling_core@0.21.3 X darling_macro@0.20.11 X darling_macro@0.21.3 X -deranged@0.5.3 X X +deranged@0.5.5 X X derive_builder@0.20.2 X X derive_builder_core@0.20.2 X X derive_builder_macro@0.20.2 X X @@ -100,9 +96,9 @@ event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -114,114 +110,107 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X h2@0.3.27 X h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X http@0.2.12 X X -http@1.3.1 X X +http@1.4.0 X X http-body@0.4.6 X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X httpdate@1.0.3 X X hyper@0.14.32 X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.24.2 X X X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-glue@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-glue@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X 
-io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-conv@0.1.0 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X openssl-probe@0.1.6 X X ordered-float@2.10.1 X ordered-float@4.6.0 X outref@0.5.2 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X powerfmt@0.2.0 X X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X 
+quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -229,152 +218,150 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X rustls@0.21.12 X X X -rustls@0.23.31 X X X -rustls-native-certs@0.6.3 X X X -rustls-native-certs@0.8.1 X X X -rustls-pemfile@1.0.4 X X X -rustls-pki-types@1.12.0 X X +rustls@0.23.35 X X X +rustls-native-certs@0.8.2 X X X +rustls-pki-types@1.13.1 X X rustls-webpki@0.101.7 X -rustls-webpki@0.103.4 X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -schannel@0.1.27 X -scoped-tls@1.0.1 X X +schannel@0.1.28 X scopeguard@1.2.0 X X sct@0.7.1 X X X -security-framework@2.11.1 X X -security-framework@3.4.0 X X +security-framework@3.5.1 X X security-framework-sys@2.15.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X socket2@0.5.10 X X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X 
strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X -time@0.3.43 X X +time@0.3.44 X X time-core@0.1.6 X X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X tokio-rustls@0.24.1 X X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X urlencoding@2.1.3 X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X vsimd@0.8.0 X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X 
-windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X xmlparser@0.13.6 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/hms/DEPENDENCIES.rust.tsv b/crates/catalog/hms/DEPENDENCIES.rust.tsv index cef38cabcd..5025f7b184 100644 --- a/crates/catalog/hms/DEPENDENCIES.rust.tsv +++ b/crates/catalog/hms/DEPENDENCIES.rust.tsv @@ -1,25 +1,24 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT 
Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-broadcast@0.7.2 X X async-lock@3.4.1 X X @@ -28,23 +27,22 @@ async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X cfg_aliases@0.2.1 X chrono@0.4.42 X X concurrent-queue@2.5.0 X X @@ -59,7 +57,7 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -80,9 +78,9 @@ event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X faststr@0.2.32 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X 
form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -94,103 +92,95 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X -half@2.6.0 X X +half@2.7.1 X X hashbrown@0.14.5 X X -hashbrown@0.15.5 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hive_metastore@0.2.0 X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-hms@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-hms@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -integer-encoding@4.0.2 X -io-uring@0.7.10 X X +integer-encoding@4.1.0 X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X 
+lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X +libz-rs-sys@0.5.3 X linked-hash-map@0.5.6 X X linkedbytes@0.1.16 X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X memoffset@0.9.1 X metainfo@0.7.14 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X motore@0.4.1 X X motore-macros@0.4.3 X X mur3@0.1.0 X murmur3@0.5.2 X X nix@0.29.0 X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -num_enum@0.7.4 X X X -num_enum_derive@0.7.4 X X X -object@0.36.7 X X +num_enum@0.7.5 X X X +num_enum_derive@0.7.5 X X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pilota@0.11.10 X X @@ -200,14 +190,15 @@ pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro-crate@3.3.0 X X -proc-macro2@1.0.101 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -215,148 +206,152 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -ref-cast@1.0.24 X X 
-ref-cast-impl@1.0.24 X X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +ref-cast@1.0.25 X X +ref-cast-impl@1.0.25 X X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc-hash@2.1.1 X X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X socket2@0.5.10 X X -socket2@0.6.0 X X +socket2@0.6.1 X X sonic-number@0.1.0 X sonic-rs@0.3.17 X -sonic-simd@0.1.1 X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +sonic-simd@0.1.2 X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X thiserror@1.0.69 X X -thiserror@2.0.16 X X +thiserror@2.0.17 X X thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 
X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X tokio-stream@0.1.17 X -tokio-util@0.7.16 X -toml_datetime@0.6.11 X X -toml_edit@0.22.27 X X +tokio-util@0.7.17 X +toml_datetime@0.7.3 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X volo@0.10.7 X X volo-thrift@0.10.8 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X 
+windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -winnow@0.7.13 X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/loader/DEPENDENCIES.rust.tsv b/crates/catalog/loader/DEPENDENCIES.rust.tsv index d809a30ca4..02b06c4479 100644 --- a/crates/catalog/loader/DEPENDENCIES.rust.tsv +++ b/crates/catalog/loader/DEPENDENCIES.rust.tsv @@ -1,25 +1,25 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X 
+apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-broadcast@0.7.2 X X async-lock@3.4.1 X X @@ -28,46 +28,44 @@ async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -aws-config@1.8.6 X -aws-credential-types@1.2.6 X -aws-runtime@1.5.10 X -aws-sdk-glue@1.119.0 X -aws-sdk-s3tables@1.37.0 X -aws-sdk-sso@1.83.0 X -aws-sdk-ssooidc@1.84.0 X -aws-sdk-sts@1.85.0 X -aws-sigv4@1.3.4 X -aws-smithy-async@1.2.5 X -aws-smithy-http@0.62.3 X -aws-smithy-http-client@1.1.1 X -aws-smithy-json@0.61.5 X -aws-smithy-observability@0.1.3 X -aws-smithy-query@0.60.7 X -aws-smithy-runtime@1.9.1 X -aws-smithy-runtime-api@1.9.0 X -aws-smithy-types@1.3.2 X -aws-smithy-xml@0.60.10 X -aws-types@1.3.8 X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.21.7 X X +aws-config@1.8.11 X +aws-credential-types@1.2.10 X +aws-runtime@1.5.16 X +aws-sdk-glue@1.132.0 X +aws-sdk-s3tables@1.46.0 X +aws-sdk-sso@1.90.0 X +aws-sdk-ssooidc@1.92.0 X +aws-sdk-sts@1.94.0 X +aws-sigv4@1.3.6 X +aws-smithy-async@1.2.7 X +aws-smithy-http@0.62.6 X +aws-smithy-http-client@1.1.5 X +aws-smithy-json@0.61.8 X +aws-smithy-observability@0.1.5 X +aws-smithy-query@0.60.9 X +aws-smithy-runtime@1.9.5 X +aws-smithy-runtime-api@1.9.3 X +aws-smithy-types@1.3.5 X +aws-smithy-xml@0.60.13 X +aws-types@1.3.10 X +backon@1.6.0 X base64@0.22.1 X X base64-simd@0.8.0 X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X 
brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X +bytes@1.11.0 X bytes-utils@0.1.4 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X cfg_aliases@0.2.1 X chrono@0.4.42 X X concurrent-queue@2.5.0 X X @@ -75,16 +73,18 @@ const-oid@0.9.6 X X const-random@0.1.18 X X const-random-macro@0.1.16 X X core-foundation@0.10.1 X X -core-foundation@0.9.4 X X core-foundation-sys@0.8.7 X X cpufeatures@0.2.17 X X +crc@3.4.0 X X +crc-catalog@2.4.0 X X crc32c@0.6.8 X X crc32fast@1.5.0 X X crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X +crossbeam-queue@0.3.12 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -92,7 +92,7 @@ darling_core@0.21.3 X darling_macro@0.20.11 X darling_macro@0.21.3 X dashmap@6.1.0 X -deranged@0.5.3 X X +deranged@0.5.5 X X derive_builder@0.20.2 X X derive_builder_core@0.20.2 X X derive_builder_macro@0.20.2 X X @@ -106,131 +106,130 @@ event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X faststr@0.2.32 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +flume@0.11.1 X X fnv@1.0.7 X X +foldhash@0.1.5 X form_urlencoded@1.2.2 X X futures@0.3.31 X X futures-channel@0.3.31 X X futures-core@0.3.31 X X futures-executor@0.3.31 X X +futures-intrusive@0.5.0 X X futures-io@0.3.31 X X futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X h2@0.3.27 X h2@0.4.12 X -half@2.6.0 X X +half@2.7.1 X X hashbrown@0.14.5 X X hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +hashlink@0.10.0 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hive_metastore@0.2.0 X 
hmac@0.12.1 X X home@0.5.11 X X http@0.2.12 X X -http@1.3.1 X X +http@1.4.0 X X http-body@0.4.6 X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X httpdate@1.0.3 X X hyper@0.14.32 X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.24.2 X X X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-glue@0.7.0 X -iceberg-catalog-hms@0.7.0 X -iceberg-catalog-loader@0.7.0 X -iceberg-catalog-rest@0.7.0 X -iceberg-catalog-s3tables@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-glue@0.8.0 X +iceberg-catalog-hms@0.8.0 X +iceberg-catalog-loader@0.8.0 X +iceberg-catalog-rest@0.8.0 X +iceberg-catalog-s3tables@0.8.0 X +iceberg-catalog-sql@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -integer-encoding@4.0.2 X -io-uring@0.7.10 X X +integer-encoding@4.1.0 X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 
X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X +libsqlite3-sys@0.30.1 X +libz-rs-sys@0.5.3 X linked-hash-map@0.5.6 X X linkedbytes@0.1.16 X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X memoffset@0.9.1 X metainfo@0.7.14 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X motore@0.4.1 X X motore-macros@0.4.3 X X mur3@0.1.0 X murmur3@0.5.2 X X nix@0.29.0 X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-conv@0.1.0 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -num_enum@0.7.4 X X X -num_enum_derive@0.7.4 X X X -object@0.36.7 X X +num_enum@0.7.5 X X X +num_enum_derive@0.7.5 X X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X openssl-probe@0.1.6 X X ordered-float@2.10.1 X ordered-float@4.6.0 X outref@0.5.2 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pilota@0.11.10 X X @@ -240,15 +239,16 @@ pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X powerfmt@0.2.0 X X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro-crate@3.3.0 X X -proc-macro2@1.0.101 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -256,164 +256,171 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -ref-cast@1.0.24 X X -ref-cast-impl@1.0.24 X X 
-regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +ref-cast@1.0.25 X X +ref-cast-impl@1.0.25 X X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc-hash@2.1.1 X X rustc_version@0.4.1 X X rustls@0.21.12 X X X -rustls@0.23.31 X X X -rustls-native-certs@0.6.3 X X X -rustls-native-certs@0.8.1 X X X -rustls-pemfile@1.0.4 X X X -rustls-pki-types@1.12.0 X X +rustls@0.23.35 X X X +rustls-native-certs@0.8.2 X X X +rustls-pki-types@1.13.1 X X rustls-webpki@0.101.7 X -rustls-webpki@0.103.4 X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -schannel@0.1.27 X -scoped-tls@1.0.1 X X +schannel@0.1.28 X scopeguard@1.2.0 X X sct@0.7.1 X X X -security-framework@2.11.1 X X -security-framework@3.4.0 X X +security-framework@3.5.1 X X security-framework-sys@2.15.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X socket2@0.5.10 X X -socket2@0.6.0 X X +socket2@0.6.1 X X sonic-number@0.1.0 X sonic-rs@0.3.17 X -sonic-simd@0.1.1 X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +sonic-simd@0.1.2 X +spin@0.9.8 X +sqlx@0.8.6 X X +sqlx-core@0.8.6 X X +sqlx-sqlite@0.8.6 X X +stable_deref_trait@1.2.1 X X 
strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X thiserror@1.0.69 X X -thiserror@2.0.16 X X +thiserror@2.0.17 X X thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X -time@0.3.43 X X +time@0.3.44 X X time-core@0.1.6 X X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X tokio-rustls@0.24.1 X X -tokio-rustls@0.26.2 X X +tokio-rustls@0.26.4 X X tokio-stream@0.1.17 X -tokio-util@0.7.16 X -toml_datetime@0.6.11 X X -toml_edit@0.22.27 X X +tokio-util@0.7.17 X +toml_datetime@0.7.3 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X urlencoding@2.1.3 X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X +vcpkg@0.2.15 X X version_check@0.9.5 X X volo@0.10.7 X X volo-thrift@0.10.8 X X vsimd@0.8.0 X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X 
wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@0.26.11 X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -winnow@0.7.13 X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X xmlparser@0.13.6 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/rest/DEPENDENCIES.rust.tsv b/crates/catalog/rest/DEPENDENCIES.rust.tsv index cf238f4b4c..c78434fa51 100644 
--- a/crates/catalog/rest/DEPENDENCIES.rust.tsv +++ b/crates/catalog/rest/DEPENDENCIES.rust.tsv @@ -1,48 +1,46 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X @@ -56,7 +54,7 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -75,9 +73,9 @@ event-listener@5.4.1 X X 
event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -89,105 +87,98 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X httpdate@1.0.3 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-rest@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-rest@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X 
-lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -195,134 +186,135 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X 
-reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X 
+tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X 
windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/s3tables/DEPENDENCIES.rust.tsv b/crates/catalog/s3tables/DEPENDENCIES.rust.tsv index 13d8eebe7a..7dd182e435 100644 --- a/crates/catalog/s3tables/DEPENDENCIES.rust.tsv +++ b/crates/catalog/s3tables/DEPENDENCIES.rust.tsv @@ -1,77 +1,73 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -aws-config@1.8.6 X -aws-credential-types@1.2.6 X -aws-runtime@1.5.10 X -aws-sdk-s3tables@1.37.0 X 
-aws-sdk-sso@1.83.0 X -aws-sdk-ssooidc@1.84.0 X -aws-sdk-sts@1.85.0 X -aws-sigv4@1.3.4 X -aws-smithy-async@1.2.5 X -aws-smithy-http@0.62.3 X -aws-smithy-http-client@1.1.1 X -aws-smithy-json@0.61.5 X -aws-smithy-observability@0.1.3 X -aws-smithy-query@0.60.7 X -aws-smithy-runtime@1.9.1 X -aws-smithy-runtime-api@1.9.0 X -aws-smithy-types@1.3.2 X -aws-smithy-xml@0.60.10 X -aws-types@1.3.8 X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.21.7 X X +aws-config@1.8.11 X +aws-credential-types@1.2.10 X +aws-runtime@1.5.16 X +aws-sdk-s3tables@1.46.0 X +aws-sdk-sso@1.90.0 X +aws-sdk-ssooidc@1.92.0 X +aws-sdk-sts@1.94.0 X +aws-sigv4@1.3.6 X +aws-smithy-async@1.2.7 X +aws-smithy-http@0.62.6 X +aws-smithy-http-client@1.1.5 X +aws-smithy-json@0.61.8 X +aws-smithy-observability@0.1.5 X +aws-smithy-query@0.60.9 X +aws-smithy-runtime@1.9.5 X +aws-smithy-runtime-api@1.9.3 X +aws-smithy-types@1.3.5 X +aws-smithy-xml@0.60.13 X +aws-types@1.3.10 X +backon@1.6.0 X base64@0.22.1 X X base64-simd@0.8.0 X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X +bytes@1.11.0 X bytes-utils@0.1.4 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X const-random@0.1.18 X X const-random-macro@0.1.16 X X core-foundation@0.10.1 X X -core-foundation@0.9.4 X X core-foundation-sys@0.8.7 X X cpufeatures@0.2.17 X X crc32c@0.6.8 X X @@ -80,14 +76,14 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X darling_core@0.21.3 X darling_macro@0.20.11 X darling_macro@0.21.3 X 
-deranged@0.5.3 X X +deranged@0.5.5 X X derive_builder@0.20.2 X X derive_builder_core@0.20.2 X X derive_builder_macro@0.20.2 X X @@ -100,9 +96,9 @@ event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -114,114 +110,107 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X h2@0.3.27 X h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X http@0.2.12 X X -http@1.3.1 X X +http@1.4.0 X X http-body@0.4.6 X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X httpdate@1.0.3 X X hyper@0.14.32 X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.24.2 X X X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-s3tables@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-s3tables@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X 
itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-conv@0.1.0 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X openssl-probe@0.1.6 X X ordered-float@2.10.1 X ordered-float@4.6.0 X outref@0.5.2 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X powerfmt@0.2.0 X X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ 
-229,152 +218,150 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X rustls@0.21.12 X X X -rustls@0.23.31 X X X -rustls-native-certs@0.6.3 X X X -rustls-native-certs@0.8.1 X X X -rustls-pemfile@1.0.4 X X X -rustls-pki-types@1.12.0 X X +rustls@0.23.35 X X X +rustls-native-certs@0.8.2 X X X +rustls-pki-types@1.13.1 X X rustls-webpki@0.101.7 X -rustls-webpki@0.103.4 X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -schannel@0.1.27 X -scoped-tls@1.0.1 X X +schannel@0.1.28 X scopeguard@1.2.0 X X sct@0.7.1 X X X -security-framework@2.11.1 X X -security-framework@3.4.0 X X +security-framework@3.5.1 X X security-framework-sys@2.15.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X socket2@0.5.10 X X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X 
+syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X -time@0.3.43 X X +time@0.3.44 X X time-core@0.1.6 X X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X tokio-rustls@0.24.1 X X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X urlencoding@2.1.3 X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X vsimd@0.8.0 X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X 
-windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X xmlparser@0.13.6 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/sql/DEPENDENCIES.rust.tsv b/crates/catalog/sql/DEPENDENCIES.rust.tsv index 335c980967..cc48621d0c 100644 --- a/crates/catalog/sql/DEPENDENCIES.rust.tsv +++ b/crates/catalog/sql/DEPENDENCIES.rust.tsv @@ -1,49 +1,47 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X 
X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X allocator-api2@0.2.21 X X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X @@ -51,7 +49,7 @@ const-random@0.1.18 X X const-random-macro@0.1.16 X X core-foundation-sys@0.8.7 X X cpufeatures@0.2.17 X X -crc@3.3.0 X X +crc@3.4.0 X X crc-catalog@2.4.0 X X crc32c@0.6.8 X X crc32fast@1.5.0 X X @@ -60,7 +58,7 @@ crossbeam-epoch@0.9.18 X X crossbeam-queue@0.3.12 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -79,9 +77,9 @@ event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 
X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X flume@0.11.1 X X fnv@1.0.7 X X foldhash@0.1.5 X @@ -96,105 +94,99 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X -half@2.6.0 X X +half@2.7.1 X X hashbrown@0.15.5 X X +hashbrown@0.16.1 X X hashlink@0.10.0 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-sql@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-sql@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X 
X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X libsqlite3-sys@0.30.1 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -202,141 +194,142 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X 
+rust_decimal@1.39.0 X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X -socket2@0.6.0 X X +socket2@0.6.1 X X spin@0.9.8 X sqlx@0.8.6 X X sqlx-core@0.8.6 X X sqlx-sqlite@0.8.6 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X tokio-stream@0.1.17 X -tokio-util@0.7.16 X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X 
try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X vcpkg@0.2.15 X X version_check@0.9.5 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X +web-sys@0.3.83 X X webpki-roots@0.26.11 X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X 
windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/examples/DEPENDENCIES.rust.tsv b/crates/examples/DEPENDENCIES.rust.tsv index 20702597a7..de07f1c650 100644 --- a/crates/examples/DEPENDENCIES.rust.tsv +++ b/crates/examples/DEPENDENCIES.rust.tsv @@ -1,48 +1,46 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X 
-bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X @@ -56,7 +54,7 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -75,9 +73,9 @@ event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -89,106 +87,99 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X httpdate@1.0.3 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-rest@0.7.0 X -iceberg-examples@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X 
+iceberg-catalog-rest@0.8.0 X +iceberg-examples@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X 
X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -196,135 +187,136 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X 
-syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X 
+webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/iceberg/DEPENDENCIES.rust.tsv b/crates/iceberg/DEPENDENCIES.rust.tsv index 460a531644..c446954437 100644 --- a/crates/iceberg/DEPENDENCIES.rust.tsv +++ b/crates/iceberg/DEPENDENCIES.rust.tsv @@ -1,48 +1,46 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X 
-anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X @@ -56,7 +54,7 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -70,13 +68,14 @@ digest@0.10.7 X X displaydoc@0.2.5 X X dissimilar@1.0.10 X either@1.15.0 X X +equivalent@1.0.2 X X event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -88,101 +87,94 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X 
-generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 
X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -190,134 +182,135 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X 
X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X 
-wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 
X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/integration_tests/DEPENDENCIES.rust.tsv b/crates/integration_tests/DEPENDENCIES.rust.tsv index 891d23966e..26f6d83820 100644 --- a/crates/integration_tests/DEPENDENCIES.rust.tsv +++ b/crates/integration_tests/DEPENDENCIES.rust.tsv @@ -1,406 +1,414 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X -adler2@2.0.1 X X X -ahash@0.8.12 X X -aho-corasick@1.1.3 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X -array-init@2.1.0 X X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@55.2.0 X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-csv@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-json@55.2.0 X -arrow-ord@55.2.0 X -arrow-row@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X -as-any@0.3.2 X X -async-compression@0.4.19 X X -async-lock@3.4.1 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -atomic-waker@1.1.2 X X -autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.22.1 X X -bigdecimal@0.4.8 X X -bimap@0.6.3 X X -bitflags@2.9.4 X X -blake2@0.10.6 X X -blake3@1.8.2 X X X -block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X -byteorder@1.5.0 X X -bytes@1.10.1 X -bzip2@0.5.2 X X -bzip2-sys@0.1.13+1.0.8 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X -chrono@0.4.42 X X -chrono-tz@0.10.4 X X -comfy-table@7.2.0 X -concurrent-queue@2.5.0 X X -const-oid@0.9.6 X X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -constant_time_eq@0.3.1 X X X -core-foundation-sys@0.8.7 X X -cpufeatures@0.2.17 X X -crc32c@0.6.8 X X -crc32fast@1.5.0 X X -crossbeam-channel@0.5.15 X X 
-crossbeam-epoch@0.9.18 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.6 X X -csv@1.3.1 X X -csv-core@0.1.12 X X -ctor@0.2.9 X X -darling@0.20.11 X -darling@0.21.3 X -darling_core@0.20.11 X -darling_core@0.21.3 X -darling_macro@0.20.11 X -darling_macro@0.21.3 X -dashmap@6.1.0 X -datafusion@48.0.1 X -datafusion-catalog@48.0.1 X -datafusion-catalog-listing@48.0.1 X -datafusion-common@48.0.1 X -datafusion-common-runtime@48.0.1 X -datafusion-datasource@48.0.1 X -datafusion-datasource-csv@48.0.1 X -datafusion-datasource-json@48.0.1 X -datafusion-datasource-parquet@48.0.1 X -datafusion-doc@48.0.1 X -datafusion-execution@48.0.1 X -datafusion-expr@48.0.1 X -datafusion-expr-common@48.0.1 X -datafusion-functions@48.0.1 X -datafusion-functions-aggregate@48.0.1 X -datafusion-functions-aggregate-common@48.0.1 X -datafusion-functions-nested@48.0.1 X -datafusion-functions-table@48.0.1 X -datafusion-functions-window@48.0.1 X -datafusion-functions-window-common@48.0.1 X -datafusion-macros@48.0.1 X -datafusion-optimizer@48.0.1 X -datafusion-physical-expr@48.0.1 X -datafusion-physical-expr-common@48.0.1 X -datafusion-physical-optimizer@48.0.1 X -datafusion-physical-plan@48.0.1 X -datafusion-session@48.0.1 X -datafusion-sql@48.0.1 X -derive_builder@0.20.2 X X -derive_builder_core@0.20.2 X X -derive_builder_macro@0.20.2 X X -digest@0.10.7 X X -displaydoc@0.2.5 X X -dissimilar@1.0.10 X -either@1.15.0 X X -equivalent@1.0.2 X X -errno@0.3.13 X X -event-listener@5.4.1 X X -event-listener-strategy@0.5.4 X X -expect-test@1.5.1 X X -fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -fixedbitset@0.5.7 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X -fnv@1.0.7 X X -foldhash@0.1.5 X -form_urlencoded@1.2.2 X X -futures@0.3.31 X X -futures-channel@0.3.31 X X -futures-core@0.3.31 X X -futures-executor@0.3.31 X X -futures-io@0.3.31 X X -futures-macro@0.3.31 X X -futures-sink@0.3.31 X X -futures-task@0.3.31 X X -futures-util@0.3.31 X X -generator@0.8.7 X X -generic-array@0.14.7 X 
-getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X -glob@0.3.3 X X -gloo-timers@0.3.0 X X -h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -heck@0.5.0 X X -hermit-abi@0.5.2 X X -hex@0.4.3 X X -hmac@0.12.1 X X -home@0.5.11 X X -http@1.3.1 X X -http-body@1.0.1 X -http-body-util@0.1.3 X -httparse@1.10.1 X X -httpdate@1.0.3 X X -humantime@2.2.0 X X -hyper@1.7.0 X -hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X -iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-rest@0.7.0 X -iceberg-datafusion@0.7.0 X -iceberg-integration-tests@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X -ident_case@1.0.1 X X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.11.0 X X -integer-encoding@3.0.4 X -io-uring@0.7.10 X X -ipnet@2.11.0 X X -iri-string@0.7.8 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.15 X X -jobserver@0.1.34 X X -js-sys@0.3.78 X X -lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X -libm@0.2.15 X -libz-rs-sys@0.5.2 X -linux-raw-sys@0.9.4 X X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -lzma-sys@0.1.20 X X -matchers@0.2.0 X -md-5@0.10.6 X X -memchr@2.7.5 X X -miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X -murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X -num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X -object_store@0.12.3 X X -once_cell@1.21.3 X X -opendal@0.54.0 X -ordered-float@2.10.1 X -ordered-float@4.6.0 X -parking@2.2.1 X X -parking_lot@0.12.4 X X 
-parking_lot_core@0.9.11 X X -parquet@55.2.0 X -paste@1.0.15 X X -percent-encoding@2.3.2 X X -petgraph@0.8.2 X X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.16 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -portable-atomic@1.11.1 X X -potential_utf@0.1.3 X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X -psm@0.1.26 X X -quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X -r-efi@5.3.0 X X X -rand@0.8.5 X X -rand@0.9.2 X X -rand_chacha@0.3.1 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.3 X X -recursive@0.1.1 X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X -reqsign@0.16.5 X -reqwest@0.12.23 X X -ring@0.17.14 X X -roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X -rustc_version@0.4.1 X X -rustix@1.0.8 X X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X -rustversion@1.0.22 X X -ryu@1.0.20 X X -same-file@1.0.6 X X -scoped-tls@1.0.1 X X -scopeguard@1.2.0 X X -semver@1.0.26 X X -seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X -serde_repr@0.1.20 X X -serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X -sha1@0.10.6 X X -sha2@0.10.9 X X -sharded-slab@0.1.7 X -shlex@1.3.0 X X -simdutf8@0.1.5 X X -siphasher@1.0.1 X X -slab@0.4.11 X -smallvec@1.15.1 X X -snap@1.1.1 X -socket2@0.6.0 X X -sqlparser@0.55.0 X -sqlparser_derive@0.3.0 X -stable_deref_trait@1.2.0 X X -stacker@0.1.21 X X -static_assertions@1.1.0 X X -strsim@0.11.1 X -strum@0.27.2 X -strum_macros@0.27.2 X -subtle@2.6.1 X -syn@2.0.106 X X -sync_wrapper@1.0.2 X -synstructure@0.13.2 X -tagptr@0.2.0 X X -tempfile@3.22.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X -thread_local@1.1.9 X X -threadpool@1.8.1 X X -thrift@0.17.0 X -tiny-keccak@2.0.2 X -tinystr@0.8.1 X 
-tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X -tower@0.5.2 X -tower-http@0.6.6 X -tower-layer@0.3.3 X -tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X -tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X -try-lock@0.2.5 X -twox-hash@2.1.2 X -typed-builder@0.20.1 X X -typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X -unicode-segmentation@1.12.0 X X -unicode-width@0.2.1 X X -untrusted@0.9.0 X -url@2.5.7 X X -utf8_iter@1.0.4 X X -uuid@1.18.1 X X -version_check@0.9.5 X X -walkdir@2.5.0 X X -want@0.3.1 X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X -wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -web-time@1.1.0 X X -webpki-roots@1.0.2 X -winapi-util@0.1.11 X X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.61.0 X X -windows-targets@0.52.6 X X -windows-threading@0.1.0 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_msvc@0.52.6 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_msvc@0.52.6 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -xz2@0.1.7 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X 
+crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +adler2@2.0.1 X X X +ahash@0.8.12 X X +aho-corasick@1.1.4 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X +android_system_properties@0.1.5 X X +anyhow@1.0.100 X X +apache-avro@0.21.0 X +ar_archive_writer@0.2.0 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@57.1.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-csv@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-json@57.1.0 X +arrow-ord@57.1.0 X +arrow-row@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X +as-any@0.3.2 X X +async-compression@0.4.19 X X +async-lock@3.4.1 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +backon@1.6.0 X +base64@0.22.1 X X +bigdecimal@0.4.9 X X +bimap@0.6.3 X X +bitflags@2.10.0 X X +blake2@0.10.6 X X +blake3@1.8.2 X X X +block-buffer@0.10.4 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.19.0 X X +bytemuck@1.24.0 X X X +byteorder@1.5.0 X X +bytes@1.11.0 X +bzip2@0.5.2 X X +bzip2@0.6.1 X X +bzip2-sys@0.1.13+1.0.8 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X +chrono@0.4.42 X X +chrono-tz@0.10.4 X X +comfy-table@7.2.1 X +concurrent-queue@2.5.0 X X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +constant_time_eq@0.3.1 X X X +core-foundation-sys@0.8.7 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.7 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +ctor@0.2.9 X X +darling@0.20.11 X +darling@0.21.3 X +darling_core@0.20.11 X +darling_core@0.21.3 X +darling_macro@0.20.11 X +darling_macro@0.21.3 X +dashmap@6.1.0 X +datafusion@51.0.0 X 
+datafusion-catalog@51.0.0 X +datafusion-catalog-listing@51.0.0 X +datafusion-common@51.0.0 X +datafusion-common-runtime@51.0.0 X +datafusion-datasource@51.0.0 X +datafusion-datasource-arrow@51.0.0 X +datafusion-datasource-csv@51.0.0 X +datafusion-datasource-json@51.0.0 X +datafusion-datasource-parquet@51.0.0 X +datafusion-doc@51.0.0 X +datafusion-execution@51.0.0 X +datafusion-expr@51.0.0 X +datafusion-expr-common@51.0.0 X +datafusion-functions@51.0.0 X +datafusion-functions-aggregate@51.0.0 X +datafusion-functions-aggregate-common@51.0.0 X +datafusion-functions-nested@51.0.0 X +datafusion-functions-table@51.0.0 X +datafusion-functions-window@51.0.0 X +datafusion-functions-window-common@51.0.0 X +datafusion-macros@51.0.0 X +datafusion-optimizer@51.0.0 X +datafusion-physical-expr@51.0.0 X +datafusion-physical-expr-adapter@51.0.0 X +datafusion-physical-expr-common@51.0.0 X +datafusion-physical-optimizer@51.0.0 X +datafusion-physical-plan@51.0.0 X +datafusion-pruning@51.0.0 X +datafusion-session@51.0.0 X +datafusion-sql@51.0.0 X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +displaydoc@0.2.5 X X +dissimilar@1.0.10 X +either@1.15.0 X X +equivalent@1.0.2 X X +errno@0.3.14 X X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fastrand@2.3.0 X X +find-msvc-tools@0.1.5 X X +fixedbitset@0.5.7 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +form_urlencoded@1.2.2 X X +futures@0.3.31 X X +futures-channel@0.3.31 X X +futures-core@0.3.31 X X +futures-executor@0.3.31 X X +futures-io@0.3.31 X X +futures-macro@0.3.31 X X +futures-sink@0.3.31 X X +futures-task@0.3.31 X X +futures-timer@3.0.3 X X +futures-util@0.3.31 X X +generic-array@0.14.7 X +getrandom@0.2.16 X X +getrandom@0.3.4 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +h2@0.4.12 X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +heck@0.5.0 X X +hex@0.4.3 X X 
+hmac@0.12.1 X X +home@0.5.11 X X +http@1.4.0 X X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +httpdate@1.0.3 X X +humantime@2.3.0 X X +hyper@1.8.1 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.8.0 X +iceberg-catalog-rest@0.8.0 X +iceberg-datafusion@0.8.0 X +iceberg-integration-tests@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.12.1 X X +integer-encoding@3.0.4 X +ipnet@2.11.0 X X +iri-string@0.7.9 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X +jobserver@0.1.34 X X +js-sys@0.3.83 X X +lazy_static@1.5.0 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.178 X X +libm@0.2.15 X +libz-rs-sys@0.5.3 X +linux-raw-sys@0.11.0 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X +lzma-sys@0.1.20 X X +md-5@0.10.6 X X +memchr@2.7.6 X X +miniz_oxide@0.8.9 X X X +mio@1.1.1 X +moka@0.12.11 X X +murmur3@0.5.2 X X +nu-ansi-term@0.50.3 X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +object@0.32.2 X X +object_store@0.12.4 X X +once_cell@1.21.3 X X +opendal@0.55.0 X +ordered-float@2.10.1 X +ordered-float@4.6.0 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X +paste@1.0.15 X X +percent-encoding@2.3.2 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.16 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +portable-atomic@1.11.1 X X +portable-atomic-util@0.2.4 X X 
+potential_utf@0.1.4 X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X +psm@0.1.28 X X +quad-rand@0.2.3 X +quick-xml@0.38.4 X +quote@1.0.42 X X +r-efi@5.3.0 X X X +rand@0.8.5 X X +rand@0.9.2 X X +rand_chacha@0.3.1 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.3 X X +recursive@0.1.1 X +recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X +relative-path@1.9.3 X X +reqsign@0.16.5 X +reqwest@0.12.25 X X +ring@0.17.14 X X +roaring@0.11.2 X X +rstest@0.26.1 X X +rstest_macros@0.26.1 X X +rust_decimal@1.39.0 X +rustc_version@0.4.1 X X +rustix@1.1.2 X X X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X +rustversion@1.0.22 X X +ryu@1.0.20 X X +same-file@1.0.6 X X +scopeguard@1.2.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X +serde_repr@0.1.20 X X +serde_urlencoded@0.7.1 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X +sha1@0.10.6 X X +sha2@0.10.9 X X +sharded-slab@0.1.7 X +shlex@1.3.0 X X +simd-adler32@0.3.8 X +simdutf8@0.1.5 X X +siphasher@1.0.1 X X +slab@0.4.11 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.1 X X +sqlparser@0.59.0 X +sqlparser_derive@0.3.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.22 X X +strsim@0.11.1 X +strum@0.27.2 X +strum_macros@0.27.2 X +subtle@2.6.1 X +syn@2.0.111 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +tempfile@3.23.0 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X +thread_local@1.1.9 X X +thrift@0.17.0 X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X +toml_datetime@0.7.3 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X +tower@0.5.2 X +tower-http@0.6.8 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.43 X 
+tracing-attributes@0.1.31 X +tracing-core@0.1.35 X +tracing-log@0.2.0 X +tracing-subscriber@0.3.22 X +try-lock@0.2.5 X +twox-hash@2.1.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X +unicode-segmentation@1.12.0 X X +unicode-width@0.2.2 X X +untrusted@0.9.0 X +url@2.5.7 X X +utf8_iter@1.0.4 X X +uuid@1.19.0 X X +version_check@0.9.5 X X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X +wasm-streams@0.4.2 X X +web-sys@0.3.83 X X +web-time@1.1.0 X X +webpki-roots@1.0.4 X +winapi-util@0.1.11 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.52.0 X X +windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +xz2@0.1.7 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X diff --git 
a/crates/integrations/cache-moka/DEPENDENCIES.rust.tsv b/crates/integrations/cache-moka/DEPENDENCIES.rust.tsv index 59b898d3fe..4763f71ec8 100644 --- a/crates/integrations/cache-moka/DEPENDENCIES.rust.tsv +++ b/crates/integrations/cache-moka/DEPENDENCIES.rust.tsv @@ -1,48 +1,46 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X @@ -56,7 +54,7 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X 
-crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -70,13 +68,14 @@ digest@0.10.7 X X displaydoc@0.2.5 X X dissimilar@1.0.10 X either@1.15.0 X X +equivalent@1.0.2 X X event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -88,102 +87,95 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-cache-moka@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-cache-moka@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X 
jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -191,134 +183,135 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X 
-regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 
X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X 
X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/integrations/datafusion/DEPENDENCIES.rust.tsv b/crates/integrations/datafusion/DEPENDENCIES.rust.tsv index 8fb3e04f80..7a0f57e7a0 100644 --- a/crates/integrations/datafusion/DEPENDENCIES.rust.tsv +++ b/crates/integrations/datafusion/DEPENDENCIES.rust.tsv @@ -1,401 +1,409 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X -adler2@2.0.1 X X X -ahash@0.8.12 X X -aho-corasick@1.1.3 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X -array-init@2.1.0 X X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@55.2.0 X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-csv@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-json@55.2.0 X -arrow-ord@55.2.0 X -arrow-row@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X -as-any@0.3.2 X X -async-compression@0.4.19 X X -async-lock@3.4.1 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -atomic-waker@1.1.2 X X -autocfg@1.5.0 X X -backon@1.5.2 X 
-backtrace@0.3.75 X X -base64@0.22.1 X X -bigdecimal@0.4.8 X X -bimap@0.6.3 X X -bitflags@2.9.4 X X -blake2@0.10.6 X X -blake3@1.8.2 X X X -block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X -byteorder@1.5.0 X X -bytes@1.10.1 X -bzip2@0.5.2 X X -bzip2-sys@0.1.13+1.0.8 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X -chrono@0.4.42 X X -chrono-tz@0.10.4 X X -comfy-table@7.2.0 X -concurrent-queue@2.5.0 X X -const-oid@0.9.6 X X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -constant_time_eq@0.3.1 X X X -core-foundation-sys@0.8.7 X X -cpufeatures@0.2.17 X X -crc32c@0.6.8 X X -crc32fast@1.5.0 X X -crossbeam-channel@0.5.15 X X -crossbeam-epoch@0.9.18 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.6 X X -csv@1.3.1 X X -csv-core@0.1.12 X X -darling@0.20.11 X -darling@0.21.3 X -darling_core@0.20.11 X -darling_core@0.21.3 X -darling_macro@0.20.11 X -darling_macro@0.21.3 X -dashmap@6.1.0 X -datafusion@48.0.1 X -datafusion-catalog@48.0.1 X -datafusion-catalog-listing@48.0.1 X -datafusion-common@48.0.1 X -datafusion-common-runtime@48.0.1 X -datafusion-datasource@48.0.1 X -datafusion-datasource-csv@48.0.1 X -datafusion-datasource-json@48.0.1 X -datafusion-datasource-parquet@48.0.1 X -datafusion-doc@48.0.1 X -datafusion-execution@48.0.1 X -datafusion-expr@48.0.1 X -datafusion-expr-common@48.0.1 X -datafusion-functions@48.0.1 X -datafusion-functions-aggregate@48.0.1 X -datafusion-functions-aggregate-common@48.0.1 X -datafusion-functions-nested@48.0.1 X -datafusion-functions-table@48.0.1 X -datafusion-functions-window@48.0.1 X -datafusion-functions-window-common@48.0.1 X -datafusion-macros@48.0.1 X -datafusion-optimizer@48.0.1 X -datafusion-physical-expr@48.0.1 X -datafusion-physical-expr-common@48.0.1 X -datafusion-physical-optimizer@48.0.1 X -datafusion-physical-plan@48.0.1 X -datafusion-session@48.0.1 X -datafusion-sql@48.0.1 X -derive_builder@0.20.2 X X 
-derive_builder_core@0.20.2 X X -derive_builder_macro@0.20.2 X X -digest@0.10.7 X X -displaydoc@0.2.5 X X -dissimilar@1.0.10 X -either@1.15.0 X X -equivalent@1.0.2 X X -errno@0.3.13 X X -event-listener@5.4.1 X X -event-listener-strategy@0.5.4 X X -expect-test@1.5.1 X X -fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -fixedbitset@0.5.7 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X -fnv@1.0.7 X X -foldhash@0.1.5 X -form_urlencoded@1.2.2 X X -futures@0.3.31 X X -futures-channel@0.3.31 X X -futures-core@0.3.31 X X -futures-executor@0.3.31 X X -futures-io@0.3.31 X X -futures-macro@0.3.31 X X -futures-sink@0.3.31 X X -futures-task@0.3.31 X X -futures-util@0.3.31 X X -generator@0.8.7 X X -generic-array@0.14.7 X -getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X -glob@0.3.3 X X -gloo-timers@0.3.0 X X -half@2.6.0 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -heck@0.5.0 X X -hermit-abi@0.5.2 X X -hex@0.4.3 X X -hmac@0.12.1 X X -home@0.5.11 X X -http@1.3.1 X X -http-body@1.0.1 X -http-body-util@0.1.3 X -httparse@1.10.1 X X -humantime@2.2.0 X X -hyper@1.7.0 X -hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X -iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-datafusion@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X -ident_case@1.0.1 X X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.11.0 X X -integer-encoding@3.0.4 X -io-uring@0.7.10 X X -ipnet@2.11.0 X X -iri-string@0.7.8 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.15 X X -jobserver@0.1.34 X X -js-sys@0.3.78 X X -lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X -libm@0.2.15 X -libz-rs-sys@0.5.2 X -linux-raw-sys@0.9.4 X X X -litemap@0.8.0 
X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -lzma-sys@0.1.20 X X -matchers@0.2.0 X -md-5@0.10.6 X X -memchr@2.7.5 X X -miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X -murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X -num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X -object_store@0.12.3 X X -once_cell@1.21.3 X X -opendal@0.54.0 X -ordered-float@2.10.1 X -ordered-float@4.6.0 X -parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X -paste@1.0.15 X X -percent-encoding@2.3.2 X X -petgraph@0.8.2 X X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.16 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -portable-atomic@1.11.1 X X -potential_utf@0.1.3 X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X -psm@0.1.26 X X -quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X -r-efi@5.3.0 X X X -rand@0.8.5 X X -rand@0.9.2 X X -rand_chacha@0.3.1 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.3 X X -recursive@0.1.1 X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X -reqsign@0.16.5 X -reqwest@0.12.23 X X -ring@0.17.14 X X -roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X -rustc_version@0.4.1 X X -rustix@1.0.8 X X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X -rustversion@1.0.22 X X -ryu@1.0.20 X X -same-file@1.0.6 X X -scoped-tls@1.0.1 X X -scopeguard@1.2.0 X X -semver@1.0.26 X X -seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X -serde_repr@0.1.20 X X -serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X -sha1@0.10.6 X X -sha2@0.10.9 X X -sharded-slab@0.1.7 X -shlex@1.3.0 X X -simdutf8@0.1.5 X 
X -siphasher@1.0.1 X X -slab@0.4.11 X -smallvec@1.15.1 X X -snap@1.1.1 X -socket2@0.6.0 X X -sqlparser@0.55.0 X -sqlparser_derive@0.3.0 X -stable_deref_trait@1.2.0 X X -stacker@0.1.21 X X -static_assertions@1.1.0 X X -strsim@0.11.1 X -strum@0.27.2 X -strum_macros@0.27.2 X -subtle@2.6.1 X -syn@2.0.106 X X -sync_wrapper@1.0.2 X -synstructure@0.13.2 X -tagptr@0.2.0 X X -tempfile@3.22.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X -thread_local@1.1.9 X X -threadpool@1.8.1 X X -thrift@0.17.0 X -tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X -tower@0.5.2 X -tower-http@0.6.6 X -tower-layer@0.3.3 X -tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X -tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X -try-lock@0.2.5 X -twox-hash@2.1.2 X -typed-builder@0.20.1 X X -typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X -unicode-segmentation@1.12.0 X X -unicode-width@0.2.1 X X -untrusted@0.9.0 X -url@2.5.7 X X -utf8_iter@1.0.4 X X -uuid@1.18.1 X X -version_check@0.9.5 X X -walkdir@2.5.0 X X -want@0.3.1 X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X -wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -web-time@1.1.0 X X -webpki-roots@1.0.2 X -winapi-util@0.1.11 X X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.61.0 X X -windows-targets@0.52.6 X X -windows-threading@0.1.0 
X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_msvc@0.52.6 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_msvc@0.52.6 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -xz2@0.1.7 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +adler2@2.0.1 X X X +ahash@0.8.12 X X +aho-corasick@1.1.4 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X +android_system_properties@0.1.5 X X +anyhow@1.0.100 X X +apache-avro@0.21.0 X +ar_archive_writer@0.2.0 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@57.1.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-csv@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-json@57.1.0 X +arrow-ord@57.1.0 X +arrow-row@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X +as-any@0.3.2 X X +async-compression@0.4.19 X X +async-lock@3.4.1 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +backon@1.6.0 X +base64@0.22.1 X X +bigdecimal@0.4.9 X X +bimap@0.6.3 X X +bitflags@2.10.0 X X +blake2@0.10.6 X X +blake3@1.8.2 X X X +block-buffer@0.10.4 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.19.0 X X +bytemuck@1.24.0 X X X +byteorder@1.5.0 X X +bytes@1.11.0 X +bzip2@0.5.2 X X +bzip2@0.6.1 X X +bzip2-sys@0.1.13+1.0.8 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X +chrono@0.4.42 X X +chrono-tz@0.10.4 X X 
+comfy-table@7.2.1 X +concurrent-queue@2.5.0 X X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +constant_time_eq@0.3.1 X X X +core-foundation-sys@0.8.7 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.7 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +darling@0.20.11 X +darling@0.21.3 X +darling_core@0.20.11 X +darling_core@0.21.3 X +darling_macro@0.20.11 X +darling_macro@0.21.3 X +dashmap@6.1.0 X +datafusion@51.0.0 X +datafusion-catalog@51.0.0 X +datafusion-catalog-listing@51.0.0 X +datafusion-common@51.0.0 X +datafusion-common-runtime@51.0.0 X +datafusion-datasource@51.0.0 X +datafusion-datasource-arrow@51.0.0 X +datafusion-datasource-csv@51.0.0 X +datafusion-datasource-json@51.0.0 X +datafusion-datasource-parquet@51.0.0 X +datafusion-doc@51.0.0 X +datafusion-execution@51.0.0 X +datafusion-expr@51.0.0 X +datafusion-expr-common@51.0.0 X +datafusion-functions@51.0.0 X +datafusion-functions-aggregate@51.0.0 X +datafusion-functions-aggregate-common@51.0.0 X +datafusion-functions-nested@51.0.0 X +datafusion-functions-table@51.0.0 X +datafusion-functions-window@51.0.0 X +datafusion-functions-window-common@51.0.0 X +datafusion-macros@51.0.0 X +datafusion-optimizer@51.0.0 X +datafusion-physical-expr@51.0.0 X +datafusion-physical-expr-adapter@51.0.0 X +datafusion-physical-expr-common@51.0.0 X +datafusion-physical-optimizer@51.0.0 X +datafusion-physical-plan@51.0.0 X +datafusion-pruning@51.0.0 X +datafusion-session@51.0.0 X +datafusion-sql@51.0.0 X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +displaydoc@0.2.5 X X +dissimilar@1.0.10 X +either@1.15.0 X X +equivalent@1.0.2 X X +errno@0.3.14 X X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fastrand@2.3.0 X X +find-msvc-tools@0.1.5 X X +fixedbitset@0.5.7 X X 
+flatbuffers@25.9.23 X +flate2@1.1.5 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +form_urlencoded@1.2.2 X X +futures@0.3.31 X X +futures-channel@0.3.31 X X +futures-core@0.3.31 X X +futures-executor@0.3.31 X X +futures-io@0.3.31 X X +futures-macro@0.3.31 X X +futures-sink@0.3.31 X X +futures-task@0.3.31 X X +futures-timer@3.0.3 X X +futures-util@0.3.31 X X +generic-array@0.14.7 X +getrandom@0.2.16 X X +getrandom@0.3.4 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +home@0.5.11 X X +http@1.4.0 X X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +humantime@2.3.0 X X +hyper@1.8.1 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.8.0 X +iceberg-datafusion@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.12.1 X X +integer-encoding@3.0.4 X +ipnet@2.11.0 X X +iri-string@0.7.9 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X +jobserver@0.1.34 X X +js-sys@0.3.83 X X +lazy_static@1.5.0 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.178 X X +libm@0.2.15 X +libz-rs-sys@0.5.3 X +linux-raw-sys@0.11.0 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X +lzma-sys@0.1.20 X X +md-5@0.10.6 X X +memchr@2.7.6 X X +miniz_oxide@0.8.9 X X X +mio@1.1.1 X +moka@0.12.11 X X +murmur3@0.5.2 X X +nu-ansi-term@0.50.3 X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-integer@0.1.46 X X 
+num-traits@0.2.19 X X +object@0.32.2 X X +object_store@0.12.4 X X +once_cell@1.21.3 X X +opendal@0.55.0 X +ordered-float@2.10.1 X +ordered-float@4.6.0 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X +paste@1.0.15 X X +percent-encoding@2.3.2 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.16 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +portable-atomic@1.11.1 X X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X +psm@0.1.28 X X +quad-rand@0.2.3 X +quick-xml@0.38.4 X +quote@1.0.42 X X +r-efi@5.3.0 X X X +rand@0.8.5 X X +rand@0.9.2 X X +rand_chacha@0.3.1 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.3 X X +recursive@0.1.1 X +recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X +relative-path@1.9.3 X X +reqsign@0.16.5 X +reqwest@0.12.25 X X +ring@0.17.14 X X +roaring@0.11.2 X X +rstest@0.26.1 X X +rstest_macros@0.26.1 X X +rust_decimal@1.39.0 X +rustc_version@0.4.1 X X +rustix@1.1.2 X X X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X +rustversion@1.0.22 X X +ryu@1.0.20 X X +same-file@1.0.6 X X +scopeguard@1.2.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X +serde_repr@0.1.20 X X +serde_urlencoded@0.7.1 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X +sha1@0.10.6 X X +sha2@0.10.9 X X +sharded-slab@0.1.7 X +shlex@1.3.0 X X +simd-adler32@0.3.8 X +simdutf8@0.1.5 X X +siphasher@1.0.1 X X +slab@0.4.11 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.1 X X +sqlparser@0.59.0 X +sqlparser_derive@0.3.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.22 X X +strsim@0.11.1 X +strum@0.27.2 X +strum_macros@0.27.2 X +subtle@2.6.1 X 
+syn@2.0.111 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +tempfile@3.23.0 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X +thread_local@1.1.9 X X +thrift@0.17.0 X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X +toml_datetime@0.7.3 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X +tower@0.5.2 X +tower-http@0.6.8 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X +tracing-log@0.2.0 X +tracing-subscriber@0.3.22 X +try-lock@0.2.5 X +twox-hash@2.1.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X +unicode-segmentation@1.12.0 X X +unicode-width@0.2.2 X X +untrusted@0.9.0 X +url@2.5.7 X X +utf8_iter@1.0.4 X X +uuid@1.19.0 X X +version_check@0.9.5 X X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X +wasm-streams@0.4.2 X X +web-sys@0.3.83 X X +web-time@1.1.0 X X +webpki-roots@1.0.4 X +winapi-util@0.1.11 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.52.0 X X +windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.52.6 X X 
+windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +xz2@0.1.7 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/integrations/playground/DEPENDENCIES.rust.tsv b/crates/integrations/playground/DEPENDENCIES.rust.tsv index 84dd20ed3e..8f57b1d9ae 100644 --- a/crates/integrations/playground/DEPENDENCIES.rust.tsv +++ b/crates/integrations/playground/DEPENDENCIES.rust.tsv @@ -1,509 +1,492 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 MPL-2.0 Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X -adler2@2.0.1 X X X -adler32@1.2.0 X -ahash@0.8.12 X X -aho-corasick@1.1.3 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anstream@0.6.20 X X -anstyle@1.0.11 X X -anstyle-parse@0.2.7 X X -anstyle-query@1.1.4 X X -anstyle-wincon@3.0.10 X X -anyhow@1.0.99 X X -apache-avro@0.17.0 X -apache-avro@0.20.0 X -array-init@2.1.0 X X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@55.2.0 X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-csv@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-json@55.2.0 X -arrow-ord@55.2.0 X -arrow-row@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X -as-any@0.3.2 X X -async-compression@0.4.19 X X -async-lock@3.4.1 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -atomic-waker@1.1.2 X X -autocfg@1.5.0 X X -aws-config@1.8.6 X -aws-credential-types@1.2.6 X -aws-runtime@1.5.10 X -aws-sdk-sso@1.83.0 X -aws-sdk-ssooidc@1.84.0 X -aws-sdk-sts@1.85.0 X 
-aws-sigv4@1.3.4 X -aws-smithy-async@1.2.5 X -aws-smithy-http@0.62.3 X -aws-smithy-http-client@1.1.1 X -aws-smithy-json@0.61.5 X -aws-smithy-observability@0.1.3 X -aws-smithy-query@0.60.7 X -aws-smithy-runtime@1.9.1 X -aws-smithy-runtime-api@1.9.0 X -aws-smithy-types@1.3.2 X -aws-smithy-xml@0.60.10 X -aws-types@1.3.8 X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.22.1 X X -base64-simd@0.8.0 X -bigdecimal@0.4.8 X X -bimap@0.6.3 X X -bitflags@2.9.4 X X -blake2@0.10.6 X X -blake3@1.8.2 X X X -block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X -byteorder@1.5.0 X X -bytes@1.10.1 X -bytes-utils@0.1.4 X X -bzip2@0.4.4 X X -bzip2@0.5.2 X X -bzip2-sys@0.1.13+1.0.8 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X -cfg_aliases@0.2.1 X -chrono@0.4.42 X X -chrono-tz@0.10.4 X X -clap@4.5.47 X X -clap_builder@4.5.47 X X -clap_derive@4.5.47 X X -clap_lex@0.7.5 X X -clipboard-win@5.4.1 X -colorchoice@1.0.4 X X -comfy-table@7.2.0 X -concurrent-queue@2.5.0 X X -const-oid@0.9.6 X X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -constant_time_eq@0.3.1 X X X -core-foundation@0.10.1 X X -core-foundation-sys@0.8.7 X X -core2@0.4.0 X X -cpufeatures@0.2.17 X X -crc32c@0.6.8 X X -crc32fast@1.5.0 X X -crossbeam-channel@0.5.15 X X -crossbeam-epoch@0.9.18 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.6 X X -csv@1.3.1 X X -csv-core@0.1.12 X X -darling@0.20.11 X -darling@0.21.3 X -darling_core@0.20.11 X -darling_core@0.21.3 X -darling_macro@0.20.11 X -darling_macro@0.21.3 X -dary_heap@0.3.7 X X -dashmap@6.1.0 X -datafusion@48.0.1 X -datafusion-catalog@48.0.1 X -datafusion-catalog-listing@48.0.1 X -datafusion-cli@48.0.1 X -datafusion-common@48.0.1 X -datafusion-common-runtime@48.0.1 X -datafusion-datasource@48.0.1 X -datafusion-datasource-avro@48.0.1 X -datafusion-datasource-csv@48.0.1 X -datafusion-datasource-json@48.0.1 X -datafusion-datasource-parquet@48.0.1 X 
-datafusion-doc@48.0.1 X -datafusion-execution@48.0.1 X -datafusion-expr@48.0.1 X -datafusion-expr-common@48.0.1 X -datafusion-functions@48.0.1 X -datafusion-functions-aggregate@48.0.1 X -datafusion-functions-aggregate-common@48.0.1 X -datafusion-functions-nested@48.0.1 X -datafusion-functions-table@48.0.1 X -datafusion-functions-window@48.0.1 X -datafusion-functions-window-common@48.0.1 X -datafusion-macros@48.0.1 X -datafusion-optimizer@48.0.1 X -datafusion-physical-expr@48.0.1 X -datafusion-physical-expr-common@48.0.1 X -datafusion-physical-optimizer@48.0.1 X -datafusion-physical-plan@48.0.1 X -datafusion-session@48.0.1 X -datafusion-sql@48.0.1 X -deranged@0.5.3 X X -derive_builder@0.20.2 X X -derive_builder_core@0.20.2 X X -derive_builder_macro@0.20.2 X X -digest@0.10.7 X X -dirs@6.0.0 X X -dirs-sys@0.5.0 X X -displaydoc@0.2.5 X X -dissimilar@1.0.10 X -either@1.15.0 X X -endian-type@0.1.2 X -env_filter@0.1.3 X X -env_logger@0.11.8 X X -equivalent@1.0.2 X X -errno@0.3.13 X X -error-code@3.3.2 X -event-listener@5.4.1 X X -event-listener-strategy@0.5.4 X X -expect-test@1.5.1 X X -fastrand@2.3.0 X X -fd-lock@4.0.4 X X -find-msvc-tools@0.1.1 X X -fixedbitset@0.5.7 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X -fnv@1.0.7 X X -foldhash@0.1.5 X -form_urlencoded@1.2.2 X X -fs-err@3.1.1 X X -futures@0.3.31 X X -futures-channel@0.3.31 X X -futures-core@0.3.31 X X -futures-executor@0.3.31 X X -futures-io@0.3.31 X X -futures-macro@0.3.31 X X -futures-sink@0.3.31 X X -futures-task@0.3.31 X X -futures-util@0.3.31 X X -generator@0.8.7 X X -generic-array@0.14.7 X -getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X -glob@0.3.3 X X -gloo-timers@0.3.0 X X -h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -heck@0.5.0 X X -hermit-abi@0.5.2 X X -hex@0.4.3 X X -hmac@0.12.1 X X -home@0.5.11 X X -http@0.2.12 X X -http@1.3.1 X X -http-body@0.4.6 X -http-body@1.0.1 X -http-body-util@0.1.3 X -httparse@1.10.1 X X -httpdate@1.0.3 X X -humantime@2.2.0 X X 
-hyper@1.7.0 X -hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X -iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-rest@0.7.0 X -iceberg-datafusion@0.7.0 X -iceberg-playground@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X -ident_case@1.0.1 X X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.11.0 X X -integer-encoding@3.0.4 X -io-uring@0.7.10 X X -ipnet@2.11.0 X X -iri-string@0.7.8 X X -is_terminal_polyfill@1.70.1 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.15 X X -jiff@0.2.15 X X -jobserver@0.1.34 X X -js-sys@0.3.78 X X -lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X -libflate@2.1.0 X -libflate_lz77@2.1.0 X -libm@0.2.15 X -libmimalloc-sys@0.1.44 X -libredox@0.1.9 X -libz-rs-sys@0.5.2 X -linux-raw-sys@0.9.4 X X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -lzma-sys@0.1.20 X X -matchers@0.2.0 X -md-5@0.10.6 X X -memchr@2.7.5 X X -mimalloc@0.1.48 X -miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X -murmur3@0.5.2 X X -nibble_vec@0.1.0 X -nix@0.30.1 X -nu-ansi-term@0.50.1 X -num@0.4.3 X X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-conv@0.1.0 X X -num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X -num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X -object_store@0.12.3 X X -once_cell@1.21.3 X X -once_cell_polyfill@1.70.1 X X -opendal@0.54.0 X -openssl-probe@0.1.6 X X -option-ext@0.2.0 X -ordered-float@2.10.1 X -ordered-float@4.6.0 X -outref@0.5.2 X -parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X -paste@1.0.15 X X -percent-encoding@2.3.2 X X -petgraph@0.8.2 X 
X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.16 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -portable-atomic@1.11.1 X X -portable-atomic-util@0.2.4 X X -potential_utf@0.1.3 X -powerfmt@0.2.0 X X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X -psm@0.1.26 X X -quad-rand@0.2.3 X -quick-xml@0.37.5 X -quick-xml@0.38.3 X -quote@1.0.40 X X -r-efi@5.3.0 X X X -radix_trie@0.2.1 X -rand@0.8.5 X X -rand@0.9.2 X X -rand_chacha@0.3.1 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.3 X X -recursive@0.1.1 X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.17 X -redox_users@0.5.2 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X -reqsign@0.16.5 X -reqwest@0.12.23 X X -ring@0.17.14 X X -rle-decode-fast@1.0.3 X X -roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X -rustc_version@0.4.1 X X -rustix@1.0.8 X X X -rustls@0.23.31 X X X -rustls-native-certs@0.8.1 X X X -rustls-pemfile@2.2.0 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X -rustversion@1.0.22 X X -rustyline@16.0.0 X -ryu@1.0.20 X X -same-file@1.0.6 X X -schannel@0.1.27 X -scoped-tls@1.0.1 X X -scopeguard@1.2.0 X X -security-framework@3.4.0 X X -security-framework-sys@2.15.0 X X -semver@1.0.26 X X -seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X -serde_repr@0.1.20 X X -serde_spanned@0.6.9 X X -serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X -sha1@0.10.6 X X -sha2@0.10.9 X X -sharded-slab@0.1.7 X -shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X -simdutf8@0.1.5 X X -siphasher@1.0.1 X X -slab@0.4.11 X -smallvec@1.15.1 X X -snap@1.1.1 X -socket2@0.6.0 X X -sqlparser@0.55.0 X -sqlparser_derive@0.3.0 X -stable_deref_trait@1.2.0 X X -stacker@0.1.21 X X -static_assertions@1.1.0 X X -strsim@0.11.1 X -strum@0.26.3 X -strum@0.27.2 X -strum_macros@0.26.4 X -strum_macros@0.27.2 X -subtle@2.6.1 X -syn@2.0.106 
X X -sync_wrapper@1.0.2 X -synstructure@0.13.2 X -tagptr@0.2.0 X X -tempfile@3.22.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X -thread_local@1.1.9 X X -threadpool@1.8.1 X X -thrift@0.17.0 X -time@0.3.43 X X -time-core@0.1.6 X X -tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X -toml@0.8.23 X X -toml_datetime@0.6.11 X X -toml_edit@0.22.27 X X -toml_write@0.1.2 X X -tower@0.5.2 X -tower-http@0.6.6 X -tower-layer@0.3.3 X -tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X -tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X -try-lock@0.2.5 X -twox-hash@2.1.2 X -typed-builder@0.19.1 X X -typed-builder@0.20.1 X X -typed-builder-macro@0.19.1 X X -typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X -unicode-segmentation@1.12.0 X X -unicode-width@0.2.1 X X -untrusted@0.9.0 X -url@2.5.7 X X -urlencoding@2.1.3 X -utf8_iter@1.0.4 X X -utf8parse@0.2.2 X X -uuid@1.18.1 X X -version_check@0.9.5 X X -vsimd@0.8.0 X -walkdir@2.5.0 X X -want@0.3.1 X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X -wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -web-time@1.1.0 X X -webpki-roots@1.0.2 X -winapi-util@0.1.11 X X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.60.2 X X -windows-sys@0.61.0 X X -windows-targets@0.52.6 X X -windows-targets@0.53.3 X X -windows-threading@0.1.0 X X 
-windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_gnullvm@0.53.0 X X -windows_aarch64_msvc@0.52.6 X X -windows_aarch64_msvc@0.53.0 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnu@0.53.0 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_gnullvm@0.53.0 X X -windows_i686_msvc@0.52.6 X X -windows_i686_msvc@0.53.0 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnu@0.53.0 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_gnullvm@0.53.0 X X -windows_x86_64_msvc@0.52.6 X X -windows_x86_64_msvc@0.53.0 X X -winnow@0.7.13 X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -xmlparser@0.13.6 X X -xz2@0.1.7 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 MPL-2.0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +adler2@2.0.1 X X X +ahash@0.8.12 X X +aho-corasick@1.1.4 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X +android_system_properties@0.1.5 X X +anstream@0.6.21 X X +anstyle@1.0.13 X X +anstyle-parse@0.2.7 X X +anstyle-query@1.1.5 X X +anstyle-wincon@3.0.11 X X +anyhow@1.0.100 X X +apache-avro@0.20.0 X +apache-avro@0.21.0 X +ar_archive_writer@0.2.0 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@57.1.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-csv@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-json@57.1.0 X +arrow-ord@57.1.0 X +arrow-row@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X +as-any@0.3.2 X X +async-compression@0.4.19 X X +async-lock@3.4.1 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +aws-config@1.8.11 X 
+aws-credential-types@1.2.10 X +aws-runtime@1.5.16 X +aws-sdk-sso@1.90.0 X +aws-sdk-ssooidc@1.92.0 X +aws-sdk-sts@1.94.0 X +aws-sigv4@1.3.6 X +aws-smithy-async@1.2.7 X +aws-smithy-http@0.62.6 X +aws-smithy-http-client@1.1.5 X +aws-smithy-json@0.61.8 X +aws-smithy-observability@0.1.5 X +aws-smithy-query@0.60.9 X +aws-smithy-runtime@1.9.5 X +aws-smithy-runtime-api@1.9.3 X +aws-smithy-types@1.3.5 X +aws-smithy-xml@0.60.13 X +aws-types@1.3.10 X +backon@1.6.0 X +base64@0.22.1 X X +base64-simd@0.8.0 X +bigdecimal@0.4.9 X X +bimap@0.6.3 X X +bitflags@2.10.0 X X +blake2@0.10.6 X X +blake3@1.8.2 X X X +block-buffer@0.10.4 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.19.0 X X +bytemuck@1.24.0 X X X +byteorder@1.5.0 X X +bytes@1.11.0 X +bytes-utils@0.1.4 X X +bzip2@0.5.2 X X +bzip2@0.6.1 X X +bzip2-sys@0.1.13+1.0.8 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X +cfg_aliases@0.2.1 X +chrono@0.4.42 X X +chrono-tz@0.10.4 X X +clap@4.5.53 X X +clap_builder@4.5.53 X X +clap_derive@4.5.49 X X +clap_lex@0.7.6 X X +clipboard-win@5.4.1 X +colorchoice@1.0.4 X X +comfy-table@7.2.1 X +concurrent-queue@2.5.0 X X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +constant_time_eq@0.3.1 X X X +core-foundation@0.10.1 X X +core-foundation-sys@0.8.7 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.7 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +darling@0.20.11 X +darling@0.21.3 X +darling_core@0.20.11 X +darling_core@0.21.3 X +darling_macro@0.20.11 X +darling_macro@0.21.3 X +dashmap@6.1.0 X +datafusion@51.0.0 X +datafusion-catalog@51.0.0 X +datafusion-catalog-listing@51.0.0 X +datafusion-cli@51.0.0 X +datafusion-common@51.0.0 X +datafusion-common-runtime@51.0.0 X +datafusion-datasource@51.0.0 X +datafusion-datasource-arrow@51.0.0 X +datafusion-datasource-avro@51.0.0 X 
+datafusion-datasource-csv@51.0.0 X +datafusion-datasource-json@51.0.0 X +datafusion-datasource-parquet@51.0.0 X +datafusion-doc@51.0.0 X +datafusion-execution@51.0.0 X +datafusion-expr@51.0.0 X +datafusion-expr-common@51.0.0 X +datafusion-functions@51.0.0 X +datafusion-functions-aggregate@51.0.0 X +datafusion-functions-aggregate-common@51.0.0 X +datafusion-functions-nested@51.0.0 X +datafusion-functions-table@51.0.0 X +datafusion-functions-window@51.0.0 X +datafusion-functions-window-common@51.0.0 X +datafusion-macros@51.0.0 X +datafusion-optimizer@51.0.0 X +datafusion-physical-expr@51.0.0 X +datafusion-physical-expr-adapter@51.0.0 X +datafusion-physical-expr-common@51.0.0 X +datafusion-physical-optimizer@51.0.0 X +datafusion-physical-plan@51.0.0 X +datafusion-pruning@51.0.0 X +datafusion-session@51.0.0 X +datafusion-sql@51.0.0 X +deranged@0.5.5 X X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +dirs@6.0.0 X X +dirs-sys@0.5.0 X X +displaydoc@0.2.5 X X +dissimilar@1.0.10 X +either@1.15.0 X X +endian-type@0.1.2 X +env_filter@0.1.4 X X +env_logger@0.11.8 X X +equivalent@1.0.2 X X +errno@0.3.14 X X +error-code@3.3.2 X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fastrand@2.3.0 X X +fd-lock@4.0.4 X X +find-msvc-tools@0.1.5 X X +fixedbitset@0.5.7 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +form_urlencoded@1.2.2 X X +fs-err@3.2.0 X X +futures@0.3.31 X X +futures-channel@0.3.31 X X +futures-core@0.3.31 X X +futures-executor@0.3.31 X X +futures-io@0.3.31 X X +futures-macro@0.3.31 X X +futures-sink@0.3.31 X X +futures-task@0.3.31 X X +futures-timer@3.0.3 X X +futures-util@0.3.31 X X +generic-array@0.14.7 X +getrandom@0.2.16 X X +getrandom@0.3.4 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +h2@0.4.12 X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X 
+home@0.5.11 X X +http@0.2.12 X X +http@1.4.0 X X +http-body@0.4.6 X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +httpdate@1.0.3 X X +humantime@2.3.0 X X +hyper@1.8.1 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.8.0 X +iceberg-catalog-rest@0.8.0 X +iceberg-datafusion@0.8.0 X +iceberg-playground@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.12.1 X X +integer-encoding@3.0.4 X +ipnet@2.11.0 X X +iri-string@0.7.9 X X +is_terminal_polyfill@1.70.2 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X +jobserver@0.1.34 X X +js-sys@0.3.83 X X +lazy_static@1.5.0 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.178 X X +libm@0.2.15 X +libmimalloc-sys@0.1.44 X +libredox@0.1.10 X +libz-rs-sys@0.5.3 X +linux-raw-sys@0.11.0 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X +lzma-sys@0.1.20 X X +md-5@0.10.6 X X +memchr@2.7.6 X X +mimalloc@0.1.48 X +miniz_oxide@0.8.9 X X X +mio@1.1.1 X +moka@0.12.11 X X +murmur3@0.5.2 X X +nibble_vec@0.1.0 X +nix@0.30.1 X +nu-ansi-term@0.50.3 X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-conv@0.1.0 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +object@0.32.2 X X +object_store@0.12.4 X X +once_cell@1.21.3 X X +once_cell_polyfill@1.70.2 X X +opendal@0.55.0 X +openssl-probe@0.1.6 X X +option-ext@0.2.0 X +ordered-float@2.10.1 X +ordered-float@4.6.0 X +outref@0.5.2 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X 
+parquet@57.1.0 X +paste@1.0.15 X X +percent-encoding@2.3.2 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.16 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +portable-atomic@1.11.1 X X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X +powerfmt@0.2.0 X X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X +psm@0.1.28 X X +quad-rand@0.2.3 X +quick-xml@0.38.4 X +quote@1.0.42 X X +r-efi@5.3.0 X X X +radix_trie@0.2.1 X +rand@0.8.5 X X +rand@0.9.2 X X +rand_chacha@0.3.1 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.3 X X +recursive@0.1.1 X +recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +redox_users@0.5.2 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X +relative-path@1.9.3 X X +reqsign@0.16.5 X +reqwest@0.12.25 X X +ring@0.17.14 X X +roaring@0.11.2 X X +rstest@0.26.1 X X +rstest_macros@0.26.1 X X +rust_decimal@1.39.0 X +rustc_version@0.4.1 X X +rustix@1.1.2 X X X +rustls@0.23.35 X X X +rustls-native-certs@0.8.2 X X X +rustls-pemfile@2.2.0 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X +rustversion@1.0.22 X X +rustyline@17.0.2 X +ryu@1.0.20 X X +same-file@1.0.6 X X +schannel@0.1.28 X +scopeguard@1.2.0 X X +security-framework@3.5.1 X X +security-framework-sys@2.15.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X +serde_repr@0.1.20 X X +serde_spanned@0.6.9 X X +serde_urlencoded@0.7.1 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X +sha1@0.10.6 X X +sha2@0.10.9 X X +sharded-slab@0.1.7 X +shlex@1.3.0 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X +simdutf8@0.1.5 X X +siphasher@1.0.1 X X +slab@0.4.11 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.1 X X +sqlparser@0.59.0 X +sqlparser_derive@0.3.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.22 X X +strsim@0.11.1 X 
+strum@0.27.2 X +strum_macros@0.27.2 X +subtle@2.6.1 X +syn@2.0.111 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +tempfile@3.23.0 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X +thread_local@1.1.9 X X +thrift@0.17.0 X +time@0.3.44 X X +time-core@0.1.6 X X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X +toml@0.8.23 X X +toml_datetime@0.6.11 X X +toml_datetime@0.7.3 X X +toml_edit@0.22.27 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X +toml_write@0.1.2 X X +tower@0.5.2 X +tower-http@0.6.8 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X +tracing-log@0.2.0 X +tracing-subscriber@0.3.22 X +try-lock@0.2.5 X +twox-hash@2.1.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X +unicode-segmentation@1.12.0 X X +unicode-width@0.2.2 X X +untrusted@0.9.0 X +url@2.5.7 X X +urlencoding@2.1.3 X +utf8_iter@1.0.4 X X +utf8parse@0.2.2 X X +uuid@1.19.0 X X +version_check@0.9.5 X X +vsimd@0.8.0 X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X +wasm-streams@0.4.2 X X +web-sys@0.3.83 X X +web-time@1.1.0 X X +webpki-roots@1.0.4 X +winapi-util@0.1.11 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.52.0 X X +windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.52.6 X X 
+windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +xmlparser@0.13.6 X X +xz2@0.1.7 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/sqllogictest/DEPENDENCIES.rust.tsv b/crates/sqllogictest/DEPENDENCIES.rust.tsv index e8af062ea3..94bd88e2e6 100644 --- a/crates/sqllogictest/DEPENDENCIES.rust.tsv +++ b/crates/sqllogictest/DEPENDENCIES.rust.tsv @@ -1,398 +1,482 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X -adler2@2.0.1 X X X -adler32@1.2.0 X -ahash@0.8.12 X X -aho-corasick@1.1.3 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anstream@0.6.20 X X -anstyle@1.0.11 X X -anstyle-parse@0.2.7 X X -anstyle-query@1.1.4 X X -anstyle-wincon@3.0.10 X X -anyhow@1.0.99 X X -apache-avro@0.17.0 X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@55.2.0 X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-csv@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-json@55.2.0 X -arrow-ord@55.2.0 X -arrow-row@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X -async-compression@0.4.19 X X -async-recursion@1.1.1 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -autocfg@1.5.0 X X -backtrace@0.3.75 X X 
-base64@0.21.7 X X -base64@0.22.1 X X -bigdecimal@0.4.8 X X -bitflags@2.9.4 X X -blake2@0.10.6 X X -blake3@1.8.2 X X X -block-buffer@0.10.4 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.19.0 X X -byteorder@1.5.0 X X -bytes@1.10.1 X -bzip2@0.4.4 X X -bzip2@0.5.2 X X -bzip2-sys@0.1.13+1.0.8 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X -chrono@0.4.42 X X -chrono-tz@0.10.4 X X -clap@4.5.47 X X -clap_builder@4.5.47 X X -clap_derive@4.5.47 X X -clap_lex@0.7.5 X X -colorchoice@1.0.4 X X -comfy-table@7.2.0 X -console@0.15.11 X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -constant_time_eq@0.3.1 X X X -core-foundation-sys@0.8.7 X X -core2@0.4.0 X X -cpufeatures@0.2.17 X X -crc32fast@1.5.0 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.6 X X -csv@1.3.1 X X -csv-core@0.1.12 X X -dary_heap@0.3.7 X X -dashmap@6.1.0 X -datafusion@48.0.1 X -datafusion-catalog@48.0.1 X -datafusion-catalog-listing@48.0.1 X -datafusion-common@48.0.1 X -datafusion-common-runtime@48.0.1 X -datafusion-datasource@48.0.1 X -datafusion-datasource-avro@48.0.1 X -datafusion-datasource-csv@48.0.1 X -datafusion-datasource-json@48.0.1 X -datafusion-datasource-parquet@48.0.1 X -datafusion-doc@48.0.1 X -datafusion-execution@48.0.1 X -datafusion-expr@48.0.1 X -datafusion-expr-common@48.0.1 X -datafusion-functions@48.0.1 X -datafusion-functions-aggregate@48.0.1 X -datafusion-functions-aggregate-common@48.0.1 X -datafusion-functions-nested@48.0.1 X -datafusion-functions-table@48.0.1 X -datafusion-functions-window@48.0.1 X -datafusion-functions-window-common@48.0.1 X -datafusion-macros@48.0.1 X -datafusion-optimizer@48.0.1 X -datafusion-physical-expr@48.0.1 X -datafusion-physical-expr-common@48.0.1 X -datafusion-physical-optimizer@48.0.1 X -datafusion-physical-plan@48.0.1 X -datafusion-session@48.0.1 X -datafusion-spark@48.0.1 X -datafusion-sql@48.0.1 X -datafusion-sqllogictest@48.0.1 X -datafusion-substrait@48.0.1 X -digest@0.10.7 X X -displaydoc@0.2.5 X X -dyn-clone@1.0.20 
X X -educe@0.6.0 X -either@1.15.0 X X -encode_unicode@1.0.0 X X -enum-ordinalize@4.3.0 X -enum-ordinalize-derive@4.3.1 X -equivalent@1.0.2 X X -errno@0.3.13 X X -escape8259@0.5.3 X -fallible-iterator@0.2.0 X X -fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -fixedbitset@0.5.7 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X -fnv@1.0.7 X X -foldhash@0.1.5 X -form_urlencoded@1.2.2 X X -fs-err@3.1.1 X X -futures@0.3.31 X X -futures-channel@0.3.31 X X -futures-core@0.3.31 X X -futures-executor@0.3.31 X X -futures-io@0.3.31 X X -futures-macro@0.3.31 X X -futures-sink@0.3.31 X X -futures-task@0.3.31 X X -futures-util@0.3.31 X X -generic-array@0.14.7 X -getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X -glob@0.3.3 X X -half@2.6.0 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -heck@0.5.0 X X -hex@0.4.3 X X -hmac@0.12.1 X X -http@1.3.1 X X -humantime@2.2.0 X X -iana-time-zone@0.1.63 X X -iana-time-zone-haiku@0.1.2 X X -iceberg-sqllogictest@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.11.0 X X -indicatif@0.17.11 X -integer-encoding@3.0.4 X -io-uring@0.7.10 X X -is_terminal_polyfill@1.70.1 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.15 X X -jobserver@0.1.34 X X -js-sys@0.3.78 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X -libflate@2.1.0 X -libflate_lz77@2.1.0 X -libm@0.2.15 X -libtest-mimic@0.8.1 X X -libz-rs-sys@0.5.2 X -linux-raw-sys@0.9.4 X X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -lz4_flex@0.11.5 X -lzma-sys@0.1.20 X X -md-5@0.10.6 X X -memchr@2.7.5 X X -miniz_oxide@0.8.9 X X X -mio@1.0.4 X -multimap@0.10.1 X X -num@0.4.3 X X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-integer@0.1.46 X X 
-num-iter@0.1.45 X X -num-rational@0.4.2 X X -num-traits@0.2.19 X X -number_prefix@0.4.0 X -object@0.36.7 X X -object_store@0.12.3 X X -once_cell@1.21.3 X X -once_cell_polyfill@1.70.1 X X -ordered-float@2.10.1 X -owo-colors@4.2.2 X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X -paste@1.0.15 X X -pbjson@0.7.0 X -pbjson-build@0.7.0 X -pbjson-types@0.7.0 X -percent-encoding@2.3.2 X X -petgraph@0.7.1 X X -petgraph@0.8.2 X X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.16 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -portable-atomic@1.11.1 X X -postgres-protocol@0.6.8 X X -postgres-types@0.2.9 X X -potential_utf@0.1.3 X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X -prost@0.13.5 X -prost-build@0.13.5 X -prost-derive@0.13.5 X -prost-types@0.13.5 X -psm@0.1.26 X X -quad-rand@0.2.3 X -quote@1.0.40 X X -r-efi@5.3.0 X X X -rand@0.8.5 X X -rand@0.9.2 X X -rand_chacha@0.3.1 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.3 X X -recursive@0.1.1 X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X -regress@0.10.4 X X -rle-decode-fast@1.0.3 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X -rustc_version@0.4.1 X X -rustix@1.0.8 X X X -rustversion@1.0.22 X X -ryu@1.0.20 X X -same-file@1.0.6 X X -schemars@0.8.22 X -schemars_derive@0.8.22 X -scopeguard@1.2.0 X X -semver@1.0.26 X X -seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_derive_internals@0.29.1 X X -serde_json@1.0.143 X X -serde_spanned@0.6.9 X X -serde_tokenstream@0.2.2 X -serde_yaml@0.9.34+deprecated X X -sha2@0.10.9 X X -shlex@1.3.0 X X -simdutf8@0.1.5 X X -similar@2.7.0 X -siphasher@1.0.1 X X -slab@0.4.11 X -smallvec@1.15.1 X X -snap@1.1.1 X -sqllogictest@0.28.4 X X -sqlparser@0.55.0 X -sqlparser_derive@0.3.0 X -stable_deref_trait@1.2.0 X X -stacker@0.1.21 X X -static_assertions@1.1.0 X X 
-stringprep@0.1.5 X X -strsim@0.11.1 X -strum@0.26.3 X -strum_macros@0.26.4 X -subst@0.3.8 X X -substrait@0.56.0 X -subtle@2.6.1 X -syn@2.0.106 X X -synstructure@0.13.2 X -tempfile@3.22.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X -thrift@0.17.0 X -tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tinyvec@1.10.0 X X X -tinyvec_macros@0.1.1 X X X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-util@0.7.16 X -toml@0.8.23 X X -toml_datetime@0.6.11 X X -toml_edit@0.22.27 X X -toml_write@0.1.2 X X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X -twox-hash@2.1.2 X -typed-builder@0.19.1 X X -typed-builder-macro@0.19.1 X X -typenum@1.18.0 X X -typify@0.4.3 X -typify-impl@0.4.3 X -typify-macro@0.4.3 X -unicode-bidi@0.3.18 X X -unicode-ident@1.0.18 X X X -unicode-normalization@0.1.24 X X -unicode-properties@0.1.3 X X -unicode-segmentation@1.12.0 X X -unicode-width@0.1.14 X X -unicode-width@0.2.1 X X -unsafe-libyaml@0.2.11 X -url@2.5.7 X X -utf8_iter@1.0.4 X X -utf8parse@0.2.2 X X -uuid@1.18.1 X X -version_check@0.9.5 X X -walkdir@2.5.0 X X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X -web-sys@0.3.78 X X -web-time@1.1.0 X X -winapi-util@0.1.11 X X -windows-core@0.61.2 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.60.2 X X -windows-sys@0.61.0 X X -windows-targets@0.52.6 X X -windows-targets@0.53.3 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_gnullvm@0.53.0 X X -windows_aarch64_msvc@0.52.6 X X -windows_aarch64_msvc@0.53.0 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnu@0.53.0 X X 
-windows_i686_gnullvm@0.52.6 X X -windows_i686_gnullvm@0.53.0 X X -windows_i686_msvc@0.52.6 X X -windows_i686_msvc@0.53.0 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnu@0.53.0 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_gnullvm@0.53.0 X X -windows_x86_64_msvc@0.52.6 X X -windows_x86_64_msvc@0.53.0 X X -winnow@0.7.13 X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -xz2@0.1.7 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +adler2@2.0.1 X X X +ahash@0.8.12 X X +aho-corasick@1.1.4 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X +android_system_properties@0.1.5 X X +anstream@0.6.21 X X +anstyle@1.0.13 X X +anstyle-parse@0.2.7 X X +anstyle-query@1.1.5 X X +anstyle-wincon@3.0.11 X X +anyhow@1.0.100 X X +apache-avro@0.20.0 X +apache-avro@0.21.0 X +ar_archive_writer@0.2.0 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@57.1.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-csv@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-json@57.1.0 X +arrow-ord@57.1.0 X +arrow-row@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X +as-any@0.3.2 X X +async-compression@0.4.19 X X +async-lock@3.4.1 X X +async-recursion@1.1.1 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +backon@1.6.0 X +base64@0.22.1 X X +bigdecimal@0.4.9 X X +bimap@0.6.3 X X +bitflags@2.10.0 X X +blake2@0.10.6 X X +blake3@1.8.2 X X X +block-buffer@0.10.4 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X 
+bumpalo@3.19.0 X X +bytemuck@1.24.0 X X X +byteorder@1.5.0 X X +bytes@1.11.0 X +bzip2@0.5.2 X X +bzip2@0.6.1 X X +bzip2-sys@0.1.13+1.0.8 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X +chrono@0.4.42 X X +chrono-tz@0.10.4 X X +clap@4.5.53 X X +clap_builder@4.5.53 X X +clap_derive@4.5.49 X X +clap_lex@0.7.6 X X +colorchoice@1.0.4 X X +comfy-table@7.2.1 X +concurrent-queue@2.5.0 X X +console@0.16.1 X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +constant_time_eq@0.3.1 X X X +core-foundation-sys@0.8.7 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.7 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +darling@0.20.11 X +darling@0.21.3 X +darling_core@0.20.11 X +darling_core@0.21.3 X +darling_macro@0.20.11 X +darling_macro@0.21.3 X +dashmap@6.1.0 X +datafusion@51.0.0 X +datafusion-catalog@51.0.0 X +datafusion-catalog-listing@51.0.0 X +datafusion-common@51.0.0 X +datafusion-common-runtime@51.0.0 X +datafusion-datasource@51.0.0 X +datafusion-datasource-arrow@51.0.0 X +datafusion-datasource-avro@51.0.0 X +datafusion-datasource-csv@51.0.0 X +datafusion-datasource-json@51.0.0 X +datafusion-datasource-parquet@51.0.0 X +datafusion-doc@51.0.0 X +datafusion-execution@51.0.0 X +datafusion-expr@51.0.0 X +datafusion-expr-common@51.0.0 X +datafusion-functions@51.0.0 X +datafusion-functions-aggregate@51.0.0 X +datafusion-functions-aggregate-common@51.0.0 X +datafusion-functions-nested@51.0.0 X +datafusion-functions-table@51.0.0 X +datafusion-functions-window@51.0.0 X +datafusion-functions-window-common@51.0.0 X +datafusion-macros@51.0.0 X +datafusion-optimizer@51.0.0 X +datafusion-physical-expr@51.0.0 X +datafusion-physical-expr-adapter@51.0.0 X +datafusion-physical-expr-common@51.0.0 X +datafusion-physical-optimizer@51.0.0 X +datafusion-physical-plan@51.0.0 X +datafusion-pruning@51.0.0 X +datafusion-session@51.0.0 X 
+datafusion-spark@51.0.0 X +datafusion-sql@51.0.0 X +datafusion-sqllogictest@51.0.0 X +datafusion-substrait@51.0.0 X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +displaydoc@0.2.5 X X +dissimilar@1.0.10 X +dyn-clone@1.0.20 X X +educe@0.6.0 X +either@1.15.0 X X +encode_unicode@1.0.0 X X +enum-ordinalize@4.3.2 X +enum-ordinalize-derive@4.3.2 X +env_filter@0.1.4 X X +env_logger@0.11.8 X X +equivalent@1.0.2 X X +errno@0.3.14 X X +escape8259@0.5.3 X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fallible-iterator@0.2.0 X X +fastrand@2.3.0 X X +find-msvc-tools@0.1.5 X X +fixedbitset@0.5.7 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +foldhash@0.2.0 X +form_urlencoded@1.2.2 X X +fs-err@3.2.0 X X +futures@0.3.31 X X +futures-channel@0.3.31 X X +futures-core@0.3.31 X X +futures-executor@0.3.31 X X +futures-io@0.3.31 X X +futures-macro@0.3.31 X X +futures-sink@0.3.31 X X +futures-task@0.3.31 X X +futures-timer@3.0.3 X X +futures-util@0.3.31 X X +generic-array@0.14.7 X +getrandom@0.2.16 X X +getrandom@0.3.4 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +home@0.5.11 X X +http@1.4.0 X X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +humantime@2.3.0 X X +hyper@1.8.1 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.8.0 X +iceberg-datafusion@0.8.0 X +iceberg-sqllogictest@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.12.1 X X +indicatif@0.18.3 X +integer-encoding@3.0.4 X +ipnet@2.11.0 X X 
+iri-string@0.7.9 X X +is_terminal_polyfill@1.70.2 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X +jobserver@0.1.34 X X +js-sys@0.3.83 X X +lazy_static@1.5.0 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.178 X X +libm@0.2.15 X +libtest-mimic@0.8.1 X X +libz-rs-sys@0.5.3 X +linux-raw-sys@0.11.0 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X +lzma-sys@0.1.20 X X +md-5@0.10.6 X X +memchr@2.7.6 X X +miniz_oxide@0.8.9 X X X +mio@1.1.1 X +moka@0.12.11 X X +multimap@0.10.1 X X +murmur3@0.5.2 X X +nu-ansi-term@0.50.3 X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +object@0.32.2 X X +object_store@0.12.4 X X +once_cell@1.21.3 X X +once_cell_polyfill@1.70.2 X X +opendal@0.55.0 X +ordered-float@2.10.1 X +ordered-float@4.6.0 X +owo-colors@4.2.3 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X +paste@1.0.15 X X +pbjson@0.8.0 X +pbjson-build@0.8.0 X +pbjson-types@0.8.0 X +percent-encoding@2.3.2 X X +petgraph@0.7.1 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.16 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +portable-atomic@1.11.1 X X +portable-atomic-util@0.2.4 X X +postgres-protocol@0.6.9 X X +postgres-types@0.2.11 X X +potential_utf@0.1.4 X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X +prost@0.14.1 X +prost-build@0.14.1 X +prost-derive@0.14.1 X +prost-types@0.14.1 X +psm@0.1.28 X X +quad-rand@0.2.3 X +quick-xml@0.38.4 X +quote@1.0.42 X X +r-efi@5.3.0 X X X +rand@0.8.5 X X +rand@0.9.2 X X +rand_chacha@0.3.1 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.3 X X +recursive@0.1.1 X 
+recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X +regress@0.10.5 X X +relative-path@1.9.3 X X +reqsign@0.16.5 X +reqwest@0.12.25 X X +ring@0.17.14 X X +roaring@0.11.2 X X +rstest@0.26.1 X X +rstest_macros@0.26.1 X X +rust_decimal@1.39.0 X +rustc_version@0.4.1 X X +rustix@1.1.2 X X X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X +rustversion@1.0.22 X X +ryu@1.0.20 X X +same-file@1.0.6 X X +schemars@0.8.22 X +schemars_derive@0.8.22 X +scopeguard@1.2.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_derive_internals@0.29.1 X X +serde_json@1.0.145 X X +serde_repr@0.1.20 X X +serde_spanned@0.6.9 X X +serde_tokenstream@0.2.2 X +serde_urlencoded@0.7.1 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X +serde_yaml@0.9.34+deprecated X X +sha1@0.10.6 X X +sha2@0.10.9 X X +sharded-slab@0.1.7 X +shlex@1.3.0 X X +simd-adler32@0.3.8 X +simdutf8@0.1.5 X X +similar@2.7.0 X +siphasher@1.0.1 X X +slab@0.4.11 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.1 X X +sqllogictest@0.28.4 X X +sqlparser@0.59.0 X +sqlparser_derive@0.3.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.22 X X +stringprep@0.1.5 X X +strsim@0.11.1 X +strum@0.27.2 X +strum_macros@0.27.2 X +subst@0.3.8 X X +substrait@0.62.0 X +subtle@2.6.1 X +syn@2.0.111 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +tempfile@3.23.0 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X +thread_local@1.1.9 X X +thrift@0.17.0 X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tinyvec@1.10.0 X X X +tinyvec_macros@0.1.1 X X X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X +toml@0.8.23 X X +toml_datetime@0.6.11 X X +toml_datetime@0.7.3 X X +toml_edit@0.22.27 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X +toml_write@0.1.2 X X +tower@0.5.2 X +tower-http@0.6.8 
X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X +tracing-log@0.2.0 X +tracing-subscriber@0.3.22 X +try-lock@0.2.5 X +twox-hash@2.1.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typenum@1.19.0 X X +typify@0.5.0 X +typify-impl@0.5.0 X +typify-macro@0.5.0 X +unicode-bidi@0.3.18 X X +unicode-ident@1.0.22 X X X +unicode-normalization@0.1.25 X X +unicode-properties@0.1.4 X X +unicode-segmentation@1.12.0 X X +unicode-width@0.1.14 X X +unicode-width@0.2.2 X X +unit-prefix@0.5.2 X +unsafe-libyaml@0.2.11 X +untrusted@0.9.0 X +url@2.5.7 X X +utf8_iter@1.0.4 X X +utf8parse@0.2.2 X X +uuid@1.19.0 X X +version_check@0.9.5 X X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X +wasm-streams@0.4.2 X X +web-sys@0.3.83 X X +web-time@1.1.0 X X +webpki-roots@1.0.4 X +winapi-util@0.1.11 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.52.0 X X +windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +xz2@0.1.7 X X 
+yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/test_utils/DEPENDENCIES.rust.tsv b/crates/test_utils/DEPENDENCIES.rust.tsv index ef7c315df7..879ead5921 100644 --- a/crates/test_utils/DEPENDENCIES.rust.tsv +++ b/crates/test_utils/DEPENDENCIES.rust.tsv @@ -1,30 +1,22 @@ crate Apache-2.0 MIT Unicode-3.0 -cfg-if@1.0.3 X X -iceberg_test_utils@0.7.0 X +cfg-if@1.0.4 X X +iceberg_test_utils@0.8.0 X lazy_static@1.5.0 X X -log@0.4.28 X X -nu-ansi-term@0.50.1 X +log@0.4.29 X X +nu-ansi-term@0.50.3 X once_cell@1.21.3 X X pin-project-lite@0.2.16 X X -proc-macro2@1.0.101 X X -quote@1.0.40 X X +proc-macro2@1.0.103 X X +quote@1.0.42 X X sharded-slab@0.1.7 X smallvec@1.15.1 X X -syn@2.0.106 X X +syn@2.0.111 X X thread_local@1.1.9 X X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X -unicode-ident@1.0.18 X X X -windows-sys@0.52.0 X X -windows-targets@0.52.6 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_msvc@0.52.6 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_msvc@0.52.6 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_msvc@0.52.6 X X +tracing-subscriber@0.3.22 X +unicode-ident@1.0.22 X X X +windows-link@0.2.1 X X +windows-sys@0.61.2 X X diff --git a/deny.toml b/deny.toml index 6d75c5d219..0f88ba6d0e 100644 --- a/deny.toml +++ b/deny.toml @@ -26,6 +26,7 @@ allow = [ "CC0-1.0", "Zlib", "CDLA-Permissive-2.0", + "bzip2-1.0.6", # Category-A: https://issues.apache.org/jira/browse/LEGAL-660 "Unicode-3.0", # Boost Software License Version 1.0 is allowed (Category-A): @@ -39,4 +40,4 @@ exceptions = [ { allow = 
["MPL-2.0"], crate = "webpki-roots" }, { allow = ["MPL-2.0"], crate = "generational-arena" }, { allow = ["MPL-2.0"], crate = "option-ext" }, -] \ No newline at end of file +] From b7ba2e8348ef79eff868715f2b7cf4ce6256d4ea Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Wed, 17 Dec 2025 16:49:13 -0800 Subject: [PATCH 36/58] fix(website): Update expected messages (#1942) ## Which issue does this PR close? - A minor doc fix ## What changes are included in this PR? - Corrected the expected messages on the instructions to verify a release ## Are these changes tested? not needed --- website/src/release.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/src/release.md b/website/src/release.md index 78c0e57525..f3a5798a2b 100644 --- a/website/src/release.md +++ b/website/src/release.md @@ -325,13 +325,13 @@ After downloading them, here are the instructions on how to verify them. ```bash gpg --verify apache-iceberg-rust-*.tar.gz.asc ``` - Expects: `"apache-iceberg-rust-0.7.0.tar.gz: OK"` + Expects: `gpg: Good signature from ...` * Verify the checksums: ```bash shasum -a 512 -c apache-iceberg-rust-*.tar.gz.sha512 ``` - Expects: `gpg: Good signature from ...` + Expects: `"apache-iceberg-rust-...tar.gz: OK"` * Verify build and test: ```bash From a329a3b756710b71c49dfd4b43060a4cc2731d3f Mon Sep 17 00:00:00 2001 From: Lo Date: Mon, 22 Dec 2025 09:14:12 +0800 Subject: [PATCH 37/58] feat: Implement shared delete file loading and caching for ArrowReader (#1941) ## Which issue does this PR close? - Closes #. ## What changes are included in this PR? Currently, ArrowReader instantiates a new CachingDeleteFileLoader (and consequently a new DeleteFilter) for each FileScanTask when calling load_deletes. This results in the DeleteFilter state being isolated per task. 
If multiple tasks reference the same delete file (common in positional deletes), that delete file is re-read and re-parsed for every task, leading to significant performance overhead and redundant I/O. Changes * Shared State: Moved the DeleteFilter instance into the CachingDeleteFileLoader struct. Since ArrowReader holds a single CachingDeleteFileLoader instance across its lifetime, the DeleteFilter state is now effectively shared across all file scan tasks processed by that reader. * Positional Delete Caching: Implemented a state machine for loading positional delete files (PosDelState) in DeleteFilter. * Added try_start_pos_del_load: Coordinates concurrent access to the same positional delete file. * Added finish_pos_del_load: Signals completion of loading. * Synchronization: Introduced a WaitFor state. Unlike equality deletes (which are accessed asynchronously), positional deletes are accessed synchronously by ArrowReader. Therefore, if a task encounters a file that is currently being loaded by another task, it must asynchronously wait (notify.notified().await) during the loading phase to ensure the data is fully populated before ArrowReader proceeds. * Refactoring: Updated load_file_for_task and related types in CachingDeleteFileLoader to support the new caching logic and carry file paths through the loading context. ## Are these changes tested? 
Added test_caching_delete_file_loader_caches_results to verify that repeated loads of the same delete file return shared memory objects --- .../src/arrow/caching_delete_file_loader.rs | 111 ++++++++++++++---- crates/iceberg/src/arrow/delete_filter.rs | 66 +++++++++++ 2 files changed, 155 insertions(+), 22 deletions(-) diff --git a/crates/iceberg/src/arrow/caching_delete_file_loader.rs b/crates/iceberg/src/arrow/caching_delete_file_loader.rs index aceeae49f7..5d0b1da712 100644 --- a/crates/iceberg/src/arrow/caching_delete_file_loader.rs +++ b/crates/iceberg/src/arrow/caching_delete_file_loader.rs @@ -23,7 +23,7 @@ use arrow_array::{Array, ArrayRef, Int64Array, StringArray, StructArray}; use futures::{StreamExt, TryStreamExt}; use tokio::sync::oneshot::{Receiver, channel}; -use super::delete_filter::DeleteFilter; +use super::delete_filter::{DeleteFilter, PosDelLoadAction}; use crate::arrow::delete_file_loader::BasicDeleteFileLoader; use crate::arrow::{arrow_primitive_to_literal, arrow_schema_to_schema}; use crate::delete_vector::DeleteVector; @@ -42,13 +42,20 @@ use crate::{Error, ErrorKind, Result}; pub(crate) struct CachingDeleteFileLoader { basic_delete_file_loader: BasicDeleteFileLoader, concurrency_limit_data_files: usize, + /// Shared filter state to allow caching loaded deletes across multiple + /// calls to `load_deletes` (e.g., across multiple file scan tasks). + delete_filter: DeleteFilter, } // Intermediate context during processing of a delete file task. 
enum DeleteFileContext { // TODO: Delete Vector loader from Puffin files ExistingEqDel, - PosDels(ArrowRecordBatchStream), + ExistingPosDel, + PosDels { + file_path: String, + stream: ArrowRecordBatchStream, + }, FreshEqDel { batch_stream: ArrowRecordBatchStream, equality_ids: HashSet, @@ -59,8 +66,12 @@ enum DeleteFileContext { // Final result of the processing of a delete file task before // results are fully merged into the DeleteFileManager's state enum ParsedDeleteFileContext { - DelVecs(HashMap), + DelVecs { + file_path: String, + results: HashMap, + }, EqDel, + ExistingPosDel, } #[allow(unused_variables)] @@ -69,6 +80,7 @@ impl CachingDeleteFileLoader { CachingDeleteFileLoader { basic_delete_file_loader: BasicDeleteFileLoader::new(file_io), concurrency_limit_data_files, + delete_filter: DeleteFilter::default(), } } @@ -142,7 +154,6 @@ impl CachingDeleteFileLoader { schema: SchemaRef, ) -> Receiver> { let (tx, rx) = channel(); - let del_filter = DeleteFilter::default(); let stream_items = delete_file_entries .iter() @@ -150,14 +161,14 @@ impl CachingDeleteFileLoader { ( t.clone(), self.basic_delete_file_loader.clone(), - del_filter.clone(), + self.delete_filter.clone(), schema.clone(), ) }) .collect::>(); let task_stream = futures::stream::iter(stream_items); - let del_filter = del_filter.clone(); + let del_filter = self.delete_filter.clone(); let concurrency_limit_data_files = self.concurrency_limit_data_files; let basic_delete_file_loader = self.basic_delete_file_loader.clone(); crate::runtime::spawn(async move { @@ -165,7 +176,7 @@ impl CachingDeleteFileLoader { let mut del_filter = del_filter; let basic_delete_file_loader = basic_delete_file_loader.clone(); - let results: Vec = task_stream + let mut results_stream = task_stream .map(move |(task, file_io, del_filter, schema)| { let basic_delete_file_loader = basic_delete_file_loader.clone(); async move { @@ -181,15 +192,16 @@ impl CachingDeleteFileLoader { .map(move |ctx| { Ok(async { 
Self::parse_file_content_for_task(ctx.await?).await }) }) - .try_buffer_unordered(concurrency_limit_data_files) - .try_collect::>() - .await?; + .try_buffer_unordered(concurrency_limit_data_files); - for item in results { - if let ParsedDeleteFileContext::DelVecs(hash_map) = item { - for (data_file_path, delete_vector) in hash_map.into_iter() { + while let Some(item) = results_stream.next().await { + let item = item?; + if let ParsedDeleteFileContext::DelVecs { file_path, results } = item { + for (data_file_path, delete_vector) in results.into_iter() { del_filter.upsert_delete_vector(data_file_path, delete_vector); } + // Mark the positional delete file as fully loaded so waiters can proceed + del_filter.finish_pos_del_load(&file_path); } } @@ -210,11 +222,24 @@ impl CachingDeleteFileLoader { schema: SchemaRef, ) -> Result { match task.file_type { - DataContentType::PositionDeletes => Ok(DeleteFileContext::PosDels( - basic_delete_file_loader - .parquet_to_batch_stream(&task.file_path) - .await?, - )), + DataContentType::PositionDeletes => { + match del_filter.try_start_pos_del_load(&task.file_path) { + PosDelLoadAction::AlreadyLoaded => Ok(DeleteFileContext::ExistingPosDel), + PosDelLoadAction::WaitFor(notify) => { + // Positional deletes are accessed synchronously by ArrowReader. + // We must wait here to ensure the data is ready before returning, + // otherwise ArrowReader might get an empty/partial result. 
+ notify.notified().await; + Ok(DeleteFileContext::ExistingPosDel) + } + PosDelLoadAction::Load => Ok(DeleteFileContext::PosDels { + file_path: task.file_path.clone(), + stream: basic_delete_file_loader + .parquet_to_batch_stream(&task.file_path) + .await?, + }), + } + } DataContentType::EqualityDeletes => { let Some(notify) = del_filter.try_start_eq_del_load(&task.file_path) else { @@ -255,10 +280,13 @@ impl CachingDeleteFileLoader { ) -> Result { match ctx { DeleteFileContext::ExistingEqDel => Ok(ParsedDeleteFileContext::EqDel), - DeleteFileContext::PosDels(batch_stream) => { - let del_vecs = - Self::parse_positional_deletes_record_batch_stream(batch_stream).await?; - Ok(ParsedDeleteFileContext::DelVecs(del_vecs)) + DeleteFileContext::ExistingPosDel => Ok(ParsedDeleteFileContext::ExistingPosDel), + DeleteFileContext::PosDels { file_path, stream } => { + let del_vecs = Self::parse_positional_deletes_record_batch_stream(stream).await?; + Ok(ParsedDeleteFileContext::DelVecs { + file_path, + results: del_vecs, + }) } DeleteFileContext::FreshEqDel { sender, @@ -979,4 +1007,43 @@ mod tests { assert!(result.is_ok()); } + + #[tokio::test] + async fn test_caching_delete_file_loader_caches_results() { + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path(); + let file_io = FileIO::from_path(table_location.as_os_str().to_str().unwrap()) + .unwrap() + .build() + .unwrap(); + + let delete_file_loader = CachingDeleteFileLoader::new(file_io.clone(), 10); + + let file_scan_tasks = setup(table_location); + + // Load deletes for the first time + let delete_filter_1 = delete_file_loader + .load_deletes(&file_scan_tasks[0].deletes, file_scan_tasks[0].schema_ref()) + .await + .unwrap() + .unwrap(); + + // Load deletes for the second time (same task/files) + let delete_filter_2 = delete_file_loader + .load_deletes(&file_scan_tasks[0].deletes, file_scan_tasks[0].schema_ref()) + .await + .unwrap() + .unwrap(); + + let dv1 = delete_filter_1 + 
.get_delete_vector(&file_scan_tasks[0]) + .unwrap(); + let dv2 = delete_filter_2 + .get_delete_vector(&file_scan_tasks[0]) + .unwrap(); + + // Verify that the delete vectors point to the same memory location, + // confirming that the second load reused the result from the first. + assert!(Arc::ptr_eq(&dv1, &dv2)); + } } diff --git a/crates/iceberg/src/arrow/delete_filter.rs b/crates/iceberg/src/arrow/delete_filter.rs index d05e028997..e6434e72ec 100644 --- a/crates/iceberg/src/arrow/delete_filter.rs +++ b/crates/iceberg/src/arrow/delete_filter.rs @@ -34,10 +34,23 @@ enum EqDelState { Loaded(Predicate), } +/// State tracking for positional delete files. +/// Unlike equality deletes, positional deletes must be fully loaded before +/// the ArrowReader proceeds because retrieval is synchronous and non-blocking. +#[derive(Debug)] +enum PosDelState { + /// The file is currently being loaded by a task. + /// The notifier allows other tasks to wait for completion. + Loading(Arc), + /// The file has been fully loaded and merged into the delete vector map. + Loaded, +} + #[derive(Debug, Default)] struct DeleteFileFilterState { delete_vectors: HashMap>>, equality_deletes: HashMap, + positional_deletes: HashMap, } #[derive(Clone, Debug, Default)] @@ -45,6 +58,18 @@ pub(crate) struct DeleteFilter { state: Arc>, } +/// Action to take when trying to start loading a positional delete file +pub(crate) enum PosDelLoadAction { + /// The file is not loaded, the caller should load it. + Load, + /// The file is already loaded, nothing to do. + AlreadyLoaded, + /// The file is currently being loaded by another task. + /// The caller *must* wait for this notifier to ensure data availability + /// before returning, as subsequent access (get_delete_vector) is synchronous. 
+ WaitFor(Arc), +} + impl DeleteFilter { /// Retrieve a delete vector for the data file associated with a given file scan task pub(crate) fn get_delete_vector( @@ -82,6 +107,47 @@ impl DeleteFilter { Some(notifier) } + /// Attempts to mark a positional delete file as "loading". + /// + /// Returns an action dictating whether the caller should load the file, + /// wait for another task to load it, or do nothing. + pub(crate) fn try_start_pos_del_load(&self, file_path: &str) -> PosDelLoadAction { + let mut state = self.state.write().unwrap(); + + if let Some(state) = state.positional_deletes.get(file_path) { + match state { + PosDelState::Loaded => return PosDelLoadAction::AlreadyLoaded, + PosDelState::Loading(notify) => return PosDelLoadAction::WaitFor(notify.clone()), + } + } + + let notifier = Arc::new(Notify::new()); + state + .positional_deletes + .insert(file_path.to_string(), PosDelState::Loading(notifier)); + + PosDelLoadAction::Load + } + + /// Marks a positional delete file as successfully loaded and notifies any waiting tasks. + pub(crate) fn finish_pos_del_load(&self, file_path: &str) { + let notify = { + let mut state = self.state.write().unwrap(); + if let Some(PosDelState::Loading(notify)) = state + .positional_deletes + .insert(file_path.to_string(), PosDelState::Loaded) + { + Some(notify) + } else { + None + } + }; + + if let Some(notify) = notify { + notify.notify_waiters(); + } + } + /// Retrieve the equality delete predicate for a given eq delete file path pub(crate) async fn get_equality_delete_predicate_for_delete_file_path( &self, From de6ffd202f77b2c53fe298f92d4d33f1482cf8a8 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Mon, 22 Dec 2025 17:25:55 -0800 Subject: [PATCH 38/58] infra: license header check ignore target/ dir (#1954) ## Which issue does this PR close? - Closes #. ## What changes are included in this PR? Following the [release verification](https://rust.iceberg.apache.org/release.html#how-to-verify-a-release) step by step. 
The target/ dir gets picked up by license header check. This PR adds the target/ dir to the ignore list. ## Are these changes tested? --- .licenserc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.licenserc.yaml b/.licenserc.yaml index da87374c3b..0bcb65f3b7 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -31,6 +31,7 @@ header: - "**/DEPENDENCIES.*.tsv" # Release distributions - "dist/*" + - "target" - "Cargo.lock" - "bindings/python/uv.lock" - ".github/PULL_REQUEST_TEMPLATE.md" From 152aeb645332ba9962a4e7f8a633b8f409e9a21a Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Tue, 23 Dec 2025 15:58:48 -0800 Subject: [PATCH 39/58] infra: release script, validate proper ICEBERG_VERSION variable (#1956) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Which issue does this PR close? - Closes #. ## What changes are included in this PR? Add a check that the input `ICEBERG_VERSION` variable is in the proper format, i.e. `0.8.0`. Otherwise, the source directory and zip file will have the wrong name ## Are these changes tested? Added these 3 lines: ``` echo "Release version: $release_version" echo "Git branch: $git_branch" echo "Source directory: apache-iceberg-rust-$release_version" ``` This PR: ``` ➜ iceberg-rust git:(kevinjqliu/validate-version) ✗ ICEBERG_VERSION=rc.1 ./scripts/release.sh var is set to 'rc.1' Error: ICEBERG_VERSION (rc.1) must be in the format: .. 
➜ iceberg-rust git:(kevinjqliu/validate-version) ✗ ICEBERG_VERSION=0.8.0 ./scripts/release.sh var is set to '0.8.0' Release version: 0.8.0 Git branch: release-0.8.0-rc.1 Source directory: apache-iceberg-rust-0.8.0 ``` main: ``` ➜ iceberg-rust git:(kevinjqliu/validate-version) ✗ ICEBERG_VERSION=rc.1 ./scripts/release.sh var is set to 'rc.1' Release version: rc.1 Git branch: release-rc.1-rc.1 Source directory: apache-iceberg-rust-rc.1 ➜ iceberg-rust git:(kevinjqliu/validate-version) ✗ ICEBERG_VERSION=0.8.0 ./scripts/release.sh var is set to '0.8.0' Release version: 0.8.0 Git branch: release-0.8.0-rc.1 Source directory: apache-iceberg-rust-0.8.0 ``` --- scripts/release.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/release.sh b/scripts/release.sh index a790cdd8d1..e4cee342d2 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -25,6 +25,12 @@ else echo "var is set to '$ICEBERG_VERSION'" fi +# Validate version format (e.g., 1.0.0) +if [[ ! "$ICEBERG_VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + echo "Error: ICEBERG_VERSION ($ICEBERG_VERSION) must be in the format: .." + exit 1 +fi + # tar source code release_version=${ICEBERG_VERSION} # rc versions From bc86d10c8ed85865cef0effae8f6ebca0ff3e358 Mon Sep 17 00:00:00 2001 From: Robert Mu Date: Wed, 24 Dec 2025 09:25:51 +0800 Subject: [PATCH 40/58] refactor(arrow): Rename parameter in delete_filter for clarity (#1955) The parameter `delete_file_path` in `get_delete_vector_for_path` was misleading because the function expects a data file path, not a delete file path. Renaming it to `data_file_path` accurately reflects its usage. 
--- crates/iceberg/src/arrow/delete_filter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/iceberg/src/arrow/delete_filter.rs b/crates/iceberg/src/arrow/delete_filter.rs index e6434e72ec..4af9f6b6ff 100644 --- a/crates/iceberg/src/arrow/delete_filter.rs +++ b/crates/iceberg/src/arrow/delete_filter.rs @@ -82,12 +82,12 @@ impl DeleteFilter { /// Retrieve a delete vector for a data file pub(crate) fn get_delete_vector_for_path( &self, - delete_file_path: &str, + data_file_path: &str, ) -> Option>> { self.state .read() .ok() - .and_then(|st| st.delete_vectors.get(delete_file_path).cloned()) + .and_then(|st| st.delete_vectors.get(data_file_path).cloned()) } pub(crate) fn try_start_eq_del_load(&self, file_path: &str) -> Option> { From 5dc57643aebbe1be3ac36e29581fa7462bfdd2cd Mon Sep 17 00:00:00 2001 From: Andrea Bozzo Date: Thu, 25 Dec 2025 01:45:35 +0100 Subject: [PATCH 41/58] feat(sqllogictest): use serde derived structs for schedule parsing (#1953) This PR refactors the schedule file parsing in the sqllogictest crate to use serde-derived structs instead of manual TOML parsing, as requested in #1952. ### Changes - **New structs with serde derives:** - `ScheduleConfig` - top-level configuration parsed from TOML - `EngineConfig` - per-engine configuration with `#[serde(flatten)]` for extensibility - `EngineType` - enum of supported engine types - **Refactored parsing flow:** - `Schedule::from_file()` now uses `toml::from_str()` directly - Added `instantiate_engines()` to separate parsing from engine creation - Removed manual `parse_engines()` and `parse_steps()` functions - **Forward-compatibility:** - Uses `#[serde(flatten)]` to capture extra fields in `EngineConfig.extra` - This enables PR #1943 to easily add `catalog_type` and `catalog_properties` support ### Relation to #1943 This PR was suggested by @liurenjie1024 as a prerequisite to #1943 (dynamic catalog configuration). 
The `#[serde(flatten)]` approach allows #1943 to simply extract the catalog configuration from `EngineConfig.extra` without modifying the parsing logic. ### Testing - All existing tests pass - Added new unit tests for deserialization behavior - Integration test with `df_test.toml` passes unchanged Closes #1952 --- crates/sqllogictest/src/engine/datafusion.rs | 18 +- crates/sqllogictest/src/engine/mod.rs | 96 ++++++--- crates/sqllogictest/src/schedule.rs | 198 ++++++++++++------- 3 files changed, 207 insertions(+), 105 deletions(-) diff --git a/crates/sqllogictest/src/engine/datafusion.rs b/crates/sqllogictest/src/engine/datafusion.rs index e3402dfa97..e9f93287d8 100644 --- a/crates/sqllogictest/src/engine/datafusion.rs +++ b/crates/sqllogictest/src/engine/datafusion.rs @@ -27,9 +27,8 @@ use iceberg::spec::{NestedField, PrimitiveType, Schema, Transform, Type, Unbound use iceberg::{Catalog, CatalogBuilder, NamespaceIdent, TableCreation}; use iceberg_datafusion::IcebergCatalogProvider; use indicatif::ProgressBar; -use toml::Table as TomlTable; -use crate::engine::{EngineRunner, run_slt_with_runner}; +use crate::engine::{DatafusionCatalogConfig, EngineRunner, run_slt_with_runner}; use crate::error::Result; pub struct DataFusionEngine { @@ -59,12 +58,15 @@ impl EngineRunner for DataFusionEngine { } impl DataFusionEngine { - pub async fn new(config: TomlTable) -> Result { + pub async fn new(catalog_config: Option) -> Result { let session_config = SessionConfig::new() .with_target_partitions(4) .with_information_schema(true); let ctx = SessionContext::new_with_config(session_config); - ctx.register_catalog("default", Self::create_catalog(&config).await?); + ctx.register_catalog( + "default", + Self::create_catalog(catalog_config.as_ref()).await?, + ); Ok(Self { test_data_path: PathBuf::from("testdata"), @@ -72,9 +74,11 @@ impl DataFusionEngine { }) } - async fn create_catalog(_: &TomlTable) -> anyhow::Result> { - // TODO: support dynamic catalog configuration - // See: 
https://github.com/apache/iceberg-rust/issues/1780 + async fn create_catalog( + _catalog_config: Option<&DatafusionCatalogConfig>, + ) -> anyhow::Result> { + // TODO: Use catalog_config to load different catalog types via iceberg-catalog-loader + // See: https://github.com/apache/iceberg-rust/issues/1780 let catalog = MemoryCatalogBuilder::default() .load( "memory", diff --git a/crates/sqllogictest/src/engine/mod.rs b/crates/sqllogictest/src/engine/mod.rs index 724359fbe5..a276671401 100644 --- a/crates/sqllogictest/src/engine/mod.rs +++ b/crates/sqllogictest/src/engine/mod.rs @@ -17,29 +17,45 @@ mod datafusion; +use std::collections::HashMap; use std::path::Path; use anyhow::anyhow; +use serde::Deserialize; use sqllogictest::{AsyncDB, MakeConnection, Runner, parse_file}; -use toml::Table as TomlTable; use crate::engine::datafusion::DataFusionEngine; use crate::error::{Error, Result}; -const TYPE_DATAFUSION: &str = "datafusion"; +/// Configuration for the catalog used by the DataFusion engine +#[derive(Debug, Clone, Deserialize)] +pub struct DatafusionCatalogConfig { + /// Catalog type: "memory", "rest", "glue", "hms", "s3tables", "sql" + #[serde(rename = "type")] + pub catalog_type: String, + /// Catalog properties passed to the catalog loader + #[serde(default)] + pub props: HashMap, +} + +/// Engine configuration as a tagged enum +#[derive(Debug, Clone, Deserialize)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum EngineConfig { + Datafusion { + #[serde(default)] + catalog: Option, + }, +} #[async_trait::async_trait] pub trait EngineRunner: Send { async fn run_slt_file(&mut self, path: &Path) -> Result<()>; } -pub async fn load_engine_runner( - engine_type: &str, - cfg: TomlTable, -) -> Result> { - match engine_type { - TYPE_DATAFUSION => Ok(Box::new(DataFusionEngine::new(cfg).await?)), - _ => Err(anyhow::anyhow!("Unsupported engine type: {engine_type}").into()), +pub async fn load_engine_runner(config: EngineConfig) -> Result> { + match config { + 
EngineConfig::Datafusion { catalog } => Ok(Box::new(DataFusionEngine::new(catalog).await?)), } } @@ -65,29 +81,63 @@ where #[cfg(test)] mod tests { - use crate::engine::{TYPE_DATAFUSION, load_engine_runner}; + use crate::engine::{DatafusionCatalogConfig, EngineConfig, load_engine_runner}; - #[tokio::test] - async fn test_engine_invalid_type() { + #[test] + fn test_deserialize_engine_config() { + let input = r#"type = "datafusion""#; + + let config: EngineConfig = toml::from_str(input).unwrap(); + assert!(matches!(config, EngineConfig::Datafusion { catalog: None })); + } + + #[test] + fn test_deserialize_engine_config_with_catalog() { + let input = r#" + type = "datafusion" + + [catalog] + type = "rest" + + [catalog.props] + uri = "http://localhost:8181" + "#; + + let config: EngineConfig = toml::from_str(input).unwrap(); + match config { + EngineConfig::Datafusion { catalog: Some(cat) } => { + assert_eq!(cat.catalog_type, "rest"); + assert_eq!( + cat.props.get("uri"), + Some(&"http://localhost:8181".to_string()) + ); + } + _ => panic!("Expected Datafusion with catalog"), + } + } + + #[test] + fn test_deserialize_catalog_config() { let input = r#" - [engines] - random = { type = "random_engine", url = "http://localhost:8181" } + type = "memory" + + [props] + warehouse = "file:///tmp/warehouse" "#; - let tbl = toml::from_str(input).unwrap(); - let result = load_engine_runner("random_engine", tbl).await; - assert!(result.is_err()); + let config: DatafusionCatalogConfig = toml::from_str(input).unwrap(); + assert_eq!(config.catalog_type, "memory"); + assert_eq!( + config.props.get("warehouse"), + Some(&"file:///tmp/warehouse".to_string()) + ); } #[tokio::test] async fn test_load_datafusion() { - let input = r#" - [engines] - df = { type = "datafusion" } - "#; - let tbl = toml::from_str(input).unwrap(); - let result = load_engine_runner(TYPE_DATAFUSION, tbl).await; + let config = EngineConfig::Datafusion { catalog: None }; + let result = load_engine_runner(config).await; 
assert!(result.is_ok()); } } diff --git a/crates/sqllogictest/src/schedule.rs b/crates/sqllogictest/src/schedule.rs index 7c13ad4d12..25728a2968 100644 --- a/crates/sqllogictest/src/schedule.rs +++ b/crates/sqllogictest/src/schedule.rs @@ -21,10 +21,18 @@ use std::path::{Path, PathBuf}; use anyhow::{Context, anyhow}; use serde::{Deserialize, Serialize}; -use toml::{Table as TomlTable, Value}; use tracing::info; -use crate::engine::{EngineRunner, load_engine_runner}; +use crate::engine::{EngineConfig, EngineRunner, load_engine_runner}; + +/// Raw configuration parsed from the schedule TOML file +#[derive(Debug, Clone, Deserialize)] +pub struct ScheduleConfig { + /// Engine name to engine configuration + pub engines: HashMap, + /// List of test steps to run + pub steps: Vec, +} pub struct Schedule { /// Engine names to engine instances @@ -59,15 +67,27 @@ impl Schedule { pub async fn from_file>(path: P) -> anyhow::Result { let path_str = path.as_ref().to_string_lossy().to_string(); let content = read_to_string(path)?; - let toml_value = content.parse::()?; - let toml_table = toml_value - .as_table() - .ok_or_else(|| anyhow!("Schedule file must be a TOML table"))?; - let engines = Schedule::parse_engines(toml_table).await?; - let steps = Schedule::parse_steps(toml_table)?; + let config: ScheduleConfig = toml::from_str(&content) + .with_context(|| format!("Failed to parse schedule file: {path_str}"))?; - Ok(Self::new(engines, steps, path_str)) + let engines = Self::instantiate_engines(config.engines).await?; + + Ok(Self::new(engines, config.steps, path_str)) + } + + /// Instantiate engine runners from their configurations + async fn instantiate_engines( + configs: HashMap, + ) -> anyhow::Result>> { + let mut engines = HashMap::new(); + + for (name, config) in configs { + let engine = load_engine_runner(config).await?; + engines.insert(name, engine); + } + + Ok(engines) } pub async fn run(mut self) -> anyhow::Result<()> { @@ -105,103 +125,131 @@ impl Schedule { } Ok(()) 
} +} - async fn parse_engines( - table: &TomlTable, - ) -> anyhow::Result>> { - let engines_tbl = table - .get("engines") - .with_context(|| "Schedule file must have an 'engines' table")? - .as_table() - .ok_or_else(|| anyhow!("'engines' must be a table"))?; - - let mut engines = HashMap::new(); - - for (name, engine_val) in engines_tbl { - let cfg_tbl = engine_val - .as_table() - .ok_or_else(|| anyhow!("Config of engine '{name}' is not a table"))? - .clone(); - - let engine_type = cfg_tbl - .get("type") - .ok_or_else(|| anyhow::anyhow!("Engine {name} doesn't have a 'type' field"))? - .as_str() - .ok_or_else(|| anyhow::anyhow!("Engine {name} type must be a string"))?; - - let engine = load_engine_runner(engine_type, cfg_tbl.clone()).await?; - - if engines.insert(name.clone(), engine).is_some() { - return Err(anyhow!("Duplicate engine '{name}'")); - } - } +#[cfg(test)] +mod tests { + use crate::engine::EngineConfig; + use crate::schedule::ScheduleConfig; - Ok(engines) - } + #[test] + fn test_deserialize_schedule_config() { + let input = r#" + [engines] + df = { type = "datafusion" } - fn parse_steps(table: &TomlTable) -> anyhow::Result> { - let steps_val = table - .get("steps") - .with_context(|| "Schedule file must have a 'steps' array")?; + [[steps]] + engine = "df" + slt = "test.slt" + "#; - let steps: Vec = steps_val - .clone() - .try_into() - .with_context(|| "Failed to deserialize steps")?; + let config: ScheduleConfig = toml::from_str(input).unwrap(); - Ok(steps) + assert_eq!(config.engines.len(), 1); + assert!(config.engines.contains_key("df")); + assert!(matches!(config.engines["df"], EngineConfig::Datafusion { + catalog: None + })); + assert_eq!(config.steps.len(), 1); + assert_eq!(config.steps[0].engine, "df"); + assert_eq!(config.steps[0].slt, "test.slt"); } -} - -#[cfg(test)] -mod tests { - use toml::Table as TomlTable; - - use crate::schedule::Schedule; #[test] - fn test_parse_steps() { + fn test_deserialize_multiple_steps() { let input = r#" + 
[engines] + datafusion = { type = "datafusion" } + [[steps]] engine = "datafusion" slt = "test.slt" [[steps]] - engine = "spark" + engine = "datafusion" slt = "test2.slt" "#; - let tbl: TomlTable = toml::from_str(input).unwrap(); - let steps = Schedule::parse_steps(&tbl).unwrap(); + let config: ScheduleConfig = toml::from_str(input).unwrap(); - assert_eq!(steps.len(), 2); - assert_eq!(steps[0].engine, "datafusion"); - assert_eq!(steps[0].slt, "test.slt"); - assert_eq!(steps[1].engine, "spark"); - assert_eq!(steps[1].slt, "test2.slt"); + assert_eq!(config.steps.len(), 2); + assert_eq!(config.steps[0].engine, "datafusion"); + assert_eq!(config.steps[0].slt, "test.slt"); + assert_eq!(config.steps[1].engine, "datafusion"); + assert_eq!(config.steps[1].slt, "test2.slt"); } #[test] - fn test_parse_steps_empty() { + fn test_deserialize_with_catalog_config() { let input = r#" + [engines.df] + type = "datafusion" + + [engines.df.catalog] + type = "rest" + + [engines.df.catalog.props] + uri = "http://localhost:8181" + [[steps]] + engine = "df" + slt = "test.slt" "#; - let tbl: TomlTable = toml::from_str(input).unwrap(); - let steps = Schedule::parse_steps(&tbl); + let config: ScheduleConfig = toml::from_str(input).unwrap(); - assert!(steps.is_err()); + match &config.engines["df"] { + EngineConfig::Datafusion { catalog: Some(cat) } => { + assert_eq!(cat.catalog_type, "rest"); + assert_eq!( + cat.props.get("uri"), + Some(&"http://localhost:8181".to_string()) + ); + } + _ => panic!("Expected Datafusion with catalog config"), + } } - #[tokio::test] - async fn test_parse_engines_invalid_table() { - let toml_content = r#" - engines = "not_a_table" + #[test] + fn test_deserialize_missing_engine_type() { + let input = r#" + [engines] + df = { } + + [[steps]] + engine = "df" + slt = "test.slt" "#; - let table: TomlTable = toml::from_str(toml_content).unwrap(); - let result = Schedule::parse_engines(&table).await; + let result: Result = toml::from_str(input); + 
assert!(result.is_err()); + } + + #[test] + fn test_deserialize_invalid_engine_type() { + let input = r#" + [engines] + df = { type = "unknown_engine" } + + [[steps]] + engine = "df" + slt = "test.slt" + "#; + + let result: Result = toml::from_str(input); + assert!(result.is_err()); + } + + #[test] + fn test_deserialize_missing_step_fields() { + let input = r#" + [engines] + df = { type = "datafusion" } + + [[steps]] + "#; + let result: Result = toml::from_str(input); assert!(result.is_err()); } } From 99ca196a7a81343ac35d993d9db2ec59c626f389 Mon Sep 17 00:00:00 2001 From: Alan Tang Date: Thu, 25 Dec 2025 09:03:32 +0800 Subject: [PATCH 42/58] fix: follow IEEE 754 totalOrder for `float` and `double` (#1959) ## Which issue does this PR close? - Closes #1951. ## What changes are included in this PR? For `F32` and `F64`, we use the `total_cmp` method for comparison. ## Are these changes tested? Yes, I added test cases to verify whether the comparison follows the IEEE 754 rules. Signed-off-by: StandingMan Co-authored-by: Renjie Liu --- crates/iceberg/src/spec/values/datum.rs | 36 ++++++------------------- crates/iceberg/src/spec/values/tests.rs | 25 +++++++++++++++++ 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/crates/iceberg/src/spec/values/datum.rs b/crates/iceberg/src/spec/values/datum.rs index cb60fb94e9..88209ae95c 100644 --- a/crates/iceberg/src/spec/values/datum.rs +++ b/crates/iceberg/src/spec/values/datum.rs @@ -166,36 +166,16 @@ impl<'de> Deserialize<'de> for Datum { // Compare following iceberg float ordering rules: // -NaN < -Infinity < -value < -0 < 0 < value < Infinity < NaN -fn iceberg_float_cmp(a: T, b: T) -> Option { - if a.is_nan() && b.is_nan() { - return match (a.is_sign_negative(), b.is_sign_negative()) { - (true, false) => Some(Ordering::Less), - (false, true) => Some(Ordering::Greater), - _ => Some(Ordering::Equal), - }; - } - - if a.is_nan() { - return Some(if a.is_sign_negative() { - Ordering::Less - } else { - 
Ordering::Greater - }); - } - - if b.is_nan() { - return Some(if b.is_sign_negative() { - Ordering::Greater - } else { - Ordering::Less - }); - } +fn iceberg_float_cmp_f32(a: OrderedFloat, b: OrderedFloat) -> Option { + Some(a.total_cmp(&b)) +} - a.partial_cmp(&b) +fn iceberg_float_cmp_f64(a: OrderedFloat, b: OrderedFloat) -> Option { + Some(a.total_cmp(&b)) } impl PartialOrd for Datum { - fn partial_cmp(&self, other: &Self) -> Option { + fn partial_cmp(&self, other: &Self) -> Option { match (&self.literal, &other.literal, &self.r#type, &other.r#type) { // generate the arm with same type and same literal ( @@ -221,13 +201,13 @@ impl PartialOrd for Datum { PrimitiveLiteral::Float(other_val), PrimitiveType::Float, PrimitiveType::Float, - ) => iceberg_float_cmp(*val, *other_val), + ) => iceberg_float_cmp_f32(*val, *other_val), ( PrimitiveLiteral::Double(val), PrimitiveLiteral::Double(other_val), PrimitiveType::Double, PrimitiveType::Double, - ) => iceberg_float_cmp(*val, *other_val), + ) => iceberg_float_cmp_f64(*val, *other_val), ( PrimitiveLiteral::Int(val), PrimitiveLiteral::Int(other_val), diff --git a/crates/iceberg/src/spec/values/tests.rs b/crates/iceberg/src/spec/values/tests.rs index 73343a9a1a..bb10701d87 100644 --- a/crates/iceberg/src/spec/values/tests.rs +++ b/crates/iceberg/src/spec/values/tests.rs @@ -1293,6 +1293,31 @@ fn test_iceberg_float_order() { assert_eq!(double_sorted, double_expected); } +#[test] +fn test_negative_zero_less_than_positive_zero() { + { + let neg_zero = Datum::float(-0.0); + let pos_zero = Datum::float(0.0); + + assert_eq!( + neg_zero.partial_cmp(&pos_zero), + Some(std::cmp::Ordering::Less), + "IEEE 754 totalOrder requires -0.0 < +0.0 on F32" + ); + } + + { + let neg_zero = Datum::double(-0.0); + let pos_zero = Datum::double(0.0); + + assert_eq!( + neg_zero.partial_cmp(&pos_zero), + Some(std::cmp::Ordering::Less), + "IEEE 754 totalOrder requires -0.0 < +0.0 on F64" + ); + } +} + /// Test Date deserialization from JSON as number 
(days since epoch). /// /// This reproduces the scenario from Iceberg Java's TestAddFilesProcedure where: From de6a2b9cd2d364f11905fc26c2d318bdc03c2d52 Mon Sep 17 00:00:00 2001 From: Alan Tang Date: Tue, 30 Dec 2025 09:14:58 +0800 Subject: [PATCH 43/58] fix: return proper error rather than persisting error message on snapshot (#1960) ## Which issue does this PR close? - Closes #1957. ## What changes are included in this PR? Avoid storing error message on snapshot, return error instead. ## Are these changes tested? Yes --------- Signed-off-by: StandingMan --- crates/iceberg/src/spec/snapshot.rs | 97 +++++++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 4 deletions(-) diff --git a/crates/iceberg/src/spec/snapshot.rs b/crates/iceberg/src/spec/snapshot.rs index 5371cf68f2..270279988b 100644 --- a/crates/iceberg/src/spec/snapshot.rs +++ b/crates/iceberg/src/spec/snapshot.rs @@ -266,9 +266,9 @@ pub(super) mod _serde { use serde::{Deserialize, Serialize}; use super::{Operation, Snapshot, Summary}; - use crate::Error; use crate::spec::SchemaId; use crate::spec::snapshot::SnapshotRowRange; + use crate::{Error, ErrorKind}; #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] @@ -408,9 +408,19 @@ pub(super) mod _serde { timestamp_ms: v1.timestamp_ms, manifest_list: match (v1.manifest_list, v1.manifests) { (Some(file), None) => file, - (Some(_), Some(_)) => "Invalid v1 snapshot, when manifest list provided, manifest files should be omitted".to_string(), - (None, _) => "Unsupported v1 snapshot, only manifest list is supported".to_string() - }, + (Some(_), Some(_)) => { + return Err(Error::new( + ErrorKind::DataInvalid, + "Invalid v1 snapshot, when manifest list provided, manifest files should be omitted", + )); + } + (None, _) => { + return Err(Error::new( + ErrorKind::DataInvalid, + "Unsupported v1 snapshot, only manifest list is supported", + )); + } + }, summary: v1.summary.unwrap_or(Summary { operation: Operation::default(), 
additional_properties: HashMap::new(), @@ -517,6 +527,7 @@ mod tests { use chrono::{TimeZone, Utc}; + use crate::spec::TableMetadata; use crate::spec::snapshot::_serde::SnapshotV1; use crate::spec::snapshot::{Operation, Snapshot, Summary}; @@ -604,6 +615,84 @@ mod tests { ); } + #[test] + fn test_v1_snapshot_with_manifest_list_and_manifests() { + { + let metadata = r#" + { + "format-version": 1, + "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c", + "location": "s3://bucket/test/location", + "last-updated-ms": 1700000000000, + "last-column-id": 1, + "schema": { + "type": "struct", + "fields": [ + {"id": 1, "name": "x", "required": true, "type": "long"} + ] + }, + "partition-spec": [], + "properties": {}, + "current-snapshot-id": 111111111, + "snapshots": [ + { + "snapshot-id": 111111111, + "timestamp-ms": 1600000000000, + "summary": {"operation": "append"}, + "manifest-list": "s3://bucket/metadata/snap-123.avro", + "manifests": ["s3://bucket/metadata/manifest-1.avro"] + } + ] + } + "#; + + let result_both_manifest_list_and_manifest_set = + serde_json::from_str::(metadata); + assert!(result_both_manifest_list_and_manifest_set.is_err()); + assert_eq!( + result_both_manifest_list_and_manifest_set + .unwrap_err() + .to_string(), + "DataInvalid => Invalid v1 snapshot, when manifest list provided, manifest files should be omitted" + ) + } + + { + let metadata = r#" + { + "format-version": 1, + "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c", + "location": "s3://bucket/test/location", + "last-updated-ms": 1700000000000, + "last-column-id": 1, + "schema": { + "type": "struct", + "fields": [ + {"id": 1, "name": "x", "required": true, "type": "long"} + ] + }, + "partition-spec": [], + "properties": {}, + "current-snapshot-id": 111111111, + "snapshots": [ + { + "snapshot-id": 111111111, + "timestamp-ms": 1600000000000, + "summary": {"operation": "append"}, + "manifests": ["s3://bucket/metadata/manifest-1.avro"] + } + ] + } + "#; + let result_missing_manifest_list = 
serde_json::from_str::(metadata); + assert!(result_missing_manifest_list.is_err()); + assert_eq!( + result_missing_manifest_list.unwrap_err().to_string(), + "DataInvalid => Unsupported v1 snapshot, only manifest list is supported" + ) + } + } + #[test] fn test_snapshot_v1_to_v2_with_missing_summary() { use crate::spec::snapshot::_serde::SnapshotV1; From ef851524f16a604c05683051d732fa523b6e3bdc Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Mon, 29 Dec 2025 17:24:56 -0800 Subject: [PATCH 44/58] feat(arrow): Convert Arrow schema to Iceberg schema with auto assigned field ids (#1928) ## Which issue does this PR close? - Closes #1927 ## What changes are included in this PR? - Modified ArrowSchemaConverter to enable id reassignment - Added a new pub helper: `arrow_schema_to_schema_auto_assign_ids` ## Are these changes tested? Added uts --- crates/iceberg/src/arrow/schema.rs | 233 +++++++++++++++++- crates/iceberg/src/arrow/value.rs | 4 +- crates/iceberg/src/spec/mod.rs | 1 + crates/iceberg/src/spec/schema/mod.rs | 4 +- .../src/spec/table_metadata_builder.rs | 2 +- 5 files changed, 226 insertions(+), 18 deletions(-) diff --git a/crates/iceberg/src/arrow/schema.rs b/crates/iceberg/src/arrow/schema.rs index 4f4f083c73..9ee7897cb6 100644 --- a/crates/iceberg/src/arrow/schema.rs +++ b/crates/iceberg/src/arrow/schema.rs @@ -35,8 +35,8 @@ use uuid::Uuid; use crate::error::Result; use crate::spec::{ - Datum, ListType, MapType, NestedField, NestedFieldRef, PrimitiveLiteral, PrimitiveType, Schema, - SchemaVisitor, StructType, Type, + Datum, FIRST_FIELD_ID, ListType, MapType, NestedField, NestedFieldRef, PrimitiveLiteral, + PrimitiveType, Schema, SchemaVisitor, StructType, Type, }; use crate::{Error, ErrorKind}; @@ -221,6 +221,19 @@ pub fn arrow_schema_to_schema(schema: &ArrowSchema) -> Result { visit_schema(schema, &mut visitor) } +/// Convert Arrow schema to Iceberg schema with automatically assigned field IDs. 
+/// +/// Unlike [`arrow_schema_to_schema`], this function does not require field IDs in the Arrow +/// schema metadata. Instead, it automatically assigns unique field IDs starting from 1, +/// following Iceberg's field ID assignment rules. +/// +/// This is useful when converting Arrow schemas that don't originate from Iceberg tables, +/// such as schemas from DataFusion or other Arrow-based systems. +pub fn arrow_schema_to_schema_auto_assign_ids(schema: &ArrowSchema) -> Result { + let mut visitor = ArrowSchemaConverter::new_with_field_ids_from(FIRST_FIELD_ID); + visit_schema(schema, &mut visitor) +} + /// Convert Arrow type to iceberg type. pub fn arrow_type_to_type(ty: &DataType) -> Result { let mut visitor = ArrowSchemaConverter::new(); @@ -229,7 +242,7 @@ pub fn arrow_type_to_type(ty: &DataType) -> Result { const ARROW_FIELD_DOC_KEY: &str = "doc"; -pub(super) fn get_field_id(field: &Field) -> Result { +pub(super) fn get_field_id_from_metadata(field: &Field) -> Result { if let Some(value) = field.metadata().get(PARQUET_FIELD_ID_META_KEY) { return value.parse::().map_err(|e| { Error::new( @@ -253,19 +266,55 @@ fn get_field_doc(field: &Field) -> Option { None } -struct ArrowSchemaConverter; +struct ArrowSchemaConverter { + /// When set, the schema builder will reassign field IDs starting from this value + /// using level-order traversal (breadth-first). + reassign_field_ids_from: Option, + /// Generates unique placeholder IDs for fields before reassignment. + /// Required because `ReassignFieldIds` builds an old-to-new ID mapping + /// that expects unique input IDs. 
+ next_field_id: i32, +} impl ArrowSchemaConverter { fn new() -> Self { - Self {} + Self { + reassign_field_ids_from: None, + next_field_id: 0, + } + } + + fn new_with_field_ids_from(start_from: i32) -> Self { + Self { + reassign_field_ids_from: Some(start_from), + next_field_id: 0, + } + } + + fn get_field_id(&mut self, field: &Field) -> Result { + if self.reassign_field_ids_from.is_some() { + // Field IDs will be reassigned by the schema builder. + // We need unique temporary IDs because ReassignFieldIds builds an + // old->new ID mapping that requires unique input IDs. + let temp_id = self.next_field_id; + self.next_field_id += 1; + Ok(temp_id) + } else { + // Get field ID from arrow field metadata + get_field_id_from_metadata(field) + } } - fn convert_fields(fields: &Fields, field_results: &[Type]) -> Result> { + fn convert_fields( + &mut self, + fields: &Fields, + field_results: &[Type], + ) -> Result> { let mut results = Vec::with_capacity(fields.len()); for i in 0..fields.len() { let field = &fields[i]; let field_type = &field_results[i]; - let id = get_field_id(field)?; + let id = self.get_field_id(field)?; let doc = get_field_doc(field); let nested_field = NestedField { id, @@ -287,13 +336,16 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter { type U = Schema; fn schema(&mut self, schema: &ArrowSchema, values: Vec) -> Result { - let fields = Self::convert_fields(schema.fields(), &values)?; - let builder = Schema::builder().with_fields(fields); + let fields = self.convert_fields(schema.fields(), &values)?; + let mut builder = Schema::builder().with_fields(fields); + if let Some(start_from) = self.reassign_field_ids_from { + builder = builder.with_reassigned_field_ids(start_from) + } builder.build() } fn r#struct(&mut self, fields: &Fields, results: Vec) -> Result { - let fields = Self::convert_fields(fields, &results)?; + let fields = self.convert_fields(fields, &results)?; Ok(Type::Struct(StructType::new(fields))) } @@ -310,7 +362,7 @@ impl 
ArrowSchemaVisitor for ArrowSchemaConverter { } }; - let id = get_field_id(element_field)?; + let id = self.get_field_id(element_field)?; let doc = get_field_doc(element_field); let mut element_field = NestedField::list_element(id, value.clone(), !element_field.is_nullable()); @@ -335,7 +387,7 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter { let key_field = &fields[0]; let value_field = &fields[1]; - let key_id = get_field_id(key_field)?; + let key_id = self.get_field_id(key_field)?; let key_doc = get_field_doc(key_field); let mut key_field = NestedField::map_key_element(key_id, key_value.clone()); if let Some(doc) = key_doc { @@ -343,7 +395,7 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter { } let key_field = Arc::new(key_field); - let value_id = get_field_id(value_field)?; + let value_id = self.get_field_id(value_field)?; let value_doc = get_field_doc(value_field); let mut value_field = NestedField::map_value_element( value_id, @@ -1932,4 +1984,159 @@ mod tests { assert_eq!(array.value(0), [66u8; 16]); } } + + #[test] + fn test_arrow_schema_to_schema_with_field_id() { + // Create a complex Arrow schema without field ID metadata + // Including: primitives, list, nested struct, map, and nested list of structs + let arrow_schema = ArrowSchema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + Field::new("price", DataType::Decimal128(10, 2), false), + Field::new( + "created_at", + DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())), + true, + ), + Field::new( + "tags", + DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), + true, + ), + Field::new( + "address", + DataType::Struct(Fields::from(vec![ + Field::new("street", DataType::Utf8, true), + Field::new("city", DataType::Utf8, false), + Field::new("zip", DataType::Int32, true), + ])), + true, + ), + Field::new( + "attributes", + DataType::Map( + Arc::new(Field::new( + DEFAULT_MAP_FIELD_NAME, + 
DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Utf8, false), + Field::new("value", DataType::Utf8, true), + ])), + false, + )), + false, + ), + true, + ), + Field::new( + "orders", + DataType::List(Arc::new(Field::new( + "element", + DataType::Struct(Fields::from(vec![ + Field::new("order_id", DataType::Int64, false), + Field::new("amount", DataType::Float64, false), + ])), + true, + ))), + true, + ), + ]); + + let schema = arrow_schema_to_schema_auto_assign_ids(&arrow_schema).unwrap(); + + // Build expected schema with exact field IDs following level-order assignment: + // Level 0: id=1, name=2, price=3, created_at=4, tags=5, address=6, attributes=7, orders=8 + // Level 1: tags.element=9, address.{street=10,city=11,zip=12}, attributes.{key=13,value=14}, orders.element=15 + // Level 2: orders.element.{order_id=16,amount=17} + let expected = Schema::builder() + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Long)).into(), + NestedField::optional(2, "name", Type::Primitive(PrimitiveType::String)).into(), + NestedField::required( + 3, + "price", + Type::Primitive(PrimitiveType::Decimal { + precision: 10, + scale: 2, + }), + ) + .into(), + NestedField::optional(4, "created_at", Type::Primitive(PrimitiveType::Timestamptz)) + .into(), + NestedField::optional( + 5, + "tags", + Type::List(ListType { + element_field: NestedField::list_element( + 9, + Type::Primitive(PrimitiveType::String), + false, + ) + .into(), + }), + ) + .into(), + NestedField::optional( + 6, + "address", + Type::Struct(StructType::new(vec![ + NestedField::optional(10, "street", Type::Primitive(PrimitiveType::String)) + .into(), + NestedField::required(11, "city", Type::Primitive(PrimitiveType::String)) + .into(), + NestedField::optional(12, "zip", Type::Primitive(PrimitiveType::Int)) + .into(), + ])), + ) + .into(), + NestedField::optional( + 7, + "attributes", + Type::Map(MapType { + key_field: NestedField::map_key_element( + 13, + 
Type::Primitive(PrimitiveType::String), + ) + .into(), + value_field: NestedField::map_value_element( + 14, + Type::Primitive(PrimitiveType::String), + false, + ) + .into(), + }), + ) + .into(), + NestedField::optional( + 8, + "orders", + Type::List(ListType { + element_field: NestedField::list_element( + 15, + Type::Struct(StructType::new(vec![ + NestedField::required( + 16, + "order_id", + Type::Primitive(PrimitiveType::Long), + ) + .into(), + NestedField::required( + 17, + "amount", + Type::Primitive(PrimitiveType::Double), + ) + .into(), + ])), + false, + ) + .into(), + }), + ) + .into(), + ]) + .build() + .unwrap(); + + pretty_assertions::assert_eq!(schema, expected); + assert_eq!(schema.highest_field_id(), 17); + } } diff --git a/crates/iceberg/src/arrow/value.rs b/crates/iceberg/src/arrow/value.rs index 190aba08e8..30b47d83fc 100644 --- a/crates/iceberg/src/arrow/value.rs +++ b/crates/iceberg/src/arrow/value.rs @@ -27,7 +27,7 @@ use arrow_buffer::NullBuffer; use arrow_schema::{DataType, FieldRef}; use uuid::Uuid; -use super::get_field_id; +use super::get_field_id_from_metadata; use crate::spec::{ ListType, Literal, Map, MapType, NestedField, PartnerAccessor, PrimitiveLiteral, PrimitiveType, SchemaWithPartnerVisitor, Struct, StructType, Type, visit_struct_with_partner, @@ -450,7 +450,7 @@ impl FieldMatchMode { /// Determines if an Arrow field matches an Iceberg field based on the matching mode. 
pub fn match_field(&self, arrow_field: &FieldRef, iceberg_field: &NestedField) -> bool { match self { - FieldMatchMode::Id => get_field_id(arrow_field) + FieldMatchMode::Id => get_field_id_from_metadata(arrow_field) .map(|id| id == iceberg_field.id) .unwrap_or(false), FieldMatchMode::Name => arrow_field.name() == &iceberg_field.name, diff --git a/crates/iceberg/src/spec/mod.rs b/crates/iceberg/src/spec/mod.rs index 44b35e5a6b..a2b540f08b 100644 --- a/crates/iceberg/src/spec/mod.rs +++ b/crates/iceberg/src/spec/mod.rs @@ -49,6 +49,7 @@ pub use snapshot_summary::*; pub use sort::*; pub use statistic_file::*; pub use table_metadata::*; +pub(crate) use table_metadata_builder::FIRST_FIELD_ID; pub use table_properties::*; pub use transform::*; pub use values::*; diff --git a/crates/iceberg/src/spec/schema/mod.rs b/crates/iceberg/src/spec/schema/mod.rs index 7080b6e700..13ad41818b 100644 --- a/crates/iceberg/src/spec/schema/mod.rs +++ b/crates/iceberg/src/spec/schema/mod.rs @@ -102,8 +102,8 @@ impl SchemaBuilder { /// Reassignment starts from the field-id specified in `start_from` (inclusive). /// /// All specified aliases and identifier fields will be updated to the new field-ids. 
- pub(crate) fn with_reassigned_field_ids(mut self, start_from: u32) -> Self { - self.reassign_field_ids_from = Some(start_from.try_into().unwrap_or(i32::MAX)); + pub(crate) fn with_reassigned_field_ids(mut self, start_from: i32) -> Self { + self.reassign_field_ids_from = Some(start_from); self } diff --git a/crates/iceberg/src/spec/table_metadata_builder.rs b/crates/iceberg/src/spec/table_metadata_builder.rs index eee4fec345..3db327d48a 100644 --- a/crates/iceberg/src/spec/table_metadata_builder.rs +++ b/crates/iceberg/src/spec/table_metadata_builder.rs @@ -31,7 +31,7 @@ use crate::error::{Error, ErrorKind, Result}; use crate::spec::{EncryptedKey, INITIAL_ROW_ID, MIN_FORMAT_VERSION_ROW_LINEAGE}; use crate::{TableCreation, TableUpdate}; -const FIRST_FIELD_ID: u32 = 1; +pub(crate) const FIRST_FIELD_ID: i32 = 1; /// Manipulating table metadata. /// From a4054140d9ea66022a11835afa3a68bc4eb72cec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Eickler?= <797483+eickler@users.noreply.github.com> Date: Tue, 30 Dec 2025 02:32:38 +0100 Subject: [PATCH 45/58] fix: MemoryCatalog to return absolute NamespaceIdents (#1970) ## Which issue does this PR close? - Closes #1969. ## What changes are included in this PR? The change makes `list_namespaces(parent)` return an absolute namespace identifier instead of a relative. ## Are these changes tested? The associated unit tests are updated with the same behaviour as SqlCatalog. --- crates/iceberg/src/catalog/memory/catalog.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index cfa3dc6b52..df0299acb2 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -163,8 +163,12 @@ impl Catalog for MemoryCatalog { let namespaces = root_namespace_state .list_namespaces_under(parent_namespace_ident)? 
.into_iter() - .map(|name| NamespaceIdent::new(name.to_string())) - .collect_vec(); + .map(|name| { + let mut names = parent_namespace_ident.iter().cloned().collect::>(); + names.push(name.to_string()); + NamespaceIdent::from_vec(names) + }) + .collect::>>()?; Ok(namespaces) } @@ -599,7 +603,7 @@ pub(crate) mod tests { .list_namespaces(Some(&namespace_ident_1)) .await .unwrap(), - vec![NamespaceIdent::new("b".into())] + vec![namespace_ident_2] ); } @@ -628,9 +632,9 @@ pub(crate) mod tests { .unwrap() ), to_set(vec![ - NamespaceIdent::new("a".into()), - NamespaceIdent::new("b".into()), - NamespaceIdent::new("c".into()), + namespace_ident_2, + namespace_ident_3, + namespace_ident_4, ]) ); } From 2301352019bc9f5cdfd488bf829f92c6c3d0da25 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Sun, 4 Jan 2026 17:57:47 -0800 Subject: [PATCH 46/58] fix(spec): Include delete file content to V3 manifest (#1979) ## Which issue does this PR close? - Closes #1973 ## What changes are included in this PR? - Write `content` to V3 manifest so the field is preserved correctly for delete entry ## Are these changes tested? 
Yes --- crates/iceberg/src/spec/manifest/writer.rs | 98 +++++++++++++++++++++- 1 file changed, 95 insertions(+), 3 deletions(-) diff --git a/crates/iceberg/src/spec/manifest/writer.rs b/crates/iceberg/src/spec/manifest/writer.rs index 2fb6a42062..0669651603 100644 --- a/crates/iceberg/src/spec/manifest/writer.rs +++ b/crates/iceberg/src/spec/manifest/writer.rs @@ -437,9 +437,12 @@ impl ManifestWriter { "format-version".to_string(), (self.metadata.format_version as u8).to_string(), )?; - if self.metadata.format_version == FormatVersion::V2 { - avro_writer - .add_user_metadata("content".to_string(), self.metadata.content.to_string())?; + match self.metadata.format_version { + FormatVersion::V1 => {} + FormatVersion::V2 | FormatVersion::V3 => { + avro_writer + .add_user_metadata("content".to_string(), self.metadata.content.to_string())?; + } } let partition_summary = self.construct_partition_summaries(&partition_type)?; @@ -708,4 +711,93 @@ mod tests { entries[0].file_sequence_number = None; assert_eq!(actual_manifest, Manifest::new(metadata, entries)); } + + #[tokio::test] + async fn test_v3_delete_manifest_delete_file_roundtrip() { + let schema = Arc::new( + Schema::builder() + .with_fields(vec![ + Arc::new(NestedField::optional( + 1, + "id", + Type::Primitive(PrimitiveType::Long), + )), + Arc::new(NestedField::optional( + 2, + "data", + Type::Primitive(PrimitiveType::String), + )), + ]) + .build() + .unwrap(), + ); + + let partition_spec = PartitionSpec::builder(schema.clone()) + .with_spec_id(0) + .build() + .unwrap(); + + // Create a position delete file entry + let delete_entry = ManifestEntry { + status: ManifestStatus::Added, + snapshot_id: None, + sequence_number: None, + file_sequence_number: None, + data_file: DataFile { + content: DataContentType::PositionDeletes, + file_path: "s3://bucket/table/data/delete-00000.parquet".to_string(), + file_format: DataFileFormat::Parquet, + partition: Struct::empty(), + record_count: 10, + file_size_in_bytes: 1024, + 
column_sizes: HashMap::new(), + value_counts: HashMap::new(), + null_value_counts: HashMap::new(), + nan_value_counts: HashMap::new(), + lower_bounds: HashMap::new(), + upper_bounds: HashMap::new(), + key_metadata: None, + split_offsets: None, + equality_ids: None, + sort_order_id: None, + partition_spec_id: 0, + first_row_id: None, + referenced_data_file: None, + content_offset: None, + content_size_in_bytes: None, + }, + }; + + // Write a V3 delete manifest + let tmp_dir = TempDir::new().unwrap(); + let path = tmp_dir.path().join("v3_delete_manifest.avro"); + let io = FileIOBuilder::new_fs_io().build().unwrap(); + let output_file = io.new_output(path.to_str().unwrap()).unwrap(); + + let mut writer = ManifestWriterBuilder::new( + output_file, + Some(1), + None, + schema.clone(), + partition_spec.clone(), + ) + .build_v3_deletes(); + + writer.add_entry(delete_entry).unwrap(); + let manifest_file = writer.write_manifest_file().await.unwrap(); + + // The returned ManifestFile correctly reports Deletes content + assert_eq!(manifest_file.content, ManifestContentType::Deletes); + + // Read back the manifest file + let actual_manifest = + Manifest::parse_avro(fs::read(&path).expect("read_file must succeed").as_slice()) + .unwrap(); + + // Verify the content type is correctly preserved as Deletes + assert_eq!( + actual_manifest.metadata().content, + ManifestContentType::Deletes, + ); + } } From a321fe59ed5fdbb14f4404f0ecc50d9c30cbf942 Mon Sep 17 00:00:00 2001 From: Alan Tang Date: Mon, 5 Jan 2026 15:04:23 +0800 Subject: [PATCH 47/58] fix: fix typo check error (#1989) ## Which issue does this PR close? - Closes #1986. ## What changes are included in this PR? ## Are these changes tested? Signed-off-by: StandingMan --- .typos.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.typos.toml b/.typos.toml index 9363f17c9a..407ce8168c 100644 --- a/.typos.toml +++ b/.typos.toml @@ -15,5 +15,8 @@ # specific language governing permissions and limitations # under the License. 
+[type.rust] +extend-ignore-identifiers-re = ["^bimap$"] + [files] extend-exclude = ["**/testdata", "CHANGELOG.md"] From 4d09ba216657d38102b5a33e02f9e7764cbdeba1 Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Mon, 5 Jan 2026 19:45:58 +0800 Subject: [PATCH 48/58] Fix ci audit failure (#1988) --- .cargo/audit.toml | 8 ++++++++ .github/workflows/audit.yml | 1 - 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.cargo/audit.toml b/.cargo/audit.toml index a46052f3b5..09e2d35c50 100644 --- a/.cargo/audit.toml +++ b/.cargo/audit.toml @@ -25,4 +25,12 @@ ignore = [ # # Introduced by hive_metastore, tracked at https://github.com/cloudwego/pilota/issues/293 "RUSTSEC-2024-0388", + # `paste` is unmaintained; consider using an alternative + # + # Introduced by hive_metastore, tracked at https://github.com/cloudwego/pilota/issues/293 + "RUSTSEC-2024-0436", + # `rustls-pemfile` is unmaintained + # + # Introduced by object_store, see https://github.com/apache/arrow-rs-object-store/issues/564 + "RUSTSEC-2025-0134", ] diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index 28c8a3db75..fc6b1224e8 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -48,4 +48,3 @@ jobs: - uses: rustsec/audit-check@v2.0.0 with: token: ${{ secrets.GITHUB_TOKEN }} - ignore: RUSTSEC-2024-0436 From 9844638c48e2d4b123c9df4f89330cfd1e8bfcba Mon Sep 17 00:00:00 2001 From: Alan Tang Date: Tue, 6 Jan 2026 10:43:11 +0800 Subject: [PATCH 49/58] feat: make FanoutWriter writer configurable (#1962) ## Which issue does this PR close? - Closes #1834. ## What changes are included in this PR? - Fellow on #1872. ## Are these changes tested? 
--------- Signed-off-by: StandingMan --- crates/iceberg/src/spec/table_properties.rs | 12 ++++++++++ .../datafusion/src/physical_plan/write.rs | 24 +++++++++++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/crates/iceberg/src/spec/table_properties.rs b/crates/iceberg/src/spec/table_properties.rs index 4975456010..413604f51c 100644 --- a/crates/iceberg/src/spec/table_properties.rs +++ b/crates/iceberg/src/spec/table_properties.rs @@ -49,6 +49,8 @@ pub struct TableProperties { pub write_format_default: String, /// The target file size for files. pub write_target_file_size_bytes: usize, + /// Whether to use `FanoutWriter` for partitioned tables. + pub write_datafusion_fanout_enabled: bool, } impl TableProperties { @@ -137,6 +139,11 @@ impl TableProperties { pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES: &str = "write.target-file-size-bytes"; /// Default target file size pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT: usize = 512 * 1024 * 1024; // 512 MB + /// Whether to use `FanoutWriter` for partitioned tables (handles unsorted data). + /// If false, uses `ClusteredWriter` (requires sorted data, more memory efficient). 
+ pub const PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED: &str = "write.datafusion.fanout.enabled"; + /// Default value for fanout writer enabled + pub const PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT: bool = true; } impl TryFrom<&HashMap> for TableProperties { @@ -175,6 +182,11 @@ impl TryFrom<&HashMap> for TableProperties { TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES, TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT, )?, + write_datafusion_fanout_enabled: parse_property( + props, + TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED, + TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT, + )?, }) } } diff --git a/crates/integrations/datafusion/src/physical_plan/write.rs b/crates/integrations/datafusion/src/physical_plan/write.rs index 9eb53c235f..fdfddf877b 100644 --- a/crates/integrations/datafusion/src/physical_plan/write.rs +++ b/crates/integrations/datafusion/src/physical_plan/write.rs @@ -266,8 +266,28 @@ impl ExecutionPlan for IcebergWriteExec { let data_file_writer_builder = DataFileWriterBuilder::new(rolling_writer_builder); // Create TaskWriter - // TODO: Make fanout_enabled configurable via table properties - let fanout_enabled = true; + let fanout_enabled = self + .table + .metadata() + .properties() + .get(TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED) + .map(|value| { + value + .parse::() + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid value for {}, expected 'true' or 'false'", + TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED + ), + ) + .with_source(e) + }) + .map_err(to_datafusion_error) + }) + .transpose()? 
+ .unwrap_or(TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT); let schema = self.table.metadata().current_schema().clone(); let partition_spec = self.table.metadata().default_partition_spec().clone(); let task_writer = TaskWriter::try_new( From 434ab9603e7cd9969b45fd54f1d6a3c40951145f Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Wed, 7 Jan 2026 08:35:33 +0800 Subject: [PATCH 50/58] fix: Address RUSTSEC-2026-0001 (#1994) ## Which issue does this PR close? - Closes #1992 - Closes #1993 ## What changes are included in this PR? Update dependency to upgrade rkyv, but we still have to ignore it and wait for rust_decimal to resolve it. ## Are these changes tested? CI. --- .cargo/audit.toml | 3 + Cargo.lock | 26 ++++---- Cargo.toml | 4 +- bindings/python/Cargo.lock | 131 ++++++++++++++++++++++++++++++++++--- bindings/python/Cargo.toml | 6 ++ 5 files changed, 146 insertions(+), 24 deletions(-) diff --git a/.cargo/audit.toml b/.cargo/audit.toml index 09e2d35c50..d403f0ac5a 100644 --- a/.cargo/audit.toml +++ b/.cargo/audit.toml @@ -33,4 +33,7 @@ ignore = [ # # Introduced by object_store, see https://github.com/apache/arrow-rs-object-store/issues/564 "RUSTSEC-2025-0134", + + # Tracked here: https://github.com/paupino/rust-decimal/issues/766 + "RUSTSEC-2026-0001", ] diff --git a/Cargo.lock b/Cargo.lock index 97ee25d658..73494910b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2715,7 +2715,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baec6a0289d7f1fe5665586ef7340af82e3037207bef60f5785e57569776f0c8" dependencies = [ "bytes", - "rkyv 0.8.12", + "rkyv 0.8.13", "serde", "simdutf8", ] @@ -5525,9 +5525,9 @@ dependencies = [ [[package]] name = "rkyv" -version = "0.7.45" +version = "0.7.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" +checksum = "2297bf9c81a3f0dc96bc9521370b88f054168c29826a75e89c55ff196e7ed6a1" dependencies = [ 
"bitvec", "bytecheck", @@ -5535,7 +5535,7 @@ dependencies = [ "hashbrown 0.12.3", "ptr_meta 0.1.4", "rend 0.4.2", - "rkyv_derive 0.7.45", + "rkyv_derive 0.7.46", "seahash", "tinyvec", "uuid", @@ -5543,27 +5543,27 @@ dependencies = [ [[package]] name = "rkyv" -version = "0.8.12" +version = "0.8.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35a640b26f007713818e9a9b65d34da1cf58538207b052916a83d80e43f3ffa4" +checksum = "8b2e88acca7157d83d789836a3987dafc12bc3d88a050e54b8fe9ea4aaa29d20" dependencies = [ "bytes", - "hashbrown 0.15.5", + "hashbrown 0.16.1", "indexmap 2.12.1", "munge", "ptr_meta 0.3.1", "rancor", "rend 0.5.3", - "rkyv_derive 0.8.12", + "rkyv_derive 0.8.13", "tinyvec", "uuid", ] [[package]] name = "rkyv_derive" -version = "0.7.45" +version = "0.7.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" +checksum = "84d7b42d4b8d06048d3ac8db0eb31bcb942cbeb709f0b5f2b2ebde398d3038f5" dependencies = [ "proc-macro2", "quote", @@ -5572,9 +5572,9 @@ dependencies = [ [[package]] name = "rkyv_derive" -version = "0.8.12" +version = "0.8.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd83f5f173ff41e00337d97f6572e416d022ef8a19f371817259ae960324c482" +checksum = "7f6dffea3c91fa91a3c0fc8a061b0e27fef25c6304728038a6d6bcb1c58ba9bd" dependencies = [ "proc-macro2", "quote", @@ -5663,7 +5663,7 @@ dependencies = [ "num-traits", "postgres-types", "rand 0.8.5", - "rkyv 0.7.45", + "rkyv 0.7.46", "serde", "serde_json", ] diff --git a/Cargo.toml b/Cargo.toml index d099398dbd..56cd1801cc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -109,7 +109,7 @@ rand = "0.8.5" regex = "1.11.3" reqwest = { version = "0.12.12", default-features = false, features = ["json"] } roaring = { version = "0.11" } -rust_decimal = "1.37.2" +rust_decimal = { version = "1.39", default-features = false, features = ["std"] } serde = { version = "1.0.219", 
features = ["rc"] } serde_bytes = "0.11.17" serde_derive = "1.0.219" @@ -131,4 +131,4 @@ url = "2.5.7" uuid = { version = "1.18", features = ["v7"] } volo = "0.10.6" volo-thrift = "0.10.8" -zstd = "0.13.3" +zstd = "0.13.3" \ No newline at end of file diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 4647f9d886..d33abed581 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -668,8 +668,20 @@ version = "0.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" dependencies = [ - "bytecheck_derive", - "ptr_meta", + "bytecheck_derive 0.6.12", + "ptr_meta 0.1.4", + "simdutf8", +] + +[[package]] +name = "bytecheck" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0caa33a2c0edca0419d15ac723dff03f1956f7978329b1e3b5fdaaaed9d3ca8b" +dependencies = [ + "bytecheck_derive 0.8.2", + "ptr_meta 0.3.1", + "rancor", "simdutf8", ] @@ -684,6 +696,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "bytecheck_derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89385e82b5d1821d2219e0b095efa2cc1f246cbf99080f3be46a1a85c0d392d9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = "bytemuck" version = "1.24.0" @@ -2350,6 +2373,7 @@ dependencies = [ "rand 0.8.5", "reqsign", "reqwest", + "rkyv 0.8.13", "roaring", "rust_decimal", "serde", @@ -2841,6 +2865,26 @@ dependencies = [ "uuid", ] +[[package]] +name = "munge" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e17401f259eba956ca16491461b6e8f72913a0a114e39736ce404410f915a0c" +dependencies = [ + "munge_macro", +] + +[[package]] +name = "munge_macro" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = "murmur3" version = "0.5.2" @@ -3220,7 +3264,16 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" dependencies = [ - "ptr_meta_derive", + "ptr_meta_derive 0.1.4", +] + +[[package]] +name = "ptr_meta" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b9a0cf95a1196af61d4f1cbdab967179516d9a4a4312af1f31948f8f6224a79" +dependencies = [ + "ptr_meta_derive 0.3.1", ] [[package]] @@ -3234,6 +3287,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "ptr_meta_derive" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = "pyiceberg_core_rust" version = "0.8.0" @@ -3243,6 +3307,7 @@ dependencies = [ "iceberg", "iceberg-datafusion", "pyo3", + "rust_decimal", "tokio", ] @@ -3409,6 +3474,15 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +[[package]] +name = "rancor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a063ea72381527c2a0561da9c80000ef822bdd7c3241b1cc1b12100e3df081ee" +dependencies = [ + "ptr_meta 0.3.1", +] + [[package]] name = "rand" version = "0.8.5" @@ -3564,7 +3638,16 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" dependencies = [ - "bytecheck", + "bytecheck 0.6.12", +] + +[[package]] +name = "rend" +version = "0.5.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cadadef317c2f20755a64d7fdc48f9e7178ee6b0e1f7fce33fa60f1d68a276e6" +dependencies = [ + "bytecheck 0.8.2", ] [[package]] @@ -3667,17 +3750,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" dependencies = [ "bitvec", - "bytecheck", + "bytecheck 0.6.12", "bytes", "hashbrown 0.12.3", - "ptr_meta", - "rend", - "rkyv_derive", + "ptr_meta 0.1.4", + "rend 0.4.2", + "rkyv_derive 0.7.45", "seahash", "tinyvec", "uuid", ] +[[package]] +name = "rkyv" +version = "0.8.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2e88acca7157d83d789836a3987dafc12bc3d88a050e54b8fe9ea4aaa29d20" +dependencies = [ + "bytecheck 0.8.2", + "bytes", + "hashbrown 0.16.0", + "indexmap 2.12.0", + "munge", + "ptr_meta 0.3.1", + "rancor", + "rend 0.5.3", + "rkyv_derive 0.8.13", + "tinyvec", + "uuid", +] + [[package]] name = "rkyv_derive" version = "0.7.45" @@ -3689,6 +3791,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "rkyv_derive" +version = "0.8.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6dffea3c91fa91a3c0fc8a061b0e27fef25c6304728038a6d6bcb1c58ba9bd" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = "roaring" version = "0.11.2" @@ -3749,7 +3862,7 @@ dependencies = [ "bytes", "num-traits", "rand 0.8.5", - "rkyv", + "rkyv 0.7.45", "serde", "serde_json", ] diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 9ec58cf807..8346d02703 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -37,6 +37,8 @@ pyo3 = { version = "0.26", features = ["extension-module", "abi3-py310"] } iceberg-datafusion = { path = "../../crates/integrations/datafusion" } datafusion-ffi = { version = "51.0" } tokio = { version = "1.46.1", default-features = false } +# Security: disable rkyv feature to 
avoid RUSTSEC-2026-0001 (rkyv 0.7.45 vulnerability) +rust_decimal = { version = "1.39", default-features = false, features = ["std"] } [profile.release] codegen-units = 1 @@ -44,3 +46,7 @@ debug = false lto = "thin" opt-level = "z" strip = true + +[package.metadata.cargo-machete] +# rust_decimal is included to override feature flags for security (disable rkyv) +ignored = ["rust_decimal"] From 1b9fa3ae52eb1700cc2130301087e6c89c436004 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 7 Jan 2026 09:06:17 +0800 Subject: [PATCH 51/58] chore(deps): Bump reqwest from 0.12.25 to 0.12.28 (#1983) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [reqwest](https://github.com/seanmonstar/reqwest) from 0.12.25 to 0.12.28.
Release notes

Sourced from reqwest's releases.

v0.12.28

What's Changed

Full Changelog: https://github.com/seanmonstar/reqwest/compare/v0.12.27...v0.12.28

v0.12.27

tl;dr

  • Add ClientBuilder::windows_named_pipe(name) option that will force all requests over that Windows Named Pipe.

What's Changed

Full Changelog: https://github.com/seanmonstar/reqwest/compare/v0.12.26...v0.12.27

v0.12.26

tl;dr

  • Fix sending Accept-Encoding header only with values configured with reqwest, regardless of underlying tower-http config.

What's Changed

Full Changelog: https://github.com/seanmonstar/reqwest/compare/v0.12.25...v0.12.26

Changelog

Sourced from reqwest's changelog.

v0.12.28

  • Fix compiling on Windows if TLS and SOCKS features are not enabled.

v0.12.27

  • Add ClientBuilder::windows_named_pipe(name) option that will force all requests over that Windows Named Pipe.

v0.12.26

  • Fix sending Accept-Encoding header only with values configured with reqwest, regardless of underlying tower-http config.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=reqwest&package-manager=cargo&previous-version=0.12.25&new-version=0.12.28)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Renjie Liu --- Cargo.lock | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 73494910b6..794c82b9c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2497,7 +2497,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2639,7 +2639,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -3312,7 +3312,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.1", + "socket2 0.5.10", "tokio", "tower-service", "tracing", @@ -3844,7 +3844,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -4365,7 +4365,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -5165,7 +5165,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.35", - "socket2 0.6.1", + "socket2 0.5.10", "thiserror 2.0.17", "tokio", "tracing", @@ -5202,9 +5202,9 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.1", + "socket2 0.5.10", "tracing", - "windows-sys 0.60.2", + "windows-sys 0.59.0", ] [[package]] @@ -5468,9 +5468,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.25" +version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6eff9328d40131d43bd911d42d79eb6a47312002a4daefc9e37f17e74a7701a" +checksum = 
"eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ "base64", "bytes", @@ -5696,7 +5696,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -6698,7 +6698,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -7513,7 +7513,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] From 99461bf2dca63de4109a65b13a9b05e4334126f2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 7 Jan 2026 10:26:12 +0800 Subject: [PATCH 52/58] chore(deps): Bump rsa from 0.9.9 to 0.9.10 (#2004) Bumps [rsa](https://github.com/RustCrypto/RSA) from 0.9.9 to 0.9.10.
Changelog

Sourced from rsa's changelog.

0.9.10 (2026-01-06)

Fixed

  • do not panic on a prime being 1 when loading a secret key (#624)

#624: RustCrypto/RSA#624

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=rsa&package-manager=cargo&previous-version=0.9.9&new-version=0.9.10)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/apache/iceberg-rust/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Renjie Liu --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 794c82b9c8..77d60a167f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5593,9 +5593,9 @@ dependencies = [ [[package]] name = "rsa" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40a0376c50d0358279d9d643e4bf7b7be212f1f4ff1da9070a7b54d22ef75c88" +checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" dependencies = [ "const-oid", "digest", From 7f2dda3a3807e89b162785b4f16d655ca6b84f79 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 7 Jan 2026 10:27:15 +0800 Subject: [PATCH 53/58] Bump to version 0.8.0 Round 2 (#1999) ## Which issue does this PR close? - Closes #. ## What changes are included in this PR? This PR will start another round of v0.8.0 release ## Are these changes tested? --- CHANGELOG.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50d4576dfb..bd35e6b5d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,7 +24,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/) and this project adheres to [Semantic Versioning](https://semver.org/). -## [v0.8.0] - 2025-12-16 +## [v0.8.0] - 2026-01-06 ### Breaking Changes @@ -167,6 +167,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/). 
* Remove wildcard pattern in exhaustive enums by @lgingerich in https://github.com/apache/iceberg-rust/pull/1925 * fix: prioritize delete manifests to prevent scan deadlock by @dojiong in https://github.com/apache/iceberg-rust/pull/1937 * feat: Include statistics for Reserved Fields by @Fokko in https://github.com/apache/iceberg-rust/pull/1849 +* fix(website): Update expected messages by @CTTY in https://github.com/apache/iceberg-rust/pull/1942 +* feat: Implement shared delete file loading and caching for ArrowReader by @dojiong in https://github.com/apache/iceberg-rust/pull/1941 +* infra: license header check ignore target/ dir by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1954 +* infra: release script, validate proper ICEBERG_VERSION variable by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1956 +* refactor(arrow): Rename parameter in delete_filter for clarity by @robertmu in https://github.com/apache/iceberg-rust/pull/1955 +* feat(sqllogictest): use serde derived structs for schedule parsing by @AndreaBozzo in https://github.com/apache/iceberg-rust/pull/1953 +* fix: follow IEEE 754 totalOrder for `float` and `double` by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1959 +* fix: return proper error rather than persisting error message on snapshot by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1960 +* feat(arrow): Convert Arrow schema to Iceberg schema with auto assigned field ids by @CTTY in https://github.com/apache/iceberg-rust/pull/1928 +* fix: MemoryCatalog to return absolute NamespaceIdents by @eickler in https://github.com/apache/iceberg-rust/pull/1970 +* fix(spec): Include delete file content to V3 manifest by @CTTY in https://github.com/apache/iceberg-rust/pull/1979 +* fix: fix typo check error by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1989 +* Fix ci audit failure by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/1988 +* feat: make FanoutWriter writer 
configurable by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1962 ## [v0.7.0] - 2025-09-23 From 6fa878590a70f4078e6fbefa2751513662d421b4 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Wed, 7 Jan 2026 17:42:45 -0800 Subject: [PATCH 54/58] feat(datafusion): Apply SortExec when writing in clustered mode (#2005) ## Which issue does this PR close? - Closes #1540 ## What changes are included in this PR? - When writing in clustered mode, use `sort_by_partition` to sort the data so the clustered writer can consume it ## Are these changes tested? - Added simple unit tests to verify that the `SortExec` is applied correctly --- .../datafusion/src/physical_plan/sort.rs | 3 - .../integrations/datafusion/src/table/mod.rs | 178 +++++++++++++++++- 2 files changed, 177 insertions(+), 4 deletions(-) diff --git a/crates/integrations/datafusion/src/physical_plan/sort.rs b/crates/integrations/datafusion/src/physical_plan/sort.rs index ede2547535..587ab120ca 100644 --- a/crates/integrations/datafusion/src/physical_plan/sort.rs +++ b/crates/integrations/datafusion/src/physical_plan/sort.rs @@ -42,9 +42,6 @@ use iceberg::arrow::PROJECTED_PARTITION_VALUE_COLUMN; /// # Returns /// * `Ok(Arc)` - A SortExec that sorts by partition values /// * `Err` - If the partition column is not found -/// -/// TODO remove dead_code mark when integrating with insert_into -#[allow(dead_code)] pub(crate) fn sort_by_partition(input: Arc) -> DFResult> { let schema = input.schema(); diff --git a/crates/integrations/datafusion/src/table/mod.rs b/crates/integrations/datafusion/src/table/mod.rs index 86a79611b3..ad616542a4 100644 --- a/crates/integrations/datafusion/src/table/mod.rs +++ b/crates/integrations/datafusion/src/table/mod.rs @@ -44,6 +44,7 @@ use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; use iceberg::arrow::schema_to_arrow_schema; use iceberg::inspect::MetadataTableType; +use iceberg::spec::TableProperties; use
iceberg::table::Table; use iceberg::{Catalog, Error, ErrorKind, NamespaceIdent, Result, TableIdent}; use metadata_table::IcebergMetadataTableProvider; @@ -53,6 +54,7 @@ use crate::physical_plan::commit::IcebergCommitExec; use crate::physical_plan::project::project_with_partition; use crate::physical_plan::repartition::repartition; use crate::physical_plan::scan::IcebergTableScan; +use crate::physical_plan::sort::sort_by_partition; use crate::physical_plan::write::IcebergWriteExec; /// Catalog-backed table provider with automatic metadata refresh. @@ -185,9 +187,38 @@ impl TableProvider for IcebergTableProvider { let repartitioned_plan = repartition(plan_with_partition, table.metadata_ref(), target_partitions)?; + // Apply sort node when it's not fanout mode + let fanout_enabled = table + .metadata() + .properties() + .get(TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED) + .map(|value| { + value + .parse::() + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid value for {}, expected 'true' or 'false'", + TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED + ), + ) + .with_source(e) + }) + .map_err(to_datafusion_error) + }) + .transpose()? + .unwrap_or(TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT); + + let write_input = if fanout_enabled { + repartitioned_plan + } else { + sort_by_partition(repartitioned_plan)? 
+ }; + let write_plan = Arc::new(IcebergWriteExec::new( table.clone(), - repartitioned_plan, + write_input, self.schema.clone(), )); @@ -321,6 +352,7 @@ mod tests { use std::sync::Arc; use datafusion::common::Column; + use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionContext; use iceberg::io::FileIO; use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder}; @@ -598,4 +630,148 @@ mod tests { assert_eq!(logical_field.data_type(), physical_field.data_type()); } } + + async fn get_partitioned_test_catalog_and_table( + fanout_enabled: Option, + ) -> (Arc, NamespaceIdent, String, TempDir) { + use iceberg::spec::{Transform, UnboundPartitionSpec}; + + let temp_dir = TempDir::new().unwrap(); + let warehouse_path = temp_dir.path().to_str().unwrap().to_string(); + + let catalog = MemoryCatalogBuilder::default() + .load( + "memory", + HashMap::from([(MEMORY_CATALOG_WAREHOUSE.to_string(), warehouse_path.clone())]), + ) + .await + .unwrap(); + + let namespace = NamespaceIdent::new("test_ns".to_string()); + catalog + .create_namespace(&namespace, HashMap::new()) + .await + .unwrap(); + + let schema = Schema::builder() + .with_schema_id(0) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::required(2, "category", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build() + .unwrap(); + + let partition_spec = UnboundPartitionSpec::builder() + .with_spec_id(0) + .add_partition_field(2, "category", Transform::Identity) + .unwrap() + .build(); + + let mut properties = HashMap::new(); + if let Some(enabled) = fanout_enabled { + properties.insert( + iceberg::spec::TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED + .to_string(), + enabled.to_string(), + ); + } + + let table_creation = TableCreation::builder() + .name("partitioned_table".to_string()) + .location(format!("{warehouse_path}/partitioned_table")) + .schema(schema) + .partition_spec(partition_spec) + 
.properties(properties) + .build(); + + catalog + .create_table(&namespace, table_creation) + .await + .unwrap(); + + ( + Arc::new(catalog), + namespace, + "partitioned_table".to_string(), + temp_dir, + ) + } + + /// Helper to check if a plan contains a SortExec node + fn plan_contains_sort(plan: &Arc) -> bool { + if plan.name() == "SortExec" { + return true; + } + for child in plan.children() { + if plan_contains_sort(child) { + return true; + } + } + false + } + + #[tokio::test] + async fn test_insert_plan_fanout_enabled_no_sort() { + use datafusion::datasource::TableProvider; + use datafusion::logical_expr::dml::InsertOp; + use datafusion::physical_plan::empty::EmptyExec; + + // When fanout is enabled (default), no sort node should be added + let (catalog, namespace, table_name, _temp_dir) = + get_partitioned_test_catalog_and_table(Some(true)).await; + + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let input_schema = provider.schema(); + let input = Arc::new(EmptyExec::new(input_schema)) as Arc; + + let state = ctx.state(); + let insert_plan = provider + .insert_into(&state, input, InsertOp::Append) + .await + .unwrap(); + + // With fanout enabled, there should be no SortExec in the plan + assert!( + !plan_contains_sort(&insert_plan), + "Plan should NOT contain SortExec when fanout is enabled" + ); + } + + #[tokio::test] + async fn test_insert_plan_fanout_disabled_has_sort() { + use datafusion::datasource::TableProvider; + use datafusion::logical_expr::dml::InsertOp; + use datafusion::physical_plan::empty::EmptyExec; + + // When fanout is disabled, a sort node should be added + let (catalog, namespace, table_name, _temp_dir) = + get_partitioned_test_catalog_and_table(Some(false)).await; + + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx = 
SessionContext::new(); + let input_schema = provider.schema(); + let input = Arc::new(EmptyExec::new(input_schema)) as Arc; + + let state = ctx.state(); + let insert_plan = provider + .insert_into(&state, input, InsertOp::Append) + .await + .unwrap(); + + // With fanout disabled, there should be a SortExec in the plan + assert!( + plan_contains_sort(&insert_plan), + "Plan should contain SortExec when fanout is disabled" + ); + } } From 700e62e00009a781cab5c8d321eb94173dfe7676 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:20:28 +0800 Subject: [PATCH 55/58] chore(deps): Bump crate-ci/typos from 1.40.0 to 1.41.0 (#1982) Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.40.0 to 1.41.0.
Release notes

Sourced from crate-ci/typos's releases.

v1.41.0

[1.41.0] - 2025-12-31

Features

v1.40.1

[1.40.1] - 2025-12-29

Fixes

  • Treat incrementer and incrementor the same for now

Fixes

  • Don't correct ITerm2
Changelog

Sourced from crate-ci/typos's changelog.

[1.41.0] - 2025-12-31

Features

[1.40.1] - 2025-12-29

Fixes

  • Treat incrementer and incrementor the same for now

Fixes

  • Don't correct ITerm2
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=crate-ci/typos&package-manager=github_actions&previous-version=1.40.0&new-version=1.41.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Renjie Liu --- .github/workflows/ci_typos.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_typos.yml b/.github/workflows/ci_typos.yml index 6406b82142..b79ab0c0d1 100644 --- a/.github/workflows/ci_typos.yml +++ b/.github/workflows/ci_typos.yml @@ -42,4 +42,4 @@ jobs: steps: - uses: actions/checkout@v6 - name: Check typos - uses: crate-ci/typos@v1.40.0 + uses: crate-ci/typos@v1.41.0 From 76cdf280231bf27ea888de15507d918392f08d44 Mon Sep 17 00:00:00 2001 From: Aditya Subrahmanyan Date: Wed, 7 Jan 2026 18:20:43 -0800 Subject: [PATCH 56/58] fix: Use correct byte representation for decimal hashing (#1998) ## Which issue does this PR close? - Closes #1981. ## What changes are included in this PR? The [spec](https://iceberg.apache.org/spec/#appendix-b-32-bit-hash-requirements) states that: >"Decimal values are hashed using the minimum number of bytes required to hold the unscaled value as a two's complement big-endian". Prior to this fix, we would incorrectly consume leading `0xFF` bytes and hash them. Now, we only consume the bytes starting with the one that is used to preserve the sign, and everything that follows it. ## Are these changes tested? 
Added unit tests for original scenario mentioned in the issue, as well as some additional cases --- crates/iceberg/src/transform/bucket.rs | 43 +++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/crates/iceberg/src/transform/bucket.rs b/crates/iceberg/src/transform/bucket.rs index 8807fb1f79..e6786a70ca 100644 --- a/crates/iceberg/src/transform/bucket.rs +++ b/crates/iceberg/src/transform/bucket.rs @@ -78,12 +78,26 @@ impl Bucket { /// ref: https://iceberg.apache.org/spec/#appendix-b-32-bit-hash-requirements #[inline] fn hash_decimal(v: i128) -> i32 { + if v == 0 { + return Self::hash_bytes(&[0]); + } + let bytes = v.to_be_bytes(); - if let Some(start) = bytes.iter().position(|&x| x != 0) { - Self::hash_bytes(&bytes[start..]) + let start = if v > 0 { + // Positive: skip 0x00 unless next byte would appear negative + bytes + .windows(2) + .position(|w| w[0] != 0x00 || w[1] & 0x80 != 0) + .unwrap_or(15) } else { - Self::hash_bytes(&[0]) - } + // Negative: skip 0xFF only if next byte stays negative + bytes + .windows(2) + .position(|w| w[0] != 0xFF || w[1] & 0x80 == 0) + .unwrap_or(15) + }; + + Self::hash_bytes(&bytes[start..]) } /// def bucket_N(x) = (murmur3_x86_32_hash(x) & Integer.MAX_VALUE) % N @@ -790,6 +804,27 @@ mod test { ); } + #[test] + fn test_hash_decimal_with_negative_value() { + // Test cases from GitHub issue #1981 + assert_eq!(Bucket::hash_decimal(1), -463810133); + assert_eq!(Bucket::hash_decimal(-1), -43192051); + + // Additional test cases for edge case values + assert_eq!(Bucket::hash_decimal(0), Bucket::hash_decimal(0)); + assert_eq!(Bucket::hash_decimal(127), Bucket::hash_decimal(127)); + assert_eq!(Bucket::hash_decimal(-128), Bucket::hash_decimal(-128)); + + // Test minimum representation is used + // -1 should hash as [0xFF] not [0xFF, 0xFF, ..., 0xFF] + // 128 should hash as [0x00, 0x80] not [0x00, 0x00, ..., 0x80] + assert_eq!(Bucket::hash_decimal(128), Bucket::hash_bytes(&[0x00, 0x80])); + assert_eq!( + 
Bucket::hash_decimal(-129), + Bucket::hash_bytes(&[0xFF, 0x7F]) + ); + } + #[test] fn test_int_literal() { let bucket = Bucket::new(10); From 43f1ed8362951a8be72101ac1625cb8634cd242e Mon Sep 17 00:00:00 2001 From: Aditya Subrahmanyan Date: Thu, 8 Jan 2026 18:50:04 -0800 Subject: [PATCH 57/58] fix: Reserved sort order ID cannot contain any fields (#1978) ## Which issue does this PR close? - Closes #1963. ## What changes are included in this PR? This change validates that table metadata with reserved sort order ID (0) cannot contain fields associated with it. If this is found, we error out instead of silently parsing arbitrary field values. ## Are these changes tested? Added the unit test described in the issue and verified that the check is now enforced. --- crates/iceberg/src/spec/table_metadata.rs | 66 +++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index 48b715da59..cfa25deccb 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -506,6 +506,19 @@ impl TableMetadata { /// If the default sort order is unsorted but the sort order is not present, add it fn try_normalize_sort_order(&mut self) -> Result<()> { + // Validate that sort order ID 0 (reserved for unsorted) has no fields + if let Some(sort_order) = self.sort_order_by_id(SortOrder::UNSORTED_ORDER_ID) + && !sort_order.fields.is_empty() + { + return Err(Error::new( + ErrorKind::Unexpected, + format!( + "Sort order ID {} is reserved for unsorted order", + SortOrder::UNSORTED_ORDER_ID + ), + )); + } + if self.sort_order_by_id(self.default_sort_order_id).is_some() { return Ok(()); } @@ -3795,4 +3808,57 @@ mod tests { assert!(final_metadata.name_exists_in_any_schema("new_field")); // only in current schema assert!(!final_metadata.name_exists_in_any_schema("never_existed")); } + + #[test] + fn test_invalid_sort_order_id_zero_with_fields() { + let metadata = 
r#" + { + "format-version": 2, + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", + "location": "s3://bucket/test/location", + "last-sequence-number": 111, + "last-updated-ms": 1600000000000, + "last-column-id": 3, + "current-schema-id": 1, + "schemas": [ + { + "type": "struct", + "schema-id": 1, + "fields": [ + {"id": 1, "name": "x", "required": true, "type": "long"}, + {"id": 2, "name": "y", "required": true, "type": "long"} + ] + } + ], + "default-spec-id": 0, + "partition-specs": [{"spec-id": 0, "fields": []}], + "last-partition-id": 999, + "default-sort-order-id": 0, + "sort-orders": [ + { + "order-id": 0, + "fields": [ + { + "transform": "identity", + "source-id": 1, + "direction": "asc", + "null-order": "nulls-first" + } + ] + } + ], + "properties": {}, + "current-snapshot-id": -1, + "snapshots": [] + } + "#; + + let result: Result = serde_json::from_str(metadata); + + // Should fail because sort order ID 0 is reserved for unsorted order and cannot have fields + assert!( + result.is_err(), + "Parsing should fail for sort order ID 0 with fields" + ); + } } From 65e368250c1e363c3c63de06248e4bc40ad37c58 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 9 Jan 2026 11:37:44 +0800 Subject: [PATCH 58/58] feat(datafusion): Add LIMIT pushdown support (#2006) Implement LIMIT pushdown to optimize queries with LIMIT clauses by stopping data processing once the limit is reached. This reduces unnecessary I/O and computation for queries that only need a subset of rows. Changes: - Add limit field to IcebergTableScan to track row limit - Apply limit at stream level by filtering/slicing record batches - Update IcebergTableProvider and IcebergStaticTableProvider to pass limit parameter to scan - Add comprehensive tests for limit pushdown functionality ## Which issue does this PR close? - Closes #. ## What changes are included in this PR? ## Are these changes tested? 
Co-authored-by: Claude Sonnet 4.5 --- .../datafusion/src/physical_plan/scan.rs | 30 +++++- .../integrations/datafusion/src/table/mod.rs | 98 ++++++++++++++++++- 2 files changed, 125 insertions(+), 3 deletions(-) diff --git a/crates/integrations/datafusion/src/physical_plan/scan.rs b/crates/integrations/datafusion/src/physical_plan/scan.rs index be92e93d25..d627b6a63d 100644 --- a/crates/integrations/datafusion/src/physical_plan/scan.rs +++ b/crates/integrations/datafusion/src/physical_plan/scan.rs @@ -51,6 +51,8 @@ pub struct IcebergTableScan { projection: Option>, /// Filters to apply to the table scan predicates: Option, + /// Optional limit on the number of rows to return + limit: Option, } impl IcebergTableScan { @@ -61,6 +63,7 @@ impl IcebergTableScan { schema: ArrowSchemaRef, projection: Option<&Vec>, filters: &[Expr], + limit: Option, ) -> Self { let output_schema = match projection { None => schema.clone(), @@ -76,6 +79,7 @@ impl IcebergTableScan { plan_properties, projection, predicates, + limit, } } @@ -95,6 +99,10 @@ impl IcebergTableScan { self.predicates.as_ref() } + pub fn limit(&self) -> Option { + self.limit + } + /// Computes [`PlanProperties`] used in query optimization. 
fn compute_properties(schema: ArrowSchemaRef) -> PlanProperties { // TODO: @@ -146,9 +154,29 @@ impl ExecutionPlan for IcebergTableScan { ); let stream = futures::stream::once(fut).try_flatten(); + // Apply limit if specified + let limited_stream: Pin> + Send>> = + if let Some(limit) = self.limit { + let mut remaining = limit; + Box::pin(stream.try_filter_map(move |batch| { + futures::future::ready(if remaining == 0 { + Ok(None) + } else if batch.num_rows() <= remaining { + remaining -= batch.num_rows(); + Ok(Some(batch)) + } else { + let limited_batch = batch.slice(0, remaining); + remaining = 0; + Ok(Some(limited_batch)) + }) + })) + } else { + Box::pin(stream) + }; + Ok(Box::pin(RecordBatchStreamAdapter::new( self.schema(), - stream, + limited_stream, ))) } } diff --git a/crates/integrations/datafusion/src/table/mod.rs b/crates/integrations/datafusion/src/table/mod.rs index ad616542a4..ae87342fa5 100644 --- a/crates/integrations/datafusion/src/table/mod.rs +++ b/crates/integrations/datafusion/src/table/mod.rs @@ -127,7 +127,7 @@ impl TableProvider for IcebergTableProvider { _state: &dyn Session, projection: Option<&Vec>, filters: &[Expr], - _limit: Option, + limit: Option, ) -> DFResult> { // Load fresh table metadata from catalog let table = self @@ -143,6 +143,7 @@ impl TableProvider for IcebergTableProvider { self.schema.clone(), projection, filters, + limit, ))) } @@ -311,7 +312,7 @@ impl TableProvider for IcebergStaticTableProvider { _state: &dyn Session, projection: Option<&Vec>, filters: &[Expr], - _limit: Option, + limit: Option, ) -> DFResult> { // Use cached table (no refresh) Ok(Arc::new(IcebergTableScan::new( @@ -320,6 +321,7 @@ impl TableProvider for IcebergStaticTableProvider { self.schema.clone(), projection, filters, + limit, ))) } @@ -774,4 +776,96 @@ mod tests { "Plan should contain SortExec when fanout is disabled" ); } + + #[tokio::test] + async fn test_limit_pushdown_static_provider() { + use datafusion::datasource::TableProvider; + + let 
table = get_test_table_from_metadata_file().await; + let table_provider = IcebergStaticTableProvider::try_new_from_table(table.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let state = ctx.state(); + + // Test scan with limit + let scan_plan = table_provider + .scan(&state, None, &[], Some(10)) + .await + .unwrap(); + + // Verify that the scan plan is an IcebergTableScan + let iceberg_scan = scan_plan + .as_any() + .downcast_ref::() + .expect("Expected IcebergTableScan"); + + // Verify the limit is set + assert_eq!( + iceberg_scan.limit(), + Some(10), + "Limit should be set to 10 in the scan plan" + ); + } + + #[tokio::test] + async fn test_limit_pushdown_catalog_backed_provider() { + use datafusion::datasource::TableProvider; + + let (catalog, namespace, table_name, _temp_dir) = get_test_catalog_and_table().await; + + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let state = ctx.state(); + + // Test scan with limit + let scan_plan = provider.scan(&state, None, &[], Some(5)).await.unwrap(); + + // Verify that the scan plan is an IcebergTableScan + let iceberg_scan = scan_plan + .as_any() + .downcast_ref::() + .expect("Expected IcebergTableScan"); + + // Verify the limit is set + assert_eq!( + iceberg_scan.limit(), + Some(5), + "Limit should be set to 5 in the scan plan" + ); + } + + #[tokio::test] + async fn test_no_limit_pushdown() { + use datafusion::datasource::TableProvider; + + let table = get_test_table_from_metadata_file().await; + let table_provider = IcebergStaticTableProvider::try_new_from_table(table.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let state = ctx.state(); + + // Test scan without limit + let scan_plan = table_provider.scan(&state, None, &[], None).await.unwrap(); + + // Verify that the scan plan is an IcebergTableScan + let iceberg_scan = scan_plan + .as_any() + 
.downcast_ref::() + .expect("Expected IcebergTableScan"); + + // Verify the limit is None + assert_eq!( + iceberg_scan.limit(), + None, + "Limit should be None when not specified" + ); + } }