diff --git a/.asf.yaml b/.asf.yaml index f46c437a79..b217fd7807 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -43,7 +43,9 @@ github: dismiss_stale_reviews: false required_linear_history: true - del_branch_on_merge: true + pull_requests: + # auto-delete head branches after being merged + del_branch_on_merge: true features: wiki: false issues: true diff --git a/.cargo/audit.toml b/.cargo/audit.toml index a46052f3b5..d403f0ac5a 100644 --- a/.cargo/audit.toml +++ b/.cargo/audit.toml @@ -25,4 +25,15 @@ ignore = [ # # Introduced by hive_metastore, tracked at https://github.com/cloudwego/pilota/issues/293 "RUSTSEC-2024-0388", + # `paste` is unmaintained; consider using an alternative + # + # Introduced by hive_metastore, tracked at https://github.com/cloudwego/pilota/issues/293 + "RUSTSEC-2024-0436", + # `rustls-pemfile` is unmaintained + # + # Introduced by object_store, see https://github.com/apache/arrow-rs-object-store/issues/564 + "RUSTSEC-2025-0134", + + # Tracked here: https://github.com/paupino/rust-decimal/issues/766 + "RUSTSEC-2026-0001", ] diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index 47b0c47874..fc6b1224e8 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -40,7 +40,7 @@ jobs: security_audit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: @@ -48,4 +48,3 @@ jobs: - uses: rustsec/audit-check@v2.0.0 with: token: ${{ secrets.GITHUB_TOKEN }} - ignore: RUSTSEC-2024-0436 diff --git a/.github/workflows/bindings_python_ci.yml b/.github/workflows/bindings_python_ci.yml index e9eabda2cb..ed2c03b763 100644 --- a/.github/workflows/bindings_python_ci.yml +++ b/.github/workflows/bindings_python_ci.yml @@ -44,7 +44,7 @@ jobs: check-rust: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Check format working-directory: "bindings/python" run: cargo fmt --all -- 
--check @@ -55,7 +55,7 @@ jobs: check-python: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: astral-sh/setup-uv@v7 with: version: "0.9.3" @@ -81,7 +81,7 @@ jobs: - macos-latest - windows-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: actions/setup-python@v6 with: python-version: 3.12 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c0f360b982..f393309bcb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,7 @@ jobs: - ubuntu-latest - macos-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder @@ -101,7 +101,7 @@ jobs: root-reserve-mb: 10240 temp-reserve-mb: 10240 - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder @@ -126,7 +126,7 @@ jobs: - macos-latest - windows-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder @@ -143,7 +143,6 @@ jobs: matrix: test-suite: - { name: "default", args: "--all-targets --all-features --workspace" } - - { name: "smol", args: "--all-targets --no-default-features --features smol --features storage-all --workspace" } - { name: "doc", args: "--doc --all-features --workspace" } name: Unit Tests (${{ matrix.test-suite.name }}) steps: @@ -158,7 +157,7 @@ jobs: root-reserve-mb: 10240 temp-reserve-mb: 10240 - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder @@ -180,7 +179,7 @@ jobs: name: Verify MSRV runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Install protoc uses: arduino/setup-protoc@v3 with: diff --git a/.github/workflows/ci_typos.yml b/.github/workflows/ci_typos.yml index 4c60369482..b79ab0c0d1 100644 --- 
a/.github/workflows/ci_typos.yml +++ b/.github/workflows/ci_typos.yml @@ -40,6 +40,6 @@ jobs: env: FORCE_COLOR: 1 steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Check typos - uses: crate-ci/typos@v1.39.2 + uses: crate-ci/typos@v1.41.0 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 71d35001da..66c17a668d 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -42,10 +42,11 @@ jobs: - "crates/catalog/glue" - "crates/catalog/hms" - "crates/catalog/rest" + - "crates/catalog/s3tables" - "crates/catalog/sql" - "crates/integrations/datafusion" steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder diff --git a/.github/workflows/release_python.yml b/.github/workflows/release_python.yml index e6b7021c9b..85663fc75f 100644 --- a/.github/workflows/release_python.yml +++ b/.github/workflows/release_python.yml @@ -85,7 +85,7 @@ jobs: runs-on: ubuntu-latest needs: [validate-release-tag] steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Install toml-cli if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }} @@ -107,7 +107,7 @@ jobs: command: sdist args: -o dist - name: Upload sdist - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v6 with: name: wheels-sdist path: bindings/python/dist @@ -128,7 +128,7 @@ jobs: } - { os: ubuntu-latest, target: "armv7l" } steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Install toml-cli if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }} @@ -159,7 +159,7 @@ jobs: command: build args: --release -o dist - name: Upload wheels - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v6 with: name: wheels-${{ matrix.os }}-${{ matrix.target }} path: bindings/python/dist @@ -178,7 +178,7 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v6 + uses: 
actions/download-artifact@v7 with: pattern: wheels-* merge-multiple: true diff --git a/.github/workflows/release_python_nightly.yml b/.github/workflows/release_python_nightly.yml index 9c27554f9a..833b8ee6a6 100644 --- a/.github/workflows/release_python_nightly.yml +++ b/.github/workflows/release_python_nightly.yml @@ -43,7 +43,7 @@ jobs: if: github.repository == 'apache/iceberg-rust' # Only run for apache repo runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: ./.github/actions/overwrite-package-version # Overwrite package version with timestamp with: @@ -56,7 +56,7 @@ jobs: args: -o dist - name: Upload sdist - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v6 with: name: wheels-sdist path: bindings/python/dist @@ -78,7 +78,7 @@ jobs: } - { os: ubuntu-latest, target: "armv7l" } steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: ./.github/actions/overwrite-package-version # Overwrite package version with timestamp with: @@ -102,7 +102,7 @@ jobs: args: --release -o dist - name: Upload wheels - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v6 with: name: wheels-${{ matrix.os }}-${{ matrix.target }} path: bindings/python/dist @@ -120,7 +120,7 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v6 + uses: actions/download-artifact@v7 with: pattern: wheels-* merge-multiple: true @@ -128,9 +128,36 @@ jobs: - name: List downloaded artifacts run: ls -R bindings/python/dist - name: Publish to TestPyPI + id: publish-testpypi + continue-on-error: true uses: pypa/gh-action-pypi-publish@release/v1 with: repository-url: https://test.pypi.org/legacy/ skip-existing: true packages-dir: bindings/python/dist verbose: true + - name: Display error message on publish failure + if: steps.publish-testpypi.outcome == 'failure' + run: | + echo "::error::Failed to publish to TestPyPI" + echo "" + echo "⚠️ TestPyPI Publish Failed" + echo 
"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "" + echo "This may be due to TestPyPI storage limits." + echo "See: https://docs.pypi.org/project-management/storage-limits" + echo "" + echo "To resolve this issue, use the pypi-cleanup utility to clean up old TestPyPI artifacts:" + echo "https://pypi.org/project/pypi-cleanup/" + echo "" + echo " uvx pypi-cleanup --package pyiceberg-core --host https://test.pypi.org/ \\" + echo " --verbose -d 10 --do-it --username <username>" + echo "" + echo "Requirements:" + echo " • Must be a maintainer for pyiceberg-core on TestPyPI" + echo " (https://test.pypi.org/project/pyiceberg-core)" + echo " • Requires TestPyPI password and 2FA" + echo " • ⚠️ ONLY do this for TestPyPI, NOT for production PyPI!" + echo "" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + exit 1 diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 5e653cffe4..95a4fdc256 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -31,7 +31,7 @@ jobs: if: github.repository_owner == 'apache' runs-on: ubuntu-22.04 steps: - - uses: actions/stale@v10.1.0 + - uses: actions/stale@v10.1.1 with: stale-issue-label: 'stale' exempt-issue-labels: 'not-stale' diff --git a/.github/workflows/website.yml b/.github/workflows/website.yml index c5925da6ae..1a52482b08 100644 --- a/.github/workflows/website.yml +++ b/.github/workflows/website.yml @@ -36,7 +36,7 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup mdBook uses: peaceiris/actions-mdbook@v2 diff --git a/.licenserc.yaml b/.licenserc.yaml index da87374c3b..0bcb65f3b7 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -31,6 +31,7 @@ header: - "**/DEPENDENCIES.*.tsv" # Release distributions - "dist/*" + - "target" - "Cargo.lock" - "bindings/python/uv.lock" - ".github/PULL_REQUEST_TEMPLATE.md" diff --git a/.typos.toml b/.typos.toml index 
9363f17c9a..407ce8168c 100644 --- a/.typos.toml +++ b/.typos.toml @@ -15,5 +15,8 @@ # specific language governing permissions and limitations # under the License. +[type.rust] +extend-ignore-identifiers-re = ["^bimap$"] + [files] extend-exclude = ["**/testdata", "CHANGELOG.md"] diff --git a/CHANGELOG.md b/CHANGELOG.md index f66f64f478..bd35e6b5d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,164 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/) and this project adheres to [Semantic Versioning](https://semver.org/). +## [v0.8.0] - 2026-01-06 + +### Breaking Changes + +* **API Changes:** + * refactor: Remove redundant parameters from SnapshotProducer validation methods by @Li0k in https://github.com/apache/iceberg-rust/pull/1853 + * chore: Remove deprecated `remove_all` in FileIO by @jonathanc-n in https://github.com/apache/iceberg-rust/pull/1863 + * refactor: Drop smol runtime support by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1900 + +* **Compatibility:** + * chore: bump MSRV to 1.88, fix warnings and clippy errors by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1902 + +* **Dependency Updates:** + * Upgrade opendal to v0.55 by @dentiny in https://github.com/apache/iceberg-rust/pull/1895 + * deps: bump DataFusion to 51, Arrow to 57, pyo to 0.26 by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1899 + +* **Other:** + * Remove wildcard pattern in exhaustive enums by @lgingerich in https://github.com/apache/iceberg-rust/pull/1925 + +### All Changes + +* chore(deps): Bump tempfile from 3.22.0 to 3.23.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1717 +* chore(deps): Bump rand from 0.8.5 to 0.9.2 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1716 +* chore(deps): Bump crate-ci/typos from 1.36.2 to 1.36.3 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1715 +* 
refactor: Improve REST catalog's authenticate method by @imor in https://github.com/apache/iceberg-rust/pull/1712 +* chore(deps): Bump serde_with from 3.14.0 to 3.14.1 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1727 +* refactor(writer): Refactor writers for the future partitioning writers by @CTTY in https://github.com/apache/iceberg-rust/pull/1657 +* Set lock on version of Pydantic by @Fokko in https://github.com/apache/iceberg-rust/pull/1737 +* chore(deps): Bump crate-ci/typos from 1.36.3 to 1.37.2 by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1734 +* feat: support more partition transformations for PartitionSpec::partition_to_path by @mnpw in https://github.com/apache/iceberg-rust/pull/1730 +* chore: Update website for 0.7.0 by @CTTY in https://github.com/apache/iceberg-rust/pull/1738 +* feat(sql-catalog): implement register table for sql catalog by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1724 +* fix: ensure CoalescePartitionsExec is enabled for IcebergCommitExec by @sgrebnov in https://github.com/apache/iceberg-rust/pull/1723 +* chore(deps): Bump regex from 1.11.2 to 1.12.1 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1741 +* chore(deps): Bump crate-ci/typos from 1.37.2 to 1.38.1 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1740 +* Improve `IcebergCommitExec` to correctly populate properties/schema by @sgrebnov in https://github.com/apache/iceberg-rust/pull/1721 +* feat(spec): add `table_properties.rs` to spec by @kaushiksrini in https://github.com/apache/iceberg-rust/pull/1733 +* chore(deps): Bump actions/stale from 10.0.0 to 10.1.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1726 +* docs: remove -src suffix from artifact name by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1743 +* feat(reader): Make ArrowReaderBuilder::new public by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1748 +* feat(writer): Add 
clustered and fanout writer by @CTTY in https://github.com/apache/iceberg-rust/pull/1735 +* feat(catalog): impl builder for SqlCatalog by @335g in https://github.com/apache/iceberg-rust/pull/1666 +* fix: fix read parquert file when schema change by @chenzl25 in https://github.com/apache/iceberg-rust/pull/1750 +* docs: Fix broken orbstack and podman links in CONTRIBUTING.md by @petern48 in https://github.com/apache/iceberg-rust/pull/1757 +* chore(deps): Bump tokio from 1.47.1 to 1.48.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1763 +* chore(deps): Bump backon from 1.5.2 to 1.6.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1762 +* fix: global eq delete matching should apply to only strictly older files, and fix partition scoped matching to consider spec id by @amogh-jahagirdar in https://github.com/apache/iceberg-rust/pull/1758 +* chore(deps): Bump apache/skywalking-eyes from 0.7.0 to 0.8.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1760 +* chore(deps): Bump rust_decimal from 1.38.0 to 1.39.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1761 +* feat(datafusion): implement the project node to add the partition columns by @fvaleye in https://github.com/apache/iceberg-rust/pull/1602 +* fix: snapshot was producing empty summary by @imor in https://github.com/apache/iceberg-rust/pull/1767 +* docs: Add examples for PartitioningWriter by @CTTY in https://github.com/apache/iceberg-rust/pull/1754 +* feat(sqllogictest): Add support for iceberg datafusion sqllogictest integration by @lliangyu-lin in https://github.com/apache/iceberg-rust/pull/1764 +* fix(build): Pin home version after merging #1764 by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1783 +* minor: Update Cargo.lock to add home by @CTTY in https://github.com/apache/iceberg-rust/pull/1785 +* chore(deps): Bump aws-sdk-s3tables from 1.40.0 to 1.41.0 by @dependabot[bot] in 
https://github.com/apache/iceberg-rust/pull/1790 +* chore(deps): Bump rand from 0.8.5 to 0.9.2 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1789 +* chore(deps): Bump actions/download-artifact from 5 to 6 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1788 +* chore(deps): Bump actions/upload-artifact from 4 to 5 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1787 +* fix(reader): filter row groups when FileScanTask contains byte ranges by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1779 +* refactor(arrow,datafusion): Reuse PartitionValueCalculator in RecordBatchPartitionSplitter by @CTTY in https://github.com/apache/iceberg-rust/pull/1781 +* feat: Update Datafusion to v49 by @DerGut in https://github.com/apache/iceberg-rust/pull/1704 +* deps: unpin pydantic by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1793 +* feat(reader): Add Date32 support to RecordBatchTransformer create_column by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1792 +* feat(catalog): Implement update_table for S3TablesCatalog by @CTTY in https://github.com/apache/iceberg-rust/pull/1594 +* feat: Update Datafusion to v50 by @DerGut in https://github.com/apache/iceberg-rust/pull/1728 +* ci: Migrate to uv for python by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1796 +* ci: Relax msrv check thanks to rust 2024 by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1795 +* ci: Don't dismiss stale review to make contribution easier by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1799 +* add Makefile to bindings/python by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1800 +* chore: inline format args by @colinmarc in https://github.com/apache/iceberg-rust/pull/1805 +* refactor: Migrate from tera to minijinja by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1798 +* fix(reader): fix position delete bugs with row group skipping by @mbutrovich in 
https://github.com/apache/iceberg-rust/pull/1806 +* feat(datafusion): implement the partitioning node for DataFusion to define the partitioning by @fvaleye in https://github.com/apache/iceberg-rust/pull/1620 +* feat(reader): Date32 from days since epoch for Literal:try_from_json by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1803 +* chore(deps): Bump aws-sdk-glue from 1.125.0 to 1.126.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1812 +* chore(deps): Bump astral-sh/setup-uv from 6 to 7 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1811 +* chore(deps): Bump crate-ci/typos from 1.38.1 to 1.39.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1810 +* feat(reader): position-based column projection for Parquet files without field IDs (migrated tables) by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1777 +* fix(reader): Equality delete files with partial schemas (containing only equality columns) by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1782 +* infra: use apache/hive:4.0.0 as hive Dockerfile base image by @geruh in https://github.com/apache/iceberg-rust/pull/1823 +* fix: StructType fails to deserialize JSON with type field by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1822 +* feat: Support for V3 Metadata by @c-thiel in https://github.com/apache/iceberg-rust/pull/1682 +* fix(reader): Support both position and equality delete files on the same FileScanTask by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1778 +* feat(datafusion): Add TaskWriter for DataFusion by @CTTY in https://github.com/apache/iceberg-rust/pull/1769 +* fix: support reading compressed metadata by @colinmarc in https://github.com/apache/iceberg-rust/pull/1802 +* Support deserializing bytes by @Fokko in https://github.com/apache/iceberg-rust/pull/1820 +* fix: Bump CI Spark version to 3.5.7 by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1832 +* infra: 
use python 3.12 for release by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1836 +* pyiceberg-core: create smaller artifacts by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1841 +* infra: add collaborators to .asf.yaml by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1842 +* pyiceberg-core: use pyo3 abi3-py310 by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1843 +* ci: parallelize unit test with matrix by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1833 +* pyiceberg-core: create even smaller artifacts by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1844 +* chore: Split values.rs into separate files by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1840 +* feat(datafusion): Support `INSERT INTO` partitioned tables by @CTTY in https://github.com/apache/iceberg-rust/pull/1827 +* docs: Add Wrappers project to README by @burmecia in https://github.com/apache/iceberg-rust/pull/1852 +* feat(reader): Add PartitionSpec support to FileScanTask and RecordBatchTransformer by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1821 +* feat(reader): null struct default values in create_column by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1847 +* refactor: Remove redundant parameters from SnapshotProducer validation methods by @Li0k in https://github.com/apache/iceberg-rust/pull/1853 +* infra: add verbose=true to pypa/gh-action-pypi-publish by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1846 +* use RecordBatchTransformerBuilder instead of RecordBatchTransformer by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1857 +* chore(deps): Bump bytes from 1.10.1 to 1.11.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1861 +* chore(deps): Bump serde_with from 3.15.1 to 3.16.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1859 +* chore(deps): Bump fs-err from 3.1.3 to 3.2.0 by @dependabot[bot] in 
https://github.com/apache/iceberg-rust/pull/1860 +* chore(deps): Bump crate-ci/typos from 1.39.0 to 1.39.2 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1858 +* chore: Remove deprecated `remove_all` in FileIO by @jonathanc-n in https://github.com/apache/iceberg-rust/pull/1863 +* infra: notify on github workflow failure by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1870 +* feat(reader): Add binary support to `get_arrow_datum` for equality deletes with binary type by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1848 +* Raise concurrency errors properly for glue tables by @jembishop in https://github.com/apache/iceberg-rust/pull/1875 +* infra: add instructions for cleaning up testpypi artifacts by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1855 +* chore(deps): Bump actions/checkout from 5 to 6 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1883 +* Update apache-avro to v0.21.0 by @N-Boutaib in https://github.com/apache/iceberg-rust/pull/1881 +* docs: Clarify functionality of `SnapshotProduceOperation` by @jonathanc-n in https://github.com/apache/iceberg-rust/pull/1874 +* feat(datafusion): Split IcebergTableProvider into static and non-static table provider by @CTTY in https://github.com/apache/iceberg-rust/pull/1879 +* infra: use new `del_branch_on_merge` in .asf.yaml by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1888 +* Upgrade opendal to v0.55 by @dentiny in https://github.com/apache/iceberg-rust/pull/1895 +* chore(deps): Bump http from 1.3.1 to 1.4.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1892 +* chore(deps): Bump crate-ci/typos from 1.39.2 to 1.40.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1891 +* feat(datafusion): Add `sort_by_partition` to sort the input partitioned data by @CTTY in https://github.com/apache/iceberg-rust/pull/1618 +* rfc: Modularize `iceberg` Implementations by @Xuanwo in 
https://github.com/apache/iceberg-rust/pull/1854 +* refactor(writer): Make writer builders non-consuming in build by @leonzchang in https://github.com/apache/iceberg-rust/pull/1889 +* fix: Keep snapshot log on replace by @c-thiel in https://github.com/apache/iceberg-rust/pull/1896 +* chore(deps): Bump actions/stale from 10.1.0 to 10.1.1 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1908 +* feat(datafusion): Add sqllogictest for DataFusion INSERT INTO by @CTTY in https://github.com/apache/iceberg-rust/pull/1887 +* refactor: Drop smol runtime support by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1900 +* chore(deps): Bump minijinja from 2.12.0 to 2.13.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1909 +* chore(deps): Bump uuid from 1.18.1 to 1.19.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1910 +* feat(core): Add support for `_file` column by @gbrgr in https://github.com/apache/iceberg-rust/pull/1824 +* feat: Make `rest` types public, add documentation by @c-thiel in https://github.com/apache/iceberg-rust/pull/1901 +* chore: bump MSRV to 1.88, fix warnings and clippy errors by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1902 +* ci: Make s3tables ready for publish by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1916 +* deps: bump DataFusion to 51, Arrow to 57, pyo to 0.26 by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1899 +* fix: Serialize `split_offsets` as null when empty by @AndreaBozzo in https://github.com/apache/iceberg-rust/pull/1906 +* feat(catalog): Implement update_table for SqlCatalog by @lgingerich in https://github.com/apache/iceberg-rust/pull/1911 +* fix: Respect precision and scale for Decimal128 in value.rs by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1921 +* fix: restore no-op logic in constants_map for NULL identity-partitioned columns by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1922 +* fix: stack 
overflow when loading large equality deletes by @dojiong in https://github.com/apache/iceberg-rust/pull/1915 +* chore(deps): Bump actions/upload-artifact from 5 to 6 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1931 +* chore(deps): Bump actions/download-artifact from 6 to 7 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1932 +* Remove wildcard pattern in exhaustive enums by @lgingerich in https://github.com/apache/iceberg-rust/pull/1925 +* fix: prioritize delete manifests to prevent scan deadlock by @dojiong in https://github.com/apache/iceberg-rust/pull/1937 +* feat: Include statistics for Reserved Fields by @Fokko in https://github.com/apache/iceberg-rust/pull/1849 +* fix(website): Update expected messages by @CTTY in https://github.com/apache/iceberg-rust/pull/1942 +* feat: Implement shared delete file loading and caching for ArrowReader by @dojiong in https://github.com/apache/iceberg-rust/pull/1941 +* infra: license header check ignore target/ dir by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1954 +* infra: release script, validate proper ICEBERG_VERSION variable by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1956 +* refactor(arrow): Rename parameter in delete_filter for clarity by @robertmu in https://github.com/apache/iceberg-rust/pull/1955 +* feat(sqllogictest): use serde derived structs for schedule parsing by @AndreaBozzo in https://github.com/apache/iceberg-rust/pull/1953 +* fix: follow IEEE 754 totalOrder for `float` and `double` by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1959 +* fix: return proper error rather than persisting error message on snapshot by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1960 +* feat(arrow): Convert Arrow schema to Iceberg schema with auto assigned field ids by @CTTY in https://github.com/apache/iceberg-rust/pull/1928 +* fix: MemoryCatalog to return absolute NamespaceIdents by @eickler in 
https://github.com/apache/iceberg-rust/pull/1970 +* fix(spec): Include delete file content to V3 manifest by @CTTY in https://github.com/apache/iceberg-rust/pull/1979 +* fix: fix typo check error by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1989 +* Fix ci audit failure by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/1988 +* feat: make FanoutWriter writer configurable by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1962 + ## [v0.7.0] - 2025-09-23 ### Breaking Changes diff --git a/Cargo.lock b/Cargo.lock index 62478f32a0..77d60a167f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -116,22 +116,22 @@ dependencies = [ [[package]] name = "anstyle-query" -version = "1.1.4" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "anstyle-wincon" -version = "3.0.10" +version = "3.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -161,14 +161,39 @@ dependencies = [ "serde_bytes", "serde_json", "snap", - "strum 0.27.2", - "strum_macros 0.27.2", + "strum", + "strum_macros", "thiserror 2.0.17", "uuid", "xz2", "zstd", ] +[[package]] +name = "apache-avro" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf" +dependencies = [ + "bigdecimal", + "bon", + "digest", + "log", + "miniz_oxide", + "num-bigint", + "quad-rand", + "rand 0.9.2", + "regex-lite", + "serde", + 
"serde_bytes", + "serde_json", + "strum", + "strum_macros", + "thiserror 2.0.17", + "uuid", + "zstd", +] + [[package]] name = "ar_archive_writer" version = "0.2.0" @@ -198,9 +223,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3" dependencies = [ "arrow-arith", "arrow-array", @@ -219,23 +244,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num", + "num-traits", ] [[package]] name = "arrow-array" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -244,47 +269,51 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.16.0", - "num", + "hashbrown 0.16.1", + "num-complex", + "num-integer", + "num-traits", ] [[package]] name = "arrow-buffer" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626" dependencies = [ "bytes", "half", - "num", + "num-bigint", + "num-traits", ] [[package]] name = "arrow-cast" -version = "56.2.0" 
+version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", + "arrow-ord", "arrow-schema", "arrow-select", "atoi", - "base64 0.22.1", + "base64", "chrono", "comfy-table", "half", "lexical-core", - "num", + "num-traits", "ryu", ] [[package]] name = "arrow-csv" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" +checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b" dependencies = [ "arrow-array", "arrow-cast", @@ -297,21 +326,22 @@ dependencies = [ [[package]] name = "arrow-data" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num", + "num-integer", + "num-traits", ] [[package]] name = "arrow-ipc" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3" dependencies = [ "arrow-array", "arrow-buffer", @@ -325,9 +355,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" +checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0" dependencies = [ "arrow-array", "arrow-buffer", @@ -336,20 +366,22 @@ 
dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.12.0", + "indexmap 2.12.1", + "itoa", "lexical-core", "memchr", - "num", - "serde", + "num-traits", + "ryu", + "serde_core", "serde_json", "simdutf8", ] [[package]] name = "arrow-ord" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521" dependencies = [ "arrow-array", "arrow-buffer", @@ -360,9 +392,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f" dependencies = [ "arrow-array", "arrow-buffer", @@ -373,33 +405,33 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0" dependencies = [ - "serde", + "serde_core", "serde_json", ] [[package]] name = "arrow-select" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" dependencies = [ "ahash 0.8.12", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num", + "num-traits", ] [[package]] name = "arrow-string" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +checksum = 
"cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534" dependencies = [ "arrow-array", "arrow-buffer", @@ -407,7 +439,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num", + "num-traits", "regex", "regex-syntax", ] @@ -440,18 +472,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "async-channel" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "924ed96dd52d1b75e9c1a3e6275715fd320f5f9439fb5a4a11fa51f4221158d2" -dependencies = [ - "concurrent-queue", - "event-listener-strategy", - "futures-core", - "pin-project-lite", -] - [[package]] name = "async-compression" version = "0.4.19" @@ -469,49 +489,6 @@ dependencies = [ "zstd-safe", ] -[[package]] -name = "async-executor" -version = "1.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497c00e0fd83a72a79a39fcbd8e3e2f055d6f6c7e025f3b3d91f4f8e76527fb8" -dependencies = [ - "async-task", - "concurrent-queue", - "fastrand", - "futures-lite", - "pin-project-lite", - "slab", -] - -[[package]] -name = "async-fs" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8034a681df4aed8b8edbd7fbe472401ecf009251c8b40556b304567052e294c5" -dependencies = [ - "async-lock", - "blocking", - "futures-lite", -] - -[[package]] -name = "async-io" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "456b8a8feb6f42d237746d4b3e9a178494627745c3c56c6ea55d92ba50d026fc" -dependencies = [ - "autocfg", - "cfg-if", - "concurrent-queue", - "futures-io", - "futures-lite", - "parking", - "polling", - "rustix", - "slab", - "windows-sys 0.61.2", -] - [[package]] name = "async-lock" version = "3.4.1" @@ -523,35 +500,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "async-net" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b948000fad4873c1c9339d60f2623323a0cfd3816e5181033c6a5cb68b2accf7" -dependencies = [ - "async-io", - "blocking", - "futures-lite", -] - -[[package]] -name = "async-process" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc50921ec0055cdd8a16de48773bfeec5c972598674347252c0399676be7da75" -dependencies = [ - "async-channel", - "async-io", - "async-lock", - "async-signal", - "async-task", - "blocking", - "cfg-if", - "event-listener", - "futures-lite", - "rustix", -] - [[package]] name = "async-recursion" version = "1.1.1" @@ -560,33 +508,9 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] -[[package]] -name = "async-signal" -version = "0.2.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43c070bbf59cd3570b6b2dd54cd772527c7c3620fce8be898406dd3ed6adc64c" -dependencies = [ - "async-io", - "async-lock", - "atomic-waker", - "cfg-if", - "futures-core", - "futures-io", - "rustix", - "signal-hook-registry", - "slab", - "windows-sys 0.61.2", -] - -[[package]] -name = "async-task" -version = "4.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" - [[package]] name = "async-trait" version = "0.1.89" @@ -595,7 +519,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -621,9 +545,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.8" +version = "1.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37cf2b6af2a95a20e266782b4f76f1a5e12bf412a9db2de9c1e9123b9d8c0ad8" +checksum = "a0149602eeaf915158e14029ba0c78dedb8c08d554b024d54c8f239aab46511d" dependencies = [ 
"aws-credential-types", "aws-runtime", @@ -640,7 +564,7 @@ dependencies = [ "bytes", "fastrand", "hex", - "http 1.3.1", + "http 1.4.0", "ring", "time", "tokio", @@ -651,9 +575,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.8" +version = "1.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faf26925f4a5b59eb76722b63c2892b1d70d06fa053c72e4a100ec308c1d47bc" +checksum = "b01c9521fa01558f750d183c8c68c81b0155b9d193a4ba7f84c36bd1b6d04a06" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -663,9 +587,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.14.1" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "879b6c89592deb404ba4dc0ae6b58ffd1795c78991cbb5b8bc441c48a070440d" +checksum = "6b5ce75405893cd713f9ab8e297d8e438f624dde7d706108285f7e17a25a180f" dependencies = [ "aws-lc-sys", "zeroize", @@ -673,11 +597,10 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.32.3" +version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "107a4e9d9cab9963e04e84bb8dee0e25f2a987f9a8bad5ed054abd439caa8f8c" +checksum = "179c3777a8b5e70e90ea426114ffc565b2c1a9f82f6c4a0c5a34aa6ef5e781b6" dependencies = [ - "bindgen", "cc", "cmake", "dunce", @@ -686,9 +609,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.12" +version = "1.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa006bb32360ed90ac51203feafb9d02e3d21046e1fd3a450a404b90ea73e5d" +checksum = "7ce527fb7e53ba9626fc47824f25e256250556c40d8f81d27dd92aa38239d632" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -710,9 +633,9 @@ dependencies = [ [[package]] name = "aws-sdk-glue" -version = "1.126.0" +version = "1.132.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd9c10a11584c0b619c9e478143072c4028c39017f98534e206156a7e94188be" +checksum = 
"35638d8e6ef97adb7f3154ffc618bbe1d631a503b6d8328b94af77b7615fbeb2" dependencies = [ "aws-credential-types", "aws-runtime", @@ -732,9 +655,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3tables" -version = "1.41.0" +version = "1.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "761f176da526badb4c3dbd67ee1da2faf3dc1e537ed229355f7590d80595ae35" +checksum = "ce68b5d4652e6248827e472c67df8773ae6ab3946ff176de8d3ee7c295299efd" dependencies = [ "aws-credential-types", "aws-runtime", @@ -754,9 +677,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.86.0" +version = "1.90.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a0abbfab841446cce6e87af853a3ba2cc1bc9afcd3f3550dd556c43d434c86d" +checksum = "4f18e53542c522459e757f81e274783a78f8c81acdfc8d1522ee8a18b5fb1c66" dependencies = [ "aws-credential-types", "aws-runtime", @@ -776,9 +699,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.89.0" +version = "1.92.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "695dc67bb861ccb8426c9129b91c30e266a0e3d85650cafdf62fcca14c8fd338" +checksum = "532f4d866012ffa724a4385c82e8dd0e59f0ca0e600f3f22d4c03b6824b34e4a" dependencies = [ "aws-credential-types", "aws-runtime", @@ -798,9 +721,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.88.0" +version = "1.94.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d30990923f4f675523c51eb1c0dec9b752fb267b36a61e83cbc219c9d86da715" +checksum = "1be6fbbfa1a57724788853a623378223fe828fc4c09b146c992f0c95b6256174" dependencies = [ "aws-credential-types", "aws-runtime", @@ -821,9 +744,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.3.5" +version = "1.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bffc03068fbb9c8dd5ce1c6fb240678a5cffb86fb2b7b1985c999c4b83c8df68" +checksum = 
"c35452ec3f001e1f2f6db107b6373f1f48f05ec63ba2c5c9fa91f07dad32af11" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -834,7 +757,7 @@ dependencies = [ "hex", "hmac", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "percent-encoding", "sha2", "time", @@ -843,9 +766,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.6" +version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "127fcfad33b7dfc531141fda7e1c402ac65f88aca5511a4d31e2e3d2cd01ce9c" +checksum = "9ee19095c7c4dda59f1697d028ce704c24b2d33c6718790c7f1d5a3015b4107c" dependencies = [ "futures-util", "pin-project-lite", @@ -854,17 +777,18 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.62.4" +version = "0.62.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3feafd437c763db26aa04e0cc7591185d0961e64c61885bece0fb9d50ceac671" +checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", "bytes-utils", "futures-core", + "futures-util", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "percent-encoding", "pin-project-lite", @@ -874,9 +798,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.1.3" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1053b5e587e6fa40ce5a79ea27957b04ba660baa02b28b7436f64850152234f1" +checksum = "59e62db736db19c488966c8d787f52e6270be565727236fd5579eaa301e7bc4a" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -884,17 +808,17 @@ dependencies = [ "h2 0.3.27", "h2 0.4.12", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "hyper 0.14.32", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-rustls 0.24.2", "hyper-rustls 0.27.7", "hyper-util", "pin-project-lite", "rustls 0.21.12", - "rustls 0.23.34", - "rustls-native-certs 0.8.2", + "rustls 0.23.35", + "rustls-native-certs", 
"rustls-pki-types", "tokio", "tokio-rustls 0.26.4", @@ -904,27 +828,27 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.61.6" +version = "0.61.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff418fc8ec5cadf8173b10125f05c2e7e1d46771406187b2c878557d4503390" +checksum = "a6864c190cbb8e30cf4b77b2c8f3b6dfffa697a09b7218d2f7cd3d4c4065a9f7" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-observability" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d1881b1ea6d313f9890710d65c158bdab6fb08c91ea825f74c1c8c357baf4cc" +checksum = "17f616c3f2260612fe44cede278bafa18e73e6479c4e393e2c4518cf2a9a228a" dependencies = [ "aws-smithy-runtime-api", ] [[package]] name = "aws-smithy-query" -version = "0.60.8" +version = "0.60.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d28a63441360c477465f80c7abac3b9c4d075ca638f982e605b7dc2a2c7156c9" +checksum = "ae5d689cf437eae90460e944a58b5668530d433b4ff85789e69d2f2a556e057d" dependencies = [ "aws-smithy-types", "urlencoding", @@ -932,9 +856,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.3" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ab99739082da5347660c556689256438defae3bcefd66c52b095905730e404" +checksum = "a392db6c583ea4a912538afb86b7be7c5d8887d91604f50eb55c262ee1b4a5f5" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -945,7 +869,7 @@ dependencies = [ "bytes", "fastrand", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "http-body 1.0.1", "pin-project-lite", @@ -956,15 +880,15 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.9.1" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3683c5b152d2ad753607179ed71988e8cfd52964443b4f74fd8e552d0bbfeb46" +checksum = 
"ab0d43d899f9e508300e587bf582ba54c27a452dd0a9ea294690669138ae14a2" dependencies = [ "aws-smithy-async", "aws-smithy-types", "bytes", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "pin-project-lite", "tokio", "tracing", @@ -973,16 +897,16 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.3.3" +version = "1.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f5b3a7486f6690ba25952cabf1e7d75e34d69eaff5081904a47bc79074d6457" +checksum = "905cb13a9895626d49cf2ced759b062d913834c7482c38e49557eac4e6193f01" dependencies = [ "base64-simd", "bytes", "bytes-utils", "futures-core", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "http-body 1.0.1", "http-body-util", @@ -999,18 +923,18 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.11" +version = "0.60.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9c34127e8c624bc2999f3b657e749c1393bedc9cd97b92a804db8ced4d2e163" +checksum = "11b2f670422ff42bf7065031e72b45bc52a3508bd089f743ea90731ca2b6ea57" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "1.3.9" +version = "1.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2fd329bf0e901ff3f60425691410c69094dc2a1f34b331f37bfc4e9ac1565a1" +checksum = "d79fb68e3d7fe5d4833ea34dc87d2e97d26d3086cb3da660bb6b1f76d98680b6" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1031,12 +955,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - [[package]] name = "base64" version = "0.22.1" @@ -1055,9 +973,9 @@ dependencies = [ [[package]] name = "base64ct" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" +checksum = "0e050f626429857a27ddccb31e0aca21356bfa709c04041aefddac081a8f068a" [[package]] name = "bigdecimal" @@ -1079,26 +997,6 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" -[[package]] -name = "bindgen" -version = "0.72.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" -dependencies = [ - "bitflags", - "cexpr", - "clang-sys", - "itertools 0.13.0", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", - "syn 2.0.108", -] - [[package]] name = "bitflags" version = "2.10.0" @@ -1160,19 +1058,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "blocking" -version = "1.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e83f8d02be6967315521be875afa792a316e28d57b5a2d401897e2a7921b7f21" -dependencies = [ - "async-channel", - "async-task", - "futures-io", - "futures-lite", - "piper", -] - [[package]] name = "bon" version = "3.8.1" @@ -1195,14 +1080,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "borsh" -version = "1.5.7" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad8646f98db542e39fc66e68a20b2144f6a732636df7c2354e74645faaa433ce" +checksum = "d1da5ab77c1437701eeff7c88d968729e7766172279eab0676857b3d63af7a6f" dependencies = [ "borsh-derive", "cfg_aliases", @@ -1210,15 +1095,15 @@ dependencies = [ [[package]] name = "borsh-derive" -version = "1.5.7" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd1d3c0c2f5833f22386f252fe8ed005c7f59fdcddeef025c01b4c3b9fd9ac3" +checksum = "0686c856aa6aac0c4498f936d7d6a02df690f614c03e4d906d1018062b5c5e2c" 
dependencies = [ "once_cell", "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1340,9 +1225,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.43" +version = "1.2.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "739eb0f94557554b3ca9a86d2d37bebd49c5e6d0c1d2bda35ba5bdac830befc2" +checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215" dependencies = [ "find-msvc-tools", "jobserver", @@ -1350,15 +1235,6 @@ dependencies = [ "shlex", ] -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] - [[package]] name = "cfg-if" version = "1.0.4" @@ -1405,22 +1281,11 @@ dependencies = [ "inout", ] -[[package]] -name = "clang-sys" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" -dependencies = [ - "glob", - "libc", - "libloading", -] - [[package]] name = "clap" -version = "4.5.50" +version = "4.5.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623" +checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" dependencies = [ "clap_builder", "clap_derive", @@ -1428,9 +1293,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.50" +version = "4.5.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0" +checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" dependencies = [ "anstream", "anstyle", @@ -1447,7 +1312,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1491,12 +1356,11 @@ dependencies = [ 
[[package]] name = "comfy-table" -version = "7.1.2" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" +checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" dependencies = [ - "strum 0.26.3", - "strum_macros 0.26.4", + "unicode-segmentation", "unicode-width 0.2.2", ] @@ -1554,16 +1418,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" -[[package]] -name = "core-foundation" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "core-foundation" version = "0.10.1" @@ -1591,9 +1445,9 @@ dependencies = [ [[package]] name = "crc" -version = "3.3.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" dependencies = [ "crc-catalog", ] @@ -1663,9 +1517,9 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "typenum", @@ -1699,7 +1553,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501" dependencies = [ "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1733,7 +1587,7 @@ dependencies = [ "proc-macro2", 
"quote", "strsim", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1747,7 +1601,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1758,7 +1612,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1769,7 +1623,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core 0.21.3", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -1788,12 +1642,11 @@ dependencies = [ [[package]] name = "datafusion" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a" +checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" dependencies = [ "arrow", - "arrow-ipc", "arrow-schema", "async-trait", "bytes", @@ -1804,6 +1657,7 @@ dependencies = [ "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", + "datafusion-datasource-arrow", "datafusion-datasource-avro", "datafusion-datasource-csv", "datafusion-datasource-json", @@ -1826,7 +1680,6 @@ dependencies = [ "datafusion-sql", "flate2", "futures", - "hex", "itertools 0.14.0", "log", "object_store", @@ -1834,6 +1687,7 @@ dependencies = [ "parquet", "rand 0.9.2", "regex", + "rstest", "sqlparser", "tempfile", "tokio", @@ -1845,9 +1699,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "187622262ad8f7d16d3be9202b4c1e0116f1c9aa387e5074245538b755261621" +checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" dependencies = [ "arrow", "async-trait", @@ -1860,7 +1714,6 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-session", - 
"datafusion-sql", "futures", "itertools 0.14.0", "log", @@ -1871,9 +1724,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9657314f0a32efd0382b9a46fdeb2d233273ece64baa68a7c45f5a192daf0f83" +checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" dependencies = [ "arrow", "async-trait", @@ -1883,10 +1736,11 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-plan", - "datafusion-session", "futures", + "itertools 0.14.0", "log", "object_store", "tokio", @@ -1894,16 +1748,18 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a0b9c821d14e79070f42ea3a6d6618ced04d94277f0a32301918d7a022c250f" +checksum = "fab982df44f818a749cb5200504ccb919f4608cb9808daf8b3fb98aa7955fd1e" dependencies = [ "arrow", "async-trait", "aws-config", "aws-credential-types", + "chrono", "clap", "datafusion", + "datafusion-common", "dirs", "env_logger", "futures", @@ -1920,20 +1776,19 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1" +checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" dependencies = [ "ahash 0.8.12", - "apache-avro", + "apache-avro 0.20.0", "arrow", "arrow-ipc", - "base64 0.22.1", "chrono", "half", "hashbrown 0.14.5", "hex", - "indexmap 2.12.0", + "indexmap 2.12.1", "libc", "log", "object_store", @@ -1947,9 +1802,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.3.0" +version = "51.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e" +checksum = "b92065bbc6532c6651e2f7dd30b55cba0c7a14f860c7e1d15f165c41a1868d95" dependencies = [ "futures", "log", @@ -1958,9 +1813,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86" +checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" dependencies = [ "arrow", "async-compression", @@ -1983,9 +1838,7 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "parquet", "rand 0.9.2", - "tempfile", "tokio", "tokio-util", "url", @@ -1994,46 +1847,63 @@ dependencies = [ ] [[package]] -name = "datafusion-datasource-avro" -version = "50.3.0" +name = "datafusion-datasource-arrow" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10d40b6953ebc9099b37adfd12fde97eb73ff0cee44355c6dea64b8a4537d561" +checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" dependencies = [ - "apache-avro", "arrow", + "arrow-ipc", "async-trait", "bytes", - "chrono", - "datafusion-catalog", "datafusion-common", + "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", - "datafusion-physical-expr", + "datafusion-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", - "num-traits", + "itertools 0.14.0", "object_store", "tokio", ] +[[package]] +name = "datafusion-datasource-avro" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388ed8be535f562cc655b9c3d22edbfb0f1a50a25c242647a98b6d92a75b55a1" +dependencies = [ + "apache-avro 0.20.0", + "arrow", + "async-trait", + "bytes", + "datafusion-common", + "datafusion-datasource", + 
"datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "num-traits", + "object_store", +] + [[package]] name = "datafusion-datasource-csv" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64533a90f78e1684bfb113d200b540f18f268134622d7c96bbebc91354d04825" +checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", @@ -2045,74 +1915,67 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d7ebeb12c77df0aacad26f21b0d033aeede423a64b2b352f53048a75bf1d6e6" +checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", "object_store", - "serde_json", "tokio", ] [[package]] name = "datafusion-datasource-parquet" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09e783c4c7d7faa1199af2df4761c68530634521b176a8d1331ddbc5a5c75133" +checksum = "43d0b60ffd66f28bfb026565d62b0a6cbc416da09814766a3797bba7d85a3cd9" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - 
"datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", - "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-pruning", "datafusion-session", "futures", - "hex", "itertools 0.14.0", "log", "object_store", "parking_lot", "parquet", - "rand 0.9.2", "tokio", ] [[package]] name = "datafusion-doc" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429" +checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" [[package]] name = "datafusion-execution" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531" +checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" dependencies = [ "arrow", "async-trait", @@ -2131,9 +1994,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe" +checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" dependencies = [ "arrow", "async-trait", @@ -2144,7 +2007,8 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.12.0", + "indexmap 2.12.1", + "itertools 0.14.0", "paste", "recursive", "serde_json", @@ -2153,26 +2017,26 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6" +checksum = 
"5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" dependencies = [ "arrow", "datafusion-common", - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-functions" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" +checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" dependencies = [ "arrow", "arrow-buffer", - "base64 0.22.1", + "base64", "blake2", "blake3", "chrono", @@ -2186,6 +2050,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "num-traits", "rand 0.9.2", "regex", "sha2", @@ -2195,9 +2060,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf" +checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" dependencies = [ "ahash 0.8.12", "arrow", @@ -2216,9 +2081,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17" +checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" dependencies = [ "ahash 0.8.12", "arrow", @@ -2229,9 +2094,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3" +checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" dependencies = [ "arrow", "arrow-ord", @@ -2239,6 +2104,7 @@ dependencies = [ "datafusion-doc", "datafusion-execution", 
"datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", @@ -2251,9 +2117,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37ff8a99434fbbad604a7e0669717c58c7c4f14c472d45067c4b016621d981" +checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" dependencies = [ "arrow", "async-trait", @@ -2267,9 +2133,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b" +checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" dependencies = [ "arrow", "datafusion-common", @@ -2285,9 +2151,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f" +checksum = "53ae9bcc39800820d53a22d758b3b8726ff84a5a3e24cecef04ef4e5fdf1c7cc" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2295,20 +2161,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730" +checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = [ - "datafusion-expr", + "datafusion-doc", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "datafusion-optimizer" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a" +checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" dependencies = [ "arrow", "chrono", @@ -2316,7 +2182,7 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.14.0", "log", "recursive", @@ -2326,9 +2192,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6" +checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" dependencies = [ "ahash 0.8.12", "arrow", @@ -2339,9 +2205,8 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.14.0", - "log", "parking_lot", "paste", "petgraph 0.8.3", @@ -2349,9 +2214,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "815acced725d30601b397e39958e0e55630e0a10d66ef7769c14ae6597298bb0" +checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" dependencies = [ "arrow", "datafusion-common", @@ -2364,9 +2229,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3" +checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" dependencies = [ "ahash 0.8.12", "arrow", @@ -2378,9 +2243,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"49b7d623eb6162a3332b564a0907ba00895c505d101b99af78345f1acf929b5c" +checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" dependencies = [ "arrow", "datafusion-common", @@ -2392,15 +2257,14 @@ dependencies = [ "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", - "log", "recursive", ] [[package]] name = "datafusion-physical-plan" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c" +checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" dependencies = [ "ahash 0.8.12", "arrow", @@ -2419,7 +2283,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.14.0", "log", "parking_lot", @@ -2429,12 +2293,11 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65" +checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-datasource", "datafusion-expr-common", @@ -2447,35 +2310,26 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d" +checksum = "8f96eebd17555386f459037c65ab73aae8df09f464524c709d6a3134ad4f4776" dependencies = [ - "arrow", "async-trait", - "dashmap", "datafusion-common", - "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-plan", - "datafusion-sql", - "futures", - "itertools 0.14.0", - "log", - "object_store", "parking_lot", - "tokio", ] [[package]] name = 
"datafusion-spark" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613efb6666a7d42fcb922b90cd0daa2b25ea486d141350e5d3e86e46df28309a" +checksum = "97a8d6fed24c80dd403dcc6afec33766a599d1b72575f222237f01429b2e58ba" dependencies = [ "arrow", + "bigdecimal", "chrono", "crc32fast", "datafusion-catalog", @@ -2483,24 +2337,24 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-functions", - "datafusion-macros", "log", + "rand 0.9.2", "sha1", "url", - "xxhash-rust", ] [[package]] name = "datafusion-sql" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b" +checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" dependencies = [ "arrow", "bigdecimal", + "chrono", "datafusion-common", "datafusion-expr", - "indexmap 2.12.0", + "indexmap 2.12.1", "log", "recursive", "regex", @@ -2509,9 +2363,9 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17598193dd875ca895400c51ccab1c30fceb1855220dc60aa415a4db7c95a2d7" +checksum = "a6830e357705e0d54fda6e3ce70a87c2b255197563c6463d668520cbfc1e0b7c" dependencies = [ "arrow", "async-trait", @@ -2536,14 +2390,15 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaa011a3814d91a03ab655ad41bbe5e57b203b2859281af8fe2c30aebbbcc5d9" +checksum = "2505af06d103a55b4e8ded0c6aeb6c72a771948da939c0bd3f8eee67af475a9c" dependencies = [ "async-recursion", "async-trait", "chrono", "datafusion", + "half", "itertools 0.14.0", "object_store", "pbjson-types", @@ -2593,7 +2448,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.108", + "syn 
2.0.111", ] [[package]] @@ -2603,7 +2458,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2653,7 +2508,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2704,7 +2559,7 @@ dependencies = [ "enum-ordinalize", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2745,7 +2600,7 @@ checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -2860,7 +2715,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baec6a0289d7f1fe5665586ef7340af82e3037207bef60f5785e57569776f0c8" dependencies = [ "bytes", - "rkyv 0.8.12", + "rkyv 0.8.13", "serde", "simdutf8", ] @@ -2878,9 +2733,9 @@ dependencies = [ [[package]] name = "find-msvc-tools" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" [[package]] name = "fixedbitset" @@ -2932,6 +2787,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -3027,19 +2888,6 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" -[[package]] -name = "futures-lite" -version = "2.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f78e10609fe0e0b3f4157ffab1876319b5b0db102a2c60dc4626306dc46b44ad" -dependencies = [ - "fastrand", - "futures-core", - "futures-io", - "parking", - "pin-project-lite", -] - [[package]] name = "futures-macro" version = "0.3.31" @@ -3048,7 +2896,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -3063,6 +2911,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.31" @@ -3083,9 +2937,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.9" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", @@ -3148,7 +3002,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.12.0", + "indexmap 2.12.1", "slab", "tokio", "tokio-util", @@ -3166,8 +3020,8 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http 1.3.1", - "indexmap 2.12.0", + "http 1.4.0", + "indexmap 2.12.1", "slab", "tokio", "tokio-util", @@ -3213,14 +3067,19 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ "allocator-api2", "equivalent", - "foldhash", + "foldhash 0.1.5", ] [[package]] name = "hashbrown" 
-version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] [[package]] name = "hashlink" @@ -3237,12 +3096,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" - [[package]] name = "hex" version = "0.4.3" @@ -3301,12 +3154,11 @@ dependencies = [ [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -3328,7 +3180,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.3.1", + "http 1.4.0", ] [[package]] @@ -3339,7 +3191,7 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "pin-project-lite", ] @@ -3388,16 +3240,16 @@ dependencies = [ [[package]] name = "hyper" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" dependencies = [ "atomic-waker", "bytes", 
"futures-channel", "futures-core", "h2 0.4.12", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "httparse", "httpdate", @@ -3420,7 +3272,6 @@ dependencies = [ "hyper 0.14.32", "log", "rustls 0.21.12", - "rustls-native-certs 0.6.3", "tokio", "tokio-rustls 0.24.1", ] @@ -3431,37 +3282,37 @@ version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http 1.3.1", - "hyper 1.7.0", + "http 1.4.0", + "hyper 1.8.1", "hyper-util", - "rustls 0.23.34", - "rustls-native-certs 0.8.2", + "rustls 0.23.35", + "rustls-native-certs", "rustls-pki-types", "tokio", "tokio-rustls 0.26.4", "tower-service", - "webpki-roots 1.0.3", + "webpki-roots 1.0.4", ] [[package]] name = "hyper-util" -version = "0.1.17" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" +checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "futures-channel", "futures-core", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", - "hyper 1.7.0", + "hyper 1.8.1", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.1", + "socket2 0.5.10", "tokio", "tower-service", "tracing", @@ -3493,10 +3344,10 @@ dependencies = [ [[package]] name = "iceberg" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", - "apache-avro", + "apache-avro 0.21.0", "array-init", "arrow-arith", "arrow-array", @@ -3509,7 +3360,7 @@ dependencies = [ "as-any", "async-trait", "backon", - "base64 0.22.1", + "base64", "bimap", "bytes", "chrono", @@ -3543,10 +3394,8 @@ dependencies = [ "serde_json", "serde_repr", "serde_with", - "smol", - "strum 0.27.2", + "strum", "tempfile", - "thrift", "tokio", "typed-builder", "url", @@ -3556,7 +3405,7 @@ dependencies = [ [[package]] name = "iceberg-cache-moka" 
-version = "0.7.0" +version = "0.8.0" dependencies = [ "iceberg", "moka", @@ -3564,7 +3413,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-glue" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ -3581,7 +3430,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-hms" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ -3605,7 +3454,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-loader" -version = "0.7.0" +version = "0.8.0" dependencies = [ "async-trait", "iceberg", @@ -3621,12 +3470,12 @@ dependencies = [ [[package]] name = "iceberg-catalog-rest" -version = "0.7.0" +version = "0.8.0" dependencies = [ "async-trait", "chrono", "ctor", - "http 1.3.1", + "http 1.4.0", "iceberg", "iceberg_test_utils", "itertools 0.13.0", @@ -3644,7 +3493,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-s3tables" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ -3658,21 +3507,21 @@ dependencies = [ [[package]] name = "iceberg-catalog-sql" -version = "0.7.0" +version = "0.8.0" dependencies = [ "async-trait", "iceberg", "itertools 0.13.0", "regex", "sqlx", - "strum 0.27.2", + "strum", "tempfile", "tokio", ] [[package]] name = "iceberg-datafusion" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ -3688,7 +3537,7 @@ dependencies = [ [[package]] name = "iceberg-examples" -version = "0.7.0" +version = "0.8.0" dependencies = [ "futures", "iceberg", @@ -3698,7 +3547,7 @@ dependencies = [ [[package]] name = "iceberg-integration-tests" -version = "0.7.0" +version = "0.8.0" dependencies = [ "arrow-array", "arrow-schema", @@ -3717,7 +3566,7 @@ dependencies = [ [[package]] name = "iceberg-playground" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "clap", @@ -3739,7 +3588,7 @@ dependencies = [ [[package]] name = "iceberg-sqllogictest" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ 
-3761,7 +3610,7 @@ dependencies = [ [[package]] name = "iceberg_test_utils" -version = "0.7.0" +version = "0.8.0" dependencies = [ "tracing", "tracing-subscriber", @@ -3888,21 +3737,21 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "serde", "serde_core", ] [[package]] name = "indicatif" -version = "0.18.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade6dfcba0dfb62ad59e59e7241ec8912af34fd29e0e743e3db992bd278e8b65" +checksum = "9375e112e4b463ec1b1c6c011953545c65a30164fbab5b581df32b3abf0dcb88" dependencies = [ "console", "portable-atomic", @@ -3945,9 +3794,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "iri-string" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" dependencies = [ "memchr", "serde", @@ -3985,26 +3834,43 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jiff" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" dependencies = [ "jiff-static", + "jiff-tzdb-platform", "log", "portable-atomic", "portable-atomic-util", - "serde", + "serde_core", + "windows-sys 0.59.0", ] [[package]] name = "jiff-static" -version = "0.2.15" +version = "0.2.16" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", +] + +[[package]] +name = "jiff-tzdb" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1283705eb0a21404d2bfd6eef2a7593d240bc42a0bdb39db0ad6fa2ec026524" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" +dependencies = [ + "jiff-tzdb", ] [[package]] @@ -4019,9 +3885,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.82" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" dependencies = [ "once_cell", "wasm-bindgen", @@ -4033,7 +3899,7 @@ version = "9.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" dependencies = [ - "base64 0.22.1", + "base64", "js-sys", "pem", "ring", @@ -4116,19 +3982,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.177" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" - -[[package]] -name = "libloading" -version = "0.8.9" +version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" -dependencies = [ - "cfg-if", - "windows-link", -] +checksum = 
"37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" [[package]] name = "libm" @@ -4182,9 +4038,9 @@ dependencies = [ [[package]] name = "libz-rs-sys" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "840db8cf39d9ec4dd794376f38acc40d0fc65eec2a8f484f7fd375b84602becd" +checksum = "8b484ba8d4f775eeca644c452a56650e544bf7e617f1d170fe7298122ead5222" dependencies = [ "zlib-rs", ] @@ -4229,9 +4085,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "lru-slab" @@ -4241,9 +4097,9 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" -version = "0.11.5" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" dependencies = [ "twox-hash", ] @@ -4308,19 +4164,13 @@ dependencies = [ [[package]] name = "minijinja" -version = "2.12.0" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9f264d75233323f4b7d2f03aefe8a990690cdebfbfe26ea86bcbaec5e9ac990" +checksum = "0adbe6e92a6ce0fd6c4aac593fdfd3e3950b0f61b1a63aa9731eb6fd85776fa3" dependencies = [ "serde", ] -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "miniz_oxide" version = "0.8.9" @@ -4333,9 +4183,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.1.0" +version = "1.1.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", "wasi", @@ -4365,25 +4215,26 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "mockito" -version = "1.7.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7760e0e418d9b7e5777c0374009ca4c93861b9066f18cb334a20ce50ab63aa48" +checksum = "7e0603425789b4a70fcc4ac4f5a46a566c116ee3e2a6b768dc623f7719c611de" dependencies = [ "assert-json-diff", "bytes", "colored", - "futures-util", - "http 1.3.1", + "futures-core", + "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-util", "log", + "pin-project-lite", "rand 0.9.2", "regex", "serde_json", @@ -4433,7 +4284,7 @@ checksum = "b40e46c845ac234bcba19db7ab252bc2778cbadd516a466d2f12b1580852d136" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -4459,7 +4310,7 @@ checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -4508,16 +4359,6 @@ dependencies = [ "libc", ] -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -4527,20 +4368,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - 
"num-rational", - "num-traits", -] - [[package]] name = "num-bigint" version = "0.4.6" @@ -4554,11 +4381,10 @@ dependencies = [ [[package]] name = "num-bigint-dig" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" +checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" dependencies = [ - "byteorder", "lazy_static", "libm", "num-integer", @@ -4604,17 +4430,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -4625,16 +4440,6 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "num_enum" version = "0.7.5" @@ -4654,7 +4459,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -4673,24 +4478,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" dependencies = [ "async-trait", - "base64 0.22.1", + "base64", "bytes", "chrono", "form_urlencoded", "futures", - "http 1.3.1", + "http 1.4.0", "http-body-util", "humantime", - "hyper 1.7.0", + "hyper 1.8.1", "itertools 0.14.0", "md-5", "parking_lot", "percent-encoding", - "quick-xml 0.38.3", + "quick-xml 0.38.4", "rand 0.9.2", "reqwest", "ring", - "rustls-pemfile 2.2.0", + "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", @@ -4717,29 +4522,30 @@ checksum = 
"384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "opendal" -version = "0.54.1" +version = "0.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42afda58fa2cf50914402d132cc1caacff116a85d10c72ab2082bb7c50021754" +checksum = "d075ab8a203a6ab4bc1bce0a4b9fe486a72bf8b939037f4b78d95386384bc80a" dependencies = [ "anyhow", "backon", - "base64 0.22.1", + "base64", "bytes", - "chrono", "crc32c", "futures", "getrandom 0.2.16", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", + "jiff", "log", "md-5", "percent-encoding", - "quick-xml 0.38.3", + "quick-xml 0.38.4", "reqsign", "reqwest", "serde", "serde_json", "tokio", + "url", "uuid", ] @@ -4828,9 +4634,9 @@ dependencies = [ [[package]] name = "parquet" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" +checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -4840,17 +4646,18 @@ dependencies = [ "arrow-ipc", "arrow-schema", "arrow-select", - "base64 0.22.1", + "base64", "brotli", "bytes", "chrono", "flate2", "futures", "half", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "lz4_flex", - "num", "num-bigint", + "num-integer", + "num-traits", "object_store", "paste", "ring", @@ -4871,31 +4678,31 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "pbjson" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7e6349fa080353f4a597daffd05cb81572a9c031a6d4fff7e504947496fcc68" +checksum = "898bac3fa00d0ba57a4e8289837e965baa2dee8c3749f3b11d45a64b4223d9c3" dependencies = [ - "base64 0.21.7", + "base64", "serde", ] [[package]] name = "pbjson-build" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" +checksum = "af22d08a625a2213a78dbb0ffa253318c5c79ce3133d32d296655a7bdfb02095" dependencies = [ "heck", - "itertools 0.13.0", + "itertools 0.14.0", "prost", "prost-types", ] [[package]] name = "pbjson-types" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e54e5e7bfb1652f95bc361d76f3c780d8e526b134b85417e774166ee941f0887" +checksum = "8e748e28374f10a330ee3bb9f29b828c0ac79831a32bab65015ad9b661ead526" dependencies = [ "bytes", "chrono", @@ -4922,7 +4729,7 @@ version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" dependencies = [ - "base64 0.22.1", + "base64", "serde_core", ] @@ -4948,7 +4755,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", - "indexmap 2.12.0", + "indexmap 2.12.1", ] [[package]] @@ -4959,7 +4766,7 @@ checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.12.0", + "indexmap 2.12.1", "serde", ] @@ -5020,7 +4827,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5035,17 +4842,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" -[[package]] -name = "piper" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96c8c490f422ef9a4efd2cb5b42b76c8613d7e7dfc1caf667b8a3350a5acc066" -dependencies = [ - "atomic-waker", - "fastrand", - "futures-io", -] - [[package]] name = "pkcs1" version = "0.7.5" @@ -5090,20 +4886,6 @@ version = 
"0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" -[[package]] -name = "polling" -version = "3.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d0e4f59085d47d8241c88ead0f274e8a0cb551f3625263c05eb8dd897c34218" -dependencies = [ - "cfg-if", - "concurrent-queue", - "hermit-abi", - "pin-project-lite", - "rustix", - "windows-sys 0.61.2", -] - [[package]] name = "port_scanner" version = "0.1.5" @@ -5131,7 +4913,7 @@ version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbef655056b916eb868048276cfd5d6a7dea4f81560dfd047f97c8c6fe3fcfd4" dependencies = [ - "base64 0.22.1", + "base64", "byteorder", "bytes", "fallible-iterator", @@ -5221,7 +5003,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5230,7 +5012,7 @@ version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" dependencies = [ - "toml_edit 0.23.7", + "toml_edit 0.23.9", ] [[package]] @@ -5244,9 +5026,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" dependencies = [ "bytes", "prost-derive", @@ -5254,9 +5036,9 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" +checksum = 
"ac6c3320f9abac597dcbc668774ef006702672474aad53c6d596b62e487b40b1" dependencies = [ "heck", "itertools 0.14.0", @@ -5268,28 +5050,28 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.108", + "syn 2.0.111", "tempfile", ] [[package]] name = "prost-derive" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" dependencies = [ "anyhow", "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "prost-types" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" dependencies = [ "prost", ] @@ -5341,7 +5123,7 @@ checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5362,9 +5144,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.38.3" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89" +checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" dependencies = [ "memchr", "serde", @@ -5382,8 +5164,8 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.34", - "socket2 0.6.1", + "rustls 0.23.35", + "socket2 0.5.10", "thiserror 2.0.17", "tokio", "tracing", @@ -5402,7 +5184,7 @@ dependencies = [ "rand 0.9.2", "ring", "rustc-hash", - "rustls 0.23.34", + "rustls 0.23.35", "rustls-pki-types", "slab", "thiserror 2.0.17", @@ -5420,16 +5202,16 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.1", + "socket2 0.5.10", 
"tracing", "windows-sys 0.59.0", ] [[package]] name = "quote" -version = "1.0.41" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] @@ -5543,7 +5325,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5583,7 +5365,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5623,14 +5405,20 @@ checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "regress" -version = "0.10.4" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145bb27393fe455dd64d6cbc8d059adfa392590a45eadf079c01b11857e7b010" +checksum = "2057b2325e68a893284d1538021ab90279adac1139957ca2a74426c6f118fb48" dependencies = [ - "hashbrown 0.15.5", + "hashbrown 0.16.1", "memchr", ] +[[package]] +name = "relative-path" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" + [[package]] name = "rend" version = "0.4.2" @@ -5654,14 +5442,14 @@ checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" dependencies = [ "anyhow", "async-trait", - "base64 0.22.1", + "base64", "chrono", "form_urlencoded", "getrandom 0.2.16", "hex", "hmac", "home", - "http 1.3.1", + "http 1.4.0", "jsonwebtoken", "log", "once_cell", @@ -5680,19 +5468,19 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.24" +version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "futures-core", "futures-util", "h2 0.4.12", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-rustls 0.27.7", "hyper-util", "js-sys", @@ -5700,8 +5488,8 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.34", - "rustls-native-certs 0.8.2", + "rustls 0.23.35", + "rustls-native-certs", "rustls-pki-types", "serde", "serde_json", @@ -5718,7 +5506,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 1.0.3", + "webpki-roots 1.0.4", ] [[package]] @@ -5737,9 +5525,9 @@ dependencies = [ [[package]] name = "rkyv" -version = "0.7.45" +version = "0.7.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" +checksum = "2297bf9c81a3f0dc96bc9521370b88f054168c29826a75e89c55ff196e7ed6a1" dependencies = [ "bitvec", "bytecheck", @@ -5747,7 +5535,7 @@ dependencies = [ "hashbrown 0.12.3", "ptr_meta 0.1.4", "rend 0.4.2", - "rkyv_derive 0.7.45", + "rkyv_derive 0.7.46", "seahash", "tinyvec", "uuid", @@ -5755,27 +5543,27 @@ dependencies = [ [[package]] name = "rkyv" -version = "0.8.12" +version = "0.8.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35a640b26f007713818e9a9b65d34da1cf58538207b052916a83d80e43f3ffa4" +checksum = "8b2e88acca7157d83d789836a3987dafc12bc3d88a050e54b8fe9ea4aaa29d20" dependencies = [ "bytes", - "hashbrown 0.15.5", - "indexmap 2.12.0", + "hashbrown 0.16.1", + "indexmap 2.12.1", "munge", "ptr_meta 0.3.1", "rancor", "rend 0.5.3", - "rkyv_derive 0.8.12", + "rkyv_derive 0.8.13", "tinyvec", "uuid", ] [[package]] name = "rkyv_derive" -version = "0.7.45" +version = "0.7.46" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" +checksum = "84d7b42d4b8d06048d3ac8db0eb31bcb942cbeb709f0b5f2b2ebde398d3038f5" dependencies = [ "proc-macro2", "quote", @@ -5784,13 +5572,13 @@ dependencies = [ [[package]] name = "rkyv_derive" -version = "0.8.12" +version = "0.8.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd83f5f173ff41e00337d97f6572e416d022ef8a19f371817259ae960324c482" +checksum = "7f6dffea3c91fa91a3c0fc8a061b0e27fef25c6304728038a6d6bcb1c58ba9bd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -5805,9 +5593,9 @@ dependencies = [ [[package]] name = "rsa" -version = "0.9.8" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78928ac1ed176a5ca1d17e578a1825f3d81ca54cf41053a592584b020cfd691b" +checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" dependencies = [ "const-oid", "digest", @@ -5824,6 +5612,35 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rstest" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5a3193c063baaa2a95a33f03035c8a72b83d97a54916055ba22d35ed3839d49" +dependencies = [ + "futures-timer", + "futures-util", + "rstest_macros", +] + +[[package]] +name = "rstest_macros" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0" +dependencies = [ + "cfg-if", + "glob", + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "relative-path", + "rustc_version", + "syn 2.0.111", + "unicode-ident", +] + [[package]] name = "rust-ini" version = "0.21.3" @@ -5846,7 +5663,7 @@ dependencies = [ "num-traits", "postgres-types", "rand 0.8.5", - "rkyv 0.7.45", + "rkyv 0.7.46", "serde", "serde_json", ] @@ -5896,9 +5713,9 @@ dependencies = [ [[package]] 
name = "rustls" -version = "0.23.34" +version = "0.23.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9586e9ee2b4f8fab52a0048ca7334d7024eef48e2cb9407e3497bb7cab7fa7" +checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ "aws-lc-rs", "once_cell", @@ -5909,18 +5726,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "rustls-native-certs" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" -dependencies = [ - "openssl-probe", - "rustls-pemfile 1.0.4", - "schannel", - "security-framework 2.11.1", -] - [[package]] name = "rustls-native-certs" version = "0.8.2" @@ -5930,16 +5735,7 @@ dependencies = [ "openssl-probe", "rustls-pki-types", "schannel", - "security-framework 3.5.1", -] - -[[package]] -name = "rustls-pemfile" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" -dependencies = [ - "base64 0.21.7", + "security-framework", ] [[package]] @@ -5953,9 +5749,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94182ad936a0c91c324cd46c6511b9510ed16af436d7b5bab34beab0afd55f7a" +checksum = "708c0f9d5f54ba0272468c1d306a52c495b31fa155e91bc25371e6df7996908c" dependencies = [ "web-time", "zeroize", @@ -6070,9 +5866,9 @@ dependencies = [ [[package]] name = "schemars" -version = "1.0.4" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" +checksum = "9558e172d4e8533736ba97870c4b2cd63f84b382a3d6eb063da41b91cce17289" dependencies = [ "dyn-clone", "ref-cast", @@ -6089,7 +5885,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - 
"syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6125,19 +5921,6 @@ version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" -[[package]] -name = "security-framework" -version = "2.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" -dependencies = [ - "bitflags", - "core-foundation 0.9.4", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - [[package]] name = "security-framework" version = "3.5.1" @@ -6145,7 +5928,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" dependencies = [ "bitflags", - "core-foundation 0.10.1", + "core-foundation", "core-foundation-sys", "libc", "security-framework-sys", @@ -6214,7 +5997,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6225,7 +6008,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6249,7 +6032,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6270,7 +6053,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6287,17 +6070,17 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.16.0" +version = "3.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10574371d41b0d9b2cff89418eda27da52bcaff2cc8741db26382a77c29131f1" +checksum = "4fa237f2807440d238e0364a218270b98f767a00d3dada77b1c53ae88940e2e7" dependencies = [ - "base64 0.22.1", + "base64", 
"chrono", "hex", "indexmap 1.9.3", - "indexmap 2.12.0", + "indexmap 2.12.1", "schemars 0.9.0", - "schemars 1.0.4", + "schemars 1.1.0", "serde_core", "serde_json", "serde_with_macros", @@ -6306,14 +6089,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.16.0" +version = "3.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08a72d8216842fdd57820dc78d840bef99248e35fb2554ff923319e60f2d686b" +checksum = "52a8e3ca0ca629121f70ab50f95249e5a6f925cc0f6ffe8256c45b728875706c" dependencies = [ "darling 0.21.3", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6322,7 +6105,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "itoa", "ryu", "serde", @@ -6368,9 +6151,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.6" +version = "1.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad" dependencies = [ "libc", ] @@ -6387,9 +6170,9 @@ dependencies = [ [[package]] name = "simd-adler32" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" [[package]] name = "simdutf8" @@ -6436,23 +6219,6 @@ dependencies = [ "serde", ] -[[package]] -name = "smol" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33bd3e260892199c3ccfc487c88b2da2265080acb316cd920da72fdfd7c599f" -dependencies = [ - "async-channel", - 
"async-executor", - "async-fs", - "async-io", - "async-lock", - "async-net", - "async-process", - "blocking", - "futures-lite", -] - [[package]] name = "snap" version = "1.1.1" @@ -6510,9 +6276,9 @@ dependencies = [ [[package]] name = "sonic-simd" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b421f7b6aa4a5de8f685aaf398dfaa828346ee639d2b1c1061ab43d40baa6223" +checksum = "5707edbfb34a40c9f2a55fa09a49101d9fec4e0cc171ce386086bd9616f34257" dependencies = [ "cfg-if", ] @@ -6563,9 +6329,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.58.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", "recursive", @@ -6580,7 +6346,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6602,7 +6368,7 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "crc", "crossbeam-queue", @@ -6614,12 +6380,12 @@ dependencies = [ "futures-util", "hashbrown 0.15.5", "hashlink", - "indexmap 2.12.0", + "indexmap 2.12.1", "log", "memchr", "once_cell", "percent-encoding", - "rustls 0.23.34", + "rustls 0.23.35", "serde", "serde_json", "sha2", @@ -6642,7 +6408,7 @@ dependencies = [ "quote", "sqlx-core", "sqlx-macros-core", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6663,7 +6429,7 @@ dependencies = [ "sha2", "sqlx-core", "sqlx-sqlite", - "syn 2.0.108", + "syn 2.0.111", "tokio", "url", ] @@ -6675,7 +6441,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" dependencies = [ "atoi", - "base64 0.22.1", + "base64", "bitflags", "byteorder", "bytes", @@ -6716,7 +6482,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" dependencies = [ "atoi", - "base64 0.22.1", + "base64", "bitflags", "byteorder", "crc", @@ -6806,32 +6572,13 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" - [[package]] name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" dependencies = [ - "strum_macros 0.27.2", -] - -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.108", + "strum_macros", ] [[package]] @@ -6843,7 +6590,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6858,9 +6605,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.58.0" +version = "0.62.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de6d24c270c6c672a86c183c3a8439ba46c1936f93cf7296aa692de3b0ff0228" +checksum = "21f1cb6d0bcd097a39fc25f7236236be29881fe122e282e4173d6d007a929927" dependencies = [ "heck", "pbjson", @@ -6876,7 +6623,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.108", + "syn 2.0.111", "typify", "walkdir", ] @@ -6900,9 +6647,9 @@ 
dependencies = [ [[package]] name = "syn" -version = "2.0.108" +version = "2.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" dependencies = [ "proc-macro2", "quote", @@ -6926,7 +6673,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6986,7 +6733,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -6997,7 +6744,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7009,15 +6756,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "threadpool" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" -dependencies = [ - "num_cpus", -] - [[package]] name = "thrift" version = "0.17.0" @@ -7026,9 +6764,7 @@ checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" dependencies = [ "byteorder", "integer-encoding 3.0.4", - "log", "ordered-float 2.10.1", - "threadpool", ] [[package]] @@ -7121,7 +6857,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7140,7 +6876,7 @@ version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls 0.23.34", + "rustls 0.23.35", "tokio", ] @@ -7157,9 +6893,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.16" 
+version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" dependencies = [ "bytes", "futures-core", @@ -7204,7 +6940,7 @@ version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "serde", "serde_spanned", "toml_datetime 0.6.11", @@ -7214,11 +6950,11 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.23.7" +version = "0.23.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" +checksum = "5d7cbc3b4b49633d57a0509303158ca50de80ae32c265093b24c414705807832" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "toml_datetime 0.7.3", "toml_parser", "winnow", @@ -7256,14 +6992,14 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ "bitflags", "bytes", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "iri-string", "pin-project-lite", @@ -7286,9 +7022,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647" dependencies = [ "log", "pin-project-lite", @@ -7298,20 +7034,20 @@ dependencies = [ [[package]] name = 
"tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" dependencies = [ "once_cell", "valuable", @@ -7330,9 +7066,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.20" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" dependencies = [ "nu-ansi-term", "sharded-slab", @@ -7371,7 +7107,7 @@ checksum = "3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7382,9 +7118,9 @@ checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "typify" -version = "0.4.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7144144e97e987c94758a3017c920a027feac0799df325d6df4fc8f08d02068e" +checksum = "e6d5bcc6f62eb1fa8aa4098f39b29f93dcb914e17158b76c50360911257aa629" dependencies = [ "typify-impl", "typify-macro", @@ -7392,9 +7128,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.4.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "062879d46aa4c9dfe0d33b035bbaf512da192131645d05deacb7033ec8581a09" +checksum = 
"a1eb359f7ffa4f9ebe947fa11a1b2da054564502968db5f317b7e37693cb2240" dependencies = [ "heck", "log", @@ -7405,16 +7141,16 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.108", + "syn 2.0.111", "thiserror 2.0.17", "unicode-ident", ] [[package]] name = "typify-macro" -version = "0.4.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9708a3ceb6660ba3f8d2b8f0567e7d4b8b198e2b94d093b8a6077a751425de9e" +checksum = "911c32f3c8514b048c1b228361bebb5e6d73aeec01696e8cc0e82e2ffef8ab7a" dependencies = [ "proc-macro2", "quote", @@ -7423,7 +7159,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.108", + "syn 2.0.111", "typify-impl", ] @@ -7435,24 +7171,24 @@ checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] name = "unicode-ident" -version = "1.0.20" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "unicode-normalization" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" dependencies = [ "tinyvec", ] [[package]] name = "unicode-properties" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" [[package]] name = "unicode-segmentation" @@ -7474,9 +7210,9 @@ checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] name = "unit-prefix" -version = "0.5.1" +version = "0.5.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "323402cff2dd658f39ca17c789b502021b3f18707c91cdf22e3838e1b4023817" +checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" [[package]] name = "unsafe-libyaml" @@ -7522,13 +7258,13 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.18.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" dependencies = [ "getrandom 0.3.4", "js-sys", - "serde", + "serde_core", "wasm-bindgen", ] @@ -7654,9 +7390,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" dependencies = [ "cfg-if", "once_cell", @@ -7667,9 +7403,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.55" +version = "0.4.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" dependencies = [ "cfg-if", "js-sys", @@ -7680,9 +7416,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -7690,22 +7426,22 @@ dependencies = [ 
[[package]] name = "wasm-bindgen-macro-support" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" dependencies = [ "unicode-ident", ] @@ -7725,9 +7461,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.82" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" dependencies = [ "js-sys", "wasm-bindgen", @@ -7749,14 +7485,14 @@ version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "webpki-roots 1.0.3", + "webpki-roots 1.0.4", ] [[package]] name = "webpki-roots" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b130c0d2d49f8b6889abc456e795e82525204f27c42cf767cf0d7734e089b8" +checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" dependencies = [ "rustls-pki-types", ] @@ -7801,7 +7537,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -7812,7 +7548,7 @@ checksum = 
"3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -8072,9 +7808,9 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" -version = "0.7.13" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" dependencies = [ "memchr", ] @@ -8106,12 +7842,6 @@ version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" -[[package]] -name = "xxhash-rust" -version = "0.8.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" - [[package]] name = "xz2" version = "0.1.7" @@ -8146,28 +7876,28 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.27" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.27" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] @@ -8187,7 +7917,7 @@ checksum = 
"d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", "synstructure", ] @@ -8227,14 +7957,14 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.111", ] [[package]] name = "zlib-rs" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f06ae92f42f5e5c42443fd094f245eb656abf56dd7cce9b8b263236565e00f2" +checksum = "36134c44663532e6519d7a6dfdbbe06f6f8192bde8ae9ed076e9b213f0e31df7" [[package]] name = "zstd" diff --git a/Cargo.toml b/Cargo.toml index c10c01d94a..56cd1801cc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,25 +31,25 @@ resolver = "2" [workspace.package] edition = "2024" homepage = "https://rust.iceberg.apache.org/" -version = "0.7.0" +version = "0.8.0" license = "Apache-2.0" repository = "https://github.com/apache/iceberg-rust" # Check the MSRV policy in README.md before changing this -rust-version = "1.87" +rust-version = "1.88" [workspace.dependencies] anyhow = "1.0.72" -apache-avro = { version = "0.20", features = ["zstandard"] } +apache-avro = { version = "0.21", features = ["zstandard"] } array-init = "2" -arrow-arith = "56.2" -arrow-array = "56.2" -arrow-buffer = "56.2" -arrow-cast = "56.2" -arrow-ord = "56.2" -arrow-schema = "56.2" -arrow-select = "56.2" -arrow-string = "56.2" +arrow-arith = "57.0" +arrow-array = "57.0" +arrow-buffer = "57.0" +arrow-cast = "57.0" +arrow-ord = "57.0" +arrow-schema = "57.0" +arrow-select = "57.0" +arrow-string = "57.0" as-any = "0.3.2" async-trait = "0.1.89" aws-config = "1.8.7" @@ -62,9 +62,9 @@ bytes = "1.10" chrono = "0.4.41" clap = { version = "4.5.48", features = ["derive", "cargo"] } ctor = "0.2.8" -datafusion = "50" -datafusion-cli = "50" -datafusion-sqllogictest = "50" +datafusion = "51.0" +datafusion-cli = "51.0" +datafusion-sqllogictest = "51.0" derive_builder = 
"0.20" dirs = "6" enum-ordinalize = "4.3.0" @@ -78,13 +78,13 @@ futures = "0.3" hive_metastore = "0.2.0" home = "=0.5.11" http = "1.2" -iceberg = { version = "0.7.0", path = "./crates/iceberg" } -iceberg-catalog-glue = { version = "0.7.0", path = "./crates/catalog/glue" } -iceberg-catalog-hms = { version = "0.7.0", path = "./crates/catalog/hms" } -iceberg-catalog-sql = { version = "0.7.0", path = "./crates/catalog/sql" } -iceberg-catalog-rest = { version = "0.7.0", path = "./crates/catalog/rest" } -iceberg-catalog-s3tables = { version = "0.7.0", path = "./crates/catalog/s3tables" } -iceberg-datafusion = { version = "0.7.0", path = "./crates/integrations/datafusion" } +iceberg = { version = "0.8.0", path = "./crates/iceberg" } +iceberg-catalog-glue = { version = "0.8.0", path = "./crates/catalog/glue" } +iceberg-catalog-hms = { version = "0.8.0", path = "./crates/catalog/hms" } +iceberg-catalog-rest = { version = "0.8.0", path = "./crates/catalog/rest" } +iceberg-catalog-s3tables = { version = "0.8.0", path = "./crates/catalog/s3tables" } +iceberg-catalog-sql = { version = "0.8.0", path = "./crates/catalog/sql" } +iceberg-datafusion = { version = "0.8.0", path = "./crates/integrations/datafusion" } indicatif = "0.18" itertools = "0.13" libtest-mimic = "0.8.1" @@ -99,9 +99,9 @@ motore-macros = "0.4.3" murmur3 = "0.5.2" num-bigint = "0.4.6" once_cell = "1.20" -opendal = "0.54.0" +opendal = "0.55.0" ordered-float = "4" -parquet = "56.2" +parquet = "57.0" pilota = "0.11.10" port_scanner = "0.1.5" pretty_assertions = "1.4" @@ -109,14 +109,13 @@ rand = "0.8.5" regex = "1.11.3" reqwest = { version = "0.12.12", default-features = false, features = ["json"] } roaring = { version = "0.11" } -rust_decimal = "1.37.2" +rust_decimal = { version = "1.39", default-features = false, features = ["std"] } serde = { version = "1.0.219", features = ["rc"] } serde_bytes = "0.11.17" serde_derive = "1.0.219" serde_json = "1.0.142" serde_repr = "0.1.16" serde_with = "3.4" -smol = "2.0.2" 
sqllogictest = "0.28.3" sqlx = { version = "0.8.1", default-features = false } stacker = "0.1.20" @@ -132,4 +131,4 @@ url = "2.5.7" uuid = { version = "1.18", features = ["v7"] } volo = "0.10.6" volo-thrift = "0.10.8" -zstd = "0.13.3" +zstd = "0.13.3" \ No newline at end of file diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 8249414b8d..d33abed581 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -128,9 +128,9 @@ checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "apache-avro" -version = "0.20.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a033b4ced7c585199fb78ef50fca7fe2f444369ec48080c5fd072efa1a03cc7" +checksum = "36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf" dependencies = [ "bigdecimal", "bon", @@ -180,9 +180,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +checksum = "4df8bb5b0bd64c0b9bc61317fcc480bad0f00e56d3bc32c69a4c8dada4786bae" dependencies = [ "arrow-arith", "arrow-array", @@ -202,23 +202,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +checksum = "a1a640186d3bd30a24cb42264c2dafb30e236a6f50d510e56d40b708c9582491" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num", + "num-traits", ] [[package]] name = "arrow-array" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +checksum = 
"219fe420e6800979744c8393b687afb0252b3f8a89b91027d27887b72aa36d31" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -228,25 +228,28 @@ dependencies = [ "chrono-tz", "half", "hashbrown 0.16.0", - "num", + "num-complex", + "num-integer", + "num-traits", ] [[package]] name = "arrow-buffer" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +checksum = "76885a2697a7edf6b59577f568b456afc94ce0e2edc15b784ce3685b6c3c5c27" dependencies = [ "bytes", "half", - "num", + "num-bigint", + "num-traits", ] [[package]] name = "arrow-cast" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +checksum = "9c9ebb4c987e6b3b236fb4a14b20b34835abfdd80acead3ccf1f9bf399e1f168" dependencies = [ "arrow-array", "arrow-buffer", @@ -259,15 +262,15 @@ dependencies = [ "comfy-table", "half", "lexical-core", - "num", + "num-traits", "ryu", ] [[package]] name = "arrow-csv" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" +checksum = "92386159c8d4bce96f8bd396b0642a0d544d471bdc2ef34d631aec80db40a09c" dependencies = [ "arrow-array", "arrow-cast", @@ -280,21 +283,22 @@ dependencies = [ [[package]] name = "arrow-data" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +checksum = "727681b95de313b600eddc2a37e736dcb21980a40f640314dcf360e2f36bc89b" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num", + "num-integer", + "num-traits", ] [[package]] name = "arrow-ipc" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +checksum = "da9ba92e3de170295c98a84e5af22e2b037f0c7b32449445e6c493b5fca27f27" dependencies = [ "arrow-array", "arrow-buffer", @@ -308,9 +312,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" +checksum = "b969b4a421ae83828591c6bf5450bd52e6d489584142845ad6a861f42fe35df8" dependencies = [ "arrow-array", "arrow-buffer", @@ -320,19 +324,21 @@ dependencies = [ "chrono", "half", "indexmap 2.12.0", + "itoa", "lexical-core", "memchr", - "num", - "serde", + "num-traits", + "ryu", + "serde_core", "serde_json", "simdutf8", ] [[package]] name = "arrow-ord" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +checksum = "141c05298b21d03e88062317a1f1a73f5ba7b6eb041b350015b1cd6aabc0519b" dependencies = [ "arrow-array", "arrow-buffer", @@ -343,9 +349,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d924b32e96f8bb74d94cd82bd97b313c432fcb0ea331689ef9e7c6b8be4b258" +checksum = "cfcfb2be2e9096236f449c11f425cddde18c4cc540f516d90f066f10a29ed515" dependencies = [ "arrow-array", "arrow-data", @@ -355,9 +361,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +checksum = "c5f3c06a6abad6164508ed283c7a02151515cef3de4b4ff2cebbcaeb85533db2" dependencies = [ "arrow-array", "arrow-buffer", @@ -368,34 +374,34 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.2.0" +version = 
"57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +checksum = "9cfa7a03d1eee2a4d061476e1840ad5c9867a544ca6c4c59256496af5d0a8be5" dependencies = [ "bitflags", - "serde", + "serde_core", "serde_json", ] [[package]] name = "arrow-select" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +checksum = "bafa595babaad59f2455f4957d0f26448fb472722c186739f4fac0823a1bdb47" dependencies = [ "ahash 0.8.12", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num", + "num-traits", ] [[package]] name = "arrow-string" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +checksum = "32f46457dbbb99f2650ff3ac23e46a929e0ab81db809b02aa5511c258348bef2" dependencies = [ "arrow-array", "arrow-buffer", @@ -403,7 +409,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num", + "num-traits", "regex", "regex-syntax", ] @@ -662,8 +668,20 @@ version = "0.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" dependencies = [ - "bytecheck_derive", - "ptr_meta", + "bytecheck_derive 0.6.12", + "ptr_meta 0.1.4", + "simdutf8", +] + +[[package]] +name = "bytecheck" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0caa33a2c0edca0419d15ac723dff03f1956f7978329b1e3b5fdaaaed9d3ca8b" +dependencies = [ + "bytecheck_derive 0.8.2", + "ptr_meta 0.3.1", + "rancor", "simdutf8", ] @@ -678,6 +696,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "bytecheck_derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"89385e82b5d1821d2219e0b095efa2cc1f246cbf99080f3be46a1a85c0d392d9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = "bytemuck" version = "1.24.0" @@ -1028,12 +1057,11 @@ dependencies = [ [[package]] name = "datafusion" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a" +checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" dependencies = [ "arrow", - "arrow-ipc", "arrow-schema", "async-trait", "bytes", @@ -1044,6 +1072,7 @@ dependencies = [ "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", + "datafusion-datasource-arrow", "datafusion-datasource-csv", "datafusion-datasource-json", "datafusion-datasource-parquet", @@ -1072,6 +1101,7 @@ dependencies = [ "parquet", "rand 0.9.2", "regex", + "rstest", "sqlparser", "tempfile", "tokio", @@ -1083,9 +1113,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "187622262ad8f7d16d3be9202b4c1e0116f1c9aa387e5074245538b755261621" +checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" dependencies = [ "arrow", "async-trait", @@ -1098,7 +1128,6 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-session", - "datafusion-sql", "futures", "itertools 0.14.0", "log", @@ -1109,9 +1138,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9657314f0a32efd0382b9a46fdeb2d233273ece64baa68a7c45f5a192daf0f83" +checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" dependencies = [ "arrow", "async-trait", @@ -1121,10 +1150,11 @@ dependencies = [ "datafusion-execution", "datafusion-expr", 
"datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-plan", - "datafusion-session", "futures", + "itertools 0.14.0", "log", "object_store", "tokio", @@ -1132,14 +1162,13 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1" +checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" dependencies = [ "ahash 0.8.12", "arrow", "arrow-ipc", - "base64", "chrono", "half", "hashbrown 0.14.5", @@ -1157,9 +1186,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e" +checksum = "b92065bbc6532c6651e2f7dd30b55cba0c7a14f860c7e1d15f165c41a1868d95" dependencies = [ "futures", "log", @@ -1168,9 +1197,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86" +checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" dependencies = [ "arrow", "async-compression", @@ -1193,9 +1222,7 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "parquet", "rand 0.9.2", - "tempfile", "tokio", "tokio-util", "url", @@ -1203,22 +1230,44 @@ dependencies = [ "zstd", ] +[[package]] +name = "datafusion-datasource-arrow" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" +dependencies = [ + "arrow", + "arrow-ipc", + "async-trait", + "bytes", + "datafusion-common", + "datafusion-common-runtime", + 
"datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools 0.14.0", + "object_store", + "tokio", +] + [[package]] name = "datafusion-datasource-csv" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64533a90f78e1684bfb113d200b540f18f268134622d7c96bbebc91354d04825" +checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", @@ -1230,49 +1279,44 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d7ebeb12c77df0aacad26f21b0d033aeede423a64b2b352f53048a75bf1d6e6" +checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", "object_store", - "serde_json", "tokio", ] [[package]] name = "datafusion-datasource-parquet" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09e783c4c7d7faa1199af2df4761c68530634521b176a8d1331ddbc5a5c75133" +checksum = "43d0b60ffd66f28bfb026565d62b0a6cbc416da09814766a3797bba7d85a3cd9" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", 
"datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", - "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-pruning", "datafusion-session", @@ -1282,21 +1326,20 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand 0.9.2", "tokio", ] [[package]] name = "datafusion-doc" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429" +checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" [[package]] name = "datafusion-execution" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531" +checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" dependencies = [ "arrow", "async-trait", @@ -1314,9 +1357,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe" +checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" dependencies = [ "arrow", "async-trait", @@ -1328,6 +1371,7 @@ dependencies = [ "datafusion-functions-window-common", "datafusion-physical-expr-common", "indexmap 2.12.0", + "itertools 0.14.0", "paste", "recursive", "serde_json", @@ -1336,9 +1380,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6" +checksum = 
"5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" dependencies = [ "arrow", "datafusion-common", @@ -1349,9 +1393,9 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25ddb7c4e645df080c27dad13a198d191da328dd1c98e198664a7a0f64b335cc" +checksum = "ec510e7787641279b0336e8b79e4b7bd1385d5976875ff9b97f4269ce5231a67" dependencies = [ "abi_stable", "arrow", @@ -1359,6 +1403,7 @@ dependencies = [ "async-ffi", "async-trait", "datafusion", + "datafusion-common", "datafusion-functions-aggregate-common", "datafusion-proto", "datafusion-proto-common", @@ -1371,9 +1416,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" +checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" dependencies = [ "arrow", "arrow-buffer", @@ -1391,6 +1436,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "num-traits", "rand 0.9.2", "regex", "sha2", @@ -1400,9 +1446,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf" +checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" dependencies = [ "ahash 0.8.12", "arrow", @@ -1421,9 +1467,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17" +checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" dependencies = [ "ahash 0.8.12", "arrow", @@ 
-1434,9 +1480,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3" +checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" dependencies = [ "arrow", "arrow-ord", @@ -1444,6 +1490,7 @@ dependencies = [ "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", @@ -1456,9 +1503,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37ff8a99434fbbad604a7e0669717c58c7c4f14c472d45067c4b016621d981" +checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" dependencies = [ "arrow", "async-trait", @@ -1472,9 +1519,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b" +checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" dependencies = [ "arrow", "datafusion-common", @@ -1490,9 +1537,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f" +checksum = "53ae9bcc39800820d53a22d758b3b8726ff84a5a3e24cecef04ef4e5fdf1c7cc" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1500,20 +1547,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "50.3.0" +version = "51.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730" +checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = [ - "datafusion-expr", + "datafusion-doc", "quote", "syn 2.0.108", ] [[package]] name = "datafusion-optimizer" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a" +checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" dependencies = [ "arrow", "chrono", @@ -1531,9 +1578,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6" +checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" dependencies = [ "ahash 0.8.12", "arrow", @@ -1546,7 +1593,6 @@ dependencies = [ "hashbrown 0.14.5", "indexmap 2.12.0", "itertools 0.14.0", - "log", "parking_lot", "paste", "petgraph", @@ -1554,9 +1600,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "815acced725d30601b397e39958e0e55630e0a10d66ef7769c14ae6597298bb0" +checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" dependencies = [ "arrow", "datafusion-common", @@ -1569,9 +1615,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3" +checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" dependencies = [ "ahash 0.8.12", "arrow", @@ -1583,9 +1629,9 @@ 
dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b7d623eb6162a3332b564a0907ba00895c505d101b99af78345f1acf929b5c" +checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" dependencies = [ "arrow", "datafusion-common", @@ -1597,15 +1643,14 @@ dependencies = [ "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", - "log", "recursive", ] [[package]] name = "datafusion-physical-plan" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c" +checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" dependencies = [ "ahash 0.8.12", "arrow", @@ -1634,15 +1679,26 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7df9f606892e6af45763d94d210634eec69b9bb6ced5353381682ff090028a3" +checksum = "d368093a98a17d1449b1083ac22ed16b7128e4c67789991869480d8c4a40ecb9" dependencies = [ "arrow", "chrono", - "datafusion", + "datafusion-catalog", + "datafusion-catalog-listing", "datafusion-common", + "datafusion-datasource", + "datafusion-datasource-arrow", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", + "datafusion-execution", "datafusion-expr", + "datafusion-functions-table", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", "datafusion-proto-common", "object_store", "prost", @@ -1650,9 +1706,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4b14f288ca4ef77743d9672cafecf3adfffff0b9b04af9af79ecbeaaf736901" +checksum = 
"3b6aef3d5e5c1d2bc3114c4876730cb76a9bdc5a8df31ef1b6db48f0c1671895" dependencies = [ "arrow", "datafusion-common", @@ -1661,12 +1717,11 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65" +checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-datasource", "datafusion-expr-common", @@ -1679,36 +1734,27 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d" +checksum = "8f96eebd17555386f459037c65ab73aae8df09f464524c709d6a3134ad4f4776" dependencies = [ - "arrow", "async-trait", - "dashmap", "datafusion-common", - "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-plan", - "datafusion-sql", - "futures", - "itertools 0.14.0", - "log", - "object_store", "parking_lot", - "tokio", ] [[package]] name = "datafusion-sql" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b" +checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" dependencies = [ "arrow", "bigdecimal", + "chrono", "datafusion-common", "datafusion-expr", "indexmap 2.12.0", @@ -1993,6 +2039,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.31" @@ -2127,12 +2179,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" - [[package]] name = "hex" version = "0.4.3" @@ -2291,7 +2337,7 @@ dependencies = [ [[package]] name = "iceberg" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "apache-avro", @@ -2313,6 +2359,7 @@ dependencies = [ "chrono", "derive_builder", "expect-test", + "flate2", "fnv", "futures", "itertools 0.13.0", @@ -2326,6 +2373,7 @@ dependencies = [ "rand 0.8.5", "reqsign", "reqwest", + "rkyv 0.8.13", "roaring", "rust_decimal", "serde", @@ -2335,7 +2383,6 @@ dependencies = [ "serde_repr", "serde_with", "strum 0.27.2", - "thrift", "tokio", "typed-builder", "url", @@ -2345,7 +2392,7 @@ dependencies = [ [[package]] name = "iceberg-datafusion" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ -2543,6 +2590,47 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jiff" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" +dependencies = [ + "jiff-static", + "jiff-tzdb-platform", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", + "windows-sys 0.61.2", +] + +[[package]] +name = "jiff-static" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" +dependencies = [ 
+ "proc-macro2", + "quote", + "syn 2.0.108", +] + +[[package]] +name = "jiff-tzdb" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" +dependencies = [ + "jiff-tzdb", +] + [[package]] name = "jobserver" version = "0.1.34" @@ -2778,25 +2866,31 @@ dependencies = [ ] [[package]] -name = "murmur3" -version = "0.5.2" +name = "munge" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b" +checksum = "5e17401f259eba956ca16491461b6e8f72913a0a114e39736ce404410f915a0c" +dependencies = [ + "munge_macro", +] [[package]] -name = "num" -version = "0.4.3" +name = "munge_macro" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931" dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", + "proc-macro2", + "quote", + "syn 2.0.108", ] +[[package]] +name = "murmur3" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b" + [[package]] name = "num-bigint" version = "0.4.6" @@ -2832,28 +2926,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = 
"num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -2864,16 +2936,6 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "object" version = "0.32.2" @@ -2915,20 +2977,20 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "opendal" -version = "0.54.1" +version = "0.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42afda58fa2cf50914402d132cc1caacff116a85d10c72ab2082bb7c50021754" +checksum = "d075ab8a203a6ab4bc1bce0a4b9fe486a72bf8b939037f4b78d95386384bc80a" dependencies = [ "anyhow", "backon", "base64", "bytes", - "chrono", "crc32c", "futures", "getrandom 0.2.16", "http", "http-body", + "jiff", "log", "md-5", "percent-encoding", @@ -2938,6 +3000,7 @@ dependencies = [ "serde", "serde_json", "tokio", + "url", "uuid", ] @@ -3000,9 +3063,9 @@ dependencies = [ [[package]] name = "parquet" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" +checksum = "7a0f31027ef1af7549f7cec603a9a21dce706d3f8d7c2060a68f43c1773be95a" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -3021,11 +3084,11 @@ dependencies = [ "half", "hashbrown 0.16.0", "lz4_flex", - "num", "num-bigint", + "num-integer", + "num-traits", "object_store", "paste", - "ring", "seq-macro", "simdutf8", "snap", @@ -3101,6 +3164,15 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + [[package]] name = "potential_utf" version = "0.1.4" @@ -3155,9 +3227,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" dependencies = [ "bytes", "prost-derive", @@ -3165,9 +3237,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" dependencies = [ "anyhow", "itertools 0.14.0", @@ -3192,7 +3264,16 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" dependencies = [ - "ptr_meta_derive", + "ptr_meta_derive 0.1.4", +] + +[[package]] +name = "ptr_meta" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b9a0cf95a1196af61d4f1cbdab967179516d9a4a4312af1f31948f8f6224a79" +dependencies = [ + "ptr_meta_derive 0.3.1", ] [[package]] @@ -3206,23 +3287,35 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "ptr_meta_derive" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = 
"pyiceberg_core_rust" -version = "0.7.0" +version = "0.8.0" dependencies = [ "arrow", "datafusion-ffi", "iceberg", "iceberg-datafusion", "pyo3", + "rust_decimal", "tokio", ] [[package]] name = "pyo3" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8970a78afe0628a3e3430376fc5fd76b6b45c4d43360ffd6cdd40bdde72b682a" +checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" dependencies = [ "indoc", "libc", @@ -3237,19 +3330,18 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "458eb0c55e7ece017adeba38f2248ff3ac615e53660d7c71a238d7d2a01c7598" +checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" dependencies = [ - "once_cell", "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7114fe5457c61b276ab77c5055f206295b812608083644a5c5b2640c3102565c" +checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" dependencies = [ "libc", "pyo3-build-config", @@ -3257,9 +3349,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8725c0a622b374d6cb051d11a0983786448f7785336139c3c94f5aa6bef7e50" +checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -3269,9 +3361,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4109984c22491085343c05b0dbc54ddc405c3cf7b4374fc533f5c3313a572ccc" +checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" dependencies = [ "heck", 
"proc-macro2", @@ -3382,6 +3474,15 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +[[package]] +name = "rancor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a063ea72381527c2a0561da9c80000ef822bdd7c3241b1cc1b12100e3df081ee" +dependencies = [ + "ptr_meta 0.3.1", +] + [[package]] name = "rand" version = "0.8.5" @@ -3525,13 +3626,28 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "relative-path" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" + [[package]] name = "rend" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" dependencies = [ - "bytecheck", + "bytecheck 0.6.12", +] + +[[package]] +name = "rend" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cadadef317c2f20755a64d7fdc48f9e7178ee6b0e1f7fce33fa60f1d68a276e6" +dependencies = [ + "bytecheck 0.8.2", ] [[package]] @@ -3634,17 +3750,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" dependencies = [ "bitvec", - "bytecheck", + "bytecheck 0.6.12", "bytes", "hashbrown 0.12.3", - "ptr_meta", - "rend", - "rkyv_derive", + "ptr_meta 0.1.4", + "rend 0.4.2", + "rkyv_derive 0.7.45", "seahash", "tinyvec", "uuid", ] +[[package]] +name = "rkyv" +version = "0.8.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2e88acca7157d83d789836a3987dafc12bc3d88a050e54b8fe9ea4aaa29d20" +dependencies = [ + "bytecheck 0.8.2", + "bytes", + 
"hashbrown 0.16.0", + "indexmap 2.12.0", + "munge", + "ptr_meta 0.3.1", + "rancor", + "rend 0.5.3", + "rkyv_derive 0.8.13", + "tinyvec", + "uuid", +] + [[package]] name = "rkyv_derive" version = "0.7.45" @@ -3656,6 +3791,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "rkyv_derive" +version = "0.8.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6dffea3c91fa91a3c0fc8a061b0e27fef25c6304728038a6d6bcb1c58ba9bd" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = "roaring" version = "0.11.2" @@ -3666,6 +3812,35 @@ dependencies = [ "byteorder", ] +[[package]] +name = "rstest" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5a3193c063baaa2a95a33f03035c8a72b83d97a54916055ba22d35ed3839d49" +dependencies = [ + "futures-timer", + "futures-util", + "rstest_macros", +] + +[[package]] +name = "rstest_macros" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0" +dependencies = [ + "cfg-if", + "glob", + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "relative-path", + "rustc_version", + "syn 2.0.108", + "unicode-ident", +] + [[package]] name = "rust-ini" version = "0.21.3" @@ -3687,7 +3862,7 @@ dependencies = [ "bytes", "num-traits", "rand 0.8.5", - "rkyv", + "rkyv 0.7.45", "serde", "serde_json", ] @@ -4007,9 +4182,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.58.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", "recursive", @@ -4191,15 +4366,6 @@ dependencies = [ "syn 2.0.108", ] -[[package]] -name = "threadpool" -version = "1.8.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" -dependencies = [ - "num_cpus", -] - [[package]] name = "thrift" version = "0.17.0" @@ -4208,9 +4374,7 @@ checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" dependencies = [ "byteorder", "integer-encoding", - "log", "ordered-float 2.10.1", - "threadpool", ] [[package]] diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 953d4a98fc..8346d02703 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -19,8 +19,8 @@ edition = "2024" homepage = "https://rust.iceberg.apache.org" name = "pyiceberg_core_rust" -rust-version = "1.87" -version = "0.7.0" +rust-version = "1.88" +version = "0.8.0" # This crate is used to build python bindings, we don't want to publish it publish = false @@ -31,12 +31,14 @@ license = "Apache-2.0" crate-type = ["cdylib"] [dependencies] -arrow = { version = "56", features = ["pyarrow", "chrono-tz"] } +arrow = { version = "57.0", features = ["pyarrow", "chrono-tz"] } iceberg = { path = "../../crates/iceberg" } -pyo3 = { version = "0.25", features = ["extension-module", "abi3-py310"] } +pyo3 = { version = "0.26", features = ["extension-module", "abi3-py310"] } iceberg-datafusion = { path = "../../crates/integrations/datafusion" } -datafusion-ffi = { version = "50" } +datafusion-ffi = { version = "51.0" } tokio = { version = "1.46.1", default-features = false } +# Security: disable rkyv feature to avoid RUSTSEC-2026-0001 (rkyv 0.7.45 vulnerability) +rust_decimal = { version = "1.39", default-features = false, features = ["std"] } [profile.release] codegen-units = 1 @@ -44,3 +46,7 @@ debug = false lto = "thin" opt-level = "z" strip = true + +[package.metadata.cargo-machete] +# rust_decimal is included to override feature flags for security (disable rkyv) +ignored = ["rust_decimal"] diff --git a/bindings/python/DEPENDENCIES.rust.tsv 
b/bindings/python/DEPENDENCIES.rust.tsv index 7565a13e1d..5fe0da5b2a 100644 --- a/bindings/python/DEPENDENCIES.rust.tsv +++ b/bindings/python/DEPENDENCIES.rust.tsv @@ -1,445 +1,439 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 MPL-2.0 Unicode-3.0 Unlicense Zlib -abi_stable@0.11.3 X X -abi_stable_derive@0.11.3 X X -abi_stable_shared@0.11.0 X X -addr2line@0.24.2 X X -adler2@2.0.1 X X X -ahash@0.8.12 X X -aho-corasick@1.1.3 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X -array-init@2.1.0 X X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@55.2.0 X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-csv@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-json@55.2.0 X -arrow-ord@55.2.0 X -arrow-pyarrow@55.2.0 X -arrow-row@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X -as-any@0.3.2 X X -as_derive_utils@0.11.0 X X -async-compression@0.4.19 X X -async-ffi@0.5.0 X -async-lock@3.4.1 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -atomic-waker@1.1.2 X X -autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.22.1 X X -bigdecimal@0.4.8 X X -bimap@0.6.3 X X -bitflags@2.9.4 X X -blake2@0.10.6 X X -blake3@1.8.2 X X X -block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X -byteorder@1.5.0 X X -bytes@1.10.1 X -bzip2@0.5.2 X X -bzip2-sys@0.1.13+1.0.8 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X -chrono@0.4.42 X X -chrono-tz@0.10.4 X X -comfy-table@7.2.0 X -concurrent-queue@2.5.0 X X -const-oid@0.9.6 X X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -const_panic@0.2.14 X -constant_time_eq@0.3.1 X X X -core-foundation-sys@0.8.7 X X -core_extensions@1.5.4 X X -core_extensions_proc_macros@1.5.4 
X X -cpufeatures@0.2.17 X X -crc32c@0.6.8 X X -crc32fast@1.5.0 X X -crossbeam-channel@0.5.15 X X -crossbeam-epoch@0.9.18 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.6 X X -csv@1.3.1 X X -csv-core@0.1.12 X X -darling@0.20.11 X -darling@0.21.3 X -darling_core@0.20.11 X -darling_core@0.21.3 X -darling_macro@0.20.11 X -darling_macro@0.21.3 X -dashmap@6.1.0 X -datafusion@48.0.1 X -datafusion-catalog@48.0.1 X -datafusion-catalog-listing@48.0.1 X -datafusion-common@48.0.1 X -datafusion-common-runtime@48.0.1 X -datafusion-datasource@48.0.1 X -datafusion-datasource-csv@48.0.1 X -datafusion-datasource-json@48.0.1 X -datafusion-datasource-parquet@48.0.1 X -datafusion-doc@48.0.1 X -datafusion-execution@48.0.1 X -datafusion-expr@48.0.1 X -datafusion-expr-common@48.0.1 X -datafusion-ffi@48.0.1 X -datafusion-functions@48.0.1 X -datafusion-functions-aggregate@48.0.1 X -datafusion-functions-aggregate-common@48.0.1 X -datafusion-functions-nested@48.0.1 X -datafusion-functions-table@48.0.1 X -datafusion-functions-window@48.0.1 X -datafusion-functions-window-common@48.0.1 X -datafusion-macros@48.0.1 X -datafusion-optimizer@48.0.1 X -datafusion-physical-expr@48.0.1 X -datafusion-physical-expr-common@48.0.1 X -datafusion-physical-optimizer@48.0.1 X -datafusion-physical-plan@48.0.1 X -datafusion-proto@48.0.1 X -datafusion-proto-common@48.0.1 X -datafusion-session@48.0.1 X -datafusion-sql@48.0.1 X -derive_builder@0.20.2 X X -derive_builder_core@0.20.2 X X -derive_builder_macro@0.20.2 X X -digest@0.10.7 X X -displaydoc@0.2.5 X X -dissimilar@1.0.10 X -either@1.15.0 X X -equivalent@1.0.2 X X -errno@0.3.13 X X -event-listener@5.4.1 X X -event-listener-strategy@0.5.4 X X -expect-test@1.5.1 X X -fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -fixedbitset@0.5.7 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X -fnv@1.0.7 X X -foldhash@0.1.5 X -form_urlencoded@1.2.2 X X -futures@0.3.31 X X -futures-channel@0.3.31 X X -futures-core@0.3.31 X X -futures-executor@0.3.31 X X 
-futures-io@0.3.31 X X -futures-macro@0.3.31 X X -futures-sink@0.3.31 X X -futures-task@0.3.31 X X -futures-util@0.3.31 X X -generational-arena@0.2.9 X -generator@0.8.7 X X -generic-array@0.14.7 X -getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X -glob@0.3.3 X X -gloo-timers@0.3.0 X X -half@2.6.0 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -heck@0.5.0 X X -hermit-abi@0.5.2 X X -hex@0.4.3 X X -hmac@0.12.1 X X -home@0.5.11 X X -http@1.3.1 X X -http-body@1.0.1 X -http-body-util@0.1.3 X -httparse@1.10.1 X X -humantime@2.2.0 X X -hyper@1.7.0 X -hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X -iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-datafusion@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X -ident_case@1.0.1 X X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.11.0 X X -indoc@2.0.6 X X -integer-encoding@3.0.4 X -io-uring@0.7.10 X X -ipnet@2.11.0 X X -iri-string@0.7.8 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.15 X X -jobserver@0.1.34 X X -js-sys@0.3.78 X X -lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X -libloading@0.7.4 X -libm@0.2.15 X -libz-rs-sys@0.5.2 X -linux-raw-sys@0.9.4 X X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -lzma-sys@0.1.20 X X -matchers@0.2.0 X -md-5@0.10.6 X X -memchr@2.7.5 X X -memoffset@0.9.1 X -miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X -murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X -num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X -object_store@0.12.3 X X -once_cell@1.21.3 X X 
-opendal@0.54.0 X -ordered-float@2.10.1 X -ordered-float@4.6.0 X -parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X -paste@1.0.15 X X -percent-encoding@2.3.2 X X -petgraph@0.8.2 X X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.16 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -portable-atomic@1.11.1 X X -potential_utf@0.1.3 X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X -prost@0.13.5 X -prost-derive@0.13.5 X -psm@0.1.26 X X -pyiceberg_core_rust@0.7.0 X -pyo3@0.24.2 X X -pyo3-build-config@0.24.2 X X -pyo3-ffi@0.24.2 X X -pyo3-macros@0.24.2 X X -pyo3-macros-backend@0.24.2 X X -quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X -r-efi@5.3.0 X X X -rand@0.8.5 X X -rand@0.9.2 X X -rand_chacha@0.3.1 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.3 X X -recursive@0.1.1 X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X -repr_offset@0.2.2 X -reqsign@0.16.5 X -reqwest@0.12.23 X X -ring@0.17.14 X X -roaring@0.11.2 X X -rust_decimal@1.37.2 X -rustc-demangle@0.1.26 X X -rustc_version@0.4.1 X X -rustix@1.0.8 X X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X -rustversion@1.0.22 X X -ryu@1.0.20 X X -same-file@1.0.6 X X -scoped-tls@1.0.1 X X -scopeguard@1.2.0 X X -semver@1.0.26 X X -seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X -serde_repr@0.1.20 X X -serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X -sha1@0.10.6 X X -sha2@0.10.9 X X -sharded-slab@0.1.7 X -shlex@1.3.0 X X -simdutf8@0.1.5 X X -siphasher@1.0.1 X X -slab@0.4.11 X -smallvec@1.15.1 X X -snap@1.1.1 X -socket2@0.6.0 X X -sqlparser@0.55.0 X -sqlparser_derive@0.3.0 X -stable_deref_trait@1.2.0 X X -stacker@0.1.21 X X -static_assertions@1.1.0 X X -strsim@0.11.1 X -strum@0.27.2 X -strum_macros@0.27.2 
X -subtle@2.6.1 X -syn@1.0.109 X X -syn@2.0.106 X X -sync_wrapper@1.0.2 X -synstructure@0.13.2 X -tagptr@0.2.0 X X -target-lexicon@0.13.2 X -tempfile@3.21.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X -thread_local@1.1.9 X X -threadpool@1.8.1 X X -thrift@0.17.0 X -tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X -tower@0.5.2 X -tower-http@0.6.6 X -tower-layer@0.3.3 X -tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X -tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X -try-lock@0.2.5 X -tstr@0.2.4 X -tstr_proc_macros@0.2.2 X -twox-hash@2.1.2 X -typed-arena@2.0.2 X -typed-builder@0.20.1 X X -typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -typewit@1.14.1 X -unicode-ident@1.0.18 X X X -unicode-segmentation@1.12.0 X X -unicode-width@0.2.1 X X -unindent@0.2.4 X X -untrusted@0.9.0 X -url@2.5.7 X X -utf8_iter@1.0.4 X X -uuid@1.18.1 X X -version_check@0.9.5 X X -walkdir@2.5.0 X X -want@0.3.1 X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X -wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -web-time@1.1.0 X X -webpki-roots@1.0.2 X -winapi@0.3.9 X X -winapi-i686-pc-windows-gnu@0.4.0 X X -winapi-util@0.1.11 X X -winapi-x86_64-pc-windows-gnu@0.4.0 X X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.60.2 X X -windows-sys@0.61.0 X X -windows-targets@0.52.6 X X -windows-targets@0.53.3 X X 
-windows-threading@0.1.0 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_gnullvm@0.53.0 X X -windows_aarch64_msvc@0.52.6 X X -windows_aarch64_msvc@0.53.0 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnu@0.53.0 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_gnullvm@0.53.0 X X -windows_i686_msvc@0.52.6 X X -windows_i686_msvc@0.53.0 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnu@0.53.0 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_gnullvm@0.53.0 X X -windows_x86_64_msvc@0.52.6 X X -windows_x86_64_msvc@0.53.0 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -xz2@0.1.7 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 MPL-2.0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +abi_stable@0.11.3 X X +abi_stable_derive@0.11.3 X X +abi_stable_shared@0.11.0 X X +adler2@2.0.1 X X X +ahash@0.8.12 X X +aho-corasick@1.1.3 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X +android_system_properties@0.1.5 X X +anyhow@1.0.100 X X +apache-avro@0.21.0 X +ar_archive_writer@0.2.0 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@57.0.0 X +arrow-arith@57.0.0 X +arrow-array@57.0.0 X +arrow-buffer@57.0.0 X +arrow-cast@57.0.0 X +arrow-csv@57.0.0 X +arrow-data@57.0.0 X +arrow-ipc@57.0.0 X +arrow-json@57.0.0 X +arrow-ord@57.0.0 X +arrow-pyarrow@57.0.0 X +arrow-row@57.0.0 X +arrow-schema@57.0.0 X +arrow-select@57.0.0 X +arrow-string@57.0.0 X +as-any@0.3.2 X X +as_derive_utils@0.11.0 X X +async-compression@0.4.19 X X +async-ffi@0.5.0 X +async-lock@3.4.1 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +backon@1.6.0 X 
+base64@0.22.1 X X +bigdecimal@0.4.9 X X +bimap@0.6.3 X X +bitflags@2.10.0 X X +blake2@0.10.6 X X +blake3@1.8.2 X X X +block-buffer@0.10.4 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.19.0 X X +bytemuck@1.24.0 X X X +byteorder@1.5.0 X X +bytes@1.10.1 X +bzip2@0.5.2 X X +bzip2@0.6.1 X X +bzip2-sys@0.1.13+1.0.8 X X +cc@1.2.43 X X +cfg-if@1.0.4 X X +chrono@0.4.42 X X +chrono-tz@0.10.4 X X +comfy-table@7.1.2 X +concurrent-queue@2.5.0 X X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +const_panic@0.2.15 X +constant_time_eq@0.3.1 X X X +core-foundation-sys@0.8.7 X X +core_extensions@1.5.4 X X +core_extensions_proc_macros@1.5.4 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.6 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +darling@0.20.11 X +darling@0.21.3 X +darling_core@0.20.11 X +darling_core@0.21.3 X +darling_macro@0.20.11 X +darling_macro@0.21.3 X +dashmap@6.1.0 X +datafusion@51.0.0 X +datafusion-catalog@51.0.0 X +datafusion-catalog-listing@51.0.0 X +datafusion-common@51.0.0 X +datafusion-common-runtime@51.0.0 X +datafusion-datasource@51.0.0 X +datafusion-datasource-arrow@51.0.0 X +datafusion-datasource-csv@51.0.0 X +datafusion-datasource-json@51.0.0 X +datafusion-datasource-parquet@51.0.0 X +datafusion-doc@51.0.0 X +datafusion-execution@51.0.0 X +datafusion-expr@51.0.0 X +datafusion-expr-common@51.0.0 X +datafusion-ffi@51.0.0 X +datafusion-functions@51.0.0 X +datafusion-functions-aggregate@51.0.0 X +datafusion-functions-aggregate-common@51.0.0 X +datafusion-functions-nested@51.0.0 X +datafusion-functions-table@51.0.0 X +datafusion-functions-window@51.0.0 X +datafusion-functions-window-common@51.0.0 X +datafusion-macros@51.0.0 X +datafusion-optimizer@51.0.0 X +datafusion-physical-expr@51.0.0 X +datafusion-physical-expr-adapter@51.0.0 X 
+datafusion-physical-expr-common@51.0.0 X +datafusion-physical-optimizer@51.0.0 X +datafusion-physical-plan@51.0.0 X +datafusion-proto@51.0.0 X +datafusion-proto-common@51.0.0 X +datafusion-pruning@51.0.0 X +datafusion-session@51.0.0 X +datafusion-sql@51.0.0 X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +displaydoc@0.2.5 X X +dissimilar@1.0.10 X +either@1.15.0 X X +equivalent@1.0.2 X X +errno@0.3.14 X X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fastrand@2.3.0 X X +find-msvc-tools@0.1.4 X X +fixedbitset@0.5.7 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +form_urlencoded@1.2.2 X X +futures@0.3.31 X X +futures-channel@0.3.31 X X +futures-core@0.3.31 X X +futures-executor@0.3.31 X X +futures-io@0.3.31 X X +futures-macro@0.3.31 X X +futures-sink@0.3.31 X X +futures-task@0.3.31 X X +futures-timer@3.0.3 X X +futures-util@0.3.31 X X +generational-arena@0.2.9 X +generic-array@0.14.9 X +getrandom@0.2.16 X X +getrandom@0.3.4 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.0 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +home@0.5.11 X X +http@1.3.1 X X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +humantime@2.3.0 X X +hyper@1.7.0 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.17 X +iana-time-zone@0.1.64 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.8.0 X +iceberg-datafusion@0.8.0 X +icu_collections@2.1.0 X +icu_locale_core@2.1.0 X +icu_normalizer@2.1.0 X +icu_normalizer_data@2.1.0 X +icu_properties@2.1.0 X +icu_properties_data@2.1.0 X +icu_provider@2.1.0 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.12.0 X X +indoc@2.0.7 X X +integer-encoding@3.0.4 X +ipnet@2.11.0 X X +iri-string@0.7.8 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.5 X X +jiff-tzdb-platform@0.1.3 X 
X +jobserver@0.1.34 X X +js-sys@0.3.82 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.177 X X +libloading@0.7.4 X +libm@0.2.15 X +libz-rs-sys@0.5.2 X +linux-raw-sys@0.11.0 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.28 X X +lz4_flex@0.11.5 X +lzma-sys@0.1.20 X X +md-5@0.10.6 X X +memchr@2.7.6 X X +memoffset@0.9.1 X +miniz_oxide@0.8.9 X X X +mio@1.1.0 X +moka@0.12.11 X X +murmur3@0.5.2 X X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +object@0.32.2 X X +object_store@0.12.4 X X +once_cell@1.21.3 X X +opendal@0.55.0 X +ordered-float@2.10.1 X +ordered-float@4.6.0 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.0.0 X +paste@1.0.15 X X +percent-encoding@2.3.2 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.16 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +portable-atomic@1.11.1 X X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X +prost@0.14.1 X +prost-derive@0.14.1 X +psm@0.1.28 X X +pyiceberg_core_rust@0.8.0 X +pyo3@0.26.0 X X +pyo3-build-config@0.26.0 X X +pyo3-ffi@0.26.0 X X +pyo3-macros@0.26.0 X X +pyo3-macros-backend@0.26.0 X X +quad-rand@0.2.3 X +quick-xml@0.38.3 X +quote@1.0.41 X X +r-efi@5.3.0 X X X +rand@0.8.5 X X +rand@0.9.2 X X +rand_chacha@0.3.1 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.3 X X +recursive@0.1.1 X +recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X +relative-path@1.9.3 X X +repr_offset@0.2.2 X +reqsign@0.16.5 X +reqwest@0.12.24 X X +ring@0.17.14 X X +roaring@0.11.2 X X +rstest@0.26.1 X X +rstest_macros@0.26.1 X X +rust_decimal@1.39.0 
X +rustc_version@0.4.1 X X +rustix@1.1.2 X X X +rustls@0.23.34 X X X +rustls-pki-types@1.13.0 X X +rustls-webpki@0.103.7 X +rustversion@1.0.22 X X +ryu@1.0.20 X X +same-file@1.0.6 X X +scopeguard@1.2.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X +serde_repr@0.1.20 X X +serde_urlencoded@0.7.1 X X +serde_with@3.15.1 X X +serde_with_macros@3.15.1 X X +sha1@0.10.6 X X +sha2@0.10.9 X X +shlex@1.3.0 X X +simd-adler32@0.3.7 X +simdutf8@0.1.5 X X +siphasher@1.0.1 X X +slab@0.4.11 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.1 X X +sqlparser@0.59.0 X +sqlparser_derive@0.3.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.22 X X +strsim@0.11.1 X +strum@0.26.3 X +strum@0.27.2 X +strum_macros@0.26.4 X +strum_macros@0.27.2 X +subtle@2.6.1 X +syn@1.0.109 X X +syn@2.0.108 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +target-lexicon@0.13.3 X +tempfile@3.23.0 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X +thrift@0.17.0 X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.16 X +toml_datetime@0.7.3 X X +toml_edit@0.23.7 X X +toml_parser@1.0.4 X X +tower@0.5.2 X +tower-http@0.6.6 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.41 X +tracing-attributes@0.1.30 X +tracing-core@0.1.34 X +try-lock@0.2.5 X +tstr@0.2.4 X +tstr_proc_macros@0.2.2 X +twox-hash@2.1.2 X +typed-arena@2.0.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typenum@1.19.0 X X +typewit@1.14.2 X +unicode-ident@1.0.20 X X X +unicode-segmentation@1.12.0 X X +unicode-width@0.2.2 X X +unindent@0.2.4 X X +untrusted@0.9.0 X +url@2.5.7 X X +utf8_iter@1.0.4 X X +uuid@1.18.1 X X +version_check@0.9.5 X X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.105 X X +wasm-bindgen-futures@0.4.55 X X +wasm-bindgen-macro@0.2.105 X 
X +wasm-bindgen-macro-support@0.2.105 X X +wasm-bindgen-shared@0.2.105 X X +wasm-streams@0.4.2 X X +web-sys@0.3.82 X X +web-time@1.1.0 X X +webpki-roots@1.0.3 X +winapi@0.3.9 X X +winapi-i686-pc-windows-gnu@0.4.0 X X +winapi-util@0.1.11 X X +winapi-x86_64-pc-windows-gnu@0.4.0 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.52.0 X X +windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.13 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +xz2@0.1.7 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.27 X X X +zerocopy-derive@0.8.27 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.2 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/bindings/python/src/data_file.rs b/bindings/python/src/data_file.rs index 900d6c6014..b0e42e7d73 100644 --- a/bindings/python/src/data_file.rs +++ b/bindings/python/src/data_file.rs @@ -143,7 +143,7 @@ impl PyDataFile { } #[getter] - fn split_offsets(&self) -> &[i64] { + fn split_offsets(&self) -> Option<&[i64]> { self.inner.split_offsets() } diff --git a/bindings/python/src/datafusion_table_provider.rs b/bindings/python/src/datafusion_table_provider.rs index b5e1bf952e..8db7223b34 100644 --- 
a/bindings/python/src/datafusion_table_provider.rs +++ b/bindings/python/src/datafusion_table_provider.rs @@ -23,7 +23,7 @@ use datafusion_ffi::table_provider::FFI_TableProvider; use iceberg::TableIdent; use iceberg::io::FileIO; use iceberg::table::StaticTable; -use iceberg_datafusion::table::IcebergTableProvider; +use iceberg_datafusion::table::IcebergStaticTableProvider; use pyo3::exceptions::PyRuntimeError; use pyo3::prelude::*; use pyo3::types::PyCapsule; @@ -32,7 +32,7 @@ use crate::runtime::runtime; #[pyclass(name = "IcebergDataFusionTable")] pub struct PyIcebergDataFusionTable { - inner: Arc, + inner: Arc, } #[pymethods] @@ -69,7 +69,7 @@ impl PyIcebergDataFusionTable { let table = static_table.into_table(); - IcebergTableProvider::try_new_from_table(table) + IcebergStaticTableProvider::try_new_from_table(table) .await .map_err(|e| { PyRuntimeError::new_err(format!("Failed to create table provider: {e}")) diff --git a/bindings/python/src/transform.rs b/bindings/python/src/transform.rs index 24e9f061dd..c159d573fc 100644 --- a/bindings/python/src/transform.rs +++ b/bindings/python/src/transform.rs @@ -24,46 +24,46 @@ use pyo3::prelude::*; use crate::error::to_py_err; #[pyfunction] -pub fn identity(py: Python, array: PyObject) -> PyResult { +pub fn identity(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Identity) } #[pyfunction] -pub fn void(py: Python, array: PyObject) -> PyResult { +pub fn void(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Void) } #[pyfunction] -pub fn year(py: Python, array: PyObject) -> PyResult { +pub fn year(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Year) } #[pyfunction] -pub fn month(py: Python, array: PyObject) -> PyResult { +pub fn month(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Month) } #[pyfunction] -pub fn day(py: Python, array: PyObject) -> PyResult { +pub fn day(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Day) } 
#[pyfunction] -pub fn hour(py: Python, array: PyObject) -> PyResult { +pub fn hour(py: Python, array: Py) -> PyResult> { apply(py, array, Transform::Hour) } #[pyfunction] -pub fn bucket(py: Python, array: PyObject, num_buckets: u32) -> PyResult { +pub fn bucket(py: Python, array: Py, num_buckets: u32) -> PyResult> { apply(py, array, Transform::Bucket(num_buckets)) } #[pyfunction] -pub fn truncate(py: Python, array: PyObject, width: u32) -> PyResult { +pub fn truncate(py: Python, array: Py, width: u32) -> PyResult> { apply(py, array, Transform::Truncate(width)) } -fn apply(py: Python, array: PyObject, transform: Transform) -> PyResult { +fn apply(py: Python, array: Py, transform: Transform) -> PyResult> { // import let array = ArrayData::from_pyarrow_bound(array.bind(py))?; let array = make_array(array); @@ -71,7 +71,7 @@ fn apply(py: Python, array: PyObject, transform: Transform) -> PyResult, m: &Bound<'_, PyModule>) -> PyResult<()> { diff --git a/crates/catalog/glue/DEPENDENCIES.rust.tsv b/crates/catalog/glue/DEPENDENCIES.rust.tsv index 2d9f686262..e34ff1afc1 100644 --- a/crates/catalog/glue/DEPENDENCIES.rust.tsv +++ b/crates/catalog/glue/DEPENDENCIES.rust.tsv @@ -1,77 +1,73 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X 
+arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -aws-config@1.8.6 X -aws-credential-types@1.2.6 X -aws-runtime@1.5.10 X -aws-sdk-glue@1.119.0 X -aws-sdk-sso@1.83.0 X -aws-sdk-ssooidc@1.84.0 X -aws-sdk-sts@1.85.0 X -aws-sigv4@1.3.4 X -aws-smithy-async@1.2.5 X -aws-smithy-http@0.62.3 X -aws-smithy-http-client@1.1.1 X -aws-smithy-json@0.61.5 X -aws-smithy-observability@0.1.3 X -aws-smithy-query@0.60.7 X -aws-smithy-runtime@1.9.1 X -aws-smithy-runtime-api@1.9.0 X -aws-smithy-types@1.3.2 X -aws-smithy-xml@0.60.10 X -aws-types@1.3.8 X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.21.7 X X +aws-config@1.8.11 X +aws-credential-types@1.2.10 X +aws-runtime@1.5.16 X +aws-sdk-glue@1.132.0 X +aws-sdk-sso@1.90.0 X +aws-sdk-ssooidc@1.92.0 X +aws-sdk-sts@1.94.0 X +aws-sigv4@1.3.6 X +aws-smithy-async@1.2.7 X +aws-smithy-http@0.62.6 X +aws-smithy-http-client@1.1.5 X +aws-smithy-json@0.61.8 X +aws-smithy-observability@0.1.5 X +aws-smithy-query@0.60.9 X +aws-smithy-runtime@1.9.5 X +aws-smithy-runtime-api@1.9.3 X +aws-smithy-types@1.3.5 X +aws-smithy-xml@0.60.13 X +aws-types@1.3.10 X +backon@1.6.0 X base64@0.22.1 X X base64-simd@0.8.0 X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X +bytes@1.11.0 X bytes-utils@0.1.4 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X const-random@0.1.18 X X const-random-macro@0.1.16 X X core-foundation@0.10.1 X X -core-foundation@0.9.4 X X core-foundation-sys@0.8.7 X X 
cpufeatures@0.2.17 X X crc32c@0.6.8 X X @@ -80,14 +76,14 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X darling_core@0.21.3 X darling_macro@0.20.11 X darling_macro@0.21.3 X -deranged@0.5.3 X X +deranged@0.5.5 X X derive_builder@0.20.2 X X derive_builder_core@0.20.2 X X derive_builder_macro@0.20.2 X X @@ -100,9 +96,9 @@ event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -114,114 +110,107 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X h2@0.3.27 X h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X http@0.2.12 X X -http@1.3.1 X X +http@1.4.0 X X http-body@0.4.6 X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X httpdate@1.0.3 X X hyper@0.14.32 X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.24.2 X X X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-glue@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-glue@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X 
+icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-conv@0.1.0 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X openssl-probe@0.1.6 X X ordered-float@2.10.1 X ordered-float@4.6.0 X outref@0.5.2 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X 
-potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X powerfmt@0.2.0 X X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -229,152 +218,150 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X rustls@0.21.12 X X X -rustls@0.23.31 X X X -rustls-native-certs@0.6.3 X X X -rustls-native-certs@0.8.1 X X X -rustls-pemfile@1.0.4 X X X -rustls-pki-types@1.12.0 X X +rustls@0.23.35 X X X +rustls-native-certs@0.8.2 X X X +rustls-pki-types@1.13.1 X X rustls-webpki@0.101.7 X -rustls-webpki@0.103.4 X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -schannel@0.1.27 X -scoped-tls@1.0.1 X X +schannel@0.1.28 X scopeguard@1.2.0 X X sct@0.7.1 X X X -security-framework@2.11.1 X X -security-framework@3.4.0 X X +security-framework@3.5.1 X X security-framework-sys@2.15.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X 
+signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X socket2@0.5.10 X X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X -time@0.3.43 X X +time@0.3.44 X X time-core@0.1.6 X X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X tokio-rustls@0.24.1 X X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X urlencoding@2.1.3 X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X vsimd@0.8.0 X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X 
+wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X xmlparser@0.13.6 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index dce287ed6e..37a7996f80 100644 
--- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -151,33 +151,33 @@ impl GlueCatalog { async fn new(config: GlueCatalogConfig) -> Result { let sdk_config = create_sdk_config(&config.props, config.uri.as_ref()).await; let mut file_io_props = config.props.clone(); - if !file_io_props.contains_key(S3_ACCESS_KEY_ID) { - if let Some(access_key_id) = file_io_props.get(AWS_ACCESS_KEY_ID) { - file_io_props.insert(S3_ACCESS_KEY_ID.to_string(), access_key_id.to_string()); - } + if !file_io_props.contains_key(S3_ACCESS_KEY_ID) + && let Some(access_key_id) = file_io_props.get(AWS_ACCESS_KEY_ID) + { + file_io_props.insert(S3_ACCESS_KEY_ID.to_string(), access_key_id.to_string()); } - if !file_io_props.contains_key(S3_SECRET_ACCESS_KEY) { - if let Some(secret_access_key) = file_io_props.get(AWS_SECRET_ACCESS_KEY) { - file_io_props.insert( - S3_SECRET_ACCESS_KEY.to_string(), - secret_access_key.to_string(), - ); - } + if !file_io_props.contains_key(S3_SECRET_ACCESS_KEY) + && let Some(secret_access_key) = file_io_props.get(AWS_SECRET_ACCESS_KEY) + { + file_io_props.insert( + S3_SECRET_ACCESS_KEY.to_string(), + secret_access_key.to_string(), + ); } - if !file_io_props.contains_key(S3_REGION) { - if let Some(region) = file_io_props.get(AWS_REGION_NAME) { - file_io_props.insert(S3_REGION.to_string(), region.to_string()); - } + if !file_io_props.contains_key(S3_REGION) + && let Some(region) = file_io_props.get(AWS_REGION_NAME) + { + file_io_props.insert(S3_REGION.to_string(), region.to_string()); } - if !file_io_props.contains_key(S3_SESSION_TOKEN) { - if let Some(session_token) = file_io_props.get(AWS_SESSION_TOKEN) { - file_io_props.insert(S3_SESSION_TOKEN.to_string(), session_token.to_string()); - } + if !file_io_props.contains_key(S3_SESSION_TOKEN) + && let Some(session_token) = file_io_props.get(AWS_SESSION_TOKEN) + { + file_io_props.insert(S3_SESSION_TOKEN.to_string(), session_token.to_string()); } - if !file_io_props.contains_key(S3_ENDPOINT) 
{ - if let Some(aws_endpoint) = config.uri.as_ref() { - file_io_props.insert(S3_ENDPOINT.to_string(), aws_endpoint.to_string()); - } + if !file_io_props.contains_key(S3_ENDPOINT) + && let Some(aws_endpoint) = config.uri.as_ref() + { + file_io_props.insert(S3_ENDPOINT.to_string(), aws_endpoint.to_string()); } let client = aws_sdk_glue::Client::new(&sdk_config); diff --git a/crates/catalog/glue/src/schema.rs b/crates/catalog/glue/src/schema.rs index cfd7487973..864320dae4 100644 --- a/crates/catalog/glue/src/schema.rs +++ b/crates/catalog/glue/src/schema.rs @@ -165,7 +165,12 @@ impl SchemaVisitor for GlueSchemaBuilder { PrimitiveType::Date => "date".to_string(), PrimitiveType::Timestamp => "timestamp".to_string(), PrimitiveType::TimestampNs => "timestamp_ns".to_string(), - PrimitiveType::TimestamptzNs => "timestamptz_ns".to_string(), + PrimitiveType::Timestamptz | PrimitiveType::TimestamptzNs => { + return Err(Error::new( + ErrorKind::FeatureUnsupported, + format!("Conversion from {p:?} is not supported"), + )); + } PrimitiveType::Time | PrimitiveType::String | PrimitiveType::Uuid => { "string".to_string() } @@ -173,12 +178,6 @@ impl SchemaVisitor for GlueSchemaBuilder { PrimitiveType::Decimal { precision, scale } => { format!("decimal({precision},{scale})") } - _ => { - return Err(Error::new( - ErrorKind::FeatureUnsupported, - "Conversion from 'Timestamptz' is not supported", - )); - } }; Ok(glue_type) diff --git a/crates/catalog/hms/DEPENDENCIES.rust.tsv b/crates/catalog/hms/DEPENDENCIES.rust.tsv index cef38cabcd..5025f7b184 100644 --- a/crates/catalog/hms/DEPENDENCIES.rust.tsv +++ b/crates/catalog/hms/DEPENDENCIES.rust.tsv @@ -1,25 +1,24 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X 
android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-broadcast@0.7.2 X X async-lock@3.4.1 X X @@ -28,23 +27,22 @@ async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X cfg_aliases@0.2.1 X chrono@0.4.42 X X concurrent-queue@2.5.0 X X @@ -59,7 +57,7 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -80,9 +78,9 @@ event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X faststr@0.2.32 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -94,103 +92,95 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X 
generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X -half@2.6.0 X X +half@2.7.1 X X hashbrown@0.14.5 X X -hashbrown@0.15.5 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hive_metastore@0.2.0 X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-hms@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-hms@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -integer-encoding@4.0.2 X -io-uring@0.7.10 X X +integer-encoding@4.1.0 X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X +libz-rs-sys@0.5.3 X 
linked-hash-map@0.5.6 X X linkedbytes@0.1.16 X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X memoffset@0.9.1 X metainfo@0.7.14 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X motore@0.4.1 X X motore-macros@0.4.3 X X mur3@0.1.0 X murmur3@0.5.2 X X nix@0.29.0 X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -num_enum@0.7.4 X X X -num_enum_derive@0.7.4 X X X -object@0.36.7 X X +num_enum@0.7.5 X X X +num_enum_derive@0.7.5 X X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pilota@0.11.10 X X @@ -200,14 +190,15 @@ pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro-crate@3.3.0 X X -proc-macro2@1.0.101 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -215,148 +206,152 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -ref-cast@1.0.24 X X -ref-cast-impl@1.0.24 X X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +ref-cast@1.0.25 X X +ref-cast-impl@1.0.25 X X +regex@1.12.2 X X 
+regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc-hash@2.1.1 X X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X socket2@0.5.10 X X -socket2@0.6.0 X X +socket2@0.6.1 X X sonic-number@0.1.0 X sonic-rs@0.3.17 X -sonic-simd@0.1.1 X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +sonic-simd@0.1.2 X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X thiserror@1.0.69 X X -thiserror@2.0.16 X X +thiserror@2.0.17 X X thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X tokio-stream@0.1.17 X -tokio-util@0.7.16 X 
-toml_datetime@0.6.11 X X -toml_edit@0.22.27 X X +tokio-util@0.7.17 X +toml_datetime@0.7.3 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X volo@0.10.7 X X volo-thrift@0.10.8 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X 
+windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -winnow@0.7.13 X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/hms/src/schema.rs b/crates/catalog/hms/src/schema.rs index 8893a80521..c23b48719d 100644 --- a/crates/catalog/hms/src/schema.rs +++ b/crates/catalog/hms/src/schema.rs @@ -122,7 +122,12 @@ impl SchemaVisitor for HiveSchemaBuilder { PrimitiveType::Date => "date".to_string(), PrimitiveType::Timestamp => "timestamp".to_string(), PrimitiveType::TimestampNs => "timestamp_ns".to_string(), - PrimitiveType::TimestamptzNs => "timestamptz_ns".to_string(), + PrimitiveType::Timestamptz | PrimitiveType::TimestamptzNs => { + return Err(Error::new( + ErrorKind::FeatureUnsupported, + format!("Conversion from {p:?} is not supported"), + )); + } PrimitiveType::Time | PrimitiveType::String | PrimitiveType::Uuid => { "string".to_string() } @@ -130,12 +135,6 @@ impl SchemaVisitor for HiveSchemaBuilder { PrimitiveType::Decimal { precision, scale } => { 
format!("decimal({precision},{scale})") } - _ => { - return Err(Error::new( - ErrorKind::FeatureUnsupported, - "Conversion from 'Timestamptz' is not supported", - )); - } }; Ok(hive_type) diff --git a/crates/catalog/loader/DEPENDENCIES.rust.tsv b/crates/catalog/loader/DEPENDENCIES.rust.tsv index d809a30ca4..02b06c4479 100644 --- a/crates/catalog/loader/DEPENDENCIES.rust.tsv +++ b/crates/catalog/loader/DEPENDENCIES.rust.tsv @@ -1,25 +1,25 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-broadcast@0.7.2 X X async-lock@3.4.1 X X @@ -28,46 +28,44 @@ async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -aws-config@1.8.6 X -aws-credential-types@1.2.6 X -aws-runtime@1.5.10 X -aws-sdk-glue@1.119.0 X -aws-sdk-s3tables@1.37.0 X -aws-sdk-sso@1.83.0 X -aws-sdk-ssooidc@1.84.0 X -aws-sdk-sts@1.85.0 X -aws-sigv4@1.3.4 X -aws-smithy-async@1.2.5 X -aws-smithy-http@0.62.3 X -aws-smithy-http-client@1.1.1 X -aws-smithy-json@0.61.5 X -aws-smithy-observability@0.1.3 X -aws-smithy-query@0.60.7 X -aws-smithy-runtime@1.9.1 X -aws-smithy-runtime-api@1.9.0 X 
-aws-smithy-types@1.3.2 X -aws-smithy-xml@0.60.10 X -aws-types@1.3.8 X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.21.7 X X +aws-config@1.8.11 X +aws-credential-types@1.2.10 X +aws-runtime@1.5.16 X +aws-sdk-glue@1.132.0 X +aws-sdk-s3tables@1.46.0 X +aws-sdk-sso@1.90.0 X +aws-sdk-ssooidc@1.92.0 X +aws-sdk-sts@1.94.0 X +aws-sigv4@1.3.6 X +aws-smithy-async@1.2.7 X +aws-smithy-http@0.62.6 X +aws-smithy-http-client@1.1.5 X +aws-smithy-json@0.61.8 X +aws-smithy-observability@0.1.5 X +aws-smithy-query@0.60.9 X +aws-smithy-runtime@1.9.5 X +aws-smithy-runtime-api@1.9.3 X +aws-smithy-types@1.3.5 X +aws-smithy-xml@0.60.13 X +aws-types@1.3.10 X +backon@1.6.0 X base64@0.22.1 X X base64-simd@0.8.0 X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X +bytes@1.11.0 X bytes-utils@0.1.4 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X cfg_aliases@0.2.1 X chrono@0.4.42 X X concurrent-queue@2.5.0 X X @@ -75,16 +73,18 @@ const-oid@0.9.6 X X const-random@0.1.18 X X const-random-macro@0.1.16 X X core-foundation@0.10.1 X X -core-foundation@0.9.4 X X core-foundation-sys@0.8.7 X X cpufeatures@0.2.17 X X +crc@3.4.0 X X +crc-catalog@2.4.0 X X crc32c@0.6.8 X X crc32fast@1.5.0 X X crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X +crossbeam-queue@0.3.12 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -92,7 +92,7 @@ darling_core@0.21.3 X darling_macro@0.20.11 X darling_macro@0.21.3 X dashmap@6.1.0 X -deranged@0.5.3 X X +deranged@0.5.5 X X derive_builder@0.20.2 X X derive_builder_core@0.20.2 X X derive_builder_macro@0.20.2 X X @@ -106,131 +106,130 @@ 
event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X faststr@0.2.32 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +flume@0.11.1 X X fnv@1.0.7 X X +foldhash@0.1.5 X form_urlencoded@1.2.2 X X futures@0.3.31 X X futures-channel@0.3.31 X X futures-core@0.3.31 X X futures-executor@0.3.31 X X +futures-intrusive@0.5.0 X X futures-io@0.3.31 X X futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X h2@0.3.27 X h2@0.4.12 X -half@2.6.0 X X +half@2.7.1 X X hashbrown@0.14.5 X X hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +hashlink@0.10.0 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hive_metastore@0.2.0 X hmac@0.12.1 X X home@0.5.11 X X http@0.2.12 X X -http@1.3.1 X X +http@1.4.0 X X http-body@0.4.6 X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X httpdate@1.0.3 X X hyper@0.14.32 X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.24.2 X X X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-glue@0.7.0 X -iceberg-catalog-hms@0.7.0 X -iceberg-catalog-loader@0.7.0 X -iceberg-catalog-rest@0.7.0 X -iceberg-catalog-s3tables@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-glue@0.8.0 X +iceberg-catalog-hms@0.8.0 X +iceberg-catalog-loader@0.8.0 X +iceberg-catalog-rest@0.8.0 X +iceberg-catalog-s3tables@0.8.0 X +iceberg-catalog-sql@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X 
+icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -integer-encoding@4.0.2 X -io-uring@0.7.10 X X +integer-encoding@4.1.0 X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X +libsqlite3-sys@0.30.1 X +libz-rs-sys@0.5.3 X linked-hash-map@0.5.6 X X linkedbytes@0.1.16 X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X memoffset@0.9.1 X metainfo@0.7.14 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X motore@0.4.1 X X motore-macros@0.4.3 X X mur3@0.1.0 X murmur3@0.5.2 X X nix@0.29.0 X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-conv@0.1.0 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -num_enum@0.7.4 X X X -num_enum_derive@0.7.4 X X X -object@0.36.7 X X +num_enum@0.7.5 X X X +num_enum_derive@0.7.5 X X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X openssl-probe@0.1.6 X X ordered-float@2.10.1 X ordered-float@4.6.0 X 
outref@0.5.2 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pilota@0.11.10 X X @@ -240,15 +239,16 @@ pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X powerfmt@0.2.0 X X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro-crate@3.3.0 X X -proc-macro2@1.0.101 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -256,164 +256,171 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -ref-cast@1.0.24 X X -ref-cast-impl@1.0.24 X X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +ref-cast@1.0.25 X X +ref-cast-impl@1.0.25 X X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc-hash@2.1.1 X X rustc_version@0.4.1 X X rustls@0.21.12 X X X -rustls@0.23.31 X X X -rustls-native-certs@0.6.3 X X X -rustls-native-certs@0.8.1 X X X -rustls-pemfile@1.0.4 X X X -rustls-pki-types@1.12.0 X X +rustls@0.23.35 X X X +rustls-native-certs@0.8.2 X X X +rustls-pki-types@1.13.1 X X rustls-webpki@0.101.7 X -rustls-webpki@0.103.4 X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -schannel@0.1.27 X -scoped-tls@1.0.1 X X +schannel@0.1.28 X scopeguard@1.2.0 X X sct@0.7.1 X X X -security-framework@2.11.1 X X -security-framework@3.4.0 X X +security-framework@3.5.1 X X security-framework-sys@2.15.0 X X -semver@1.0.26 X X 
+semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X socket2@0.5.10 X X -socket2@0.6.0 X X +socket2@0.6.1 X X sonic-number@0.1.0 X sonic-rs@0.3.17 X -sonic-simd@0.1.1 X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +sonic-simd@0.1.2 X +spin@0.9.8 X +sqlx@0.8.6 X X +sqlx-core@0.8.6 X X +sqlx-sqlite@0.8.6 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X thiserror@1.0.69 X X -thiserror@2.0.16 X X +thiserror@2.0.17 X X thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X -time@0.3.43 X X +time@0.3.44 X X time-core@0.1.6 X X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X tokio-rustls@0.24.1 X X -tokio-rustls@0.26.2 X X +tokio-rustls@0.26.4 X X tokio-stream@0.1.17 X -tokio-util@0.7.16 X -toml_datetime@0.6.11 X X -toml_edit@0.22.27 X X +tokio-util@0.7.17 X +toml_datetime@0.7.3 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X 
-tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X urlencoding@2.1.3 X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X +vcpkg@0.2.15 X X version_check@0.9.5 X X volo@0.10.7 X X volo-thrift@0.10.8 X X vsimd@0.8.0 X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@0.26.11 X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X 
X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -winnow@0.7.13 X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X xmlparser@0.13.6 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/rest/DEPENDENCIES.rust.tsv b/crates/catalog/rest/DEPENDENCIES.rust.tsv index cf238f4b4c..c78434fa51 100644 --- a/crates/catalog/rest/DEPENDENCIES.rust.tsv +++ b/crates/catalog/rest/DEPENDENCIES.rust.tsv @@ -1,48 +1,46 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X 
async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X @@ -56,7 +54,7 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -75,9 +73,9 @@ event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -89,105 +87,98 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X httpdate@1.0.3 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-rest@0.7.0 X -iceberg_test_utils@0.7.0 X 
-icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-rest@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X 
-parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -195,134 +186,135 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X 
+socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 
X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/rest/src/catalog.rs b/crates/catalog/rest/src/catalog.rs index 39553f7554..ddbf6a4e01 100644 --- a/crates/catalog/rest/src/catalog.rs +++ b/crates/catalog/rest/src/catalog.rs @@ -41,9 +41,9 @@ use crate::client::{ HttpClient, deserialize_catalog_response, deserialize_unexpected_catalog_error, }; use crate::types::{ - CatalogConfig, CommitTableRequest, CommitTableResponse, CreateTableRequest, - ListNamespaceResponse, ListTableResponse, LoadTableResponse, 
NamespaceSerde, - RegisterTableRequest, RenameTableRequest, + CatalogConfig, CommitTableRequest, CommitTableResponse, CreateNamespaceRequest, + CreateTableRequest, ListNamespaceResponse, ListTablesResponse, LoadTableResult, + NamespaceResponse, RegisterTableRequest, RenameTableRequest, }; /// REST catalog URI @@ -466,13 +466,7 @@ impl Catalog for RestCatalog { deserialize_catalog_response::(http_response) .await?; - let ns_identifiers = response - .namespaces - .into_iter() - .map(NamespaceIdent::from_vec) - .collect::>>()?; - - namespaces.extend(ns_identifiers); + namespaces.extend(response.namespaces); match response.next_page_token { Some(token) => next_token = Some(token), @@ -502,9 +496,9 @@ impl Catalog for RestCatalog { let request = context .client .request(Method::POST, context.config.namespaces_endpoint()) - .json(&NamespaceSerde { - namespace: namespace.as_ref().clone(), - properties: Some(properties), + .json(&CreateNamespaceRequest { + namespace: namespace.clone(), + properties, }) .build()?; @@ -513,8 +507,8 @@ impl Catalog for RestCatalog { match http_response.status() { StatusCode::OK => { let response = - deserialize_catalog_response::(http_response).await?; - Namespace::try_from(response) + deserialize_catalog_response::(http_response).await?; + Ok(Namespace::from(response)) } StatusCode::CONFLICT => Err(Error::new( ErrorKind::Unexpected, @@ -537,8 +531,8 @@ impl Catalog for RestCatalog { match http_response.status() { StatusCode::OK => { let response = - deserialize_catalog_response::(http_response).await?; - Namespace::try_from(response) + deserialize_catalog_response::(http_response).await?; + Ok(Namespace::from(response)) } StatusCode::NOT_FOUND => Err(Error::new( ErrorKind::Unexpected, @@ -614,7 +608,7 @@ impl Catalog for RestCatalog { match http_response.status() { StatusCode::OK => { let response = - deserialize_catalog_response::(http_response).await?; + deserialize_catalog_response::(http_response).await?; 
identifiers.extend(response.identifiers); @@ -661,11 +655,7 @@ impl Catalog for RestCatalog { partition_spec: creation.partition_spec, write_order: creation.sort_order, stage_create: Some(false), - properties: if creation.properties.is_empty() { - None - } else { - Some(creation.properties) - }, + properties: creation.properties, }) .build()?; @@ -673,7 +663,7 @@ impl Catalog for RestCatalog { let response = match http_response.status() { StatusCode::OK => { - deserialize_catalog_response::(http_response).await? + deserialize_catalog_response::(http_response).await? } StatusCode::NOT_FOUND => { return Err(Error::new( @@ -697,7 +687,6 @@ impl Catalog for RestCatalog { let config = response .config - .unwrap_or_default() .into_iter() .chain(self.user_config.props.clone()) .collect(); @@ -735,7 +724,7 @@ impl Catalog for RestCatalog { let response = match http_response.status() { StatusCode::OK | StatusCode::NOT_MODIFIED => { - deserialize_catalog_response::(http_response).await? + deserialize_catalog_response::(http_response).await? } StatusCode::NOT_FOUND => { return Err(Error::new( @@ -748,7 +737,6 @@ impl Catalog for RestCatalog { let config = response .config - .unwrap_or_default() .into_iter() .chain(self.user_config.props.clone()) .collect(); @@ -861,9 +849,9 @@ impl Catalog for RestCatalog { let http_response = context.client.query_catalog(request).await?; - let response: LoadTableResponse = match http_response.status() { + let response: LoadTableResult = match http_response.status() { StatusCode::OK => { - deserialize_catalog_response::(http_response).await? + deserialize_catalog_response::(http_response).await? 
} StatusCode::NOT_FOUND => { return Err(Error::new( @@ -905,7 +893,7 @@ impl Catalog for RestCatalog { context.config.table_endpoint(commit.identifier()), ) .json(&CommitTableRequest { - identifier: commit.identifier().clone(), + identifier: Some(commit.identifier().clone()), requirements: commit.take_requirements(), updates: commit.take_updates(), }) @@ -2428,7 +2416,7 @@ mod tests { )) .unwrap(); let reader = BufReader::new(file); - let resp = serde_json::from_reader::<_, LoadTableResponse>(reader).unwrap(); + let resp = serde_json::from_reader::<_, LoadTableResult>(reader).unwrap(); Table::builder() .metadata(resp.metadata) @@ -2568,7 +2556,7 @@ mod tests { )) .unwrap(); let reader = BufReader::new(file); - let resp = serde_json::from_reader::<_, LoadTableResponse>(reader).unwrap(); + let resp = serde_json::from_reader::<_, LoadTableResult>(reader).unwrap(); Table::builder() .metadata(resp.metadata) diff --git a/crates/catalog/rest/src/lib.rs b/crates/catalog/rest/src/lib.rs index 70cdeaabd0..6bee950970 100644 --- a/crates/catalog/rest/src/lib.rs +++ b/crates/catalog/rest/src/lib.rs @@ -56,3 +56,4 @@ mod client; mod types; pub use catalog::*; +pub use types::*; diff --git a/crates/catalog/rest/src/types.rs b/crates/catalog/rest/src/types.rs index 70ed72051a..ab44c40ee3 100644 --- a/crates/catalog/rest/src/types.rs +++ b/crates/catalog/rest/src/types.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Request and response types for the Iceberg REST API. 
+ use std::collections::HashMap; use iceberg::spec::{Schema, SortOrder, TableMetadata, UnboundPartitionSpec}; @@ -30,7 +32,8 @@ pub(super) struct CatalogConfig { } #[derive(Debug, Serialize, Deserialize)] -pub(super) struct ErrorResponse { +/// Wrapper for all non-2xx error responses from the REST API +pub struct ErrorResponse { error: ErrorModel, } @@ -41,11 +44,16 @@ impl From for Error { } #[derive(Debug, Serialize, Deserialize)] -pub(super) struct ErrorModel { - pub(super) message: String, - pub(super) r#type: String, - pub(super) code: u16, - pub(super) stack: Option>, +/// Error payload returned in a response with further details on the error +pub struct ErrorModel { + /// Human-readable error message + pub message: String, + /// Internal type definition of the error + pub r#type: String, + /// HTTP response code + pub code: u16, + /// Optional error stack / context + pub stack: Option>, } impl From for Error { @@ -96,106 +104,255 @@ pub(super) struct TokenResponse { pub(super) issued_token_type: Option, } -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct NamespaceSerde { - pub(super) namespace: Vec, - pub(super) properties: Option>, -} - -impl TryFrom for Namespace { - type Error = Error; - fn try_from(value: NamespaceSerde) -> std::result::Result { - Ok(Namespace::with_properties( - NamespaceIdent::from_vec(value.namespace)?, - value.properties.unwrap_or_default(), - )) - } +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Namespace response +pub struct NamespaceResponse { + /// Namespace identifier + pub namespace: NamespaceIdent, + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + /// Properties stored on the namespace, if supported by the server. 
+ pub properties: HashMap, } -impl From<&Namespace> for NamespaceSerde { +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Create namespace request +pub struct CreateNamespaceRequest { + /// Name of the namespace to create + pub namespace: NamespaceIdent, + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + /// Properties to set on the namespace + pub properties: HashMap, +} + +impl From<&Namespace> for NamespaceResponse { fn from(value: &Namespace) -> Self { Self { - namespace: value.name().as_ref().clone(), - properties: Some(value.properties().clone()), + namespace: value.name().clone(), + properties: value.properties().clone(), } } } -#[derive(Debug, Serialize, Deserialize)] +impl From for Namespace { + fn from(value: NamespaceResponse) -> Self { + Namespace::with_properties(value.namespace, value.properties) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] -pub(super) struct ListNamespaceResponse { - pub(super) namespaces: Vec>, - #[serde(default)] - pub(super) next_page_token: Option, +/// Response containing a list of namespace identifiers, with optional pagination support. +pub struct ListNamespaceResponse { + /// List of namespace identifiers returned by the server + pub namespaces: Vec, + /// Opaque token for pagination. If present, indicates there are more results available. + /// Use this value in subsequent requests to retrieve the next page. + pub next_page_token: Option, } -#[allow(dead_code)] -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct UpdateNamespacePropsRequest { - removals: Option>, - updates: Option>, +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Request to update properties on a namespace. +/// +/// Properties that are not in the request are not modified or removed by this call. +/// Server implementations are not required to support namespace properties. 
+pub struct UpdateNamespacePropertiesRequest { + /// List of property keys to remove from the namespace + pub removals: Option>, + /// Map of property keys to values to set or update on the namespace + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub updates: HashMap, } -#[allow(dead_code)] -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct UpdateNamespacePropsResponse { - updated: Vec, - removed: Vec, - missing: Option>, +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Response from updating namespace properties, indicating which properties were changed. +pub struct UpdateNamespacePropertiesResponse { + /// List of property keys that were added or updated + pub updated: Vec, + /// List of properties that were removed + pub removed: Vec, + /// List of properties requested for removal that were not found in the namespace's properties. + /// Represents a partial success response. Servers do not need to implement this. + #[serde(skip_serializing_if = "Option::is_none")] + pub missing: Option>, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] -pub(super) struct ListTableResponse { - pub(super) identifiers: Vec, +/// Response containing a list of table identifiers, with optional pagination support. +pub struct ListTablesResponse { + /// List of table identifiers under the requested namespace + pub identifiers: Vec, + /// Opaque token for pagination. If present, indicates there are more results available. + /// Use this value in subsequent requests to retrieve the next page. 
#[serde(default)] - pub(super) next_page_token: Option, + pub next_page_token: Option, } -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct RenameTableRequest { - pub(super) source: TableIdent, - pub(super) destination: TableIdent, +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Request to rename a table from one identifier to another. +/// +/// It's valid to move a table across namespaces, but the server implementation +/// is not required to support it. +pub struct RenameTableRequest { + /// Current table identifier to rename + pub source: TableIdent, + /// New table identifier to rename to + pub destination: TableIdent, } -#[derive(Debug, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] -pub(super) struct LoadTableResponse { - pub(super) metadata_location: Option, - pub(super) metadata: TableMetadata, - pub(super) config: Option>, +/// Result returned when a table is successfully loaded or created. +/// +/// The table metadata JSON is returned in the `metadata` field. The corresponding file location +/// of table metadata should be returned in the `metadata_location` field, unless the metadata +/// is not yet committed. For example, a create transaction may return metadata that is staged +/// but not committed. +/// +/// The `config` map returns table-specific configuration for the table's resources, including +/// its HTTP client and FileIO. For example, config may contain a specific FileIO implementation +/// class for the table depending on its underlying storage. +pub struct LoadTableResult { + /// May be null if the table is staged as part of a transaction + pub metadata_location: Option, + /// The table's full metadata + pub metadata: TableMetadata, + /// Table-specific configuration overriding catalog configuration + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub config: HashMap, + /// Storage credentials for accessing table data. 
Clients should check this field + /// before falling back to credentials in the `config` field. + #[serde(skip_serializing_if = "Option::is_none")] + pub storage_credentials: Option>, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Storage credential for a specific location prefix. +/// +/// Indicates a storage location prefix where the credential is relevant. Clients should +/// choose the most specific prefix (by selecting the longest prefix) if several credentials +/// of the same type are available. +pub struct StorageCredential { + /// Storage location prefix where this credential is relevant + pub prefix: String, + /// Configuration map containing credential information + pub config: HashMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] -pub(super) struct CreateTableRequest { - pub(super) name: String, - pub(super) location: Option, - pub(super) schema: Schema, - pub(super) partition_spec: Option, - pub(super) write_order: Option, - pub(super) stage_create: Option, - pub(super) properties: Option>, +/// Request to create a new table in a namespace. +/// +/// If `stage_create` is false, the table is created immediately. +/// If `stage_create` is true, the table is not created, but table metadata is initialized +/// and returned. The service should prepare as needed for a commit to the table commit +/// endpoint to complete the create transaction. +pub struct CreateTableRequest { + /// Name of the table to create + pub name: String, + /// Optional table location. If not provided, the server will choose a location. + pub location: Option, + /// Table schema + pub schema: Schema, + /// Optional partition specification. If not provided, the table will be unpartitioned. 
+ pub partition_spec: Option, + /// Optional sort order for the table + pub write_order: Option, + /// Whether to stage the create for a transaction (true) or create immediately (false) + pub stage_create: Option, + /// Optional properties to set on the table + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub properties: HashMap, } -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct CommitTableRequest { - pub(super) identifier: TableIdent, - pub(super) requirements: Vec, - pub(super) updates: Vec, +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +/// Request to commit updates to a table. +/// +/// Commits have two parts: requirements and updates. Requirements are assertions that will +/// be validated before attempting to make and commit changes. Updates are changes to make +/// to table metadata. +/// +/// Create table transactions that are started by createTable with `stage-create` set to true +/// are committed using this request. Transactions should include all changes to the table, +/// including table initialization, like AddSchemaUpdate and SetCurrentSchemaUpdate. +pub struct CommitTableRequest { + /// Table identifier to update; must be present for CommitTransactionRequest + #[serde(skip_serializing_if = "Option::is_none")] + pub identifier: Option, + /// List of requirements that must be satisfied before committing changes + pub requirements: Vec, + /// List of updates to apply to the table metadata + pub updates: Vec, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] -pub(super) struct CommitTableResponse { - pub(super) metadata_location: String, - pub(super) metadata: TableMetadata, +/// Response returned when a table is successfully updated. +/// +/// The table metadata JSON is returned in the metadata field. The corresponding file location +/// of table metadata must be returned in the metadata-location field. 
Clients can check whether +/// metadata has changed by comparing metadata locations. +pub struct CommitTableResponse { + /// Location of the updated table metadata file + pub metadata_location: String, + /// The table's updated metadata + pub metadata: TableMetadata, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] -pub(super) struct RegisterTableRequest { - pub(super) name: String, - pub(super) metadata_location: String, - pub(super) overwrite: Option, +/// Request to register a table using an existing metadata file location. +pub struct RegisterTableRequest { + /// Name of the table to register + pub name: String, + /// Location of the metadata file for the table + pub metadata_location: String, + /// Whether to overwrite table metadata if the table already exists + pub overwrite: Option, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_namespace_response_serde() { + let json = serde_json::json!({ + "namespace": ["nested", "ns"], + "properties": { + "key1": "value1", + "key2": "value2" + } + }); + let ns_response: NamespaceResponse = + serde_json::from_value(json.clone()).expect("Deserialization failed"); + assert_eq!(ns_response, NamespaceResponse { + namespace: NamespaceIdent::from_vec(vec!["nested".to_string(), "ns".to_string()]) + .unwrap(), + properties: HashMap::from([ + ("key1".to_string(), "value1".to_string()), + ("key2".to_string(), "value2".to_string()), + ]), + }); + assert_eq!( + serde_json::to_value(&ns_response).expect("Serialization failed"), + json + ); + + // Without properties + let json_no_props = serde_json::json!({ + "namespace": ["db", "schema"] + }); + let ns_response_no_props: NamespaceResponse = + serde_json::from_value(json_no_props.clone()).expect("Deserialization failed"); + assert_eq!(ns_response_no_props, NamespaceResponse { + namespace: NamespaceIdent::from_vec(vec!["db".to_string(), "schema".to_string()]) + 
.unwrap(), + properties: HashMap::new(), + }); + assert_eq!( + serde_json::to_value(&ns_response_no_props).expect("Serialization failed"), + json_no_props + ); + } } diff --git a/crates/catalog/s3tables/Cargo.toml b/crates/catalog/s3tables/Cargo.toml index 66fb70fefc..fde08b9a49 100644 --- a/crates/catalog/s3tables/Cargo.toml +++ b/crates/catalog/s3tables/Cargo.toml @@ -21,6 +21,7 @@ homepage = { workspace = true } name = "iceberg-catalog-s3tables" rust-version = { workspace = true } version = { workspace = true } +readme = "README.md" categories = ["database"] description = "Apache Iceberg Rust S3Tables Catalog" diff --git a/crates/catalog/s3tables/DEPENDENCIES.rust.tsv b/crates/catalog/s3tables/DEPENDENCIES.rust.tsv index 13d8eebe7a..7dd182e435 100644 --- a/crates/catalog/s3tables/DEPENDENCIES.rust.tsv +++ b/crates/catalog/s3tables/DEPENDENCIES.rust.tsv @@ -1,77 +1,73 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -aws-config@1.8.6 X -aws-credential-types@1.2.6 X -aws-runtime@1.5.10 X 
-aws-sdk-s3tables@1.37.0 X -aws-sdk-sso@1.83.0 X -aws-sdk-ssooidc@1.84.0 X -aws-sdk-sts@1.85.0 X -aws-sigv4@1.3.4 X -aws-smithy-async@1.2.5 X -aws-smithy-http@0.62.3 X -aws-smithy-http-client@1.1.1 X -aws-smithy-json@0.61.5 X -aws-smithy-observability@0.1.3 X -aws-smithy-query@0.60.7 X -aws-smithy-runtime@1.9.1 X -aws-smithy-runtime-api@1.9.0 X -aws-smithy-types@1.3.2 X -aws-smithy-xml@0.60.10 X -aws-types@1.3.8 X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.21.7 X X +aws-config@1.8.11 X +aws-credential-types@1.2.10 X +aws-runtime@1.5.16 X +aws-sdk-s3tables@1.46.0 X +aws-sdk-sso@1.90.0 X +aws-sdk-ssooidc@1.92.0 X +aws-sdk-sts@1.94.0 X +aws-sigv4@1.3.6 X +aws-smithy-async@1.2.7 X +aws-smithy-http@0.62.6 X +aws-smithy-http-client@1.1.5 X +aws-smithy-json@0.61.8 X +aws-smithy-observability@0.1.5 X +aws-smithy-query@0.60.9 X +aws-smithy-runtime@1.9.5 X +aws-smithy-runtime-api@1.9.3 X +aws-smithy-types@1.3.5 X +aws-smithy-xml@0.60.13 X +aws-types@1.3.10 X +backon@1.6.0 X base64@0.22.1 X X base64-simd@0.8.0 X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X +bytes@1.11.0 X bytes-utils@0.1.4 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X const-random@0.1.18 X X const-random-macro@0.1.16 X X core-foundation@0.10.1 X X -core-foundation@0.9.4 X X core-foundation-sys@0.8.7 X X cpufeatures@0.2.17 X X crc32c@0.6.8 X X @@ -80,14 +76,14 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X darling_core@0.21.3 X darling_macro@0.20.11 X 
darling_macro@0.21.3 X -deranged@0.5.3 X X +deranged@0.5.5 X X derive_builder@0.20.2 X X derive_builder_core@0.20.2 X X derive_builder_macro@0.20.2 X X @@ -100,9 +96,9 @@ event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -114,114 +110,107 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X h2@0.3.27 X h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X http@0.2.12 X X -http@1.3.1 X X +http@1.4.0 X X http-body@0.4.6 X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X httpdate@1.0.3 X X hyper@0.14.32 X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.24.2 X X X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-s3tables@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-s3tables@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X 
+iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-conv@0.1.0 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X openssl-probe@0.1.6 X X ordered-float@2.10.1 X ordered-float@4.6.0 X outref@0.5.2 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X powerfmt@0.2.0 X X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X 
X rand@0.9.2 X X @@ -229,152 +218,150 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X rustls@0.21.12 X X X -rustls@0.23.31 X X X -rustls-native-certs@0.6.3 X X X -rustls-native-certs@0.8.1 X X X -rustls-pemfile@1.0.4 X X X -rustls-pki-types@1.12.0 X X +rustls@0.23.35 X X X +rustls-native-certs@0.8.2 X X X +rustls-pki-types@1.13.1 X X rustls-webpki@0.101.7 X -rustls-webpki@0.103.4 X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -schannel@0.1.27 X -scoped-tls@1.0.1 X X +schannel@0.1.28 X scopeguard@1.2.0 X X sct@0.7.1 X X X -security-framework@2.11.1 X X -security-framework@3.4.0 X X +security-framework@3.5.1 X X security-framework-sys@2.15.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X socket2@0.5.10 X X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X 
-syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X -time@0.3.43 X X +time@0.3.44 X X time-core@0.1.6 X X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X tokio-rustls@0.24.1 X X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X urlencoding@2.1.3 X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X vsimd@0.8.0 X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X 
-windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X xmlparser@0.13.6 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/s3tables/LICENSE b/crates/catalog/s3tables/LICENSE new file mode 100644 index 0000000000..261eeb9e9f --- /dev/null +++ b/crates/catalog/s3tables/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/crates/catalog/s3tables/NOTICE b/crates/catalog/s3tables/NOTICE new file mode 100644 index 0000000000..9340680cbd --- /dev/null +++ b/crates/catalog/s3tables/NOTICE @@ -0,0 +1,5 @@ +Apache Iceberg Rust +Copyright 2023-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). 
diff --git a/crates/catalog/s3tables/README.md b/crates/catalog/s3tables/README.md new file mode 100644 index 0000000000..d9784d5231 --- /dev/null +++ b/crates/catalog/s3tables/README.md @@ -0,0 +1,56 @@ + + +# Apache Iceberg S3Tables Catalog (Rust) + +[![crates.io](https://img.shields.io/crates/v/iceberg-catalog-s3tables.svg)](https://crates.io/crates/iceberg-catalog-s3tables) +[![docs.rs](https://img.shields.io/docsrs/iceberg-catalog-s3tables.svg)](https://docs.rs/iceberg-catalog-s3tables/latest/iceberg_catalog_s3tables/) + +Official Native Rust implementation of the Apache Iceberg S3Tables catalog. + +## Quick start + +```rust,no_run +use std::collections::HashMap; + +use iceberg::CatalogBuilder; +use iceberg_catalog_s3tables::{ + S3TABLES_CATALOG_PROP_ENDPOINT_URL, S3TABLES_CATALOG_PROP_TABLE_BUCKET_ARN, + S3TablesCatalogBuilder, +}; + +#[tokio::main] +async fn main() { + let catalog = S3TablesCatalogBuilder::default() + .with_endpoint_url("http://localhost:4566") + .load( + "s3tables", + HashMap::from([( + S3TABLES_CATALOG_PROP_TABLE_BUCKET_ARN.to_string(), + "arn:aws:s3tables:us-east-1:123456789012:bucket/my-bucket".to_string(), + )]), + ) + .await + .unwrap(); + + // use `catalog` as any Iceberg Catalog +} +``` + +See the [API documentation](https://docs.rs/iceberg-catalog-s3tables/latest) for the full API surface. 
diff --git a/crates/catalog/sql/DEPENDENCIES.rust.tsv b/crates/catalog/sql/DEPENDENCIES.rust.tsv index 335c980967..cc48621d0c 100644 --- a/crates/catalog/sql/DEPENDENCIES.rust.tsv +++ b/crates/catalog/sql/DEPENDENCIES.rust.tsv @@ -1,49 +1,47 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X allocator-api2@0.2.21 X X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X @@ -51,7 +49,7 @@ const-random@0.1.18 X X const-random-macro@0.1.16 X X core-foundation-sys@0.8.7 X X cpufeatures@0.2.17 X X 
-crc@3.3.0 X X +crc@3.4.0 X X crc-catalog@2.4.0 X X crc32c@0.6.8 X X crc32fast@1.5.0 X X @@ -60,7 +58,7 @@ crossbeam-epoch@0.9.18 X X crossbeam-queue@0.3.12 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -79,9 +77,9 @@ event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X flume@0.11.1 X X fnv@1.0.7 X X foldhash@0.1.5 X @@ -96,105 +94,99 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X -half@2.6.0 X X +half@2.7.1 X X hashbrown@0.15.5 X X +hashbrown@0.16.1 X X hashlink@0.10.0 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-sql@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-sql@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X 
X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X libsqlite3-sys@0.30.1 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -202,141 
+194,142 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X -socket2@0.6.0 X X +socket2@0.6.1 X X spin@0.9.8 X sqlx@0.8.6 X X sqlx-core@0.8.6 X X sqlx-sqlite@0.8.6 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X 
+tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X tokio-stream@0.1.17 X -tokio-util@0.7.16 X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X vcpkg@0.2.15 X X version_check@0.9.5 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X +web-sys@0.3.83 X X webpki-roots@0.26.11 X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X 
+windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/sql/src/catalog.rs b/crates/catalog/sql/src/catalog.rs index 77b35a228f..8209cd04c1 100644 --- a/crates/catalog/sql/src/catalog.rs +++ b/crates/catalog/sql/src/catalog.rs @@ -917,11 +917,55 @@ impl Catalog for SqlCatalog { .build()?) } - async fn update_table(&self, _commit: TableCommit) -> Result { - Err(Error::new( - ErrorKind::FeatureUnsupported, - "Updating a table is not supported yet", - )) + /// Updates an existing table within the SQL catalog. + async fn update_table(&self, commit: TableCommit) -> Result
{ + let table_ident = commit.identifier().clone(); + let current_table = self.load_table(&table_ident).await?; + let current_metadata_location = current_table.metadata_location_result()?.to_string(); + + let staged_table = commit.apply(current_table)?; + let staged_metadata_location = staged_table.metadata_location_result()?; + + staged_table + .metadata() + .write_to(staged_table.file_io(), &staged_metadata_location) + .await?; + + let update_result = self + .execute( + &format!( + "UPDATE {CATALOG_TABLE_NAME} + SET {CATALOG_FIELD_METADATA_LOCATION_PROP} = ?, {CATALOG_FIELD_PREVIOUS_METADATA_LOCATION_PROP} = ? + WHERE {CATALOG_FIELD_CATALOG_NAME} = ? + AND {CATALOG_FIELD_TABLE_NAME} = ? + AND {CATALOG_FIELD_TABLE_NAMESPACE} = ? + AND ( + {CATALOG_FIELD_RECORD_TYPE} = '{CATALOG_FIELD_TABLE_RECORD_TYPE}' + OR {CATALOG_FIELD_RECORD_TYPE} IS NULL + ) + AND {CATALOG_FIELD_METADATA_LOCATION_PROP} = ?" + ), + vec![ + Some(staged_metadata_location), + Some(current_metadata_location.as_str()), + Some(&self.name), + Some(table_ident.name()), + Some(&table_ident.namespace().join(".")), + Some(current_metadata_location.as_str()), + ], + None, + ) + .await?; + + if update_result.rows_affected() == 0 { + return Err(Error::new( + ErrorKind::CatalogCommitConflicts, + format!("Commit conflicted for table: {table_ident}"), + ) + .with_retryable(true)); + } + + Ok(staged_table) } } @@ -932,6 +976,7 @@ mod tests { use iceberg::spec::{NestedField, PartitionSpec, PrimitiveType, Schema, SortOrder, Type}; use iceberg::table::Table; + use iceberg::transaction::{ApplyTransactionAction, Transaction}; use iceberg::{Catalog, CatalogBuilder, Namespace, NamespaceIdent, TableCreation, TableIdent}; use itertools::Itertools; use regex::Regex; @@ -2293,4 +2338,56 @@ mod tests { assert_eq!(table.identifier(), expected_table.identifier()); assert_eq!(table.metadata_location(), Some(metadata_location.as_str())); } + + #[tokio::test] + async fn test_update_table() { + let warehouse_loc = temp_path(); + 
let catalog = new_sql_catalog(warehouse_loc).await; + + // Create a test namespace and table + let namespace_ident = NamespaceIdent::new("ns1".into()); + create_namespace(&catalog, &namespace_ident).await; + let table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into()); + create_table(&catalog, &table_ident).await; + + let table = catalog.load_table(&table_ident).await.unwrap(); + + // Store the original metadata location for comparison + let original_metadata_location = table.metadata_location().unwrap().to_string(); + + // Create a transaction to update the table + let tx = Transaction::new(&table); + let tx = tx + .update_table_properties() + .set("test_property".to_string(), "test_value".to_string()) + .apply(tx) + .unwrap(); + + // Commit the transaction to the catalog + let updated_table = tx.commit(&catalog).await.unwrap(); + + // Verify the update was successful + assert_eq!( + updated_table.metadata().properties().get("test_property"), + Some(&"test_value".to_string()) + ); + // Verify the metadata location has been updated + assert_ne!( + updated_table.metadata_location().unwrap(), + original_metadata_location.as_str() + ); + + // Load the table again from the catalog to verify changes were persisted + let reloaded = catalog.load_table(&table_ident).await.unwrap(); + + // Verify the reloaded table matches the updated table + assert_eq!( + reloaded.metadata().properties().get("test_property"), + Some(&"test_value".to_string()) + ); + assert_eq!( + reloaded.metadata_location(), + updated_table.metadata_location() + ); + } } diff --git a/crates/examples/DEPENDENCIES.rust.tsv b/crates/examples/DEPENDENCIES.rust.tsv index 20702597a7..de07f1c650 100644 --- a/crates/examples/DEPENDENCIES.rust.tsv +++ b/crates/examples/DEPENDENCIES.rust.tsv @@ -1,48 +1,46 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X 
adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X @@ -56,7 +54,7 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -75,9 +73,9 @@ event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -89,106 +87,99 @@ futures-macro@0.3.31 X X 
futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X httpdate@1.0.3 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-rest@0.7.0 X -iceberg-examples@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-rest@0.8.0 X +iceberg-examples@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X 
+lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -196,135 +187,136 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X 
+rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X 
+unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X 
+writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 895a5cf5e4..6f1332a444 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -29,7 +29,7 @@ license = { workspace = true } repository = { workspace = true } [features] -default = ["storage-memory", "storage-fs", "storage-s3", "tokio"] +default = ["storage-memory", "storage-fs", "storage-s3"] storage-all = ["storage-memory", "storage-fs", "storage-s3", "storage-gcs"] storage-azdls = ["opendal/services-azdls"] @@ -39,8 +39,6 @@ storage-memory = ["opendal/services-memory"] storage-oss = ["opendal/services-oss"] storage-s3 = ["opendal/services-s3", "reqsign"] -smol = ["dep:smol"] -tokio = ["tokio/rt-multi-thread"] [dependencies] anyhow = { workspace = true } @@ -85,9 +83,7 @@ serde_derive = { workspace = true } serde_json = { workspace = true } serde_repr = { workspace = true } serde_with = { workspace = true } -smol = { workspace = true, optional = true } strum = { workspace = true, features = ["derive"] } -thrift = { workspace = true } tokio = { workspace = true, optional = false, features = ["sync"] } typed-builder = { workspace = true } url = { workspace = true } diff --git a/crates/iceberg/DEPENDENCIES.rust.tsv b/crates/iceberg/DEPENDENCIES.rust.tsv index 460a531644..c446954437 100644 --- a/crates/iceberg/DEPENDENCIES.rust.tsv +++ b/crates/iceberg/DEPENDENCIES.rust.tsv @@ -1,48 +1,46 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X 
adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X @@ -56,7 +54,7 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -70,13 +68,14 @@ digest@0.10.7 X X displaydoc@0.2.5 X X dissimilar@1.0.10 X either@1.15.0 X X +equivalent@1.0.2 X X event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X 
form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -88,101 +87,94 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 
X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -190,134 +182,135 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X 
-scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X want@0.3.1 X 
wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X 
-zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/iceberg/src/arrow/caching_delete_file_loader.rs b/crates/iceberg/src/arrow/caching_delete_file_loader.rs index 192ca390a8..5d0b1da712 100644 --- a/crates/iceberg/src/arrow/caching_delete_file_loader.rs +++ b/crates/iceberg/src/arrow/caching_delete_file_loader.rs @@ -23,7 +23,7 @@ use arrow_array::{Array, ArrayRef, Int64Array, StringArray, StructArray}; use futures::{StreamExt, TryStreamExt}; use tokio::sync::oneshot::{Receiver, channel}; -use super::delete_filter::DeleteFilter; +use super::delete_filter::{DeleteFilter, PosDelLoadAction}; use crate::arrow::delete_file_loader::BasicDeleteFileLoader; use crate::arrow::{arrow_primitive_to_literal, arrow_schema_to_schema}; use crate::delete_vector::DeleteVector; @@ -42,13 +42,20 @@ use crate::{Error, ErrorKind, Result}; pub(crate) struct CachingDeleteFileLoader { basic_delete_file_loader: BasicDeleteFileLoader, concurrency_limit_data_files: usize, + /// Shared filter state to allow caching loaded deletes across multiple + /// calls to `load_deletes` (e.g., across multiple file scan tasks). + delete_filter: DeleteFilter, } // Intermediate context during processing of a delete file task. 
enum DeleteFileContext { // TODO: Delete Vector loader from Puffin files ExistingEqDel, - PosDels(ArrowRecordBatchStream), + ExistingPosDel, + PosDels { + file_path: String, + stream: ArrowRecordBatchStream, + }, FreshEqDel { batch_stream: ArrowRecordBatchStream, equality_ids: HashSet, @@ -59,8 +66,12 @@ enum DeleteFileContext { // Final result of the processing of a delete file task before // results are fully merged into the DeleteFileManager's state enum ParsedDeleteFileContext { - DelVecs(HashMap), + DelVecs { + file_path: String, + results: HashMap, + }, EqDel, + ExistingPosDel, } #[allow(unused_variables)] @@ -69,6 +80,7 @@ impl CachingDeleteFileLoader { CachingDeleteFileLoader { basic_delete_file_loader: BasicDeleteFileLoader::new(file_io), concurrency_limit_data_files, + delete_filter: DeleteFilter::default(), } } @@ -142,7 +154,6 @@ impl CachingDeleteFileLoader { schema: SchemaRef, ) -> Receiver> { let (tx, rx) = channel(); - let del_filter = DeleteFilter::default(); let stream_items = delete_file_entries .iter() @@ -150,14 +161,14 @@ impl CachingDeleteFileLoader { ( t.clone(), self.basic_delete_file_loader.clone(), - del_filter.clone(), + self.delete_filter.clone(), schema.clone(), ) }) .collect::>(); let task_stream = futures::stream::iter(stream_items); - let del_filter = del_filter.clone(); + let del_filter = self.delete_filter.clone(); let concurrency_limit_data_files = self.concurrency_limit_data_files; let basic_delete_file_loader = self.basic_delete_file_loader.clone(); crate::runtime::spawn(async move { @@ -165,7 +176,7 @@ impl CachingDeleteFileLoader { let mut del_filter = del_filter; let basic_delete_file_loader = basic_delete_file_loader.clone(); - let results: Vec = task_stream + let mut results_stream = task_stream .map(move |(task, file_io, del_filter, schema)| { let basic_delete_file_loader = basic_delete_file_loader.clone(); async move { @@ -181,15 +192,16 @@ impl CachingDeleteFileLoader { .map(move |ctx| { Ok(async { 
Self::parse_file_content_for_task(ctx.await?).await }) }) - .try_buffer_unordered(concurrency_limit_data_files) - .try_collect::>() - .await?; + .try_buffer_unordered(concurrency_limit_data_files); - for item in results { - if let ParsedDeleteFileContext::DelVecs(hash_map) = item { - for (data_file_path, delete_vector) in hash_map.into_iter() { + while let Some(item) = results_stream.next().await { + let item = item?; + if let ParsedDeleteFileContext::DelVecs { file_path, results } = item { + for (data_file_path, delete_vector) in results.into_iter() { del_filter.upsert_delete_vector(data_file_path, delete_vector); } + // Mark the positional delete file as fully loaded so waiters can proceed + del_filter.finish_pos_del_load(&file_path); } } @@ -210,11 +222,24 @@ impl CachingDeleteFileLoader { schema: SchemaRef, ) -> Result { match task.file_type { - DataContentType::PositionDeletes => Ok(DeleteFileContext::PosDels( - basic_delete_file_loader - .parquet_to_batch_stream(&task.file_path) - .await?, - )), + DataContentType::PositionDeletes => { + match del_filter.try_start_pos_del_load(&task.file_path) { + PosDelLoadAction::AlreadyLoaded => Ok(DeleteFileContext::ExistingPosDel), + PosDelLoadAction::WaitFor(notify) => { + // Positional deletes are accessed synchronously by ArrowReader. + // We must wait here to ensure the data is ready before returning, + // otherwise ArrowReader might get an empty/partial result. 
+ notify.notified().await; + Ok(DeleteFileContext::ExistingPosDel) + } + PosDelLoadAction::Load => Ok(DeleteFileContext::PosDels { + file_path: task.file_path.clone(), + stream: basic_delete_file_loader + .parquet_to_batch_stream(&task.file_path) + .await?, + }), + } + } DataContentType::EqualityDeletes => { let Some(notify) = del_filter.try_start_eq_del_load(&task.file_path) else { @@ -255,10 +280,13 @@ impl CachingDeleteFileLoader { ) -> Result { match ctx { DeleteFileContext::ExistingEqDel => Ok(ParsedDeleteFileContext::EqDel), - DeleteFileContext::PosDels(batch_stream) => { - let del_vecs = - Self::parse_positional_deletes_record_batch_stream(batch_stream).await?; - Ok(ParsedDeleteFileContext::DelVecs(del_vecs)) + DeleteFileContext::ExistingPosDel => Ok(ParsedDeleteFileContext::ExistingPosDel), + DeleteFileContext::PosDels { file_path, stream } => { + let del_vecs = Self::parse_positional_deletes_record_batch_stream(stream).await?; + Ok(ParsedDeleteFileContext::DelVecs { + file_path, + results: del_vecs, + }) } DeleteFileContext::FreshEqDel { sender, @@ -330,7 +358,7 @@ impl CachingDeleteFileLoader { mut stream: ArrowRecordBatchStream, equality_ids: HashSet, ) -> Result { - let mut result_predicate = AlwaysTrue; + let mut row_predicates = Vec::new(); let mut batch_schema_iceberg: Option = None; let accessor = EqDelRecordBatchPartnerAccessor; @@ -374,10 +402,29 @@ impl CachingDeleteFileLoader { row_predicate = row_predicate.and(cell_predicate) } } - result_predicate = result_predicate.and(row_predicate.not()); + row_predicates.push(row_predicate.not().rewrite_not()); + } + } + + // All row predicates are combined to a single predicate by creating a balanced binary tree. + // Using a simple fold would result in a deeply nested predicate that can cause a stack overflow. 
+ while row_predicates.len() > 1 { + let mut next_level = Vec::with_capacity(row_predicates.len().div_ceil(2)); + let mut iter = row_predicates.into_iter(); + while let Some(p1) = iter.next() { + if let Some(p2) = iter.next() { + next_level.push(p1.and(p2)); + } else { + next_level.push(p1); + } } + row_predicates = next_level; + } + + match row_predicates.pop() { + Some(p) => Ok(p), + None => Ok(AlwaysTrue), } - Ok(result_predicate.rewrite_not()) } } @@ -892,6 +939,7 @@ mod tests { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; // Load the deletes - should handle both types without error @@ -912,4 +960,90 @@ mod tests { result.err() ); } + + #[tokio::test] + async fn test_large_equality_delete_batch_stack_overflow() { + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().as_os_str().to_str().unwrap(); + let file_io = FileIO::from_path(table_location).unwrap().build().unwrap(); + + // Create a large batch of equality deletes + let num_rows = 20_000; + let col_y_vals: Vec = (0..num_rows).collect(); + let col_y = Arc::new(Int64Array::from(col_y_vals)) as ArrayRef; + + let schema = Arc::new(arrow_schema::Schema::new(vec![ + Field::new("y", arrow_schema::DataType::Int64, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "2".to_string(), + )])), + ])); + + let record_batch = RecordBatch::try_new(schema.clone(), vec![col_y]).unwrap(); + + // Write to file + let path = format!("{}/large-eq-deletes.parquet", &table_location); + let file = File::create(&path).unwrap(); + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + let mut writer = ArrowWriter::try_new(file, schema, Some(props)).unwrap(); + writer.write(&record_batch).unwrap(); + writer.close().unwrap(); + + let basic_delete_file_loader = BasicDeleteFileLoader::new(file_io.clone()); + let record_batch_stream = basic_delete_file_loader + .parquet_to_batch_stream(&path) + .await + 
.expect("could not get batch stream"); + + let eq_ids = HashSet::from_iter(vec![2]); + + let result = CachingDeleteFileLoader::parse_equality_deletes_record_batch_stream( + record_batch_stream, + eq_ids, + ) + .await; + + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_caching_delete_file_loader_caches_results() { + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path(); + let file_io = FileIO::from_path(table_location.as_os_str().to_str().unwrap()) + .unwrap() + .build() + .unwrap(); + + let delete_file_loader = CachingDeleteFileLoader::new(file_io.clone(), 10); + + let file_scan_tasks = setup(table_location); + + // Load deletes for the first time + let delete_filter_1 = delete_file_loader + .load_deletes(&file_scan_tasks[0].deletes, file_scan_tasks[0].schema_ref()) + .await + .unwrap() + .unwrap(); + + // Load deletes for the second time (same task/files) + let delete_filter_2 = delete_file_loader + .load_deletes(&file_scan_tasks[0].deletes, file_scan_tasks[0].schema_ref()) + .await + .unwrap() + .unwrap(); + + let dv1 = delete_filter_1 + .get_delete_vector(&file_scan_tasks[0]) + .unwrap(); + let dv2 = delete_filter_2 + .get_delete_vector(&file_scan_tasks[0]) + .unwrap(); + + // Verify that the delete vectors point to the same memory location, + // confirming that the second load reused the result from the first. + assert!(Arc::ptr_eq(&dv1, &dv2)); + } } diff --git a/crates/iceberg/src/arrow/delete_filter.rs b/crates/iceberg/src/arrow/delete_filter.rs index 14b5124ee6..4af9f6b6ff 100644 --- a/crates/iceberg/src/arrow/delete_filter.rs +++ b/crates/iceberg/src/arrow/delete_filter.rs @@ -34,10 +34,23 @@ enum EqDelState { Loaded(Predicate), } +/// State tracking for positional delete files. +/// Unlike equality deletes, positional deletes must be fully loaded before +/// the ArrowReader proceeds because retrieval is synchronous and non-blocking. 
+#[derive(Debug)] +enum PosDelState { + /// The file is currently being loaded by a task. + /// The notifier allows other tasks to wait for completion. + Loading(Arc), + /// The file has been fully loaded and merged into the delete vector map. + Loaded, +} + #[derive(Debug, Default)] struct DeleteFileFilterState { delete_vectors: HashMap>>, equality_deletes: HashMap, + positional_deletes: HashMap, } #[derive(Clone, Debug, Default)] @@ -45,6 +58,18 @@ pub(crate) struct DeleteFilter { state: Arc>, } +/// Action to take when trying to start loading a positional delete file +pub(crate) enum PosDelLoadAction { + /// The file is not loaded, the caller should load it. + Load, + /// The file is already loaded, nothing to do. + AlreadyLoaded, + /// The file is currently being loaded by another task. + /// The caller *must* wait for this notifier to ensure data availability + /// before returning, as subsequent access (get_delete_vector) is synchronous. + WaitFor(Arc), +} + impl DeleteFilter { /// Retrieve a delete vector for the data file associated with a given file scan task pub(crate) fn get_delete_vector( @@ -57,12 +82,12 @@ impl DeleteFilter { /// Retrieve a delete vector for a data file pub(crate) fn get_delete_vector_for_path( &self, - delete_file_path: &str, + data_file_path: &str, ) -> Option>> { self.state .read() .ok() - .and_then(|st| st.delete_vectors.get(delete_file_path).cloned()) + .and_then(|st| st.delete_vectors.get(data_file_path).cloned()) } pub(crate) fn try_start_eq_del_load(&self, file_path: &str) -> Option> { @@ -82,6 +107,47 @@ impl DeleteFilter { Some(notifier) } + /// Attempts to mark a positional delete file as "loading". + /// + /// Returns an action dictating whether the caller should load the file, + /// wait for another task to load it, or do nothing. 
+ pub(crate) fn try_start_pos_del_load(&self, file_path: &str) -> PosDelLoadAction { + let mut state = self.state.write().unwrap(); + + if let Some(state) = state.positional_deletes.get(file_path) { + match state { + PosDelState::Loaded => return PosDelLoadAction::AlreadyLoaded, + PosDelState::Loading(notify) => return PosDelLoadAction::WaitFor(notify.clone()), + } + } + + let notifier = Arc::new(Notify::new()); + state + .positional_deletes + .insert(file_path.to_string(), PosDelState::Loading(notifier)); + + PosDelLoadAction::Load + } + + /// Marks a positional delete file as successfully loaded and notifies any waiting tasks. + pub(crate) fn finish_pos_del_load(&self, file_path: &str) { + let notify = { + let mut state = self.state.write().unwrap(); + if let Some(PosDelState::Loading(notify)) = state + .positional_deletes + .insert(file_path.to_string(), PosDelState::Loaded) + { + Some(notify) + } else { + None + } + }; + + if let Some(notify) = notify { + notify.notify_waiters(); + } + } + /// Retrieve the equality delete predicate for a given eq delete file path pub(crate) async fn get_equality_delete_predicate_for_delete_file_path( &self, @@ -141,8 +207,8 @@ impl DeleteFilter { return Ok(None); } - // TODO: handle case-insensitive case - let bound_predicate = combined_predicate.bind(file_scan_task.schema.clone(), false)?; + let bound_predicate = combined_predicate + .bind(file_scan_task.schema.clone(), file_scan_task.case_sensitive)?; Ok(Some(bound_predicate)) } @@ -211,8 +277,9 @@ pub(crate) mod tests { use super::*; use crate::arrow::caching_delete_file_loader::CachingDeleteFileLoader; + use crate::expr::Reference; use crate::io::FileIO; - use crate::spec::{DataFileFormat, Schema}; + use crate::spec::{DataFileFormat, Datum, NestedField, PrimitiveType, Schema, Type}; type ArrowSchemaRef = Arc; @@ -344,6 +411,7 @@ pub(crate) mod tests { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }, FileScanTask { start: 0, @@ -358,6 
+426,7 @@ pub(crate) mod tests { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }, ]; @@ -380,4 +449,57 @@ pub(crate) mod tests { ]; Arc::new(arrow_schema::Schema::new(fields)) } + + #[tokio::test] + async fn test_build_equality_delete_predicate_case_sensitive() { + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required(1, "Id", Type::Primitive(PrimitiveType::Long)).into(), + ]) + .build() + .unwrap(), + ); + + // ---------- fake FileScanTask ---------- + let task = FileScanTask { + start: 0, + length: 0, + record_count: None, + data_file_path: "data.parquet".to_string(), + data_file_format: crate::spec::DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![], + predicate: None, + deletes: vec![FileScanTaskDeleteFile { + file_path: "eq-del.parquet".to_string(), + file_type: DataContentType::EqualityDeletes, + partition_spec_id: 0, + equality_ids: None, + }], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: true, + }; + + let filter = DeleteFilter::default(); + + // ---------- insert equality delete predicate ---------- + let pred = Reference::new("id").equal_to(Datum::long(10)); + + let (tx, rx) = tokio::sync::oneshot::channel(); + filter.insert_equality_delete("eq-del.parquet", rx); + + tx.send(pred).unwrap(); + + // ---------- should FAIL ---------- + let result = filter.build_equality_delete_predicate(&task).await; + + assert!( + result.is_err(), + "case_sensitive=true should fail when column case mismatches" + ); + } } diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs index ab5a96f751..f7f90663a5 100644 --- a/crates/iceberg/src/arrow/reader.rs +++ b/crates/iceberg/src/arrow/reader.rs @@ -54,6 +54,7 @@ use crate::expr::visitors::page_index_evaluator::PageIndexEvaluator; use crate::expr::visitors::row_group_metrics_evaluator::RowGroupMetricsEvaluator; use 
crate::expr::{BoundPredicate, BoundReference}; use crate::io::{FileIO, FileMetadata, FileRead}; +use crate::metadata_columns::{RESERVED_FIELD_ID_FILE, is_metadata_field}; use crate::scan::{ArrowRecordBatchStream, FileScanTask, FileScanTaskStream}; use crate::spec::{Datum, NameMapping, NestedField, PrimitiveType, Schema, Type}; use crate::utils::available_parallelism; @@ -250,12 +251,20 @@ impl ArrowReader { initial_stream_builder }; + // Filter out metadata fields for Parquet projection (they don't exist in files) + let project_field_ids_without_metadata: Vec = task + .project_field_ids + .iter() + .filter(|&&id| !is_metadata_field(id)) + .copied() + .collect(); + // Create projection mask based on field IDs // - If file has embedded IDs: field-ID-based projection (missing_field_ids=false) // - If name mapping applied: field-ID-based projection (missing_field_ids=true but IDs now match) // - If fallback IDs: position-based projection (missing_field_ids=true) let projection_mask = Self::get_arrow_projection_mask( - &task.project_field_ids, + &project_field_ids_without_metadata, &task.schema, record_batch_stream_builder.parquet_schema(), record_batch_stream_builder.schema(), @@ -266,16 +275,23 @@ impl ArrowReader { record_batch_stream_builder.with_projection(projection_mask.clone()); // RecordBatchTransformer performs any transformations required on the RecordBatches - // that come back from the file, such as type promotion, default column insertion - // and column re-ordering. 
+ // that come back from the file, such as type promotion, default column insertion, + // column re-ordering, partition constants, and virtual field addition (like _file) let mut record_batch_transformer_builder = RecordBatchTransformerBuilder::new(task.schema_ref(), task.project_field_ids()); + // Add the _file metadata column if it's in the projected fields + if task.project_field_ids().contains(&RESERVED_FIELD_ID_FILE) { + let file_datum = Datum::string(task.data_file_path.clone()); + record_batch_transformer_builder = + record_batch_transformer_builder.with_constant(RESERVED_FIELD_ID_FILE, file_datum); + } + if let (Some(partition_spec), Some(partition_data)) = (task.partition_spec.clone(), task.partition.clone()) { record_batch_transformer_builder = - record_batch_transformer_builder.with_partition(partition_spec, partition_data); + record_batch_transformer_builder.with_partition(partition_spec, partition_data)?; } let mut record_batch_transformer = record_batch_transformer_builder.build(); @@ -416,7 +432,10 @@ impl ArrowReader { record_batch_stream_builder .build()? .map(move |batch| match batch { - Ok(batch) => record_batch_transformer.process_record_batch(batch), + Ok(batch) => { + // Process the record batch (type promotion, column reordering, virtual fields, etc.) + record_batch_transformer.process_record_batch(batch) + } Err(err) => Err(err.into()), }); @@ -485,10 +504,10 @@ impl ArrowReader { // we need to call next() to update the cache with the newly positioned value. 
delete_vector_iter.advance_to(next_row_group_base_idx); // Only update the cache if the cached value is stale (in the skipped range) - if let Some(cached_idx) = next_deleted_row_idx_opt { - if cached_idx < next_row_group_base_idx { - next_deleted_row_idx_opt = delete_vector_iter.next(); - } + if let Some(cached_idx) = next_deleted_row_idx_opt + && cached_idx < next_row_group_base_idx + { + next_deleted_row_idx_opt = delete_vector_iter.next(); } // still increment the current page base index but then skip to the next row group @@ -842,10 +861,10 @@ impl ArrowReader { }; // If all row groups were filtered out, return an empty RowSelection (select no rows) - if let Some(selected_row_groups) = selected_row_groups { - if selected_row_groups.is_empty() { - return Ok(RowSelection::from(Vec::new())); - } + if let Some(selected_row_groups) = selected_row_groups + && selected_row_groups.is_empty() + { + return Ok(RowSelection::from(Vec::new())); } let mut selected_row_groups_idx = 0; @@ -878,10 +897,10 @@ impl ArrowReader { results.push(selections_for_page); - if let Some(selected_row_groups) = selected_row_groups { - if selected_row_groups_idx == selected_row_groups.len() { - break; - } + if let Some(selected_row_groups) = selected_row_groups + && selected_row_groups_idx == selected_row_groups.len() + { + break; } } @@ -1012,14 +1031,13 @@ fn apply_name_mapping_to_arrow_schema( let mut metadata = field.metadata().clone(); - if let Some(mapped_field) = mapped_field_opt { - if let Some(field_id) = mapped_field.field_id() { - // Field found in mapping with a field_id → assign it - metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field_id.to_string()); - } - // If field_id is None, leave the field without an ID (will be filtered by projection) + if let Some(mapped_field) = mapped_field_opt + && let Some(field_id) = mapped_field.field_id() + { + // Field found in mapping with a field_id → assign it + metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), 
field_id.to_string()); } - // If field not found in mapping, leave it without an ID (will be filtered by projection) + // If field_id is None, leave the field without an ID (will be filtered by projection) Field::new(field.name(), field.data_type().clone(), field.is_nullable()) .with_metadata(metadata) @@ -1896,7 +1914,7 @@ message schema { assert_eq!(err.kind(), ErrorKind::DataInvalid); assert_eq!( err.to_string(), - "DataInvalid => Unsupported Arrow data type: Duration(Microsecond)".to_string() + "DataInvalid => Unsupported Arrow data type: Duration(µs)".to_string() ); // Omitting field c2, we still get an error due to c3 being selected @@ -2064,6 +2082,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -2122,7 +2141,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props.clone())).unwrap(); @@ -2303,7 +2322,7 @@ message schema { let tmp_dir = TempDir::new().unwrap(); let table_location = tmp_dir.path().to_str().unwrap().to_string(); - let file_path = format!("{}/multi_row_group.parquet", &table_location); + let file_path = format!("{table_location}/multi_row_group.parquet"); // Force each batch into its own row group for testing byte range filtering. 
let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from( @@ -2385,6 +2404,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; // Task 2: read the second and third row groups @@ -2401,6 +2421,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; let tasks1 = Box::pin(futures::stream::iter(vec![Ok(task1)])) as FileScanTaskStream; @@ -2507,7 +2528,7 @@ message schema { let props = WriterProperties::builder() .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/old_file.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/old_file.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); writer.close().unwrap(); @@ -2528,6 +2549,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -2613,7 +2635,7 @@ message schema { // Step 1: Create data file with 200 rows in 2 row groups // Row group 0: rows 0-99 (ids 1-100) // Row group 1: rows 100-199 (ids 101-200) - let data_file_path = format!("{}/data.parquet", &table_location); + let data_file_path = format!("{table_location}/data.parquet"); let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( Int32Array::from_iter_values(1..=100), @@ -2647,7 +2669,7 @@ message schema { ); // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1) - let delete_file_path = format!("{}/deletes.parquet", &table_location); + let delete_file_path = format!("{table_location}/deletes.parquet"); let delete_schema = Arc::new(ArrowSchema::new(vec![ Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( @@ -2699,6 +2721,7 @@ message schema { partition: None, partition_spec: 
None, name_mapping: None, + case_sensitive: false, }; let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream; @@ -2712,15 +2735,14 @@ message schema { // Step 4: Verify we got 199 rows (not 200) let total_rows: usize = result.iter().map(|b| b.num_rows()).sum(); - println!("Total rows read: {}", total_rows); + println!("Total rows read: {total_rows}"); println!("Expected: 199 rows (deleted row 199 which had id=200)"); // This assertion will FAIL before the fix and PASS after the fix assert_eq!( total_rows, 199, - "Expected 199 rows after deleting row 199, but got {} rows. \ - The bug causes position deletes in later row groups to be ignored.", - total_rows + "Expected 199 rows after deleting row 199, but got {total_rows} rows. \ + The bug causes position deletes in later row groups to be ignored." ); // Verify the deleted row (id=200) is not present @@ -2807,7 +2829,7 @@ message schema { // Step 1: Create data file with 200 rows in 2 row groups // Row group 0: rows 0-99 (ids 1-100) // Row group 1: rows 100-199 (ids 101-200) - let data_file_path = format!("{}/data.parquet", &table_location); + let data_file_path = format!("{table_location}/data.parquet"); let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( Int32Array::from_iter_values(1..=100), @@ -2841,7 +2863,7 @@ message schema { ); // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1) - let delete_file_path = format!("{}/deletes.parquet", &table_location); + let delete_file_path = format!("{table_location}/deletes.parquet"); let delete_schema = Arc::new(ArrowSchema::new(vec![ Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( @@ -2917,6 +2939,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream; @@ -2931,16 +2954,15 @@ message schema { // Row group 1 has 100 rows 
(ids 101-200), minus 1 delete (id=200) = 99 rows let total_rows: usize = result.iter().map(|b| b.num_rows()).sum(); - println!("Total rows read from row group 1: {}", total_rows); + println!("Total rows read from row group 1: {total_rows}"); println!("Expected: 99 rows (row group 1 has 100 rows, 1 delete at position 199)"); // This assertion will FAIL before the fix and PASS after the fix assert_eq!( total_rows, 99, - "Expected 99 rows from row group 1 after deleting position 199, but got {} rows. \ + "Expected 99 rows from row group 1 after deleting position 199, but got {total_rows} rows. \ The bug causes position deletes to be lost when advance_to() is followed by next() \ - when skipping unselected row groups.", - total_rows + when skipping unselected row groups." ); // Verify the deleted row (id=200) is not present @@ -3029,7 +3051,7 @@ message schema { // Step 1: Create data file with 200 rows in 2 row groups // Row group 0: rows 0-99 (ids 1-100) // Row group 1: rows 100-199 (ids 101-200) - let data_file_path = format!("{}/data.parquet", &table_location); + let data_file_path = format!("{table_location}/data.parquet"); let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( Int32Array::from_iter_values(1..=100), @@ -3063,7 +3085,7 @@ message schema { ); // Step 2: Create position delete file that deletes row 0 (id=1, first row in row group 0) - let delete_file_path = format!("{}/deletes.parquet", &table_location); + let delete_file_path = format!("{table_location}/deletes.parquet"); let delete_schema = Arc::new(ArrowSchema::new(vec![ Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( @@ -3128,6 +3150,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream; @@ -3209,7 +3232,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = 
File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3222,7 +3245,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], @@ -3231,6 +3254,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3306,7 +3330,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3319,7 +3343,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 3], @@ -3328,6 +3352,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3392,7 +3417,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3405,7 +3430,7 @@ message schema { start: 0, 
length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2, 3], @@ -3414,6 +3439,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3480,7 +3506,7 @@ message schema { .set_max_row_group_size(2) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); // Write 6 rows in 3 batches (will create 3 row groups) @@ -3505,7 +3531,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], @@ -3514,6 +3540,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3546,7 +3573,7 @@ message schema { assert_eq!(all_values.len(), 6); for i in 0..6 { - assert_eq!(all_names[i], format!("name_{}", i)); + assert_eq!(all_names[i], format!("name_{i}")); assert_eq!(all_values[i], i as i32); } } @@ -3621,7 +3648,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3634,7 +3661,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + 
data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], @@ -3643,6 +3670,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3718,7 +3746,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); writer.close().unwrap(); @@ -3730,7 +3758,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 5, 2], @@ -3739,6 +3767,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3820,7 +3849,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); writer.close().unwrap(); @@ -3839,7 +3868,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2, 3], @@ -3848,6 +3877,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: 
false, })] .into_iter(), )) as FileScanTaskStream; @@ -3978,7 +4008,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/data.parquet", table_location), + data_file_path: format!("{table_location}/data.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], @@ -3987,6 +4017,7 @@ message schema { partition: Some(partition_data), partition_spec: Some(partition_spec), name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; diff --git a/crates/iceberg/src/arrow/record_batch_projector.rs b/crates/iceberg/src/arrow/record_batch_projector.rs index 45de0212e8..7028eee961 100644 --- a/crates/iceberg/src/arrow/record_batch_projector.rs +++ b/crates/iceberg/src/arrow/record_batch_projector.rs @@ -133,25 +133,24 @@ impl RecordBatchProjector { { for (pos, field) in fields.iter().enumerate() { let id = field_id_fetch_func(field)?; - if let Some(id) = id { - if target_field_id == id { - index_vec.push(pos); - return Ok(Some(field.clone())); - } + if let Some(id) = id + && target_field_id == id + { + index_vec.push(pos); + return Ok(Some(field.clone())); } - if let DataType::Struct(inner) = field.data_type() { - if searchable_field_func(field) { - if let Some(res) = Self::fetch_field_index( - inner, - index_vec, - target_field_id, - field_id_fetch_func, - searchable_field_func, - )? { - index_vec.push(pos); - return Ok(Some(res)); - } - } + if let DataType::Struct(inner) = field.data_type() + && searchable_field_func(field) + && let Some(res) = Self::fetch_field_index( + inner, + index_vec, + target_field_id, + field_id_fetch_func, + searchable_field_func, + )? 
+ { + index_vec.push(pos); + return Ok(Some(res)); } } Ok(None) diff --git a/crates/iceberg/src/arrow/record_batch_transformer.rs b/crates/iceberg/src/arrow/record_batch_transformer.rs index a20adb6a5a..439358435c 100644 --- a/crates/iceberg/src/arrow/record_batch_transformer.rs +++ b/crates/iceberg/src/arrow/record_batch_transformer.rs @@ -19,24 +19,23 @@ use std::collections::HashMap; use std::sync::Arc; use arrow_array::{ - Array as ArrowArray, ArrayRef, BinaryArray, BooleanArray, Date32Array, Float32Array, - Float64Array, Int32Array, Int64Array, NullArray, RecordBatch, RecordBatchOptions, StringArray, - StructArray, + Array as ArrowArray, ArrayRef, Int32Array, RecordBatch, RecordBatchOptions, RunArray, }; -use arrow_buffer::NullBuffer; use arrow_cast::cast; use arrow_schema::{ - DataType, FieldRef, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, SchemaRef, + DataType, Field, FieldRef, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, SchemaRef, }; use parquet::arrow::PARQUET_FIELD_ID_META_KEY; -use crate::arrow::schema_to_arrow_schema; +use crate::arrow::value::{create_primitive_array_repeated, create_primitive_array_single_element}; +use crate::arrow::{datum_to_arrow_type_with_ree, schema_to_arrow_schema}; +use crate::metadata_columns::get_metadata_field; use crate::spec::{ - Literal, PartitionSpec, PrimitiveLiteral, Schema as IcebergSchema, Struct, Transform, + Datum, Literal, PartitionSpec, PrimitiveLiteral, Schema as IcebergSchema, Struct, Transform, }; use crate::{Error, ErrorKind, Result}; -/// Build a map of field ID to constant value for identity-partitioned fields. +/// Build a map of field ID to constant value (as Datum) for identity-partitioned fields. /// /// Implements Iceberg spec "Column Projection" rule #1: use partition metadata constants /// only for identity-transformed fields. Non-identity transforms (bucket, truncate, year, etc.) 
@@ -53,20 +52,61 @@ use crate::{Error, ErrorKind, Result}; fn constants_map( partition_spec: &PartitionSpec, partition_data: &Struct, -) -> HashMap { + schema: &IcebergSchema, +) -> Result> { let mut constants = HashMap::new(); for (pos, field) in partition_spec.fields().iter().enumerate() { // Only identity transforms should use constant values from partition metadata if matches!(field.transform, Transform::Identity) { + // Get the field from schema to extract its type + let iceberg_field = schema.field_by_id(field.source_id).ok_or(Error::new( + ErrorKind::Unexpected, + format!("Field {} not found in schema", field.source_id), + ))?; + + // Ensure the field type is primitive + let prim_type = match &*iceberg_field.field_type { + crate::spec::Type::Primitive(prim_type) => prim_type, + _ => { + return Err(Error::new( + ErrorKind::Unexpected, + format!( + "Partition field {} has non-primitive type {:?}", + field.source_id, iceberg_field.field_type + ), + )); + } + }; + // Get the partition value for this field - if let Some(Literal::Primitive(value)) = &partition_data[pos] { - constants.insert(field.source_id, value.clone()); + // Handle both None (null) and Some(Literal::Primitive) cases + match &partition_data[pos] { + None => { + // Skip null partition values - they will be resolved as null per Iceberg spec rule #4. + // When a partition value is null, we don't add it to the constants map, + // allowing downstream column resolution to handle it correctly. 
+ continue; + } + Some(Literal::Primitive(value)) => { + // Create a Datum from the primitive type and value + let datum = Datum::new(prim_type.clone(), value.clone()); + constants.insert(field.source_id, datum); + } + Some(literal) => { + return Err(Error::new( + ErrorKind::Unexpected, + format!( + "Partition field {} has non-primitive value: {:?}", + field.source_id, literal + ), + )); + } } } } - constants + Ok(constants) } /// Indicates how a particular column in a processed RecordBatch should @@ -146,13 +186,13 @@ enum SchemaComparison { /// Builder for RecordBatchTransformer to improve ergonomics when constructing with optional parameters. /// -/// See [`RecordBatchTransformer`] for details on partition spec and partition data. +/// Constant fields are pre-computed for both virtual/metadata fields (like _file) and +/// identity-partitioned fields to avoid duplicate work during batch processing. #[derive(Debug)] pub(crate) struct RecordBatchTransformerBuilder { snapshot_schema: Arc, projected_iceberg_field_ids: Vec, - partition_spec: Option>, - partition_data: Option, + constant_fields: HashMap, } impl RecordBatchTransformerBuilder { @@ -163,32 +203,48 @@ impl RecordBatchTransformerBuilder { Self { snapshot_schema, projected_iceberg_field_ids: projected_iceberg_field_ids.to_vec(), - partition_spec: None, - partition_data: None, + constant_fields: HashMap::new(), } } + /// Add a constant value for a specific field ID. + /// This is used for virtual/metadata fields like _file that have constant values per batch. + /// + /// # Arguments + /// * `field_id` - The field ID to associate with the constant + /// * `datum` - The constant value (with type) for this field + pub(crate) fn with_constant(mut self, field_id: i32, datum: Datum) -> Self { + self.constant_fields.insert(field_id, datum); + self + } + /// Set partition spec and data together for identifying identity-transformed partition columns. 
/// /// Both partition_spec and partition_data must be provided together since the spec defines /// which fields are identity-partitioned, and the data provides their constant values. - /// One without the other cannot produce a valid constants map. + /// This method computes the partition constants and merges them into constant_fields. pub(crate) fn with_partition( mut self, partition_spec: Arc, partition_data: Struct, - ) -> Self { - self.partition_spec = Some(partition_spec); - self.partition_data = Some(partition_data); - self + ) -> Result { + // Compute partition constants for identity-transformed fields (already returns Datum) + let partition_constants = + constants_map(&partition_spec, &partition_data, &self.snapshot_schema)?; + + // Add partition constants to constant_fields + for (field_id, datum) in partition_constants { + self.constant_fields.insert(field_id, datum); + } + + Ok(self) } pub(crate) fn build(self) -> RecordBatchTransformer { RecordBatchTransformer { snapshot_schema: self.snapshot_schema, projected_iceberg_field_ids: self.projected_iceberg_field_ids, - partition_spec: self.partition_spec, - partition_data: self.partition_data, + constant_fields: self.constant_fields, batch_transform: None, } } @@ -228,16 +284,10 @@ impl RecordBatchTransformerBuilder { pub(crate) struct RecordBatchTransformer { snapshot_schema: Arc, projected_iceberg_field_ids: Vec, - - /// Partition spec for identifying identity-transformed partition columns (spec rule #1). - /// Only fields with identity transforms use partition data constants; non-identity transforms - /// (bucket, truncate, etc.) must read source columns from data files. - partition_spec: Option>, - - /// Partition data providing constant values for identity-transformed partition columns (spec rule #1). - /// For example, in a file at path `dept=engineering/file.parquet`, this would contain - /// the value "engineering" for the dept field. 
- partition_data: Option, + // Pre-computed constant field information: field_id -> Datum + // Includes both virtual/metadata fields (like _file) and identity-partitioned fields + // Datum holds both the Iceberg type and the value + constant_fields: HashMap, // BatchTransform gets lazily constructed based on the schema of // the first RecordBatch we receive from the file @@ -279,8 +329,7 @@ impl RecordBatchTransformer { record_batch.schema_ref(), self.snapshot_schema.as_ref(), &self.projected_iceberg_field_ids, - self.partition_spec.as_ref().map(|s| s.as_ref()), - self.partition_data.as_ref(), + &self.constant_fields, )?); self.process_record_batch(record_batch)? @@ -299,8 +348,7 @@ impl RecordBatchTransformer { source_schema: &ArrowSchemaRef, snapshot_schema: &IcebergSchema, projected_iceberg_field_ids: &[i32], - partition_spec: Option<&PartitionSpec>, - partition_data: Option<&Struct>, + constant_fields: &HashMap, ) -> Result { let mapped_unprojected_arrow_schema = Arc::new(schema_to_arrow_schema(snapshot_schema)?); let field_id_to_mapped_schema_map = @@ -311,22 +359,54 @@ impl RecordBatchTransformer { let fields: Result> = projected_iceberg_field_ids .iter() .map(|field_id| { - Ok(field_id_to_mapped_schema_map - .get(field_id) - .ok_or(Error::new(ErrorKind::Unexpected, "field not found"))? 
- .0 - .clone()) + // Check if this is a constant field + if constant_fields.contains_key(field_id) { + // For metadata/virtual fields (like _file), get name from metadata_columns + // For partition fields, get name from schema (they exist in schema) + if let Ok(iceberg_field) = get_metadata_field(*field_id) { + // This is a metadata/virtual field - convert Iceberg field to Arrow + let datum = constant_fields.get(field_id).ok_or(Error::new( + ErrorKind::Unexpected, + "constant field not found", + ))?; + let arrow_type = datum_to_arrow_type_with_ree(datum); + let arrow_field = + Field::new(&iceberg_field.name, arrow_type, !iceberg_field.required) + .with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + iceberg_field.id.to_string(), + )])); + Ok(Arc::new(arrow_field)) + } else { + // This is a partition constant field (exists in schema but uses constant value) + let field = &field_id_to_mapped_schema_map + .get(field_id) + .ok_or(Error::new(ErrorKind::Unexpected, "field not found"))? + .0; + let datum = constant_fields.get(field_id).ok_or(Error::new( + ErrorKind::Unexpected, + "constant field not found", + ))?; + let arrow_type = datum_to_arrow_type_with_ree(datum); + // Use the type from constant_fields (REE for constants) + let constant_field = + Field::new(field.name(), arrow_type, field.is_nullable()) + .with_metadata(field.metadata().clone()); + Ok(Arc::new(constant_field)) + } + } else { + // Regular field - use schema as-is + Ok(field_id_to_mapped_schema_map + .get(field_id) + .ok_or(Error::new(ErrorKind::Unexpected, "field not found"))? 
+ .0 + .clone()) + } }) .collect(); let target_schema = Arc::new(ArrowSchema::new(fields?)); - let constants_map = if let (Some(spec), Some(data)) = (partition_spec, partition_data) { - constants_map(spec, data) - } else { - HashMap::new() - }; - match Self::compare_schemas(source_schema, &target_schema) { SchemaComparison::Equivalent => Ok(BatchTransform::PassThrough), SchemaComparison::NameChangesOnly => Ok(BatchTransform::ModifySchema { target_schema }), @@ -336,8 +416,7 @@ impl RecordBatchTransformer { snapshot_schema, projected_iceberg_field_ids, field_id_to_mapped_schema_map, - constants_map, - partition_spec, + constant_fields, )?, target_schema, }), @@ -394,8 +473,7 @@ impl RecordBatchTransformer { snapshot_schema: &IcebergSchema, projected_iceberg_field_ids: &[i32], field_id_to_mapped_schema_map: HashMap, - constants_map: HashMap, - _partition_spec: Option<&PartitionSpec>, + constant_fields: &HashMap, ) -> Result> { let field_id_to_source_schema_map = Self::build_field_id_to_arrow_schema_map(source_schema)?; @@ -403,6 +481,18 @@ impl RecordBatchTransformer { projected_iceberg_field_ids .iter() .map(|field_id| { + // Check if this is a constant field (metadata/virtual or identity-partitioned) + // Constant fields always use their pre-computed constant values, regardless of whether + // they exist in the Parquet file. This is per Iceberg spec rule #1: partition metadata + // is authoritative and should be preferred over file data. + if let Some(datum) = constant_fields.get(field_id) { + let arrow_type = datum_to_arrow_type_with_ree(datum); + return Ok(ColumnSource::Add { + value: Some(datum.literal().clone()), + target_type: arrow_type, + }); + } + let (target_field, _) = field_id_to_mapped_schema_map .get(field_id) @@ -451,13 +541,8 @@ impl RecordBatchTransformer { ); // Apply spec's fallback steps for "not present" fields. 
- let column_source = if let Some(constant_value) = constants_map.get(field_id) { - // Rule #1: Identity partition constant - ColumnSource::Add { - value: Some(constant_value.clone()), - target_type: target_type.clone(), - } - } else if let Some(source) = field_by_id { + // Rule #1 (constants) is handled at the beginning of this function + let column_source = if let Some(source) = field_by_id { source } else { // Rules #2, #3 and #4: @@ -471,6 +556,7 @@ impl RecordBatchTransformer { None } }); + ColumnSource::Add { value: default_value, target_type: target_type.clone(), @@ -492,7 +578,7 @@ impl RecordBatchTransformer { let this_field_id = field_id_str.parse().map_err(|e| { Error::new( ErrorKind::DataInvalid, - format!("field id not parseable as an i32: {}", e), + format!("field id not parseable as an i32: {e}"), ) })?; @@ -539,86 +625,36 @@ impl RecordBatchTransformer { prim_lit: &Option, num_rows: usize, ) -> Result { - Ok(match (target_type, prim_lit) { - (DataType::Boolean, Some(PrimitiveLiteral::Boolean(value))) => { - Arc::new(BooleanArray::from(vec![*value; num_rows])) - } - (DataType::Boolean, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(BooleanArray::from(vals)) - } - (DataType::Int32, Some(PrimitiveLiteral::Int(value))) => { - Arc::new(Int32Array::from(vec![*value; num_rows])) - } - (DataType::Int32, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Int32Array::from(vals)) - } - (DataType::Date32, Some(PrimitiveLiteral::Int(value))) => { - Arc::new(Date32Array::from(vec![*value; num_rows])) - } - (DataType::Date32, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Date32Array::from(vals)) - } - (DataType::Int64, Some(PrimitiveLiteral::Long(value))) => { - Arc::new(Int64Array::from(vec![*value; num_rows])) - } - (DataType::Int64, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Int64Array::from(vals)) - } - (DataType::Float32, Some(PrimitiveLiteral::Float(value))) => { - 
Arc::new(Float32Array::from(vec![value.0; num_rows])) - } - (DataType::Float32, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Float32Array::from(vals)) - } - (DataType::Float64, Some(PrimitiveLiteral::Double(value))) => { - Arc::new(Float64Array::from(vec![value.0; num_rows])) - } - (DataType::Float64, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Float64Array::from(vals)) - } - (DataType::Utf8, Some(PrimitiveLiteral::String(value))) => { - Arc::new(StringArray::from(vec![value.clone(); num_rows])) - } - (DataType::Utf8, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(StringArray::from(vals)) - } - (DataType::Binary, Some(PrimitiveLiteral::Binary(value))) => { - Arc::new(BinaryArray::from_vec(vec![value; num_rows])) - } - (DataType::Binary, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(BinaryArray::from_opt_vec(vals)) - } - (DataType::Struct(fields), None) => { - // Create a StructArray filled with nulls. Per Iceberg spec, optional struct fields - // default to null when added to the schema. We defer non-null default struct values - // and leave them as not implemented yet. 
- let null_arrays: Vec = fields - .iter() - .map(|field| Self::create_column(field.data_type(), &None, num_rows)) - .collect::>>()?; - - Arc::new(StructArray::new( - fields.clone(), - null_arrays, - Some(NullBuffer::new_null(num_rows)), + // Check if this is a RunEndEncoded type (for constant fields) + if let DataType::RunEndEncoded(_, values_field) = target_type { + // Helper to create a Run-End Encoded array + let create_ree_array = |values_array: ArrayRef| -> Result { + let run_ends = if num_rows == 0 { + Int32Array::from(Vec::::new()) + } else { + Int32Array::from(vec![num_rows as i32]) + }; + Ok(Arc::new( + RunArray::try_new(&run_ends, &values_array).map_err(|e| { + Error::new( + ErrorKind::Unexpected, + "Failed to create RunArray for constant value", + ) + .with_source(e) + })?, )) - } - (DataType::Null, _) => Arc::new(NullArray::new(num_rows)), - (dt, _) => { - return Err(Error::new( - ErrorKind::Unexpected, - format!("unexpected target column type {}", dt), - )); - } - }) + }; + + // Create the values array using the helper function + let values_array = + create_primitive_array_single_element(values_field.data_type(), prim_lit)?; + + // Wrap in Run-End Encoding + create_ree_array(values_array) + } else { + // Non-REE type (simple arrays for non-constant fields) + create_primitive_array_repeated(target_type, prim_lit, num_rows) + } } } @@ -639,6 +675,54 @@ mod test { }; use crate::spec::{Literal, NestedField, PrimitiveType, Schema, Struct, Type}; + /// Helper to extract string values from either StringArray or RunEndEncoded + /// Returns empty string for null values + fn get_string_value(array: &dyn Array, index: usize) -> String { + if let Some(string_array) = array.as_any().downcast_ref::() { + if string_array.is_null(index) { + String::new() + } else { + string_array.value(index).to_string() + } + } else if let Some(run_array) = array + .as_any() + .downcast_ref::>() + { + let values = run_array.values(); + let string_values = values + .as_any() + 
.downcast_ref::() + .expect("REE values should be StringArray"); + // For REE, all rows have the same value (index 0 in the values array) + if string_values.is_null(0) { + String::new() + } else { + string_values.value(0).to_string() + } + } else { + panic!("Expected StringArray or RunEndEncoded"); + } + } + + /// Helper to extract int values from either Int32Array or RunEndEncoded + fn get_int_value(array: &dyn Array, index: usize) -> i32 { + if let Some(int_array) = array.as_any().downcast_ref::() { + int_array.value(index) + } else if let Some(run_array) = array + .as_any() + .downcast_ref::>() + { + let values = run_array.values(); + let int_values = values + .as_any() + .downcast_ref::() + .expect("REE values should be Int32Array"); + int_values.value(0) + } else { + panic!("Expected Int32Array or RunEndEncoded"); + } + } + #[test] fn build_field_id_to_source_schema_map_works() { let arrow_schema = arrow_schema_already_same_as_target(); @@ -1137,6 +1221,7 @@ mod test { let mut transformer = RecordBatchTransformerBuilder::new(snapshot_schema, &projected_field_ids) .with_partition(partition_spec, partition_data) + .expect("Failed to add partition constants") .build(); // Create a Parquet RecordBatch with actual data @@ -1257,6 +1342,7 @@ mod test { let mut transformer = RecordBatchTransformerBuilder::new(snapshot_schema, &projected_field_ids) .with_partition(partition_spec, partition_data) + .expect("Failed to add partition constants") .build(); let parquet_batch = RecordBatch::try_new(parquet_schema, vec![ @@ -1271,30 +1357,23 @@ mod test { assert_eq!(result.num_columns(), 3); assert_eq!(result.num_rows(), 2); - let id_column = result - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(id_column.value(0), 100); - assert_eq!(id_column.value(1), 200); + // Use helpers to handle both simple and REE arrays + assert_eq!(get_int_value(result.column(0).as_ref(), 0), 100); + assert_eq!(get_int_value(result.column(0).as_ref(), 1), 200); - let 
dept_column = result - .column(1) - .as_any() - .downcast_ref::() - .unwrap(); - // This value MUST come from partition metadata (constant) - assert_eq!(dept_column.value(0), "engineering"); - assert_eq!(dept_column.value(1), "engineering"); + // dept column comes from partition metadata (constant) - will be REE + assert_eq!( + get_string_value(result.column(1).as_ref(), 0), + "engineering" + ); + assert_eq!( + get_string_value(result.column(1).as_ref(), 1), + "engineering" + ); - let name_column = result - .column(2) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(name_column.value(0), "Alice"); - assert_eq!(name_column.value(1), "Bob"); + // name column comes from file + assert_eq!(get_string_value(result.column(2).as_ref(), 0), "Alice"); + assert_eq!(get_string_value(result.column(2).as_ref(), 1), "Bob"); } /// Test bucket partitioning with renamed source column. @@ -1372,6 +1451,7 @@ mod test { let mut transformer = RecordBatchTransformerBuilder::new(snapshot_schema, &projected_field_ids) .with_partition(partition_spec, partition_data) + .expect("Failed to add partition constants") .build(); // Create a Parquet RecordBatch with actual data @@ -1476,6 +1556,7 @@ mod test { let mut transformer = RecordBatchTransformerBuilder::new(snapshot_schema, &projected_field_ids) .with_partition(partition_spec, partition_data) + .expect("Failed to add partition constants") .build(); let parquet_batch = RecordBatch::try_new(parquet_schema, vec![ @@ -1492,48 +1573,106 @@ mod test { // Verify each column demonstrates the correct spec rule: // Normal case: id from Parquet by field ID - let id_column = result - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(id_column.value(0), 100); - assert_eq!(id_column.value(1), 200); + // Use helpers to handle both simple and REE arrays + assert_eq!(get_int_value(result.column(0).as_ref(), 0), 100); + assert_eq!(get_int_value(result.column(0).as_ref(), 1), 200); + + // Rule #1: dept from partition metadata 
(identity transform) - will be REE + assert_eq!( + get_string_value(result.column(1).as_ref(), 0), + "engineering" + ); + assert_eq!( + get_string_value(result.column(1).as_ref(), 1), + "engineering" + ); - // Rule #1: dept from partition metadata (identity transform) - let dept_column = result - .column(1) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(dept_column.value(0), "engineering"); - assert_eq!(dept_column.value(1), "engineering"); + // Rule #2: data from Parquet via name mapping - will be regular array + assert_eq!(get_string_value(result.column(2).as_ref(), 0), "value1"); + assert_eq!(get_string_value(result.column(2).as_ref(), 1), "value2"); - // Rule #2: data from Parquet via name mapping - let data_column = result - .column(2) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(data_column.value(0), "value1"); - assert_eq!(data_column.value(1), "value2"); + // Rule #3: category from initial_default - will be REE + assert_eq!( + get_string_value(result.column(3).as_ref(), 0), + "default_category" + ); + assert_eq!( + get_string_value(result.column(3).as_ref(), 1), + "default_category" + ); - // Rule #3: category from initial_default - let category_column = result - .column(3) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(category_column.value(0), "default_category"); - assert_eq!(category_column.value(1), "default_category"); + // Rule #4: notes is null (no default, not in Parquet, not in partition) - will be REE with null + // For null REE arrays, we still use the helper which handles extraction + assert_eq!(get_string_value(result.column(4).as_ref(), 0), ""); + assert_eq!(get_string_value(result.column(4).as_ref(), 1), ""); + } + + /// Test handling of null values in identity-partitioned columns. + /// + /// Reproduces TestPartitionValues.testNullPartitionValue() from iceberg-java, which + /// writes records where the partition column has null values. 
Before the fix in #1922, + /// this would error with "Partition field X has null value for identity transform". + #[test] + fn null_identity_partition_value() { + use crate::spec::{Struct, Transform}; + + let schema = Arc::new( + Schema::builder() + .with_schema_id(0) + .with_fields(vec![ + NestedField::optional(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional(2, "data", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build() + .unwrap(), + ); + + let partition_spec = Arc::new( + crate::spec::PartitionSpec::builder(schema.clone()) + .with_spec_id(0) + .add_partition_field("data", "data", Transform::Identity) + .unwrap() + .build() + .unwrap(), + ); + + // Partition has null value for the data column + let partition_data = Struct::from_iter(vec![None]); + + let file_schema = Arc::new(ArrowSchema::new(vec![simple_field( + "id", + DataType::Int32, + true, + "1", + )])); + + let projected_field_ids = [1, 2]; + + let mut transformer = RecordBatchTransformerBuilder::new(schema, &projected_field_ids) + .with_partition(partition_spec, partition_data) + .expect("Should handle null partition values") + .build(); + + let file_batch = + RecordBatch::try_new(file_schema, vec![Arc::new(Int32Array::from(vec![1, 2, 3]))]) + .unwrap(); - // Rule #4: notes is null (no default, not in Parquet, not in partition) - let notes_column = result - .column(4) + let result = transformer.process_record_batch(file_batch).unwrap(); + + assert_eq!(result.num_columns(), 2); + assert_eq!(result.num_rows(), 3); + + let id_col = result + .column(0) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - assert!(notes_column.is_null(0)); - assert!(notes_column.is_null(1)); + assert_eq!(id_col.values(), &[1, 2, 3]); + + // Partition column with null value should produce nulls + let data_col = result.column(1); + assert!(data_col.is_null(0)); + assert!(data_col.is_null(1)); + assert!(data_col.is_null(2)); } } diff --git a/crates/iceberg/src/arrow/schema.rs 
b/crates/iceberg/src/arrow/schema.rs index ec0135bd77..9ee7897cb6 100644 --- a/crates/iceberg/src/arrow/schema.rs +++ b/crates/iceberg/src/arrow/schema.rs @@ -35,8 +35,8 @@ use uuid::Uuid; use crate::error::Result; use crate::spec::{ - Datum, ListType, MapType, NestedField, NestedFieldRef, PrimitiveLiteral, PrimitiveType, Schema, - SchemaVisitor, StructType, Type, + Datum, FIRST_FIELD_ID, ListType, MapType, NestedField, NestedFieldRef, PrimitiveLiteral, + PrimitiveType, Schema, SchemaVisitor, StructType, Type, }; use crate::{Error, ErrorKind}; @@ -221,6 +221,19 @@ pub fn arrow_schema_to_schema(schema: &ArrowSchema) -> Result { visit_schema(schema, &mut visitor) } +/// Convert Arrow schema to Iceberg schema with automatically assigned field IDs. +/// +/// Unlike [`arrow_schema_to_schema`], this function does not require field IDs in the Arrow +/// schema metadata. Instead, it automatically assigns unique field IDs starting from 1, +/// following Iceberg's field ID assignment rules. +/// +/// This is useful when converting Arrow schemas that don't originate from Iceberg tables, +/// such as schemas from DataFusion or other Arrow-based systems. +pub fn arrow_schema_to_schema_auto_assign_ids(schema: &ArrowSchema) -> Result { + let mut visitor = ArrowSchemaConverter::new_with_field_ids_from(FIRST_FIELD_ID); + visit_schema(schema, &mut visitor) +} + /// Convert Arrow type to iceberg type. 
pub fn arrow_type_to_type(ty: &DataType) -> Result { let mut visitor = ArrowSchemaConverter::new(); @@ -229,7 +242,7 @@ pub fn arrow_type_to_type(ty: &DataType) -> Result { const ARROW_FIELD_DOC_KEY: &str = "doc"; -pub(super) fn get_field_id(field: &Field) -> Result { +pub(super) fn get_field_id_from_metadata(field: &Field) -> Result { if let Some(value) = field.metadata().get(PARQUET_FIELD_ID_META_KEY) { return value.parse::().map_err(|e| { Error::new( @@ -253,19 +266,55 @@ fn get_field_doc(field: &Field) -> Option { None } -struct ArrowSchemaConverter; +struct ArrowSchemaConverter { + /// When set, the schema builder will reassign field IDs starting from this value + /// using level-order traversal (breadth-first). + reassign_field_ids_from: Option, + /// Generates unique placeholder IDs for fields before reassignment. + /// Required because `ReassignFieldIds` builds an old-to-new ID mapping + /// that expects unique input IDs. + next_field_id: i32, +} impl ArrowSchemaConverter { fn new() -> Self { - Self {} + Self { + reassign_field_ids_from: None, + next_field_id: 0, + } + } + + fn new_with_field_ids_from(start_from: i32) -> Self { + Self { + reassign_field_ids_from: Some(start_from), + next_field_id: 0, + } + } + + fn get_field_id(&mut self, field: &Field) -> Result { + if self.reassign_field_ids_from.is_some() { + // Field IDs will be reassigned by the schema builder. + // We need unique temporary IDs because ReassignFieldIds builds an + // old->new ID mapping that requires unique input IDs. 
+ let temp_id = self.next_field_id; + self.next_field_id += 1; + Ok(temp_id) + } else { + // Get field ID from arrow field metadata + get_field_id_from_metadata(field) + } } - fn convert_fields(fields: &Fields, field_results: &[Type]) -> Result> { + fn convert_fields( + &mut self, + fields: &Fields, + field_results: &[Type], + ) -> Result> { let mut results = Vec::with_capacity(fields.len()); for i in 0..fields.len() { let field = &fields[i]; let field_type = &field_results[i]; - let id = get_field_id(field)?; + let id = self.get_field_id(field)?; let doc = get_field_doc(field); let nested_field = NestedField { id, @@ -287,13 +336,16 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter { type U = Schema; fn schema(&mut self, schema: &ArrowSchema, values: Vec) -> Result { - let fields = Self::convert_fields(schema.fields(), &values)?; - let builder = Schema::builder().with_fields(fields); + let fields = self.convert_fields(schema.fields(), &values)?; + let mut builder = Schema::builder().with_fields(fields); + if let Some(start_from) = self.reassign_field_ids_from { + builder = builder.with_reassigned_field_ids(start_from) + } builder.build() } fn r#struct(&mut self, fields: &Fields, results: Vec) -> Result { - let fields = Self::convert_fields(fields, &results)?; + let fields = self.convert_fields(fields, &results)?; Ok(Type::Struct(StructType::new(fields))) } @@ -310,7 +362,7 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter { } }; - let id = get_field_id(element_field)?; + let id = self.get_field_id(element_field)?; let doc = get_field_doc(element_field); let mut element_field = NestedField::list_element(id, value.clone(), !element_field.is_nullable()); @@ -335,7 +387,7 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter { let key_field = &fields[0]; let value_field = &fields[1]; - let key_id = get_field_id(key_field)?; + let key_id = self.get_field_id(key_field)?; let key_doc = get_field_doc(key_field); let mut key_field = NestedField::map_key_element(key_id, 
key_value.clone()); if let Some(doc) = key_doc { @@ -343,7 +395,7 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter { } let key_field = Arc::new(key_field); - let value_id = get_field_id(value_field)?; + let value_id = self.get_field_id(value_field)?; let value_doc = get_field_doc(value_field); let mut value_field = NestedField::map_value_element( value_id, @@ -1019,6 +1071,60 @@ impl TryFrom<&crate::spec::Schema> for ArrowSchema { } } +/// Converts a Datum (Iceberg type + primitive literal) to its corresponding Arrow DataType +/// with Run-End Encoding (REE). +/// +/// This function is used for constant fields in record batches, where all values are the same. +/// Run-End Encoding provides efficient storage for such constant columns. +/// +/// # Arguments +/// * `datum` - The Datum to convert, which contains both type and value information +/// +/// # Returns +/// Arrow DataType with Run-End Encoding applied +/// +/// # Example +/// ``` +/// use iceberg::arrow::datum_to_arrow_type_with_ree; +/// use iceberg::spec::Datum; +/// +/// let datum = Datum::string("test_file.parquet"); +/// let ree_type = datum_to_arrow_type_with_ree(&datum); +/// // Returns: RunEndEncoded(Int32, Utf8) +/// ``` +pub fn datum_to_arrow_type_with_ree(datum: &Datum) -> DataType { + // Helper to create REE type with the given values type. + // Note: values field is nullable as Arrow expects this when building the + // final Arrow schema with `RunArray::try_new`. 
+ let make_ree = |values_type: DataType| -> DataType { + let run_ends_field = Arc::new(Field::new("run_ends", DataType::Int32, false)); + let values_field = Arc::new(Field::new("values", values_type, true)); + DataType::RunEndEncoded(run_ends_field, values_field) + }; + + // Match on the PrimitiveType from the Datum to determine the Arrow type + match datum.data_type() { + PrimitiveType::Boolean => make_ree(DataType::Boolean), + PrimitiveType::Int => make_ree(DataType::Int32), + PrimitiveType::Long => make_ree(DataType::Int64), + PrimitiveType::Float => make_ree(DataType::Float32), + PrimitiveType::Double => make_ree(DataType::Float64), + PrimitiveType::Date => make_ree(DataType::Date32), + PrimitiveType::Time => make_ree(DataType::Int64), + PrimitiveType::Timestamp => make_ree(DataType::Int64), + PrimitiveType::Timestamptz => make_ree(DataType::Int64), + PrimitiveType::TimestampNs => make_ree(DataType::Int64), + PrimitiveType::TimestamptzNs => make_ree(DataType::Int64), + PrimitiveType::String => make_ree(DataType::Utf8), + PrimitiveType::Uuid => make_ree(DataType::Binary), + PrimitiveType::Fixed(_) => make_ree(DataType::Binary), + PrimitiveType::Binary => make_ree(DataType::Binary), + PrimitiveType::Decimal { precision, scale } => { + make_ree(DataType::Decimal128(*precision as u8, *scale as i8)) + } + } +} + #[cfg(test)] mod tests { use std::collections::HashMap; @@ -1878,4 +1984,159 @@ mod tests { assert_eq!(array.value(0), [66u8; 16]); } } + + #[test] + fn test_arrow_schema_to_schema_with_field_id() { + // Create a complex Arrow schema without field ID metadata + // Including: primitives, list, nested struct, map, and nested list of structs + let arrow_schema = ArrowSchema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + Field::new("price", DataType::Decimal128(10, 2), false), + Field::new( + "created_at", + DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())), + true, + ), + Field::new( + 
"tags", + DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), + true, + ), + Field::new( + "address", + DataType::Struct(Fields::from(vec![ + Field::new("street", DataType::Utf8, true), + Field::new("city", DataType::Utf8, false), + Field::new("zip", DataType::Int32, true), + ])), + true, + ), + Field::new( + "attributes", + DataType::Map( + Arc::new(Field::new( + DEFAULT_MAP_FIELD_NAME, + DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Utf8, false), + Field::new("value", DataType::Utf8, true), + ])), + false, + )), + false, + ), + true, + ), + Field::new( + "orders", + DataType::List(Arc::new(Field::new( + "element", + DataType::Struct(Fields::from(vec![ + Field::new("order_id", DataType::Int64, false), + Field::new("amount", DataType::Float64, false), + ])), + true, + ))), + true, + ), + ]); + + let schema = arrow_schema_to_schema_auto_assign_ids(&arrow_schema).unwrap(); + + // Build expected schema with exact field IDs following level-order assignment: + // Level 0: id=1, name=2, price=3, created_at=4, tags=5, address=6, attributes=7, orders=8 + // Level 1: tags.element=9, address.{street=10,city=11,zip=12}, attributes.{key=13,value=14}, orders.element=15 + // Level 2: orders.element.{order_id=16,amount=17} + let expected = Schema::builder() + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Long)).into(), + NestedField::optional(2, "name", Type::Primitive(PrimitiveType::String)).into(), + NestedField::required( + 3, + "price", + Type::Primitive(PrimitiveType::Decimal { + precision: 10, + scale: 2, + }), + ) + .into(), + NestedField::optional(4, "created_at", Type::Primitive(PrimitiveType::Timestamptz)) + .into(), + NestedField::optional( + 5, + "tags", + Type::List(ListType { + element_field: NestedField::list_element( + 9, + Type::Primitive(PrimitiveType::String), + false, + ) + .into(), + }), + ) + .into(), + NestedField::optional( + 6, + "address", + Type::Struct(StructType::new(vec![ + 
NestedField::optional(10, "street", Type::Primitive(PrimitiveType::String)) + .into(), + NestedField::required(11, "city", Type::Primitive(PrimitiveType::String)) + .into(), + NestedField::optional(12, "zip", Type::Primitive(PrimitiveType::Int)) + .into(), + ])), + ) + .into(), + NestedField::optional( + 7, + "attributes", + Type::Map(MapType { + key_field: NestedField::map_key_element( + 13, + Type::Primitive(PrimitiveType::String), + ) + .into(), + value_field: NestedField::map_value_element( + 14, + Type::Primitive(PrimitiveType::String), + false, + ) + .into(), + }), + ) + .into(), + NestedField::optional( + 8, + "orders", + Type::List(ListType { + element_field: NestedField::list_element( + 15, + Type::Struct(StructType::new(vec![ + NestedField::required( + 16, + "order_id", + Type::Primitive(PrimitiveType::Long), + ) + .into(), + NestedField::required( + 17, + "amount", + Type::Primitive(PrimitiveType::Double), + ) + .into(), + ])), + false, + ) + .into(), + }), + ) + .into(), + ]) + .build() + .unwrap(); + + pretty_assertions::assert_eq!(schema, expected); + assert_eq!(schema.highest_field_id(), 17); + } } diff --git a/crates/iceberg/src/arrow/value.rs b/crates/iceberg/src/arrow/value.rs index f1cf225bb4..30b47d83fc 100644 --- a/crates/iceberg/src/arrow/value.rs +++ b/crates/iceberg/src/arrow/value.rs @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // under the License. 
+use std::sync::Arc; + use arrow_array::{ Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Decimal128Array, FixedSizeBinaryArray, FixedSizeListArray, Float32Array, Float64Array, Int32Array, Int64Array, LargeBinaryArray, LargeListArray, LargeStringArray, ListArray, MapArray, StringArray, StructArray, Time64MicrosecondArray, TimestampMicrosecondArray, TimestampNanosecondArray, }; +use arrow_buffer::NullBuffer; use arrow_schema::{DataType, FieldRef}; use uuid::Uuid; -use super::get_field_id; +use super::get_field_id_from_metadata; use crate::spec::{ - ListType, Literal, Map, MapType, NestedField, PartnerAccessor, PrimitiveType, + ListType, Literal, Map, MapType, NestedField, PartnerAccessor, PrimitiveLiteral, PrimitiveType, SchemaWithPartnerVisitor, Struct, StructType, Type, visit_struct_with_partner, visit_type_with_partner, }; @@ -258,15 +261,15 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { "The partner is not a decimal128 array", ) })?; - if let DataType::Decimal128(arrow_precision, arrow_scale) = array.data_type() { - if *arrow_precision as u32 != *precision || *arrow_scale as u32 != *scale { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "The precision or scale ({arrow_precision},{arrow_scale}) of arrow decimal128 array is not compatible with iceberg decimal type ({precision},{scale})" - ), - )); - } + if let DataType::Decimal128(arrow_precision, arrow_scale) = array.data_type() + && (*arrow_precision as u32 != *precision || *arrow_scale as u32 != *scale) + { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "The precision or scale ({arrow_precision},{arrow_scale}) of arrow decimal128 array is not compatible with iceberg decimal type ({precision},{scale})" + ), + )); } Ok(array.iter().map(|v| v.map(Literal::decimal)).collect()) } @@ -348,10 +351,10 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { } else if let Some(array) = partner.as_any().downcast_ref::() { 
Ok(array.iter().map(|v| v.map(Literal::string)).collect()) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, "The partner is not a string array", - )); + )) } } PrimitiveType::Uuid => { @@ -415,10 +418,10 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { .map(|v| v.map(|v| Literal::binary(v.to_vec()))) .collect()) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, "The partner is not a binary array", - )); + )) } } } @@ -447,7 +450,7 @@ impl FieldMatchMode { /// Determines if an Arrow field matches an Iceberg field based on the matching mode. pub fn match_field(&self, arrow_field: &FieldRef, iceberg_field: &NestedField) -> bool { match self { - FieldMatchMode::Id => get_field_id(arrow_field) + FieldMatchMode::Id => get_field_id_from_metadata(arrow_field) .map(|id| id == iceberg_field.id) .unwrap_or(false), FieldMatchMode::Name => arrow_field.name() == &iceberg_field.name, @@ -617,6 +620,273 @@ pub fn arrow_primitive_to_literal( ) } +/// Create a single-element array from a primitive literal. +/// +/// This is used for creating constant arrays (Run-End Encoded arrays) where we need +/// a single value that represents all rows. 
+pub(crate) fn create_primitive_array_single_element( + data_type: &DataType, + prim_lit: &Option, +) -> Result { + match (data_type, prim_lit) { + (DataType::Boolean, Some(PrimitiveLiteral::Boolean(v))) => { + Ok(Arc::new(BooleanArray::from(vec![*v]))) + } + (DataType::Boolean, None) => Ok(Arc::new(BooleanArray::from(vec![Option::::None]))), + (DataType::Int32, Some(PrimitiveLiteral::Int(v))) => { + Ok(Arc::new(Int32Array::from(vec![*v]))) + } + (DataType::Int32, None) => Ok(Arc::new(Int32Array::from(vec![Option::::None]))), + (DataType::Date32, Some(PrimitiveLiteral::Int(v))) => { + Ok(Arc::new(Date32Array::from(vec![*v]))) + } + (DataType::Date32, None) => Ok(Arc::new(Date32Array::from(vec![Option::::None]))), + (DataType::Int64, Some(PrimitiveLiteral::Long(v))) => { + Ok(Arc::new(Int64Array::from(vec![*v]))) + } + (DataType::Int64, None) => Ok(Arc::new(Int64Array::from(vec![Option::::None]))), + (DataType::Float32, Some(PrimitiveLiteral::Float(v))) => { + Ok(Arc::new(Float32Array::from(vec![v.0]))) + } + (DataType::Float32, None) => Ok(Arc::new(Float32Array::from(vec![Option::::None]))), + (DataType::Float64, Some(PrimitiveLiteral::Double(v))) => { + Ok(Arc::new(Float64Array::from(vec![v.0]))) + } + (DataType::Float64, None) => Ok(Arc::new(Float64Array::from(vec![Option::::None]))), + (DataType::Utf8, Some(PrimitiveLiteral::String(v))) => { + Ok(Arc::new(StringArray::from(vec![v.as_str()]))) + } + (DataType::Utf8, None) => Ok(Arc::new(StringArray::from(vec![Option::<&str>::None]))), + (DataType::Binary, Some(PrimitiveLiteral::Binary(v))) => { + Ok(Arc::new(BinaryArray::from_vec(vec![v.as_slice()]))) + } + (DataType::Binary, None) => Ok(Arc::new(BinaryArray::from_opt_vec(vec![ + Option::<&[u8]>::None, + ]))), + (DataType::Decimal128(precision, scale), Some(PrimitiveLiteral::Int128(v))) => { + let array = Decimal128Array::from(vec![{ *v }]) + .with_precision_and_scale(*precision, *scale) + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + 
"Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}" + ), + ) + })?; + Ok(Arc::new(array)) + } + (DataType::Decimal128(precision, scale), Some(PrimitiveLiteral::UInt128(v))) => { + let array = Decimal128Array::from(vec![*v as i128]) + .with_precision_and_scale(*precision, *scale) + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}" + ), + ) + })?; + Ok(Arc::new(array)) + } + (DataType::Decimal128(precision, scale), None) => { + let array = Decimal128Array::from(vec![Option::::None]) + .with_precision_and_scale(*precision, *scale) + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}" + ), + ) + })?; + Ok(Arc::new(array)) + } + (DataType::Struct(fields), None) => { + // Create a single-element StructArray with nulls + let null_arrays: Vec = fields + .iter() + .map(|f| { + // Recursively create null arrays for struct fields + // For primitive fields in structs, use simple null arrays (not REE within struct) + match f.data_type() { + DataType::Boolean => { + Ok(Arc::new(BooleanArray::from(vec![Option::::None])) + as ArrayRef) + } + DataType::Int32 | DataType::Date32 => { + Ok(Arc::new(Int32Array::from(vec![Option::::None])) as ArrayRef) + } + DataType::Int64 => { + Ok(Arc::new(Int64Array::from(vec![Option::::None])) as ArrayRef) + } + DataType::Float32 => { + Ok(Arc::new(Float32Array::from(vec![Option::::None])) as ArrayRef) + } + DataType::Float64 => { + Ok(Arc::new(Float64Array::from(vec![Option::::None])) as ArrayRef) + } + DataType::Utf8 => { + Ok(Arc::new(StringArray::from(vec![Option::<&str>::None])) as ArrayRef) + } + DataType::Binary => { + Ok( + Arc::new(BinaryArray::from_opt_vec(vec![Option::<&[u8]>::None])) + as ArrayRef, + ) + } + _ => Err(Error::new( + ErrorKind::Unexpected, + format!("Unsupported struct field type: {:?}", 
f.data_type()), + )), + } + }) + .collect::>>()?; + Ok(Arc::new(arrow_array::StructArray::new( + fields.clone(), + null_arrays, + Some(arrow_buffer::NullBuffer::new_null(1)), + ))) + } + _ => Err(Error::new( + ErrorKind::Unexpected, + format!("Unsupported constant type combination: {data_type:?} with {prim_lit:?}"), + )), + } +} + +/// Create a repeated array from a primitive literal for a given number of rows. +/// +/// This is used for creating non-constant arrays where we need the same value +/// repeated for each row. +pub(crate) fn create_primitive_array_repeated( + data_type: &DataType, + prim_lit: &Option, + num_rows: usize, +) -> Result { + Ok(match (data_type, prim_lit) { + (DataType::Boolean, Some(PrimitiveLiteral::Boolean(value))) => { + Arc::new(BooleanArray::from(vec![*value; num_rows])) + } + (DataType::Boolean, None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(BooleanArray::from(vals)) + } + (DataType::Int32, Some(PrimitiveLiteral::Int(value))) => { + Arc::new(Int32Array::from(vec![*value; num_rows])) + } + (DataType::Int32, None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(Int32Array::from(vals)) + } + (DataType::Date32, Some(PrimitiveLiteral::Int(value))) => { + Arc::new(Date32Array::from(vec![*value; num_rows])) + } + (DataType::Date32, None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(Date32Array::from(vals)) + } + (DataType::Int64, Some(PrimitiveLiteral::Long(value))) => { + Arc::new(Int64Array::from(vec![*value; num_rows])) + } + (DataType::Int64, None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(Int64Array::from(vals)) + } + (DataType::Float32, Some(PrimitiveLiteral::Float(value))) => { + Arc::new(Float32Array::from(vec![value.0; num_rows])) + } + (DataType::Float32, None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(Float32Array::from(vals)) + } + (DataType::Float64, Some(PrimitiveLiteral::Double(value))) => { + Arc::new(Float64Array::from(vec![value.0; num_rows])) + } + 
(DataType::Float64, None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(Float64Array::from(vals)) + } + (DataType::Utf8, Some(PrimitiveLiteral::String(value))) => { + Arc::new(StringArray::from(vec![value.clone(); num_rows])) + } + (DataType::Utf8, None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(StringArray::from(vals)) + } + (DataType::Binary, Some(PrimitiveLiteral::Binary(value))) => { + Arc::new(BinaryArray::from_vec(vec![value; num_rows])) + } + (DataType::Binary, None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new(BinaryArray::from_opt_vec(vals)) + } + (DataType::Decimal128(precision, scale), Some(PrimitiveLiteral::Int128(value))) => { + Arc::new( + Decimal128Array::from(vec![*value; num_rows]) + .with_precision_and_scale(*precision, *scale) + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}" + ), + ) + })?, + ) + } + (DataType::Decimal128(precision, scale), Some(PrimitiveLiteral::UInt128(value))) => { + Arc::new( + Decimal128Array::from(vec![*value as i128; num_rows]) + .with_precision_and_scale(*precision, *scale) + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}" + ), + ) + })?, + ) + } + (DataType::Decimal128(precision, scale), None) => { + let vals: Vec> = vec![None; num_rows]; + Arc::new( + Decimal128Array::from(vals) + .with_precision_and_scale(*precision, *scale) + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}" + ), + ) + })?, + ) + } + (DataType::Struct(fields), None) => { + // Create a StructArray filled with nulls + let null_arrays: Vec = fields + .iter() + .map(|field| create_primitive_array_repeated(field.data_type(), &None, num_rows)) + .collect::>>()?; + + Arc::new(StructArray::new( + fields.clone(), + 
null_arrays, + Some(NullBuffer::new_null(num_rows)), + )) + } + (DataType::Null, _) => Arc::new(arrow_array::NullArray::new(num_rows)), + (dt, _) => { + return Err(Error::new( + ErrorKind::Unexpected, + format!("unexpected target column type {dt}"), + )); + } + }) +} + #[cfg(test)] mod test { use std::collections::HashMap; @@ -1467,4 +1737,48 @@ mod test { ]))), ]); } + + #[test] + fn test_create_decimal_array_respects_precision() { + // Decimal128Array::from() uses Arrow's default precision (38) instead of the + // target precision, causing RecordBatch construction to fail when schemas don't match. + let target_precision = 18u8; + let target_scale = 10i8; + let target_type = DataType::Decimal128(target_precision, target_scale); + let value = PrimitiveLiteral::Int128(10000000000); + + let array = create_primitive_array_single_element(&target_type, &Some(value)) + .expect("Failed to create decimal array"); + + match array.data_type() { + DataType::Decimal128(precision, scale) => { + assert_eq!(*precision, target_precision); + assert_eq!(*scale, target_scale); + } + other => panic!("Expected Decimal128, got {other:?}"), + } + } + + #[test] + fn test_create_decimal_array_repeated_respects_precision() { + // Ensure repeated arrays also respect target precision, not Arrow's default. 
+ let target_precision = 18u8; + let target_scale = 10i8; + let target_type = DataType::Decimal128(target_precision, target_scale); + let value = PrimitiveLiteral::Int128(10000000000); + let num_rows = 5; + + let array = create_primitive_array_repeated(&target_type, &Some(value), num_rows) + .expect("Failed to create repeated decimal array"); + + match array.data_type() { + DataType::Decimal128(precision, scale) => { + assert_eq!(*precision, target_precision); + assert_eq!(*scale, target_scale); + } + other => panic!("Expected Decimal128, got {other:?}"), + } + + assert_eq!(array.len(), num_rows); + } } diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index cfa3dc6b52..df0299acb2 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -163,8 +163,12 @@ impl Catalog for MemoryCatalog { let namespaces = root_namespace_state .list_namespaces_under(parent_namespace_ident)? .into_iter() - .map(|name| NamespaceIdent::new(name.to_string())) - .collect_vec(); + .map(|name| { + let mut names = parent_namespace_ident.iter().cloned().collect::>(); + names.push(name.to_string()); + NamespaceIdent::from_vec(names) + }) + .collect::>>()?; Ok(namespaces) } @@ -599,7 +603,7 @@ pub(crate) mod tests { .list_namespaces(Some(&namespace_ident_1)) .await .unwrap(), - vec![NamespaceIdent::new("b".into())] + vec![namespace_ident_2] ); } @@ -628,9 +632,9 @@ pub(crate) mod tests { .unwrap() ), to_set(vec![ - NamespaceIdent::new("a".into()), - NamespaceIdent::new("b".into()), - NamespaceIdent::new("c".into()), + namespace_ident_2, + namespace_ident_3, + namespace_ident_4, ]) ); } diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index 27d5edaedb..f3a521379e 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -1000,13 +1000,13 @@ mod _serde_set_statistics { snapshot_id, statistics, } = 
SetStatistics::deserialize(deserializer)?; - if let Some(snapshot_id) = snapshot_id { - if snapshot_id != statistics.snapshot_id { - return Err(serde::de::Error::custom(format!( - "Snapshot id to set {snapshot_id} does not match the statistics file snapshot id {}", - statistics.snapshot_id - ))); - } + if let Some(snapshot_id) = snapshot_id + && snapshot_id != statistics.snapshot_id + { + return Err(serde::de::Error::custom(format!( + "Snapshot id to set {snapshot_id} does not match the statistics file snapshot id {}", + statistics.snapshot_id + ))); } Ok(statistics) diff --git a/crates/iceberg/src/delete_vector.rs b/crates/iceberg/src/delete_vector.rs index f382bf079e..df8a10193c 100644 --- a/crates/iceberg/src/delete_vector.rs +++ b/crates/iceberg/src/delete_vector.rs @@ -36,7 +36,7 @@ impl DeleteVector { } } - pub fn iter(&self) -> DeleteVectorIterator { + pub fn iter(&self) -> DeleteVectorIterator<'_> { let outer = self.inner.bitmaps(); DeleteVectorIterator { outer, inner: None } } @@ -93,10 +93,10 @@ impl Iterator for DeleteVectorIterator<'_> { type Item = u64; fn next(&mut self) -> Option { - if let Some(inner) = &mut self.inner { - if let Some(inner_next) = inner.bitmap_iter.next() { - return Some(u64::from(inner.high_bits) << 32 | u64::from(inner_next)); - } + if let Some(inner) = &mut self.inner + && let Some(inner_next) = inner.bitmap_iter.next() + { + return Some(u64::from(inner.high_bits) << 32 | u64::from(inner_next)); } if let Some((high_bits, next_bitmap)) = self.outer.next() { diff --git a/crates/iceberg/src/expr/visitors/expression_evaluator.rs b/crates/iceberg/src/expr/visitors/expression_evaluator.rs index 3675ce355f..570c409502 100644 --- a/crates/iceberg/src/expr/visitors/expression_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/expression_evaluator.rs @@ -346,7 +346,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, 
sort_order_id: None, partition_spec_id: 0, @@ -374,7 +374,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, diff --git a/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs index 2b65cf12aa..06c92ab3e8 100644 --- a/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs @@ -1995,7 +1995,7 @@ mod test { lower_bounds: Default::default(), upper_bounds: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -2021,7 +2021,7 @@ mod test { lower_bounds: Default::default(), upper_bounds: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -2083,7 +2083,7 @@ mod test { column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -2114,7 +2114,7 @@ mod test { column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -2146,7 +2146,7 @@ mod test { column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -2178,7 +2178,7 @@ mod test { column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, diff --git a/crates/iceberg/src/expr/visitors/manifest_evaluator.rs b/crates/iceberg/src/expr/visitors/manifest_evaluator.rs index abbd136cb1..770163ae95 
100644 --- a/crates/iceberg/src/expr/visitors/manifest_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/manifest_evaluator.rs @@ -161,10 +161,10 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> { _predicate: &BoundPredicate, ) -> crate::Result { let field = self.field_summary_for_reference(reference); - if let Some(contains_nan) = field.contains_nan { - if !contains_nan { - return ROWS_CANNOT_MATCH; - } + if let Some(contains_nan) = field.contains_nan + && !contains_nan + { + return ROWS_CANNOT_MATCH; } if ManifestFilterVisitor::are_all_null(field, &reference.field().field_type) { @@ -389,16 +389,16 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> { return ROWS_MIGHT_MATCH; } - if prefix.as_bytes().eq(&lower_bound[..prefix_len]) { - if let Some(upper_bound) = &field.upper_bound { - // if upper is shorter than the prefix then upper can't start with the prefix - if prefix_len > upper_bound.len() { - return ROWS_MIGHT_MATCH; - } + if prefix.as_bytes().eq(&lower_bound[..prefix_len]) + && let Some(upper_bound) = &field.upper_bound + { + // if upper is shorter than the prefix then upper can't start with the prefix + if prefix_len > upper_bound.len() { + return ROWS_MIGHT_MATCH; + } - if prefix.as_bytes().eq(&upper_bound[..prefix_len]) { - return ROWS_CANNOT_MATCH; - } + if prefix.as_bytes().eq(&upper_bound[..prefix_len]) { + return ROWS_CANNOT_MATCH; } } } diff --git a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs index 3745d94d18..66e2898532 100644 --- a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs @@ -23,7 +23,7 @@ use fnv::FnvHashSet; use ordered_float::OrderedFloat; use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; use parquet::file::metadata::RowGroupMetaData; -use parquet::file::page_index::index::Index; +use parquet::file::page_index::column_index::ColumnIndexMetaData; use 
parquet::file::page_index::offset_index::OffsetIndexMetaData; use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit}; @@ -59,7 +59,7 @@ impl PageNullCount { } pub(crate) struct PageIndexEvaluator<'a> { - column_index: &'a [Index], + column_index: &'a [ColumnIndexMetaData], offset_index: &'a OffsetIndex, row_group_metadata: &'a RowGroupMetaData, iceberg_field_id_to_parquet_column_index: &'a HashMap, @@ -69,7 +69,7 @@ pub(crate) struct PageIndexEvaluator<'a> { impl<'a> PageIndexEvaluator<'a> { pub(crate) fn new( - column_index: &'a [Index], + column_index: &'a [ColumnIndexMetaData], offset_index: &'a OffsetIndex, row_group_metadata: &'a RowGroupMetaData, field_id_map: &'a HashMap, @@ -92,7 +92,7 @@ impl<'a> PageIndexEvaluator<'a> { /// matching the filter predicate. pub(crate) fn eval( filter: &'a BoundPredicate, - column_index: &'a [Index], + column_index: &'a [ColumnIndexMetaData], offset_index: &'a OffsetIndex, row_group_metadata: &'a RowGroupMetaData, field_id_map: &'a HashMap, @@ -240,137 +240,135 @@ impl<'a> PageIndexEvaluator<'a> { fn apply_predicate_to_column_index( predicate: F, field_type: &PrimitiveType, - column_index: &Index, + column_index: &ColumnIndexMetaData, row_counts: &[usize], ) -> Result>> where F: Fn(Option, Option, PageNullCount) -> Result, { let result: Result> = match column_index { - Index::NONE => { + ColumnIndexMetaData::NONE => { return Ok(None); } - Index::BOOLEAN(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::BOOLEAN(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min.map(|val| { + min.map(|&val| { Datum::new(field_type.clone(), PrimitiveLiteral::Boolean(val)) }), - item.max.map(|val| { + max.map(|&val| { Datum::new(field_type.clone(), PrimitiveLiteral::Boolean(val)) }), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + 
PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::INT32(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::INT32(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), - item.max - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + min.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), + max.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::INT64(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::INT64(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), - item.max - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + min.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), + max.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::FLOAT(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::FLOAT(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min.map(|val| { + min.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Float(OrderedFloat::from(val)), ) }), - item.max.map(|val| 
{ + max.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Float(OrderedFloat::from(val)), ) }), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::DOUBLE(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::DOUBLE(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min.map(|val| { + min.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Double(OrderedFloat::from(val)), ) }), - item.max.map(|val| { + max.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Double(OrderedFloat::from(val)), ) }), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::BYTE_ARRAY(idx) => idx - .indexes - .iter() + ColumnIndexMetaData::BYTE_ARRAY(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(item, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - item.min.clone().map(|val| { + min.map(|val| { Datum::new( field_type.clone(), - PrimitiveLiteral::String( - String::from_utf8(val.data().to_vec()).unwrap(), - ), + PrimitiveLiteral::String(String::from_utf8(val.to_vec()).unwrap()), ) }), - item.max.clone().map(|val| { + max.map(|val| { Datum::new( field_type.clone(), - PrimitiveLiteral::String( - String::from_utf8(val.data().to_vec()).unwrap(), - ), + PrimitiveLiteral::String(String::from_utf8(val.to_vec()).unwrap()), ) }), - PageNullCount::from_row_and_null_counts(row_count, item.null_count), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::FIXED_LEN_BYTE_ARRAY(_) => { + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(_) => { return Err(Error::new( 
ErrorKind::FeatureUnsupported, "unsupported 'FIXED_LEN_BYTE_ARRAY' index type in column_index", )); } - Index::INT96(_) => { + ColumnIndexMetaData::INT96(_) => { return Err(Error::new( ErrorKind::FeatureUnsupported, "unsupported 'INT96' index type in column_index", @@ -547,16 +545,16 @@ impl BoundPredicateVisitor for PageIndexEvaluator<'_> { return Ok(false); } - if let Some(min) = min { - if min.gt(datum) { - return Ok(false); - } + if let Some(min) = min + && min.gt(datum) + { + return Ok(false); } - if let Some(max) = max { - if max.lt(datum) { - return Ok(false); - } + if let Some(max) = max + && max.lt(datum) + { + return Ok(false); } Ok(true) @@ -787,28 +785,164 @@ mod tests { use std::collections::HashMap; use std::sync::Arc; - use parquet::arrow::arrow_reader::RowSelector; - use parquet::basic::{LogicalType as ParquetLogicalType, Type as ParquetPhysicalType}; - use parquet::data_type::ByteArray; - use parquet::file::metadata::{ColumnChunkMetaData, RowGroupMetaData}; - use parquet::file::page_index::index::{Index, NativeIndex, PageIndex}; - use parquet::file::page_index::offset_index::OffsetIndexMetaData; - use parquet::file::statistics::Statistics; - use parquet::format::{BoundaryOrder, PageLocation}; - use parquet::schema::types::{ - ColumnDescriptor, ColumnPath, SchemaDescriptor, Type as parquetSchemaType, + use arrow_array::{ArrayRef, Float32Array, RecordBatch, StringArray}; + use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use parquet::arrow::ArrowWriter; + use parquet::arrow::arrow_reader::{ + ArrowReaderOptions, ParquetRecordBatchReaderBuilder, RowSelector, }; + use parquet::file::metadata::ParquetMetaData; + use parquet::file::properties::WriterProperties; use rand::{Rng, thread_rng}; + use tempfile::NamedTempFile; use super::PageIndexEvaluator; use crate::expr::{Bind, Reference}; use crate::spec::{Datum, NestedField, PrimitiveType, Schema, Type}; use crate::{ErrorKind, Result}; + /// Helper function to create a test parquet file with 
page indexes + /// and return the metadata needed for testing + fn create_test_parquet_file() -> Result<(Arc, NamedTempFile)> { + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("col_float", DataType::Float32, true), + Field::new("col_string", DataType::Utf8, true), + ])); + + let temp_file = NamedTempFile::new().unwrap(); + let file = temp_file.reopen().unwrap(); + + let props = WriterProperties::builder() + .set_data_page_row_count_limit(1024) + .set_write_batch_size(512) + .build(); + + let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); + + let mut batches = vec![]; + + // Batch 1: 1024 rows - strings with AARDVARK, BEAR, BISON + let float_vals: Vec> = vec![None; 1024]; + let mut string_vals = vec![]; + string_vals.push(Some("AARDVARK".to_string())); + for _ in 1..1023 { + string_vals.push(Some("BEAR".to_string())); + } + string_vals.push(Some("BISON".to_string())); + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Batch 2: 1024 rows - all DEER + let float_vals: Vec> = vec![None; 1024]; + let string_vals = vec![Some("DEER".to_string()); 1024]; + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Batch 3: 1024 rows - float 0-10 + let mut float_vals = vec![]; + for i in 0..1024 { + float_vals.push(Some(i as f32 * 10.0 / 1024.0)); + } + let mut string_vals = vec![]; + string_vals.push(Some("GIRAFFE".to_string())); + string_vals.push(None); + for _ in 2..1024 { + string_vals.push(Some("HIPPO".to_string())); + } + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Batch 4: 1024 rows - float 10-20 + let mut float_vals 
= vec![None]; + for i in 1..1024 { + float_vals.push(Some(10.0 + i as f32 * 10.0 / 1024.0)); + } + let string_vals = vec![Some("HIPPO".to_string()); 1024]; + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Write rows one at a time to give the writer a chance to split into pages + for batch in &batches { + for i in 0..batch.num_rows() { + writer.write(&batch.slice(i, 1)).unwrap(); + } + } + + writer.close().unwrap(); + + let file = temp_file.reopen().unwrap(); + let options = ArrowReaderOptions::new().with_page_index(true); + let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); + let metadata = reader.metadata().clone(); + + Ok((metadata, temp_file)) + } + + /// Get the test metadata components for testing + fn get_test_metadata( + metadata: &ParquetMetaData, + ) -> ( + Vec, + Vec, + &parquet::file::metadata::RowGroupMetaData, + ) { + let row_group_metadata = metadata.row_group(0); + let column_index = metadata.column_index().unwrap()[0].to_vec(); + let offset_index = metadata.offset_index().unwrap()[0].to_vec(); + (column_index, offset_index, row_group_metadata) + } + #[test] fn eval_matches_no_rows_for_empty_row_group() -> Result<()> { - let row_group_metadata = create_row_group_metadata(0, 0, None, 0, None)?; - let (column_index, offset_index) = create_page_index()?; + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("col_float", DataType::Float32, true), + Field::new("col_string", DataType::Utf8, true), + ])); + + let empty_float: ArrayRef = Arc::new(Float32Array::from(Vec::>::new())); + let empty_string: ArrayRef = Arc::new(StringArray::from(Vec::>::new())); + let empty_batch = + RecordBatch::try_new(arrow_schema.clone(), vec![empty_float, empty_string]).unwrap(); + + let temp_file = NamedTempFile::new().unwrap(); + let file = temp_file.reopen().unwrap(); + + let mut writer 
= ArrowWriter::try_new(file, arrow_schema, None).unwrap(); + writer.write(&empty_batch).unwrap(); + writer.close().unwrap(); + + let file = temp_file.reopen().unwrap(); + let options = ArrowReaderOptions::new().with_page_index(true); + let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); + let metadata = reader.metadata(); + + if metadata.num_row_groups() == 0 || metadata.row_group(0).num_rows() == 0 { + return Ok(()); + } let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; @@ -816,27 +950,28 @@ mod tests { .greater_than(Datum::float(1.0)) .bind(iceberg_schema_ref.clone(), false)?; + let row_group_metadata = metadata.row_group(0); + let column_index = metadata.column_index().unwrap()[0].to_vec(); + let offset_index = metadata.offset_index().unwrap()[0].to_vec(); + let result = PageIndexEvaluator::eval( &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![]; - - assert_eq!(result, expected); + assert_eq!(result.len(), 0); Ok(()) } #[test] fn eval_is_null_select_only_pages_with_nulls() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -847,15 +982,15 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; let expected = vec![ - RowSelector::select(1024), - RowSelector::skip(1024), RowSelector::select(2048), + RowSelector::skip(1024), + RowSelector::select(1024), ]; assert_eq!(result, expected); @@ -865,9 +1000,8 @@ mod tests { #[test] fn 
eval_is_not_null_dont_select_pages_with_all_nulls() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -878,12 +1012,12 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![RowSelector::skip(1024), RowSelector::select(3072)]; + let expected = vec![RowSelector::skip(2048), RowSelector::select(2048)]; assert_eq!(result, expected); @@ -892,9 +1026,8 @@ mod tests { #[test] fn eval_is_nan_select_all() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -905,7 +1038,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -919,9 +1052,8 @@ mod tests { #[test] fn eval_not_nan_select_all() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ 
-932,7 +1064,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -946,9 +1078,8 @@ mod tests { #[test] fn eval_inequality_nan_datum_all_rows_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -959,12 +1090,12 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![RowSelector::skip(1024), RowSelector::select(3072)]; + let expected = vec![RowSelector::skip(2048), RowSelector::select(2048)]; assert_eq!(result, expected); @@ -973,9 +1104,8 @@ mod tests { #[test] fn eval_inequality_pages_containing_value_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -986,16 +1116,15 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; let expected = vec![ - RowSelector::skip(1024), + RowSelector::skip(2048), RowSelector::select(1024), RowSelector::skip(1024), - RowSelector::select(1024), ]; assert_eq!(result, expected); @@ -1005,9 +1134,8 @@ mod tests { #[test] fn 
eval_eq_pages_containing_value_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1018,16 +1146,18 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; + // Pages 0-1: all null (skip) + // Page 2: 0-10 (select, might contain 5.0) + // Page 3: 10-20 (skip, min > 5.0) let expected = vec![ - RowSelector::skip(1024), + RowSelector::skip(2048), RowSelector::select(1024), RowSelector::skip(1024), - RowSelector::select(1024), ]; assert_eq!(result, expected); @@ -1037,9 +1167,8 @@ mod tests { #[test] fn eval_not_eq_all_rows() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1050,7 +1179,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -1064,9 +1193,8 @@ mod tests { #[test] fn eval_starts_with_error_float_col() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = 
get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1077,7 +1205,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), ); @@ -1089,11 +1217,13 @@ mod tests { #[test] fn eval_starts_with_pages_containing_value_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; + // Test starts_with on string column where only some pages match + // Our file has 4 pages: ["AARDVARK".."BISON"], ["DEER"], ["GIRAFFE".."HIPPO"], ["HIPPO"] + // Testing starts_with("B") should select only page 0 let filter = Reference::new("col_string") .starts_with(Datum::string("B")) .bind(iceberg_schema_ref.clone(), false)?; @@ -1102,16 +1232,13 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![ - RowSelector::select(512), - RowSelector::skip(3536), - RowSelector::select(48), - ]; + // Page 0 has "BEAR" and "BISON" (starts with B), rest don't + let expected = vec![RowSelector::select(1024), RowSelector::skip(3072)]; assert_eq!(result, expected); @@ -1121,11 +1248,13 @@ mod tests { #[test] fn eval_not_starts_with_pages_containing_value_except_pages_with_min_and_max_equal_to_prefix_and_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, 
offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; + // Test not_starts_with where one page has ALL values starting with prefix + // Our file has page 1 with all "DEER" (min="DEER", max="DEER") + // Testing not_starts_with("DE") should skip page 1 where all values start with "DE" let filter = Reference::new("col_string") .not_starts_with(Datum::string("DE")) .bind(iceberg_schema_ref.clone(), false)?; @@ -1134,15 +1263,18 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; + // Page 0: mixed values (select) + // Page 1: all "DEER" starting with "DE" (skip) + // Pages 2-3: other values not all starting with "DE" (select) let expected = vec![ - RowSelector::select(512), - RowSelector::skip(512), - RowSelector::select(3072), + RowSelector::select(1024), + RowSelector::skip(1024), + RowSelector::select(2048), ]; assert_eq!(result, expected); @@ -1153,10 +1285,8 @@ mod tests { #[test] fn eval_in_length_of_set_above_limit_all_rows() -> Result<()> { let mut rng = thread_rng(); - - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1167,7 +1297,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -1181,30 +1311,32 @@ mod tests { #[test] fn eval_in_valid_set_size_some_rows() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - 
+ let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; + // Test is_in with multiple values using min/max bounds + // Our file has 4 pages: ["AARDVARK".."BISON"], ["DEER"], ["GIRAFFE".."HIPPO"], ["HIPPO"] + // Testing is_in(["AARDVARK", "GIRAFFE"]) - both are in different pages let filter = Reference::new("col_string") - .is_in([Datum::string("AARDVARK"), Datum::string("ICEBERG")]) + .is_in([Datum::string("AARDVARK"), Datum::string("GIRAFFE")]) .bind(iceberg_schema_ref.clone(), false)?; let result = PageIndexEvaluator::eval( &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; + // Page 0 contains "AARDVARK", page 1 doesn't contain either, page 2 contains "GIRAFFE", page 3 doesn't let expected = vec![ - RowSelector::select(512), - RowSelector::skip(512), - RowSelector::select(2976), - RowSelector::skip(48), - RowSelector::select(48), + RowSelector::select(1024), + RowSelector::skip(1024), + RowSelector::select(1024), + RowSelector::skip(1024), ]; assert_eq!(result, expected); @@ -1235,183 +1367,4 @@ mod tests { Ok((iceberg_schema_ref, field_id_map)) } - - fn build_parquet_schema_descriptor() -> Result> { - let field_1 = Arc::new( - parquetSchemaType::primitive_type_builder("col_float", ParquetPhysicalType::FLOAT) - .with_id(Some(1)) - .build()?, - ); - - let field_2 = Arc::new( - parquetSchemaType::primitive_type_builder( - "col_string", - ParquetPhysicalType::BYTE_ARRAY, - ) - .with_id(Some(2)) - .with_logical_type(Some(ParquetLogicalType::String)) - .build()?, - ); - - let group_type = Arc::new( - parquetSchemaType::group_type_builder("all") - .with_id(Some(1000)) - .with_fields(vec![field_1, field_2]) - .build()?, - ); - - let schema_descriptor = SchemaDescriptor::new(group_type); - let schema_descriptor_arc = 
Arc::new(schema_descriptor); - Ok(schema_descriptor_arc) - } - - fn create_row_group_metadata( - num_rows: i64, - col_1_num_vals: i64, - col_1_stats: Option, - col_2_num_vals: i64, - col_2_stats: Option, - ) -> Result { - let schema_descriptor_arc = build_parquet_schema_descriptor()?; - - let column_1_desc_ptr = Arc::new(ColumnDescriptor::new( - schema_descriptor_arc.column(0).self_type_ptr(), - 1, - 1, - ColumnPath::new(vec!["col_float".to_string()]), - )); - - let column_2_desc_ptr = Arc::new(ColumnDescriptor::new( - schema_descriptor_arc.column(1).self_type_ptr(), - 1, - 1, - ColumnPath::new(vec!["col_string".to_string()]), - )); - - let mut col_1_meta = - ColumnChunkMetaData::builder(column_1_desc_ptr).set_num_values(col_1_num_vals); - if let Some(stats1) = col_1_stats { - col_1_meta = col_1_meta.set_statistics(stats1) - } - - let mut col_2_meta = - ColumnChunkMetaData::builder(column_2_desc_ptr).set_num_values(col_2_num_vals); - if let Some(stats2) = col_2_stats { - col_2_meta = col_2_meta.set_statistics(stats2) - } - - let row_group_metadata = RowGroupMetaData::builder(schema_descriptor_arc) - .set_num_rows(num_rows) - .set_column_metadata(vec![ - col_1_meta.build()?, - // .set_statistics(Statistics::float(None, None, None, 1, false)) - col_2_meta.build()?, - ]) - .build(); - - Ok(row_group_metadata?) 
- } - - fn create_page_index() -> Result<(Vec, Vec)> { - let idx_float = Index::FLOAT(NativeIndex:: { - indexes: vec![ - PageIndex { - min: None, - max: None, - null_count: Some(1024), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: Some(0.0), - max: Some(10.0), - null_count: Some(0), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: Some(10.0), - max: Some(20.0), - null_count: Some(1), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: None, - max: None, - null_count: None, - repetition_level_histogram: None, - definition_level_histogram: None, - }, - ], - boundary_order: BoundaryOrder(0), // UNORDERED - }); - - let idx_string = Index::BYTE_ARRAY(NativeIndex:: { - indexes: vec![ - PageIndex { - min: Some("AA".into()), - max: Some("DD".into()), - null_count: Some(0), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: Some("DE".into()), - max: Some("DE".into()), - null_count: Some(0), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: Some("DF".into()), - max: Some("UJ".into()), - null_count: Some(1), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: None, - max: None, - null_count: Some(48), - repetition_level_histogram: None, - definition_level_histogram: None, - }, - PageIndex { - min: None, - max: None, - null_count: None, - repetition_level_histogram: None, - definition_level_histogram: None, - }, - ], - boundary_order: BoundaryOrder(0), // UNORDERED - }); - - let page_locs_float = vec![ - PageLocation::new(0, 1024, 0), - PageLocation::new(1024, 1024, 1024), - PageLocation::new(2048, 1024, 2048), - PageLocation::new(3072, 1024, 3072), - ]; - - let page_locs_string = vec![ - PageLocation::new(0, 512, 0), - PageLocation::new(512, 512, 512), - PageLocation::new(1024, 2976, 1024), - 
PageLocation::new(4000, 48, 4000), - PageLocation::new(4048, 48, 4048), - ]; - - Ok((vec![idx_float, idx_string], vec![ - OffsetIndexMetaData { - page_locations: page_locs_float, - unencoded_byte_array_data_bytes: None, - }, - OffsetIndexMetaData { - page_locations: page_locs_string, - unencoded_byte_array_data_bytes: None, - }, - ])) - } } diff --git a/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs index e9bed775ef..a6af2990c8 100644 --- a/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs @@ -129,10 +129,10 @@ impl<'a> StrictMetricsEvaluator<'a> { self.upper_bound(field_id) }; - if let Some(bound) = bound { - if cmp_fn(bound, datum) { - return ROWS_MUST_MATCH; - } + if let Some(bound) = bound + && cmp_fn(bound, datum) + { + return ROWS_MUST_MATCH; } ROWS_MIGHT_NOT_MATCH @@ -219,10 +219,10 @@ impl BoundPredicateVisitor for StrictMetricsEvaluator<'_> { ) -> crate::Result { let field_id = reference.field().id; - if let Some(&nan_count) = self.nan_count(field_id) { - if nan_count == 0 { - return ROWS_MUST_MATCH; - } + if let Some(&nan_count) = self.nan_count(field_id) + && nan_count == 0 + { + return ROWS_MUST_MATCH; } if self.contains_nulls_only(field_id) { @@ -258,10 +258,10 @@ impl BoundPredicateVisitor for StrictMetricsEvaluator<'_> { ) -> crate::Result { let field_id = reference.field().id; - if let Some(lower) = self.lower_bound(field_id) { - if lower.is_nan() { - return ROWS_MIGHT_NOT_MATCH; - } + if let Some(lower) = self.lower_bound(field_id) + && lower.is_nan() + { + return ROWS_MIGHT_NOT_MATCH; } self.visit_inequality(reference, datum, PartialOrd::gt, true) @@ -578,7 +578,7 @@ mod test { ]), column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -604,7 +604,7 @@ mod test { lower_bounds: 
Default::default(), upper_bounds: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -630,7 +630,7 @@ mod test { upper_bounds: HashMap::from([(1, Datum::int(42))]), column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -657,7 +657,7 @@ mod test { upper_bounds: HashMap::from([(3, Datum::string("dC"))]), column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, diff --git a/crates/iceberg/src/inspect/manifests.rs b/crates/iceberg/src/inspect/manifests.rs index 60854b8bae..4c30ca2ec5 100644 --- a/crates/iceberg/src/inspect/manifests.rs +++ b/crates/iceberg/src/inspect/manifests.rs @@ -296,18 +296,18 @@ mod tests { check_record_batches( record_batch.try_collect::>().await.unwrap(), expect![[r#" - Field { name: "content", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "14"} }, - Field { name: "path", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "length", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "partition_spec_id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }, - Field { name: "added_snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, - Field { name: "added_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }, - Field { name: "existing_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, 
metadata: {"PARQUET:field_id": "6"} }, - Field { name: "deleted_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, - Field { name: "added_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "15"} }, - Field { name: "existing_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "16"} }, - Field { name: "deleted_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "17"} }, - Field { name: "partition_summaries", data_type: List(Field { name: "item", data_type: Struct([Field { name: "contains_null", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }, Field { name: "contains_nan", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "11"} }, Field { name: "lower_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "12"} }, Field { name: "upper_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "13"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }"#]], + Field { "content": Int32, metadata: {"PARQUET:field_id": "14"} }, + Field { "path": Utf8, metadata: {"PARQUET:field_id": "1"} }, + Field { "length": Int64, metadata: {"PARQUET:field_id": "2"} }, + Field { "partition_spec_id": Int32, metadata: {"PARQUET:field_id": "3"} }, + Field { "added_snapshot_id": Int64, metadata: {"PARQUET:field_id": "4"} }, + Field { "added_data_files_count": Int32, metadata: {"PARQUET:field_id": "5"} }, + Field { "existing_data_files_count": Int32, 
metadata: {"PARQUET:field_id": "6"} }, + Field { "deleted_data_files_count": Int32, metadata: {"PARQUET:field_id": "7"} }, + Field { "added_delete_files_count": Int32, metadata: {"PARQUET:field_id": "15"} }, + Field { "existing_delete_files_count": Int32, metadata: {"PARQUET:field_id": "16"} }, + Field { "deleted_delete_files_count": Int32, metadata: {"PARQUET:field_id": "17"} }, + Field { "partition_summaries": List(non-null Struct("contains_null": non-null Boolean, metadata: {"PARQUET:field_id": "10"}, "contains_nan": Boolean, metadata: {"PARQUET:field_id": "11"}, "lower_bound": Utf8, metadata: {"PARQUET:field_id": "12"}, "upper_bound": Utf8, metadata: {"PARQUET:field_id": "13"}), metadata: {"PARQUET:field_id": "9"}), metadata: {"PARQUET:field_id": "8"} }"#]], expect![[r#" content: PrimitiveArray [ diff --git a/crates/iceberg/src/inspect/metadata_table.rs b/crates/iceberg/src/inspect/metadata_table.rs index 92571db181..d5e9d60869 100644 --- a/crates/iceberg/src/inspect/metadata_table.rs +++ b/crates/iceberg/src/inspect/metadata_table.rs @@ -71,12 +71,12 @@ impl<'a> MetadataTable<'a> { } /// Get the snapshots table. - pub fn snapshots(&self) -> SnapshotsTable { + pub fn snapshots(&self) -> SnapshotsTable<'_> { SnapshotsTable::new(self.0) } /// Get the manifests table. 
- pub fn manifests(&self) -> ManifestsTable { + pub fn manifests(&self) -> ManifestsTable<'_> { ManifestsTable::new(self.0) } } diff --git a/crates/iceberg/src/inspect/snapshots.rs b/crates/iceberg/src/inspect/snapshots.rs index 6081ec165b..fbed7ec11e 100644 --- a/crates/iceberg/src/inspect/snapshots.rs +++ b/crates/iceberg/src/inspect/snapshots.rs @@ -151,14 +151,14 @@ mod tests { check_record_batches( batch_stream.try_collect::>().await.unwrap(), expect![[r#" - Field { name: "committed_at", data_type: Timestamp(Microsecond, Some("+00:00")), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "parent_id", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }, - Field { name: "operation", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, - Field { name: "manifest_list", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }, - Field { name: "summary", data_type: Map(Field { name: "key_value", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, Field { name: "value", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }"#]], + Field { "committed_at": Timestamp(µs, "+00:00"), metadata: {"PARQUET:field_id": "1"} }, + Field { "snapshot_id": Int64, metadata: {"PARQUET:field_id": "2"} }, + Field { "parent_id": nullable Int64, metadata: {"PARQUET:field_id": "3"} }, + Field { "operation": nullable Utf8, 
metadata: {"PARQUET:field_id": "4"} }, + Field { "manifest_list": nullable Utf8, metadata: {"PARQUET:field_id": "5"} }, + Field { "summary": nullable Map("key_value": non-null Struct("key": non-null Utf8, metadata: {"PARQUET:field_id": "7"}, "value": Utf8, metadata: {"PARQUET:field_id": "8"}), unsorted), metadata: {"PARQUET:field_id": "6"} }"#]], expect![[r#" - committed_at: PrimitiveArray + committed_at: PrimitiveArray [ 2018-01-04T21:22:35.770+00:00, 2019-04-12T20:29:15.770+00:00, diff --git a/crates/iceberg/src/io/storage.rs b/crates/iceberg/src/io/storage.rs index d5f2ad8fab..03e43600dd 100644 --- a/crates/iceberg/src/io/storage.rs +++ b/crates/iceberg/src/io/storage.rs @@ -15,6 +15,12 @@ // specific language governing permissions and limitations // under the License. +#[cfg(any( + feature = "storage-s3", + feature = "storage-gcs", + feature = "storage-oss", + feature = "storage-azdls", +))] use std::sync::Arc; use opendal::layers::RetryLayer; @@ -71,6 +77,7 @@ impl Storage { /// Convert iceberg config to opendal config. 
pub(crate) fn build(file_io_builder: FileIOBuilder) -> crate::Result { let (scheme_str, props, extensions) = file_io_builder.into_parts(); + let _ = (&props, &extensions); let scheme = Self::parse_scheme(&scheme_str)?; match scheme { @@ -127,6 +134,7 @@ impl Storage { path: &'a impl AsRef, ) -> crate::Result<(Operator, &'a str)> { let path = path.as_ref(); + let _ = path; let (operator, relative_path): (Operator, &str) = match self { #[cfg(feature = "storage-memory")] Storage::Memory(op) => { @@ -175,7 +183,7 @@ impl Storage { } else { Err(Error::new( ErrorKind::DataInvalid, - format!("Invalid gcs url: {}, should start with {}", path, prefix), + format!("Invalid gcs url: {path}, should start with {prefix}"), )) } } @@ -190,7 +198,7 @@ impl Storage { } else { Err(Error::new( ErrorKind::DataInvalid, - format!("Invalid oss url: {}, should start with {}", path, prefix), + format!("Invalid oss url: {path}, should start with {prefix}"), )) } } diff --git a/crates/iceberg/src/io/storage_azdls.rs b/crates/iceberg/src/io/storage_azdls.rs index fe12167f6f..5abb0cd6e0 100644 --- a/crates/iceberg/src/io/storage_azdls.rs +++ b/crates/iceberg/src/io/storage_azdls.rs @@ -165,7 +165,7 @@ impl FromStr for AzureStorageScheme { "wasbs" => Ok(AzureStorageScheme::Wasbs), _ => Err(Error::new( ErrorKind::DataInvalid, - format!("Unexpected Azure Storage scheme: {}", s), + format!("Unexpected Azure Storage scheme: {s}"), )), } } @@ -397,11 +397,11 @@ mod tests { let config = azdls_config_parse(properties); match expected { Some(expected_config) => { - assert!(config.is_ok(), "Test case {} failed: {:?}", name, config); - assert_eq!(config.unwrap(), expected_config, "Test case: {}", name); + assert!(config.is_ok(), "Test case {name} failed: {config:?}"); + assert_eq!(config.unwrap(), expected_config, "Test case: {name}"); } None => { - assert!(config.is_err(), "Test case {} expected error.", name); + assert!(config.is_err(), "Test case {name} expected error."); } } } @@ -495,14 +495,14 @@ 
mod tests { let result = azdls_create_operator(input.0, &input.1, &input.2); match expected { Some((expected_filesystem, expected_path)) => { - assert!(result.is_ok(), "Test case {} failed: {:?}", name, result); + assert!(result.is_ok(), "Test case {name} failed: {result:?}"); let (op, relative_path) = result.unwrap(); assert_eq!(op.info().name(), expected_filesystem); assert_eq!(relative_path, expected_path); } None => { - assert!(result.is_err(), "Test case {} expected error.", name); + assert!(result.is_err(), "Test case {name} expected error."); } } } @@ -543,11 +543,11 @@ mod tests { let result = input.parse::(); match expected { Some(expected_path) => { - assert!(result.is_ok(), "Test case {} failed: {:?}", name, result); - assert_eq!(result.unwrap(), expected_path, "Test case: {}", name); + assert!(result.is_ok(), "Test case {name} failed: {result:?}"); + assert_eq!(result.unwrap(), expected_path, "Test case: {name}"); } None => { - assert!(result.is_err(), "Test case {} expected error.", name); + assert!(result.is_err(), "Test case {name} expected error."); } } } @@ -593,7 +593,7 @@ mod tests { for (name, path, expected) in test_cases { let endpoint = path.as_endpoint(); - assert_eq!(endpoint, expected, "Test case: {}", name); + assert_eq!(endpoint, expected, "Test case: {name}"); } } } diff --git a/crates/iceberg/src/io/storage_gcs.rs b/crates/iceberg/src/io/storage_gcs.rs index 8c3d914c86..7718df603f 100644 --- a/crates/iceberg/src/io/storage_gcs.rs +++ b/crates/iceberg/src/io/storage_gcs.rs @@ -71,20 +71,20 @@ pub(crate) fn gcs_config_parse(mut m: HashMap) -> Result Result let bucket = url.host_str().ok_or_else(|| { Error::new( ErrorKind::DataInvalid, - format!("Invalid gcs url: {}, bucket is required", path), + format!("Invalid gcs url: {path}, bucket is required"), ) })?; diff --git a/crates/iceberg/src/io/storage_oss.rs b/crates/iceberg/src/io/storage_oss.rs index 8bfffc6ca8..e82dda23a5 100644 --- a/crates/iceberg/src/io/storage_oss.rs +++ 
b/crates/iceberg/src/io/storage_oss.rs @@ -56,7 +56,7 @@ pub(crate) fn oss_config_build(cfg: &OssConfig, path: &str) -> Result let bucket = url.host_str().ok_or_else(|| { Error::new( ErrorKind::DataInvalid, - format!("Invalid oss url: {}, missing bucket", path), + format!("Invalid oss url: {path}, missing bucket"), ) })?; diff --git a/crates/iceberg/src/io/storage_s3.rs b/crates/iceberg/src/io/storage_s3.rs index fcf9afed1f..f069e0e2f9 100644 --- a/crates/iceberg/src/io/storage_s3.rs +++ b/crates/iceberg/src/io/storage_s3.rs @@ -134,20 +134,20 @@ pub(crate) fn s3_config_parse(mut m: HashMap) -> Result = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_FILE, + RESERVED_COL_NAME_FILE, + Type::Primitive(PrimitiveType::String), + ) + .with_doc("Path of the file in which a row is stored"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _pos metadata column. +/// This field represents the ordinal position of a row in the source data file. +static POS_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_POS, + RESERVED_COL_NAME_POS, + Type::Primitive(PrimitiveType::Long), + ) + .with_doc("Ordinal position of a row in the source data file"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _deleted metadata column. +/// This field indicates whether a row has been deleted. +static DELETED_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_DELETED, + RESERVED_COL_NAME_DELETED, + Type::Primitive(PrimitiveType::Boolean), + ) + .with_doc("Whether the row has been deleted"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _spec_id metadata column. +/// This field represents the spec ID used to track the file containing a row. 
+static SPEC_ID_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_SPEC_ID, + RESERVED_COL_NAME_SPEC_ID, + Type::Primitive(PrimitiveType::Int), + ) + .with_doc("Spec ID used to track the file containing a row"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the file_path column in position delete files. +/// This field represents the path of a file in position-based delete files. +static DELETE_FILE_PATH_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_DELETE_FILE_PATH, + RESERVED_COL_NAME_DELETE_FILE_PATH, + Type::Primitive(PrimitiveType::String), + ) + .with_doc("Path of a file, used in position-based delete files"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the pos column in position delete files. +/// This field represents the ordinal position of a row in position-based delete files. +static DELETE_FILE_POS_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_DELETE_FILE_POS, + RESERVED_COL_NAME_DELETE_FILE_POS, + Type::Primitive(PrimitiveType::Long), + ) + .with_doc("Ordinal position of a row, used in position-based delete files"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _change_type metadata column. +/// This field represents the record type in the changelog. +static CHANGE_TYPE_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_CHANGE_TYPE, + RESERVED_COL_NAME_CHANGE_TYPE, + Type::Primitive(PrimitiveType::String), + ) + .with_doc( + "The record type in the changelog (INSERT, DELETE, UPDATE_BEFORE, or UPDATE_AFTER)", + ), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _change_ordinal metadata column. +/// This field represents the order of the change. 
+static CHANGE_ORDINAL_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_CHANGE_ORDINAL, + RESERVED_COL_NAME_CHANGE_ORDINAL, + Type::Primitive(PrimitiveType::Int), + ) + .with_doc("The order of the change"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _commit_snapshot_id metadata column. +/// This field represents the snapshot ID in which the change occurred. +static COMMIT_SNAPSHOT_ID_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID, + RESERVED_COL_NAME_COMMIT_SNAPSHOT_ID, + Type::Primitive(PrimitiveType::Long), + ) + .with_doc("The snapshot ID in which the change occurred"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _row_id metadata column. +/// This field represents a unique long assigned for row lineage. +static ROW_ID_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_ROW_ID, + RESERVED_COL_NAME_ROW_ID, + Type::Primitive(PrimitiveType::Long), + ) + .with_doc("A unique long assigned for row lineage"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _last_updated_sequence_number metadata column. +/// This field represents the sequence number which last updated this row. +static LAST_UPDATED_SEQUENCE_NUMBER_FIELD: Lazy = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER, + RESERVED_COL_NAME_LAST_UPDATED_SEQUENCE_NUMBER, + Type::Primitive(PrimitiveType::Long), + ) + .with_doc("The sequence number which last updated this row"), + ) +}); + +/// Returns the Iceberg field definition for the _file metadata column. +/// +/// # Returns +/// A reference to the _file field definition as an Iceberg NestedField +pub fn file_field() -> &'static NestedFieldRef { + &FILE_FIELD +} + +/// Returns the Iceberg field definition for the _pos metadata column. 
+/// +/// # Returns +/// A reference to the _pos field definition as an Iceberg NestedField +pub fn pos_field() -> &'static NestedFieldRef { + &POS_FIELD +} + +/// Returns the Iceberg field definition for the _deleted metadata column. +/// +/// # Returns +/// A reference to the _deleted field definition as an Iceberg NestedField +pub fn deleted_field() -> &'static NestedFieldRef { + &DELETED_FIELD +} + +/// Returns the Iceberg field definition for the _spec_id metadata column. +/// +/// # Returns +/// A reference to the _spec_id field definition as an Iceberg NestedField +pub fn spec_id_field() -> &'static NestedFieldRef { + &SPEC_ID_FIELD +} + +/// Returns the Iceberg field definition for the file_path column in position delete files. +/// +/// # Returns +/// A reference to the file_path field definition as an Iceberg NestedField +pub fn delete_file_path_field() -> &'static NestedFieldRef { + &DELETE_FILE_PATH_FIELD +} + +/// Returns the Iceberg field definition for the pos column in position delete files. +/// +/// # Returns +/// A reference to the pos field definition as an Iceberg NestedField +pub fn delete_file_pos_field() -> &'static NestedFieldRef { + &DELETE_FILE_POS_FIELD +} + +/// Returns the Iceberg field definition for the _change_type metadata column. +/// +/// # Returns +/// A reference to the _change_type field definition as an Iceberg NestedField +pub fn change_type_field() -> &'static NestedFieldRef { + &CHANGE_TYPE_FIELD +} + +/// Returns the Iceberg field definition for the _change_ordinal metadata column. +/// +/// # Returns +/// A reference to the _change_ordinal field definition as an Iceberg NestedField +pub fn change_ordinal_field() -> &'static NestedFieldRef { + &CHANGE_ORDINAL_FIELD +} + +/// Returns the Iceberg field definition for the _commit_snapshot_id metadata column. 
+/// +/// # Returns +/// A reference to the _commit_snapshot_id field definition as an Iceberg NestedField +pub fn commit_snapshot_id_field() -> &'static NestedFieldRef { + &COMMIT_SNAPSHOT_ID_FIELD +} + +/// Returns the Iceberg field definition for the _row_id metadata column. +/// +/// # Returns +/// A reference to the _row_id field definition as an Iceberg NestedField +pub fn row_id_field() -> &'static NestedFieldRef { + &ROW_ID_FIELD +} + +/// Returns the Iceberg field definition for the _last_updated_sequence_number metadata column. +/// +/// # Returns +/// A reference to the _last_updated_sequence_number field definition as an Iceberg NestedField +pub fn last_updated_sequence_number_field() -> &'static NestedFieldRef { + &LAST_UPDATED_SEQUENCE_NUMBER_FIELD +} + +/// Creates the Iceberg field definition for the _partition metadata column. +/// +/// The _partition field is a struct whose fields depend on the partition spec. +/// This function creates the field dynamically with the provided partition fields. 
+/// +/// # Arguments +/// * `partition_fields` - The fields that make up the partition struct +/// +/// # Returns +/// A new _partition field definition as an Iceberg NestedField +/// +/// # Example +/// ``` +/// use std::sync::Arc; +/// +/// use iceberg::metadata_columns::partition_field; +/// use iceberg::spec::{NestedField, PrimitiveType, Type}; +/// +/// let fields = vec![ +/// Arc::new(NestedField::required( +/// 1, +/// "year", +/// Type::Primitive(PrimitiveType::Int), +/// )), +/// Arc::new(NestedField::required( +/// 2, +/// "month", +/// Type::Primitive(PrimitiveType::Int), +/// )), +/// ]; +/// let partition_field = partition_field(fields); +/// ``` +pub fn partition_field(partition_fields: Vec) -> NestedFieldRef { + use crate::spec::StructType; + + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_PARTITION, + RESERVED_COL_NAME_PARTITION, + Type::Struct(StructType::new(partition_fields)), + ) + .with_doc("Partition to which a row belongs"), + ) +} + +/// Returns the Iceberg field definition for a metadata field ID. +/// +/// Note: This function does not support `_partition` (field ID `i32::MAX - 5`) because +/// it's a struct field that requires dynamic partition fields. Use `partition_field()` +/// instead to create the `_partition` field with the appropriate partition fields. 
+/// +/// # Arguments +/// * `field_id` - The metadata field ID +/// +/// # Returns +/// The Iceberg field definition for the metadata column, or an error if not a metadata field +pub fn get_metadata_field(field_id: i32) -> Result<&'static NestedFieldRef> { + match field_id { + RESERVED_FIELD_ID_FILE => Ok(file_field()), + RESERVED_FIELD_ID_POS => Ok(pos_field()), + RESERVED_FIELD_ID_DELETED => Ok(deleted_field()), + RESERVED_FIELD_ID_SPEC_ID => Ok(spec_id_field()), + RESERVED_FIELD_ID_PARTITION => Err(Error::new( + ErrorKind::Unexpected, + "The _partition field must be created using partition_field() with appropriate partition fields", + )), + RESERVED_FIELD_ID_DELETE_FILE_PATH => Ok(delete_file_path_field()), + RESERVED_FIELD_ID_DELETE_FILE_POS => Ok(delete_file_pos_field()), + RESERVED_FIELD_ID_CHANGE_TYPE => Ok(change_type_field()), + RESERVED_FIELD_ID_CHANGE_ORDINAL => Ok(change_ordinal_field()), + RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID => Ok(commit_snapshot_id_field()), + RESERVED_FIELD_ID_ROW_ID => Ok(row_id_field()), + RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER => Ok(last_updated_sequence_number_field()), + _ if is_metadata_field(field_id) => { + // Future metadata fields can be added here + Err(Error::new( + ErrorKind::Unexpected, + format!( + "Metadata field ID {field_id} recognized but field definition not implemented" + ), + )) + } + _ => Err(Error::new( + ErrorKind::Unexpected, + format!("Field ID {field_id} is not a metadata field"), + )), + } +} + +/// Returns the field ID for a metadata column name. 
+/// +/// # Arguments +/// * `column_name` - The metadata column name +/// +/// # Returns +/// The field ID of the metadata column, or an error if the column name is not recognized +pub fn get_metadata_field_id(column_name: &str) -> Result { + match column_name { + RESERVED_COL_NAME_FILE => Ok(RESERVED_FIELD_ID_FILE), + RESERVED_COL_NAME_POS => Ok(RESERVED_FIELD_ID_POS), + RESERVED_COL_NAME_DELETED => Ok(RESERVED_FIELD_ID_DELETED), + RESERVED_COL_NAME_SPEC_ID => Ok(RESERVED_FIELD_ID_SPEC_ID), + RESERVED_COL_NAME_PARTITION => Ok(RESERVED_FIELD_ID_PARTITION), + RESERVED_COL_NAME_DELETE_FILE_PATH => Ok(RESERVED_FIELD_ID_DELETE_FILE_PATH), + RESERVED_COL_NAME_DELETE_FILE_POS => Ok(RESERVED_FIELD_ID_DELETE_FILE_POS), + RESERVED_COL_NAME_CHANGE_TYPE => Ok(RESERVED_FIELD_ID_CHANGE_TYPE), + RESERVED_COL_NAME_CHANGE_ORDINAL => Ok(RESERVED_FIELD_ID_CHANGE_ORDINAL), + RESERVED_COL_NAME_COMMIT_SNAPSHOT_ID => Ok(RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID), + RESERVED_COL_NAME_ROW_ID => Ok(RESERVED_FIELD_ID_ROW_ID), + RESERVED_COL_NAME_LAST_UPDATED_SEQUENCE_NUMBER => { + Ok(RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER) + } + _ => Err(Error::new( + ErrorKind::Unexpected, + format!("Unknown/unsupported metadata column name: {column_name}"), + )), + } +} + +/// Checks if a field ID is a metadata field. 
+/// +/// # Arguments +/// * `field_id` - The field ID to check +/// +/// # Returns +/// `true` if the field ID is a (currently supported) metadata field, `false` otherwise +pub fn is_metadata_field(field_id: i32) -> bool { + matches!( + field_id, + RESERVED_FIELD_ID_FILE + | RESERVED_FIELD_ID_POS + | RESERVED_FIELD_ID_DELETED + | RESERVED_FIELD_ID_SPEC_ID + | RESERVED_FIELD_ID_PARTITION + | RESERVED_FIELD_ID_DELETE_FILE_PATH + | RESERVED_FIELD_ID_DELETE_FILE_POS + | RESERVED_FIELD_ID_CHANGE_TYPE + | RESERVED_FIELD_ID_CHANGE_ORDINAL + | RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID + | RESERVED_FIELD_ID_ROW_ID + | RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER + ) +} + +/// Checks if a column name is a metadata column. +/// +/// # Arguments +/// * `column_name` - The column name to check +/// +/// # Returns +/// `true` if the column name is a metadata column, `false` otherwise +pub fn is_metadata_column_name(column_name: &str) -> bool { + get_metadata_field_id(column_name).is_ok() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::spec::PrimitiveType; + + #[test] + fn test_partition_field_creation() { + // Create partition fields for a hypothetical year/month partition + let partition_fields = vec![ + Arc::new(NestedField::required( + 1000, + "year", + Type::Primitive(PrimitiveType::Int), + )), + Arc::new(NestedField::required( + 1001, + "month", + Type::Primitive(PrimitiveType::Int), + )), + ]; + + // Create the _partition metadata field + let partition = partition_field(partition_fields); + + // Verify field properties + assert_eq!(partition.id, RESERVED_FIELD_ID_PARTITION); + assert_eq!(partition.name, RESERVED_COL_NAME_PARTITION); + assert!(partition.required); + + // Verify it's a struct type with correct fields + if let Type::Struct(struct_type) = partition.field_type.as_ref() { + assert_eq!(struct_type.fields().len(), 2); + assert_eq!(struct_type.fields()[0].name, "year"); + assert_eq!(struct_type.fields()[1].name, "month"); + } else { + panic!("Expected 
struct type for _partition field"); + } + } + + #[test] + fn test_partition_field_id_recognized() { + assert!(is_metadata_field(RESERVED_FIELD_ID_PARTITION)); + } + + #[test] + fn test_partition_field_name_recognized() { + assert_eq!( + get_metadata_field_id(RESERVED_COL_NAME_PARTITION).unwrap(), + RESERVED_FIELD_ID_PARTITION + ); + } + + #[test] + fn test_get_metadata_field_returns_error_for_partition() { + // partition field requires dynamic creation, so get_metadata_field should return an error + let result = get_metadata_field(RESERVED_FIELD_ID_PARTITION); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("partition_field()") + ); + } + + #[test] + fn test_all_metadata_field_ids() { + // Test that all non-partition metadata fields can be retrieved + assert!(get_metadata_field(RESERVED_FIELD_ID_FILE).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_POS).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_DELETED).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_SPEC_ID).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_DELETE_FILE_PATH).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_DELETE_FILE_POS).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_CHANGE_TYPE).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_CHANGE_ORDINAL).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_ROW_ID).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER).is_ok()); + } +} diff --git a/crates/iceberg/src/runtime/mod.rs b/crates/iceberg/src/runtime/mod.rs index d0a3ce6602..61aa623f58 100644 --- a/crates/iceberg/src/runtime/mod.rs +++ b/crates/iceberg/src/runtime/mod.rs @@ -21,28 +21,20 @@ use std::future::Future; use std::pin::Pin; use std::task::{Context, Poll}; -pub enum JoinHandle { - #[cfg(feature = "tokio")] - Tokio(tokio::task::JoinHandle), - #[cfg(all(feature = "smol", 
not(feature = "tokio")))] - Smol(smol::Task), - #[cfg(all(not(feature = "smol"), not(feature = "tokio")))] - Unimplemented(Box), -} +use tokio::task; + +pub struct JoinHandle(task::JoinHandle); + +impl Unpin for JoinHandle {} impl Future for JoinHandle { type Output = T; fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { match self.get_mut() { - #[cfg(feature = "tokio")] - JoinHandle::Tokio(handle) => Pin::new(handle) + JoinHandle(handle) => Pin::new(handle) .poll(cx) - .map(|h| h.expect("tokio spawned task failed")), - #[cfg(all(feature = "smol", not(feature = "tokio")))] - JoinHandle::Smol(handle) => Pin::new(handle).poll(cx), - #[cfg(all(not(feature = "smol"), not(feature = "tokio")))] - JoinHandle::Unimplemented(_) => unimplemented!("no runtime has been enabled"), + .map(|r| r.expect("tokio spawned task failed")), } } } @@ -50,17 +42,10 @@ impl Future for JoinHandle { #[allow(dead_code)] pub fn spawn(f: F) -> JoinHandle where - F: Future + Send + 'static, + F: std::future::Future + Send + 'static, F::Output: Send + 'static, { - #[cfg(feature = "tokio")] - return JoinHandle::Tokio(tokio::task::spawn(f)); - - #[cfg(all(feature = "smol", not(feature = "tokio")))] - return JoinHandle::Smol(smol::spawn(f)); - - #[cfg(all(not(feature = "smol"), not(feature = "tokio")))] - unimplemented!("no runtime has been enabled") + JoinHandle(task::spawn(f)) } #[allow(dead_code)] @@ -69,45 +54,22 @@ where F: FnOnce() -> T + Send + 'static, T: Send + 'static, { - #[cfg(feature = "tokio")] - return JoinHandle::Tokio(tokio::task::spawn_blocking(f)); - - #[cfg(all(feature = "smol", not(feature = "tokio")))] - return JoinHandle::Smol(smol::unblock(f)); - - #[cfg(all(not(feature = "smol"), not(feature = "tokio")))] - unimplemented!("no runtime has been enabled") + JoinHandle(task::spawn_blocking(f)) } #[cfg(test)] mod tests { use super::*; - #[cfg(feature = "tokio")] #[tokio::test] async fn test_tokio_spawn() { let handle = spawn(async { 1 + 1 }); assert_eq!(handle.await, 
2); } - #[cfg(feature = "tokio")] #[tokio::test] async fn test_tokio_spawn_blocking() { let handle = spawn_blocking(|| 1 + 1); assert_eq!(handle.await, 2); } - - #[cfg(all(feature = "smol", not(feature = "tokio")))] - #[smol::test] - async fn test_smol_spawn() { - let handle = spawn(async { 1 + 1 }); - assert_eq!(handle.await, 2); - } - - #[cfg(all(feature = "smol", not(feature = "tokio")))] - #[smo::test] - async fn test_smol_spawn_blocking() { - let handle = spawn_blocking(|| 1 + 1); - assert_eq!(handle.await, 2); - } } diff --git a/crates/iceberg/src/scan/context.rs b/crates/iceberg/src/scan/context.rs index fe3f5c8f7e..169d8e6405 100644 --- a/crates/iceberg/src/scan/context.rs +++ b/crates/iceberg/src/scan/context.rs @@ -46,6 +46,7 @@ pub(crate) struct ManifestFileContext { snapshot_schema: SchemaRef, expression_evaluator_cache: Arc, delete_file_index: DeleteFileIndex, + case_sensitive: bool, } /// Wraps a [`ManifestEntryRef`] alongside the objects that are needed @@ -59,6 +60,7 @@ pub(crate) struct ManifestEntryContext { pub partition_spec_id: i32, pub snapshot_schema: SchemaRef, pub delete_file_index: DeleteFileIndex, + pub case_sensitive: bool, } impl ManifestFileContext { @@ -89,6 +91,7 @@ impl ManifestFileContext { bound_predicates: bound_predicates.clone(), snapshot_schema: snapshot_schema.clone(), delete_file_index: delete_file_index.clone(), + case_sensitive: self.case_sensitive, }; sender @@ -135,6 +138,7 @@ impl ManifestEntryContext { partition_spec: None, // TODO: Extract name_mapping from table metadata property "schema.name-mapping.default" name_mapping: None, + case_sensitive: self.case_sensitive, }) } } @@ -194,7 +198,17 @@ impl PlanContext { delete_file_idx: DeleteFileIndex, delete_file_tx: Sender, ) -> Result> + 'static>> { - let manifest_files = manifest_list.entries().iter(); + let mut manifest_files = manifest_list.entries().iter().collect::>(); + // Sort manifest files to process delete manifests first. 
+ // This avoids a deadlock where the producer blocks on sending data manifest entries + // (because the data channel is full) while the delete manifest consumer is waiting + // for delete manifest entries (which haven't been produced yet). + // By processing delete manifests first, we ensure the delete consumer can finish, + // which then allows the data consumer to start draining the data channel. + manifest_files.sort_by_key(|m| match m.content { + ManifestContentType::Deletes => 0, + ManifestContentType::Data => 1, + }); // TODO: Ideally we could ditch this intermediate Vec as we return an iterator. let mut filtered_mfcs = vec![]; @@ -267,6 +281,7 @@ impl PlanContext { field_ids: self.field_ids.clone(), expression_evaluator_cache: self.expression_evaluator_cache.clone(), delete_file_index, + case_sensitive: self.case_sensitive, } } } diff --git a/crates/iceberg/src/scan/mod.rs b/crates/iceberg/src/scan/mod.rs index 3e319ca062..c055c12c9a 100644 --- a/crates/iceberg/src/scan/mod.rs +++ b/crates/iceberg/src/scan/mod.rs @@ -36,6 +36,7 @@ use crate::delete_file_index::DeleteFileIndex; use crate::expr::visitors::inclusive_metrics_evaluator::InclusiveMetricsEvaluator; use crate::expr::{Bind, BoundPredicate, Predicate}; use crate::io::FileIO; +use crate::metadata_columns::{get_metadata_field_id, is_metadata_column_name}; use crate::runtime::spawn; use crate::spec::{DataContentType, SnapshotRef}; use crate::table::Table; @@ -217,9 +218,13 @@ impl<'a> TableScanBuilder<'a> { let schema = snapshot.schema(self.table.metadata())?; - // Check that all column names exist in the schema. + // Check that all column names exist in the schema (skip reserved columns). 
if let Some(column_names) = self.column_names.as_ref() { for column_name in column_names { + // Skip reserved columns that don't exist in the schema + if is_metadata_column_name(column_name) { + continue; + } if schema.field_by_name(column_name).is_none() { return Err(Error::new( ErrorKind::DataInvalid, @@ -240,6 +245,12 @@ impl<'a> TableScanBuilder<'a> { }); for column_name in column_names.iter() { + // Handle metadata columns (like "_file") + if is_metadata_column_name(column_name) { + field_ids.push(get_metadata_field_id(column_name)?); + continue; + } + let field_id = schema.field_id_by_name(column_name).ok_or_else(|| { Error::new( ErrorKind::DataInvalid, @@ -254,10 +265,10 @@ impl<'a> TableScanBuilder<'a> { Error::new( ErrorKind::FeatureUnsupported, format!( - "Column {column_name} is not a direct child of schema but a nested field, which is not supported now. Schema: {schema}" - ), - ) - })?; + "Column {column_name} is not a direct child of schema but a nested field, which is not supported now. 
Schema: {schema}" + ), + ) + })?; field_ids.push(field_id); } @@ -559,8 +570,10 @@ pub mod tests { use std::fs::File; use std::sync::Arc; + use arrow_array::cast::AsArray; use arrow_array::{ - ArrayRef, BooleanArray, Float64Array, Int32Array, Int64Array, RecordBatch, StringArray, + Array, ArrayRef, BooleanArray, Float64Array, Int32Array, Int64Array, RecordBatch, + StringArray, }; use futures::{TryStreamExt, stream}; use minijinja::value::Value; @@ -575,6 +588,7 @@ pub mod tests { use crate::arrow::ArrowReaderBuilder; use crate::expr::{BoundPredicate, Reference}; use crate::io::{FileIO, OutputFile}; + use crate::metadata_columns::RESERVED_COL_NAME_FILE; use crate::scan::FileScanTask; use crate::spec::{ DataContentType, DataFileBuilder, DataFileFormat, Datum, Literal, ManifestEntry, @@ -1156,6 +1170,97 @@ pub mod tests { writer.close().unwrap(); } } + + pub async fn setup_deadlock_manifests(&mut self) { + let current_snapshot = self.table.metadata().current_snapshot().unwrap(); + let _parent_snapshot = current_snapshot + .parent_snapshot(self.table.metadata()) + .unwrap(); + let current_schema = current_snapshot.schema(self.table.metadata()).unwrap(); + let current_partition_spec = self.table.metadata().default_partition_spec(); + + // 1. 
Write DATA manifest with MULTIPLE entries to fill buffer + let mut writer = ManifestWriterBuilder::new( + self.next_manifest_file(), + Some(current_snapshot.snapshot_id()), + None, + current_schema.clone(), + current_partition_spec.as_ref().clone(), + ) + .build_v2_data(); + + // Add 10 data entries + for i in 0..10 { + writer + .add_entry( + ManifestEntry::builder() + .status(ManifestStatus::Added) + .data_file( + DataFileBuilder::default() + .partition_spec_id(0) + .content(DataContentType::Data) + .file_path(format!("{}/{}.parquet", &self.table_location, i)) + .file_format(DataFileFormat::Parquet) + .file_size_in_bytes(100) + .record_count(1) + .partition(Struct::from_iter([Some(Literal::long(100))])) + .key_metadata(None) + .build() + .unwrap(), + ) + .build(), + ) + .unwrap(); + } + let data_manifest = writer.write_manifest_file().await.unwrap(); + + // 2. Write DELETE manifest + let mut writer = ManifestWriterBuilder::new( + self.next_manifest_file(), + Some(current_snapshot.snapshot_id()), + None, + current_schema.clone(), + current_partition_spec.as_ref().clone(), + ) + .build_v2_deletes(); + + writer + .add_entry( + ManifestEntry::builder() + .status(ManifestStatus::Added) + .data_file( + DataFileBuilder::default() + .partition_spec_id(0) + .content(DataContentType::PositionDeletes) + .file_path(format!("{}/del.parquet", &self.table_location)) + .file_format(DataFileFormat::Parquet) + .file_size_in_bytes(100) + .record_count(1) + .partition(Struct::from_iter([Some(Literal::long(100))])) + .build() + .unwrap(), + ) + .build(), + ) + .unwrap(); + let delete_manifest = writer.write_manifest_file().await.unwrap(); + + // Write to manifest list - DATA FIRST then DELETE + // This order is crucial for reproduction + let mut manifest_list_write = ManifestListWriter::v2( + self.table + .file_io() + .new_output(current_snapshot.manifest_list()) + .unwrap(), + current_snapshot.snapshot_id(), + current_snapshot.parent_snapshot_id(), + 
current_snapshot.sequence_number(), + ); + manifest_list_write + .add_manifests(vec![data_manifest, delete_manifest].into_iter()) + .unwrap(); + manifest_list_write.close().await.unwrap(); + } } #[test] @@ -1780,6 +1885,7 @@ pub mod tests { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; test_fn(task); @@ -1797,7 +1903,355 @@ pub mod tests { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; test_fn(task); } + + #[tokio::test] + async fn test_select_with_file_column() { + use arrow_array::cast::AsArray; + + let mut fixture = TableTestFixture::new(); + fixture.setup_manifest_files().await; + + // Select regular columns plus the _file column + let table_scan = fixture + .table + .scan() + .select(["x", RESERVED_COL_NAME_FILE]) + .with_row_selection_enabled(true) + .build() + .unwrap(); + + let batch_stream = table_scan.to_arrow().await.unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + + // Verify we have 2 columns: x and _file + assert_eq!(batches[0].num_columns(), 2); + + // Verify the x column exists and has correct data + let x_col = batches[0].column_by_name("x").unwrap(); + let x_arr = x_col.as_primitive::(); + assert_eq!(x_arr.value(0), 1); + + // Verify the _file column exists + let file_col = batches[0].column_by_name(RESERVED_COL_NAME_FILE); + assert!( + file_col.is_some(), + "_file column should be present in the batch" + ); + + // Verify the _file column contains a file path + let file_col = file_col.unwrap(); + assert!( + matches!( + file_col.data_type(), + arrow_schema::DataType::RunEndEncoded(_, _) + ), + "_file column should use RunEndEncoded type" + ); + + // Decode the RunArray to verify it contains the file path + let run_array = file_col + .as_any() + .downcast_ref::>() + .expect("_file column should be a RunArray"); + + let values = run_array.values(); + let string_values = values.as_string::(); + assert_eq!(string_values.len(), 1, "Should have 
a single file path"); + + let file_path = string_values.value(0); + assert!( + file_path.ends_with(".parquet"), + "File path should end with .parquet, got: {file_path}" + ); + } + + #[tokio::test] + async fn test_select_file_column_position() { + let mut fixture = TableTestFixture::new(); + fixture.setup_manifest_files().await; + + // Select columns in specific order: x, _file, z + let table_scan = fixture + .table + .scan() + .select(["x", RESERVED_COL_NAME_FILE, "z"]) + .with_row_selection_enabled(true) + .build() + .unwrap(); + + let batch_stream = table_scan.to_arrow().await.unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + + assert_eq!(batches[0].num_columns(), 3); + + // Verify column order: x at position 0, _file at position 1, z at position 2 + let schema = batches[0].schema(); + assert_eq!(schema.field(0).name(), "x"); + assert_eq!(schema.field(1).name(), RESERVED_COL_NAME_FILE); + assert_eq!(schema.field(2).name(), "z"); + + // Verify columns by name also works + assert!(batches[0].column_by_name("x").is_some()); + assert!(batches[0].column_by_name(RESERVED_COL_NAME_FILE).is_some()); + assert!(batches[0].column_by_name("z").is_some()); + } + + #[tokio::test] + async fn test_select_file_column_only() { + let mut fixture = TableTestFixture::new(); + fixture.setup_manifest_files().await; + + // Select only the _file column + let table_scan = fixture + .table + .scan() + .select([RESERVED_COL_NAME_FILE]) + .with_row_selection_enabled(true) + .build() + .unwrap(); + + let batch_stream = table_scan.to_arrow().await.unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + + // Should have exactly 1 column + assert_eq!(batches[0].num_columns(), 1); + + // Verify it's the _file column + let schema = batches[0].schema(); + assert_eq!(schema.field(0).name(), RESERVED_COL_NAME_FILE); + + // Verify the batch has the correct number of rows + // The scan reads files 1.parquet and 3.parquet (2.parquet is deleted) + // 
Each file has 1024 rows, so total is 2048 rows + let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); + assert_eq!(total_rows, 2048); + } + + #[tokio::test] + async fn test_file_column_with_multiple_files() { + use std::collections::HashSet; + + let mut fixture = TableTestFixture::new(); + fixture.setup_manifest_files().await; + + // Select x and _file columns + let table_scan = fixture + .table + .scan() + .select(["x", RESERVED_COL_NAME_FILE]) + .with_row_selection_enabled(true) + .build() + .unwrap(); + + let batch_stream = table_scan.to_arrow().await.unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + + // Collect all unique file paths from the batches + let mut file_paths = HashSet::new(); + for batch in &batches { + let file_col = batch.column_by_name(RESERVED_COL_NAME_FILE).unwrap(); + let run_array = file_col + .as_any() + .downcast_ref::>() + .expect("_file column should be a RunArray"); + + let values = run_array.values(); + let string_values = values.as_string::(); + for i in 0..string_values.len() { + file_paths.insert(string_values.value(i).to_string()); + } + } + + // We should have multiple files (the test creates 1.parquet and 3.parquet) + assert!(!file_paths.is_empty(), "Should have at least one file path"); + + // All paths should end with .parquet + for path in &file_paths { + assert!( + path.ends_with(".parquet"), + "All file paths should end with .parquet, got: {path}" + ); + } + } + + #[tokio::test] + async fn test_file_column_at_start() { + let mut fixture = TableTestFixture::new(); + fixture.setup_manifest_files().await; + + // Select _file at the start + let table_scan = fixture + .table + .scan() + .select([RESERVED_COL_NAME_FILE, "x", "y"]) + .with_row_selection_enabled(true) + .build() + .unwrap(); + + let batch_stream = table_scan.to_arrow().await.unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + + assert_eq!(batches[0].num_columns(), 3); + + // Verify _file is at 
position 0 + let schema = batches[0].schema(); + assert_eq!(schema.field(0).name(), RESERVED_COL_NAME_FILE); + assert_eq!(schema.field(1).name(), "x"); + assert_eq!(schema.field(2).name(), "y"); + } + + #[tokio::test] + async fn test_file_column_at_end() { + let mut fixture = TableTestFixture::new(); + fixture.setup_manifest_files().await; + + // Select _file at the end + let table_scan = fixture + .table + .scan() + .select(["x", "y", RESERVED_COL_NAME_FILE]) + .with_row_selection_enabled(true) + .build() + .unwrap(); + + let batch_stream = table_scan.to_arrow().await.unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + + assert_eq!(batches[0].num_columns(), 3); + + // Verify _file is at position 2 (the end) + let schema = batches[0].schema(); + assert_eq!(schema.field(0).name(), "x"); + assert_eq!(schema.field(1).name(), "y"); + assert_eq!(schema.field(2).name(), RESERVED_COL_NAME_FILE); + } + + #[tokio::test] + async fn test_select_with_repeated_column_names() { + let mut fixture = TableTestFixture::new(); + fixture.setup_manifest_files().await; + + // Select with repeated column names - both regular columns and virtual columns + // Repeated columns should appear multiple times in the result (duplicates are allowed) + let table_scan = fixture + .table + .scan() + .select([ + "x", + RESERVED_COL_NAME_FILE, + "x", // x repeated + "y", + RESERVED_COL_NAME_FILE, // _file repeated + "y", // y repeated + ]) + .with_row_selection_enabled(true) + .build() + .unwrap(); + + let batch_stream = table_scan.to_arrow().await.unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + + // Verify we have exactly 6 columns (duplicates are allowed and preserved) + assert_eq!( + batches[0].num_columns(), + 6, + "Should have exactly 6 columns with duplicates" + ); + + let schema = batches[0].schema(); + + // Verify columns appear in the exact order requested: x, _file, x, y, _file, y + assert_eq!(schema.field(0).name(), "x", "Column 0 
should be x"); + assert_eq!( + schema.field(1).name(), + RESERVED_COL_NAME_FILE, + "Column 1 should be _file" + ); + assert_eq!( + schema.field(2).name(), + "x", + "Column 2 should be x (duplicate)" + ); + assert_eq!(schema.field(3).name(), "y", "Column 3 should be y"); + assert_eq!( + schema.field(4).name(), + RESERVED_COL_NAME_FILE, + "Column 4 should be _file (duplicate)" + ); + assert_eq!( + schema.field(5).name(), + "y", + "Column 5 should be y (duplicate)" + ); + + // Verify all columns have correct data types + assert!( + matches!(schema.field(0).data_type(), arrow_schema::DataType::Int64), + "Column x should be Int64" + ); + assert!( + matches!(schema.field(2).data_type(), arrow_schema::DataType::Int64), + "Column x (duplicate) should be Int64" + ); + assert!( + matches!(schema.field(3).data_type(), arrow_schema::DataType::Int64), + "Column y should be Int64" + ); + assert!( + matches!(schema.field(5).data_type(), arrow_schema::DataType::Int64), + "Column y (duplicate) should be Int64" + ); + assert!( + matches!( + schema.field(1).data_type(), + arrow_schema::DataType::RunEndEncoded(_, _) + ), + "_file column should use RunEndEncoded type" + ); + assert!( + matches!( + schema.field(4).data_type(), + arrow_schema::DataType::RunEndEncoded(_, _) + ), + "_file column (duplicate) should use RunEndEncoded type" + ); + } + + #[tokio::test] + async fn test_scan_deadlock() { + let mut fixture = TableTestFixture::new(); + fixture.setup_deadlock_manifests().await; + + // Create table scan with concurrency limit 1 + // This sets channel size to 1. + // Data manifest has 10 entries -> will block producer. + // Delete manifest is 2nd in list -> won't be processed. + // Consumer 2 (Data) not started -> blocked. + // Consumer 1 (Delete) waiting -> blocked. 
+ let table_scan = fixture + .table + .scan() + .with_concurrency_limit(1) + .build() + .unwrap(); + + // This should timeout/hang if deadlock exists + // We can use tokio::time::timeout + let result = tokio::time::timeout(std::time::Duration::from_secs(5), async { + table_scan + .plan_files() + .await + .unwrap() + .try_collect::>() + .await + }) + .await; + + // Assert it finished (didn't timeout) + assert!(result.is_ok(), "Scan timed out - deadlock detected"); + } } diff --git a/crates/iceberg/src/scan/task.rs b/crates/iceberg/src/scan/task.rs index e1ef241a57..5349a9bdd2 100644 --- a/crates/iceberg/src/scan/task.rs +++ b/crates/iceberg/src/scan/task.rs @@ -104,6 +104,9 @@ pub struct FileScanTask { #[serde(serialize_with = "serialize_not_implemented")] #[serde(deserialize_with = "deserialize_not_implemented")] pub name_mapping: Option>, + + /// Whether this scan task should treat column names as case-sensitive when binding predicates. + pub case_sensitive: bool, } impl FileScanTask { diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs index 456b754408..0379465584 100644 --- a/crates/iceberg/src/spec/datatypes.rs +++ b/crates/iceberg/src/spec/datatypes.rs @@ -427,8 +427,7 @@ impl<'de> Deserialize<'de> for StructType { let type_val: String = map.next_value()?; if type_val != "struct" { return Err(serde::de::Error::custom(format!( - "expected type 'struct', got '{}'", - type_val + "expected type 'struct', got '{type_val}'" ))); } } diff --git a/crates/iceberg/src/spec/manifest/_serde.rs b/crates/iceberg/src/spec/manifest/_serde.rs index 7738af46d4..247b6dde5f 100644 --- a/crates/iceberg/src/spec/manifest/_serde.rs +++ b/crates/iceberg/src/spec/manifest/_serde.rs @@ -22,7 +22,7 @@ use serde_with::serde_as; use super::{Datum, ManifestEntry, Schema, Struct}; use crate::spec::{FormatVersion, Literal, RawLiteral, StructType, Type}; -use crate::{Error, ErrorKind}; +use crate::{Error, ErrorKind, metadata_columns}; #[derive(Serialize, 
Deserialize)] pub(super) struct ManifestEntryV2 { @@ -153,7 +153,7 @@ impl DataFileSerde { lower_bounds: Some(to_bytes_entry(value.lower_bounds)?), upper_bounds: Some(to_bytes_entry(value.upper_bounds)?), key_metadata: value.key_metadata.map(serde_bytes::ByteBuf::from), - split_offsets: Some(value.split_offsets), + split_offsets: value.split_offsets, equality_ids: value.equality_ids, sort_order_id: value.sort_order_id, first_row_id: value.first_row_id, @@ -222,7 +222,7 @@ impl DataFileSerde { .transpose()? .unwrap_or_default(), key_metadata: self.key_metadata.map(|v| v.to_vec()), - split_offsets: self.split_offsets.unwrap_or_default(), + split_offsets: self.split_offsets, equality_ids: self.equality_ids, sort_order_id: self.sort_order_id, partition_spec_id, @@ -245,8 +245,12 @@ struct BytesEntry { fn parse_bytes_entry(v: Vec, schema: &Schema) -> Result, Error> { let mut m = HashMap::with_capacity(v.len()); for entry in v { - // We ignore the entry if the field is not found in the schema, due to schema evolution. - if let Some(field) = schema.field_by_id(entry.key) { + // First try to find the field in the schema, or check if it's a reserved metadata field + let field = schema + .field_by_id(entry.key) + .or_else(|| metadata_columns::get_metadata_field(entry.key).ok()); + + if let Some(field) = field { let data_type = field .field_type .as_primitive_type() @@ -259,6 +263,7 @@ fn parse_bytes_entry(v: Vec, schema: &Schema) -> Result, + pub(crate) split_offsets: Option>, /// field id: 135 /// element field id: 136 /// @@ -247,8 +248,9 @@ impl DataFile { } /// Get the split offsets of the data file. /// For example, all row group offsets in a Parquet file. - pub fn split_offsets(&self) -> &[i64] { - &self.split_offsets + /// Returns `None` if no split offsets are present. + pub fn split_offsets(&self) -> Option<&[i64]> { + self.split_offsets.as_deref() } /// Get the equality ids of the data file. /// Field ids used to determine row equality in equality delete files. 
diff --git a/crates/iceberg/src/spec/manifest/mod.rs b/crates/iceberg/src/spec/manifest/mod.rs index 51219bfdb7..b126396e3c 100644 --- a/crates/iceberg/src/spec/manifest/mod.rs +++ b/crates/iceberg/src/spec/manifest/mod.rs @@ -257,7 +257,7 @@ mod tests { snapshot_id: None, sequence_number: None, file_sequence_number: None, - data_file: DataFile {content:DataContentType::Data,file_path:"s3a://icebergdata/demo/s1/t1/data/00000-0-ba56fbfa-f2ff-40c9-bb27-565ad6dc2be8-00000.parquet".to_string(),file_format:DataFileFormat::Parquet,partition:Struct::empty(),record_count:1,file_size_in_bytes:5442,column_sizes:HashMap::from([(0,73),(6,34),(2,73),(7,61),(3,61),(5,62),(9,79),(10,73),(1,61),(4,73),(8,73)]),value_counts:HashMap::from([(4,1),(5,1),(2,1),(0,1),(3,1),(6,1),(8,1),(1,1),(10,1),(7,1),(9,1)]),null_value_counts:HashMap::from([(1,0),(6,0),(2,0),(8,0),(0,0),(3,0),(5,0),(9,0),(7,0),(4,0),(10,0)]),nan_value_counts:HashMap::new(),lower_bounds:HashMap::new(),upper_bounds:HashMap::new(),key_metadata:None,split_offsets:vec![4],equality_ids:Some(Vec::new()),sort_order_id:None, partition_spec_id: 0,first_row_id: None,referenced_data_file: None,content_offset: None,content_size_in_bytes: None } + data_file: DataFile {content:DataContentType::Data,file_path:"s3a://icebergdata/demo/s1/t1/data/00000-0-ba56fbfa-f2ff-40c9-bb27-565ad6dc2be8-00000.parquet".to_string(),file_format:DataFileFormat::Parquet,partition:Struct::empty(),record_count:1,file_size_in_bytes:5442,column_sizes:HashMap::from([(0,73),(6,34),(2,73),(7,61),(3,61),(5,62),(9,79),(10,73),(1,61),(4,73),(8,73)]),value_counts:HashMap::from([(4,1),(5,1),(2,1),(0,1),(3,1),(6,1),(8,1),(1,1),(10,1),(7,1),(9,1)]),null_value_counts:HashMap::from([(1,0),(6,0),(2,0),(8,0),(0,0),(3,0),(5,0),(9,0),(7,0),(4,0),(10,0)]),nan_value_counts:HashMap::new(),lower_bounds:HashMap::new(),upper_bounds:HashMap::new(),key_metadata:None,split_offsets:Some(vec![4]),equality_ids:Some(Vec::new()),sort_order_id:None, partition_spec_id: 0,first_row_id: 
None,referenced_data_file: None,content_offset: None,content_size_in_bytes: None } } ]; @@ -435,7 +435,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: Some(Vec::new()), sort_order_id: None, partition_spec_id: 0, @@ -532,7 +532,7 @@ mod tests { lower_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]), upper_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: Some(0), partition_spec_id: 0, @@ -640,7 +640,7 @@ mod tests { (3, Datum::string("x")) ]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: Some(0), partition_spec_id: 0, @@ -749,7 +749,7 @@ mod tests { (3, Datum::string("x")) ]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -840,7 +840,7 @@ mod tests { (2, Datum::int(2)), ]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -922,7 +922,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -957,7 +957,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -992,7 +992,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, 
partition_spec_id: 0, @@ -1027,7 +1027,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -1182,7 +1182,7 @@ mod tests { "lower_bounds": [], "upper_bounds": [], "key_metadata": null, - "split_offsets": [], + "split_offsets": null, "equality_ids": null, "sort_order_id": null, "first_row_id": null, @@ -1213,7 +1213,7 @@ mod tests { "lower_bounds": [], "upper_bounds": [], "key_metadata": null, - "split_offsets": [], + "split_offsets": null, "equality_ids": null, "sort_order_id": null, "first_row_id": null, diff --git a/crates/iceberg/src/spec/manifest/writer.rs b/crates/iceberg/src/spec/manifest/writer.rs index ebb0590bcf..0669651603 100644 --- a/crates/iceberg/src/spec/manifest/writer.rs +++ b/crates/iceberg/src/spec/manifest/writer.rs @@ -388,10 +388,10 @@ impl ManifestWriter { self.existing_rows += entry.data_file.record_count; } } - if entry.is_alive() { - if let Some(seq_num) = entry.sequence_number { - self.min_seq_num = Some(self.min_seq_num.map_or(seq_num, |v| min(v, seq_num))); - } + if entry.is_alive() + && let Some(seq_num) = entry.sequence_number + { + self.min_seq_num = Some(self.min_seq_num.map_or(seq_num, |v| min(v, seq_num))); } self.manifest_entries.push(entry); Ok(()) @@ -437,9 +437,12 @@ impl ManifestWriter { "format-version".to_string(), (self.metadata.format_version as u8).to_string(), )?; - if self.metadata.format_version == FormatVersion::V2 { - avro_writer - .add_user_metadata("content".to_string(), self.metadata.content.to_string())?; + match self.metadata.format_version { + FormatVersion::V1 => {} + FormatVersion::V2 | FormatVersion::V3 => { + avro_writer + .add_user_metadata("content".to_string(), self.metadata.content.to_string())?; + } } let partition_summary = self.construct_partition_summaries(&partition_type)?; @@ -608,7 +611,7 @@ mod tests { lower_bounds: 
HashMap::new(), upper_bounds: HashMap::new(), key_metadata: Some(Vec::new()), - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -637,7 +640,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: Some(Vec::new()), - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -666,7 +669,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: Some(Vec::new()), - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -708,4 +711,93 @@ mod tests { entries[0].file_sequence_number = None; assert_eq!(actual_manifest, Manifest::new(metadata, entries)); } + + #[tokio::test] + async fn test_v3_delete_manifest_delete_file_roundtrip() { + let schema = Arc::new( + Schema::builder() + .with_fields(vec![ + Arc::new(NestedField::optional( + 1, + "id", + Type::Primitive(PrimitiveType::Long), + )), + Arc::new(NestedField::optional( + 2, + "data", + Type::Primitive(PrimitiveType::String), + )), + ]) + .build() + .unwrap(), + ); + + let partition_spec = PartitionSpec::builder(schema.clone()) + .with_spec_id(0) + .build() + .unwrap(); + + // Create a position delete file entry + let delete_entry = ManifestEntry { + status: ManifestStatus::Added, + snapshot_id: None, + sequence_number: None, + file_sequence_number: None, + data_file: DataFile { + content: DataContentType::PositionDeletes, + file_path: "s3://bucket/table/data/delete-00000.parquet".to_string(), + file_format: DataFileFormat::Parquet, + partition: Struct::empty(), + record_count: 10, + file_size_in_bytes: 1024, + column_sizes: HashMap::new(), + value_counts: HashMap::new(), + null_value_counts: HashMap::new(), + nan_value_counts: HashMap::new(), + lower_bounds: HashMap::new(), + upper_bounds: HashMap::new(), + key_metadata: None, + split_offsets: None, + 
equality_ids: None, + sort_order_id: None, + partition_spec_id: 0, + first_row_id: None, + referenced_data_file: None, + content_offset: None, + content_size_in_bytes: None, + }, + }; + + // Write a V3 delete manifest + let tmp_dir = TempDir::new().unwrap(); + let path = tmp_dir.path().join("v3_delete_manifest.avro"); + let io = FileIOBuilder::new_fs_io().build().unwrap(); + let output_file = io.new_output(path.to_str().unwrap()).unwrap(); + + let mut writer = ManifestWriterBuilder::new( + output_file, + Some(1), + None, + schema.clone(), + partition_spec.clone(), + ) + .build_v3_deletes(); + + writer.add_entry(delete_entry).unwrap(); + let manifest_file = writer.write_manifest_file().await.unwrap(); + + // The returned ManifestFile correctly reports Deletes content + assert_eq!(manifest_file.content, ManifestContentType::Deletes); + + // Read back the manifest file + let actual_manifest = + Manifest::parse_avro(fs::read(&path).expect("read_file must succeed").as_slice()) + .unwrap(); + + // Verify the content type is correctly preserved as Deletes + assert_eq!( + actual_manifest.metadata().content, + ManifestContentType::Deletes, + ); + } } diff --git a/crates/iceberg/src/spec/mod.rs b/crates/iceberg/src/spec/mod.rs index 44b35e5a6b..a2b540f08b 100644 --- a/crates/iceberg/src/spec/mod.rs +++ b/crates/iceberg/src/spec/mod.rs @@ -49,6 +49,7 @@ pub use snapshot_summary::*; pub use sort::*; pub use statistic_file::*; pub use table_metadata::*; +pub(crate) use table_metadata_builder::FIRST_FIELD_ID; pub use table_properties::*; pub use transform::*; pub use values::*; diff --git a/crates/iceberg/src/spec/schema/mod.rs b/crates/iceberg/src/spec/schema/mod.rs index 7080b6e700..13ad41818b 100644 --- a/crates/iceberg/src/spec/schema/mod.rs +++ b/crates/iceberg/src/spec/schema/mod.rs @@ -102,8 +102,8 @@ impl SchemaBuilder { /// Reassignment starts from the field-id specified in `start_from` (inclusive). 
/// /// All specified aliases and identifier fields will be updated to the new field-ids. - pub(crate) fn with_reassigned_field_ids(mut self, start_from: u32) -> Self { - self.reassign_field_ids_from = Some(start_from.try_into().unwrap_or(i32::MAX)); + pub(crate) fn with_reassigned_field_ids(mut self, start_from: i32) -> Self { + self.reassign_field_ids_from = Some(start_from); self } diff --git a/crates/iceberg/src/spec/schema/prune_columns.rs b/crates/iceberg/src/spec/schema/prune_columns.rs index 5a2f0b50fc..14f1bfd25f 100644 --- a/crates/iceberg/src/spec/schema/prune_columns.rs +++ b/crates/iceberg/src/spec/schema/prune_columns.rs @@ -110,19 +110,19 @@ impl SchemaVisitor for PruneColumn { if self.select_full_types { Ok(Some(*field.field_type.clone())) } else if field.field_type.is_struct() { - return Ok(Some(Type::Struct(PruneColumn::project_selected_struct( + Ok(Some(Type::Struct(PruneColumn::project_selected_struct( value, - )?))); + )?))) } else if !field.field_type.is_nested() { - return Ok(Some(*field.field_type.clone())); + Ok(Some(*field.field_type.clone())) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, "Can't project list or map field directly when not selecting full type." 
.to_string(), ) .with_context("field_id", field.id.to_string()) - .with_context("field_type", field.field_type.to_string())); + .with_context("field_type", field.field_type.to_string())) } } else { Ok(value) @@ -174,20 +174,20 @@ impl SchemaVisitor for PruneColumn { Ok(Some(Type::List(list.clone()))) } else if list.element_field.field_type.is_struct() { let projected_struct = PruneColumn::project_selected_struct(value).unwrap(); - return Ok(Some(Type::List(PruneColumn::project_list( + Ok(Some(Type::List(PruneColumn::project_list( list, Type::Struct(projected_struct), - )?))); + )?))) } else if list.element_field.field_type.is_primitive() { - return Ok(Some(Type::List(list.clone()))); + Ok(Some(Type::List(list.clone()))) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, format!( "Cannot explicitly project List or Map types, List element {} of type {} was selected", list.element_field.id, list.element_field.field_type ), - )); + )) } } else if let Some(result) = value { Ok(Some(Type::List(PruneColumn::project_list(list, result)?))) @@ -208,26 +208,26 @@ impl SchemaVisitor for PruneColumn { } else if map.value_field.field_type.is_struct() { let projected_struct = PruneColumn::project_selected_struct(Some(value.unwrap())).unwrap(); - return Ok(Some(Type::Map(PruneColumn::project_map( + Ok(Some(Type::Map(PruneColumn::project_map( map, Type::Struct(projected_struct), - )?))); + )?))) } else if map.value_field.field_type.is_primitive() { - return Ok(Some(Type::Map(map.clone()))); + Ok(Some(Type::Map(map.clone()))) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, format!( "Cannot explicitly project List or Map types, Map value {} of type {} was selected", map.value_field.id, map.value_field.field_type ), - )); + )) } } else if let Some(value_result) = value { - return Ok(Some(Type::Map(PruneColumn::project_map( + Ok(Some(Type::Map(PruneColumn::project_map( map, value_result, - )?))); + )?))) } else if 
self.selected.contains(&map.key_field.id) { Ok(Some(Type::Map(map.clone()))) } else { diff --git a/crates/iceberg/src/spec/snapshot.rs b/crates/iceberg/src/spec/snapshot.rs index 5371cf68f2..270279988b 100644 --- a/crates/iceberg/src/spec/snapshot.rs +++ b/crates/iceberg/src/spec/snapshot.rs @@ -266,9 +266,9 @@ pub(super) mod _serde { use serde::{Deserialize, Serialize}; use super::{Operation, Snapshot, Summary}; - use crate::Error; use crate::spec::SchemaId; use crate::spec::snapshot::SnapshotRowRange; + use crate::{Error, ErrorKind}; #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] @@ -408,9 +408,19 @@ pub(super) mod _serde { timestamp_ms: v1.timestamp_ms, manifest_list: match (v1.manifest_list, v1.manifests) { (Some(file), None) => file, - (Some(_), Some(_)) => "Invalid v1 snapshot, when manifest list provided, manifest files should be omitted".to_string(), - (None, _) => "Unsupported v1 snapshot, only manifest list is supported".to_string() - }, + (Some(_), Some(_)) => { + return Err(Error::new( + ErrorKind::DataInvalid, + "Invalid v1 snapshot, when manifest list provided, manifest files should be omitted", + )); + } + (None, _) => { + return Err(Error::new( + ErrorKind::DataInvalid, + "Unsupported v1 snapshot, only manifest list is supported", + )); + } + }, summary: v1.summary.unwrap_or(Summary { operation: Operation::default(), additional_properties: HashMap::new(), @@ -517,6 +527,7 @@ mod tests { use chrono::{TimeZone, Utc}; + use crate::spec::TableMetadata; use crate::spec::snapshot::_serde::SnapshotV1; use crate::spec::snapshot::{Operation, Snapshot, Summary}; @@ -604,6 +615,84 @@ mod tests { ); } + #[test] + fn test_v1_snapshot_with_manifest_list_and_manifests() { + { + let metadata = r#" + { + "format-version": 1, + "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c", + "location": "s3://bucket/test/location", + "last-updated-ms": 1700000000000, + "last-column-id": 1, + "schema": { + "type": "struct", + 
"fields": [ + {"id": 1, "name": "x", "required": true, "type": "long"} + ] + }, + "partition-spec": [], + "properties": {}, + "current-snapshot-id": 111111111, + "snapshots": [ + { + "snapshot-id": 111111111, + "timestamp-ms": 1600000000000, + "summary": {"operation": "append"}, + "manifest-list": "s3://bucket/metadata/snap-123.avro", + "manifests": ["s3://bucket/metadata/manifest-1.avro"] + } + ] + } + "#; + + let result_both_manifest_list_and_manifest_set = + serde_json::from_str::(metadata); + assert!(result_both_manifest_list_and_manifest_set.is_err()); + assert_eq!( + result_both_manifest_list_and_manifest_set + .unwrap_err() + .to_string(), + "DataInvalid => Invalid v1 snapshot, when manifest list provided, manifest files should be omitted" + ) + } + + { + let metadata = r#" + { + "format-version": 1, + "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c", + "location": "s3://bucket/test/location", + "last-updated-ms": 1700000000000, + "last-column-id": 1, + "schema": { + "type": "struct", + "fields": [ + {"id": 1, "name": "x", "required": true, "type": "long"} + ] + }, + "partition-spec": [], + "properties": {}, + "current-snapshot-id": 111111111, + "snapshots": [ + { + "snapshot-id": 111111111, + "timestamp-ms": 1600000000000, + "summary": {"operation": "append"}, + "manifests": ["s3://bucket/metadata/manifest-1.avro"] + } + ] + } + "#; + let result_missing_manifest_list = serde_json::from_str::(metadata); + assert!(result_missing_manifest_list.is_err()); + assert_eq!( + result_missing_manifest_list.unwrap_err().to_string(), + "DataInvalid => Unsupported v1 snapshot, only manifest list is supported" + ) + } + } + #[test] fn test_snapshot_v1_to_v2_with_missing_summary() { use crate::spec::snapshot::_serde::SnapshotV1; diff --git a/crates/iceberg/src/spec/snapshot_summary.rs b/crates/iceberg/src/spec/snapshot_summary.rs index 4cd3715e06..c67ee37d3e 100644 --- a/crates/iceberg/src/spec/snapshot_summary.rs +++ b/crates/iceberg/src/spec/snapshot_summary.rs @@ 
-767,7 +767,7 @@ mod tests { (3, Datum::string("x")), ]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: Some(0), partition_spec_id: 0, @@ -799,7 +799,7 @@ mod tests { (3, Datum::string("x")), ]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: Some(0), partition_spec_id: 0, @@ -910,7 +910,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -938,7 +938,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -993,7 +993,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index 06b32cc847..cfa25deccb 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -390,18 +390,18 @@ impl TableMetadata { } fn construct_refs(&mut self) { - if let Some(current_snapshot_id) = self.current_snapshot_id { - if !self.refs.contains_key(MAIN_BRANCH) { - self.refs - .insert(MAIN_BRANCH.to_string(), SnapshotReference { - snapshot_id: current_snapshot_id, - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: None, - max_snapshot_age_ms: None, - max_ref_age_ms: None, - }, - }); - } + if let Some(current_snapshot_id) = self.current_snapshot_id + && !self.refs.contains_key(MAIN_BRANCH) + { + self.refs + .insert(MAIN_BRANCH.to_string(), SnapshotReference { + snapshot_id: current_snapshot_id, + retention: SnapshotRetention::Branch { + 
min_snapshots_to_keep: None, + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, + }); } } @@ -506,6 +506,19 @@ impl TableMetadata { /// If the default sort order is unsorted but the sort order is not present, add it fn try_normalize_sort_order(&mut self) -> Result<()> { + // Validate that sort order ID 0 (reserved for unsorted) has no fields + if let Some(sort_order) = self.sort_order_by_id(SortOrder::UNSORTED_ORDER_ID) + && !sort_order.fields.is_empty() + { + return Err(Error::new( + ErrorKind::Unexpected, + format!( + "Sort order ID {} is reserved for unsorted order", + SortOrder::UNSORTED_ORDER_ID + ), + )); + } + if self.sort_order_by_id(self.default_sort_order_id).is_some() { return Ok(()); } @@ -572,17 +585,17 @@ impl TableMetadata { let main_ref = self.refs.get(MAIN_BRANCH); if self.current_snapshot_id.is_some() { - if let Some(main_ref) = main_ref { - if main_ref.snapshot_id != self.current_snapshot_id.unwrap_or_default() { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Current snapshot id does not match main branch ({:?} != {:?})", - self.current_snapshot_id.unwrap_or_default(), - main_ref.snapshot_id - ), - )); - } + if let Some(main_ref) = main_ref + && main_ref.snapshot_id != self.current_snapshot_id.unwrap_or_default() + { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Current snapshot id does not match main branch ({:?} != {:?})", + self.current_snapshot_id.unwrap_or_default(), + main_ref.snapshot_id + ), + )); } } else if main_ref.is_some() { return Err(Error::new( @@ -606,22 +619,21 @@ impl TableMetadata { )); } - if self.format_version >= FormatVersion::V2 { - if let Some(snapshot) = self + if self.format_version >= FormatVersion::V2 + && let Some(snapshot) = self .snapshots .values() .find(|snapshot| snapshot.sequence_number() > self.last_sequence_number) - { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Invalid snapshot with id {} and sequence number {} greater than last sequence 
number {}", - snapshot.snapshot_id(), - snapshot.sequence_number(), - self.last_sequence_number - ), - )); - } + { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid snapshot with id {} and sequence number {} greater than last sequence number {}", + snapshot.snapshot_id(), + snapshot.sequence_number(), + self.last_sequence_number + ), + )); } Ok(()) @@ -3796,4 +3808,57 @@ mod tests { assert!(final_metadata.name_exists_in_any_schema("new_field")); // only in current schema assert!(!final_metadata.name_exists_in_any_schema("never_existed")); } + + #[test] + fn test_invalid_sort_order_id_zero_with_fields() { + let metadata = r#" + { + "format-version": 2, + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", + "location": "s3://bucket/test/location", + "last-sequence-number": 111, + "last-updated-ms": 1600000000000, + "last-column-id": 3, + "current-schema-id": 1, + "schemas": [ + { + "type": "struct", + "schema-id": 1, + "fields": [ + {"id": 1, "name": "x", "required": true, "type": "long"}, + {"id": 2, "name": "y", "required": true, "type": "long"} + ] + } + ], + "default-spec-id": 0, + "partition-specs": [{"spec-id": 0, "fields": []}], + "last-partition-id": 999, + "default-sort-order-id": 0, + "sort-orders": [ + { + "order-id": 0, + "fields": [ + { + "transform": "identity", + "source-id": 1, + "direction": "asc", + "null-order": "nulls-first" + } + ] + } + ], + "properties": {}, + "current-snapshot-id": -1, + "snapshots": [] + } + "#; + + let result: Result = serde_json::from_str(metadata); + + // Should fail because sort order ID 0 is reserved for unsorted order and cannot have fields + assert!( + result.is_err(), + "Parsing should fail for sort order ID 0 with fields" + ); + } } diff --git a/crates/iceberg/src/spec/table_metadata_builder.rs b/crates/iceberg/src/spec/table_metadata_builder.rs index 6b8ce1e6a5..3db327d48a 100644 --- a/crates/iceberg/src/spec/table_metadata_builder.rs +++ b/crates/iceberg/src/spec/table_metadata_builder.rs 
@@ -31,7 +31,7 @@ use crate::error::{Error, ErrorKind, Result}; use crate::spec::{EncryptedKey, INITIAL_ROW_ID, MIN_FORMAT_VERSION_ROW_LINEAGE}; use crate::{TableCreation, TableUpdate}; -const FIRST_FIELD_ID: u32 = 1; +pub(crate) const FIRST_FIELD_ID: i32 = 1; /// Manipulating table metadata. /// @@ -572,7 +572,6 @@ impl TableMetadataBuilder { pub fn remove_ref(mut self, ref_name: &str) -> Self { if ref_name == MAIN_BRANCH { self.metadata.current_snapshot_id = None; - self.metadata.snapshot_log.clear(); } if self.metadata.refs.remove(ref_name).is_some() || ref_name == MAIN_BRANCH { @@ -2237,6 +2236,73 @@ mod tests { assert_eq!(result.metadata.current_snapshot().unwrap().snapshot_id(), 2); } + #[test] + fn test_remove_main_ref_keeps_snapshot_log() { + let builder = builder_without_changes(FormatVersion::V2); + + let snapshot = Snapshot::builder() + .with_snapshot_id(1) + .with_timestamp_ms(builder.metadata.last_updated_ms + 1) + .with_sequence_number(0) + .with_schema_id(0) + .with_manifest_list("/snap-1.avro") + .with_summary(Summary { + operation: Operation::Append, + additional_properties: HashMap::from_iter(vec![ + ( + "spark.app.id".to_string(), + "local-1662532784305".to_string(), + ), + ("added-data-files".to_string(), "4".to_string()), + ("added-records".to_string(), "4".to_string()), + ("added-files-size".to_string(), "6001".to_string()), + ]), + }) + .build(); + + let result = builder + .add_snapshot(snapshot.clone()) + .unwrap() + .set_ref(MAIN_BRANCH, SnapshotReference { + snapshot_id: 1, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: Some(10), + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, + }) + .unwrap() + .build() + .unwrap(); + + // Verify snapshot log was created + assert_eq!(result.metadata.snapshot_log.len(), 1); + assert_eq!(result.metadata.snapshot_log[0].snapshot_id, 1); + assert_eq!(result.metadata.current_snapshot_id, Some(1)); + + // Remove the main ref + let result_after_remove = result + .metadata + 
.into_builder(Some( + "s3://bucket/test/location/metadata/metadata2.json".to_string(), + )) + .remove_ref(MAIN_BRANCH) + .build() + .unwrap(); + + // Verify snapshot log is kept even after removing main ref + assert_eq!(result_after_remove.metadata.snapshot_log.len(), 1); + assert_eq!(result_after_remove.metadata.snapshot_log[0].snapshot_id, 1); + assert_eq!(result_after_remove.metadata.current_snapshot_id, None); + assert_eq!(result_after_remove.changes.len(), 1); + assert_eq!( + result_after_remove.changes[0], + TableUpdate::RemoveSnapshotRef { + ref_name: MAIN_BRANCH.to_string() + } + ); + } + #[test] fn test_set_branch_snapshot_creates_branch_if_not_exists() { let builder = builder_without_changes(FormatVersion::V2); diff --git a/crates/iceberg/src/spec/table_properties.rs b/crates/iceberg/src/spec/table_properties.rs index 4975456010..413604f51c 100644 --- a/crates/iceberg/src/spec/table_properties.rs +++ b/crates/iceberg/src/spec/table_properties.rs @@ -49,6 +49,8 @@ pub struct TableProperties { pub write_format_default: String, /// The target file size for files. pub write_target_file_size_bytes: usize, + /// Whether to use `FanoutWriter` for partitioned tables. + pub write_datafusion_fanout_enabled: bool, } impl TableProperties { @@ -137,6 +139,11 @@ impl TableProperties { pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES: &str = "write.target-file-size-bytes"; /// Default target file size pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT: usize = 512 * 1024 * 1024; // 512 MB + /// Whether to use `FanoutWriter` for partitioned tables (handles unsorted data). + /// If false, uses `ClusteredWriter` (requires sorted data, more memory efficient). 
+ pub const PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED: &str = "write.datafusion.fanout.enabled"; + /// Default value for fanout writer enabled + pub const PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT: bool = true; } impl TryFrom<&HashMap> for TableProperties { @@ -175,6 +182,11 @@ impl TryFrom<&HashMap> for TableProperties { TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES, TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT, )?, + write_datafusion_fanout_enabled: parse_property( + props, + TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED, + TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT, + )?, }) } } diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 6068716eff..354dc1889c 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -711,10 +711,10 @@ impl Transform { PredicateOperator::GreaterThan => Some(PredicateOperator::GreaterThanOrEq), PredicateOperator::StartsWith => match datum.literal() { PrimitiveLiteral::String(s) => { - if let Some(w) = width { - if s.len() == w as usize { - return Some(PredicateOperator::Eq); - }; + if let Some(w) = width + && s.len() == w as usize + { + return Some(PredicateOperator::Eq); }; Some(*op) } @@ -757,47 +757,45 @@ impl Transform { _ => false, }; - if should_adjust { - if let &PrimitiveLiteral::Int(v) = transformed.literal() { - match op { - PredicateOperator::LessThan - | PredicateOperator::LessThanOrEq - | PredicateOperator::In => { - if v < 0 { + if should_adjust && let &PrimitiveLiteral::Int(v) = transformed.literal() { + match op { + PredicateOperator::LessThan + | PredicateOperator::LessThanOrEq + | PredicateOperator::In => { + if v < 0 { + // # TODO + // An ugly hack to fix. Refine the increment and decrement logic later. 
+ match self { + Transform::Day => { + return Some(AdjustedProjection::Single(Datum::date(v + 1))); + } + _ => { + return Some(AdjustedProjection::Single(Datum::int(v + 1))); + } + } + }; + } + PredicateOperator::Eq => { + if v < 0 { + let new_set = FnvHashSet::from_iter(vec![ + transformed.to_owned(), // # TODO // An ugly hack to fix. Refine the increment and decrement logic later. - match self { - Transform::Day => { - return Some(AdjustedProjection::Single(Datum::date(v + 1))); + { + match self { + Transform::Day => Datum::date(v + 1), + _ => Datum::int(v + 1), } - _ => { - return Some(AdjustedProjection::Single(Datum::int(v + 1))); - } - } - }; - } - PredicateOperator::Eq => { - if v < 0 { - let new_set = FnvHashSet::from_iter(vec![ - transformed.to_owned(), - // # TODO - // An ugly hack to fix. Refine the increment and decrement logic later. - { - match self { - Transform::Day => Datum::date(v + 1), - _ => Datum::int(v + 1), - } - }, - ]); - return Some(AdjustedProjection::Set(new_set)); - } - } - _ => { - return None; + }, + ]); + return Some(AdjustedProjection::Set(new_set)); } } - }; - } + _ => { + return None; + } + } + }; None } diff --git a/crates/iceberg/src/spec/values/datum.rs b/crates/iceberg/src/spec/values/datum.rs index cb60fb94e9..88209ae95c 100644 --- a/crates/iceberg/src/spec/values/datum.rs +++ b/crates/iceberg/src/spec/values/datum.rs @@ -166,36 +166,16 @@ impl<'de> Deserialize<'de> for Datum { // Compare following iceberg float ordering rules: // -NaN < -Infinity < -value < -0 < 0 < value < Infinity < NaN -fn iceberg_float_cmp(a: T, b: T) -> Option { - if a.is_nan() && b.is_nan() { - return match (a.is_sign_negative(), b.is_sign_negative()) { - (true, false) => Some(Ordering::Less), - (false, true) => Some(Ordering::Greater), - _ => Some(Ordering::Equal), - }; - } - - if a.is_nan() { - return Some(if a.is_sign_negative() { - Ordering::Less - } else { - Ordering::Greater - }); - } - - if b.is_nan() { - return Some(if b.is_sign_negative() { - 
Ordering::Greater - } else { - Ordering::Less - }); - } +fn iceberg_float_cmp_f32(a: OrderedFloat, b: OrderedFloat) -> Option { + Some(a.total_cmp(&b)) +} - a.partial_cmp(&b) +fn iceberg_float_cmp_f64(a: OrderedFloat, b: OrderedFloat) -> Option { + Some(a.total_cmp(&b)) } impl PartialOrd for Datum { - fn partial_cmp(&self, other: &Self) -> Option { + fn partial_cmp(&self, other: &Self) -> Option { match (&self.literal, &other.literal, &self.r#type, &other.r#type) { // generate the arm with same type and same literal ( @@ -221,13 +201,13 @@ impl PartialOrd for Datum { PrimitiveLiteral::Float(other_val), PrimitiveType::Float, PrimitiveType::Float, - ) => iceberg_float_cmp(*val, *other_val), + ) => iceberg_float_cmp_f32(*val, *other_val), ( PrimitiveLiteral::Double(val), PrimitiveLiteral::Double(other_val), PrimitiveType::Double, PrimitiveType::Double, - ) => iceberg_float_cmp(*val, *other_val), + ) => iceberg_float_cmp_f64(*val, *other_val), ( PrimitiveLiteral::Int(val), PrimitiveLiteral::Int(other_val), diff --git a/crates/iceberg/src/spec/values/tests.rs b/crates/iceberg/src/spec/values/tests.rs index 0e99d44dfe..bb10701d87 100644 --- a/crates/iceberg/src/spec/values/tests.rs +++ b/crates/iceberg/src/spec/values/tests.rs @@ -447,7 +447,7 @@ fn check_raw_literal_bytes_error_via_avro(input_bytes: Vec, expected_type: & let avro_value = Value::Bytes(input_bytes); let raw_literal: _serde::RawLiteral = apache_avro::from_value(&avro_value).unwrap(); let result = raw_literal.try_into(expected_type); - assert!(result.is_err(), "Expected error but got: {:?}", result); + assert!(result.is_err(), "Expected error but got: {result:?}"); } #[test] @@ -1293,6 +1293,31 @@ fn test_iceberg_float_order() { assert_eq!(double_sorted, double_expected); } +#[test] +fn test_negative_zero_less_than_positive_zero() { + { + let neg_zero = Datum::float(-0.0); + let pos_zero = Datum::float(0.0); + + assert_eq!( + neg_zero.partial_cmp(&pos_zero), + Some(std::cmp::Ordering::Less), + "IEEE 754 
totalOrder requires -0.0 < +0.0 on F32" + ); + } + + { + let neg_zero = Datum::double(-0.0); + let pos_zero = Datum::double(0.0); + + assert_eq!( + neg_zero.partial_cmp(&pos_zero), + Some(std::cmp::Ordering::Less), + "IEEE 754 totalOrder requires -0.0 < +0.0 on F64" + ); + } +} + /// Test Date deserialization from JSON as number (days since epoch). /// /// This reproduces the scenario from Iceberg Java's TestAddFilesProcedure where: diff --git a/crates/iceberg/src/spec/view_metadata_builder.rs b/crates/iceberg/src/spec/view_metadata_builder.rs index 9f542a7c61..38041ca625 100644 --- a/crates/iceberg/src/spec/view_metadata_builder.rs +++ b/crates/iceberg/src/spec/view_metadata_builder.rs @@ -478,10 +478,10 @@ impl ViewMetadataBuilder { // as it might panic if the metadata is invalid. self.metadata.validate()?; - if let Some(previous) = self.previous_view_version.take() { - if !allow_replace_drop_dialects(&self.metadata.properties) { - require_no_dialect_dropped(&previous, self.metadata.current_version())?; - } + if let Some(previous) = self.previous_view_version.take() + && !allow_replace_drop_dialects(&self.metadata.properties) + { + require_no_dialect_dropped(&previous, self.metadata.current_version())?; } let _expired_versions = self.expire_versions(); diff --git a/crates/iceberg/src/transaction/mod.rs b/crates/iceberg/src/transaction/mod.rs index 4116264a14..8ddaa26698 100644 --- a/crates/iceberg/src/transaction/mod.rs +++ b/crates/iceberg/src/transaction/mod.rs @@ -518,7 +518,7 @@ mod test_row_lineage { fn file_with_rows(record_count: u64) -> DataFile { DataFileBuilder::default() .content(DataContentType::Data) - .file_path(format!("test/{}.parquet", record_count)) + .file_path(format!("test/{record_count}.parquet")) .file_format(DataFileFormat::Parquet) .file_size_in_bytes(100) .record_count(record_count) diff --git a/crates/iceberg/src/transaction/snapshot.rs b/crates/iceberg/src/transaction/snapshot.rs index 6b3d0e4ff4..c8bf26a174 100644 --- 
a/crates/iceberg/src/transaction/snapshot.rs +++ b/crates/iceberg/src/transaction/snapshot.rs @@ -34,13 +34,53 @@ use crate::{Error, ErrorKind, TableRequirement, TableUpdate}; const META_ROOT_PATH: &str = "metadata"; +/// A trait that defines how different table operations produce new snapshots. +/// +/// `SnapshotProduceOperation` is used by [`SnapshotProducer`] to customize snapshot creation +/// based on the type of operation being performed (e.g., `Append`, `Overwrite`, `Delete`, etc.). +/// Each operation type implements this trait to specify: +/// - Which operation type to record in the snapshot summary +/// - Which existing manifest files should be included in the new snapshot +/// - Which manifest entries should be marked as deleted +/// +/// # When it accomplishes +/// +/// This trait is used during the snapshot creation process in [`SnapshotProducer::commit()`]: +/// +/// 1. **Operation Type Recording**: The `operation()` method determines which operation type +/// (e.g., `Operation::Append`, `Operation::Overwrite`) is recorded in the snapshot summary. +/// This metadata helps track what kind of change was made to the table. +/// +/// 2. **Manifest File Selection**: The `existing_manifest()` method determines which existing +/// manifest files from the current snapshot should be carried forward to the new snapshot. +/// For example: +/// - An `Append` operation typically includes all existing manifests plus new ones +/// - An `Overwrite` operation might exclude manifests for partitions being overwritten +/// +/// 3. **Delete Entry Processing**: The `delete_entries()` method is intended for future delete +/// operations to specify which manifest entries should be marked as deleted. pub(crate) trait SnapshotProduceOperation: Send + Sync { + /// Returns the operation type that will be recorded in the snapshot summary. 
+ /// + /// This determines what kind of operation is being performed (e.g., `Append`, `Overwrite`), + /// which is stored in the snapshot metadata for tracking and auditing purposes. fn operation(&self) -> Operation; + + /// Returns manifest entries that should be marked as deleted in the new snapshot. #[allow(unused)] fn delete_entries( &self, snapshot_produce: &SnapshotProducer, ) -> impl Future>> + Send; + + /// Returns existing manifest files that should be included in the new snapshot. + /// + /// This method determines which manifest files from the current snapshot should be + /// carried forward to the new snapshot. The selection depends on the operation type: + /// + /// - **Append operations**: Typically include all existing manifests + /// - **Overwrite operations**: May exclude manifests for partitions being overwritten + /// - **Delete operations**: May exclude manifests for partitions being deleted fn existing_manifest( &self, snapshot_produce: &SnapshotProducer<'_>, @@ -236,13 +276,13 @@ impl<'a> SnapshotProducer<'a> { "Partition field should only be primitive type.", ) })?; - if let Some(value) = value { - if !field.compatible(&value.as_primitive_literal().unwrap()) { - return Err(Error::new( - ErrorKind::DataInvalid, - "Partition value is not compatible partition type", - )); - } + if let Some(value) = value + && !field.compatible(&value.as_primitive_literal().unwrap()) + { + return Err(Error::new( + ErrorKind::DataInvalid, + "Partition value is not compatible partition type", + )); } } Ok(()) diff --git a/crates/iceberg/src/transform/bucket.rs b/crates/iceberg/src/transform/bucket.rs index 8807fb1f79..e6786a70ca 100644 --- a/crates/iceberg/src/transform/bucket.rs +++ b/crates/iceberg/src/transform/bucket.rs @@ -78,12 +78,26 @@ impl Bucket { /// ref: https://iceberg.apache.org/spec/#appendix-b-32-bit-hash-requirements #[inline] fn hash_decimal(v: i128) -> i32 { + if v == 0 { + return Self::hash_bytes(&[0]); + } + let bytes = v.to_be_bytes(); - if 
let Some(start) = bytes.iter().position(|&x| x != 0) { - Self::hash_bytes(&bytes[start..]) + let start = if v > 0 { + // Positive: skip 0x00 unless next byte would appear negative + bytes + .windows(2) + .position(|w| w[0] != 0x00 || w[1] & 0x80 != 0) + .unwrap_or(15) } else { - Self::hash_bytes(&[0]) - } + // Negative: skip 0xFF only if next byte stays negative + bytes + .windows(2) + .position(|w| w[0] != 0xFF || w[1] & 0x80 == 0) + .unwrap_or(15) + }; + + Self::hash_bytes(&bytes[start..]) } /// def bucket_N(x) = (murmur3_x86_32_hash(x) & Integer.MAX_VALUE) % N @@ -790,6 +804,27 @@ mod test { ); } + #[test] + fn test_hash_decimal_with_negative_value() { + // Test cases from GitHub issue #1981 + assert_eq!(Bucket::hash_decimal(1), -463810133); + assert_eq!(Bucket::hash_decimal(-1), -43192051); + + // Additional test cases for edge case values + assert_eq!(Bucket::hash_decimal(0), Bucket::hash_decimal(0)); + assert_eq!(Bucket::hash_decimal(127), Bucket::hash_decimal(127)); + assert_eq!(Bucket::hash_decimal(-128), Bucket::hash_decimal(-128)); + + // Test minimum representation is used + // -1 should hash as [0xFF] not [0xFF, 0xFF, ..., 0xFF] + // 128 should hash as [0x00, 0x80] not [0x00, 0x00, ..., 0x80] + assert_eq!(Bucket::hash_decimal(128), Bucket::hash_bytes(&[0x00, 0x80])); + assert_eq!( + Bucket::hash_decimal(-129), + Bucket::hash_bytes(&[0xFF, 0x7F]) + ); + } + #[test] fn test_int_literal() { let bucket = Bucket::new(10); diff --git a/crates/iceberg/src/writer/base_writer/data_file_writer.rs b/crates/iceberg/src/writer/base_writer/data_file_writer.rs index dcaa56cc97..cb7bd172ea 100644 --- a/crates/iceberg/src/writer/base_writer/data_file_writer.rs +++ b/crates/iceberg/src/writer/base_writer/data_file_writer.rs @@ -27,7 +27,7 @@ use crate::writer::{CurrentFileStatus, IcebergWriter, IcebergWriterBuilder}; use crate::{Error, ErrorKind, Result}; /// Builder for `DataFileWriter`. 
-#[derive(Clone, Debug)] +#[derive(Debug)] pub struct DataFileWriterBuilder { inner: RollingFileWriterBuilder, } @@ -53,9 +53,9 @@ where { type R = DataFileWriter; - async fn build(self, partition_key: Option) -> Result { + async fn build(&self, partition_key: Option) -> Result { Ok(DataFileWriter { - inner: Some(self.inner.clone().build()), + inner: Some(self.inner.build()), partition_key, }) } diff --git a/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs b/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs index 664ea84334..dd8487f9cc 100644 --- a/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs +++ b/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs @@ -34,7 +34,7 @@ use crate::writer::{IcebergWriter, IcebergWriterBuilder}; use crate::{Error, ErrorKind, Result}; /// Builder for `EqualityDeleteWriter`. -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct EqualityDeleteFileWriterBuilder< B: FileWriterBuilder, L: LocationGenerator, @@ -60,7 +60,7 @@ where } /// Config for `EqualityDeleteWriter`. -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct EqualityDeleteWriterConfig { // Field ids used to determine row equality in equality delete files. 
equality_ids: Vec, @@ -123,11 +123,11 @@ where { type R = EqualityDeleteFileWriter; - async fn build(self, partition_key: Option) -> Result { + async fn build(&self, partition_key: Option) -> Result { Ok(EqualityDeleteFileWriter { - inner: Some(self.inner.clone().build()), - projector: self.config.projector, - equality_ids: self.config.equality_ids, + inner: Some(self.inner.build()), + projector: self.config.projector.clone(), + equality_ids: self.config.equality_ids.clone(), partition_key, }) } @@ -293,15 +293,15 @@ mod test { assert_eq!(*data_file.null_value_counts.get(id).unwrap(), expect); } - assert_eq!(data_file.split_offsets.len(), metadata.num_row_groups()); - data_file + let split_offsets = data_file .split_offsets - .iter() - .enumerate() - .for_each(|(i, &v)| { - let expect = metadata.row_groups()[i].file_offset().unwrap(); - assert_eq!(v, expect); - }); + .as_ref() + .expect("split_offsets should be set"); + assert_eq!(split_offsets.len(), metadata.num_row_groups()); + split_offsets.iter().enumerate().for_each(|(i, &v)| { + let expect = metadata.row_groups()[i].file_offset().unwrap(); + assert_eq!(v, expect); + }); } #[tokio::test] diff --git a/crates/iceberg/src/writer/file_writer/location_generator.rs b/crates/iceberg/src/writer/file_writer/location_generator.rs index a5cfc28292..0ad4d91ac6 100644 --- a/crates/iceberg/src/writer/file_writer/location_generator.rs +++ b/crates/iceberg/src/writer/file_writer/location_generator.rs @@ -24,7 +24,7 @@ use crate::Result; use crate::spec::{DataFileFormat, PartitionKey, TableMetadata}; /// `LocationGenerator` used to generate the location of data file. -pub trait LocationGenerator: Clone + Send + 'static { +pub trait LocationGenerator: Clone + Send + Sync + 'static { /// Generate an absolute path for the given file name that includes the partition path. 
/// /// # Arguments @@ -94,7 +94,7 @@ impl LocationGenerator for DefaultLocationGenerator { } /// `FileNameGeneratorTrait` used to generate file name for data file. The file name can be passed to `LocationGenerator` to generate the location of the file. -pub trait FileNameGenerator: Clone + Send + 'static { +pub trait FileNameGenerator: Clone + Send + Sync + 'static { /// Generate a file name. fn generate_file_name(&self) -> String; } diff --git a/crates/iceberg/src/writer/file_writer/mod.rs b/crates/iceberg/src/writer/file_writer/mod.rs index 2ed6414ce8..101919f5b3 100644 --- a/crates/iceberg/src/writer/file_writer/mod.rs +++ b/crates/iceberg/src/writer/file_writer/mod.rs @@ -36,11 +36,11 @@ pub mod rolling_writer; type DefaultOutput = Vec; /// File writer builder trait. -pub trait FileWriterBuilder: Send + Clone + 'static { +pub trait FileWriterBuilder: Clone + Send + Sync + 'static { /// The associated file writer type. type R: FileWriter; /// Build file writer. - fn build(self, output_file: OutputFile) -> impl Future> + Send; + fn build(&self, output_file: OutputFile) -> impl Future> + Send; } /// File writer focus on writing record batch to different physical file format.(Such as parquet. 
orc) diff --git a/crates/iceberg/src/writer/file_writer/parquet_writer.rs b/crates/iceberg/src/writer/file_writer/parquet_writer.rs index 3e9d1715c9..8fe40df71c 100644 --- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs +++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs @@ -27,12 +27,9 @@ use itertools::Itertools; use parquet::arrow::AsyncArrowWriter; use parquet::arrow::async_reader::AsyncFileReader; use parquet::arrow::async_writer::AsyncFileWriter as ArrowAsyncFileWriter; -use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader}; +use parquet::file::metadata::ParquetMetaData; use parquet::file::properties::WriterProperties; use parquet::file::statistics::Statistics; -use parquet::format::FileMetaData; -use parquet::thrift::{TCompactOutputProtocol, TSerializable}; -use thrift::protocol::TOutputProtocol; use super::{FileWriter, FileWriterBuilder}; use crate::arrow::{ @@ -81,11 +78,11 @@ impl ParquetWriterBuilder { impl FileWriterBuilder for ParquetWriterBuilder { type R = ParquetWriter; - async fn build(self, output_file: OutputFile) -> Result { + async fn build(&self, output_file: OutputFile) -> Result { Ok(ParquetWriter { schema: self.schema.clone(), inner_writer: None, - writer_properties: self.props, + writer_properties: self.props.clone(), current_row_num: 0, output_file, nan_value_count_visitor: NanValueCountVisitor::new_with_match_mode(self.match_mode), @@ -349,29 +346,6 @@ impl ParquetWriter { Ok(data_files) } - fn thrift_to_parquet_metadata(&self, file_metadata: FileMetaData) -> Result { - let mut buffer = Vec::new(); - { - let mut protocol = TCompactOutputProtocol::new(&mut buffer); - file_metadata - .write_to_out_protocol(&mut protocol) - .map_err(|err| { - Error::new(ErrorKind::Unexpected, "Failed to write parquet metadata") - .with_source(err) - })?; - - protocol.flush().map_err(|err| { - Error::new(ErrorKind::Unexpected, "Failed to flush protocol").with_source(err) - })?; - } - - let parquet_metadata = 
ParquetMetaDataReader::decode_metadata(&buffer).map_err(|err| { - Error::new(ErrorKind::Unexpected, "Failed to decode parquet metadata").with_source(err) - })?; - - Ok(parquet_metadata) - } - /// `ParquetMetadata` to data file builder pub(crate) fn parquet_to_data_file_builder( schema: SchemaRef, @@ -438,13 +412,13 @@ impl ParquetWriter { // - We can ignore implementing distinct_counts due to this: https://lists.apache.org/thread/j52tsojv0x4bopxyzsp7m7bqt23n5fnd .lower_bounds(lower_bounds) .upper_bounds(upper_bounds) - .split_offsets( + .split_offsets(Some( metadata .row_groups() .iter() .filter_map(|group| group.file_offset()) .collect(), - ); + )); Ok(builder) } @@ -564,14 +538,7 @@ impl FileWriter for ParquetWriter { })?; Ok(vec![]) } else { - let parquet_metadata = - Arc::new(self.thrift_to_parquet_metadata(metadata).map_err(|err| { - Error::new( - ErrorKind::Unexpected, - "Failed to convert metadata from thrift to parquet.", - ) - .with_source(err) - })?); + let parquet_metadata = Arc::new(metadata); Ok(vec![Self::parquet_to_data_file_builder( self.schema, diff --git a/crates/iceberg/src/writer/file_writer/rolling_writer.rs b/crates/iceberg/src/writer/file_writer/rolling_writer.rs index 8f03654786..a93e494d48 100644 --- a/crates/iceberg/src/writer/file_writer/rolling_writer.rs +++ b/crates/iceberg/src/writer/file_writer/rolling_writer.rs @@ -103,15 +103,15 @@ where } /// Build a new [`RollingFileWriter`]. 
- pub fn build(self) -> RollingFileWriter { + pub fn build(&self) -> RollingFileWriter { RollingFileWriter { inner: None, - inner_builder: self.inner_builder, + inner_builder: self.inner_builder.clone(), target_file_size: self.target_file_size, data_file_builders: vec![], - file_io: self.file_io, - location_generator: self.location_generator, - file_name_generator: self.file_name_generator, + file_io: self.file_io.clone(), + location_generator: self.location_generator.clone(), + file_name_generator: self.file_name_generator.clone(), } } } @@ -192,25 +192,23 @@ where // initialize inner writer self.inner = Some( self.inner_builder - .clone() .build(self.new_output_file(partition_key)?) .await?, ); } - if self.should_roll() { - if let Some(inner) = self.inner.take() { - // close the current writer, roll to a new file - self.data_file_builders.extend(inner.close().await?); - - // start a new writer - self.inner = Some( - self.inner_builder - .clone() - .build(self.new_output_file(partition_key)?) - .await?, - ); - } + if self.should_roll() + && let Some(inner) = self.inner.take() + { + // close the current writer, roll to a new file + self.data_file_builders.extend(inner.close().await?); + + // start a new writer + self.inner = Some( + self.inner_builder + .build(self.new_output_file(partition_key)?) + .await?, + ); } // write the input diff --git a/crates/iceberg/src/writer/mod.rs b/crates/iceberg/src/writer/mod.rs index a7892d49e1..d475230685 100644 --- a/crates/iceberg/src/writer/mod.rs +++ b/crates/iceberg/src/writer/mod.rs @@ -148,7 +148,7 @@ //! impl IcebergWriterBuilder for LatencyRecordWriterBuilder { //! type R = LatencyRecordWriter; //! -//! async fn build(self, partition_key: Option) -> Result { +//! async fn build(&self, partition_key: Option) -> Result { //! Ok(LatencyRecordWriter { //! inner_writer: self.inner_writer_builder.build(partition_key).await?, //! }) @@ -398,13 +398,11 @@ type DefaultOutput = Vec; /// The builder for iceberg writer. 
#[async_trait::async_trait] -pub trait IcebergWriterBuilder: - Send + Clone + 'static -{ +pub trait IcebergWriterBuilder: Send + Sync + 'static { /// The associated writer type. type R: IcebergWriter; /// Build the iceberg writer with an optional partition key. - async fn build(self, partition_key: Option) -> Result; + async fn build(&self, partition_key: Option) -> Result; } /// The iceberg writer used to write data to iceberg table. diff --git a/crates/iceberg/src/writer/partitioning/clustered_writer.rs b/crates/iceberg/src/writer/partitioning/clustered_writer.rs index 3587723965..01eb452083 100644 --- a/crates/iceberg/src/writer/partitioning/clustered_writer.rs +++ b/crates/iceberg/src/writer/partitioning/clustered_writer.rs @@ -118,7 +118,6 @@ where // Create a new writer for the new partition self.current_writer = Some( self.inner_builder - .clone() .build(Some(partition_key.clone())) .await?, ); diff --git a/crates/iceberg/src/writer/partitioning/fanout_writer.rs b/crates/iceberg/src/writer/partitioning/fanout_writer.rs index 796c1a4888..21a174b0d0 100644 --- a/crates/iceberg/src/writer/partitioning/fanout_writer.rs +++ b/crates/iceberg/src/writer/partitioning/fanout_writer.rs @@ -73,7 +73,6 @@ where if !self.partition_writers.contains_key(partition_key.data()) { let writer = self .inner_builder - .clone() .build(Some(partition_key.clone())) .await?; self.partition_writers diff --git a/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs b/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs index 0fb9cba3f1..29825a5416 100644 --- a/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs +++ b/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs @@ -75,7 +75,7 @@ where pub async fn write(&mut self, input: I) -> Result<()> { // Lazily create writer on first write if self.writer.is_none() { - self.writer = Some(self.inner_builder.clone().build(None).await?); + self.writer = Some(self.inner_builder.build(None).await?); } // 
Write directly to inner writer diff --git a/crates/iceberg/tests/file_io_gcs_test.rs b/crates/iceberg/tests/file_io_gcs_test.rs index 161285ae6f..9fbcdadd0e 100644 --- a/crates/iceberg/tests/file_io_gcs_test.rs +++ b/crates/iceberg/tests/file_io_gcs_test.rs @@ -68,7 +68,7 @@ mod tests { FileIOBuilder::new("gcs") .with_props(vec![ - (GCS_SERVICE_PATH, format!("http://{}", addr)), + (GCS_SERVICE_PATH, format!("http://{addr}")), (GCS_NO_AUTH, "true".to_string()), ]) .build() @@ -81,13 +81,13 @@ mod tests { bucket_data.insert("name", name); let client = reqwest::Client::new(); - let endpoint = format!("http://{}/storage/v1/b", server_addr); + let endpoint = format!("http://{server_addr}/storage/v1/b"); client.post(endpoint).json(&bucket_data).send().await?; Ok(()) } fn get_gs_path() -> String { - format!("gs://{}", FAKE_GCS_BUCKET) + format!("gs://{FAKE_GCS_BUCKET}") } #[tokio::test] diff --git a/crates/integration_tests/DEPENDENCIES.rust.tsv b/crates/integration_tests/DEPENDENCIES.rust.tsv index 891d23966e..26f6d83820 100644 --- a/crates/integration_tests/DEPENDENCIES.rust.tsv +++ b/crates/integration_tests/DEPENDENCIES.rust.tsv @@ -1,406 +1,414 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X -adler2@2.0.1 X X X -ahash@0.8.12 X X -aho-corasick@1.1.3 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X -array-init@2.1.0 X X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@55.2.0 X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-csv@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-json@55.2.0 X -arrow-ord@55.2.0 X -arrow-row@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X -as-any@0.3.2 X X -async-compression@0.4.19 X X -async-lock@3.4.1 X X 
-async-trait@0.1.89 X X -atoi@2.0.0 X -atomic-waker@1.1.2 X X -autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.22.1 X X -bigdecimal@0.4.8 X X -bimap@0.6.3 X X -bitflags@2.9.4 X X -blake2@0.10.6 X X -blake3@1.8.2 X X X -block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X -byteorder@1.5.0 X X -bytes@1.10.1 X -bzip2@0.5.2 X X -bzip2-sys@0.1.13+1.0.8 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X -chrono@0.4.42 X X -chrono-tz@0.10.4 X X -comfy-table@7.2.0 X -concurrent-queue@2.5.0 X X -const-oid@0.9.6 X X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -constant_time_eq@0.3.1 X X X -core-foundation-sys@0.8.7 X X -cpufeatures@0.2.17 X X -crc32c@0.6.8 X X -crc32fast@1.5.0 X X -crossbeam-channel@0.5.15 X X -crossbeam-epoch@0.9.18 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.6 X X -csv@1.3.1 X X -csv-core@0.1.12 X X -ctor@0.2.9 X X -darling@0.20.11 X -darling@0.21.3 X -darling_core@0.20.11 X -darling_core@0.21.3 X -darling_macro@0.20.11 X -darling_macro@0.21.3 X -dashmap@6.1.0 X -datafusion@48.0.1 X -datafusion-catalog@48.0.1 X -datafusion-catalog-listing@48.0.1 X -datafusion-common@48.0.1 X -datafusion-common-runtime@48.0.1 X -datafusion-datasource@48.0.1 X -datafusion-datasource-csv@48.0.1 X -datafusion-datasource-json@48.0.1 X -datafusion-datasource-parquet@48.0.1 X -datafusion-doc@48.0.1 X -datafusion-execution@48.0.1 X -datafusion-expr@48.0.1 X -datafusion-expr-common@48.0.1 X -datafusion-functions@48.0.1 X -datafusion-functions-aggregate@48.0.1 X -datafusion-functions-aggregate-common@48.0.1 X -datafusion-functions-nested@48.0.1 X -datafusion-functions-table@48.0.1 X -datafusion-functions-window@48.0.1 X -datafusion-functions-window-common@48.0.1 X -datafusion-macros@48.0.1 X -datafusion-optimizer@48.0.1 X -datafusion-physical-expr@48.0.1 X -datafusion-physical-expr-common@48.0.1 X -datafusion-physical-optimizer@48.0.1 X 
-datafusion-physical-plan@48.0.1 X -datafusion-session@48.0.1 X -datafusion-sql@48.0.1 X -derive_builder@0.20.2 X X -derive_builder_core@0.20.2 X X -derive_builder_macro@0.20.2 X X -digest@0.10.7 X X -displaydoc@0.2.5 X X -dissimilar@1.0.10 X -either@1.15.0 X X -equivalent@1.0.2 X X -errno@0.3.13 X X -event-listener@5.4.1 X X -event-listener-strategy@0.5.4 X X -expect-test@1.5.1 X X -fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -fixedbitset@0.5.7 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X -fnv@1.0.7 X X -foldhash@0.1.5 X -form_urlencoded@1.2.2 X X -futures@0.3.31 X X -futures-channel@0.3.31 X X -futures-core@0.3.31 X X -futures-executor@0.3.31 X X -futures-io@0.3.31 X X -futures-macro@0.3.31 X X -futures-sink@0.3.31 X X -futures-task@0.3.31 X X -futures-util@0.3.31 X X -generator@0.8.7 X X -generic-array@0.14.7 X -getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X -glob@0.3.3 X X -gloo-timers@0.3.0 X X -h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -heck@0.5.0 X X -hermit-abi@0.5.2 X X -hex@0.4.3 X X -hmac@0.12.1 X X -home@0.5.11 X X -http@1.3.1 X X -http-body@1.0.1 X -http-body-util@0.1.3 X -httparse@1.10.1 X X -httpdate@1.0.3 X X -humantime@2.2.0 X X -hyper@1.7.0 X -hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X -iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-rest@0.7.0 X -iceberg-datafusion@0.7.0 X -iceberg-integration-tests@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X -ident_case@1.0.1 X X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.11.0 X X -integer-encoding@3.0.4 X -io-uring@0.7.10 X X -ipnet@2.11.0 X X -iri-string@0.7.8 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.15 X X -jobserver@0.1.34 X X -js-sys@0.3.78 X X -lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X 
-lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X -libm@0.2.15 X -libz-rs-sys@0.5.2 X -linux-raw-sys@0.9.4 X X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -lzma-sys@0.1.20 X X -matchers@0.2.0 X -md-5@0.10.6 X X -memchr@2.7.5 X X -miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X -murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X -num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X -object_store@0.12.3 X X -once_cell@1.21.3 X X -opendal@0.54.0 X -ordered-float@2.10.1 X -ordered-float@4.6.0 X -parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X -paste@1.0.15 X X -percent-encoding@2.3.2 X X -petgraph@0.8.2 X X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.16 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -portable-atomic@1.11.1 X X -potential_utf@0.1.3 X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X -psm@0.1.26 X X -quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X -r-efi@5.3.0 X X X -rand@0.8.5 X X -rand@0.9.2 X X -rand_chacha@0.3.1 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.3 X X -recursive@0.1.1 X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X -reqsign@0.16.5 X -reqwest@0.12.23 X X -ring@0.17.14 X X -roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X -rustc_version@0.4.1 X X -rustix@1.0.8 X X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X -rustversion@1.0.22 X X -ryu@1.0.20 X X -same-file@1.0.6 X X -scoped-tls@1.0.1 X X -scopeguard@1.2.0 X X -semver@1.0.26 X X -seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X 
-serde_json@1.0.143 X X -serde_repr@0.1.20 X X -serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X -sha1@0.10.6 X X -sha2@0.10.9 X X -sharded-slab@0.1.7 X -shlex@1.3.0 X X -simdutf8@0.1.5 X X -siphasher@1.0.1 X X -slab@0.4.11 X -smallvec@1.15.1 X X -snap@1.1.1 X -socket2@0.6.0 X X -sqlparser@0.55.0 X -sqlparser_derive@0.3.0 X -stable_deref_trait@1.2.0 X X -stacker@0.1.21 X X -static_assertions@1.1.0 X X -strsim@0.11.1 X -strum@0.27.2 X -strum_macros@0.27.2 X -subtle@2.6.1 X -syn@2.0.106 X X -sync_wrapper@1.0.2 X -synstructure@0.13.2 X -tagptr@0.2.0 X X -tempfile@3.22.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X -thread_local@1.1.9 X X -threadpool@1.8.1 X X -thrift@0.17.0 X -tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X -tower@0.5.2 X -tower-http@0.6.6 X -tower-layer@0.3.3 X -tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X -tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X -try-lock@0.2.5 X -twox-hash@2.1.2 X -typed-builder@0.20.1 X X -typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X -unicode-segmentation@1.12.0 X X -unicode-width@0.2.1 X X -untrusted@0.9.0 X -url@2.5.7 X X -utf8_iter@1.0.4 X X -uuid@1.18.1 X X -version_check@0.9.5 X X -walkdir@2.5.0 X X -want@0.3.1 X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X -wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -web-time@1.1.0 X X -webpki-roots@1.0.2 X -winapi-util@0.1.11 X X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X 
-windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.61.0 X X -windows-targets@0.52.6 X X -windows-threading@0.1.0 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_msvc@0.52.6 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_msvc@0.52.6 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -xz2@0.1.7 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +adler2@2.0.1 X X X +ahash@0.8.12 X X +aho-corasick@1.1.4 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X +android_system_properties@0.1.5 X X +anyhow@1.0.100 X X +apache-avro@0.21.0 X +ar_archive_writer@0.2.0 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@57.1.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-csv@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-json@57.1.0 X +arrow-ord@57.1.0 X +arrow-row@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X +as-any@0.3.2 X X +async-compression@0.4.19 X X +async-lock@3.4.1 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +backon@1.6.0 X +base64@0.22.1 X X +bigdecimal@0.4.9 X X +bimap@0.6.3 X X +bitflags@2.10.0 X X +blake2@0.10.6 X X +blake3@1.8.2 X X X +block-buffer@0.10.4 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X 
+bumpalo@3.19.0 X X +bytemuck@1.24.0 X X X +byteorder@1.5.0 X X +bytes@1.11.0 X +bzip2@0.5.2 X X +bzip2@0.6.1 X X +bzip2-sys@0.1.13+1.0.8 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X +chrono@0.4.42 X X +chrono-tz@0.10.4 X X +comfy-table@7.2.1 X +concurrent-queue@2.5.0 X X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +constant_time_eq@0.3.1 X X X +core-foundation-sys@0.8.7 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.7 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +ctor@0.2.9 X X +darling@0.20.11 X +darling@0.21.3 X +darling_core@0.20.11 X +darling_core@0.21.3 X +darling_macro@0.20.11 X +darling_macro@0.21.3 X +dashmap@6.1.0 X +datafusion@51.0.0 X +datafusion-catalog@51.0.0 X +datafusion-catalog-listing@51.0.0 X +datafusion-common@51.0.0 X +datafusion-common-runtime@51.0.0 X +datafusion-datasource@51.0.0 X +datafusion-datasource-arrow@51.0.0 X +datafusion-datasource-csv@51.0.0 X +datafusion-datasource-json@51.0.0 X +datafusion-datasource-parquet@51.0.0 X +datafusion-doc@51.0.0 X +datafusion-execution@51.0.0 X +datafusion-expr@51.0.0 X +datafusion-expr-common@51.0.0 X +datafusion-functions@51.0.0 X +datafusion-functions-aggregate@51.0.0 X +datafusion-functions-aggregate-common@51.0.0 X +datafusion-functions-nested@51.0.0 X +datafusion-functions-table@51.0.0 X +datafusion-functions-window@51.0.0 X +datafusion-functions-window-common@51.0.0 X +datafusion-macros@51.0.0 X +datafusion-optimizer@51.0.0 X +datafusion-physical-expr@51.0.0 X +datafusion-physical-expr-adapter@51.0.0 X +datafusion-physical-expr-common@51.0.0 X +datafusion-physical-optimizer@51.0.0 X +datafusion-physical-plan@51.0.0 X +datafusion-pruning@51.0.0 X +datafusion-session@51.0.0 X +datafusion-sql@51.0.0 X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +displaydoc@0.2.5 X X 
+dissimilar@1.0.10 X +either@1.15.0 X X +equivalent@1.0.2 X X +errno@0.3.14 X X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fastrand@2.3.0 X X +find-msvc-tools@0.1.5 X X +fixedbitset@0.5.7 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +form_urlencoded@1.2.2 X X +futures@0.3.31 X X +futures-channel@0.3.31 X X +futures-core@0.3.31 X X +futures-executor@0.3.31 X X +futures-io@0.3.31 X X +futures-macro@0.3.31 X X +futures-sink@0.3.31 X X +futures-task@0.3.31 X X +futures-timer@3.0.3 X X +futures-util@0.3.31 X X +generic-array@0.14.7 X +getrandom@0.2.16 X X +getrandom@0.3.4 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +h2@0.4.12 X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +home@0.5.11 X X +http@1.4.0 X X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +httpdate@1.0.3 X X +humantime@2.3.0 X X +hyper@1.8.1 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.8.0 X +iceberg-catalog-rest@0.8.0 X +iceberg-datafusion@0.8.0 X +iceberg-integration-tests@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.12.1 X X +integer-encoding@3.0.4 X +ipnet@2.11.0 X X +iri-string@0.7.9 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X +jobserver@0.1.34 X X +js-sys@0.3.83 X X +lazy_static@1.5.0 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.178 X X +libm@0.2.15 X 
+libz-rs-sys@0.5.3 X +linux-raw-sys@0.11.0 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X +lzma-sys@0.1.20 X X +md-5@0.10.6 X X +memchr@2.7.6 X X +miniz_oxide@0.8.9 X X X +mio@1.1.1 X +moka@0.12.11 X X +murmur3@0.5.2 X X +nu-ansi-term@0.50.3 X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +object@0.32.2 X X +object_store@0.12.4 X X +once_cell@1.21.3 X X +opendal@0.55.0 X +ordered-float@2.10.1 X +ordered-float@4.6.0 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X +paste@1.0.15 X X +percent-encoding@2.3.2 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.16 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +portable-atomic@1.11.1 X X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X +psm@0.1.28 X X +quad-rand@0.2.3 X +quick-xml@0.38.4 X +quote@1.0.42 X X +r-efi@5.3.0 X X X +rand@0.8.5 X X +rand@0.9.2 X X +rand_chacha@0.3.1 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.3 X X +recursive@0.1.1 X +recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X +relative-path@1.9.3 X X +reqsign@0.16.5 X +reqwest@0.12.25 X X +ring@0.17.14 X X +roaring@0.11.2 X X +rstest@0.26.1 X X +rstest_macros@0.26.1 X X +rust_decimal@1.39.0 X +rustc_version@0.4.1 X X +rustix@1.1.2 X X X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X +rustversion@1.0.22 X X +ryu@1.0.20 X X +same-file@1.0.6 X X +scopeguard@1.2.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X +serde_repr@0.1.20 X X +serde_urlencoded@0.7.1 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X +sha1@0.10.6 X X +sha2@0.10.9 X X 
+sharded-slab@0.1.7 X +shlex@1.3.0 X X +simd-adler32@0.3.8 X +simdutf8@0.1.5 X X +siphasher@1.0.1 X X +slab@0.4.11 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.1 X X +sqlparser@0.59.0 X +sqlparser_derive@0.3.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.22 X X +strsim@0.11.1 X +strum@0.27.2 X +strum_macros@0.27.2 X +subtle@2.6.1 X +syn@2.0.111 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +tempfile@3.23.0 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X +thread_local@1.1.9 X X +thrift@0.17.0 X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X +toml_datetime@0.7.3 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X +tower@0.5.2 X +tower-http@0.6.8 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X +tracing-log@0.2.0 X +tracing-subscriber@0.3.22 X +try-lock@0.2.5 X +twox-hash@2.1.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X +unicode-segmentation@1.12.0 X X +unicode-width@0.2.2 X X +untrusted@0.9.0 X +url@2.5.7 X X +utf8_iter@1.0.4 X X +uuid@1.19.0 X X +version_check@0.9.5 X X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X +wasm-streams@0.4.2 X X +web-sys@0.3.83 X X +web-time@1.1.0 X X +webpki-roots@1.0.4 X +winapi-util@0.1.11 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.52.0 X X +windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X 
+windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +xz2@0.1.7 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/integration_tests/tests/shared_tests/datafusion.rs b/crates/integration_tests/tests/shared_tests/datafusion.rs index 81bbb5f54c..60dd9f36c8 100644 --- a/crates/integration_tests/tests/shared_tests/datafusion.rs +++ b/crates/integration_tests/tests/shared_tests/datafusion.rs @@ -26,7 +26,7 @@ use datafusion::error::DataFusionError; use datafusion::prelude::SessionContext; use iceberg::{Catalog, CatalogBuilder, TableIdent}; use iceberg_catalog_rest::RestCatalogBuilder; -use iceberg_datafusion::IcebergTableProvider; +use iceberg_datafusion::IcebergStaticTableProvider; use parquet::arrow::PARQUET_FIELD_ID_META_KEY; use crate::get_shared_containers; @@ -47,7 +47,7 @@ async fn test_basic_queries() -> Result<(), DataFusionError> { let ctx = SessionContext::new(); let table_provider = Arc::new( - IcebergTableProvider::try_new_from_table(table) + IcebergStaticTableProvider::try_new_from_table(table) .await .unwrap(), ); diff --git a/crates/integrations/cache-moka/DEPENDENCIES.rust.tsv b/crates/integrations/cache-moka/DEPENDENCIES.rust.tsv index 59b898d3fe..4763f71ec8 100644 --- a/crates/integrations/cache-moka/DEPENDENCIES.rust.tsv +++ 
b/crates/integrations/cache-moka/DEPENDENCIES.rust.tsv @@ -1,48 +1,46 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X @@ -56,7 +54,7 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -70,13 +68,14 @@ digest@0.10.7 X X displaydoc@0.2.5 X X dissimilar@1.0.10 X either@1.15.0 X X 
+equivalent@1.0.2 X X event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -88,102 +87,95 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-cache-moka@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-cache-moka@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X 
-lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -191,134 +183,135 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X 
+reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 
X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X 
-wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/integrations/datafusion/DEPENDENCIES.rust.tsv b/crates/integrations/datafusion/DEPENDENCIES.rust.tsv index 8fb3e04f80..7a0f57e7a0 100644 --- a/crates/integrations/datafusion/DEPENDENCIES.rust.tsv +++ b/crates/integrations/datafusion/DEPENDENCIES.rust.tsv @@ -1,401 +1,409 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X -adler2@2.0.1 X X X -ahash@0.8.12 X X -aho-corasick@1.1.3 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X -array-init@2.1.0 X X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@55.2.0 X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-csv@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-json@55.2.0 X -arrow-ord@55.2.0 X -arrow-row@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X -as-any@0.3.2 X X -async-compression@0.4.19 X X -async-lock@3.4.1 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -atomic-waker@1.1.2 X X -autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.22.1 X X -bigdecimal@0.4.8 X X -bimap@0.6.3 X X -bitflags@2.9.4 X X -blake2@0.10.6 X X -blake3@1.8.2 X X X -block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X 
-brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X -byteorder@1.5.0 X X -bytes@1.10.1 X -bzip2@0.5.2 X X -bzip2-sys@0.1.13+1.0.8 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X -chrono@0.4.42 X X -chrono-tz@0.10.4 X X -comfy-table@7.2.0 X -concurrent-queue@2.5.0 X X -const-oid@0.9.6 X X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -constant_time_eq@0.3.1 X X X -core-foundation-sys@0.8.7 X X -cpufeatures@0.2.17 X X -crc32c@0.6.8 X X -crc32fast@1.5.0 X X -crossbeam-channel@0.5.15 X X -crossbeam-epoch@0.9.18 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.6 X X -csv@1.3.1 X X -csv-core@0.1.12 X X -darling@0.20.11 X -darling@0.21.3 X -darling_core@0.20.11 X -darling_core@0.21.3 X -darling_macro@0.20.11 X -darling_macro@0.21.3 X -dashmap@6.1.0 X -datafusion@48.0.1 X -datafusion-catalog@48.0.1 X -datafusion-catalog-listing@48.0.1 X -datafusion-common@48.0.1 X -datafusion-common-runtime@48.0.1 X -datafusion-datasource@48.0.1 X -datafusion-datasource-csv@48.0.1 X -datafusion-datasource-json@48.0.1 X -datafusion-datasource-parquet@48.0.1 X -datafusion-doc@48.0.1 X -datafusion-execution@48.0.1 X -datafusion-expr@48.0.1 X -datafusion-expr-common@48.0.1 X -datafusion-functions@48.0.1 X -datafusion-functions-aggregate@48.0.1 X -datafusion-functions-aggregate-common@48.0.1 X -datafusion-functions-nested@48.0.1 X -datafusion-functions-table@48.0.1 X -datafusion-functions-window@48.0.1 X -datafusion-functions-window-common@48.0.1 X -datafusion-macros@48.0.1 X -datafusion-optimizer@48.0.1 X -datafusion-physical-expr@48.0.1 X -datafusion-physical-expr-common@48.0.1 X -datafusion-physical-optimizer@48.0.1 X -datafusion-physical-plan@48.0.1 X -datafusion-session@48.0.1 X -datafusion-sql@48.0.1 X -derive_builder@0.20.2 X X -derive_builder_core@0.20.2 X X -derive_builder_macro@0.20.2 X X -digest@0.10.7 X X -displaydoc@0.2.5 X X -dissimilar@1.0.10 X -either@1.15.0 X X -equivalent@1.0.2 X X -errno@0.3.13 X X 
-event-listener@5.4.1 X X -event-listener-strategy@0.5.4 X X -expect-test@1.5.1 X X -fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -fixedbitset@0.5.7 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X -fnv@1.0.7 X X -foldhash@0.1.5 X -form_urlencoded@1.2.2 X X -futures@0.3.31 X X -futures-channel@0.3.31 X X -futures-core@0.3.31 X X -futures-executor@0.3.31 X X -futures-io@0.3.31 X X -futures-macro@0.3.31 X X -futures-sink@0.3.31 X X -futures-task@0.3.31 X X -futures-util@0.3.31 X X -generator@0.8.7 X X -generic-array@0.14.7 X -getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X -glob@0.3.3 X X -gloo-timers@0.3.0 X X -half@2.6.0 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -heck@0.5.0 X X -hermit-abi@0.5.2 X X -hex@0.4.3 X X -hmac@0.12.1 X X -home@0.5.11 X X -http@1.3.1 X X -http-body@1.0.1 X -http-body-util@0.1.3 X -httparse@1.10.1 X X -humantime@2.2.0 X X -hyper@1.7.0 X -hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X -iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-datafusion@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X -ident_case@1.0.1 X X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.11.0 X X -integer-encoding@3.0.4 X -io-uring@0.7.10 X X -ipnet@2.11.0 X X -iri-string@0.7.8 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.15 X X -jobserver@0.1.34 X X -js-sys@0.3.78 X X -lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X -libm@0.2.15 X -libz-rs-sys@0.5.2 X -linux-raw-sys@0.9.4 X X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -lzma-sys@0.1.20 X X -matchers@0.2.0 X -md-5@0.10.6 X X -memchr@2.7.5 X X -miniz_oxide@0.8.9 X X X -mio@1.0.4 X 
-moka@0.12.10 X X -murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X -num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X -object_store@0.12.3 X X -once_cell@1.21.3 X X -opendal@0.54.0 X -ordered-float@2.10.1 X -ordered-float@4.6.0 X -parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X -paste@1.0.15 X X -percent-encoding@2.3.2 X X -petgraph@0.8.2 X X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.16 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -portable-atomic@1.11.1 X X -potential_utf@0.1.3 X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X -psm@0.1.26 X X -quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X -r-efi@5.3.0 X X X -rand@0.8.5 X X -rand@0.9.2 X X -rand_chacha@0.3.1 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.3 X X -recursive@0.1.1 X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X -reqsign@0.16.5 X -reqwest@0.12.23 X X -ring@0.17.14 X X -roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X -rustc_version@0.4.1 X X -rustix@1.0.8 X X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X -rustversion@1.0.22 X X -ryu@1.0.20 X X -same-file@1.0.6 X X -scoped-tls@1.0.1 X X -scopeguard@1.2.0 X X -semver@1.0.26 X X -seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X -serde_repr@0.1.20 X X -serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X -sha1@0.10.6 X X -sha2@0.10.9 X X -sharded-slab@0.1.7 X -shlex@1.3.0 X X -simdutf8@0.1.5 X X -siphasher@1.0.1 X X -slab@0.4.11 X -smallvec@1.15.1 X X -snap@1.1.1 X -socket2@0.6.0 X X -sqlparser@0.55.0 X -sqlparser_derive@0.3.0 X -stable_deref_trait@1.2.0 X X -stacker@0.1.21 
X X -static_assertions@1.1.0 X X -strsim@0.11.1 X -strum@0.27.2 X -strum_macros@0.27.2 X -subtle@2.6.1 X -syn@2.0.106 X X -sync_wrapper@1.0.2 X -synstructure@0.13.2 X -tagptr@0.2.0 X X -tempfile@3.22.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X -thread_local@1.1.9 X X -threadpool@1.8.1 X X -thrift@0.17.0 X -tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X -tower@0.5.2 X -tower-http@0.6.6 X -tower-layer@0.3.3 X -tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X -tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X -try-lock@0.2.5 X -twox-hash@2.1.2 X -typed-builder@0.20.1 X X -typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X -unicode-segmentation@1.12.0 X X -unicode-width@0.2.1 X X -untrusted@0.9.0 X -url@2.5.7 X X -utf8_iter@1.0.4 X X -uuid@1.18.1 X X -version_check@0.9.5 X X -walkdir@2.5.0 X X -want@0.3.1 X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X -wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -web-time@1.1.0 X X -webpki-roots@1.0.2 X -winapi-util@0.1.11 X X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.61.0 X X -windows-targets@0.52.6 X X -windows-threading@0.1.0 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_msvc@0.52.6 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_msvc@0.52.6 X X 
-windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -xz2@0.1.7 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +adler2@2.0.1 X X X +ahash@0.8.12 X X +aho-corasick@1.1.4 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X +android_system_properties@0.1.5 X X +anyhow@1.0.100 X X +apache-avro@0.21.0 X +ar_archive_writer@0.2.0 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@57.1.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-csv@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-json@57.1.0 X +arrow-ord@57.1.0 X +arrow-row@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X +as-any@0.3.2 X X +async-compression@0.4.19 X X +async-lock@3.4.1 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +backon@1.6.0 X +base64@0.22.1 X X +bigdecimal@0.4.9 X X +bimap@0.6.3 X X +bitflags@2.10.0 X X +blake2@0.10.6 X X +blake3@1.8.2 X X X +block-buffer@0.10.4 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.19.0 X X +bytemuck@1.24.0 X X X +byteorder@1.5.0 X X +bytes@1.11.0 X +bzip2@0.5.2 X X +bzip2@0.6.1 X X +bzip2-sys@0.1.13+1.0.8 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X +chrono@0.4.42 X X +chrono-tz@0.10.4 X X +comfy-table@7.2.1 X +concurrent-queue@2.5.0 X X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +constant_time_eq@0.3.1 X X X 
+core-foundation-sys@0.8.7 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.7 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +darling@0.20.11 X +darling@0.21.3 X +darling_core@0.20.11 X +darling_core@0.21.3 X +darling_macro@0.20.11 X +darling_macro@0.21.3 X +dashmap@6.1.0 X +datafusion@51.0.0 X +datafusion-catalog@51.0.0 X +datafusion-catalog-listing@51.0.0 X +datafusion-common@51.0.0 X +datafusion-common-runtime@51.0.0 X +datafusion-datasource@51.0.0 X +datafusion-datasource-arrow@51.0.0 X +datafusion-datasource-csv@51.0.0 X +datafusion-datasource-json@51.0.0 X +datafusion-datasource-parquet@51.0.0 X +datafusion-doc@51.0.0 X +datafusion-execution@51.0.0 X +datafusion-expr@51.0.0 X +datafusion-expr-common@51.0.0 X +datafusion-functions@51.0.0 X +datafusion-functions-aggregate@51.0.0 X +datafusion-functions-aggregate-common@51.0.0 X +datafusion-functions-nested@51.0.0 X +datafusion-functions-table@51.0.0 X +datafusion-functions-window@51.0.0 X +datafusion-functions-window-common@51.0.0 X +datafusion-macros@51.0.0 X +datafusion-optimizer@51.0.0 X +datafusion-physical-expr@51.0.0 X +datafusion-physical-expr-adapter@51.0.0 X +datafusion-physical-expr-common@51.0.0 X +datafusion-physical-optimizer@51.0.0 X +datafusion-physical-plan@51.0.0 X +datafusion-pruning@51.0.0 X +datafusion-session@51.0.0 X +datafusion-sql@51.0.0 X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +displaydoc@0.2.5 X X +dissimilar@1.0.10 X +either@1.15.0 X X +equivalent@1.0.2 X X +errno@0.3.14 X X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fastrand@2.3.0 X X +find-msvc-tools@0.1.5 X X +fixedbitset@0.5.7 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +form_urlencoded@1.2.2 X X +futures@0.3.31 X X +futures-channel@0.3.31 X X 
+futures-core@0.3.31 X X +futures-executor@0.3.31 X X +futures-io@0.3.31 X X +futures-macro@0.3.31 X X +futures-sink@0.3.31 X X +futures-task@0.3.31 X X +futures-timer@3.0.3 X X +futures-util@0.3.31 X X +generic-array@0.14.7 X +getrandom@0.2.16 X X +getrandom@0.3.4 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +home@0.5.11 X X +http@1.4.0 X X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +humantime@2.3.0 X X +hyper@1.8.1 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.8.0 X +iceberg-datafusion@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.12.1 X X +integer-encoding@3.0.4 X +ipnet@2.11.0 X X +iri-string@0.7.9 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X +jobserver@0.1.34 X X +js-sys@0.3.83 X X +lazy_static@1.5.0 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.178 X X +libm@0.2.15 X +libz-rs-sys@0.5.3 X +linux-raw-sys@0.11.0 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X +lzma-sys@0.1.20 X X +md-5@0.10.6 X X +memchr@2.7.6 X X +miniz_oxide@0.8.9 X X X +mio@1.1.1 X +moka@0.12.11 X X +murmur3@0.5.2 X X +nu-ansi-term@0.50.3 X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +object@0.32.2 X X +object_store@0.12.4 X X +once_cell@1.21.3 X X +opendal@0.55.0 X +ordered-float@2.10.1 X 
+ordered-float@4.6.0 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X +paste@1.0.15 X X +percent-encoding@2.3.2 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.16 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +portable-atomic@1.11.1 X X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X +psm@0.1.28 X X +quad-rand@0.2.3 X +quick-xml@0.38.4 X +quote@1.0.42 X X +r-efi@5.3.0 X X X +rand@0.8.5 X X +rand@0.9.2 X X +rand_chacha@0.3.1 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.3 X X +recursive@0.1.1 X +recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X +relative-path@1.9.3 X X +reqsign@0.16.5 X +reqwest@0.12.25 X X +ring@0.17.14 X X +roaring@0.11.2 X X +rstest@0.26.1 X X +rstest_macros@0.26.1 X X +rust_decimal@1.39.0 X +rustc_version@0.4.1 X X +rustix@1.1.2 X X X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X +rustversion@1.0.22 X X +ryu@1.0.20 X X +same-file@1.0.6 X X +scopeguard@1.2.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X +serde_repr@0.1.20 X X +serde_urlencoded@0.7.1 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X +sha1@0.10.6 X X +sha2@0.10.9 X X +sharded-slab@0.1.7 X +shlex@1.3.0 X X +simd-adler32@0.3.8 X +simdutf8@0.1.5 X X +siphasher@1.0.1 X X +slab@0.4.11 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.1 X X +sqlparser@0.59.0 X +sqlparser_derive@0.3.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.22 X X +strsim@0.11.1 X +strum@0.27.2 X +strum_macros@0.27.2 X +subtle@2.6.1 X +syn@2.0.111 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +tempfile@3.23.0 X X +thiserror@2.0.17 X X 
+thiserror-impl@2.0.17 X X +thread_local@1.1.9 X X +thrift@0.17.0 X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X +toml_datetime@0.7.3 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X +tower@0.5.2 X +tower-http@0.6.8 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X +tracing-log@0.2.0 X +tracing-subscriber@0.3.22 X +try-lock@0.2.5 X +twox-hash@2.1.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X +unicode-segmentation@1.12.0 X X +unicode-width@0.2.2 X X +untrusted@0.9.0 X +url@2.5.7 X X +utf8_iter@1.0.4 X X +uuid@1.19.0 X X +version_check@0.9.5 X X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X +wasm-streams@0.4.2 X X +web-sys@0.3.83 X X +web-time@1.1.0 X X +webpki-roots@1.0.4 X +winapi-util@0.1.11 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.52.0 X X +windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X 
+wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +xz2@0.1.7 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/integrations/datafusion/src/physical_plan/mod.rs b/crates/integrations/datafusion/src/physical_plan/mod.rs index eb58082fe5..5a9845cde0 100644 --- a/crates/integrations/datafusion/src/physical_plan/mod.rs +++ b/crates/integrations/datafusion/src/physical_plan/mod.rs @@ -21,6 +21,7 @@ pub(crate) mod metadata_scan; pub(crate) mod project; pub(crate) mod repartition; pub(crate) mod scan; +pub(crate) mod sort; pub(crate) mod write; pub(crate) const DATA_FILES_COL_NAME: &str = "data_files"; diff --git a/crates/integrations/datafusion/src/physical_plan/repartition.rs b/crates/integrations/datafusion/src/physical_plan/repartition.rs index 8ad87fd1cc..2d1d7f862c 100644 --- a/crates/integrations/datafusion/src/physical_plan/repartition.rs +++ b/crates/integrations/datafusion/src/physical_plan/repartition.rs @@ -159,9 +159,8 @@ fn determine_partitioning_strategy( // Case 2: Partitioned table missing _partition column (normally this should not happen) (true, Err(_)) => Err(DataFusionError::Plan(format!( - "Partitioned table input missing {} column. \ - Ensure projection happens before repartitioning.", - PROJECTED_PARTITION_VALUE_COLUMN + "Partitioned table input missing {PROJECTED_PARTITION_VALUE_COLUMN} column. \ + Ensure projection happens before repartitioning." 
))), // Case 3: Unpartitioned table, always use RoundRobinBatch @@ -508,8 +507,7 @@ mod tests { assert!( column_names.contains(&PROJECTED_PARTITION_VALUE_COLUMN.to_string()), - "Should use _partition column, got: {:?}", - column_names + "Should use _partition column, got: {column_names:?}" ); } _ => panic!("Expected Hash partitioning with Identity transform"), @@ -733,8 +731,7 @@ mod tests { .collect(); assert!( column_names.contains(&PROJECTED_PARTITION_VALUE_COLUMN.to_string()), - "Should use _partition column for mixed transforms with Identity, got: {:?}", - column_names + "Should use _partition column for mixed transforms with Identity, got: {column_names:?}" ); } _ => panic!("Expected Hash partitioning for table with identity transforms"), diff --git a/crates/integrations/datafusion/src/physical_plan/scan.rs b/crates/integrations/datafusion/src/physical_plan/scan.rs index be92e93d25..d627b6a63d 100644 --- a/crates/integrations/datafusion/src/physical_plan/scan.rs +++ b/crates/integrations/datafusion/src/physical_plan/scan.rs @@ -51,6 +51,8 @@ pub struct IcebergTableScan { projection: Option>, /// Filters to apply to the table scan predicates: Option, + /// Optional limit on the number of rows to return + limit: Option, } impl IcebergTableScan { @@ -61,6 +63,7 @@ impl IcebergTableScan { schema: ArrowSchemaRef, projection: Option<&Vec>, filters: &[Expr], + limit: Option, ) -> Self { let output_schema = match projection { None => schema.clone(), @@ -76,6 +79,7 @@ impl IcebergTableScan { plan_properties, projection, predicates, + limit, } } @@ -95,6 +99,10 @@ impl IcebergTableScan { self.predicates.as_ref() } + pub fn limit(&self) -> Option { + self.limit + } + /// Computes [`PlanProperties`] used in query optimization. 
fn compute_properties(schema: ArrowSchemaRef) -> PlanProperties { // TODO: @@ -146,9 +154,29 @@ impl ExecutionPlan for IcebergTableScan { ); let stream = futures::stream::once(fut).try_flatten(); + // Apply limit if specified + let limited_stream: Pin> + Send>> = + if let Some(limit) = self.limit { + let mut remaining = limit; + Box::pin(stream.try_filter_map(move |batch| { + futures::future::ready(if remaining == 0 { + Ok(None) + } else if batch.num_rows() <= remaining { + remaining -= batch.num_rows(); + Ok(Some(batch)) + } else { + let limited_batch = batch.slice(0, remaining); + remaining = 0; + Ok(Some(limited_batch)) + }) + })) + } else { + Box::pin(stream) + }; + Ok(Box::pin(RecordBatchStreamAdapter::new( self.schema(), - stream, + limited_stream, ))) } } diff --git a/crates/integrations/datafusion/src/physical_plan/sort.rs b/crates/integrations/datafusion/src/physical_plan/sort.rs new file mode 100644 index 0000000000..587ab120ca --- /dev/null +++ b/crates/integrations/datafusion/src/physical_plan/sort.rs @@ -0,0 +1,240 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Partition-based sorting for Iceberg tables. 
+ +use std::sync::Arc; + +use datafusion::arrow::compute::SortOptions; +use datafusion::common::Result as DFResult; +use datafusion::error::DataFusionError; +use datafusion::physical_expr::{LexOrdering, PhysicalSortExpr}; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_plan::expressions::Column; +use datafusion::physical_plan::sorts::sort::SortExec; +use iceberg::arrow::PROJECTED_PARTITION_VALUE_COLUMN; + +/// Sorts an ExecutionPlan by partition values for Iceberg tables. +/// +/// This function takes an input ExecutionPlan that has been extended with partition values +/// (via `project_with_partition`) and returns a SortExec that sorts by the partition column. +/// The partition values are expected to be in a struct column named `PROJECTED_PARTITION_VALUE_COLUMN`. +/// +/// For unpartitioned tables or plans without the partition column, returns an error. +/// +/// # Arguments +/// * `input` - The input ExecutionPlan with projected partition values +/// +/// # Returns +/// * `Ok(Arc)` - A SortExec that sorts by partition values +/// * `Err` - If the partition column is not found +pub(crate) fn sort_by_partition(input: Arc) -> DFResult> { + let schema = input.schema(); + + // Find the partition column in the schema + let (partition_column_index, _partition_field) = schema + .column_with_name(PROJECTED_PARTITION_VALUE_COLUMN) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "Partition column '{PROJECTED_PARTITION_VALUE_COLUMN}' not found in schema. Ensure the plan has been extended with partition values using project_with_partition." 
+ )) + })?; + + // Create a single sort expression for the partition column + let column_expr = Arc::new(Column::new( + PROJECTED_PARTITION_VALUE_COLUMN, + partition_column_index, + )); + + let sort_expr = PhysicalSortExpr { + expr: column_expr, + options: SortOptions::default(), // Ascending, nulls first + }; + + // Create a SortExec with preserve_partitioning=true to ensure the output partitioning + // is the same as the input partitioning, and the data is sorted within each partition + let lex_ordering = LexOrdering::new(vec![sort_expr]).ok_or_else(|| { + DataFusionError::Plan("Failed to create LexOrdering from sort expression".to_string()) + })?; + + let sort_exec = SortExec::new(lex_ordering, input).with_preserve_partitioning(true); + + Ok(Arc::new(sort_exec)) +} + +#[cfg(test)] +mod tests { + use datafusion::arrow::array::{Int32Array, RecordBatch, StringArray, StructArray}; + use datafusion::arrow::datatypes::{DataType, Field, Fields, Schema as ArrowSchema}; + use datafusion::datasource::{MemTable, TableProvider}; + use datafusion::prelude::SessionContext; + + use super::*; + + #[tokio::test] + async fn test_sort_by_partition_basic() { + // Create a schema with a partition column + let partition_fields = + Fields::from(vec![Field::new("id_partition", DataType::Int32, false)]); + + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, false), + Field::new( + PROJECTED_PARTITION_VALUE_COLUMN, + DataType::Struct(partition_fields.clone()), + false, + ), + ])); + + // Create test data with partition values + let id_array = Arc::new(Int32Array::from(vec![3, 1, 2])); + let name_array = Arc::new(StringArray::from(vec!["c", "a", "b"])); + let partition_array = Arc::new(StructArray::from(vec![( + Arc::new(Field::new("id_partition", DataType::Int32, false)), + Arc::new(Int32Array::from(vec![3, 1, 2])) as _, + )])); + + let batch = + RecordBatch::try_new(schema.clone(), vec![id_array, name_array, 
partition_array]) + .unwrap(); + + let ctx = SessionContext::new(); + let mem_table = MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap(); + let input = mem_table.scan(&ctx.state(), None, &[], None).await.unwrap(); + + // Apply sort + let sorted_plan = sort_by_partition(input).unwrap(); + + // Execute and verify + let result = datafusion::physical_plan::collect(sorted_plan, ctx.task_ctx()) + .await + .unwrap(); + + assert_eq!(result.len(), 1); + let result_batch = &result[0]; + + let id_col = result_batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + + // Verify data is sorted by partition value + assert_eq!(id_col.value(0), 1); + assert_eq!(id_col.value(1), 2); + assert_eq!(id_col.value(2), 3); + } + + #[tokio::test] + async fn test_sort_by_partition_missing_column() { + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, false), + ])); + + let batch = RecordBatch::try_new(schema.clone(), vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["a", "b", "c"])), + ]) + .unwrap(); + + let ctx = SessionContext::new(); + let mem_table = MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap(); + let input = mem_table.scan(&ctx.state(), None, &[], None).await.unwrap(); + + let result = sort_by_partition(input); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Partition column '_partition' not found") + ); + } + + #[tokio::test] + async fn test_sort_by_partition_multi_field() { + // Test with multiple partition fields in the struct + let partition_fields = Fields::from(vec![ + Field::new("year", DataType::Int32, false), + Field::new("month", DataType::Int32, false), + ]); + + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("data", DataType::Utf8, false), + Field::new( + PROJECTED_PARTITION_VALUE_COLUMN, + 
DataType::Struct(partition_fields.clone()), + false, + ), + ])); + + // Create test data with partition values (year, month) + let id_array = Arc::new(Int32Array::from(vec![1, 2, 3, 4])); + let data_array = Arc::new(StringArray::from(vec!["a", "b", "c", "d"])); + + // Partition values: (2024, 2), (2024, 1), (2023, 12), (2024, 1) + let year_array = Arc::new(Int32Array::from(vec![2024, 2024, 2023, 2024])); + let month_array = Arc::new(Int32Array::from(vec![2, 1, 12, 1])); + + let partition_array = Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("year", DataType::Int32, false)), + year_array as _, + ), + ( + Arc::new(Field::new("month", DataType::Int32, false)), + month_array as _, + ), + ])); + + let batch = + RecordBatch::try_new(schema.clone(), vec![id_array, data_array, partition_array]) + .unwrap(); + + let ctx = SessionContext::new(); + let mem_table = MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap(); + let input = mem_table.scan(&ctx.state(), None, &[], None).await.unwrap(); + + // Apply sort + let sorted_plan = sort_by_partition(input).unwrap(); + + // Execute and verify + let result = datafusion::physical_plan::collect(sorted_plan, ctx.task_ctx()) + .await + .unwrap(); + + assert_eq!(result.len(), 1); + let result_batch = &result[0]; + + let id_col = result_batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + + // Verify data is sorted by partition value (struct comparison) + // Expected order: (2023, 12), (2024, 1), (2024, 1), (2024, 2) + // Which corresponds to ids: 3, 2, 4, 1 + assert_eq!(id_col.value(0), 3); + assert_eq!(id_col.value(1), 2); + assert_eq!(id_col.value(2), 4); + assert_eq!(id_col.value(3), 1); + } +} diff --git a/crates/integrations/datafusion/src/physical_plan/write.rs b/crates/integrations/datafusion/src/physical_plan/write.rs index 9eb53c235f..fdfddf877b 100644 --- a/crates/integrations/datafusion/src/physical_plan/write.rs +++ b/crates/integrations/datafusion/src/physical_plan/write.rs @@ -266,8 
+266,28 @@ impl ExecutionPlan for IcebergWriteExec { let data_file_writer_builder = DataFileWriterBuilder::new(rolling_writer_builder); // Create TaskWriter - // TODO: Make fanout_enabled configurable via table properties - let fanout_enabled = true; + let fanout_enabled = self + .table + .metadata() + .properties() + .get(TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED) + .map(|value| { + value + .parse::() + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid value for {}, expected 'true' or 'false'", + TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED + ), + ) + .with_source(e) + }) + .map_err(to_datafusion_error) + }) + .transpose()? + .unwrap_or(TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT); let schema = self.table.metadata().current_schema().clone(); let partition_spec = self.table.metadata().default_partition_spec().clone(); let task_writer = TaskWriter::try_new( diff --git a/crates/integrations/datafusion/src/schema.rs b/crates/integrations/datafusion/src/schema.rs index 3920ee73ca..31bbdbd67f 100644 --- a/crates/integrations/datafusion/src/schema.rs +++ b/crates/integrations/datafusion/src/schema.rs @@ -28,6 +28,7 @@ use iceberg::inspect::MetadataTableType; use iceberg::{Catalog, NamespaceIdent, Result}; use crate::table::IcebergTableProvider; +use crate::to_datafusion_error; /// Represents a [`SchemaProvider`] for the Iceberg [`Catalog`], managing /// access to table providers within a specific namespace. 
@@ -113,7 +114,10 @@ impl SchemaProvider for IcebergSchemaProvider { let metadata_table_type = MetadataTableType::try_from(metadata_table_name).map_err(DataFusionError::Plan)?; if let Some(table) = self.tables.get(table_name) { - let metadata_table = table.metadata_table(metadata_table_type); + let metadata_table = table + .metadata_table(metadata_table_type) + .await + .map_err(to_datafusion_error)?; return Ok(Some(Arc::new(metadata_table))); } else { return Ok(None); diff --git a/crates/integrations/datafusion/src/table/mod.rs b/crates/integrations/datafusion/src/table/mod.rs index 42a3baad3b..ae87342fa5 100644 --- a/crates/integrations/datafusion/src/table/mod.rs +++ b/crates/integrations/datafusion/src/table/mod.rs @@ -15,6 +15,16 @@ // specific language governing permissions and limitations // under the License. +//! Iceberg table providers for DataFusion. +//! +//! This module provides two table provider implementations: +//! +//! - [`IcebergTableProvider`]: Catalog-backed provider with automatic metadata refresh. +//! Use for write operations and when you need to see the latest table state. +//! +//! - [`IcebergStaticTableProvider`]: Static provider for read-only access to a specific +//! table snapshot. Use for consistent analytical queries or time-travel scenarios. 
+ pub mod metadata_table; pub mod table_provider_factory; @@ -34,102 +44,67 @@ use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; use iceberg::arrow::schema_to_arrow_schema; use iceberg::inspect::MetadataTableType; +use iceberg::spec::TableProperties; use iceberg::table::Table; use iceberg::{Catalog, Error, ErrorKind, NamespaceIdent, Result, TableIdent}; use metadata_table::IcebergMetadataTableProvider; +use crate::error::to_datafusion_error; use crate::physical_plan::commit::IcebergCommitExec; use crate::physical_plan::project::project_with_partition; use crate::physical_plan::repartition::repartition; use crate::physical_plan::scan::IcebergTableScan; +use crate::physical_plan::sort::sort_by_partition; use crate::physical_plan::write::IcebergWriteExec; -/// Represents a [`TableProvider`] for the Iceberg [`Catalog`], -/// managing access to a [`Table`]. +/// Catalog-backed table provider with automatic metadata refresh. +/// +/// This provider loads fresh table metadata from the catalog on every scan and write +/// operation, ensuring you always see the latest table state. Use this when you need +/// write operations or want to see the most up-to-date data. +/// +/// For read-only access to a specific snapshot without catalog overhead, use +/// [`IcebergStaticTableProvider`] instead. #[derive(Debug, Clone)] pub struct IcebergTableProvider { - /// A table in the catalog. - table: Table, - /// Table snapshot id that will be queried via this provider. - snapshot_id: Option, - /// A reference-counted arrow `Schema`. + /// The catalog that manages this table + catalog: Arc, + /// The table identifier (namespace + name) + table_ident: TableIdent, + /// A reference-counted arrow `Schema` (cached at construction) schema: ArrowSchemaRef, - /// The catalog that the table belongs to. 
- catalog: Option>, } impl IcebergTableProvider { - pub(crate) fn new(table: Table, schema: ArrowSchemaRef) -> Self { - IcebergTableProvider { - table, - snapshot_id: None, - schema, - catalog: None, - } - } - /// Asynchronously tries to construct a new [`IcebergTableProvider`] - /// using the given client and table name to fetch an actual [`Table`] - /// in the provided namespace. + /// Creates a new catalog-backed table provider. + /// + /// Loads the table once to get the initial schema, then stores the catalog + /// reference for future metadata refreshes on each operation. pub(crate) async fn try_new( - client: Arc, + catalog: Arc, namespace: NamespaceIdent, name: impl Into, ) -> Result { - let ident = TableIdent::new(namespace, name.into()); - let table = client.load_table(&ident).await?; - - let schema = Arc::new(schema_to_arrow_schema(table.metadata().current_schema())?); + let table_ident = TableIdent::new(namespace, name.into()); - Ok(IcebergTableProvider { - table, - snapshot_id: None, - schema, - catalog: Some(client), - }) - } - - /// Asynchronously tries to construct a new [`IcebergTableProvider`] - /// using the given table. Can be used to create a table provider from an existing table regardless of the catalog implementation. - pub async fn try_new_from_table(table: Table) -> Result { + // Load table once to get initial schema + let table = catalog.load_table(&table_ident).await?; let schema = Arc::new(schema_to_arrow_schema(table.metadata().current_schema())?); - Ok(IcebergTableProvider { - table, - snapshot_id: None, - schema, - catalog: None, - }) - } - /// Asynchronously tries to construct a new [`IcebergTableProvider`] - /// using a specific snapshot of the given table. Can be used to create a table provider from an existing table regardless of the catalog implementation. 
- pub async fn try_new_from_table_snapshot(table: Table, snapshot_id: i64) -> Result { - let snapshot = table - .metadata() - .snapshot_by_id(snapshot_id) - .ok_or_else(|| { - Error::new( - ErrorKind::Unexpected, - format!( - "snapshot id {snapshot_id} not found in table {}", - table.identifier().name() - ), - ) - })?; - let schema = snapshot.schema(table.metadata())?; - let schema = Arc::new(schema_to_arrow_schema(&schema)?); Ok(IcebergTableProvider { - table, - snapshot_id: Some(snapshot_id), + catalog, + table_ident, schema, - catalog: None, }) } - pub(crate) fn metadata_table(&self, r#type: MetadataTableType) -> IcebergMetadataTableProvider { - IcebergMetadataTableProvider { - table: self.table.clone(), - r#type, - } + pub(crate) async fn metadata_table( + &self, + r#type: MetadataTableType, + ) -> Result { + // Load fresh table metadata for metadata table access + let table = self.catalog.load_table(&self.table_ident).await?; + Ok(IcebergMetadataTableProvider { table, r#type }) } } @@ -152,14 +127,23 @@ impl TableProvider for IcebergTableProvider { _state: &dyn Session, projection: Option<&Vec>, filters: &[Expr], - _limit: Option, + limit: Option, ) -> DFResult> { + // Load fresh table metadata from catalog + let table = self + .catalog + .load_table(&self.table_ident) + .await + .map_err(to_datafusion_error)?; + + // Create scan with fresh metadata (always use current snapshot) Ok(Arc::new(IcebergTableScan::new( - self.table.clone(), - self.snapshot_id, + table, + None, // Always use current snapshot for catalog-backed provider self.schema.clone(), projection, filters, + limit, ))) } @@ -177,17 +161,18 @@ impl TableProvider for IcebergTableProvider { input: Arc, _insert_op: InsertOp, ) -> DFResult> { - let Some(catalog) = self.catalog.clone() else { - return Err(DataFusionError::Execution( - "Catalog cannot be none for insert_into".to_string(), - )); - }; + // Load fresh table metadata from catalog + let table = self + .catalog + 
.load_table(&self.table_ident) + .await + .map_err(to_datafusion_error)?; - let partition_spec = self.table.metadata().default_partition_spec(); + let partition_spec = table.metadata().default_partition_spec(); // Step 1: Project partition values for partitioned tables let plan_with_partition = if !partition_spec.is_unpartitioned() { - project_with_partition(input, &self.table)? + project_with_partition(input, &table)? } else { input }; @@ -200,15 +185,41 @@ impl TableProvider for IcebergTableProvider { ) })?; - let repartitioned_plan = repartition( - plan_with_partition, - self.table.metadata_ref(), - target_partitions, - )?; + let repartitioned_plan = + repartition(plan_with_partition, table.metadata_ref(), target_partitions)?; + + // Apply sort node when it's not fanout mode + let fanout_enabled = table + .metadata() + .properties() + .get(TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED) + .map(|value| { + value + .parse::() + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid value for {}, expected 'true' or 'false'", + TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED + ), + ) + .with_source(e) + }) + .map_err(to_datafusion_error) + }) + .transpose()? + .unwrap_or(TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT); + + let write_input = if fanout_enabled { + repartitioned_plan + } else { + sort_by_partition(repartitioned_plan)? + }; let write_plan = Arc::new(IcebergWriteExec::new( - self.table.clone(), - repartitioned_plan, + table.clone(), + write_input, self.schema.clone(), )); @@ -216,21 +227,141 @@ impl TableProvider for IcebergTableProvider { let coalesce_partitions = Arc::new(CoalescePartitionsExec::new(write_plan)); Ok(Arc::new(IcebergCommitExec::new( - self.table.clone(), - catalog, + table, + self.catalog.clone(), coalesce_partitions, self.schema.clone(), ))) } } +/// Static table provider for read-only snapshot access. 
+/// +/// This provider holds a cached table instance and does not refresh metadata or support +/// write operations. Use this for consistent analytical queries, time-travel scenarios, +/// or when you want to avoid catalog overhead. +/// +/// For catalog-backed tables with write support and automatic refresh, use +/// [`IcebergTableProvider`] instead. +#[derive(Debug, Clone)] +pub struct IcebergStaticTableProvider { + /// The static table instance (never refreshed) + table: Table, + /// Optional snapshot ID for this static view + snapshot_id: Option, + /// A reference-counted arrow `Schema` + schema: ArrowSchemaRef, +} + +impl IcebergStaticTableProvider { + /// Creates a static provider from a table instance. + /// + /// Uses the table's current snapshot for all queries. Does not support write operations. + pub async fn try_new_from_table(table: Table) -> Result { + let schema = Arc::new(schema_to_arrow_schema(table.metadata().current_schema())?); + Ok(IcebergStaticTableProvider { + table, + snapshot_id: None, + schema, + }) + } + + /// Creates a static provider for a specific table snapshot. + /// + /// Queries the specified snapshot for all operations. Useful for time-travel queries. + /// Does not support write operations. 
+ pub async fn try_new_from_table_snapshot(table: Table, snapshot_id: i64) -> Result { + let snapshot = table + .metadata() + .snapshot_by_id(snapshot_id) + .ok_or_else(|| { + Error::new( + ErrorKind::Unexpected, + format!( + "snapshot id {snapshot_id} not found in table {}", + table.identifier().name() + ), + ) + })?; + let table_schema = snapshot.schema(table.metadata())?; + let schema = Arc::new(schema_to_arrow_schema(&table_schema)?); + Ok(IcebergStaticTableProvider { + table, + snapshot_id: Some(snapshot_id), + schema, + }) + } +} + +#[async_trait] +impl TableProvider for IcebergStaticTableProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> ArrowSchemaRef { + self.schema.clone() + } + + fn table_type(&self) -> TableType { + TableType::Base + } + + async fn scan( + &self, + _state: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> DFResult> { + // Use cached table (no refresh) + Ok(Arc::new(IcebergTableScan::new( + self.table.clone(), + self.snapshot_id, + self.schema.clone(), + projection, + filters, + limit, + ))) + } + + fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> DFResult> { + // Push down all filters; the scanner is the single source of truth and drops any filter that cannot be pushed down + Ok(vec![TableProviderFilterPushDown::Inexact; filters.len()]) + } + + async fn insert_into( + &self, + _state: &dyn Session, + _input: Arc, + _insert_op: InsertOp, + ) -> DFResult> { + Err(to_datafusion_error(Error::new( + ErrorKind::FeatureUnsupported, + "Write operations are not supported on IcebergStaticTableProvider. \ + Use IcebergTableProvider with a catalog for write support." 
+ .to_string(), + ))) + } +} + #[cfg(test)] mod tests { + use std::collections::HashMap; + use std::sync::Arc; + use datafusion::common::Column; + use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionContext; - use iceberg::TableIdent; use iceberg::io::FileIO; + use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder}; + use iceberg::spec::{NestedField, PrimitiveType, Schema, Type}; use iceberg::table::{StaticTable, Table}; + use iceberg::{Catalog, CatalogBuilder, NamespaceIdent, TableCreation, TableIdent}; + use tempfile::TempDir; use super::*; @@ -253,10 +384,59 @@ mod tests { static_table.into_table() } + async fn get_test_catalog_and_table() -> (Arc, NamespaceIdent, String, TempDir) { + let temp_dir = TempDir::new().unwrap(); + let warehouse_path = temp_dir.path().to_str().unwrap().to_string(); + + let catalog = MemoryCatalogBuilder::default() + .load( + "memory", + HashMap::from([(MEMORY_CATALOG_WAREHOUSE.to_string(), warehouse_path.clone())]), + ) + .await + .unwrap(); + + let namespace = NamespaceIdent::new("test_ns".to_string()); + catalog + .create_namespace(&namespace, HashMap::new()) + .await + .unwrap(); + + let schema = Schema::builder() + .with_schema_id(0) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build() + .unwrap(); + + let table_creation = TableCreation::builder() + .name("test_table".to_string()) + .location(format!("{warehouse_path}/test_table")) + .schema(schema) + .properties(HashMap::new()) + .build(); + + catalog + .create_table(&namespace, table_creation) + .await + .unwrap(); + + ( + Arc::new(catalog), + namespace, + "test_table".to_string(), + temp_dir, + ) + } + + // Tests for IcebergStaticTableProvider + #[tokio::test] - async fn test_try_new_from_table() { + async fn test_static_provider_from_table() { let table = 
get_test_table_from_metadata_file().await; - let table_provider = IcebergTableProvider::try_new_from_table(table.clone()) + let table_provider = IcebergStaticTableProvider::try_new_from_table(table.clone()) .await .unwrap(); let ctx = SessionContext::new(); @@ -278,11 +458,11 @@ mod tests { } #[tokio::test] - async fn test_try_new_from_table_snapshot() { + async fn test_static_provider_from_snapshot() { let table = get_test_table_from_metadata_file().await; let snapshot_id = table.metadata().snapshots().next().unwrap().snapshot_id(); let table_provider = - IcebergTableProvider::try_new_from_table_snapshot(table.clone(), snapshot_id) + IcebergStaticTableProvider::try_new_from_table_snapshot(table.clone(), snapshot_id) .await .unwrap(); let ctx = SessionContext::new(); @@ -304,16 +484,388 @@ mod tests { } #[tokio::test] - async fn test_physical_input_schema_consistent_with_logical_input_schema() { + async fn test_static_provider_rejects_writes() { let table = get_test_table_from_metadata_file().await; - let table_provider = IcebergTableProvider::try_new_from_table(table.clone()) + let table_provider = IcebergStaticTableProvider::try_new_from_table(table.clone()) .await .unwrap(); let ctx = SessionContext::new(); ctx.register_table("mytable", Arc::new(table_provider)) .unwrap(); + + // Attempt to insert into the static provider should fail + let result = ctx.sql("INSERT INTO mytable VALUES (1, 2, 3)").await; + + // The error should occur during planning or execution + // We expect an error indicating write operations are not supported + assert!( + result.is_err() || { + let df = result.unwrap(); + df.collect().await.is_err() + } + ); + } + + #[tokio::test] + async fn test_static_provider_scan() { + let table = get_test_table_from_metadata_file().await; + let table_provider = IcebergStaticTableProvider::try_new_from_table(table.clone()) + .await + .unwrap(); + let ctx = SessionContext::new(); + ctx.register_table("mytable", Arc::new(table_provider)) + .unwrap(); + + // 
Test that scan operations work correctly let df = ctx.sql("SELECT count(*) FROM mytable").await.unwrap(); let physical_plan = df.create_physical_plan().await; - assert!(physical_plan.is_ok()) + assert!(physical_plan.is_ok()); + } + + // Tests for IcebergTableProvider + + #[tokio::test] + async fn test_catalog_backed_provider_creation() { + let (catalog, namespace, table_name, _temp_dir) = get_test_catalog_and_table().await; + + // Test creating a catalog-backed provider + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + // Verify the schema is loaded correctly + let schema = provider.schema(); + assert_eq!(schema.fields().len(), 2); + assert_eq!(schema.field(0).name(), "id"); + assert_eq!(schema.field(1).name(), "name"); + } + + #[tokio::test] + async fn test_catalog_backed_provider_scan() { + let (catalog, namespace, table_name, _temp_dir) = get_test_catalog_and_table().await; + + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + ctx.register_table("test_table", Arc::new(provider)) + .unwrap(); + + // Test that scan operations work correctly + let df = ctx.sql("SELECT * FROM test_table").await.unwrap(); + + // Verify the schema in the query result + let df_schema = df.schema(); + assert_eq!(df_schema.fields().len(), 2); + assert_eq!(df_schema.field(0).name(), "id"); + assert_eq!(df_schema.field(1).name(), "name"); + + let physical_plan = df.create_physical_plan().await; + assert!(physical_plan.is_ok()); + } + + #[tokio::test] + async fn test_catalog_backed_provider_insert() { + let (catalog, namespace, table_name, _temp_dir) = get_test_catalog_and_table().await; + + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + ctx.register_table("test_table", 
Arc::new(provider)) + .unwrap(); + + // Test that insert operations work correctly + let result = ctx.sql("INSERT INTO test_table VALUES (1, 'test')").await; + + // Insert should succeed (or at least not fail during planning) + assert!(result.is_ok()); + + // Try to execute the insert plan + let df = result.unwrap(); + let execution_result = df.collect().await; + + // The execution should succeed + assert!(execution_result.is_ok()); + } + + #[tokio::test] + async fn test_physical_input_schema_consistent_with_logical_input_schema() { + let (catalog, namespace, table_name, _temp_dir) = get_test_catalog_and_table().await; + + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + ctx.register_table("test_table", Arc::new(provider)) + .unwrap(); + + // Create a query plan + let df = ctx.sql("SELECT id, name FROM test_table").await.unwrap(); + + // Get logical schema before consuming df + let logical_schema = df.schema().clone(); + + // Get physical plan (this consumes df) + let physical_plan = df.create_physical_plan().await.unwrap(); + let physical_schema = physical_plan.schema(); + + // Verify that logical and physical schemas are consistent + assert_eq!( + logical_schema.fields().len(), + physical_schema.fields().len() + ); + + for (logical_field, physical_field) in logical_schema + .fields() + .iter() + .zip(physical_schema.fields().iter()) + { + assert_eq!(logical_field.name(), physical_field.name()); + assert_eq!(logical_field.data_type(), physical_field.data_type()); + } + } + + async fn get_partitioned_test_catalog_and_table( + fanout_enabled: Option, + ) -> (Arc, NamespaceIdent, String, TempDir) { + use iceberg::spec::{Transform, UnboundPartitionSpec}; + + let temp_dir = TempDir::new().unwrap(); + let warehouse_path = temp_dir.path().to_str().unwrap().to_string(); + + let catalog = MemoryCatalogBuilder::default() + .load( + "memory", + 
HashMap::from([(MEMORY_CATALOG_WAREHOUSE.to_string(), warehouse_path.clone())]), + ) + .await + .unwrap(); + + let namespace = NamespaceIdent::new("test_ns".to_string()); + catalog + .create_namespace(&namespace, HashMap::new()) + .await + .unwrap(); + + let schema = Schema::builder() + .with_schema_id(0) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::required(2, "category", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build() + .unwrap(); + + let partition_spec = UnboundPartitionSpec::builder() + .with_spec_id(0) + .add_partition_field(2, "category", Transform::Identity) + .unwrap() + .build(); + + let mut properties = HashMap::new(); + if let Some(enabled) = fanout_enabled { + properties.insert( + iceberg::spec::TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED + .to_string(), + enabled.to_string(), + ); + } + + let table_creation = TableCreation::builder() + .name("partitioned_table".to_string()) + .location(format!("{warehouse_path}/partitioned_table")) + .schema(schema) + .partition_spec(partition_spec) + .properties(properties) + .build(); + + catalog + .create_table(&namespace, table_creation) + .await + .unwrap(); + + ( + Arc::new(catalog), + namespace, + "partitioned_table".to_string(), + temp_dir, + ) + } + + /// Helper to check if a plan contains a SortExec node + fn plan_contains_sort(plan: &Arc) -> bool { + if plan.name() == "SortExec" { + return true; + } + for child in plan.children() { + if plan_contains_sort(child) { + return true; + } + } + false + } + + #[tokio::test] + async fn test_insert_plan_fanout_enabled_no_sort() { + use datafusion::datasource::TableProvider; + use datafusion::logical_expr::dml::InsertOp; + use datafusion::physical_plan::empty::EmptyExec; + + // When fanout is enabled (default), no sort node should be added + let (catalog, namespace, table_name, _temp_dir) = + get_partitioned_test_catalog_and_table(Some(true)).await; + + let provider = + 
IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let input_schema = provider.schema(); + let input = Arc::new(EmptyExec::new(input_schema)) as Arc; + + let state = ctx.state(); + let insert_plan = provider + .insert_into(&state, input, InsertOp::Append) + .await + .unwrap(); + + // With fanout enabled, there should be no SortExec in the plan + assert!( + !plan_contains_sort(&insert_plan), + "Plan should NOT contain SortExec when fanout is enabled" + ); + } + + #[tokio::test] + async fn test_insert_plan_fanout_disabled_has_sort() { + use datafusion::datasource::TableProvider; + use datafusion::logical_expr::dml::InsertOp; + use datafusion::physical_plan::empty::EmptyExec; + + // When fanout is disabled, a sort node should be added + let (catalog, namespace, table_name, _temp_dir) = + get_partitioned_test_catalog_and_table(Some(false)).await; + + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let input_schema = provider.schema(); + let input = Arc::new(EmptyExec::new(input_schema)) as Arc; + + let state = ctx.state(); + let insert_plan = provider + .insert_into(&state, input, InsertOp::Append) + .await + .unwrap(); + + // With fanout disabled, there should be a SortExec in the plan + assert!( + plan_contains_sort(&insert_plan), + "Plan should contain SortExec when fanout is disabled" + ); + } + + #[tokio::test] + async fn test_limit_pushdown_static_provider() { + use datafusion::datasource::TableProvider; + + let table = get_test_table_from_metadata_file().await; + let table_provider = IcebergStaticTableProvider::try_new_from_table(table.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let state = ctx.state(); + + // Test scan with limit + let scan_plan = table_provider + .scan(&state, None, &[], Some(10)) + .await + .unwrap(); + + // 
Verify that the scan plan is an IcebergTableScan + let iceberg_scan = scan_plan + .as_any() + .downcast_ref::() + .expect("Expected IcebergTableScan"); + + // Verify the limit is set + assert_eq!( + iceberg_scan.limit(), + Some(10), + "Limit should be set to 10 in the scan plan" + ); + } + + #[tokio::test] + async fn test_limit_pushdown_catalog_backed_provider() { + use datafusion::datasource::TableProvider; + + let (catalog, namespace, table_name, _temp_dir) = get_test_catalog_and_table().await; + + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let state = ctx.state(); + + // Test scan with limit + let scan_plan = provider.scan(&state, None, &[], Some(5)).await.unwrap(); + + // Verify that the scan plan is an IcebergTableScan + let iceberg_scan = scan_plan + .as_any() + .downcast_ref::() + .expect("Expected IcebergTableScan"); + + // Verify the limit is set + assert_eq!( + iceberg_scan.limit(), + Some(5), + "Limit should be set to 5 in the scan plan" + ); + } + + #[tokio::test] + async fn test_no_limit_pushdown() { + use datafusion::datasource::TableProvider; + + let table = get_test_table_from_metadata_file().await; + let table_provider = IcebergStaticTableProvider::try_new_from_table(table.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let state = ctx.state(); + + // Test scan without limit + let scan_plan = table_provider.scan(&state, None, &[], None).await.unwrap(); + + // Verify that the scan plan is an IcebergTableScan + let iceberg_scan = scan_plan + .as_any() + .downcast_ref::() + .expect("Expected IcebergTableScan"); + + // Verify the limit is None + assert_eq!( + iceberg_scan.limit(), + None, + "Limit should be None when not specified" + ); } } diff --git a/crates/integrations/datafusion/src/table/table_provider_factory.rs b/crates/integrations/datafusion/src/table/table_provider_factory.rs index 
e8e87dd318..8cae597b7b 100644 --- a/crates/integrations/datafusion/src/table/table_provider_factory.rs +++ b/crates/integrations/datafusion/src/table/table_provider_factory.rs @@ -24,12 +24,11 @@ use datafusion::catalog::{Session, TableProvider, TableProviderFactory}; use datafusion::error::Result as DFResult; use datafusion::logical_expr::CreateExternalTable; use datafusion::sql::TableReference; -use iceberg::arrow::schema_to_arrow_schema; use iceberg::io::FileIO; use iceberg::table::StaticTable; use iceberg::{Error, ErrorKind, Result, TableIdent}; -use super::IcebergTableProvider; +use super::IcebergStaticTableProvider; use crate::to_datafusion_error; /// A factory that implements DataFusion's `TableProviderFactory` to create `IcebergTableProvider` instances. @@ -126,10 +125,11 @@ impl TableProviderFactory for IcebergTableProviderFactory { .map_err(to_datafusion_error)? .into_table(); - let schema = schema_to_arrow_schema(table.metadata().current_schema()) + let provider = IcebergStaticTableProvider::try_new_from_table(table) + .await .map_err(to_datafusion_error)?; - Ok(Arc::new(IcebergTableProvider::new(table, Arc::new(schema)))) + Ok(Arc::new(provider)) } } @@ -244,6 +244,7 @@ mod tests { constraints: Constraints::default(), column_defaults: Default::default(), if_not_exists: Default::default(), + or_replace: false, temporary: false, definition: Default::default(), unbounded: Default::default(), diff --git a/crates/integrations/datafusion/tests/integration_datafusion_test.rs b/crates/integrations/datafusion/tests/integration_datafusion_test.rs index fdf5b17d18..6f8898abb8 100644 --- a/crates/integrations/datafusion/tests/integration_datafusion_test.rs +++ b/crates/integrations/datafusion/tests/integration_datafusion_test.rs @@ -347,14 +347,14 @@ async fn test_metadata_table() -> Result<()> { check_record_batches( snapshots, expect![[r#" - Field { name: "committed_at", data_type: Timestamp(Microsecond, Some("+00:00")), nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "parent_id", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }, - Field { name: "operation", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, - Field { name: "manifest_list", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }, - Field { name: "summary", data_type: Map(Field { name: "key_value", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, Field { name: "value", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }"#]], + Field { "committed_at": Timestamp(µs, "+00:00"), metadata: {"PARQUET:field_id": "1"} }, + Field { "snapshot_id": Int64, metadata: {"PARQUET:field_id": "2"} }, + Field { "parent_id": nullable Int64, metadata: {"PARQUET:field_id": "3"} }, + Field { "operation": nullable Utf8, metadata: {"PARQUET:field_id": "4"} }, + Field { "manifest_list": nullable Utf8, metadata: {"PARQUET:field_id": "5"} }, + Field { "summary": nullable Map("key_value": non-null Struct("key": non-null Utf8, metadata: {"PARQUET:field_id": "7"}, "value": Utf8, metadata: {"PARQUET:field_id": "8"}), unsorted), metadata: {"PARQUET:field_id": "6"} }"#]], expect![[r#" - committed_at: PrimitiveArray + committed_at: PrimitiveArray [ ], snapshot_id: PrimitiveArray @@ -386,18 +386,18 @@ async fn test_metadata_table() -> Result<()> { check_record_batches( manifests, expect![[r#" - Field 
{ name: "content", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "14"} }, - Field { name: "path", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "length", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "partition_spec_id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }, - Field { name: "added_snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, - Field { name: "added_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }, - Field { name: "existing_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, - Field { name: "deleted_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, - Field { name: "added_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "15"} }, - Field { name: "existing_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "16"} }, - Field { name: "deleted_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "17"} }, - Field { name: "partition_summaries", data_type: List(Field { name: "item", data_type: Struct([Field { name: "contains_null", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }, Field { name: "contains_nan", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": 
"11"} }, Field { name: "lower_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "12"} }, Field { name: "upper_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "13"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }"#]], + Field { "content": Int32, metadata: {"PARQUET:field_id": "14"} }, + Field { "path": Utf8, metadata: {"PARQUET:field_id": "1"} }, + Field { "length": Int64, metadata: {"PARQUET:field_id": "2"} }, + Field { "partition_spec_id": Int32, metadata: {"PARQUET:field_id": "3"} }, + Field { "added_snapshot_id": Int64, metadata: {"PARQUET:field_id": "4"} }, + Field { "added_data_files_count": Int32, metadata: {"PARQUET:field_id": "5"} }, + Field { "existing_data_files_count": Int32, metadata: {"PARQUET:field_id": "6"} }, + Field { "deleted_data_files_count": Int32, metadata: {"PARQUET:field_id": "7"} }, + Field { "added_delete_files_count": Int32, metadata: {"PARQUET:field_id": "15"} }, + Field { "existing_delete_files_count": Int32, metadata: {"PARQUET:field_id": "16"} }, + Field { "deleted_delete_files_count": Int32, metadata: {"PARQUET:field_id": "17"} }, + Field { "partition_summaries": List(non-null Struct("contains_null": non-null Boolean, metadata: {"PARQUET:field_id": "10"}, "contains_nan": Boolean, metadata: {"PARQUET:field_id": "11"}, "lower_bound": Utf8, metadata: {"PARQUET:field_id": "12"}, "upper_bound": Utf8, metadata: {"PARQUET:field_id": "13"}), metadata: {"PARQUET:field_id": "9"}), metadata: {"PARQUET:field_id": "8"} }"#]], expect![[r#" content: PrimitiveArray [ @@ -492,10 +492,6 @@ async fn test_insert_into() -> Result<()> { .unwrap(); assert_eq!(rows_inserted.value(0), 2); - // Refresh context to avoid getting stale table - let catalog = 
Arc::new(IcebergCatalogProvider::try_new(client).await?); - ctx.register_catalog("catalog", catalog); - // Query the table to verify the inserted data let df = ctx .sql("SELECT * FROM catalog.test_insert_into.my_table") @@ -508,8 +504,8 @@ async fn test_insert_into() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "foo1", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "foo2", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }"#]], + Field { "foo1": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "foo2": Utf8, metadata: {"PARQUET:field_id": "2"} }"#]], expect![[r#" foo1: PrimitiveArray [ @@ -650,10 +646,6 @@ async fn test_insert_into_nested() -> Result<()> { .unwrap(); assert_eq!(rows_inserted.value(0), 2); - // Refresh context to avoid getting stale table - let catalog = Arc::new(IcebergCatalogProvider::try_new(client).await?); - ctx.register_catalog("catalog", catalog); - // Query the table to verify the inserted data let df = ctx .sql("SELECT * FROM catalog.test_insert_nested.nested_table ORDER BY id") @@ -666,9 +658,9 @@ async fn test_insert_into_nested() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "name", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "profile", data_type: Struct([Field { name: "address", data_type: Struct([Field { name: "street", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, Field { name: "city", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, Field { name: "zip", data_type: Int32, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, Field { name: "contact", data_type: Struct([Field { name: "email", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }, Field { name: "phone", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }"#]], + Field { "id": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "name": Utf8, metadata: {"PARQUET:field_id": "2"} }, + Field { "profile": nullable Struct("address": Struct("street": non-null Utf8, metadata: {"PARQUET:field_id": "6"}, "city": non-null Utf8, metadata: {"PARQUET:field_id": "7"}, "zip": non-null Int32, metadata: {"PARQUET:field_id": "8"}), metadata: {"PARQUET:field_id": "4"}, "contact": Struct("email": Utf8, metadata: {"PARQUET:field_id": "9"}, "phone": Utf8, metadata: {"PARQUET:field_id": "10"}), metadata: {"PARQUET:field_id": "5"}), metadata: {"PARQUET:field_id": "3"} }"#]], expect![[r#" id: PrimitiveArray [ @@ -687,7 +679,7 @@ async fn test_insert_into_nested() -> Result<()> { valid, ] [ - -- child 0: "address" (Struct([Field { name: "street", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, Field { name: "city", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, Field { name: "zip", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }])) + -- child 0: "address" (Struct([Field { name: "street", data_type: Utf8, metadata: {"PARQUET:field_id": "6"} }, Field { name: "city", data_type: Utf8, metadata: {"PARQUET:field_id": "7"} }, 
Field { name: "zip", data_type: Int32, metadata: {"PARQUET:field_id": "8"} }])) StructArray -- validity: [ @@ -714,7 +706,7 @@ async fn test_insert_into_nested() -> Result<()> { 95113, ] ] - -- child 1: "contact" (Struct([Field { name: "email", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }, Field { name: "phone", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }])) + -- child 1: "contact" (Struct([Field { name: "email", data_type: Utf8, nullable: true, metadata: {"PARQUET:field_id": "9"} }, Field { name: "phone", data_type: Utf8, nullable: true, metadata: {"PARQUET:field_id": "10"} }])) StructArray -- validity: [ @@ -765,13 +757,13 @@ async fn test_insert_into_nested() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "name", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[address][street]", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[address][city]", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[address][zip]", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[contact][email]", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[contact][phone]", data_type: Utf8, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }"#]], + Field { "id": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "name": Utf8, metadata: {"PARQUET:field_id": "2"} }, + Field { "catalog.test_insert_nested.nested_table.profile[address][street]": nullable Utf8, metadata: {"PARQUET:field_id": "6"} }, + Field { "catalog.test_insert_nested.nested_table.profile[address][city]": nullable Utf8, metadata: {"PARQUET:field_id": "7"} }, + Field { "catalog.test_insert_nested.nested_table.profile[address][zip]": nullable Int32, metadata: {"PARQUET:field_id": "8"} }, + Field { "catalog.test_insert_nested.nested_table.profile[contact][email]": nullable Utf8, metadata: {"PARQUET:field_id": "9"} }, + Field { "catalog.test_insert_nested.nested_table.profile[contact][phone]": nullable Utf8, metadata: {"PARQUET:field_id": "10"} }"#]], expect![[r#" id: PrimitiveArray [ @@ -880,10 +872,6 @@ async fn test_insert_into_partitioned() -> Result<()> { .unwrap(); assert_eq!(rows_inserted.value(0), 5); - // Refresh catalog to get updated table - let catalog = Arc::new(IcebergCatalogProvider::try_new(client.clone()).await?); - ctx.register_catalog("catalog", catalog); - // Query the table to verify data let df = ctx .sql("SELECT * FROM catalog.test_partitioned_write.partitioned_table ORDER BY id") @@ -896,9 +884,9 @@ async fn test_insert_into_partitioned() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "category", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "value", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }"#]], + Field { "id": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "category": Utf8, metadata: {"PARQUET:field_id": "2"} }, + Field { 
"value": Utf8, metadata: {"PARQUET:field_id": "3"} }"#]], expect![[r#" id: PrimitiveArray [ @@ -935,25 +923,22 @@ async fn test_insert_into_partitioned() -> Result<()> { let file_io = table.file_io(); // List files under each expected partition path - let electronics_path = format!("{}/data/category=electronics", table_location); - let books_path = format!("{}/data/category=books", table_location); - let clothing_path = format!("{}/data/category=clothing", table_location); + let electronics_path = format!("{table_location}/data/category=electronics"); + let books_path = format!("{table_location}/data/category=books"); + let clothing_path = format!("{table_location}/data/category=clothing"); // Verify partition directories exist and contain data files assert!( file_io.exists(&electronics_path).await?, - "Expected partition directory: {}", - electronics_path + "Expected partition directory: {electronics_path}" ); assert!( file_io.exists(&books_path).await?, - "Expected partition directory: {}", - books_path + "Expected partition directory: {books_path}" ); assert!( file_io.exists(&clothing_path).await?, - "Expected partition directory: {}", - clothing_path + "Expected partition directory: {clothing_path}" ); Ok(()) diff --git a/crates/integrations/playground/DEPENDENCIES.rust.tsv b/crates/integrations/playground/DEPENDENCIES.rust.tsv index 84dd20ed3e..8f57b1d9ae 100644 --- a/crates/integrations/playground/DEPENDENCIES.rust.tsv +++ b/crates/integrations/playground/DEPENDENCIES.rust.tsv @@ -1,509 +1,492 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 MPL-2.0 Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X -adler2@2.0.1 X X X -adler32@1.2.0 X -ahash@0.8.12 X X -aho-corasick@1.1.3 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anstream@0.6.20 X X -anstyle@1.0.11 X X -anstyle-parse@0.2.7 X X 
-anstyle-query@1.1.4 X X -anstyle-wincon@3.0.10 X X -anyhow@1.0.99 X X -apache-avro@0.17.0 X -apache-avro@0.20.0 X -array-init@2.1.0 X X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@55.2.0 X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-csv@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-json@55.2.0 X -arrow-ord@55.2.0 X -arrow-row@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X -as-any@0.3.2 X X -async-compression@0.4.19 X X -async-lock@3.4.1 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -atomic-waker@1.1.2 X X -autocfg@1.5.0 X X -aws-config@1.8.6 X -aws-credential-types@1.2.6 X -aws-runtime@1.5.10 X -aws-sdk-sso@1.83.0 X -aws-sdk-ssooidc@1.84.0 X -aws-sdk-sts@1.85.0 X -aws-sigv4@1.3.4 X -aws-smithy-async@1.2.5 X -aws-smithy-http@0.62.3 X -aws-smithy-http-client@1.1.1 X -aws-smithy-json@0.61.5 X -aws-smithy-observability@0.1.3 X -aws-smithy-query@0.60.7 X -aws-smithy-runtime@1.9.1 X -aws-smithy-runtime-api@1.9.0 X -aws-smithy-types@1.3.2 X -aws-smithy-xml@0.60.10 X -aws-types@1.3.8 X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.22.1 X X -base64-simd@0.8.0 X -bigdecimal@0.4.8 X X -bimap@0.6.3 X X -bitflags@2.9.4 X X -blake2@0.10.6 X X -blake3@1.8.2 X X X -block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X -byteorder@1.5.0 X X -bytes@1.10.1 X -bytes-utils@0.1.4 X X -bzip2@0.4.4 X X -bzip2@0.5.2 X X -bzip2-sys@0.1.13+1.0.8 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X -cfg_aliases@0.2.1 X -chrono@0.4.42 X X -chrono-tz@0.10.4 X X -clap@4.5.47 X X -clap_builder@4.5.47 X X -clap_derive@4.5.47 X X -clap_lex@0.7.5 X X -clipboard-win@5.4.1 X -colorchoice@1.0.4 X X -comfy-table@7.2.0 X -concurrent-queue@2.5.0 X X -const-oid@0.9.6 X X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -constant_time_eq@0.3.1 X X X -core-foundation@0.10.1 X X -core-foundation-sys@0.8.7 X X -core2@0.4.0 X X 
-cpufeatures@0.2.17 X X -crc32c@0.6.8 X X -crc32fast@1.5.0 X X -crossbeam-channel@0.5.15 X X -crossbeam-epoch@0.9.18 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.6 X X -csv@1.3.1 X X -csv-core@0.1.12 X X -darling@0.20.11 X -darling@0.21.3 X -darling_core@0.20.11 X -darling_core@0.21.3 X -darling_macro@0.20.11 X -darling_macro@0.21.3 X -dary_heap@0.3.7 X X -dashmap@6.1.0 X -datafusion@48.0.1 X -datafusion-catalog@48.0.1 X -datafusion-catalog-listing@48.0.1 X -datafusion-cli@48.0.1 X -datafusion-common@48.0.1 X -datafusion-common-runtime@48.0.1 X -datafusion-datasource@48.0.1 X -datafusion-datasource-avro@48.0.1 X -datafusion-datasource-csv@48.0.1 X -datafusion-datasource-json@48.0.1 X -datafusion-datasource-parquet@48.0.1 X -datafusion-doc@48.0.1 X -datafusion-execution@48.0.1 X -datafusion-expr@48.0.1 X -datafusion-expr-common@48.0.1 X -datafusion-functions@48.0.1 X -datafusion-functions-aggregate@48.0.1 X -datafusion-functions-aggregate-common@48.0.1 X -datafusion-functions-nested@48.0.1 X -datafusion-functions-table@48.0.1 X -datafusion-functions-window@48.0.1 X -datafusion-functions-window-common@48.0.1 X -datafusion-macros@48.0.1 X -datafusion-optimizer@48.0.1 X -datafusion-physical-expr@48.0.1 X -datafusion-physical-expr-common@48.0.1 X -datafusion-physical-optimizer@48.0.1 X -datafusion-physical-plan@48.0.1 X -datafusion-session@48.0.1 X -datafusion-sql@48.0.1 X -deranged@0.5.3 X X -derive_builder@0.20.2 X X -derive_builder_core@0.20.2 X X -derive_builder_macro@0.20.2 X X -digest@0.10.7 X X -dirs@6.0.0 X X -dirs-sys@0.5.0 X X -displaydoc@0.2.5 X X -dissimilar@1.0.10 X -either@1.15.0 X X -endian-type@0.1.2 X -env_filter@0.1.3 X X -env_logger@0.11.8 X X -equivalent@1.0.2 X X -errno@0.3.13 X X -error-code@3.3.2 X -event-listener@5.4.1 X X -event-listener-strategy@0.5.4 X X -expect-test@1.5.1 X X -fastrand@2.3.0 X X -fd-lock@4.0.4 X X -find-msvc-tools@0.1.1 X X -fixedbitset@0.5.7 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X -fnv@1.0.7 X X 
-foldhash@0.1.5 X -form_urlencoded@1.2.2 X X -fs-err@3.1.1 X X -futures@0.3.31 X X -futures-channel@0.3.31 X X -futures-core@0.3.31 X X -futures-executor@0.3.31 X X -futures-io@0.3.31 X X -futures-macro@0.3.31 X X -futures-sink@0.3.31 X X -futures-task@0.3.31 X X -futures-util@0.3.31 X X -generator@0.8.7 X X -generic-array@0.14.7 X -getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X -glob@0.3.3 X X -gloo-timers@0.3.0 X X -h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -heck@0.5.0 X X -hermit-abi@0.5.2 X X -hex@0.4.3 X X -hmac@0.12.1 X X -home@0.5.11 X X -http@0.2.12 X X -http@1.3.1 X X -http-body@0.4.6 X -http-body@1.0.1 X -http-body-util@0.1.3 X -httparse@1.10.1 X X -httpdate@1.0.3 X X -humantime@2.2.0 X X -hyper@1.7.0 X -hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X -iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-rest@0.7.0 X -iceberg-datafusion@0.7.0 X -iceberg-playground@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X -ident_case@1.0.1 X X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.11.0 X X -integer-encoding@3.0.4 X -io-uring@0.7.10 X X -ipnet@2.11.0 X X -iri-string@0.7.8 X X -is_terminal_polyfill@1.70.1 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.15 X X -jiff@0.2.15 X X -jobserver@0.1.34 X X -js-sys@0.3.78 X X -lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X -libflate@2.1.0 X -libflate_lz77@2.1.0 X -libm@0.2.15 X -libmimalloc-sys@0.1.44 X -libredox@0.1.9 X -libz-rs-sys@0.5.2 X -linux-raw-sys@0.9.4 X X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -lzma-sys@0.1.20 X X -matchers@0.2.0 X 
-md-5@0.10.6 X X -memchr@2.7.5 X X -mimalloc@0.1.48 X -miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X -murmur3@0.5.2 X X -nibble_vec@0.1.0 X -nix@0.30.1 X -nu-ansi-term@0.50.1 X -num@0.4.3 X X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-conv@0.1.0 X X -num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X -num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X -object_store@0.12.3 X X -once_cell@1.21.3 X X -once_cell_polyfill@1.70.1 X X -opendal@0.54.0 X -openssl-probe@0.1.6 X X -option-ext@0.2.0 X -ordered-float@2.10.1 X -ordered-float@4.6.0 X -outref@0.5.2 X -parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X -paste@1.0.15 X X -percent-encoding@2.3.2 X X -petgraph@0.8.2 X X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.16 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -portable-atomic@1.11.1 X X -portable-atomic-util@0.2.4 X X -potential_utf@0.1.3 X -powerfmt@0.2.0 X X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X -psm@0.1.26 X X -quad-rand@0.2.3 X -quick-xml@0.37.5 X -quick-xml@0.38.3 X -quote@1.0.40 X X -r-efi@5.3.0 X X X -radix_trie@0.2.1 X -rand@0.8.5 X X -rand@0.9.2 X X -rand_chacha@0.3.1 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.3 X X -recursive@0.1.1 X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.17 X -redox_users@0.5.2 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X -reqsign@0.16.5 X -reqwest@0.12.23 X X -ring@0.17.14 X X -rle-decode-fast@1.0.3 X X -roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X -rustc_version@0.4.1 X X -rustix@1.0.8 X X X -rustls@0.23.31 X X X -rustls-native-certs@0.8.1 X X X -rustls-pemfile@2.2.0 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X -rustversion@1.0.22 X X -rustyline@16.0.0 X -ryu@1.0.20 X X -same-file@1.0.6 X X -schannel@0.1.27 X -scoped-tls@1.0.1 X X -scopeguard@1.2.0 X X -security-framework@3.4.0 X X 
-security-framework-sys@2.15.0 X X -semver@1.0.26 X X -seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X -serde_repr@0.1.20 X X -serde_spanned@0.6.9 X X -serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X -sha1@0.10.6 X X -sha2@0.10.9 X X -sharded-slab@0.1.7 X -shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X -simdutf8@0.1.5 X X -siphasher@1.0.1 X X -slab@0.4.11 X -smallvec@1.15.1 X X -snap@1.1.1 X -socket2@0.6.0 X X -sqlparser@0.55.0 X -sqlparser_derive@0.3.0 X -stable_deref_trait@1.2.0 X X -stacker@0.1.21 X X -static_assertions@1.1.0 X X -strsim@0.11.1 X -strum@0.26.3 X -strum@0.27.2 X -strum_macros@0.26.4 X -strum_macros@0.27.2 X -subtle@2.6.1 X -syn@2.0.106 X X -sync_wrapper@1.0.2 X -synstructure@0.13.2 X -tagptr@0.2.0 X X -tempfile@3.22.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X -thread_local@1.1.9 X X -threadpool@1.8.1 X X -thrift@0.17.0 X -time@0.3.43 X X -time-core@0.1.6 X X -tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X -toml@0.8.23 X X -toml_datetime@0.6.11 X X -toml_edit@0.22.27 X X -toml_write@0.1.2 X X -tower@0.5.2 X -tower-http@0.6.6 X -tower-layer@0.3.3 X -tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X -tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X -try-lock@0.2.5 X -twox-hash@2.1.2 X -typed-builder@0.19.1 X X -typed-builder@0.20.1 X X -typed-builder-macro@0.19.1 X X -typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X -unicode-segmentation@1.12.0 X X -unicode-width@0.2.1 X X -untrusted@0.9.0 X -url@2.5.7 X X -urlencoding@2.1.3 X -utf8_iter@1.0.4 X X -utf8parse@0.2.2 X X -uuid@1.18.1 X X -version_check@0.9.5 X X -vsimd@0.8.0 X -walkdir@2.5.0 X X -want@0.3.1 X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X 
X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X -wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -web-time@1.1.0 X X -webpki-roots@1.0.2 X -winapi-util@0.1.11 X X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.60.2 X X -windows-sys@0.61.0 X X -windows-targets@0.52.6 X X -windows-targets@0.53.3 X X -windows-threading@0.1.0 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_gnullvm@0.53.0 X X -windows_aarch64_msvc@0.52.6 X X -windows_aarch64_msvc@0.53.0 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnu@0.53.0 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_gnullvm@0.53.0 X X -windows_i686_msvc@0.52.6 X X -windows_i686_msvc@0.53.0 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnu@0.53.0 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_gnullvm@0.53.0 X X -windows_x86_64_msvc@0.52.6 X X -windows_x86_64_msvc@0.53.0 X X -winnow@0.7.13 X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -xmlparser@0.13.6 X X -xz2@0.1.7 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 MPL-2.0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +adler2@2.0.1 X X X +ahash@0.8.12 X X +aho-corasick@1.1.4 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X 
+android_system_properties@0.1.5 X X +anstream@0.6.21 X X +anstyle@1.0.13 X X +anstyle-parse@0.2.7 X X +anstyle-query@1.1.5 X X +anstyle-wincon@3.0.11 X X +anyhow@1.0.100 X X +apache-avro@0.20.0 X +apache-avro@0.21.0 X +ar_archive_writer@0.2.0 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@57.1.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-csv@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-json@57.1.0 X +arrow-ord@57.1.0 X +arrow-row@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X +as-any@0.3.2 X X +async-compression@0.4.19 X X +async-lock@3.4.1 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +aws-config@1.8.11 X +aws-credential-types@1.2.10 X +aws-runtime@1.5.16 X +aws-sdk-sso@1.90.0 X +aws-sdk-ssooidc@1.92.0 X +aws-sdk-sts@1.94.0 X +aws-sigv4@1.3.6 X +aws-smithy-async@1.2.7 X +aws-smithy-http@0.62.6 X +aws-smithy-http-client@1.1.5 X +aws-smithy-json@0.61.8 X +aws-smithy-observability@0.1.5 X +aws-smithy-query@0.60.9 X +aws-smithy-runtime@1.9.5 X +aws-smithy-runtime-api@1.9.3 X +aws-smithy-types@1.3.5 X +aws-smithy-xml@0.60.13 X +aws-types@1.3.10 X +backon@1.6.0 X +base64@0.22.1 X X +base64-simd@0.8.0 X +bigdecimal@0.4.9 X X +bimap@0.6.3 X X +bitflags@2.10.0 X X +blake2@0.10.6 X X +blake3@1.8.2 X X X +block-buffer@0.10.4 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.19.0 X X +bytemuck@1.24.0 X X X +byteorder@1.5.0 X X +bytes@1.11.0 X +bytes-utils@0.1.4 X X +bzip2@0.5.2 X X +bzip2@0.6.1 X X +bzip2-sys@0.1.13+1.0.8 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X +cfg_aliases@0.2.1 X +chrono@0.4.42 X X +chrono-tz@0.10.4 X X +clap@4.5.53 X X +clap_builder@4.5.53 X X +clap_derive@4.5.49 X X +clap_lex@0.7.6 X X +clipboard-win@5.4.1 X +colorchoice@1.0.4 X X +comfy-table@7.2.1 X +concurrent-queue@2.5.0 X X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X 
X +constant_time_eq@0.3.1 X X X +core-foundation@0.10.1 X X +core-foundation-sys@0.8.7 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.7 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +darling@0.20.11 X +darling@0.21.3 X +darling_core@0.20.11 X +darling_core@0.21.3 X +darling_macro@0.20.11 X +darling_macro@0.21.3 X +dashmap@6.1.0 X +datafusion@51.0.0 X +datafusion-catalog@51.0.0 X +datafusion-catalog-listing@51.0.0 X +datafusion-cli@51.0.0 X +datafusion-common@51.0.0 X +datafusion-common-runtime@51.0.0 X +datafusion-datasource@51.0.0 X +datafusion-datasource-arrow@51.0.0 X +datafusion-datasource-avro@51.0.0 X +datafusion-datasource-csv@51.0.0 X +datafusion-datasource-json@51.0.0 X +datafusion-datasource-parquet@51.0.0 X +datafusion-doc@51.0.0 X +datafusion-execution@51.0.0 X +datafusion-expr@51.0.0 X +datafusion-expr-common@51.0.0 X +datafusion-functions@51.0.0 X +datafusion-functions-aggregate@51.0.0 X +datafusion-functions-aggregate-common@51.0.0 X +datafusion-functions-nested@51.0.0 X +datafusion-functions-table@51.0.0 X +datafusion-functions-window@51.0.0 X +datafusion-functions-window-common@51.0.0 X +datafusion-macros@51.0.0 X +datafusion-optimizer@51.0.0 X +datafusion-physical-expr@51.0.0 X +datafusion-physical-expr-adapter@51.0.0 X +datafusion-physical-expr-common@51.0.0 X +datafusion-physical-optimizer@51.0.0 X +datafusion-physical-plan@51.0.0 X +datafusion-pruning@51.0.0 X +datafusion-session@51.0.0 X +datafusion-sql@51.0.0 X +deranged@0.5.5 X X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +dirs@6.0.0 X X +dirs-sys@0.5.0 X X +displaydoc@0.2.5 X X +dissimilar@1.0.10 X +either@1.15.0 X X +endian-type@0.1.2 X +env_filter@0.1.4 X X +env_logger@0.11.8 X X +equivalent@1.0.2 X X +errno@0.3.14 X X +error-code@3.3.2 X +event-listener@5.4.1 X X 
+event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fastrand@2.3.0 X X +fd-lock@4.0.4 X X +find-msvc-tools@0.1.5 X X +fixedbitset@0.5.7 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +form_urlencoded@1.2.2 X X +fs-err@3.2.0 X X +futures@0.3.31 X X +futures-channel@0.3.31 X X +futures-core@0.3.31 X X +futures-executor@0.3.31 X X +futures-io@0.3.31 X X +futures-macro@0.3.31 X X +futures-sink@0.3.31 X X +futures-task@0.3.31 X X +futures-timer@3.0.3 X X +futures-util@0.3.31 X X +generic-array@0.14.7 X +getrandom@0.2.16 X X +getrandom@0.3.4 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +h2@0.4.12 X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +home@0.5.11 X X +http@0.2.12 X X +http@1.4.0 X X +http-body@0.4.6 X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +httpdate@1.0.3 X X +humantime@2.3.0 X X +hyper@1.8.1 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.8.0 X +iceberg-catalog-rest@0.8.0 X +iceberg-datafusion@0.8.0 X +iceberg-playground@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.12.1 X X +integer-encoding@3.0.4 X +ipnet@2.11.0 X X +iri-string@0.7.9 X X +is_terminal_polyfill@1.70.2 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X +jobserver@0.1.34 X X +js-sys@0.3.83 X X +lazy_static@1.5.0 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.178 X X +libm@0.2.15 X 
+libmimalloc-sys@0.1.44 X +libredox@0.1.10 X +libz-rs-sys@0.5.3 X +linux-raw-sys@0.11.0 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X +lzma-sys@0.1.20 X X +md-5@0.10.6 X X +memchr@2.7.6 X X +mimalloc@0.1.48 X +miniz_oxide@0.8.9 X X X +mio@1.1.1 X +moka@0.12.11 X X +murmur3@0.5.2 X X +nibble_vec@0.1.0 X +nix@0.30.1 X +nu-ansi-term@0.50.3 X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-conv@0.1.0 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +object@0.32.2 X X +object_store@0.12.4 X X +once_cell@1.21.3 X X +once_cell_polyfill@1.70.2 X X +opendal@0.55.0 X +openssl-probe@0.1.6 X X +option-ext@0.2.0 X +ordered-float@2.10.1 X +ordered-float@4.6.0 X +outref@0.5.2 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X +paste@1.0.15 X X +percent-encoding@2.3.2 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.16 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +portable-atomic@1.11.1 X X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X +powerfmt@0.2.0 X X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X +psm@0.1.28 X X +quad-rand@0.2.3 X +quick-xml@0.38.4 X +quote@1.0.42 X X +r-efi@5.3.0 X X X +radix_trie@0.2.1 X +rand@0.8.5 X X +rand@0.9.2 X X +rand_chacha@0.3.1 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.3 X X +recursive@0.1.1 X +recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +redox_users@0.5.2 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X +relative-path@1.9.3 X X +reqsign@0.16.5 X +reqwest@0.12.25 X X +ring@0.17.14 X X +roaring@0.11.2 X X +rstest@0.26.1 X X +rstest_macros@0.26.1 X X +rust_decimal@1.39.0 X +rustc_version@0.4.1 X X +rustix@1.1.2 X X X +rustls@0.23.35 X X X +rustls-native-certs@0.8.2 X X X +rustls-pemfile@2.2.0 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X +rustversion@1.0.22 X X +rustyline@17.0.2 X 
+ryu@1.0.20 X X +same-file@1.0.6 X X +schannel@0.1.28 X +scopeguard@1.2.0 X X +security-framework@3.5.1 X X +security-framework-sys@2.15.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X +serde_repr@0.1.20 X X +serde_spanned@0.6.9 X X +serde_urlencoded@0.7.1 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X +sha1@0.10.6 X X +sha2@0.10.9 X X +sharded-slab@0.1.7 X +shlex@1.3.0 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X +simdutf8@0.1.5 X X +siphasher@1.0.1 X X +slab@0.4.11 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.1 X X +sqlparser@0.59.0 X +sqlparser_derive@0.3.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.22 X X +strsim@0.11.1 X +strum@0.27.2 X +strum_macros@0.27.2 X +subtle@2.6.1 X +syn@2.0.111 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +tempfile@3.23.0 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X +thread_local@1.1.9 X X +thrift@0.17.0 X +time@0.3.44 X X +time-core@0.1.6 X X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X +toml@0.8.23 X X +toml_datetime@0.6.11 X X +toml_datetime@0.7.3 X X +toml_edit@0.22.27 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X +toml_write@0.1.2 X X +tower@0.5.2 X +tower-http@0.6.8 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X +tracing-log@0.2.0 X +tracing-subscriber@0.3.22 X +try-lock@0.2.5 X +twox-hash@2.1.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X +unicode-segmentation@1.12.0 X X +unicode-width@0.2.2 X X +untrusted@0.9.0 X +url@2.5.7 X X +urlencoding@2.1.3 X +utf8_iter@1.0.4 X X +utf8parse@0.2.2 X X +uuid@1.19.0 X X +version_check@0.9.5 X X +vsimd@0.8.0 X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.1+wasi-0.2.4 X 
X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X +wasm-streams@0.4.2 X X +web-sys@0.3.83 X X +web-time@1.1.0 X X +webpki-roots@1.0.4 X +winapi-util@0.1.11 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.52.0 X X +windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +xmlparser@0.13.6 X X +xz2@0.1.7 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/integrations/playground/src/main.rs b/crates/integrations/playground/src/main.rs index c522209957..94068bb558 100644 --- a/crates/integrations/playground/src/main.rs +++ b/crates/integrations/playground/src/main.rs @@ -24,6 +24,7 @@ use clap::Parser; use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_cli::exec; +use 
datafusion_cli::object_storage::instrumented::InstrumentedObjectStoreRegistry; use datafusion_cli::print_format::PrintFormat; use datafusion_cli::print_options::{MaxRows, PrintOptions}; use iceberg_playground::{ICEBERG_PLAYGROUND_VERSION, IcebergCatalogList}; @@ -94,6 +95,7 @@ async fn main_inner() -> anyhow::Result<()> { quiet: args.quiet, maxrows: args.maxrows, color: args.color, + instrumented_registry: Arc::new(InstrumentedObjectStoreRegistry::new()), }; let rc = match args.rc { diff --git a/crates/sqllogictest/DEPENDENCIES.rust.tsv b/crates/sqllogictest/DEPENDENCIES.rust.tsv index e8af062ea3..94bd88e2e6 100644 --- a/crates/sqllogictest/DEPENDENCIES.rust.tsv +++ b/crates/sqllogictest/DEPENDENCIES.rust.tsv @@ -1,398 +1,482 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X -adler2@2.0.1 X X X -adler32@1.2.0 X -ahash@0.8.12 X X -aho-corasick@1.1.3 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anstream@0.6.20 X X -anstyle@1.0.11 X X -anstyle-parse@0.2.7 X X -anstyle-query@1.1.4 X X -anstyle-wincon@3.0.10 X X -anyhow@1.0.99 X X -apache-avro@0.17.0 X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@55.2.0 X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-csv@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-json@55.2.0 X -arrow-ord@55.2.0 X -arrow-row@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X -async-compression@0.4.19 X X -async-recursion@1.1.1 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -autocfg@1.5.0 X X -backtrace@0.3.75 X X -base64@0.21.7 X X -base64@0.22.1 X X -bigdecimal@0.4.8 X X -bitflags@2.9.4 X X -blake2@0.10.6 X X -blake3@1.8.2 X X X -block-buffer@0.10.4 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.19.0 X X -byteorder@1.5.0 X X -bytes@1.10.1 X -bzip2@0.4.4 X X 
-bzip2@0.5.2 X X -bzip2-sys@0.1.13+1.0.8 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X -chrono@0.4.42 X X -chrono-tz@0.10.4 X X -clap@4.5.47 X X -clap_builder@4.5.47 X X -clap_derive@4.5.47 X X -clap_lex@0.7.5 X X -colorchoice@1.0.4 X X -comfy-table@7.2.0 X -console@0.15.11 X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -constant_time_eq@0.3.1 X X X -core-foundation-sys@0.8.7 X X -core2@0.4.0 X X -cpufeatures@0.2.17 X X -crc32fast@1.5.0 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.6 X X -csv@1.3.1 X X -csv-core@0.1.12 X X -dary_heap@0.3.7 X X -dashmap@6.1.0 X -datafusion@48.0.1 X -datafusion-catalog@48.0.1 X -datafusion-catalog-listing@48.0.1 X -datafusion-common@48.0.1 X -datafusion-common-runtime@48.0.1 X -datafusion-datasource@48.0.1 X -datafusion-datasource-avro@48.0.1 X -datafusion-datasource-csv@48.0.1 X -datafusion-datasource-json@48.0.1 X -datafusion-datasource-parquet@48.0.1 X -datafusion-doc@48.0.1 X -datafusion-execution@48.0.1 X -datafusion-expr@48.0.1 X -datafusion-expr-common@48.0.1 X -datafusion-functions@48.0.1 X -datafusion-functions-aggregate@48.0.1 X -datafusion-functions-aggregate-common@48.0.1 X -datafusion-functions-nested@48.0.1 X -datafusion-functions-table@48.0.1 X -datafusion-functions-window@48.0.1 X -datafusion-functions-window-common@48.0.1 X -datafusion-macros@48.0.1 X -datafusion-optimizer@48.0.1 X -datafusion-physical-expr@48.0.1 X -datafusion-physical-expr-common@48.0.1 X -datafusion-physical-optimizer@48.0.1 X -datafusion-physical-plan@48.0.1 X -datafusion-session@48.0.1 X -datafusion-spark@48.0.1 X -datafusion-sql@48.0.1 X -datafusion-sqllogictest@48.0.1 X -datafusion-substrait@48.0.1 X -digest@0.10.7 X X -displaydoc@0.2.5 X X -dyn-clone@1.0.20 X X -educe@0.6.0 X -either@1.15.0 X X -encode_unicode@1.0.0 X X -enum-ordinalize@4.3.0 X -enum-ordinalize-derive@4.3.1 X -equivalent@1.0.2 X X -errno@0.3.13 X X -escape8259@0.5.3 X -fallible-iterator@0.2.0 X X -fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X 
-fixedbitset@0.5.7 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X -fnv@1.0.7 X X -foldhash@0.1.5 X -form_urlencoded@1.2.2 X X -fs-err@3.1.1 X X -futures@0.3.31 X X -futures-channel@0.3.31 X X -futures-core@0.3.31 X X -futures-executor@0.3.31 X X -futures-io@0.3.31 X X -futures-macro@0.3.31 X X -futures-sink@0.3.31 X X -futures-task@0.3.31 X X -futures-util@0.3.31 X X -generic-array@0.14.7 X -getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X -glob@0.3.3 X X -half@2.6.0 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -heck@0.5.0 X X -hex@0.4.3 X X -hmac@0.12.1 X X -http@1.3.1 X X -humantime@2.2.0 X X -iana-time-zone@0.1.63 X X -iana-time-zone-haiku@0.1.2 X X -iceberg-sqllogictest@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.11.0 X X -indicatif@0.17.11 X -integer-encoding@3.0.4 X -io-uring@0.7.10 X X -is_terminal_polyfill@1.70.1 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.15 X X -jobserver@0.1.34 X X -js-sys@0.3.78 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X -libflate@2.1.0 X -libflate_lz77@2.1.0 X -libm@0.2.15 X -libtest-mimic@0.8.1 X X -libz-rs-sys@0.5.2 X -linux-raw-sys@0.9.4 X X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -lz4_flex@0.11.5 X -lzma-sys@0.1.20 X X -md-5@0.10.6 X X -memchr@2.7.5 X X -miniz_oxide@0.8.9 X X X -mio@1.0.4 X -multimap@0.10.1 X X -num@0.4.3 X X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X -num-traits@0.2.19 X X -number_prefix@0.4.0 X -object@0.36.7 X X -object_store@0.12.3 X X -once_cell@1.21.3 X X -once_cell_polyfill@1.70.1 X X -ordered-float@2.10.1 X -owo-colors@4.2.2 X -parking_lot@0.12.4 X X 
-parking_lot_core@0.9.11 X X -parquet@55.2.0 X -paste@1.0.15 X X -pbjson@0.7.0 X -pbjson-build@0.7.0 X -pbjson-types@0.7.0 X -percent-encoding@2.3.2 X X -petgraph@0.7.1 X X -petgraph@0.8.2 X X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.16 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -portable-atomic@1.11.1 X X -postgres-protocol@0.6.8 X X -postgres-types@0.2.9 X X -potential_utf@0.1.3 X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X -prost@0.13.5 X -prost-build@0.13.5 X -prost-derive@0.13.5 X -prost-types@0.13.5 X -psm@0.1.26 X X -quad-rand@0.2.3 X -quote@1.0.40 X X -r-efi@5.3.0 X X X -rand@0.8.5 X X -rand@0.9.2 X X -rand_chacha@0.3.1 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.3 X X -recursive@0.1.1 X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X -regress@0.10.4 X X -rle-decode-fast@1.0.3 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X -rustc_version@0.4.1 X X -rustix@1.0.8 X X X -rustversion@1.0.22 X X -ryu@1.0.20 X X -same-file@1.0.6 X X -schemars@0.8.22 X -schemars_derive@0.8.22 X -scopeguard@1.2.0 X X -semver@1.0.26 X X -seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_derive_internals@0.29.1 X X -serde_json@1.0.143 X X -serde_spanned@0.6.9 X X -serde_tokenstream@0.2.2 X -serde_yaml@0.9.34+deprecated X X -sha2@0.10.9 X X -shlex@1.3.0 X X -simdutf8@0.1.5 X X -similar@2.7.0 X -siphasher@1.0.1 X X -slab@0.4.11 X -smallvec@1.15.1 X X -snap@1.1.1 X -sqllogictest@0.28.4 X X -sqlparser@0.55.0 X -sqlparser_derive@0.3.0 X -stable_deref_trait@1.2.0 X X -stacker@0.1.21 X X -static_assertions@1.1.0 X X -stringprep@0.1.5 X X -strsim@0.11.1 X -strum@0.26.3 X -strum_macros@0.26.4 X -subst@0.3.8 X X -substrait@0.56.0 X -subtle@2.6.1 X -syn@2.0.106 X X -synstructure@0.13.2 X -tempfile@3.22.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 
X X -thiserror-impl@2.0.16 X X -thrift@0.17.0 X -tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tinyvec@1.10.0 X X X -tinyvec_macros@0.1.1 X X X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-util@0.7.16 X -toml@0.8.23 X X -toml_datetime@0.6.11 X X -toml_edit@0.22.27 X X -toml_write@0.1.2 X X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X -twox-hash@2.1.2 X -typed-builder@0.19.1 X X -typed-builder-macro@0.19.1 X X -typenum@1.18.0 X X -typify@0.4.3 X -typify-impl@0.4.3 X -typify-macro@0.4.3 X -unicode-bidi@0.3.18 X X -unicode-ident@1.0.18 X X X -unicode-normalization@0.1.24 X X -unicode-properties@0.1.3 X X -unicode-segmentation@1.12.0 X X -unicode-width@0.1.14 X X -unicode-width@0.2.1 X X -unsafe-libyaml@0.2.11 X -url@2.5.7 X X -utf8_iter@1.0.4 X X -utf8parse@0.2.2 X X -uuid@1.18.1 X X -version_check@0.9.5 X X -walkdir@2.5.0 X X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X -web-sys@0.3.78 X X -web-time@1.1.0 X X -winapi-util@0.1.11 X X -windows-core@0.61.2 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.60.2 X X -windows-sys@0.61.0 X X -windows-targets@0.52.6 X X -windows-targets@0.53.3 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_gnullvm@0.53.0 X X -windows_aarch64_msvc@0.52.6 X X -windows_aarch64_msvc@0.53.0 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnu@0.53.0 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_gnullvm@0.53.0 X X -windows_i686_msvc@0.52.6 X X -windows_i686_msvc@0.53.0 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnu@0.53.0 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_gnullvm@0.53.0 X X 
-windows_x86_64_msvc@0.52.6 X X -windows_x86_64_msvc@0.53.0 X X -winnow@0.7.13 X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -xz2@0.1.7 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +adler2@2.0.1 X X X +ahash@0.8.12 X X +aho-corasick@1.1.4 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X +android_system_properties@0.1.5 X X +anstream@0.6.21 X X +anstyle@1.0.13 X X +anstyle-parse@0.2.7 X X +anstyle-query@1.1.5 X X +anstyle-wincon@3.0.11 X X +anyhow@1.0.100 X X +apache-avro@0.20.0 X +apache-avro@0.21.0 X +ar_archive_writer@0.2.0 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@57.1.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-csv@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-json@57.1.0 X +arrow-ord@57.1.0 X +arrow-row@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X +as-any@0.3.2 X X +async-compression@0.4.19 X X +async-lock@3.4.1 X X +async-recursion@1.1.1 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +backon@1.6.0 X +base64@0.22.1 X X +bigdecimal@0.4.9 X X +bimap@0.6.3 X X +bitflags@2.10.0 X X +blake2@0.10.6 X X +blake3@1.8.2 X X X +block-buffer@0.10.4 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.19.0 X X +bytemuck@1.24.0 X X X +byteorder@1.5.0 X X +bytes@1.11.0 X +bzip2@0.5.2 X X +bzip2@0.6.1 X X +bzip2-sys@0.1.13+1.0.8 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X +chrono@0.4.42 X X +chrono-tz@0.10.4 X X +clap@4.5.53 X X +clap_builder@4.5.53 X X 
+clap_derive@4.5.49 X X +clap_lex@0.7.6 X X +colorchoice@1.0.4 X X +comfy-table@7.2.1 X +concurrent-queue@2.5.0 X X +console@0.16.1 X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +constant_time_eq@0.3.1 X X X +core-foundation-sys@0.8.7 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.7 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +darling@0.20.11 X +darling@0.21.3 X +darling_core@0.20.11 X +darling_core@0.21.3 X +darling_macro@0.20.11 X +darling_macro@0.21.3 X +dashmap@6.1.0 X +datafusion@51.0.0 X +datafusion-catalog@51.0.0 X +datafusion-catalog-listing@51.0.0 X +datafusion-common@51.0.0 X +datafusion-common-runtime@51.0.0 X +datafusion-datasource@51.0.0 X +datafusion-datasource-arrow@51.0.0 X +datafusion-datasource-avro@51.0.0 X +datafusion-datasource-csv@51.0.0 X +datafusion-datasource-json@51.0.0 X +datafusion-datasource-parquet@51.0.0 X +datafusion-doc@51.0.0 X +datafusion-execution@51.0.0 X +datafusion-expr@51.0.0 X +datafusion-expr-common@51.0.0 X +datafusion-functions@51.0.0 X +datafusion-functions-aggregate@51.0.0 X +datafusion-functions-aggregate-common@51.0.0 X +datafusion-functions-nested@51.0.0 X +datafusion-functions-table@51.0.0 X +datafusion-functions-window@51.0.0 X +datafusion-functions-window-common@51.0.0 X +datafusion-macros@51.0.0 X +datafusion-optimizer@51.0.0 X +datafusion-physical-expr@51.0.0 X +datafusion-physical-expr-adapter@51.0.0 X +datafusion-physical-expr-common@51.0.0 X +datafusion-physical-optimizer@51.0.0 X +datafusion-physical-plan@51.0.0 X +datafusion-pruning@51.0.0 X +datafusion-session@51.0.0 X +datafusion-spark@51.0.0 X +datafusion-sql@51.0.0 X +datafusion-sqllogictest@51.0.0 X +datafusion-substrait@51.0.0 X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +displaydoc@0.2.5 X X +dissimilar@1.0.10 X 
+dyn-clone@1.0.20 X X +educe@0.6.0 X +either@1.15.0 X X +encode_unicode@1.0.0 X X +enum-ordinalize@4.3.2 X +enum-ordinalize-derive@4.3.2 X +env_filter@0.1.4 X X +env_logger@0.11.8 X X +equivalent@1.0.2 X X +errno@0.3.14 X X +escape8259@0.5.3 X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fallible-iterator@0.2.0 X X +fastrand@2.3.0 X X +find-msvc-tools@0.1.5 X X +fixedbitset@0.5.7 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +foldhash@0.2.0 X +form_urlencoded@1.2.2 X X +fs-err@3.2.0 X X +futures@0.3.31 X X +futures-channel@0.3.31 X X +futures-core@0.3.31 X X +futures-executor@0.3.31 X X +futures-io@0.3.31 X X +futures-macro@0.3.31 X X +futures-sink@0.3.31 X X +futures-task@0.3.31 X X +futures-timer@3.0.3 X X +futures-util@0.3.31 X X +generic-array@0.14.7 X +getrandom@0.2.16 X X +getrandom@0.3.4 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +home@0.5.11 X X +http@1.4.0 X X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +humantime@2.3.0 X X +hyper@1.8.1 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.8.0 X +iceberg-datafusion@0.8.0 X +iceberg-sqllogictest@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.12.1 X X +indicatif@0.18.3 X +integer-encoding@3.0.4 X +ipnet@2.11.0 X X +iri-string@0.7.9 X X +is_terminal_polyfill@1.70.2 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X +jobserver@0.1.34 X X +js-sys@0.3.83 X X +lazy_static@1.5.0 X X +lexical-core@1.0.6 X X 
+lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.178 X X +libm@0.2.15 X +libtest-mimic@0.8.1 X X +libz-rs-sys@0.5.3 X +linux-raw-sys@0.11.0 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X +lzma-sys@0.1.20 X X +md-5@0.10.6 X X +memchr@2.7.6 X X +miniz_oxide@0.8.9 X X X +mio@1.1.1 X +moka@0.12.11 X X +multimap@0.10.1 X X +murmur3@0.5.2 X X +nu-ansi-term@0.50.3 X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +object@0.32.2 X X +object_store@0.12.4 X X +once_cell@1.21.3 X X +once_cell_polyfill@1.70.2 X X +opendal@0.55.0 X +ordered-float@2.10.1 X +ordered-float@4.6.0 X +owo-colors@4.2.3 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X +paste@1.0.15 X X +pbjson@0.8.0 X +pbjson-build@0.8.0 X +pbjson-types@0.8.0 X +percent-encoding@2.3.2 X X +petgraph@0.7.1 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.16 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +portable-atomic@1.11.1 X X +portable-atomic-util@0.2.4 X X +postgres-protocol@0.6.9 X X +postgres-types@0.2.11 X X +potential_utf@0.1.4 X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X +prost@0.14.1 X +prost-build@0.14.1 X +prost-derive@0.14.1 X +prost-types@0.14.1 X +psm@0.1.28 X X +quad-rand@0.2.3 X +quick-xml@0.38.4 X +quote@1.0.42 X X +r-efi@5.3.0 X X X +rand@0.8.5 X X +rand@0.9.2 X X +rand_chacha@0.3.1 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.3 X X +recursive@0.1.1 X +recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X +regress@0.10.5 X X +relative-path@1.9.3 X X +reqsign@0.16.5 X +reqwest@0.12.25 X X +ring@0.17.14 X X +roaring@0.11.2 X X +rstest@0.26.1 X X 
+rstest_macros@0.26.1 X X +rust_decimal@1.39.0 X +rustc_version@0.4.1 X X +rustix@1.1.2 X X X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X +rustversion@1.0.22 X X +ryu@1.0.20 X X +same-file@1.0.6 X X +schemars@0.8.22 X +schemars_derive@0.8.22 X +scopeguard@1.2.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_derive_internals@0.29.1 X X +serde_json@1.0.145 X X +serde_repr@0.1.20 X X +serde_spanned@0.6.9 X X +serde_tokenstream@0.2.2 X +serde_urlencoded@0.7.1 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X +serde_yaml@0.9.34+deprecated X X +sha1@0.10.6 X X +sha2@0.10.9 X X +sharded-slab@0.1.7 X +shlex@1.3.0 X X +simd-adler32@0.3.8 X +simdutf8@0.1.5 X X +similar@2.7.0 X +siphasher@1.0.1 X X +slab@0.4.11 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.1 X X +sqllogictest@0.28.4 X X +sqlparser@0.59.0 X +sqlparser_derive@0.3.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.22 X X +stringprep@0.1.5 X X +strsim@0.11.1 X +strum@0.27.2 X +strum_macros@0.27.2 X +subst@0.3.8 X X +substrait@0.62.0 X +subtle@2.6.1 X +syn@2.0.111 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +tempfile@3.23.0 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X +thread_local@1.1.9 X X +thrift@0.17.0 X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tinyvec@1.10.0 X X X +tinyvec_macros@0.1.1 X X X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X +toml@0.8.23 X X +toml_datetime@0.6.11 X X +toml_datetime@0.7.3 X X +toml_edit@0.22.27 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X +toml_write@0.1.2 X X +tower@0.5.2 X +tower-http@0.6.8 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X +tracing-log@0.2.0 X +tracing-subscriber@0.3.22 X +try-lock@0.2.5 X +twox-hash@2.1.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typenum@1.19.0 X X 
+typify@0.5.0 X +typify-impl@0.5.0 X +typify-macro@0.5.0 X +unicode-bidi@0.3.18 X X +unicode-ident@1.0.22 X X X +unicode-normalization@0.1.25 X X +unicode-properties@0.1.4 X X +unicode-segmentation@1.12.0 X X +unicode-width@0.1.14 X X +unicode-width@0.2.2 X X +unit-prefix@0.5.2 X +unsafe-libyaml@0.2.11 X +untrusted@0.9.0 X +url@2.5.7 X X +utf8_iter@1.0.4 X X +utf8parse@0.2.2 X X +uuid@1.19.0 X X +version_check@0.9.5 X X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X +wasm-streams@0.4.2 X X +web-sys@0.3.83 X X +web-time@1.1.0 X X +webpki-roots@1.0.4 X +winapi-util@0.1.11 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.52.0 X X +windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +xz2@0.1.7 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X 
+zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/sqllogictest/src/engine/datafusion.rs b/crates/sqllogictest/src/engine/datafusion.rs index b3e37d9206..e9f93287d8 100644 --- a/crates/sqllogictest/src/engine/datafusion.rs +++ b/crates/sqllogictest/src/engine/datafusion.rs @@ -22,13 +22,13 @@ use std::sync::Arc; use datafusion::catalog::CatalogProvider; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_sqllogictest::DataFusion; -use iceberg::CatalogBuilder; use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder}; +use iceberg::spec::{NestedField, PrimitiveType, Schema, Transform, Type, UnboundPartitionSpec}; +use iceberg::{Catalog, CatalogBuilder, NamespaceIdent, TableCreation}; use iceberg_datafusion::IcebergCatalogProvider; use indicatif::ProgressBar; -use toml::Table as TomlTable; -use crate::engine::{EngineRunner, run_slt_with_runner}; +use crate::engine::{DatafusionCatalogConfig, EngineRunner, run_slt_with_runner}; use crate::error::Result; pub struct DataFusionEngine { @@ -58,12 +58,15 @@ impl EngineRunner for DataFusionEngine { } impl DataFusionEngine { - pub async fn new(config: TomlTable) -> Result<Self> { + pub async fn new(catalog_config: Option<DatafusionCatalogConfig>) -> Result<Self> { let session_config = SessionConfig::new() .with_target_partitions(4) .with_information_schema(true); let ctx = SessionContext::new_with_config(session_config); - ctx.register_catalog("default", Self::create_catalog(&config).await?); + ctx.register_catalog( + "default", + Self::create_catalog(catalog_config.as_ref()).await?, + ); Ok(Self { test_data_path: PathBuf::from("testdata"), @@ -71,9 +74,11 @@ impl DataFusionEngine { }) } - async fn create_catalog(_: &TomlTable) -> anyhow::Result<Arc<dyn CatalogProvider>> { - // TODO: support dynamic catalog configuration - // See: https://github.com/apache/iceberg-rust/issues/1780 + async fn create_catalog( + _catalog_config: Option<&DatafusionCatalogConfig>, + ) -> anyhow::Result<Arc<dyn CatalogProvider>> { + // TODO: Use catalog_config to load different catalog types via
iceberg-catalog-loader + // See: https://github.com/apache/iceberg-rust/issues/1780 let catalog = MemoryCatalogBuilder::default() .load( "memory", @@ -84,8 +89,76 @@ impl DataFusionEngine { ) .await?; + // Create a test namespace for INSERT INTO tests + let namespace = NamespaceIdent::new("default".to_string()); + catalog.create_namespace(&namespace, HashMap::new()).await?; + + // Create test tables + Self::create_unpartitioned_table(&catalog, &namespace).await?; + Self::create_partitioned_table(&catalog, &namespace).await?; + Ok(Arc::new( IcebergCatalogProvider::try_new(Arc::new(catalog)).await?, )) } + + /// Create an unpartitioned test table with id and name columns + /// TODO: this can be removed when we support CREATE TABLE + async fn create_unpartitioned_table( + catalog: &impl Catalog, + namespace: &NamespaceIdent, + ) -> anyhow::Result<()> { + let schema = Schema::builder() + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional(2, "name", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build()?; + + catalog + .create_table( + namespace, + TableCreation::builder() + .name("test_unpartitioned_table".to_string()) + .schema(schema) + .build(), + ) + .await?; + + Ok(()) + } + + /// Create a partitioned test table with id, category, and value columns + /// Partitioned by category using identity transform + /// TODO: this can be removed when we support CREATE TABLE + async fn create_partitioned_table( + catalog: &impl Catalog, + namespace: &NamespaceIdent, + ) -> anyhow::Result<()> { + let schema = Schema::builder() + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::required(2, "category", Type::Primitive(PrimitiveType::String)).into(), + NestedField::optional(3, "value", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build()?; + + let partition_spec = UnboundPartitionSpec::builder() + .with_spec_id(0) + 
.add_partition_field(2, "category", Transform::Identity)? + .build(); + + catalog + .create_table( + namespace, + TableCreation::builder() + .name("test_partitioned_table".to_string()) + .schema(schema) + .partition_spec(partition_spec) + .build(), + ) + .await?; + + Ok(()) + } } diff --git a/crates/sqllogictest/src/engine/mod.rs b/crates/sqllogictest/src/engine/mod.rs index 724359fbe5..a276671401 100644 --- a/crates/sqllogictest/src/engine/mod.rs +++ b/crates/sqllogictest/src/engine/mod.rs @@ -17,29 +17,45 @@ mod datafusion; +use std::collections::HashMap; use std::path::Path; use anyhow::anyhow; +use serde::Deserialize; use sqllogictest::{AsyncDB, MakeConnection, Runner, parse_file}; -use toml::Table as TomlTable; use crate::engine::datafusion::DataFusionEngine; use crate::error::{Error, Result}; -const TYPE_DATAFUSION: &str = "datafusion"; +/// Configuration for the catalog used by the DataFusion engine +#[derive(Debug, Clone, Deserialize)] +pub struct DatafusionCatalogConfig { + /// Catalog type: "memory", "rest", "glue", "hms", "s3tables", "sql" + #[serde(rename = "type")] + pub catalog_type: String, + /// Catalog properties passed to the catalog loader + #[serde(default)] + pub props: HashMap<String, String>, +} + +/// Engine configuration as a tagged enum +#[derive(Debug, Clone, Deserialize)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum EngineConfig { + Datafusion { + #[serde(default)] + catalog: Option<DatafusionCatalogConfig>, + }, +} #[async_trait::async_trait] pub trait EngineRunner: Send { async fn run_slt_file(&mut self, path: &Path) -> Result<()>; } -pub async fn load_engine_runner( - engine_type: &str, - cfg: TomlTable, -) -> Result<Box<dyn EngineRunner>> { - match engine_type { - TYPE_DATAFUSION => Ok(Box::new(DataFusionEngine::new(cfg).await?)), - _ => Err(anyhow::anyhow!("Unsupported engine type: {engine_type}").into()), +pub async fn load_engine_runner(config: EngineConfig) -> Result<Box<dyn EngineRunner>> { + match config { + EngineConfig::Datafusion { catalog } =>
Ok(Box::new(DataFusionEngine::new(catalog).await?)), } } @@ -65,29 +81,63 @@ where #[cfg(test)] mod tests { - use crate::engine::{TYPE_DATAFUSION, load_engine_runner}; + use crate::engine::{DatafusionCatalogConfig, EngineConfig, load_engine_runner}; - #[tokio::test] - async fn test_engine_invalid_type() { + #[test] + fn test_deserialize_engine_config() { + let input = r#"type = "datafusion""#; + + let config: EngineConfig = toml::from_str(input).unwrap(); + assert!(matches!(config, EngineConfig::Datafusion { catalog: None })); + } + + #[test] + fn test_deserialize_engine_config_with_catalog() { + let input = r#" + type = "datafusion" + + [catalog] + type = "rest" + + [catalog.props] + uri = "http://localhost:8181" + "#; + + let config: EngineConfig = toml::from_str(input).unwrap(); + match config { + EngineConfig::Datafusion { catalog: Some(cat) } => { + assert_eq!(cat.catalog_type, "rest"); + assert_eq!( + cat.props.get("uri"), + Some(&"http://localhost:8181".to_string()) + ); + } + _ => panic!("Expected Datafusion with catalog"), + } + } + + #[test] + fn test_deserialize_catalog_config() { let input = r#" - [engines] - random = { type = "random_engine", url = "http://localhost:8181" } + type = "memory" + + [props] + warehouse = "file:///tmp/warehouse" "#; - let tbl = toml::from_str(input).unwrap(); - let result = load_engine_runner("random_engine", tbl).await; - assert!(result.is_err()); + let config: DatafusionCatalogConfig = toml::from_str(input).unwrap(); + assert_eq!(config.catalog_type, "memory"); + assert_eq!( + config.props.get("warehouse"), + Some(&"file:///tmp/warehouse".to_string()) + ); } #[tokio::test] async fn test_load_datafusion() { - let input = r#" - [engines] - df = { type = "datafusion" } - "#; - let tbl = toml::from_str(input).unwrap(); - let result = load_engine_runner(TYPE_DATAFUSION, tbl).await; + let config = EngineConfig::Datafusion { catalog: None }; + let result = load_engine_runner(config).await; assert!(result.is_ok()); } } diff --git 
a/crates/sqllogictest/src/schedule.rs b/crates/sqllogictest/src/schedule.rs index 7c13ad4d12..25728a2968 100644 --- a/crates/sqllogictest/src/schedule.rs +++ b/crates/sqllogictest/src/schedule.rs @@ -21,10 +21,18 @@ use std::path::{Path, PathBuf}; use anyhow::{Context, anyhow}; use serde::{Deserialize, Serialize}; -use toml::{Table as TomlTable, Value}; use tracing::info; -use crate::engine::{EngineRunner, load_engine_runner}; +use crate::engine::{EngineConfig, EngineRunner, load_engine_runner}; + +/// Raw configuration parsed from the schedule TOML file +#[derive(Debug, Clone, Deserialize)] +pub struct ScheduleConfig { + /// Engine name to engine configuration + pub engines: HashMap<String, EngineConfig>, + /// List of test steps to run + pub steps: Vec<Step>, +} pub struct Schedule { /// Engine names to engine instances @@ -59,15 +67,27 @@ impl Schedule { pub async fn from_file<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> { let path_str = path.as_ref().to_string_lossy().to_string(); let content = read_to_string(path)?; - let toml_value = content.parse::<Value>()?; - let toml_table = toml_value - .as_table() - .ok_or_else(|| anyhow!("Schedule file must be a TOML table"))?; - let engines = Schedule::parse_engines(toml_table).await?; - let steps = Schedule::parse_steps(toml_table)?; + let config: ScheduleConfig = toml::from_str(&content) + .with_context(|| format!("Failed to parse schedule file: {path_str}"))?; - Ok(Self::new(engines, steps, path_str)) + let engines = Self::instantiate_engines(config.engines).await?; + + Ok(Self::new(engines, config.steps, path_str)) + } + + /// Instantiate engine runners from their configurations + async fn instantiate_engines( + configs: HashMap<String, EngineConfig>, + ) -> anyhow::Result<HashMap<String, Box<dyn EngineRunner>>> { + let mut engines = HashMap::new(); + + for (name, config) in configs { + let engine = load_engine_runner(config).await?; + engines.insert(name, engine); + } + + Ok(engines) } pub async fn run(mut self) -> anyhow::Result<()> { @@ -105,103 +125,131 @@ impl Schedule { } Ok(()) } +} - async fn parse_engines( - table:
&TomlTable, - ) -> anyhow::Result<HashMap<String, Box<dyn EngineRunner>>> { - let engines_tbl = table - .get("engines") - .with_context(|| "Schedule file must have an 'engines' table")? - .as_table() - .ok_or_else(|| anyhow!("'engines' must be a table"))?; - - let mut engines = HashMap::new(); - - for (name, engine_val) in engines_tbl { - let cfg_tbl = engine_val - .as_table() - .ok_or_else(|| anyhow!("Config of engine '{name}' is not a table"))? - .clone(); - - let engine_type = cfg_tbl - .get("type") - .ok_or_else(|| anyhow::anyhow!("Engine {name} doesn't have a 'type' field"))? - .as_str() - .ok_or_else(|| anyhow::anyhow!("Engine {name} type must be a string"))?; - - let engine = load_engine_runner(engine_type, cfg_tbl.clone()).await?; - - if engines.insert(name.clone(), engine).is_some() { - return Err(anyhow!("Duplicate engine '{name}'")); - } - } +#[cfg(test)] +mod tests { + use crate::engine::EngineConfig; + use crate::schedule::ScheduleConfig; - Ok(engines) - } + #[test] + fn test_deserialize_schedule_config() { + let input = r#" + [engines] + df = { type = "datafusion" } - fn parse_steps(table: &TomlTable) -> anyhow::Result<Vec<Step>> { - let steps_val = table - .get("steps") - .with_context(|| "Schedule file must have a 'steps' array")?; + [[steps]] + engine = "df" + slt = "test.slt" + "#; - let steps: Vec<Step> = steps_val - .clone() - .try_into() - .with_context(|| "Failed to deserialize steps")?; + let config: ScheduleConfig = toml::from_str(input).unwrap(); - Ok(steps) + assert_eq!(config.engines.len(), 1); + assert!(config.engines.contains_key("df")); + assert!(matches!(config.engines["df"], EngineConfig::Datafusion { + catalog: None + })); + assert_eq!(config.steps.len(), 1); + assert_eq!(config.steps[0].engine, "df"); + assert_eq!(config.steps[0].slt, "test.slt"); } -} - -#[cfg(test)] -mod tests { - use toml::Table as TomlTable; - - use crate::schedule::Schedule; #[test] - fn test_parse_steps() { + fn test_deserialize_multiple_steps() { let input = r#" + [engines] + datafusion = { type = "datafusion"
} + [[steps]] engine = "datafusion" slt = "test.slt" [[steps]] - engine = "spark" + engine = "datafusion" slt = "test2.slt" "#; - let tbl: TomlTable = toml::from_str(input).unwrap(); - let steps = Schedule::parse_steps(&tbl).unwrap(); + let config: ScheduleConfig = toml::from_str(input).unwrap(); - assert_eq!(steps.len(), 2); - assert_eq!(steps[0].engine, "datafusion"); - assert_eq!(steps[0].slt, "test.slt"); - assert_eq!(steps[1].engine, "spark"); - assert_eq!(steps[1].slt, "test2.slt"); + assert_eq!(config.steps.len(), 2); + assert_eq!(config.steps[0].engine, "datafusion"); + assert_eq!(config.steps[0].slt, "test.slt"); + assert_eq!(config.steps[1].engine, "datafusion"); + assert_eq!(config.steps[1].slt, "test2.slt"); } #[test] - fn test_parse_steps_empty() { + fn test_deserialize_with_catalog_config() { let input = r#" + [engines.df] + type = "datafusion" + + [engines.df.catalog] + type = "rest" + + [engines.df.catalog.props] + uri = "http://localhost:8181" + [[steps]] + engine = "df" + slt = "test.slt" "#; - let tbl: TomlTable = toml::from_str(input).unwrap(); - let steps = Schedule::parse_steps(&tbl); + let config: ScheduleConfig = toml::from_str(input).unwrap(); - assert!(steps.is_err()); + match &config.engines["df"] { + EngineConfig::Datafusion { catalog: Some(cat) } => { + assert_eq!(cat.catalog_type, "rest"); + assert_eq!( + cat.props.get("uri"), + Some(&"http://localhost:8181".to_string()) + ); + } + _ => panic!("Expected Datafusion with catalog config"), + } } - #[tokio::test] - async fn test_parse_engines_invalid_table() { - let toml_content = r#" - engines = "not_a_table" + #[test] + fn test_deserialize_missing_engine_type() { + let input = r#" + [engines] + df = { } + + [[steps]] + engine = "df" + slt = "test.slt" "#; - let table: TomlTable = toml::from_str(toml_content).unwrap(); - let result = Schedule::parse_engines(&table).await; + let result: Result<ScheduleConfig, _> = toml::from_str(input); + assert!(result.is_err()); + } + + #[test] + fn
test_deserialize_invalid_engine_type() { + let input = r#" + [engines] + df = { type = "unknown_engine" } + + [[steps]] + engine = "df" + slt = "test.slt" + "#; + + let result: Result<ScheduleConfig, _> = toml::from_str(input); + assert!(result.is_err()); + } + + #[test] + fn test_deserialize_missing_step_fields() { + let input = r#" + [engines] + df = { type = "datafusion" } + + [[steps]] + "#; + let result: Result<ScheduleConfig, _> = toml::from_str(input); assert!(result.is_err()); } } diff --git a/crates/sqllogictest/testdata/schedules/df_test.toml b/crates/sqllogictest/testdata/schedules/df_test.toml index 0733744951..df5e638d5a 100644 --- a/crates/sqllogictest/testdata/schedules/df_test.toml +++ b/crates/sqllogictest/testdata/schedules/df_test.toml @@ -20,4 +20,8 @@ df = { type = "datafusion" } [[steps]] engine = "df" -slt = "df_test/show_tables.slt" \ No newline at end of file +slt = "df_test/show_tables.slt" + +[[steps]] +engine = "df" +slt = "df_test/insert_into.slt" diff --git a/crates/sqllogictest/testdata/slts/df_test/insert_into.slt b/crates/sqllogictest/testdata/slts/df_test/insert_into.slt new file mode 100644 index 0000000000..2ba33afcd1 --- /dev/null +++ b/crates/sqllogictest/testdata/slts/df_test/insert_into.slt @@ -0,0 +1,119 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied.
See the License for the +# specific language governing permissions and limitations +# under the License. + +# Verify the table is initially empty +query IT rowsort +SELECT * FROM default.default.test_unpartitioned_table +---- + +# Insert a single row and verify the count +query I +INSERT INTO default.default.test_unpartitioned_table VALUES (1, 'Alice') +---- +1 + +# Verify the inserted row +query IT rowsort +SELECT * FROM default.default.test_unpartitioned_table +---- +1 Alice + +# Insert multiple rows and verify the count +query I +INSERT INTO default.default.test_unpartitioned_table VALUES (2, 'Bob'), (3, 'Charlie') +---- +2 + +# Verify all rows +query IT rowsort +SELECT * FROM default.default.test_unpartitioned_table +---- +1 Alice +2 Bob +3 Charlie + +# Insert with NULL value and verify the count +query I +INSERT INTO default.default.test_unpartitioned_table VALUES (4, NULL) +---- +1 + +# Verify NULL handling +query IT rowsort +SELECT * FROM default.default.test_unpartitioned_table +---- +1 Alice +2 Bob +3 Charlie +4 NULL + +# Test partitioned table - verify initially empty +query ITT rowsort +SELECT * FROM default.default.test_partitioned_table +---- + +# Insert single row into partitioned table +query I +INSERT INTO default.default.test_partitioned_table VALUES (1, 'electronics', 'laptop') +---- +1 + +# Verify the inserted row in partitioned table +query ITT rowsort +SELECT * FROM default.default.test_partitioned_table +---- +1 electronics laptop + +# Insert multiple rows with different partition values +query I +INSERT INTO default.default.test_partitioned_table VALUES (2, 'electronics', 'phone'), (3, 'books', 'novel'), (4, 'books', 'textbook'), (5, 'clothing', 'shirt') +---- +4 + +# Verify all rows in partitioned table +query ITT rowsort +SELECT * FROM default.default.test_partitioned_table +---- +1 electronics laptop +2 electronics phone +3 books novel +4 books textbook +5 clothing shirt + +# Insert with NULL value in optional column +query I +INSERT INTO 
default.default.test_partitioned_table VALUES (6, 'electronics', NULL) +---- +1 + +# Verify NULL handling in partitioned table +query ITT rowsort +SELECT * FROM default.default.test_partitioned_table +---- +1 electronics laptop +2 electronics phone +3 books novel +4 books textbook +5 clothing shirt +6 electronics NULL + +# Verify partition filtering works +query ITT rowsort +SELECT * FROM default.default.test_partitioned_table WHERE category = 'books' +---- +3 books novel +4 books textbook diff --git a/crates/sqllogictest/testdata/slts/df_test/show_tables.slt b/crates/sqllogictest/testdata/slts/df_test/show_tables.slt index 34709d7359..c5da5f6276 100644 --- a/crates/sqllogictest/testdata/slts/df_test/show_tables.slt +++ b/crates/sqllogictest/testdata/slts/df_test/show_tables.slt @@ -25,6 +25,12 @@ datafusion information_schema routines VIEW datafusion information_schema schemata VIEW datafusion information_schema tables VIEW datafusion information_schema views VIEW +default default test_partitioned_table BASE TABLE +default default test_partitioned_table$manifests BASE TABLE +default default test_partitioned_table$snapshots BASE TABLE +default default test_unpartitioned_table BASE TABLE +default default test_unpartitioned_table$manifests BASE TABLE +default default test_unpartitioned_table$snapshots BASE TABLE default information_schema columns VIEW default information_schema df_settings VIEW default information_schema parameters VIEW diff --git a/crates/test_utils/DEPENDENCIES.rust.tsv b/crates/test_utils/DEPENDENCIES.rust.tsv index ef7c315df7..879ead5921 100644 --- a/crates/test_utils/DEPENDENCIES.rust.tsv +++ b/crates/test_utils/DEPENDENCIES.rust.tsv @@ -1,30 +1,22 @@ crate Apache-2.0 MIT Unicode-3.0 -cfg-if@1.0.3 X X -iceberg_test_utils@0.7.0 X +cfg-if@1.0.4 X X +iceberg_test_utils@0.8.0 X lazy_static@1.5.0 X X -log@0.4.28 X X -nu-ansi-term@0.50.1 X +log@0.4.29 X X +nu-ansi-term@0.50.3 X once_cell@1.21.3 X X pin-project-lite@0.2.16 X X -proc-macro2@1.0.101 X X 
-quote@1.0.40 X X +proc-macro2@1.0.103 X X +quote@1.0.42 X X sharded-slab@0.1.7 X smallvec@1.15.1 X X -syn@2.0.106 X X +syn@2.0.111 X X thread_local@1.1.9 X X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X -unicode-ident@1.0.18 X X X -windows-sys@0.52.0 X X -windows-targets@0.52.6 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_msvc@0.52.6 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_msvc@0.52.6 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_msvc@0.52.6 X X +tracing-subscriber@0.3.22 X +unicode-ident@1.0.22 X X X +windows-link@0.2.1 X X +windows-sys@0.61.2 X X diff --git a/deny.toml b/deny.toml index 6d75c5d219..0f88ba6d0e 100644 --- a/deny.toml +++ b/deny.toml @@ -26,6 +26,7 @@ allow = [ "CC0-1.0", "Zlib", "CDLA-Permissive-2.0", + "bzip2-1.0.6", # Category-A: https://issues.apache.org/jira/browse/LEGAL-660 "Unicode-3.0", # Boost Software License Version 1.0 is allowed (Category-A): @@ -39,4 +40,4 @@ exceptions = [ { allow = ["MPL-2.0"], crate = "webpki-roots" }, { allow = ["MPL-2.0"], crate = "generational-arena" }, { allow = ["MPL-2.0"], crate = "option-ext" }, -] \ No newline at end of file +] diff --git a/docs/rfcs/0001_modularize_iceberg_implementations.md b/docs/rfcs/0001_modularize_iceberg_implementations.md new file mode 100644 index 0000000000..14bd478270 --- /dev/null +++ b/docs/rfcs/0001_modularize_iceberg_implementations.md @@ -0,0 +1,120 @@ + + +# RFC: Modularize `iceberg` Implementations + +## Background + +Issue #1819 highlighted that the current `iceberg` crate mixes the Iceberg protocol abstractions (catalog/table/plan/transaction) with concrete runtime, storage, and execution code (Tokio runtime wrappers, opendal-based `FileIO`, Arrow helpers, DataFusion glue, etc.). 
This coupling makes the crate heavy and blocks users from composing their own storage or execution stacks. + +Two principles have been agreed: +1. The `iceberg` crate remains the single source of truth for all protocol traits and data structures. We will not create a separate “kernel” crate or facade layer. +2. Concrete integrations (Tokio runtime, opendal `FileIO`, Arrow/DataFusion glue, catalog adapters, etc.) move out into dedicated companion crates. Users needing a ready path can depend on those crates (e.g., `iceberg-datafusion` or `integrations/local`), while custom stacks depend only on `iceberg`. + +This RFC focuses on modularizing implementations; detailed trait signatures (e.g., `FileIO`, `Runtime`) will be handled in separate RFCs. + +## Goals and Scope + +- Keep `iceberg` as the protocol crate (traits + metadata + planning), without bundling runtimes, storage adapters, or execution glue. +- Relocate concrete code into companion crates under `crates/fileio/*`, `crates/runtime/*`, and `crates/integrations/*`. +- Provide a staged plan for extracting Arrow-dependent APIs to avoid destabilizing file-format code. +- Minimize breaking surfaces: traits stay in `iceberg`; downstream crates mainly adjust dependencies. + +Out of scope: changes to the Iceberg table specification or catalog adapter external behavior; detailed trait method design (covered by follow-up RFCs). + +## Architecture Overview + +### Workspace Layout (target) + +``` +crates/ + iceberg/ # core traits, metadata, planning, transactions + fileio/ + opendal/ # e.g. `iceberg-fileio-opendal` + fs/ # other FileIO implementations + runtime/ + tokio/ # e.g. `iceberg-runtime-tokio` + smol/ + catalog/* # catalog adapters (REST, HMS, Glue, etc.) + integrations/ + local/ # simple local/arrow-based helper crate + datafusion/ # combines core + implementations for DF + cache-moka/ + playground/ +``` + +- `crates/iceberg` drops direct deps on opendal, Tokio, Arrow, and DataFusion. 
+- Implementation crates depend on `iceberg` to implement the traits. +- Higher-level crates (`integrations/local`, `iceberg-datafusion`) assemble the pieces for ready-to-use scenarios. + +### Core Trait Surfaces + +`FileIO`, `Runtime`, `Catalog`, `Table`, `Transaction`, `TableScan` (plan descriptors) all remain hosted in `iceberg`. Precise method signatures are deferred to dedicated RFCs to avoid locking details prematurely. + +### Usage Modes + +- **Custom stacks**: depend on `iceberg` and provide your own implementations. +- **Pre-built stacks**: depend on `integrations/local` or `iceberg-datafusion`, which bundle `iceberg` with selected runtime/FileIO/Arrow helpers. +- `iceberg` does not re-export companion crates; users compose explicitly. + +## Migration Plan (staged, with Arrow extraction phased) + +1. **Phase 1 – Confirm trait hosting, defer details** + - Keep all protocol traits in `iceberg`; move detailed API design (FileIO, Runtime, etc.) to separate RFCs. + - Add temporary shims/deprecations only when traits are finalized. + +2. **Phase 2 – First Arrow step: move `to_arrow()` out** + - Relocate the public `to_arrow()` API to `integrations/local` (or another higher-level crate). Core no longer exposes Arrow entry points. + - Keep internal Arrow-dependent helpers (e.g., `ArrowFileReader`) temporarily in `iceberg` to avoid breaking file-format flows. + +3. **Phase 3 – Gradual Arrow dependency removal** + - Incrementally migrate/replace Arrow-dependent internals (`ArrowFileReader`, format-specific readers) into `integrations/local` or other helper crates. + - Adjust file-format APIs as needed; expect this to be multi-release work. + +4. **Phase 4 – Dependency cleanup** + - Ensure catalog and integration crates depend only on `iceberg` plus the specific runtime/FileIO/helper crates they need. + - Verify build/test pipelines against the new dependency graph. + +5. 
**Phase 5 – Docs & release** + - Publish migration guides: where `to_arrow()` moved, how to assemble local/DataFusion stacks. + - Schedule deprecation windows for remaining Arrow helpers; target a breaking release once Arrow is fully removed from `iceberg`. + +## Compatibility + +- Short term: users of `Table::scan().to_arrow()` must switch to `integrations/local` (or another crate that rehosts that API). Other Arrow types stay temporarily but will migrate in later phases. +- Long term: `iceberg` will be Arrow-free; companion crates provide Arrow-based helpers. +- Tests/examples move alongside the implementations they exercise. + +## Risks and Mitigations + +| Risk | Description | Mitigation | +| ---- | ----------- | ---------- | +| Arrow dependency unwinding is complex | File-format readers may rely on Arrow types | Phase the work; move `to_arrow()` first, then refactor readers; document interim state | +| Discoverability | Users may not know where Arrow helpers went | Clear docs pointing to `integrations/local` and `iceberg-datafusion`; migration guide | +| Trait churn | Future trait RFCs may break early adopters | Use deprecation shims and communicate timelines | +| Duplicate impls | Multiple helper crates could overlap | Provide recommended combinations and feature guidance | + +## Open Questions + +1. Versioning: align companion crate versions with `iceberg`, or allow independent versions plus compatibility matrix? +2. Deprecation schedule: how long do we keep interim Arrow helpers before full removal from `iceberg`? + +## Conclusion + +We will keep `iceberg` as the protocol crate while modularizing concrete implementations. Arrow removal will be phased: first relocating `to_arrow()` to `integrations/local`, then gradually moving Arrow-dependent readers and helpers. This keeps the core lean, lets users compose their preferred runtime/FileIO stacks, and still offers ready-to-use combinations via companion crates. 
diff --git a/rust-toolchain.toml b/rust-toolchain.toml index ff7d1f7fbb..4b20d68e44 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -20,5 +20,5 @@ # # The channel is exactly same day for our MSRV. [toolchain] -channel = "nightly-2025-03-28" +channel = "nightly-2025-06-23" components = ["rustfmt", "clippy"] diff --git a/scripts/release.sh b/scripts/release.sh index a790cdd8d1..e4cee342d2 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -25,6 +25,12 @@ else echo "var is set to '$ICEBERG_VERSION'" fi +# Validate version format (e.g., 1.0.0) +if [[ ! "$ICEBERG_VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + echo "Error: ICEBERG_VERSION ($ICEBERG_VERSION) must be in the format: <major>.<minor>.<patch>" + exit 1 +fi + # tar source code release_version=${ICEBERG_VERSION} # rc versions diff --git a/website/src/release.md b/website/src/release.md index 78c0e57525..f3a5798a2b 100644 --- a/website/src/release.md +++ b/website/src/release.md @@ -325,13 +325,13 @@ After downloading them, here are the instructions on how to verify them. ```bash gpg --verify apache-iceberg-rust-*.tar.gz.asc ``` - Expects: `"apache-iceberg-rust-0.7.0.tar.gz: OK"` + Expects: `gpg: Good signature from ...` * Verify the checksums: ```bash shasum -a 512 -c apache-iceberg-rust-*.tar.gz.sha512 ``` - Expects: `gpg: Good signature from ...` + Expects: `"apache-iceberg-rust-...tar.gz: OK"` * Verify build and test: ```bash