diff --git a/.github/docker-compose.yaml b/.github/docker-compose.yaml index cfb7eb43f..01cd9e3ec 100644 --- a/.github/docker-compose.yaml +++ b/.github/docker-compose.yaml @@ -11,5 +11,7 @@ services: container_name: dask-worker image: daskdev/dask:latest command: dask-worker dask-scheduler:8786 + environment: + EXTRA_CONDA_PACKAGES: "pyarrow>=4.0.0" # required for parquet IO volumes: - /tmp:/tmp diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 623bd5dae..7d46b8e13 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -16,11 +16,6 @@ jobs: if: github.repository == 'dask-contrib/dask-sql' steps: - uses: actions/checkout@v2 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }} - name: Set up Python uses: conda-incubator/setup-miniconda@v2 with: @@ -29,7 +24,7 @@ jobs: python-version: "3.8" channel-priority: strict activate-environment: dask-sql - environment-file: continuous_integration/environment-3.8-jdk11-dev.yaml + environment-file: continuous_integration/environment-3.8-dev.yaml - name: Install dependencies run: | pip install setuptools wheel twine diff --git a/.github/workflows/test-upstream.yml b/.github/workflows/test-upstream.yml index a515395a0..b9fc415b9 100644 --- a/.github/workflows/test-upstream.yml +++ b/.github/workflows/test-upstream.yml @@ -5,64 +5,23 @@ on: workflow_dispatch: # allows you to trigger the workflow run manually jobs: - build: - # This build step should be similar to the deploy build, to make sure we actually test - # the future deployable - name: Build the jar on ubuntu - runs-on: ubuntu-latest - if: github.repository == 'dask-contrib/dask-sql' - defaults: - run: - shell: bash -l {0} - steps: - - uses: actions/checkout@v2 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }} - - name: Set up Python - uses: conda-incubator/setup-miniconda@v2 - with: - miniforge-variant: Mambaforge - use-mamba: true - python-version: "3.8" - channel-priority: strict - activate-environment: dask-sql - environment-file: continuous_integration/environment-3.8-jdk11-dev.yaml - - name: Install dependencies and build the jar - run: | - python setup.py build_ext - - name: Upload the jar - uses: actions/upload-artifact@v1 - with: - name: jar - path: dask_sql/jar/DaskSQL.jar - test-dev: - name: "Test upstream dev (${{ matrix.os }}, java: ${{ matrix.java }}, python: ${{ matrix.python }})" - needs: build + name: "Test upstream dev (${{ matrix.os }}, python: ${{ matrix.python }})" runs-on: ${{ matrix.os }} env: - CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-jdk${{ matrix.java }}-dev.yaml + CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-dev.yaml defaults: run: shell: bash -l {0} strategy: fail-fast: false matrix: - java: [8, 11] os: [ubuntu-latest, windows-latest] python: ["3.8", "3.9", "3.10"] steps: - uses: actions/checkout@v2 with: fetch-depth: 0 # Fetch all history for all branches and tags. - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-v1-jdk${{ matrix.java }}-${{ hashFiles('**/pom.xml') }} - name: Set up Python uses: conda-incubator/setup-miniconda@v2 with: @@ -72,21 +31,12 @@ jobs: channel-priority: strict activate-environment: dask-sql environment-file: ${{ env.CONDA_FILE }} - - name: Download the pre-build jar - uses: actions/download-artifact@v1 - with: - name: jar - path: dask_sql/jar/ - name: Install hive testing dependencies for Linux if: matrix.os == 'ubuntu-latest' run: | mamba install -c conda-forge sasl>=0.3.1 docker pull bde2020/hive:2.3.2-postgresql-metastore docker pull bde2020/hive-metastore-postgresql:2.3.0 - - name: Set proper JAVA_HOME for Windows - if: matrix.os == 'windows-latest' - run: | - echo "JAVA_HOME=${{ env.CONDA }}\envs\dask-sql\Library" >> $GITHUB_ENV - name: Install upstream dev Dask / dask-ml run: | python -m pip install --no-deps git+https://github.com/dask/dask diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b590260f7..cc5b078bd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,7 +1,7 @@ --- # Test the main branch and every pull request by -# 1. building the jar on ubuntu -# 2. testing code (using the build jar) on ubuntu and windows, with different java versions +# 1. build dask_planner (Arrow DataFusion Rust bindings) on ubuntu +# 2. testing code (using the build DataFusion bindings) on ubuntu and windows name: Test Python package on: push: @@ -36,55 +36,20 @@ jobs: with: keyword: "[test-upstream]" - build: - # This build step should be similar to the deploy build, to make sure we actually test - # the future deployable - name: Build the jar on ubuntu - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }} - - name: Set up Python - uses: conda-incubator/setup-miniconda@v2 - with: - miniforge-variant: Mambaforge - use-mamba: true - python-version: "3.8" - channel-priority: strict - activate-environment: dask-sql - environment-file: continuous_integration/environment-3.8-jdk11-dev.yaml - - name: Build the jar - run: | - python setup.py build_ext - - name: Upload the jar - uses: actions/upload-artifact@v1 - with: - name: jar - path: dask_sql/jar/DaskSQL.jar - test: - name: "Test (${{ matrix.os }}, java: ${{ matrix.java }}, python: ${{ matrix.python }})" - needs: [detect-ci-trigger, build] + name: "Build & Test (${{ matrix.os }}, python: ${{ matrix.python }}, Rust: ${{ matrix.toolchain }})" + needs: [detect-ci-trigger] runs-on: ${{ matrix.os }} env: - CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-jdk${{ matrix.java }}-dev.yaml + CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-dev.yaml strategy: fail-fast: false matrix: - java: [8, 11] os: [ubuntu-latest, windows-latest] python: ["3.8", "3.9", "3.10"] + toolchain: [stable] steps: - uses: actions/checkout@v2 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-v1-jdk${{ matrix.java }}-${{ hashFiles('**/pom.xml') }} - name: Set up Python uses: conda-incubator/setup-miniconda@v2 with: @@ -94,21 +59,21 @@ jobs: channel-priority: strict activate-environment: dask-sql environment-file: ${{ env.CONDA_FILE }} - - name: Download the pre-build jar - uses: actions/download-artifact@v1 + - name: Setup Rust Toolchain + uses: actions-rs/toolchain@v1 + id: rust-toolchain with: - name: jar - path: dask_sql/jar/ + toolchain: stable + override: true + - name: Build the Rust DataFusion bindings + run: | + python setup.py build install - name: Install hive testing dependencies for Linux if: matrix.os == 'ubuntu-latest' run: | mamba install -c conda-forge sasl>=0.3.1 docker pull bde2020/hive:2.3.2-postgresql-metastore docker pull bde2020/hive-metastore-postgresql:2.3.0 - - name: Set proper JAVA_HOME for Windows - if: matrix.os == 'windows-latest' - run: | - echo "JAVA_HOME=${{ env.CONDA }}\envs\dask-sql\Library" >> $GITHUB_ENV - name: Optionally install upstream dev Dask / dask-ml if: needs.detect-ci-trigger.outputs.triggered == 'true' run: | @@ -133,15 +98,10 @@ jobs: cluster: name: "Test in a dask cluster" - needs: [detect-ci-trigger, build] + needs: [detect-ci-trigger] runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }} - name: Set up Python uses: conda-incubator/setup-miniconda@v2 with: @@ -150,12 +110,16 @@ jobs: python-version: "3.8" channel-priority: strict activate-environment: dask-sql - environment-file: continuous_integration/environment-3.8-jdk11-dev.yaml - - name: Download the pre-build jar - uses: actions/download-artifact@v1 - with: - name: jar - path: dask_sql/jar/ + environment-file: continuous_integration/environment-3.8-dev.yaml + - name: Setup Rust Toolchain + uses: actions-rs/toolchain@v1 + id: rust-toolchain + with: + toolchain: stable + override: true + - name: Build the Rust DataFusion bindings + run: | + python setup.py build install - name: Install dependencies run: | mamba install python-blosc lz4 -c conda-forge @@ -184,15 +148,10 @@ jobs: import: name: "Test importing with bare requirements" - needs: [detect-ci-trigger, build] + needs: [detect-ci-trigger] runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }} - name: Set up Python uses: conda-incubator/setup-miniconda@v2 with: @@ -200,13 +159,10 @@ jobs: mamba-version: "*" channels: conda-forge,defaults channel-priority: strict - - name: Download the pre-build jar - uses: actions/download-artifact@v1 - with: - name: jar - path: dask_sql/jar/ - name: Install dependencies and nothing else run: | + conda install setuptools-rust + conda install pyarrow>=4.0.0 pip install -e . which python diff --git a/.github/workflows/update-gpuci.yml b/.github/workflows/update-gpuci.yml index e3c9c8469..1cf91c069 100644 --- a/.github/workflows/update-gpuci.yml +++ b/.github/workflows/update-gpuci.yml @@ -13,39 +13,70 @@ jobs: steps: - uses: actions/checkout@v2 + - name: Parse current axis YAML + uses: the-coding-turtle/ga-yaml-parser@v0.1.1 + with: + file: continuous_integration/gpuci/axis.yaml + - name: Get latest cuDF nightly version - id: latest_version + id: cudf_latest uses: jacobtomlinson/gha-anaconda-package-version@0.1.3 with: org: "rapidsai-nightly" package: "cudf" version_system: "CalVer" - - name: Strip git tags from versions + - name: Get latest cuML nightly version + id: cuml_latest + uses: jacobtomlinson/gha-anaconda-package-version@0.1.3 + with: + org: "rapidsai-nightly" + package: "cuml" + version_system: "CalVer" + + - name: Get latest UCX-Py nightly version + id: ucx_py_latest + uses: jacobtomlinson/gha-anaconda-package-version@0.1.3 + with: + org: "rapidsai-nightly" + package: "ucx-py" + version_system: "CalVer" + + - name: Get old RAPIDS / UCX-Py versions env: - FULL_RAPIDS_VER: ${{ steps.latest_version.outputs.version }} - run: echo "RAPIDS_VER=${FULL_RAPIDS_VER::-10}" >> $GITHUB_ENV + FULL_CUDF_VER: ${{ steps.cudf_latest.outputs.version }} + FULL_CUML_VER: ${{ steps.cuml_latest.outputs.version }} + FULL_UCX_PY_VER: ${{ steps.ucx_py_latest.outputs.version }} + run: | + echo RAPIDS_VER=$RAPIDS_VER_0 >> $GITHUB_ENV + echo UCX_PY_VER=$(curl -sL https://version.gpuci.io/rapids/$RAPIDS_VER_0) >> $GITHUB_ENV + echo NEW_CUDF_VER=${FULL_CUDF_VER::-10} >> $GITHUB_ENV + echo NEW_CUML_VER=${FULL_CUML_VER::-10} >> $GITHUB_ENV + echo NEW_UCX_PY_VER=${FULL_UCX_PY_VER::-10} >> $GITHUB_ENV - - name: Find and Replace Release - uses: jacobtomlinson/gha-find-replace@0.1.4 + - name: Update RAPIDS version + uses: jacobtomlinson/gha-find-replace@v2 with: include: 'continuous_integration\/gpuci\/axis\.yaml' - find: "RAPIDS_VER:\n- .*" - replace: |- - RAPIDS_VER: - - "${{ env.RAPIDS_VER }}" + find: "${{ env.RAPIDS_VER }}" + replace: "${{ env.NEW_CUDF_VER }}" + regex: false - name: Create Pull Request uses: peter-evans/create-pull-request@v3 + # make sure ucx-py nightlies are available and that cuDF/cuML nightly versions match up + if: | + env.UCX_PY_VER != env.NEW_UCX_PY_VER && + env.NEW_CUDF_VER == env.NEW_CUML_VER with: token: ${{ secrets.GITHUB_TOKEN }} draft: true - commit-message: "Update gpuCI `RAPIDS_VER` to `${{ env.RAPIDS_VER }}`" - title: "Update gpuCI `RAPIDS_VER` to `${{ env.RAPIDS_VER }}`" + commit-message: "Update gpuCI `RAPIDS_VER` to `${{ env.NEW_CUDF_VER }}`" + title: "Update gpuCI `RAPIDS_VER` to `${{ env.NEW_CUDF_VER }}`" team-reviewers: "dask/gpu" author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> branch: "upgrade-gpuci-rapids" body: | - A new cuDF nightly version has been detected. + New cuDF and ucx-py nightly versions have been detected. - Updated `axis.yaml` to use `${{ env.RAPIDS_VER }}`. + Updated `axis.yaml` to use `${{ env.NEW_CUDF_VER }}`. diff --git a/.gitignore b/.gitignore index 5bc336e03..950c92821 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,4 @@ dask_sql/jar dask-worker-space/ node_modules/ docs/source/_build/ +dask_planner/Cargo.lock diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e50a5f1d5..709938a81 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/psf/black - rev: 19.10b0 + rev: 22.3.0 hooks: - id: black language_version: python3 @@ -16,8 +16,17 @@ repos: args: - "--profile" - "black" + - repo: https://github.com/doublify/pre-commit-rust + rev: v1.0 + hooks: + - id: fmt + args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--'] + - id: cargo-check + args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--'] + - id: clippy + args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--'] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 + rev: v4.2.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 3b3682543..831ec6d50 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -3,15 +3,15 @@ version: 2 build: os: ubuntu-20.04 tools: - python: "3.8" - apt_packages: - - maven + python: "mambaforge-4.10" sphinx: configuration: docs/source/conf.py +conda: + environment: docs/environment.yml + python: install: - - requirements: docs/requirements-docs.txt - method: pip path: . diff --git a/MANIFEST.in b/MANIFEST.in index 2b6351550..d0108fedd 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,5 @@ recursive-include dask_sql *.yaml +recursive-include dask_planner * include versioneer.py include dask_sql/_version.py diff --git a/README.md b/README.md index 5b508651f..924ae1e41 100644 --- a/README.md +++ b/README.md @@ -101,19 +101,18 @@ If you want to have the newest (unreleased) `dask-sql` version or if you plan to Create a new conda environment and install the development environment: - conda env create -f continuous_integration/environment-3.9-jdk11-dev.yaml + conda env create -f continuous_integration/environment-3.9-dev.yaml It is not recommended to use `pip` instead of `conda` for the environment setup. -If you however need to, make sure to have Java (jdk >= 8) and maven installed and correctly setup before continuing. -Have a look into `environment-3.9-jdk11-dev.yaml` for the rest of the development environment. After that, you can install the package in development mode pip install -e ".[dev]" -To recompile the Java classes after changes have been made to the source contained in `planner/`, run +The Rust DataFusion bindings are built as part of the `pip install`. +If changes are made to the Rust source in `dask_planner/`, another build/install must be run to recompile the bindings: - python setup.py build_ext + python setup.py build install This repository uses [pre-commit](https://pre-commit.com/) hooks. To install them, call @@ -172,5 +171,5 @@ At the core, `dask-sql` does two things: - translate the SQL query using [Apache Calcite](https://calcite.apache.org/) into a relational algebra, which is specified as a tree of java objects - similar to many other SQL engines (Hive, Flink, ...) - convert this description of the query from java objects into dask API calls (and execute them) - returning a dask dataframe. -For the first step, Apache Calcite needs to know about the columns and types of the dask dataframes, therefore some java classes to store this information for dask dataframes are defined in `planner`. -After the translation to a relational algebra is done (using `RelationalAlgebraGenerator.getRelationalAlgebra`), the python methods defined in `dask_sql.physical` turn this into a physical dask execution plan by converting each piece of the relational algebra one-by-one. +For the first step, Arrow DataFusion needs to know about the columns and types of the dask dataframes, therefore some Rust code to store this information for dask dataframes are defined in `dask_planner`. +After the translation to a relational algebra is done (using `DaskSQLContext.logical_relational_algebra`), the python methods defined in `dask_sql.physical` turn this into a physical dask execution plan by converting each piece of the relational algebra one-by-one. diff --git a/continuous_integration/environment-3.10-dev.yaml b/continuous_integration/environment-3.10-dev.yaml index effdccbaf..51a7f6052 100644 --- a/continuous_integration/environment-3.10-dev.yaml +++ b/continuous_integration/environment-3.10-dev.yaml @@ -4,15 +4,16 @@ channels: - nodefaults dependencies: - adagio>=0.2.3 -- antlr4-python3-runtime>=4.9.2 -- black=19.10b0 +- antlr4-python3-runtime>=4.9.2, <4.10.0 # Remove max pin after qpd(fugue dependency) updates their conda recipe +- black=22.3.0 - ciso8601>=2.2.0 - dask-ml>=2022.1.22 -- dask>=2021.11.1 +- dask>=2022.3.0 - fastapi>=0.61.1 - fs>=2.4.11 - intake>=0.6.0 - isort=5.7.0 +- jsonschema>=4.4.0 - lightgbm>=3.2.1 - mlflow>=1.19.0 - mock>=4.0.3 @@ -22,7 +23,7 @@ dependencies: - pre-commit>=2.11.1 - prompt_toolkit>=3.0.8 - psycopg2>=2.9.1 -- pyarrow>=0.15.1 +- pyarrow>=4.0.0 - pygments>=2.7.1 - pyhive>=0.6.4 - pytest-cov>=2.10.1 diff --git a/continuous_integration/environment-3.8-dev.yaml b/continuous_integration/environment-3.8-dev.yaml index 0a57856cf..10132bff6 100644 --- a/continuous_integration/environment-3.8-dev.yaml +++ b/continuous_integration/environment-3.8-dev.yaml @@ -4,15 +4,16 @@ channels: - nodefaults dependencies: - adagio>=0.2.3 -- antlr4-python3-runtime>=4.9.2 -- black=19.10b0 +- antlr4-python3-runtime>=4.9.2, <4.10.0 # Remove max pin after qpd(fugue dependency) updates their conda recipe +- black=22.3.0 - ciso8601>=2.2.0 - dask-ml>=2022.1.22 -- dask>=2021.11.1 +- dask>=2022.3.0 - fastapi>=0.61.1 - fs>=2.4.11 - intake>=0.6.0 - isort=5.7.0 +- jsonschema>=4.4.0 - lightgbm>=3.2.1 - mlflow>=1.19.0 - mock>=4.0.3 @@ -22,7 +23,7 @@ dependencies: - pre-commit>=2.11.1 - prompt_toolkit>=3.0.8 - psycopg2>=2.9.1 -- pyarrow>=0.15.1 +- pyarrow>=4.0.0 - pygments>=2.7.1 - pyhive>=0.6.4 - pytest-cov>=2.10.1 diff --git a/continuous_integration/environment-3.9-dev.yaml b/continuous_integration/environment-3.9-dev.yaml index 0793150a5..571a265a7 100644 --- a/continuous_integration/environment-3.9-dev.yaml +++ b/continuous_integration/environment-3.9-dev.yaml @@ -6,15 +6,16 @@ channels: - nvidia dependencies: - adagio>=0.2.3 -- antlr4-python3-runtime>=4.9.2 -- black=19.10b0 +- antlr4-python3-runtime>=4.9.2, <4.10.0 # Remove max pin after qpd(fugue dependency) updates their conda recipe +- black=22.3.0 - ciso8601>=2.2.0 - dask-ml>=2022.1.22 -- dask>=2021.11.1 +- dask>=2022.3.0 - fastapi>=0.61.1 - fs>=2.4.11 - intake>=0.6.0 - isort=5.7.0 +- jsonschema>=4.4.0 - lightgbm>=3.2.1 - mlflow>=1.19.0 - mock>=4.0.3 @@ -24,7 +25,7 @@ dependencies: - pre-commit>=2.11.1 - prompt_toolkit>=3.0.8 - psycopg2>=2.9.1 -- pyarrow>=0.15.1 +- pyarrow>=4.0.0 - pygments>=2.7.1 - pyhive>=0.6.4 - pytest-cov>=2.10.1 @@ -40,17 +41,5 @@ dependencies: - maturin>=0.12.8 - setuptools-rust>=1.1.2 - rust>=1.59.0 -# Items below this are added only for testing and should be removed later -- cudf=22.04 -- cudatoolkit=11.5 -- dask-cuda -- dask-cudf -- ipykernel -- pyngrok -- plotly -- s3fs -- requests -- nbformat -- cuml - pip: - fugue[sql]>=0.5.3 diff --git a/continuous_integration/gpuci/axis.yaml b/continuous_integration/gpuci/axis.yaml index 922eee23c..41ddb56ec 100644 --- a/continuous_integration/gpuci/axis.yaml +++ b/continuous_integration/gpuci/axis.yaml @@ -8,6 +8,6 @@ LINUX_VER: - ubuntu18.04 RAPIDS_VER: -- "22.04" +- "22.06" excludes: diff --git a/continuous_integration/recipe/meta.yaml b/continuous_integration/recipe/meta.yaml index d6211bf1a..1bfeb19cb 100644 --- a/continuous_integration/recipe/meta.yaml +++ b/continuous_integration/recipe/meta.yaml @@ -23,12 +23,14 @@ build: requirements: build: - {{ compiler('rust') }} + - setuptools-rust>=1.1.2 host: - pip - python >=3.8 + - setuptools-rust>=1.1.2 run: - python - - dask >=2021.11.1 + - dask >=2022.3.0 - pandas >=1.0.0 - fastapi >=0.61.1 - uvicorn >=0.11.3 @@ -37,6 +39,7 @@ requirements: - pygments - nest-asyncio - tabulate + - pyarrow>=4.0.0 test: imports: diff --git a/dask_planner/.github/workflows/CI.yml b/dask_planner/.github/workflows/CI.yml deleted file mode 100644 index 02573724e..000000000 --- a/dask_planner/.github/workflows/CI.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: CI - -on: - push: - pull_request: - -jobs: - linux: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: messense/maturin-action@v1 - with: - manylinux: auto - command: build - args: --release -o dist - - name: Upload wheels - uses: actions/upload-artifact@v2 - with: - name: wheels - path: dist - - windows: - runs-on: windows-latest - steps: - - uses: actions/checkout@v2 - - uses: messense/maturin-action@v1 - with: - command: build - args: --release --no-sdist -o dist - - name: Upload wheels - uses: actions/upload-artifact@v2 - with: - name: wheels - path: dist - - macos: - runs-on: macos-latest - steps: - - uses: actions/checkout@v2 - - uses: messense/maturin-action@v1 - with: - command: build - args: --release --no-sdist -o dist --universal2 - - name: Upload wheels - uses: actions/upload-artifact@v2 - with: - name: wheels - path: dist - - release: - name: Release - runs-on: ubuntu-latest - if: "startsWith(github.ref, 'refs/tags/')" - needs: [ macos, windows, linux ] - steps: - - uses: actions/download-artifact@v2 - with: - name: wheels - - name: Publish to PyPI - uses: messense/maturin-action@v1 - env: - MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} - with: - command: upload - args: --skip-existing * diff --git a/dask_planner/Cargo.lock b/dask_planner/Cargo.lock deleted file mode 100644 index 6852589af..000000000 --- a/dask_planner/Cargo.lock +++ /dev/null @@ -1,1593 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "ahash" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" -dependencies = [ - "getrandom 0.2.5", - "once_cell", - "version_check", -] - -[[package]] -name = "aho-corasick" -version = "0.7.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" -dependencies = [ - "memchr", -] - -[[package]] -name = "alloc-no-stdlib" -version = "2.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35ef4730490ad1c4eae5c4325b2a95f521d023e5c885853ff7aca0a6a1631db3" - -[[package]] -name = "alloc-stdlib" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "697ed7edc0f1711de49ce108c541623a0af97c6c60b2f6e2b65229847ac843c2" -dependencies = [ - "alloc-no-stdlib", -] - -[[package]] -name = "arrayref" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" - -[[package]] -name = "arrayvec" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" - -[[package]] -name = "arrow" -version = "9.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9864ca2fdcd3d4883259495b4517879877c5991d9928cc9713794d8076d3e78b" -dependencies = [ - "bitflags", - "chrono", - "comfy-table", - "csv", - "flatbuffers", - "half", - "hex", - "indexmap", - "lazy_static", - "lexical-core", - "multiversion", - "num", - "pyo3", - "rand 0.8.5", - "regex", - "serde", - "serde_derive", - "serde_json", -] - -[[package]] -name = "async-trait" -version = "0.1.52" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "061a7acccaa286c011ddc30970520b98fa40e00c9d644633fb26b5fc63a265e3" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "base64" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "blake2" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9cf849ee05b2ee5fba5e36f97ff8ec2533916700fc0758d40d92136a42f3388" -dependencies = [ - "digest", -] - -[[package]] -name = "blake3" -version = "1.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08e53fc5a564bb15bfe6fae56bd71522205f1f91893f9c0116edad6496c183f" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", - "digest", -] - -[[package]] -name = "block-buffer" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324" -dependencies = [ - "generic-array", -] - -[[package]] -name = "brotli" -version = "3.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f838e47a451d5a8fa552371f80024dd6ace9b7acdf25c4c3d0f9bc6816fb1c39" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor", -] - -[[package]] -name = "brotli-decompressor" -version = "2.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", -] - -[[package]] -name = "bstr" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" -dependencies = [ - "lazy_static", - "memchr", - "regex-automata", - "serde", -] - -[[package]] -name = "byteorder" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" - -[[package]] -name = "cc" -version = "1.0.73" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" -dependencies = [ - "jobserver", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chrono" -version = "0.4.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" -dependencies = [ - "libc", - "num-integer", - "num-traits", - "winapi", -] - -[[package]] -name = "comfy-table" -version = "5.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b103d85ca6e209388771bfb7aa6b68a7aeec4afbf6f0a0264bfbf50360e5212e" -dependencies = [ - "strum", - "strum_macros", - "unicode-width", -] - -[[package]] -name = "constant_time_eq" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" - -[[package]] -name = "cpufeatures" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95059428f66df56b63431fdb4e1947ed2190586af5c5a8a8b71122bdf5a7f469" -dependencies = [ - "libc", -] - -[[package]] -name = "crc32fast" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crypto-common" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57952ca27b5e3606ff4dd79b0020231aaf9d6aa76dc05fd30137538c50bd3ce8" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "csv" -version = "1.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" -dependencies = [ - "bstr", - "csv-core", - "itoa 0.4.8", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr", -] - -[[package]] -name = "dask_planner" -version = "0.1.0" -dependencies = [ - "datafusion", - "datafusion-common", - "datafusion-expr", - "mimalloc", - "parking_lot 0.12.0", - "pyo3", - "rand 0.7.3", - "sqlparser", - "tokio", - "uuid", -] - -[[package]] -name = "datafusion" -version = "7.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cf8e6735817bb021748d72cecc33e468d8775bf749470c52aa7f55ee5cdf9e" -dependencies = [ - "ahash", - "arrow", - "async-trait", - "blake2", - "blake3", - "chrono", - "datafusion-common", - "datafusion-expr", - "futures", - "hashbrown 0.12.0", - "lazy_static", - "log", - "md-5", - "num_cpus", - "ordered-float 2.10.0", - "parking_lot 0.12.0", - "parquet", - "paste 1.0.6", - "pin-project-lite", - "pyo3", - "rand 0.8.5", - "regex", - "sha2", - "smallvec", - "sqlparser", - "tempfile", - "tokio", - "tokio-stream", - "unicode-segmentation", -] - -[[package]] -name = "datafusion-common" -version = "7.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40d2e2a1a6508f9e66c6deb84ab655be0e9db177f617ec1904de458439981b03" -dependencies = [ - "arrow", - "ordered-float 2.10.0", - "parquet", - "pyo3", - "sqlparser", -] - -[[package]] -name = "datafusion-expr" -version = "7.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1061dc27f2e4843ddb0f93a8e208d1984260c18c8bbf5e67598be9a111259fe2" -dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "sqlparser", -] - -[[package]] -name = "digest" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - -[[package]] -name = "fastrand" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" -dependencies = [ - "instant", -] - -[[package]] -name = "flatbuffers" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef4c5738bcd7fad10315029c50026f83c9da5e4a21f8ed66826f43e0e2bde5f6" -dependencies = [ - "bitflags", - "smallvec", - "thiserror", -] - -[[package]] -name = "flate2" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" -dependencies = [ - "cfg-if", - "crc32fast", - "libc", - "miniz_oxide", -] - -[[package]] -name = "futures" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" - -[[package]] -name = "futures-executor" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" - -[[package]] -name = "futures-macro" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "futures-sink" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" - -[[package]] -name = "futures-task" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" - -[[package]] -name = "futures-util" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "generic-array" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "getrandom" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.9.0+wasi-snapshot-preview1", -] - -[[package]] -name = "getrandom" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d39cd93900197114fa1fcb7ae84ca742095eed9442088988ae74fa744e930e77" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.10.2+wasi-snapshot-preview1", -] - -[[package]] -name = "half" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" - -[[package]] -name = "hashbrown" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" - -[[package]] -name = "hashbrown" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c21d40587b92fa6a6c6e3c1bdbf87d75511db5672f9c93175574b3a00df1758" -dependencies = [ - "ahash", -] - -[[package]] -name = "heck" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = "indexmap" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282a6247722caba404c065016bbfa522806e51714c34f5dfc3e4a3a46fcb4223" -dependencies = [ - "autocfg", - "hashbrown 0.11.2", -] - -[[package]] -name = "indoc" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47741a8bc60fb26eb8d6e0238bbb26d8575ff623fdc97b1a2c00c050b9684ed8" -dependencies = [ - "indoc-impl", - "proc-macro-hack", -] - -[[package]] -name = "indoc-impl" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce046d161f000fffde5f432a0d034d0341dc152643b2598ed5bfce44c4f3a8f0" -dependencies = [ - "proc-macro-hack", - "proc-macro2", - "quote", - "syn", - "unindent", -] - -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "integer-encoding" -version = "1.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48dc51180a9b377fd75814d0cc02199c20f8e99433d6762f650d39cdbbd3b56f" - -[[package]] -name = "itoa" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" - -[[package]] -name = "itoa" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" - -[[package]] -name = "jobserver" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" -dependencies = [ - "libc", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "lexical-core" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a3926d8f156019890be4abe5fd3785e0cff1001e06f59c597641fd513a5a284" -dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", -] - -[[package]] -name = "lexical-parse-float" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4d066d004fa762d9da995ed21aa8845bb9f6e4265f540d716fb4b315197bf0e" -dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-parse-integer" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2c92badda8cc0fc4f3d3cc1c30aaefafb830510c8781ce4e8669881f3ed53ac" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-util" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff669ccaae16ee33af90dc51125755efed17f1309626ba5c12052512b11e291" -dependencies = [ - "static_assertions", -] - -[[package]] -name = "lexical-write-float" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b5186948c7b297abaaa51560f2581dae625e5ce7dfc2d8fdc56345adb6dc576" -dependencies = [ - "lexical-util", - "lexical-write-integer", - "static_assertions", -] - -[[package]] -name = "lexical-write-integer" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ece956492e0e40fd95ef8658a34d53a3b8c2015762fdcaaff2167b28de1f56ef" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "libc" -version = "0.2.119" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bf2e165bb3457c8e098ea76f3e3bc9db55f87aa90d52d0e6be741470916aaa4" - -[[package]] -name = "libmimalloc-sys" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7705fc40f6ed493f73584abbb324e74f96b358ff60dfe5659a0f8fc12c590a69" -dependencies = [ - "cc", -] - -[[package]] -name = "lock_api" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88943dd7ef4a2e5a4bfa2753aaab3013e34ce2533d1996fb18ef591e315e2b3b" -dependencies = [ - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "lz4" -version = "1.23.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885" -dependencies = [ - "libc", - "lz4-sys", -] - -[[package]] -name = "lz4-sys" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17" -dependencies = [ - "cc", - "libc", -] - -[[package]] -name = "md-5" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "658646b21e0b72f7866c7038ab086d3d5e1cd6271f060fd37defb241949d0582" -dependencies = [ - "digest", -] - -[[package]] -name = "memchr" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" - -[[package]] -name = "mimalloc" -version = "0.1.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0dfa131390c2f6bdb3242f65ff271fcdaca5ff7b6c08f28398be7f2280e3926" -dependencies = [ - "libmimalloc-sys", -] - -[[package]] -name = "miniz_oxide" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" -dependencies = [ - "adler", - "autocfg", -] - -[[package]] -name = "multiversion" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "025c962a3dd3cc5e0e520aa9c612201d127dcdf28616974961a649dca64f5373" -dependencies = [ - "multiversion-macros", -] - -[[package]] -name = "multiversion-macros" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8a3e2bde382ebf960c1f3e79689fa5941625fe9bf694a1cb64af3e85faff3af" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "num" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - -[[package]] -name = "num-bigint" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-complex" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26873667bbbb7c5182d4a37c1add32cdf09f841af72da53318fdb81543c15085" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-integer" -version = "0.1.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" -dependencies = [ - "autocfg", - "num-traits", -] - -[[package]] -name = "num-iter" -version = "0.1.42" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d41702bd167c2df5520b384281bc111a4b5efcf7fbc4c9c222c815b07e0a6a6a" -dependencies = [ - "autocfg", - "num-bigint", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" -dependencies = [ - "autocfg", -] - -[[package]] -name = "num_cpus" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "once_cell" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" - -[[package]] -name = "ordered-float" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" -dependencies = [ - "num-traits", -] - -[[package]] -name = "ordered-float" -version = "2.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" -dependencies = [ - "num-traits", -] - -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.5", -] - -[[package]] -name = "parking_lot" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58" -dependencies = [ - "lock_api", - "parking_lot_core 0.9.1", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall", - "smallvec", - "winapi", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28141e0cc4143da2443301914478dc976a61ffdb3f043058310c70df2fed8954" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-sys", -] - -[[package]] -name = "parquet" -version = "9.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1697d963e6319c19099adcf131a5440685053d4902890f9e4bb272cbd0dc6532" -dependencies = [ - "arrow", - "base64", - "brotli", - "byteorder", - "chrono", - "flate2", - "lz4", - "num", - "num-bigint", - "parquet-format", - "rand 0.8.5", - "snap", - "thrift", - "zstd", -] - -[[package]] -name = "parquet-format" -version = "4.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f0c06cdcd5460967c485f9c40a821746f5955ad81990533c7fae95dbd9bc0b5" -dependencies = [ - "thrift", -] - -[[package]] -name = "paste" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880" -dependencies = [ - "paste-impl", - "proc-macro-hack", -] - -[[package]] -name = "paste" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0744126afe1a6dd7f394cb50a716dbe086cb06e255e53d8d0185d82828358fb5" - -[[package]] -name = "paste-impl" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6" -dependencies = [ - "proc-macro-hack", -] - -[[package]] -name = "pin-project-lite" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e280fbe77cc62c91527259e9442153f4688736748d24660126286329742b4c6c" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "ppv-lite86" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" - -[[package]] -name = "proc-macro-hack" -version = "0.5.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" - -[[package]] -name = "proc-macro2" -version = "1.0.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029" -dependencies = [ - "unicode-xid", -] - -[[package]] -name = "pyo3" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cf01dbf1c05af0a14c7779ed6f3aa9deac9c3419606ac9de537a2d649005720" -dependencies = [ - "cfg-if", - "indoc", - "libc", - "parking_lot 0.11.2", - "paste 0.1.18", - "pyo3-build-config", - "pyo3-macros", - "unindent", -] - -[[package]] -name = "pyo3-build-config" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf9e4d128bfbddc898ad3409900080d8d5095c379632fbbfbb9c8cfb1fb852b" -dependencies = [ - "once_cell", -] - -[[package]] -name = "pyo3-macros" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67701eb32b1f9a9722b4bc54b548ff9d7ebfded011c12daece7b9063be1fd755" -dependencies = [ - "pyo3-macros-backend", - "quote", - "syn", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f44f09e825ee49a105f2c7b23ebee50886a9aee0746f4dd5a704138a64b0218a" -dependencies = [ - "proc-macro2", - "pyo3-build-config", - "quote", - "syn", -] - -[[package]] -name = "quote" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "864d3e96a899863136fc6e99f3d7cae289dafe43bf2c5ac19b70df7210c0a145" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" -dependencies = [ - "getrandom 0.1.16", - "libc", - "rand_chacha 0.2.2", - "rand_core 0.5.1", - "rand_hc", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.3", -] - -[[package]] -name = "rand_chacha" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" -dependencies = [ - "ppv-lite86", - "rand_core 0.5.1", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core 0.6.3", -] - -[[package]] -name = "rand_core" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" -dependencies = [ - "getrandom 0.1.16", -] - -[[package]] -name = "rand_core" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" -dependencies = [ - "getrandom 0.2.5", -] - -[[package]] -name = "rand_hc" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -dependencies = [ - "rand_core 0.5.1", -] - -[[package]] -name = "redox_syscall" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8380fe0152551244f0747b1bf41737e0f8a74f97a14ccefd1148187271634f3c" -dependencies = [ - "bitflags", -] - -[[package]] -name = "regex" -version = "1.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" - -[[package]] -name = "regex-syntax" -version = "0.6.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" - -[[package]] -name = "remove_dir_all" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" -dependencies = [ - "winapi", -] - -[[package]] -name = "rustversion" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f" - -[[package]] -name = "ryu" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" - -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] -name = "serde" -version = "1.0.136" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" - -[[package]] -name = "serde_derive" -version = "1.0.136" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95" -dependencies = [ - "indexmap", - "itoa 1.0.1", - "ryu", - "serde", -] - -[[package]] -name = "sha2" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "slab" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5" - -[[package]] -name = "smallvec" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" - -[[package]] -name = "snap" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" - -[[package]] -name = "sqlparser" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8f192f29f4aa49e57bebd0aa05858e0a1f32dd270af36efe49edb82cbfffab6" -dependencies = [ - "log", -] - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "strum" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb" - -[[package]] -name = "strum_macros" -version = "0.23.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn", -] - -[[package]] -name = "subtle" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" - -[[package]] -name = "syn" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b" -dependencies = [ - "proc-macro2", - "quote", - "unicode-xid", -] - -[[package]] -name = "tempfile" -version = "3.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" -dependencies = [ - "cfg-if", - "fastrand", - "libc", - "redox_syscall", - "remove_dir_all", - "winapi", -] - -[[package]] -name = "thiserror" -version = "1.0.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "threadpool" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" -dependencies = [ - "num_cpus", -] - -[[package]] -name = "thrift" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6d965454947cc7266d22716ebfd07b18d84ebaf35eec558586bbb2a8cb6b5b" -dependencies = [ - "byteorder", - "integer-encoding", - "log", - "ordered-float 1.1.1", - "threadpool", -] - -[[package]] -name = "tokio" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af73ac49756f3f7c01172e34a23e5d0216f6c32333757c2c61feb2bbff5a5ee" -dependencies = [ - "num_cpus", - "parking_lot 0.12.0", - "pin-project-lite", - "tokio-macros", -] - -[[package]] -name = "tokio-macros" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tokio-stream" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50145484efff8818b5ccd256697f36863f587da82cf8b409c53adf1e840798e3" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "typenum" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" - -[[package]] -name = "unicode-segmentation" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" - -[[package]] -name = "unicode-width" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" - -[[package]] -name = "unicode-xid" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" - -[[package]] -name = "unindent" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514672a55d7380da379785a4d70ca8386c8883ff7eaae877be4d2081cebe73d8" - -[[package]] -name = "uuid" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" -dependencies = [ - "getrandom 0.2.5", -] - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "wasi" -version = "0.9.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" - -[[package]] -name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-sys" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df6e476185f92a12c072be4a189a0210dcdcf512a1891d6dff9edb874deadc6" -dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_msvc" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8e92753b1c443191654ec532f14c199742964a061be25d77d7a96f09db20bf5" - -[[package]] -name = "windows_i686_gnu" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a711c68811799e017b6038e0922cb27a5e2f43a2ddb609fe0b6f3eeda9de615" - -[[package]] -name = "windows_i686_msvc" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c11bb1a02615db74680b32a68e2d61f553cc24c4eb5b4ca10311740e44172" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c912b12f7454c6620635bbff3450962753834be2a594819bd5e945af18ec64bc" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "504a2476202769977a040c6364301a3f65d0cc9e3fb08600b2bda150a0488316" - -[[package]] -name = "zstd" -version = "0.10.0+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b1365becbe415f3f0fcd024e2f7b45bacfb5bdd055f0dc113571394114e7bdd" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "4.1.4+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f7cd17c9af1a4d6c24beb1cc54b17e2ef7b593dc92f19e9d9acad8b182bbaee" -dependencies = [ - "libc", - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "1.6.3+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8" -dependencies = [ - "cc", - "libc", -] diff --git a/dask_planner/Cargo.toml b/dask_planner/Cargo.toml index 5cb01bdb2..0ed665bf6 100644 --- a/dask_planner/Cargo.toml +++ b/dask_planner/Cargo.toml @@ -2,23 +2,23 @@ name = "dask_planner" repository = "https://github.com/dask-contrib/dask-sql" version = "0.1.0" -description = "Bindings for Datafusion used by Dask-SQL" +description = "Bindings for DataFusion used by Dask-SQL" readme = "README.md" license = "Apache-2.0" edition = "2021" -rust-version = "1.57" +rust-version = "1.59" [dependencies] tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } rand = "0.7" -pyo3 = { version = "0.15", features = ["extension-module", "abi3", "abi3-py36"] } -datafusion = { version = "^7.0.0", features = ["pyarrow"] } -datafusion-expr = { version = "^7.0.0" } -datafusion-common = { version = "^7.0.0", features = ["pyarrow"] } +pyo3 = { version = "0.15", features = ["extension-module", "abi3", "abi3-py38"] } +datafusion = { git="https://github.com/apache/arrow-datafusion/", rev = "583b4ab8dfe6148a7387841d112dd50b1151f6fb" } +datafusion-expr = { git="https://github.com/apache/arrow-datafusion/", rev = "583b4ab8dfe6148a7387841d112dd50b1151f6fb" } uuid = { version = "0.8", features = ["v4"] } mimalloc = { version = "*", default-features = false } sqlparser = "0.14.0" parking_lot = "0.12" +async-trait = "0.1.41" [lib] diff --git a/dask_planner/README.md b/dask_planner/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/dask_planner/dask_planner/python/__init__.py b/dask_planner/dask_planner/python/__init__.py deleted file mode 100644 index bc6c513f5..000000000 --- a/dask_planner/dask_planner/python/__init__.py +++ /dev/null @@ -1,95 +0,0 @@ -# from dask_planner import * - -# from abc import ABCMeta, abstractmethod -# from typing import List - -# try: -# import importlib.metadata as importlib_metadata -# except ImportError: -# import importlib_metadata - -# import pyarrow as pa - -# from ._internal import AggregateUDF, DataFrame, ExecutionContext, Expression, ScalarUDF - -# __version__ = importlib_metadata.version(__name__) - - -# __all__ = [ -# "DataFrame", -# "ExecutionContext", -# "Expression", -# "AggregateUDF", -# "ScalarUDF", -# "column", -# "literal", -# ] - - -# class Accumulator(metaclass=ABCMeta): -# @abstractmethod -# def state(self) -> List[pa.Scalar]: -# pass - -# @abstractmethod -# def update(self, values: pa.Array) -> None: -# pass - -# @abstractmethod -# def merge(self, states: pa.Array) -> None: -# pass - -# @abstractmethod -# def evaluate(self) -> pa.Scalar: -# pass - - -# def column(value): -# return Expression.column(value) - - -# col = column - - -# def literal(value): -# if not isinstance(value, pa.Scalar): -# value = pa.scalar(value) -# return Expression.literal(value) - - -# lit = literal - - -# def udf(func, input_types, return_type, volatility, name=None): -# """ -# Create a new User Defined Function -# """ -# if not callable(func): -# raise TypeError("`func` argument must be callable") -# if name is None: -# name = func.__qualname__ -# return ScalarUDF( -# name=name, -# func=func, -# input_types=input_types, -# return_type=return_type, -# volatility=volatility, -# ) - - -# def udaf(accum, input_type, return_type, state_type, volatility, name=None): -# """ -# Create a new User Defined Aggregate Function -# """ -# if not issubclass(accum, Accumulator): -# raise TypeError("`accum` must implement the abstract base class Accumulator") -# if name is None: -# name = accum.__qualname__ -# return AggregateUDF( -# name=name, -# accumulator=accum, -# input_type=input_type, -# return_type=return_type, -# state_type=state_type, -# volatility=volatility, -# ) diff --git a/dask_planner/src/catalog.rs b/dask_planner/src/catalog.rs deleted file mode 100644 index ed3acbed1..000000000 --- a/dask_planner/src/catalog.rs +++ /dev/null @@ -1,107 +0,0 @@ - -use std::collections::HashSet; -use std::sync::Arc; - -use pyo3::exceptions::PyKeyError; -use pyo3::prelude::*; - -use datafusion::{ - arrow::pyarrow::PyArrowConvert, - catalog::{catalog::CatalogProvider, schema::SchemaProvider}, - datasource::{TableProvider, TableType}, -}; - -#[pyclass(name = "Catalog", module = "datafusion", subclass)] -pub(crate) struct PyCatalog { - catalog: Arc, -} - -#[pyclass(name = "Database", module = "datafusion", subclass)] -pub(crate) struct PyDatabase { - database: Arc, -} - -#[pyclass(name = "Table", module = "datafusion", subclass)] -pub(crate) struct PyTable { - table: Arc, -} - -impl PyCatalog { - pub fn new(catalog: Arc) -> Self { - Self { catalog } - } -} - -impl PyDatabase { - pub fn new(database: Arc) -> Self { - Self { database } - } -} - -impl PyTable { - pub fn new(table: Arc) -> Self { - Self { table } - } -} - -#[pymethods] -impl PyCatalog { - fn names(&self) -> Vec { - self.catalog.schema_names() - } - - #[args(name = "\"public\"")] - fn database(&self, name: &str) -> PyResult { - match self.catalog.schema(name) { - Some(database) => Ok(PyDatabase::new(database)), - None => Err(PyKeyError::new_err(format!( - "Database with name {} doesn't exist.", - name - ))), - } - } -} - -#[pymethods] -impl PyDatabase { - fn names(&self) -> HashSet { - self.database.table_names().into_iter().collect() - } - - fn table(&self, name: &str) -> PyResult { - match self.database.table(name) { - Some(table) => Ok(PyTable::new(table)), - None => Err(PyKeyError::new_err(format!( - "Table with name {} doesn't exist.", - name - ))), - } - } - - // register_table - // deregister_table -} - -#[pymethods] -impl PyTable { - /// Get a reference to the schema for this table - #[getter] - fn schema(&self, py: Python) -> PyResult { - self.table.schema().to_pyarrow(py) - } - - /// Get the type of this table for metadata/catalog purposes. - #[getter] - fn kind(&self) -> &str { - match self.table.table_type() { - TableType::Base => "physical", - TableType::View => "view", - TableType::Temporary => "temporary", - } - } - - // fn scan - // fn statistics - // fn has_exact_statistics - // fn supports_filter_pushdown -} diff --git a/dask_planner/src/errors.rs b/dask_planner/src/errors.rs deleted file mode 100644 index 18c475f11..000000000 --- a/dask_planner/src/errors.rs +++ /dev/null @@ -1,41 +0,0 @@ - -use core::fmt; - -use datafusion::arrow::error::ArrowError; -use datafusion::error::DataFusionError as InnerDataFusionError; -use pyo3::{exceptions::PyException, PyErr}; - -#[derive(Debug)] -pub enum DataFusionError { - ExecutionError(InnerDataFusionError), - ArrowError(ArrowError), - Common(String), -} - -impl fmt::Display for DataFusionError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - DataFusionError::ExecutionError(e) => write!(f, "DataFusion error: {:?}", e), - DataFusionError::ArrowError(e) => write!(f, "Arrow error: {:?}", e), - DataFusionError::Common(e) => write!(f, "{}", e), - } - } -} - -impl From for DataFusionError { - fn from(err: ArrowError) -> DataFusionError { - DataFusionError::ArrowError(err) - } -} - -impl From for DataFusionError { - fn from(err: InnerDataFusionError) -> DataFusionError { - DataFusionError::ExecutionError(err) - } -} - -impl From for PyErr { - fn from(err: DataFusionError) -> PyErr { - PyException::new_err(err.to_string()) - } -} diff --git a/dask_planner/src/expression.rs b/dask_planner/src/expression.rs index f72455b3b..da10e7230 100644 --- a/dask_planner/src/expression.rs +++ b/dask_planner/src/expression.rs @@ -1,3 +1,5 @@ +use crate::sql::logical; +use crate::sql::types::PyDataType; use pyo3::PyMappingProtocol; use pyo3::{basic::CompareOp, prelude::*, PyNumberProtocol, PyObjectProtocol}; @@ -8,6 +10,10 @@ use datafusion::logical_plan::{col, lit, Expr}; use datafusion::scalar::ScalarValue; +pub use datafusion::logical_plan::plan::LogicalPlan; + +use datafusion::logical_expr::BuiltinScalarFunction; + /// An PyExpr that can be used on a DataFrame #[pyclass(name = "Expression", module = "datafusion", subclass)] #[derive(Debug, Clone)] @@ -27,6 +33,24 @@ impl From for PyExpr { } } +#[pyclass(name = "ScalarValue", module = "datafusion", subclass)] +#[derive(Debug, Clone)] +pub struct PyScalarValue { + pub scalar_value: ScalarValue, +} + +impl From for ScalarValue { + fn from(pyscalar: PyScalarValue) -> ScalarValue { + pyscalar.scalar_value + } +} + +impl From for PyScalarValue { + fn from(scalar_value: ScalarValue) -> PyScalarValue { + PyScalarValue { scalar_value } + } +} + #[pyproto] impl PyNumberProtocol for PyExpr { fn __add__(lhs: PyExpr, rhs: PyExpr) -> PyResult { @@ -84,84 +108,232 @@ impl PyObjectProtocol for PyExpr { #[pymethods] impl PyExpr { #[staticmethod] - pub fn literal(value: ScalarValue) -> PyExpr { - lit(value).into() + pub fn literal(value: PyScalarValue) -> PyExpr { + lit(value.scalar_value).into() } - /// Examine the current/"self" PyExpr and return its "type" /// In this context a "type" is what Dask-SQL Python /// RexConverter plugin instance should be invoked to handle /// the Rex conversion pub fn get_expr_type(&self) -> String { - match &self.expr { - Expr::Alias(..) => String::from("Alias"), - Expr::Column(..) => String::from("Column"), + String::from(match &self.expr { + Expr::Alias(..) => "Alias", + Expr::Column(..) => "Column", Expr::ScalarVariable(..) => panic!("ScalarVariable!!!"), - Expr::Literal(..) => panic!("Literal!!!"), - Expr::BinaryExpr {..} => String::from("BinaryExpr"), + Expr::Literal(..) => "Literal", + Expr::BinaryExpr { .. } => "BinaryExpr", Expr::Not(..) => panic!("Not!!!"), Expr::IsNotNull(..) => panic!("IsNotNull!!!"), Expr::Negative(..) => panic!("Negative!!!"), - Expr::GetIndexedField{..} => panic!("GetIndexedField!!!"), + Expr::GetIndexedField { .. } => panic!("GetIndexedField!!!"), Expr::IsNull(..) => panic!("IsNull!!!"), - Expr::Between{..} => panic!("Between!!!"), - Expr::Case{..} => panic!("Case!!!"), - Expr::Cast{..} => panic!("Cast!!!"), - Expr::TryCast{..} => panic!("TryCast!!!"), - Expr::Sort{..} => panic!("Sort!!!"), - Expr::ScalarFunction{..} => panic!("ScalarFunction!!!"), - Expr::AggregateFunction{..} => panic!("AggregateFunction!!!"), - Expr::WindowFunction{..} => panic!("WindowFunction!!!"), - Expr::AggregateUDF{..} => panic!("AggregateUDF!!!"), - Expr::InList{..} => panic!("InList!!!"), + Expr::Between { .. } => panic!("Between!!!"), + Expr::Case { .. } => panic!("Case!!!"), + Expr::Cast { .. } => "Cast", + Expr::TryCast { .. } => panic!("TryCast!!!"), + Expr::Sort { .. } => panic!("Sort!!!"), + Expr::ScalarFunction { .. } => "ScalarFunction", + Expr::AggregateFunction { .. } => "AggregateFunction", + Expr::WindowFunction { .. } => panic!("WindowFunction!!!"), + Expr::AggregateUDF { .. } => panic!("AggregateUDF!!!"), + Expr::InList { .. } => panic!("InList!!!"), Expr::Wildcard => panic!("Wildcard!!!"), - _ => String::from("OTHER") - } + _ => "OTHER", + }) } - pub fn column_name(&self) -> String { + pub fn column_name(&self, mut plan: logical::PyLogicalPlan) -> String { match &self.expr { - Expr::Alias(exprs, name) => { - println!("Expressions: {:?}, Expression Name: {:?}", exprs, name); - panic!("Alias") - }, - Expr::Column(column) => { column.name.clone() }, - Expr::ScalarVariable(..) => panic!("ScalarVariable!!!"), - Expr::Literal(..) => panic!("Literal!!!"), - Expr::BinaryExpr {..} => panic!("BinaryExpr"), - Expr::Not(..) => panic!("Not!!!"), - Expr::IsNotNull(..) => panic!("IsNotNull!!!"), - Expr::Negative(..) => panic!("Negative!!!"), - Expr::GetIndexedField{..} => panic!("GetIndexedField!!!"), - Expr::IsNull(..) => panic!("IsNull!!!"), - Expr::Between{..} => panic!("Between!!!"), - Expr::Case{..} => panic!("Case!!!"), - Expr::Cast{..} => panic!("Cast!!!"), - Expr::TryCast{..} => panic!("TryCast!!!"), - Expr::Sort{..} => panic!("Sort!!!"), - Expr::ScalarFunction{..} => panic!("ScalarFunction!!!"), - Expr::AggregateFunction{..} => panic!("AggregateFunction!!!"), - Expr::WindowFunction{..} => panic!("WindowFunction!!!"), - Expr::AggregateUDF{..} => panic!("AggregateUDF!!!"), - Expr::InList{..} => panic!("InList!!!"), - Expr::Wildcard => panic!("Wildcard!!!"), - _ => panic!("Nothing found!!!") + Expr::Alias(expr, name) => { + println!("Alias encountered with name: {:?}", name); + + // Only certain LogicalPlan variants are valid in this nested Alias scenario so we + // extract the valid ones and error on the invalid ones + match expr.as_ref() { + Expr::Column(col) => { + // First we must iterate the current node before getting its input + match plan.current_node() { + LogicalPlan::Projection(proj) => match proj.input.as_ref() { + LogicalPlan::Aggregate(agg) => { + let mut exprs = agg.group_expr.clone(); + exprs.extend_from_slice(&agg.aggr_expr); + match &exprs[plan.get_index(col)] { + Expr::AggregateFunction { args, .. } => match &args[0] { + Expr::Column(col) => { + println!("AGGREGATE COLUMN IS {}", col.name); + col.name.clone() + } + _ => name.clone(), + }, + _ => name.clone(), + } + } + _ => name.clone(), + }, + _ => name.clone(), + } + } + _ => name.clone(), + } + } + Expr::Column(column) => column.name.clone(), + Expr::ScalarVariable(..) => unimplemented!("ScalarVariable!!!"), + Expr::Literal(..) => unimplemented!("Literal!!!"), + Expr::BinaryExpr { + left: _, + op: _, + right: _, + } => { + // /// TODO: Examine this more deeply about whether name comes from the left or right + // self.column_name(left) + unimplemented!("BinaryExpr HERE!!!") + } + Expr::Not(..) => unimplemented!("Not!!!"), + Expr::IsNotNull(..) => unimplemented!("IsNotNull!!!"), + Expr::Negative(..) => unimplemented!("Negative!!!"), + Expr::GetIndexedField { .. } => unimplemented!("GetIndexedField!!!"), + Expr::IsNull(..) => unimplemented!("IsNull!!!"), + Expr::Between { .. } => unimplemented!("Between!!!"), + Expr::Case { .. } => unimplemented!("Case!!!"), + Expr::Cast { .. } => unimplemented!("Cast!!!"), + Expr::TryCast { .. } => unimplemented!("TryCast!!!"), + Expr::Sort { .. } => unimplemented!("Sort!!!"), + Expr::ScalarFunction { .. } => unimplemented!("ScalarFunction!!!"), + Expr::AggregateFunction { .. } => unimplemented!("AggregateFunction!!!"), + Expr::WindowFunction { .. } => unimplemented!("WindowFunction!!!"), + Expr::AggregateUDF { .. } => unimplemented!("AggregateUDF!!!"), + Expr::InList { .. } => unimplemented!("InList!!!"), + Expr::Wildcard => unimplemented!("Wildcard!!!"), + _ => panic!("Nothing found!!!"), } } - /// Gets the operands for a BinaryExpr call - pub fn getOperands(&self) -> PyResult> { + #[pyo3(name = "getOperands")] + pub fn get_operands(&self) -> PyResult> { match &self.expr { - Expr::BinaryExpr {left, op, right} => { - let operands: Vec = Vec::new(); + Expr::BinaryExpr { left, op: _, right } => { + let mut operands: Vec = Vec::new(); + let left_desc: Expr = *left.clone(); + operands.push(left_desc.into()); + let right_desc: Expr = *right.clone(); + operands.push(right_desc.into()); Ok(operands) - }, - _ => Err(PyErr::new::("current_node is not of type Projection")) + } + Expr::ScalarFunction { fun: _, args } => { + let mut operands: Vec = Vec::new(); + for arg in args { + operands.push(arg.clone().into()); + } + Ok(operands) + } + Expr::Cast { expr, data_type: _ } => { + let mut operands: Vec = Vec::new(); + let ex: Expr = *expr.clone(); + operands.push(ex.into()); + Ok(operands) + } + _ => Err(PyErr::new::( + "unknown Expr type encountered", + )), } } + #[pyo3(name = "getOperatorName")] + pub fn get_operator_name(&self) -> PyResult { + match &self.expr { + Expr::BinaryExpr { + left: _, + op, + right: _, + } => Ok(format!("{}", op)), + Expr::ScalarFunction { fun, args: _ } => Ok(format!("{}", fun)), + Expr::Cast { + expr: _, + data_type: _, + } => Ok(String::from("cast")), + _ => Err(PyErr::new::( + "Catch all triggered ....", + )), + } + } + + /// Gets the ScalarValue represented by the Expression + #[pyo3(name = "getType")] + pub fn get_type(&self) -> PyResult { + match &self.expr { + Expr::ScalarVariable(..) => panic!("ScalarVariable!!!"), + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Boolean(_value) => Ok(String::from("Boolean")), + ScalarValue::Float32(_value) => Ok(String::from("Float32")), + ScalarValue::Float64(_value) => Ok(String::from("Float64")), + ScalarValue::Decimal128(_value, ..) => Ok(String::from("Decimal128")), + ScalarValue::Int8(_value) => Ok(String::from("Int8")), + ScalarValue::Int16(_value) => Ok(String::from("Int16")), + ScalarValue::Int32(_value) => Ok(String::from("Int32")), + ScalarValue::Int64(_value) => Ok(String::from("Int64")), + ScalarValue::UInt8(_value) => Ok(String::from("UInt8")), + ScalarValue::UInt16(_value) => Ok(String::from("UInt16")), + ScalarValue::UInt32(_value) => Ok(String::from("UInt32")), + ScalarValue::UInt64(_value) => Ok(String::from("UInt64")), + ScalarValue::Utf8(_value) => Ok(String::from("Utf8")), + ScalarValue::LargeUtf8(_value) => Ok(String::from("LargeUtf8")), + ScalarValue::Binary(_value) => Ok(String::from("Binary")), + ScalarValue::LargeBinary(_value) => Ok(String::from("LargeBinary")), + ScalarValue::Date32(_value) => Ok(String::from("Date32")), + ScalarValue::Date64(_value) => Ok(String::from("Date64")), + _ => { + panic!("CatchAll") + } + }, + Expr::ScalarFunction { fun, args: _ } => match fun { + BuiltinScalarFunction::Abs => Ok(String::from("Abs")), + BuiltinScalarFunction::DatePart => Ok(String::from("DatePart")), + _ => { + panic!("fire here for scalar function") + } + }, + Expr::Cast { expr: _, data_type } => match data_type { + DataType::Null => Ok(String::from("NULL")), + DataType::Boolean => Ok(String::from("BOOLEAN")), + DataType::Int8 => Ok(String::from("TINYINT")), + DataType::UInt8 => Ok(String::from("TINYINT")), + DataType::Int16 => Ok(String::from("SMALLINT")), + DataType::UInt16 => Ok(String::from("SMALLINT")), + DataType::Int32 => Ok(String::from("INTEGER")), + DataType::UInt32 => Ok(String::from("INTEGER")), + DataType::Int64 => Ok(String::from("BIGINT")), + DataType::UInt64 => Ok(String::from("BIGINT")), + DataType::Float32 => Ok(String::from("FLOAT")), + DataType::Float64 => Ok(String::from("DOUBLE")), + DataType::Timestamp { .. } => Ok(String::from("TIMESTAMP")), + DataType::Date32 => Ok(String::from("DATE")), + DataType::Date64 => Ok(String::from("DATE")), + DataType::Time32(..) => Ok(String::from("TIME32")), + DataType::Time64(..) => Ok(String::from("TIME64")), + DataType::Duration(..) => Ok(String::from("DURATION")), + DataType::Interval(..) => Ok(String::from("INTERVAL")), + DataType::Binary => Ok(String::from("BINARY")), + DataType::FixedSizeBinary(..) => Ok(String::from("FIXEDSIZEBINARY")), + DataType::LargeBinary => Ok(String::from("LARGEBINARY")), + DataType::Utf8 => Ok(String::from("VARCHAR")), + DataType::LargeUtf8 => Ok(String::from("BIGVARCHAR")), + DataType::List(..) => Ok(String::from("LIST")), + DataType::FixedSizeList(..) => Ok(String::from("FIXEDSIZELIST")), + DataType::LargeList(..) => Ok(String::from("LARGELIST")), + DataType::Struct(..) => Ok(String::from("STRUCT")), + DataType::Union(..) => Ok(String::from("UNION")), + DataType::Dictionary(..) => Ok(String::from("DICTIONARY")), + DataType::Decimal(..) => Ok(String::from("DECIMAL")), + DataType::Map(..) => Ok(String::from("MAP")), + _ => { + panic!("This is not yet implemented!!!") + } + }, + _ => panic!("OTHER"), + } + } #[staticmethod] pub fn column(value: &str) -> PyExpr { @@ -183,17 +355,228 @@ impl PyExpr { self.expr.clone().is_null().into() } - pub fn cast(&self, to: DataType) -> PyExpr { + pub fn cast(&self, to: PyDataType) -> PyExpr { // self.expr.cast_to() requires DFSchema to validate that the cast // is supported, omit that for now let expr = Expr::Cast { expr: Box::new(self.expr.clone()), - data_type: to, + data_type: to.data_type, }; expr.into() } + + /// TODO: I can't express how much I dislike explicity listing all of these methods out + /// but PyO3 makes it necessary since its annotations cannot be used in trait impl blocks + #[pyo3(name = "getFloat32Value")] + pub fn float_32_value(&mut self) -> f32 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Float32(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getFloat64Value")] + pub fn float_64_value(&mut self) -> f64 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Float64(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getInt8Value")] + pub fn int_8_value(&mut self) -> i8 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Int8(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getInt16Value")] + pub fn int_16_value(&mut self) -> i16 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Int16(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getInt32Value")] + pub fn int_32_value(&mut self) -> i32 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Int32(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getInt64Value")] + pub fn int_64_value(&mut self) -> i64 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Int64(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getUInt8Value")] + pub fn uint_8_value(&mut self) -> u8 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::UInt8(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getUInt16Value")] + pub fn uint_16_value(&mut self) -> u16 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::UInt16(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getUInt32Value")] + pub fn uint_32_value(&mut self) -> u32 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::UInt32(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getUInt64Value")] + pub fn uint_64_value(&mut self) -> u64 { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::UInt64(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getBoolValue")] + pub fn bool_value(&mut self) -> bool { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Boolean(iv) => iv.unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } + + #[pyo3(name = "getStringValue")] + pub fn string_value(&mut self) -> String { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Utf8(iv) => iv.clone().unwrap(), + _ => { + panic!("getValue() - Unexpected value") + } + }, + _ => panic!("getValue() - Non literal value encountered"), + } + } } +// pub trait ObtainValue { +// fn getValue(&mut self) -> T; +// } + +// /// Expansion macro to get all typed values from a DataFusion Expr +// macro_rules! get_typed_value { +// ($t:ty, $func_name:ident) => { +// impl ObtainValue<$t> for PyExpr { +// #[inline] +// fn getValue(&mut self) -> $t +// { +// match &self.expr { +// Expr::Literal(scalar_value) => { +// match scalar_value { +// ScalarValue::$func_name(iv) => { +// iv.unwrap() +// }, +// _ => { +// panic!("getValue() - Unexpected value") +// } +// } +// }, +// _ => panic!("getValue() - Non literal value encountered") +// } +// } +// } +// } +// } + +// get_typed_value!(u8, UInt8); +// get_typed_value!(u16, UInt16); +// get_typed_value!(u32, UInt32); +// get_typed_value!(u64, UInt64); +// get_typed_value!(i8, Int8); +// get_typed_value!(i16, Int16); +// get_typed_value!(i32, Int32); +// get_typed_value!(i64, Int64); +// get_typed_value!(bool, Boolean); +// get_typed_value!(f32, Float32); +// get_typed_value!(f64, Float64); + +// get_typed_value!(for usize u8 u16 u32 u64 isize i8 i16 i32 i64 bool f32 f64); +// get_typed_value!(usize, Integer); +// get_typed_value!(isize, ); +// Decimal128(Option, usize, usize), +// Utf8(Option), +// LargeUtf8(Option), +// Binary(Option>), +// LargeBinary(Option>), +// List(Option, Global>>, Box), +// Date32(Option), +// Date64(Option), + #[pyproto] impl PyMappingProtocol for PyExpr { fn __getitem__(&self, key: &str) -> PyResult { diff --git a/dask_planner/src/functions.rs b/dask_planner/src/functions.rs deleted file mode 100644 index 0c8784dca..000000000 --- a/dask_planner/src/functions.rs +++ /dev/null @@ -1,322 +0,0 @@ - -use pyo3::{prelude::*, wrap_pyfunction}; - -use datafusion::logical_plan; - -use datafusion::physical_plan::{aggregates::AggregateFunction, functions::BuiltinScalarFunction}; - -use crate::errors; -use crate::expression::PyExpr; - -#[pyfunction] -fn array(value: Vec) -> PyExpr { - PyExpr { - expr: logical_plan::array(value.into_iter().map(|x| x.expr).collect::>()), - } -} - -#[pyfunction] -fn in_list(expr: PyExpr, value: Vec, negated: bool) -> PyExpr { - logical_plan::in_list( - expr.expr, - value.into_iter().map(|x| x.expr).collect::>(), - negated, - ) - .into() -} - -/// Current date and time -#[pyfunction] -fn now() -> PyExpr { - PyExpr { - // here lit(0) is a stub for conform to arity - expr: logical_plan::now(logical_plan::lit(0)), - } -} - -/// Returns a random value in the range 0.0 <= x < 1.0 -#[pyfunction] -fn random() -> PyExpr { - PyExpr { - expr: logical_plan::random(), - } -} - -/// Computes a binary hash of the given data. type is the algorithm to use. -/// Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, blake2b, and blake3. -#[pyfunction(value, method)] -fn digest(value: PyExpr, method: PyExpr) -> PyExpr { - PyExpr { - expr: logical_plan::digest(value.expr, method.expr), - } -} - -/// Concatenates the text representations of all the arguments. -/// NULL arguments are ignored. -#[pyfunction(args = "*")] -fn concat(args: Vec) -> PyResult { - let args = args.into_iter().map(|e| e.expr).collect::>(); - Ok(logical_plan::concat(&args).into()) -} - -/// Concatenates all but the first argument, with separators. -/// The first argument is used as the separator string, and should not be NULL. -/// Other NULL arguments are ignored. -#[pyfunction(sep, args = "*")] -fn concat_ws(sep: String, args: Vec) -> PyResult { - let args = args.into_iter().map(|e| e.expr).collect::>(); - Ok(logical_plan::concat_ws(sep, &args).into()) -} - -/// Creates a new Sort expression -#[pyfunction] -fn order_by(expr: PyExpr, asc: Option, nulls_first: Option) -> PyResult { - Ok(PyExpr { - expr: datafusion::logical_plan::Expr::Sort { - expr: Box::new(expr.expr), - asc: asc.unwrap_or(true), - nulls_first: nulls_first.unwrap_or(true), - }, - }) -} - -/// Creates a new Alias expression -#[pyfunction] -fn alias(expr: PyExpr, name: &str) -> PyResult { - Ok(PyExpr { - expr: datafusion::logical_plan::Expr::Alias(Box::new(expr.expr), String::from(name)), - }) -} - -/// Creates a new Window function expression -#[pyfunction] -fn window( - name: &str, - args: Vec, - partition_by: Option>, - order_by: Option>, -) -> PyResult { - use std::str::FromStr; - let fun = datafusion::physical_plan::window_functions::WindowFunction::from_str(name) - .map_err(|e| -> errors::DataFusionError { e.into() })?; - Ok(PyExpr { - expr: datafusion::logical_plan::Expr::WindowFunction { - fun, - args: args.into_iter().map(|x| x.expr).collect::>(), - partition_by: partition_by - .unwrap_or_default() - .into_iter() - .map(|x| x.expr) - .collect::>(), - order_by: order_by - .unwrap_or_default() - .into_iter() - .map(|x| x.expr) - .collect::>(), - window_frame: None, - }, - }) -} - -macro_rules! scalar_function { - ($NAME: ident, $FUNC: ident) => { - scalar_function!($NAME, $FUNC, stringify!($NAME)); - }; - ($NAME: ident, $FUNC: ident, $DOC: expr) => { - #[doc = $DOC] - #[pyfunction(args = "*")] - fn $NAME(args: Vec) -> PyExpr { - let expr = logical_plan::Expr::ScalarFunction { - fun: BuiltinScalarFunction::$FUNC, - args: args.into_iter().map(|e| e.into()).collect(), - }; - expr.into() - } - }; -} - -macro_rules! aggregate_function { - ($NAME: ident, $FUNC: ident) => { - aggregate_function!($NAME, $FUNC, stringify!($NAME)); - }; - ($NAME: ident, $FUNC: ident, $DOC: expr) => { - #[doc = $DOC] - #[pyfunction(args = "*", distinct = "false")] - fn $NAME(args: Vec, distinct: bool) -> PyExpr { - let expr = logical_plan::Expr::AggregateFunction { - fun: AggregateFunction::$FUNC, - args: args.into_iter().map(|e| e.into()).collect(), - distinct, - }; - expr.into() - } - }; -} - -scalar_function!(abs, Abs); -scalar_function!(acos, Acos); -scalar_function!(ascii, Ascii, "Returns the numeric code of the first character of the argument. In UTF8 encoding, returns the Unicode code point of the character. In other multibyte encodings, the argument must be an ASCII character."); -scalar_function!(asin, Asin); -scalar_function!(atan, Atan); -scalar_function!( - bit_length, - BitLength, - "Returns number of bits in the string (8 times the octet_length)." -); -scalar_function!(btrim, Btrim, "Removes the longest string containing only characters in characters (a space by default) from the start and end of string."); -scalar_function!(ceil, Ceil); -scalar_function!( - character_length, - CharacterLength, - "Returns number of characters in the string." -); -scalar_function!(chr, Chr, "Returns the character with the given code."); -scalar_function!(cos, Cos); -scalar_function!(exp, Exp); -scalar_function!(floor, Floor); -scalar_function!(initcap, InitCap, "Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters."); -scalar_function!(left, Left, "Returns first n characters in the string, or when n is negative, returns all but last |n| characters."); -scalar_function!(ln, Ln); -scalar_function!(log10, Log10); -scalar_function!(log2, Log2); -scalar_function!(lower, Lower, "Converts the string to all lower case"); -scalar_function!(lpad, Lpad, "Extends the string to length length by prepending the characters fill (a space by default). If the string is already longer than length then it is truncated (on the right)."); -scalar_function!(ltrim, Ltrim, "Removes the longest string containing only characters in characters (a space by default) from the start of string."); -scalar_function!( - md5, - MD5, - "Computes the MD5 hash of the argument, with the result written in hexadecimal." -); -scalar_function!(octet_length, OctetLength, "Returns number of bytes in the string. Since this version of the function accepts type character directly, it will not strip trailing spaces."); -scalar_function!(regexp_match, RegexpMatch); -scalar_function!( - regexp_replace, - RegexpReplace, - "Replaces substring(s) matching a POSIX regular expression" -); -scalar_function!( - repeat, - Repeat, - "Repeats string the specified number of times." -); -scalar_function!( - replace, - Replace, - "Replaces all occurrences in string of substring from with substring to." -); -scalar_function!( - reverse, - Reverse, - "Reverses the order of the characters in the string." -); -scalar_function!(right, Right, "Returns last n characters in the string, or when n is negative, returns all but first |n| characters."); -scalar_function!(round, Round); -scalar_function!(rpad, Rpad, "Extends the string to length length by appending the characters fill (a space by default). If the string is already longer than length then it is truncated."); -scalar_function!(rtrim, Rtrim, "Removes the longest string containing only characters in characters (a space by default) from the end of string."); -scalar_function!(sha224, SHA224); -scalar_function!(sha256, SHA256); -scalar_function!(sha384, SHA384); -scalar_function!(sha512, SHA512); -scalar_function!(signum, Signum); -scalar_function!(sin, Sin); -scalar_function!( - split_part, - SplitPart, - "Splits string at occurrences of delimiter and returns the n'th field (counting from one)." -); -scalar_function!(sqrt, Sqrt); -scalar_function!( - starts_with, - StartsWith, - "Returns true if string starts with prefix." -); -scalar_function!(strpos, Strpos, "Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.)"); -scalar_function!(substr, Substr); -scalar_function!(tan, Tan); -scalar_function!( - to_hex, - ToHex, - "Converts the number to its equivalent hexadecimal representation." -); -scalar_function!(to_timestamp, ToTimestamp); -scalar_function!(translate, Translate, "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted."); -scalar_function!(trim, Trim, "Removes the longest string containing only characters in characters (a space by default) from the start, end, or both ends (BOTH is the default) of string."); -scalar_function!(trunc, Trunc); -scalar_function!(upper, Upper, "Converts the string to all upper case."); - -aggregate_function!(avg, Avg); -aggregate_function!(count, Count); -aggregate_function!(max, Max); -aggregate_function!(min, Min); -aggregate_function!(sum, Sum); -aggregate_function!(approx_distinct, ApproxDistinct); - -pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { - m.add_wrapped(wrap_pyfunction!(abs))?; - m.add_wrapped(wrap_pyfunction!(acos))?; - m.add_wrapped(wrap_pyfunction!(approx_distinct))?; - m.add_wrapped(wrap_pyfunction!(alias))?; - m.add_wrapped(wrap_pyfunction!(array))?; - m.add_wrapped(wrap_pyfunction!(ascii))?; - m.add_wrapped(wrap_pyfunction!(asin))?; - m.add_wrapped(wrap_pyfunction!(atan))?; - m.add_wrapped(wrap_pyfunction!(avg))?; - m.add_wrapped(wrap_pyfunction!(bit_length))?; - m.add_wrapped(wrap_pyfunction!(btrim))?; - m.add_wrapped(wrap_pyfunction!(ceil))?; - m.add_wrapped(wrap_pyfunction!(character_length))?; - m.add_wrapped(wrap_pyfunction!(chr))?; - m.add_wrapped(wrap_pyfunction!(concat_ws))?; - m.add_wrapped(wrap_pyfunction!(concat))?; - m.add_wrapped(wrap_pyfunction!(cos))?; - m.add_wrapped(wrap_pyfunction!(count))?; - m.add_wrapped(wrap_pyfunction!(digest))?; - m.add_wrapped(wrap_pyfunction!(exp))?; - m.add_wrapped(wrap_pyfunction!(floor))?; - m.add_wrapped(wrap_pyfunction!(in_list))?; - m.add_wrapped(wrap_pyfunction!(initcap))?; - m.add_wrapped(wrap_pyfunction!(left))?; - m.add_wrapped(wrap_pyfunction!(ln))?; - m.add_wrapped(wrap_pyfunction!(log10))?; - m.add_wrapped(wrap_pyfunction!(log2))?; - m.add_wrapped(wrap_pyfunction!(lower))?; - m.add_wrapped(wrap_pyfunction!(lpad))?; - m.add_wrapped(wrap_pyfunction!(ltrim))?; - m.add_wrapped(wrap_pyfunction!(max))?; - m.add_wrapped(wrap_pyfunction!(md5))?; - m.add_wrapped(wrap_pyfunction!(min))?; - m.add_wrapped(wrap_pyfunction!(now))?; - m.add_wrapped(wrap_pyfunction!(octet_length))?; - m.add_wrapped(wrap_pyfunction!(order_by))?; - m.add_wrapped(wrap_pyfunction!(random))?; - m.add_wrapped(wrap_pyfunction!(regexp_match))?; - m.add_wrapped(wrap_pyfunction!(regexp_replace))?; - m.add_wrapped(wrap_pyfunction!(repeat))?; - m.add_wrapped(wrap_pyfunction!(replace))?; - m.add_wrapped(wrap_pyfunction!(reverse))?; - m.add_wrapped(wrap_pyfunction!(right))?; - m.add_wrapped(wrap_pyfunction!(round))?; - m.add_wrapped(wrap_pyfunction!(rpad))?; - m.add_wrapped(wrap_pyfunction!(rtrim))?; - m.add_wrapped(wrap_pyfunction!(sha224))?; - m.add_wrapped(wrap_pyfunction!(sha256))?; - m.add_wrapped(wrap_pyfunction!(sha384))?; - m.add_wrapped(wrap_pyfunction!(sha512))?; - m.add_wrapped(wrap_pyfunction!(signum))?; - m.add_wrapped(wrap_pyfunction!(sin))?; - m.add_wrapped(wrap_pyfunction!(split_part))?; - m.add_wrapped(wrap_pyfunction!(sqrt))?; - m.add_wrapped(wrap_pyfunction!(starts_with))?; - m.add_wrapped(wrap_pyfunction!(strpos))?; - m.add_wrapped(wrap_pyfunction!(substr))?; - m.add_wrapped(wrap_pyfunction!(sum))?; - m.add_wrapped(wrap_pyfunction!(tan))?; - m.add_wrapped(wrap_pyfunction!(to_hex))?; - m.add_wrapped(wrap_pyfunction!(to_timestamp))?; - m.add_wrapped(wrap_pyfunction!(translate))?; - m.add_wrapped(wrap_pyfunction!(trim))?; - m.add_wrapped(wrap_pyfunction!(trunc))?; - m.add_wrapped(wrap_pyfunction!(upper))?; - m.add_wrapped(wrap_pyfunction!(window))?; - Ok(()) -} diff --git a/dask_planner/src/lib.rs b/dask_planner/src/lib.rs index 1bc79a0e3..df9343151 100644 --- a/dask_planner/src/lib.rs +++ b/dask_planner/src/lib.rs @@ -1,10 +1,7 @@ use mimalloc::MiMalloc; use pyo3::prelude::*; -mod catalog; -mod errors; mod expression; -mod functions; mod sql; #[global_allocator] @@ -15,26 +12,18 @@ static GLOBAL: MiMalloc = MiMalloc; /// The higher-level public API is defined in pure python files under the /// dask_planner directory. #[pymodule] +#[pyo3(name = "rust")] fn rust(_py: Python, m: &PyModule) -> PyResult<()> { // Register the python classes - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; m.add_class::()?; - - // SQL specific classes m.add_class::()?; - m.add_class::()?; - - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/dask_planner/src/sql.rs b/dask_planner/src/sql.rs index 8509c612d..11d37df72 100644 --- a/dask_planner/src/sql.rs +++ b/dask_planner/src/sql.rs @@ -1,126 +1,26 @@ - -use std::collections::HashMap; - -use pyo3::prelude::*; - -use datafusion::sql::parser::{DFParser, Statement}; -use sqlparser::ast::{Query, Select}; - -use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; - +pub mod column; +pub mod function; +pub mod logical; +pub mod schema; +pub mod statement; +pub mod table; +pub mod types; + +use datafusion::arrow::datatypes::{Field, Schema}; use datafusion::catalog::TableReference; -use datafusion::datasource::TableProvider; -use datafusion::sql::planner::{SqlToRel}; -use datafusion::logical_plan::plan::{ - LogicalPlan, - Projection, - Filter, - Window, - Aggregate, - Sort, - Join, - CrossJoin, - Repartition, - Union, - TableScan, - EmptyRelation, - Limit, - CreateExternalTable, - CreateMemoryTable, - DropTable, - Values, - Explain, - Analyze, - Extension -}; +use datafusion::error::DataFusionError; +use datafusion::physical_plan::udaf::AggregateUDF; +use datafusion::physical_plan::udf::ScalarUDF; +use datafusion::sql::parser::DFParser; +use datafusion::sql::planner::{ContextProvider, SqlToRel}; +use datafusion_expr::ScalarFunctionImplementation; +use std::collections::HashMap; use std::sync::Arc; -use crate::expression::PyExpr; - - -#[pyclass(name = "LogicalPlanGenerator", module = "dask_planner", subclass)] -#[derive(Clone)] -pub struct LogicalPlanGenerator { - // Holds the ordered plan steps - #[pyo3(get)] - pub plan_steps: Vec, - pub projection: Option, - pub table_scan: Option, -} - -impl Default for LogicalPlanGenerator { - fn default() -> LogicalPlanGenerator { - LogicalPlanGenerator { - plan_steps: Vec::new(), - projection: None, - table_scan: None, - } - } -} - -#[pymethods] -impl LogicalPlanGenerator { - - pub fn get_named_projects(&self) -> Vec { - match &self.projection { - Some(proj) => { - let mut exprs:Vec = Vec::new(); - for expr in &proj.expr { - exprs.push(expr.clone().into()); - } - exprs - }, - None => panic!("There is no Projection node present in the Logical Plan!") - } - } -} - -impl datafusion::logical_plan::plan::PlanVisitor for LogicalPlanGenerator { - type Error = String; - - fn pre_visit( - &mut self, - _plan: &LogicalPlan, - ) -> std::result::Result { - Ok(true) - } - - /// By inserting in `post_visit` we effectively create a depth first traversal of the SQL parsed tree - fn post_visit( - &mut self, - plan: &LogicalPlan, - ) -> std::result::Result { - let s = match plan { - LogicalPlan::Projection(projection) => { self.projection = Some(projection.clone()); "Projection" }, - LogicalPlan::Filter { .. } => "Filter", - LogicalPlan::Window { .. } => "Window", - LogicalPlan::Aggregate { .. } => "Aggregate", - LogicalPlan::Sort { .. } => "Sort", - LogicalPlan::Join { .. } => "Join", - LogicalPlan::CrossJoin { .. } => "CrossJoin", - LogicalPlan::Repartition { .. } => "Repartition", - LogicalPlan::Union { .. } => "Union", - LogicalPlan::TableScan(table_scan) => { self.table_scan = Some(table_scan.clone()); "TableScan" }, - LogicalPlan::EmptyRelation { .. } => "EmptyRelation", - LogicalPlan::Limit { .. } => "Limit", - LogicalPlan::CreateExternalTable { .. } => "CreateExternalTable", - LogicalPlan::CreateMemoryTable { .. } => "CreateMemoryTable", - LogicalPlan::DropTable { .. } => "DropTable", - LogicalPlan::Values { .. } => "Values", - LogicalPlan::Explain { .. } => "Explain", - LogicalPlan::Analyze { .. } => "Analyze", - LogicalPlan::Extension { .. } => "Extension", - }; - - self.plan_steps.push(s.into()); - Ok(true) - } -} - - +use pyo3::prelude::*; -/// DaskSQLContext is main interface used for interacting with Datafusion to +/// DaskSQLContext is main interface used for interacting with DataFusion to /// parse SQL queries, build logical plans, and optimize logical plans. /// /// The following example demonstrates how to generate an optimized LogicalPlan @@ -140,20 +40,18 @@ impl datafusion::logical_plan::plan::PlanVisitor for LogicalPlanGenerator { /// # } /// ``` #[pyclass(name = "DaskSQLContext", module = "dask_planner", subclass)] -#[derive(Clone)] +#[derive(Debug, Clone)] pub struct DaskSQLContext { default_schema_name: String, - pub schemas: HashMap, + schemas: HashMap, } -impl datafusion::sql::planner::ContextProvider for DaskSQLContext { - fn get_table_provider( - &self, - name: TableReference, - ) -> Option> { - match self.schemas.get(&String::from(&self.default_schema_name)) { +impl ContextProvider for DaskSQLContext { + fn get_table_provider(&self, name: TableReference) -> Option> { + match self.schemas.get(&self.default_schema_name) { Some(schema) => { let mut resp = None; + let mut table_name: String = "".to_string(); for (_table_name, table) in &schema.tables { if table.name.eq(&name.table()) { // Build the Schema here @@ -161,744 +59,125 @@ impl datafusion::sql::planner::ContextProvider for DaskSQLContext { // Iterate through the DaskTable instance and create a Schema instance for (column_name, column_type) in &table.columns { - fields.push(Field::new(column_name, column_type.sqlType.clone(), false)); + fields.push(Field::new( + column_name, + column_type.sql_type.clone(), + false, + )); } resp = Some(Schema::new(fields)); + table_name = _table_name.clone(); } } - Some(Arc::new(datafusion::datasource::empty::EmptyTable::new( - Arc::new( - resp.unwrap() - ) + Some(Arc::new(table::DaskTableProvider::new( + Arc::new(resp.unwrap()), + table_name, ))) - }, - None => panic!("Schema with name {} not found", "table_name"), + } + None => { + DataFusionError::Execution(format!( + "Schema with name {} not found", + &self.default_schema_name + )); + None + } } } - fn get_function_meta(&self, name: &str) -> Option> { - println!("RUST: get_function_meta"); - let _f: datafusion::physical_plan::functions::ScalarFunctionImplementation = - Arc::new(|_| Err(datafusion::error::DataFusionError::NotImplemented("".to_string()))); - match name { - _ => None, - } + fn get_function_meta(&self, _name: &str) -> Option> { + let _f: ScalarFunctionImplementation = + Arc::new(|_| Err(DataFusionError::NotImplemented("".to_string()))); + None } - fn get_aggregate_meta(&self, _name: &str) -> Option> { - println!("RUST: get_aggregate_meta"); - unimplemented!() + fn get_aggregate_meta(&self, _name: &str) -> Option> { + unimplemented!("RUST: get_aggregate_meta is not yet implemented for DaskSQLContext"); } -} + fn get_variable_type(&self, _: &[String]) -> Option { + unimplemented!("RUST: get_variable_type is not yet implemented for DaskSQLContext") + } +} #[pymethods] impl DaskSQLContext { #[new] pub fn new(default_schema_name: String) -> Self { Self { - default_schema_name: default_schema_name, + default_schema_name, schemas: HashMap::new(), } } - pub fn register_schema(&mut self, schema_name:String, schema: DaskSchema) { + /// Register a Schema with the current DaskSQLContext + pub fn register_schema( + &mut self, + schema_name: String, + schema: schema::DaskSchema, + ) -> PyResult { self.schemas.insert(schema_name, schema); + Ok(true) } - pub fn register_table(&mut self, schema_name:String, table: DaskTable) { + /// Register a DaskTable instance under the specified schema in the current DaskSQLContext + pub fn register_table( + &mut self, + schema_name: String, + table: table::DaskTable, + ) -> PyResult { match self.schemas.get_mut(&schema_name) { - Some(schema) => schema.add_table(table), - None => println!("Schema: {} not found in DaskSQLContext", schema_name), + Some(schema) => { + schema.add_table(table); + Ok(true) + } + None => Err(PyErr::new::(format!( + "Schema: {} not found in DaskSQLContext", + schema_name + ))), } } /// Parses a SQL string into an AST presented as a Vec of Statements - pub fn parse_sql(&self, sql: &str) -> Vec { + pub fn parse_sql(&self, sql: &str) -> PyResult> { match DFParser::parse_sql(sql) { Ok(k) => { - let mut statements = Vec::new(); + let mut statements: Vec = Vec::new(); for statement in k { statements.push(statement.into()); } - statements - }, - Err(e) => panic!("{}", e.to_string()), + assert!( + statements.len() == 1, + "More than 1 expected statement was encounterd!" + ); + Ok(statements) + } + Err(e) => Err(PyErr::new::(format!( + "{}", + e + ))), } } /// Creates a non-optimized Relational Algebra LogicalPlan from an AST Statement - pub fn logical_relational_algebra(&self, statement: PyStatement) -> PyLogicalPlan { + pub fn logical_relational_algebra( + &self, + statement: statement::PyStatement, + ) -> PyResult { let planner = SqlToRel::new(self); - match planner.statement_to_plan(&statement.statement) { + match planner.statement_to_plan(statement.statement) { Ok(k) => { - println!("Full Logical Plan: {:?}", k); - PyLogicalPlan { + println!("\nLogicalPlan: {:?}\n\n", k); + Ok(logical::PyLogicalPlan { original_plan: k, current_node: None, - } - }, - Err(e) => panic!("{}", e.to_string()), - } - } -} - - -#[pyclass(name = "LogicalPlan", module = "dask_planner", subclass)] -#[derive(Debug, Clone)] -pub struct PyLogicalPlan { - /// The orginal LogicalPlan that was parsed by DataFusion from the input SQL - original_plan: LogicalPlan, - /// The original_plan is traversed. current_node stores the current node of this traversal - current_node: Option, -} - -impl From for LogicalPlan { - fn from(logical_plan: PyLogicalPlan) -> LogicalPlan { - logical_plan.original_plan - } -} - -impl From for PyLogicalPlan { - fn from(logical_plan: LogicalPlan) -> PyLogicalPlan { - PyLogicalPlan { - original_plan: logical_plan, - current_node: None, - } - } -} - - -/// Traverses the logical plan to locate the Table associated with the query -fn table_from_logical_plan(plan: &LogicalPlan) -> Option { - match plan { - datafusion::logical_plan::plan::LogicalPlan::Projection(projection) => { - println!("Projection Input: {:?}", &projection.input); - table_from_logical_plan(&projection.input) - }, - datafusion::logical_plan::plan::LogicalPlan::Filter(filter) => { - println!("Filter Input: {:?}", &filter.input); - table_from_logical_plan(&filter.input) - }, - datafusion::logical_plan::plan::LogicalPlan::Window(_window) => { - println!("Window"); - None - }, - datafusion::logical_plan::plan::LogicalPlan::Aggregate(_aggregate) => { - println!("Aggregate"); - None - }, - datafusion::logical_plan::plan::LogicalPlan::Sort(_sort) => { - println!("Sort"); - None - }, - datafusion::logical_plan::plan::LogicalPlan::Join(_join) => { - println!("Join"); - None - }, - datafusion::logical_plan::plan::LogicalPlan::CrossJoin(_crossJoin) => { - println!("CrossJoin"); - None - }, - datafusion::logical_plan::plan::LogicalPlan::TableScan(tableScan) => { - - // Get the TableProvider for this Table instance - let tbl_provider: Arc = tableScan.source.clone(); - let tbl_schema: SchemaRef = tbl_provider.schema(); - let fields = tbl_schema.fields(); - - let mut cols: Vec<(String, DaskRelDataType)> = Vec::new(); - for field in fields { - cols.push( - ( - String::from(field.name()), - DaskRelDataType { - name: String::from(field.name()), - sqlType: field.data_type().clone(), - } - ) - ); + }) } - - Some(DaskTable { - name: String::from(&tableScan.table_name), - statistics: DaskStatistics { row_count: 0.0 }, - columns: cols, - }) - }, - _ => { - panic!("Ok something went wrong here!!!") - } - } -} - -/// Unfortunately PyO3 forces us to do this as placing these methods in the #[pymethods] version -/// of `impl PyLogicalPlan` causes issues with types not properly being mapped to Python from Rust -impl PyLogicalPlan { - /// Getter method for the LogicalPlan, if current_node is None return original_plan. - fn current_node(&mut self) -> LogicalPlan { - match &self.current_node { - Some(current) => current.clone(), - None => { - self.current_node = Some(self.original_plan.clone()); - self.current_node.clone().unwrap() - }, - } - } -} - - -#[pymethods] -impl PyLogicalPlan { - - /// Projection: Gets the names of the fields that should be projected - fn get_named_projects(&mut self) -> PyResult> { - match self.current_node() { - LogicalPlan::Projection(projection) => { - let mut projs: Vec = Vec::new(); - for expr in projection.expr { - projs.push(expr.clone().into()); - } - Ok(projs) - }, - _ => Err(PyErr::new::("current_node is not of type Projection")), - } - } - - /// Gets the "input" for the current LogicalPlan - pub fn get_inputs(&mut self) -> PyResult> { - let mut py_inputs: Vec = Vec::new(); - for input in self.current_node().inputs() { - py_inputs.push(input.clone().into()); - } - Ok(py_inputs) - } - - /// Examines the current_node and get the fields associated with it - pub fn get_field_names(&mut self) -> PyResult> { - let mut field_names: Vec = Vec::new(); - for field in self.current_node().schema().fields() { - field_names.push(String::from(field.name())); - } - Ok(field_names) - } - - - /// If the LogicalPlan represents access to a Table that instance is returned - /// otherwise None is returned - pub fn table(&mut self) -> PyResult { - match table_from_logical_plan(&self.current_node()) { - Some(table) => Ok(table), - None => Err(PyErr::new::("Unable to compute DaskTable from Datafusion LogicalPlan")), - } - } - - - /// Gets the Relation "type" of the current node. Ex: Projection, TableScan, etc - pub fn get_current_node_type(&mut self) -> PyResult { - match self.current_node() { - LogicalPlan::Projection(_projection) => Ok(String::from("Projection")), - LogicalPlan::Filter(_filter) => Ok(String::from("Filter")), - LogicalPlan::Window(_window) => Ok(String::from("Window")), - LogicalPlan::Aggregate(_aggregate) => Ok(String::from("Aggregate")), - LogicalPlan::Sort(_sort) => Ok(String::from("Sort")), - LogicalPlan::Join(_join) => Ok(String::from("Join")), - LogicalPlan::CrossJoin(_cross_join) => Ok(String::from("CrossJoin")), - LogicalPlan::Repartition(_repartition) => Ok(String::from("Repartition")), - LogicalPlan::Union(_union) => Ok(String::from("Union")), - LogicalPlan::TableScan(_table_scan) => Ok(String::from("TableScan")), - LogicalPlan::EmptyRelation(_empty_relation) => Ok(String::from("EmptyRelation")), - LogicalPlan::Limit(_limit) => Ok(String::from("Limit")), - LogicalPlan::CreateExternalTable(_create_external_table) => Ok(String::from("CreateExternalTable")), - LogicalPlan::CreateMemoryTable(_create_memory_table) => Ok(String::from("CreateMemoryTable")), - LogicalPlan::DropTable(_drop_table) => Ok(String::from("DropTable")), - LogicalPlan::Values(_values) => Ok(String::from("Values")), - LogicalPlan::Explain(_explain) => Ok(String::from("Explain")), - LogicalPlan::Analyze(_analyze) => Ok(String::from("Analyze")), - LogicalPlan::Extension(_extension) => Ok(String::from("Extension")), - } - } - - - /// Explain plan for the full and original LogicalPlan - pub fn explain_original(&self) -> PyResult { - Ok(format!("{}", self.original_plan.display_indent())) - } - - - /// Explain plan from the current node onward - pub fn explain_current(&mut self) -> PyResult { - Ok(format!("{}", self.current_node().display_indent())) - } - - - // pub fn plan_generator(&self) -> LogicalPlanGenerator { - // // Actually gonna test out walking the plan here .... - // let mut visitor = LogicalPlanGenerator::default(); - // self.logical_plan.accept(&mut visitor); - // // visitor.plan_steps.clone() - // visitor - // } - - /// LogicalPlan::Filter: The PyExpr, predicate, that represents the filtering condition - pub fn getCondition(&mut self) -> PyResult { - match self.current_node() { - LogicalPlan::Filter(filter) => { - Ok(filter.predicate.clone().into()) - }, - _ => panic!("Something wrong here") - } - } -} - - -#[pyclass(name = "Statement", module = "dask_planner", subclass)] -#[derive(Debug, Clone)] -pub struct PyStatement { - pub statement: Statement, -} - -impl From for Statement { - fn from(statement: PyStatement) -> Statement { - statement.statement - } -} - -impl From for PyStatement { - fn from(statement: Statement) -> PyStatement { - PyStatement { statement } - } -} - - -impl PyStatement { - pub fn new(statement: Statement) -> Self { - Self { statement } - } -} - - -#[pymethods] -impl PyStatement { - - #[staticmethod] - pub fn table_name() -> String { - String::from("Got here!!!") - } -} - -#[pyclass(name = "Query", module = "dask_planner", subclass)] -#[derive(Debug, Clone)] -pub struct PyQuery { - pub(crate) query: Query, -} - -impl From for Query { - fn from(query: PyQuery) -> Query { - query.query - } -} - -impl From for PyQuery { - fn from(query: Query) -> PyQuery { - PyQuery { query } - } -} - -#[pyclass(name = "DaskSQLNode", module = "dask_planner", subclass)] -#[derive(Debug, Clone)] -pub struct DaskSQLNode { - pub(crate) statements: Vec, -} - - -#[pyclass(name = "Select", module = "dask_planner", subclass)] -#[derive(Debug, Clone)] -pub struct PySelect { - pub(crate) select: Select, -} - -impl From for Select { - fn from(select: PySelect) -> Select { - select.select - } -} - -impl From