diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 000000000..40b343f1e
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1,5 @@
+# global codeowners
+* @ayushdg @charlesbluca @galipremsagar
+
+# rust codeowners
+dask_planner/ @ayushdg @galipremsagar @jdye64
diff --git a/.github/actions/setup-builder/action.yaml b/.github/actions/setup-builder/action.yaml
new file mode 100644
index 000000000..7f576cd69
--- /dev/null
+++ b/.github/actions/setup-builder/action.yaml
@@ -0,0 +1,17 @@
+name: Prepare Rust Builder
+description: 'Prepare Rust Build Environment'
+inputs:
+ rust-version:
+ description: 'version of rust to install (e.g. stable)'
+ required: true
+ default: 'stable'
+runs:
+ using: "composite"
+ steps:
+ - name: Setup Rust toolchain
+ shell: bash
+ run: |
+ echo "Installing ${{ inputs.rust-version }}"
+ rustup toolchain install ${{ inputs.rust-version }}
+ rustup default ${{ inputs.rust-version }}
+ rustup component add rustfmt
diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml
index 54e69b5fc..78047147a 100644
--- a/.github/workflows/conda.yml
+++ b/.github/workflows/conda.yml
@@ -3,7 +3,18 @@ on:
push:
branches:
- main
+ - datafusion-sql-planner
pull_request:
+ paths:
+ - setup.py
+ - dask_planner/Cargo.toml
+ - dask_planner/Cargo.lock
+ - dask_planner/pyproject.toml
+ - dask_planner/rust-toolchain.toml
+ - continuous_integration/recipe/**
+ - .github/workflows/conda.yml
+ schedule:
+ - cron: '0 0 * * 0'
# When this workflow is queued, automatically cancel any previous running
# or pending jobs from the same branch
@@ -49,12 +60,12 @@ jobs:
- name: Upload conda package
if: |
github.event_name == 'push'
- && github.ref == 'refs/heads/main'
&& github.repository == 'dask-contrib/dask-sql'
env:
ANACONDA_API_TOKEN: ${{ secrets.DASK_CONDA_TOKEN }}
+ LABEL: ${{ github.ref == 'refs/heads/datafusion-sql-planner' && 'dev_datafusion' || 'dev' }}
run: |
# install anaconda for upload
mamba install anaconda-client
- anaconda upload --label dev noarch/*.tar.bz2
+ anaconda upload --label $LABEL linux-64/*.tar.bz2
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
new file mode 100644
index 000000000..0b8b8536e
--- /dev/null
+++ b/.github/workflows/rust.yml
@@ -0,0 +1,72 @@
+name: Rust
+
+on:
+ # always trigger on PR
+ push:
+ pull_request:
+ # manual trigger
+ # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
+ workflow_dispatch:
+
+jobs:
+ # Check crate compiles
+ linux-build-lib:
+ name: cargo check
+ runs-on: ubuntu-latest
+ container:
+ image: amd64/rust
+ env:
+ # Disable full debug symbol generation to speed up CI build and keep memory down
+ # "1" means line tables only, which is useful for panic tracebacks.
+ RUSTFLAGS: "-C debuginfo=1"
+ steps:
+ - uses: actions/checkout@v3
+ - name: Cache Cargo
+ uses: actions/cache@v3
+ with:
+ # these represent dependencies downloaded by cargo
+ # and thus do not depend on the OS, arch nor rust version.
+ path: /github/home/.cargo
+ key: cargo-cache-
+ - name: Setup Rust toolchain
+ uses: ./.github/actions/setup-builder
+ with:
+ rust-version: stable
+ - name: Check workspace in debug mode
+ run: |
+ cd dask_planner
+ cargo check
+ - name: Check workspace in release mode
+ run: |
+ cd dask_planner
+ cargo check --release
+
+ # test the crate
+ linux-test:
+ name: cargo test (amd64)
+ needs: [linux-build-lib]
+ runs-on: ubuntu-latest
+ container:
+ image: amd64/rust
+ env:
+ # Disable full debug symbol generation to speed up CI build and keep memory down
+ # "1" means line tables only, which is useful for panic tracebacks.
+ RUSTFLAGS: "-C debuginfo=1"
+ steps:
+ - uses: actions/checkout@v3
+ with:
+ submodules: true
+ - name: Cache Cargo
+ uses: actions/cache@v3
+ with:
+ path: /github/home/.cargo
+ # this key equals the ones on `linux-build-lib` for re-use
+ key: cargo-cache-
+ - name: Setup Rust toolchain
+ uses: ./.github/actions/setup-builder
+ with:
+ rust-version: stable
+ - name: Run tests
+ run: |
+ cd dask_planner
+ cargo test
diff --git a/.github/workflows/test-upstream.yml b/.github/workflows/test-upstream.yml
index 7f41a4d6c..fd0cae327 100644
--- a/.github/workflows/test-upstream.yml
+++ b/.github/workflows/test-upstream.yml
@@ -10,58 +10,24 @@ defaults:
shell: bash -l {0}
jobs:
- build:
- # This build step should be similar to the deploy build, to make sure we actually test
- # the future deployable
- name: Build the jar on ubuntu
- runs-on: ubuntu-latest
- if: github.repository == 'dask-contrib/dask-sql'
- steps:
- - uses: actions/checkout@v2
- - name: Cache local Maven repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }}
- - name: Set up Python
- uses: conda-incubator/setup-miniconda@v2
- with:
- miniforge-variant: Mambaforge
- use-mamba: true
- python-version: "3.8"
- channel-priority: strict
- activate-environment: dask-sql
- environment-file: continuous_integration/environment-3.8-jdk11-dev.yaml
- - name: Install dependencies and build the jar
- run: |
- python setup.py build_ext
- - name: Upload the jar
- uses: actions/upload-artifact@v1
- with:
- name: jar
- path: dask_sql/jar/DaskSQL.jar
-
test-dev:
- name: "Test upstream dev (${{ matrix.os }}, java: ${{ matrix.java }}, python: ${{ matrix.python }})"
- needs: build
+ name: "Test upstream dev (${{ matrix.os }}, python: ${{ matrix.python }})"
runs-on: ${{ matrix.os }}
+ if: github.repository == 'dask-contrib/dask-sql'
env:
- CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-jdk${{ matrix.java }}-dev.yaml
+ CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-dev.yaml
+ defaults:
+ run:
+ shell: bash -l {0}
strategy:
fail-fast: false
matrix:
- java: [8, 11]
os: [ubuntu-latest, windows-latest]
python: ["3.8", "3.9", "3.10"]
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0 # Fetch all history for all branches and tags.
- - name: Cache local Maven repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: ${{ runner.os }}-maven-v1-jdk${{ matrix.java }}-${{ hashFiles('**/pom.xml') }}
- name: Set up Python
uses: conda-incubator/setup-miniconda@v2
with:
@@ -72,21 +38,21 @@ jobs:
channels: dask/label/dev,conda-forge,nodefaults
activate-environment: dask-sql
environment-file: ${{ env.CONDA_FILE }}
- - name: Download the pre-build jar
- uses: actions/download-artifact@v1
+ - name: Setup Rust Toolchain
+ uses: actions-rs/toolchain@v1
+ id: rust-toolchain
with:
- name: jar
- path: dask_sql/jar/
+ toolchain: stable
+ override: true
+ - name: Build the Rust DataFusion bindings
+ run: |
+ python setup.py build install
- name: Install hive testing dependencies for Linux
if: matrix.os == 'ubuntu-latest'
run: |
mamba install -c conda-forge sasl>=0.3.1
docker pull bde2020/hive:2.3.2-postgresql-metastore
docker pull bde2020/hive-metastore-postgresql:2.3.0
- - name: Set proper JAVA_HOME for Windows
- if: matrix.os == 'windows-latest'
- run: |
- echo "JAVA_HOME=${{ env.CONDA }}\envs\dask-sql\Library" >> $GITHUB_ENV
- name: Install upstream dev Dask / dask-ml
run: |
mamba update dask
@@ -101,11 +67,6 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- - name: Cache local Maven repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }}
- name: Set up Python
uses: conda-incubator/setup-miniconda@v2
with:
@@ -115,12 +76,16 @@ jobs:
channel-priority: strict
channels: dask/label/dev,conda-forge,nodefaults
activate-environment: dask-sql
- environment-file: continuous_integration/environment-3.9-jdk11-dev.yaml
- - name: Download the pre-build jar
- uses: actions/download-artifact@v1
+ environment-file: continuous_integration/environment-3.9-dev.yaml
+ - name: Setup Rust Toolchain
+ uses: actions-rs/toolchain@v1
+ id: rust-toolchain
with:
- name: jar
- path: dask_sql/jar/
+ toolchain: stable
+ override: true
+ - name: Build the Rust DataFusion bindings
+ run: |
+ python setup.py build install
- name: Install cluster dependencies
run: |
mamba install python-blosc lz4 -c conda-forge
@@ -151,11 +116,6 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- - name: Cache local Maven repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }}
- name: Set up Python
uses: conda-incubator/setup-miniconda@v2
with:
@@ -163,11 +123,15 @@ jobs:
mamba-version: "*"
channels: dask/label/dev,conda-forge,nodefaults
channel-priority: strict
- - name: Download the pre-build jar
- uses: actions/download-artifact@v1
+ - name: Setup Rust Toolchain
+ uses: actions-rs/toolchain@v1
+ id: rust-toolchain
with:
- name: jar
- path: dask_sql/jar/
+ toolchain: stable
+ override: true
+ - name: Build the Rust DataFusion bindings
+ run: |
+ python setup.py build install
- name: Install upstream dev Dask / dask-ml
if: needs.detect-ci-trigger.outputs.triggered == 'true'
run: |
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c0319afd1..ed9c25a74 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,7 +1,7 @@
---
# Test the main branch and every pull request by
-# 1. building the jar on ubuntu
-# 2. testing code (using the build jar) on ubuntu and windows, with different java versions
+# 1. build dask_planner (Arrow DataFusion Rust bindings) on ubuntu
+# 2. testing code (using the build DataFusion bindings) on ubuntu and windows
name: Test Python package
on:
push:
@@ -36,55 +36,20 @@ jobs:
with:
keyword: "[test-upstream]"
- build:
- # This build step should be similar to the deploy build, to make sure we actually test
- # the future deployable
- name: Build the jar on ubuntu
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v2
- - name: Cache local Maven repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }}
- - name: Set up Python
- uses: conda-incubator/setup-miniconda@v2
- with:
- miniforge-variant: Mambaforge
- use-mamba: true
- python-version: "3.8"
- channel-priority: strict
- activate-environment: dask-sql
- environment-file: continuous_integration/environment-3.8-jdk11-dev.yaml
- - name: Build the jar
- run: |
- python setup.py build_ext
- - name: Upload the jar
- uses: actions/upload-artifact@v1
- with:
- name: jar
- path: dask_sql/jar/DaskSQL.jar
-
test:
- name: "Test (${{ matrix.os }}, java: ${{ matrix.java }}, python: ${{ matrix.python }})"
- needs: [detect-ci-trigger, build]
+ name: "Build & Test (${{ matrix.os }}, python: ${{ matrix.python }}, Rust: ${{ matrix.toolchain }})"
+ needs: [detect-ci-trigger]
runs-on: ${{ matrix.os }}
env:
- CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-jdk${{ matrix.java }}-dev.yaml
+ CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-dev.yaml
strategy:
fail-fast: false
matrix:
- java: [8, 11]
os: [ubuntu-latest, windows-latest]
python: ["3.8", "3.9", "3.10"]
+ toolchain: [stable]
steps:
- uses: actions/checkout@v2
- - name: Cache local Maven repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: ${{ runner.os }}-maven-v1-jdk${{ matrix.java }}-${{ hashFiles('**/pom.xml') }}
- name: Set up Python
uses: conda-incubator/setup-miniconda@v2
with:
@@ -95,21 +60,21 @@ jobs:
channels: ${{ needs.detect-ci-trigger.outputs.triggered == 'true' && 'dask/label/dev,conda-forge,nodefaults' || 'conda-forge,nodefaults' }}
activate-environment: dask-sql
environment-file: ${{ env.CONDA_FILE }}
- - name: Download the pre-build jar
- uses: actions/download-artifact@v1
+ - name: Setup Rust Toolchain
+ uses: actions-rs/toolchain@v1
+ id: rust-toolchain
with:
- name: jar
- path: dask_sql/jar/
+ toolchain: stable
+ override: true
+ - name: Build the Rust DataFusion bindings
+ run: |
+ python setup.py build install
- name: Install hive testing dependencies for Linux
if: matrix.os == 'ubuntu-latest'
run: |
mamba install -c conda-forge sasl>=0.3.1
docker pull bde2020/hive:2.3.2-postgresql-metastore
docker pull bde2020/hive-metastore-postgresql:2.3.0
- - name: Set proper JAVA_HOME for Windows
- if: matrix.os == 'windows-latest'
- run: |
- echo "JAVA_HOME=${{ env.CONDA }}\envs\dask-sql\Library" >> $GITHUB_ENV
- name: Optionally install upstream dev Dask / dask-ml
if: needs.detect-ci-trigger.outputs.triggered == 'true'
run: |
@@ -130,15 +95,10 @@ jobs:
cluster:
name: "Test in a dask cluster"
- needs: [detect-ci-trigger, build]
+ needs: [detect-ci-trigger]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- - name: Cache local Maven repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }}
- name: Set up Python
uses: conda-incubator/setup-miniconda@v2
with:
@@ -148,13 +108,17 @@ jobs:
channel-priority: strict
channels: ${{ needs.detect-ci-trigger.outputs.triggered == 'true' && 'dask/label/dev,conda-forge,nodefaults' || 'conda-forge,nodefaults' }}
activate-environment: dask-sql
- environment-file: continuous_integration/environment-3.9-jdk11-dev.yaml
- - name: Download the pre-build jar
- uses: actions/download-artifact@v1
+ environment-file: continuous_integration/environment-3.9-dev.yaml
+ - name: Setup Rust Toolchain
+ uses: actions-rs/toolchain@v1
+ id: rust-toolchain
with:
- name: jar
- path: dask_sql/jar/
- - name: Install cluster dependencies
+ toolchain: stable
+ override: true
+ - name: Build the Rust DataFusion bindings
+ run: |
+ python setup.py build install
+ - name: Install dependencies
run: |
mamba install python-blosc lz4 -c conda-forge
@@ -187,15 +151,10 @@ jobs:
import:
name: "Test importing with bare requirements"
- needs: [detect-ci-trigger, build]
+ needs: [detect-ci-trigger]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- - name: Cache local Maven repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }}
- name: Set up Python
uses: conda-incubator/setup-miniconda@v2
with:
@@ -203,18 +162,9 @@ jobs:
mamba-version: "*"
channels: ${{ needs.detect-ci-trigger.outputs.triggered == 'true' && 'dask/label/dev,conda-forge,nodefaults' || 'conda-forge,nodefaults' }}
channel-priority: strict
- - name: Download the pre-build jar
- uses: actions/download-artifact@v1
- with:
- name: jar
- path: dask_sql/jar/
- - name: Optionally install upstream dev Dask / dask-ml
- if: needs.detect-ci-trigger.outputs.triggered == 'true'
- run: |
- mamba update dask
- python -m pip install --no-deps git+https://github.com/dask/dask-ml
- name: Install dependencies and nothing else
run: |
+ conda install setuptools-rust
pip install -e .
which python
diff --git a/.gitignore b/.gitignore
index 947f81393..245817fc1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,6 +22,7 @@ share/python-wheels/
.installed.cfg
*.egg
MANIFEST
+*.so
# Unit test / coverage reports
htmlcov/
@@ -32,6 +33,7 @@ coverage.xml
*.cover
.pytest_cache/
.hypothesis/
+.pytest-html
# Jupyter Notebook
.ipynb_checkpoints
@@ -59,3 +61,9 @@ dask_sql/jar
dask-worker-space/
node_modules/
docs/source/_build/
+tests/unit/queries
+tests/unit/data
+
+# Ignore development specific local testing files
+dev_tests
+dev-tests
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 50af0bbb4..f1feccf47 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -16,8 +16,17 @@ repos:
args:
- "--profile"
- "black"
+ - repo: https://github.com/doublify/pre-commit-rust
+ rev: v1.0
+ hooks:
+ - id: fmt
+ args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--']
+ - id: cargo-check
+ args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--']
+ - id: clippy
+ args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--', '-D', 'warnings']
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v3.2.0
+ rev: v4.2.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 3b3682543..831ec6d50 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -3,15 +3,15 @@ version: 2
build:
os: ubuntu-20.04
tools:
- python: "3.8"
- apt_packages:
- - maven
+ python: "mambaforge-4.10"
sphinx:
configuration: docs/source/conf.py
+conda:
+ environment: docs/environment.yml
+
python:
install:
- - requirements: docs/requirements-docs.txt
- method: pip
path: .
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 000000000..b7fdf1de7
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,127 @@
+# Contributing to Dask-SQL
+
+## Environment Setup
+
+Conda is used both by CI and the development team. Therefore Conda is the fully supported and preferred method for using and developing Dask-SQL.
+
+Installing Conda is outside the scope of this document. However, a nice guide for installing on Linux can be found [here](https://docs.conda.io/projects/conda/en/latest/user-guide/install/linux.html)
+
+Setting up your Conda environment for development is straightforward. First, clone the repository locally
+```
+DASK_SQL_HOME=$(pwd)/dask-sql
+git clone https://github.com/dask-contrib/dask-sql.git
+cd $DASK_SQL_HOME
+```
+Then, run:
+```
+conda env create -f ${DASK_SQL_HOME}/continuous_integration/environment-3.10-dev.yaml
+```
+
+The Conda process will take a while to complete; once finished you will have a resulting environment named `dask-sql` which can be activated and used by running `conda activate dask-sql`
+
+## Rust Developers Guide
+
+Dask-SQL utilizes [Apache Arrow Datafusion](https://github.com/apache/arrow-datafusion) for parsing, planning, and optimizing SQL queries. DataFusion is written in Rust and therefore requires some Rust experience to be productive. Luckily, there are tons of great Rust learning resources on the internet. We have listed some of our favorite ones [here](#rust-learning-resources)
+
+### Apache Arrow DataFusion
+The Dask-SQL Rust codebase makes heavy use of [Apache Arrow DataFusion](https://github.com/apache/arrow-datafusion). Contributors should familiarize themselves with the [codebase](https://github.com/apache/arrow-datafusion) and [documentation](https://docs.rs/datafusion/latest/datafusion/).
+
+#### Purpose
+DataFusion provides Dask-SQL with key functionality.
+- Parsing SQL query strings into a `LogicalPlan` datastructure
+- Future integration points with [substrait.io](https://substrait.io/)
+- An optimization framework used as the baseline for creating custom highly efficient `LogicalPlan`s specific to Dask.
+
+### Building
+Building the Dask-SQL Rust codebase is a straightforward process. If you create and activate the Dask-SQL Conda environment the Rust compiler and all necessary components will be installed for you during that process and therefore requires no further manual setup.
+
+`setuptools-rust` is used by Dask-SQL for building and bundling the resulting Rust binaries. This helps make building and installing the Rust binaries feel much more like a native Python workflow.
+
+More details about the building setup can be found at [setup.py](setup.py) and searching for `rust_extensions` which is the hook for the Rust code build and inclusion.
+
+Note that while `setuptools-rust` is used by CI and should be used during your development cycle, if the need arises to do something more specific that is not yet supported by `setuptools-rust` you can opt to use `cargo` directly from the command line.
+
+#### Building with Python
+Building Dask-SQL is straightforward with Python. To build run ```python setup.py install```. This will build both the Rust and Python codebase and install it into your locally activated conda environment. While not required, if you have updated dependencies for Rust you might prefer a clean build. To clean your setup run ```python setup.py clean``` and then run ```python setup.py install```
+
+#### DataFusion Modules
+DataFusion is broken down into a few modules. We consume those modules in our [Cargo.toml](dask_planner/Cargo.toml). The modules that we use currently are
+
+- `datafusion-common` - Datastructures and core logic
+- `datafusion-expr` - Expression based logic and operators
+- `datafusion-sql` - SQL components such as parsing and planning
+- `datafusion-optimizer` - Optimization logic and datastructures for modifying current plans into more efficient ones.
+
+#### Retrieving Upstream Dependencies
+During development you might find yourself needing some upstream DataFusion changes not present in the projects current version. Luckily this can easily be achieved by updating [Cargo.toml](dask_planner/Cargo.toml) and changing the `rev` to the SHA of the version you need. Note that the same SHA should be used for all DataFusion modules.
+
+After updating the `Cargo.toml` file the codebase can be re-built to reflect those changes by running `python setup.py install`
+
+#### Local Documentation
+Sometimes when building against the latest Github commits for DataFusion you may find that the features you are consuming do not have their documentation public yet. In this case it can be helpful to build the DataFusion documentation locally so that it can be referenced to assist with development. Here is a rough outline for building that documentation locally.
+
+- clone https://github.com/apache/arrow-datafusion
+- change into the `arrow-datafusion` directory
+- run `cargo doc`
+- navigate to `target/doc/datafusion/all.html` and open in your desired browser
+
+### Datastructures
+While working in the Rust codebase there are a few datastructures that you should make yourself familiar with. This section does not aim to verbosely list out all of the datastructures within the project but rather just the key datastructures that you are likely to encounter while working on almost any feature/issue. The aim is to give you a better overview of the codebase without having to manually dig through all the source code.
+
+- [`PyLogicalPlan`](dask_planner/src/sql/logical.rs) -> [DataFusion LogicalPlan](https://docs.rs/datafusion/latest/datafusion/logical_plan/enum.LogicalPlan.html)
+ - Often encountered in Python code with variable name `rel`
+ - Python serializable umbrella representation of the entire LogicalPlan that was generated by DataFusion
+ - Provides access to `DaskTable` instances and type information for each table
+ - Access to individual nodes in the logical plan tree. Ex: `TableScan`
+- [`DaskSQLContext`](dask_planner/src/sql.rs)
+ - Analogous to Python `Context`
+ - Contains metadata about the tables, schemas, functions, operators, and configurations that are present within the current execution context
+ - When adding custom functions/UDFs this is the location that you would register them
+ - Entry point for parsing SQL strings to sql node trees. This is the location Python will begin its interactions with Rust
+- [`PyExpr`](dask_planner/src/expression.rs) -> [DataFusion Expr](https://docs.rs/datafusion/latest/datafusion/prelude/enum.Expr.html)
+ - Arguably where most of your time will be spent
+ - Represents a single node in sql tree. Ex: `avg(age)` from `SELECT avg(age) FROM people`
+ - Is associated with a single `RexType`
+ - Can contain literal values or represent function calls, `avg()` for example
+ - The expression's "index" in the tree can be retrieved by calling `PyExpr.index()` on an instance. This is useful when mapping frontend column names in Dask code to backend Dataframe columns
+ - Certain `PyExpr`s contain operands. Ex: `2 + 2` would contain 3 operands. 1) A literal `PyExpr` instance with value 2 2) Another literal `PyExpr` instance with a value of 2. 3) A `+` `PyExpr` representing the addition of the 2 literals.
+- [`DaskSqlOptimizer`](dask_planner/src/sql/optimizer.rs)
+ - Registering location for all Dask-SQL specific logical plan optimizations
+ - Optimizations, whether written custom or reused from another source such as DataFusion, are registered here in the order they should be executed
+ - Represents functions that modify/convert an original `PyLogicalPlan` into another `PyLogicalPlan` that would be more efficient when running in the underlying Dask framework
+- [`RelDataType`](dask_planner/src/sql/types/rel_data_type.rs)
+ - Not a fan of this name, was chosen to match existing Calcite logic
+ - Represents a "row" in a table
+ - Contains a list of "columns" that are present in that row
+ - [RelDataTypeField](dask_planner/src/sql/types/rel_data_type_field.rs)
+- [RelDataTypeField](dask_planner/src/sql/types/rel_data_type_field.rs)
+ - Represents an individual column in a table
+ - Contains:
+ - `qualifier` - schema the field belongs to
+ - `name` - name of the column/field
+ - `data_type` - `DaskTypeMap` instance containing information about the SQL type and underlying Arrow DataType
+ - `index` - location of the field in the LogicalPlan
+- [DaskTypeMap](dask_planner/src/sql/types.rs)
+ - Maps a conventional SQL type to an underlying Arrow DataType
+
+
+### Rust Learning Resources
+- ["The Book"](https://doc.rust-lang.org/book/)
+- [Lets Get Rusty "LGR" YouTube series](https://www.youtube.com/c/LetsGetRusty)
+
+## Documentation TODO
+- [ ] SQL Parsing overview diagram
+- [ ] Architecture diagram
+- [x] Setup dev environment
+- [x] Version of Rust and specs
+- [x] Updating version of datafusion
+- [x] Building
+- [x] Rust learning resources
+- [x] Rust Datastructures local to Dask-SQL
+- [x] Build DataFusion documentation locally
+- [ ] Python & Rust with PyO3
+- [ ] Types mapping, Arrow datatypes
+- [ ] RexTypes explanation, show simple query and show it broken down into its parts in a diagram
+- [ ] Registering tables with DaskSqlContext, also functions
+- [ ] Creating your own optimizer
+- [ ] Simple diagram of PyExpr, showing something like 2+2 but broken down into a tree looking diagram
diff --git a/MANIFEST.in b/MANIFEST.in
index 4b4310eee..d0108fedd 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,5 @@
recursive-include dask_sql *.yaml
-recursive-include planner *
+recursive-include dask_planner *
include versioneer.py
include dask_sql/_version.py
diff --git a/README.md b/README.md
index 7651c8196..b97640bf7 100644
--- a/README.md
+++ b/README.md
@@ -89,17 +89,7 @@ Install the package from the `conda-forge` channel:
### With `pip`
-`dask-sql` needs Java for the parsing of the SQL queries.
-Make sure you have a running java installation with version >= 8.
-
-To test if you have Java properly installed and set up, run
-
- $ java -version
- openjdk version "1.8.0_152-release"
- OpenJDK Runtime Environment (build 1.8.0_152-release-1056-b12)
- OpenJDK 64-Bit Server VM (build 25.152-b12, mixed mode)
-
-After installing Java, you can install the package with
+You can install the package with
pip install dask-sql
@@ -111,19 +101,18 @@ If you want to have the newest (unreleased) `dask-sql` version or if you plan to
Create a new conda environment and install the development environment:
- conda env create -f continuous_integration/environment-3.9-jdk11-dev.yaml
+ conda env create -f continuous_integration/environment-3.9-dev.yaml
It is not recommended to use `pip` instead of `conda` for the environment setup.
-If you however need to, make sure to have Java (jdk >= 8) and maven installed and correctly setup before continuing.
-Have a look into `environment-3.9-jdk11-dev.yaml` for the rest of the development environment.
After that, you can install the package in development mode
pip install -e ".[dev]"
-To recompile the Java classes after changes have been made to the source contained in `planner/`, run
+The Rust DataFusion bindings are built as part of the `pip install`.
+If changes are made to the Rust source in `dask_planner/`, another build/install must be run to recompile the bindings:
- python setup.py build_ext
+ python setup.py build install
This repository uses [pre-commit](https://pre-commit.com/) hooks. To install them, call
@@ -195,5 +184,5 @@ At the core, `dask-sql` does two things:
- translate the SQL query using [Apache Calcite](https://calcite.apache.org/) into a relational algebra, which is specified as a tree of java objects - similar to many other SQL engines (Hive, Flink, ...)
- convert this description of the query from java objects into dask API calls (and execute them) - returning a dask dataframe.
-For the first step, Apache Calcite needs to know about the columns and types of the dask dataframes, therefore some java classes to store this information for dask dataframes are defined in `planner`.
-After the translation to a relational algebra is done (using `RelationalAlgebraGenerator.getRelationalAlgebra`), the python methods defined in `dask_sql.physical` turn this into a physical dask execution plan by converting each piece of the relational algebra one-by-one.
+For the first step, Arrow DataFusion needs to know about the columns and types of the dask dataframes, therefore some Rust code to store this information for dask dataframes are defined in `dask_planner`.
+After the translation to a relational algebra is done (using `DaskSQLContext.logical_relational_algebra`), the python methods defined in `dask_sql.physical` turn this into a physical dask execution plan by converting each piece of the relational algebra one-by-one.
diff --git a/conftest.py b/conftest.py
index 6f38951e1..594208456 100644
--- a/conftest.py
+++ b/conftest.py
@@ -5,8 +5,11 @@
def pytest_addoption(parser):
parser.addoption("--rungpu", action="store_true", help="run tests meant for GPU")
+ parser.addoption("--runqueries", action="store_true", help="run test queries")
def pytest_runtest_setup(item):
if "gpu" in item.keywords and not item.config.getoption("--rungpu"):
pytest.skip("need --rungpu option to run")
+ if "queries" in item.keywords and not item.config.getoption("--runqueries"):
+ pytest.skip("need --runqueries option to run")
diff --git a/continuous_integration/environment-3.10-jdk11-dev.yaml b/continuous_integration/environment-3.10-dev.yaml
similarity index 84%
rename from continuous_integration/environment-3.10-jdk11-dev.yaml
rename to continuous_integration/environment-3.10-dev.yaml
index 5dd8b19d6..47e7a7d2d 100644
--- a/continuous_integration/environment-3.10-jdk11-dev.yaml
+++ b/continuous_integration/environment-3.10-dev.yaml
@@ -8,15 +8,13 @@ dependencies:
- fastapi>=0.69.0
- fugue>=0.7.0
- intake>=0.6.0
-- jpype1>=1.0.2
- jsonschema
- lightgbm
-- maven
+- maturin>=0.12.8
- mlflow
- mock
- nest-asyncio
-- openjdk=11
-- pandas>=1.1.2
+- pandas>=1.4.0
- pre-commit
- prompt_toolkit
- psycopg2
@@ -27,7 +25,9 @@ dependencies:
- pytest-xdist
- pytest
- python=3.10
+- rust=1.62.1
- scikit-learn>=1.0.0
+- setuptools-rust>=1.4.1
- sphinx
- tpot
- tzlocal>=2.1
diff --git a/continuous_integration/environment-3.10-jdk8-dev.yaml b/continuous_integration/environment-3.10-jdk8-dev.yaml
deleted file mode 100644
index f93609bd6..000000000
--- a/continuous_integration/environment-3.10-jdk8-dev.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-name: dask-sql
-channels:
-- conda-forge
-- nodefaults
-dependencies:
-- dask-ml>=2022.1.22
-- dask>=2022.3.0
-- fastapi>=0.69.0
-- fugue>=0.7.0
-- intake>=0.6.0
-- jpype1>=1.0.2
-- jsonschema
-- lightgbm
-- maven
-- mlflow
-- mock
-- nest-asyncio
-- openjdk=8
-- pandas>=1.1.2
-- pre-commit
-- prompt_toolkit
-- psycopg2
-- pyarrow>=6.0.1
-- pygments
-- pyhive
-- pytest-cov
-- pytest-xdist
-- pytest
-- python=3.10
-- scikit-learn>=1.0.0
-- sphinx
-- tpot
-- tzlocal>=2.1
-- uvicorn>=0.11.3
diff --git a/continuous_integration/environment-3.8-jdk11-dev.yaml b/continuous_integration/environment-3.8-dev.yaml
similarity index 85%
rename from continuous_integration/environment-3.8-jdk11-dev.yaml
rename to continuous_integration/environment-3.8-dev.yaml
index 638bf743f..1c3185f11 100644
--- a/continuous_integration/environment-3.8-jdk11-dev.yaml
+++ b/continuous_integration/environment-3.8-dev.yaml
@@ -8,15 +8,13 @@ dependencies:
- fastapi=0.69.0
- fugue=0.7.0
- intake=0.6.0
-- jpype1=1.0.2
- jsonschema
- lightgbm
-- maven
+- maturin=0.12.8
- mlflow
- mock
- nest-asyncio
-- openjdk=11
-- pandas=1.1.2
+- pandas=1.4.0
- pre-commit
- prompt_toolkit
- psycopg2
@@ -27,7 +25,9 @@ dependencies:
- pytest-xdist
- pytest
- python=3.8
+- rust=1.62.1
- scikit-learn=1.0.0
+- setuptools-rust=1.4.1
- sphinx
- tpot
- tzlocal=2.1
diff --git a/continuous_integration/environment-3.8-jdk8-dev.yaml b/continuous_integration/environment-3.8-jdk8-dev.yaml
deleted file mode 100644
index 1b6fc69a4..000000000
--- a/continuous_integration/environment-3.8-jdk8-dev.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-name: dask-sql
-channels:
-- conda-forge
-- nodefaults
-dependencies:
-- dask-ml=2022.1.22
-- dask=2022.3.0
-- fastapi=0.69.0
-- fugue=0.7.0
-- intake=0.6.0
-- jpype1=1.0.2
-- jsonschema
-- lightgbm
-- maven
-- mlflow
-- mock
-- nest-asyncio
-- openjdk=8
-- pandas=1.1.2
-- pre-commit
-- prompt_toolkit
-- psycopg2
-- pyarrow=6.0.1
-- pygments
-- pyhive
-- pytest-cov
-- pytest-xdist
-- pytest
-- python=3.8
-- scikit-learn=1.0.0
-- sphinx
-- tpot
-- tzlocal=2.1
-- uvicorn=0.11.3
diff --git a/continuous_integration/environment-3.9-jdk11-dev.yaml b/continuous_integration/environment-3.9-dev.yaml
similarity index 84%
rename from continuous_integration/environment-3.9-jdk11-dev.yaml
rename to continuous_integration/environment-3.9-dev.yaml
index 1751f685b..60335b7e2 100644
--- a/continuous_integration/environment-3.9-jdk11-dev.yaml
+++ b/continuous_integration/environment-3.9-dev.yaml
@@ -8,15 +8,13 @@ dependencies:
- fastapi>=0.69.0
- fugue>=0.7.0
- intake>=0.6.0
-- jpype1>=1.0.2
- jsonschema
- lightgbm
-- maven
+- maturin>=0.12.8
- mlflow
- mock
- nest-asyncio
-- openjdk=11
-- pandas>=1.1.2
+- pandas>=1.4.0
- pre-commit
- prompt_toolkit
- psycopg2
@@ -27,7 +25,9 @@ dependencies:
- pytest-xdist
- pytest
- python=3.9
+- rust=1.62.1
- scikit-learn>=1.0.0
+- setuptools-rust>=1.4.1
- sphinx
- tpot
- tzlocal>=2.1
diff --git a/continuous_integration/environment-3.9-jdk8-dev.yaml b/continuous_integration/environment-3.9-jdk8-dev.yaml
deleted file mode 100644
index 5d42c22a7..000000000
--- a/continuous_integration/environment-3.9-jdk8-dev.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-name: dask-sql
-channels:
-- conda-forge
-- nodefaults
-dependencies:
-- dask-ml>=2022.1.22
-- dask>=2022.3.0
-- fastapi>=0.69.0
-- fugue>=0.7.0
-- intake>=0.6.0
-- jpype1>=1.0.2
-- jsonschema
-- lightgbm
-- maven
-- mlflow
-- mock
-- nest-asyncio
-- openjdk=8
-- pandas>=1.1.2
-- pre-commit
-- prompt_toolkit
-- psycopg2
-- pyarrow>=6.0.1
-- pygments
-- pyhive
-- pytest-cov
-- pytest-xdist
-- pytest
-- python=3.9
-- scikit-learn>=1.0.0
-- sphinx
-- tpot
-- tzlocal>=2.1
-- uvicorn>=0.11.3
diff --git a/continuous_integration/gpuci/build.sh b/continuous_integration/gpuci/build.sh
index 38cb8c00a..cdcb4a6e9 100644
--- a/continuous_integration/gpuci/build.sh
+++ b/continuous_integration/gpuci/build.sh
@@ -17,9 +17,6 @@ export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
# Set home to the job's workspace
export HOME="$WORKSPACE"
-# specify maven options
-export MAVEN_OPTS="-Dmaven.repo.local=${WORKSPACE}/.m2/repository"
-
# Switch to project root; also root of repo checkout
cd "$WORKSPACE"
@@ -40,6 +37,15 @@ gpuci_logger "Activate conda env"
. /opt/conda/etc/profile.d/conda.sh
conda activate dask_sql
+gpuci_logger "Install awscli"
+gpuci_mamba_retry install -y -c conda-forge awscli
+
+gpuci_logger "Download parquet dataset"
+gpuci_retry aws s3 cp --only-show-errors "${DASK_SQL_BUCKET_NAME}parquet_2gb/" tests/unit/data/ --recursive
+
+gpuci_logger "Download query files"
+gpuci_retry aws s3 cp --only-show-errors "${DASK_SQL_BUCKET_NAME}queries/" tests/unit/queries/ --recursive
+
gpuci_logger "Install dask"
python -m pip install git+https://github.com/dask/dask
@@ -58,4 +64,4 @@ conda config --show-sources
conda list --show-channel-urls
gpuci_logger "Python py.test for dask-sql"
-py.test $WORKSPACE -n 4 -v -m gpu --rungpu --junitxml="$WORKSPACE/junit-dask-sql.xml" --cov-config="$WORKSPACE/.coveragerc" --cov=dask_sql --cov-report=xml:"$WORKSPACE/dask-sql-coverage.xml" --cov-report term
+py.test $WORKSPACE -n 4 -v -m gpu --runqueries --rungpu --junitxml="$WORKSPACE/junit-dask-sql.xml" --cov-config="$WORKSPACE/.coveragerc" --cov=dask_sql --cov-report=xml:"$WORKSPACE/dask-sql-coverage.xml" --cov-report term
diff --git a/continuous_integration/gpuci/environment.yaml b/continuous_integration/gpuci/environment.yaml
index 2e4eff82d..efbc41122 100644
--- a/continuous_integration/gpuci/environment.yaml
+++ b/continuous_integration/gpuci/environment.yaml
@@ -11,15 +11,13 @@ dependencies:
- fastapi>=0.69.0
- fugue>=0.7.0
- intake>=0.6.0
-- jpype1>=1.0.2
- jsonschema
- lightgbm
-- maven
+- maturin>=0.12.8
- mlflow
- mock
- nest-asyncio
-- openjdk=11
-- pandas>=1.1.2
+- pandas>=1.4.0
- pre-commit
- prompt_toolkit
- psycopg2
@@ -30,7 +28,9 @@ dependencies:
- pytest-xdist
- pytest
- python=3.9
+- rust=1.62.1
- scikit-learn>=1.0.0
+- setuptools-rust>=1.4.1
- sphinx
- tpot
- tzlocal>=2.1
diff --git a/continuous_integration/recipe/conda_build_config.yaml b/continuous_integration/recipe/conda_build_config.yaml
new file mode 100644
index 000000000..dcffe42d2
--- /dev/null
+++ b/continuous_integration/recipe/conda_build_config.yaml
@@ -0,0 +1,6 @@
+rust_compiler_version:
+ - 1.62.1
+python:
+ - "3.8"
+ - "3.9"
+ - "3.10"
diff --git a/continuous_integration/recipe/meta.yaml b/continuous_integration/recipe/meta.yaml
index cd5abd580..51d4c5ac0 100644
--- a/continuous_integration/recipe/meta.yaml
+++ b/continuous_integration/recipe/meta.yaml
@@ -13,25 +13,25 @@ source:
build:
number: {{ GIT_DESCRIBE_NUMBER }}
- noarch: python
+ skip: true # [py2k]
entry_points:
- dask-sql-server = dask_sql.server.app:main
- dask-sql = dask_sql.cmd:main
- string: py_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+ string: py{{ python | replace(".", "") }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
script: {{ PYTHON }} -m pip install . --no-deps -vv
requirements:
build:
- - maven >=3.6.0
+ - {{ compiler('rust') }}
+ - setuptools-rust >=1.4.1
host:
- pip
- - python >=3.8
+ - python
+ - setuptools-rust >=1.4.1
run:
- python
- dask >=2022.3.0
- - pandas >=1.1.2
- - jpype1 >=1.0.2
- - openjdk >=8
+ - pandas >=1.4.0
- fastapi >=0.69.0
- uvicorn >=0.11.3
- tzlocal >=2.1
diff --git a/continuous_integration/recipe/run_test.py b/continuous_integration/recipe/run_test.py
index 0ca97261b..01616d1db 100644
--- a/continuous_integration/recipe/run_test.py
+++ b/continuous_integration/recipe/run_test.py
@@ -13,19 +13,21 @@
df = pd.DataFrame({"name": ["Alice", "Bob", "Chris"] * 100, "x": list(range(300))})
ddf = dd.from_pandas(df, npartitions=10)
-c.create_table("my_data", ddf)
-got = c.sql(
- """
- SELECT
- my_data.name,
- SUM(my_data.x) AS "S"
- FROM
- my_data
- GROUP BY
- my_data.name
-"""
-)
-expect = pd.DataFrame({"name": ["Alice", "Bob", "Chris"], "S": [14850, 14950, 15050]})
+# This needs to be temporarily disabled since this query requires features that are not yet implemented
+# c.create_table("my_data", ddf)
+
+# got = c.sql(
+# """
+# SELECT
+# my_data.name,
+# SUM(my_data.x) AS "S"
+# FROM
+# my_data
+# GROUP BY
+# my_data.name
+# """
+# )
+# expect = pd.DataFrame({"name": ["Alice", "Bob", "Chris"], "S": [14850, 14950, 15050]})
-dd.assert_eq(got, expect)
+# dd.assert_eq(got, expect)
diff --git a/dask_planner/.classpath b/dask_planner/.classpath
new file mode 100644
index 000000000..b14b13a76
--- /dev/null
+++ b/dask_planner/.classpath
@@ -0,0 +1,55 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dask_planner/.gitignore b/dask_planner/.gitignore
new file mode 100644
index 000000000..c8f044299
--- /dev/null
+++ b/dask_planner/.gitignore
@@ -0,0 +1,72 @@
+/target
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+.pytest_cache/
+*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+.venv/
+env/
+bin/
+build/
+develop-eggs/
+dist/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+include/
+man/
+venv/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+pip-selfcheck.json
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.cache
+nosetests.xml
+coverage.xml
+
+# Translations
+*.mo
+
+# Mr Developer
+.mr.developer.cfg
+.project
+.pydevproject
+
+# Rope
+.ropeproject
+
+# Django stuff:
+*.log
+*.pot
+
+.DS_Store
+
+# Sphinx documentation
+docs/_build/
+
+# PyCharm
+.idea/
+
+# VSCode
+.vscode/
+
+# Pyenv
+.python-version
diff --git a/dask_planner/.settings/org.eclipse.core.resources.prefs b/dask_planner/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 000000000..92920805e
--- /dev/null
+++ b/dask_planner/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,5 @@
+eclipse.preferences.version=1
+encoding//src/main/java=UTF-8
+encoding//src/main/resources=UTF-8
+encoding//target/generated-sources/annotations=UTF-8
+encoding/=UTF-8
diff --git a/dask_planner/.settings/org.eclipse.jdt.apt.core.prefs b/dask_planner/.settings/org.eclipse.jdt.apt.core.prefs
new file mode 100644
index 000000000..d4313d4b2
--- /dev/null
+++ b/dask_planner/.settings/org.eclipse.jdt.apt.core.prefs
@@ -0,0 +1,2 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.apt.aptEnabled=false
diff --git a/dask_planner/.settings/org.eclipse.jdt.core.prefs b/dask_planner/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 000000000..1b6e1ef22
--- /dev/null
+++ b/dask_planner/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,9 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
+org.eclipse.jdt.core.compiler.compliance=1.8
+org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
+org.eclipse.jdt.core.compiler.processAnnotations=disabled
+org.eclipse.jdt.core.compiler.release=disabled
+org.eclipse.jdt.core.compiler.source=1.8
diff --git a/dask_planner/.settings/org.eclipse.m2e.core.prefs b/dask_planner/.settings/org.eclipse.m2e.core.prefs
new file mode 100644
index 000000000..f897a7f1c
--- /dev/null
+++ b/dask_planner/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
diff --git a/dask_planner/Cargo.lock b/dask_planner/Cargo.lock
new file mode 100644
index 000000000..72482d972
--- /dev/null
+++ b/dask_planner/Cargo.lock
@@ -0,0 +1,1419 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "ahash"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
+dependencies = [
+ "getrandom 0.2.7",
+ "once_cell",
+ "version_check",
+]
+
+[[package]]
+name = "ahash"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57e6e951cfbb2db8de1828d49073a113a29fd7117b1596caa781a258c7e38d72"
+dependencies = [
+ "cfg-if",
+ "const-random",
+ "getrandom 0.2.7",
+ "once_cell",
+ "version_check",
+]
+
+[[package]]
+name = "aho-corasick"
+version = "0.7.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "android_system_properties"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7ed72e1635e121ca3e79420540282af22da58be50de153d36f81ddc6b83aa9e"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "arrayref"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
+
+[[package]]
+name = "arrayvec"
+version = "0.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
+
+[[package]]
+name = "arrow"
+version = "23.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fedc767fbaa36ea50f086215f54f1a007d22046fc4754b0448c657bcbe9f8413"
+dependencies = [
+ "ahash 0.8.0",
+ "arrow-buffer",
+ "bitflags",
+ "chrono",
+ "comfy-table",
+ "csv",
+ "flatbuffers",
+ "half",
+ "hashbrown",
+ "indexmap",
+ "lazy_static",
+ "lexical-core",
+ "multiversion",
+ "num",
+ "regex",
+ "regex-syntax",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "arrow-buffer"
+version = "23.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d290050c6e12a81a24ad08525cef2203c4156a6350f75508d49885d677e88ea9"
+dependencies = [
+ "half",
+ "num",
+]
+
+[[package]]
+name = "async-trait"
+version = "0.1.57"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76464446b8bc32758d7e88ee1a804d9914cd9b1cb264c029899680b0be29826f"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "blake2"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9cf849ee05b2ee5fba5e36f97ff8ec2533916700fc0758d40d92136a42f3388"
+dependencies = [
+ "digest",
+]
+
+[[package]]
+name = "blake3"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a08e53fc5a564bb15bfe6fae56bd71522205f1f91893f9c0116edad6496c183f"
+dependencies = [
+ "arrayref",
+ "arrayvec",
+ "cc",
+ "cfg-if",
+ "constant_time_eq",
+ "digest",
+]
+
+[[package]]
+name = "block-buffer"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324"
+dependencies = [
+ "generic-array",
+]
+
+[[package]]
+name = "bstr"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223"
+dependencies = [
+ "lazy_static",
+ "memchr",
+ "regex-automata",
+ "serde",
+]
+
+[[package]]
+name = "bumpalo"
+version = "3.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d"
+
+[[package]]
+name = "cc"
+version = "1.0.73"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "chrono"
+version = "0.4.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1"
+dependencies = [
+ "iana-time-zone",
+ "num-integer",
+ "num-traits",
+ "winapi",
+]
+
+[[package]]
+name = "comfy-table"
+version = "6.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "121d8a5b0346092c18a4b2fd6f620d7a06f0eb7ac0a45860939a0884bc579c56"
+dependencies = [
+ "strum",
+ "strum_macros",
+ "unicode-width",
+]
+
+[[package]]
+name = "const-random"
+version = "0.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f590d95d011aa80b063ffe3253422ed5aa462af4e9867d43ce8337562bac77c4"
+dependencies = [
+ "const-random-macro",
+ "proc-macro-hack",
+]
+
+[[package]]
+name = "const-random-macro"
+version = "0.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40"
+dependencies = [
+ "getrandom 0.2.7",
+ "lazy_static",
+ "proc-macro-hack",
+ "tiny-keccak",
+]
+
+[[package]]
+name = "constant_time_eq"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
+
+[[package]]
+name = "core-foundation-sys"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
+
+[[package]]
+name = "cpufeatures"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc948ebb96241bb40ab73effeb80d9f93afaad49359d159a5e61be51619fe813"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "crunchy"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
+
+[[package]]
+name = "crypto-common"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
+[[package]]
+name = "csv"
+version = "1.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
+dependencies = [
+ "bstr",
+ "csv-core",
+ "itoa 0.4.8",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "csv-core"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "dask_planner"
+version = "0.1.0"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "datafusion-common",
+ "datafusion-expr",
+ "datafusion-optimizer",
+ "datafusion-sql",
+ "env_logger",
+ "log",
+ "mimalloc",
+ "parking_lot",
+ "pyo3",
+ "rand 0.7.3",
+ "tokio",
+ "uuid",
+]
+
+[[package]]
+name = "datafusion-common"
+version = "12.0.0"
+source = "git+https://github.com/apache/arrow-datafusion/?rev=1261741af2a5e142fa0c7916e759859cc18ea59a#1261741af2a5e142fa0c7916e759859cc18ea59a"
+dependencies = [
+ "arrow",
+ "ordered-float",
+ "sqlparser",
+]
+
+[[package]]
+name = "datafusion-expr"
+version = "12.0.0"
+source = "git+https://github.com/apache/arrow-datafusion/?rev=1261741af2a5e142fa0c7916e759859cc18ea59a#1261741af2a5e142fa0c7916e759859cc18ea59a"
+dependencies = [
+ "ahash 0.8.0",
+ "arrow",
+ "datafusion-common",
+ "sqlparser",
+]
+
+[[package]]
+name = "datafusion-optimizer"
+version = "12.0.0"
+source = "git+https://github.com/apache/arrow-datafusion/?rev=1261741af2a5e142fa0c7916e759859cc18ea59a#1261741af2a5e142fa0c7916e759859cc18ea59a"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "chrono",
+ "datafusion-common",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "hashbrown",
+ "log",
+]
+
+[[package]]
+name = "datafusion-physical-expr"
+version = "12.0.0"
+source = "git+https://github.com/apache/arrow-datafusion/?rev=1261741af2a5e142fa0c7916e759859cc18ea59a#1261741af2a5e142fa0c7916e759859cc18ea59a"
+dependencies = [
+ "ahash 0.8.0",
+ "arrow",
+ "blake2",
+ "blake3",
+ "chrono",
+ "datafusion-common",
+ "datafusion-expr",
+ "datafusion-row",
+ "hashbrown",
+ "lazy_static",
+ "md-5",
+ "ordered-float",
+ "paste",
+ "rand 0.8.5",
+ "regex",
+ "sha2",
+ "unicode-segmentation",
+]
+
+[[package]]
+name = "datafusion-row"
+version = "12.0.0"
+source = "git+https://github.com/apache/arrow-datafusion/?rev=1261741af2a5e142fa0c7916e759859cc18ea59a#1261741af2a5e142fa0c7916e759859cc18ea59a"
+dependencies = [
+ "arrow",
+ "datafusion-common",
+ "paste",
+ "rand 0.8.5",
+]
+
+[[package]]
+name = "datafusion-sql"
+version = "12.0.0"
+source = "git+https://github.com/apache/arrow-datafusion/?rev=1261741af2a5e142fa0c7916e759859cc18ea59a#1261741af2a5e142fa0c7916e759859cc18ea59a"
+dependencies = [
+ "ahash 0.8.0",
+ "arrow",
+ "datafusion-common",
+ "datafusion-expr",
+ "hashbrown",
+ "sqlparser",
+ "tokio",
+]
+
+[[package]]
+name = "digest"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506"
+dependencies = [
+ "block-buffer",
+ "crypto-common",
+ "subtle",
+]
+
+[[package]]
+name = "env_logger"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c90bf5f19754d10198ccb95b70664fc925bd1fc090a0fd9a6ebc54acc8cd6272"
+dependencies = [
+ "atty",
+ "humantime",
+ "log",
+ "regex",
+ "termcolor",
+]
+
+[[package]]
+name = "flatbuffers"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86b428b715fdbdd1c364b84573b5fdc0f84f8e423661b9f398735278bc7f2b6a"
+dependencies = [
+ "bitflags",
+ "smallvec",
+ "thiserror",
+]
+
+[[package]]
+name = "generic-array"
+version = "0.14.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9"
+dependencies = [
+ "typenum",
+ "version_check",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.1.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi 0.9.0+wasi-snapshot-preview1",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi 0.11.0+wasi-snapshot-preview1",
+]
+
+[[package]]
+name = "half"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ad6a9459c9c30b177b925162351f97e7d967c7ea8bab3b8352805327daf45554"
+dependencies = [
+ "crunchy",
+ "num-traits",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+dependencies = [
+ "ahash 0.7.6",
+]
+
+[[package]]
+name = "heck"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9"
+
+[[package]]
+name = "hermit-abi"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "humantime"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+
+[[package]]
+name = "iana-time-zone"
+version = "0.1.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ad2bfd338099682614d3ee3fe0cd72e0b6a41ca6a87f6a74a3bd593c91650501"
+dependencies = [
+ "android_system_properties",
+ "core-foundation-sys",
+ "js-sys",
+ "wasm-bindgen",
+ "winapi",
+]
+
+[[package]]
+name = "indexmap"
+version = "1.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e"
+dependencies = [
+ "autocfg",
+ "hashbrown",
+]
+
+[[package]]
+name = "indoc"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "adab1eaa3408fb7f0c777a73e7465fd5656136fc93b670eb6df3c88c2c1344e3"
+
+[[package]]
+name = "itoa"
+version = "0.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
+
+[[package]]
+name = "itoa"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754"
+
+[[package]]
+name = "js-sys"
+version = "0.3.59"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "258451ab10b34f8af53416d1fdab72c22e805f0c92a1136d59470ec0b11138b2"
+dependencies = [
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "lexical-core"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46"
+dependencies = [
+ "lexical-parse-float",
+ "lexical-parse-integer",
+ "lexical-util",
+ "lexical-write-float",
+ "lexical-write-integer",
+]
+
+[[package]]
+name = "lexical-parse-float"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f"
+dependencies = [
+ "lexical-parse-integer",
+ "lexical-util",
+ "static_assertions",
+]
+
+[[package]]
+name = "lexical-parse-integer"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9"
+dependencies = [
+ "lexical-util",
+ "static_assertions",
+]
+
+[[package]]
+name = "lexical-util"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc"
+dependencies = [
+ "static_assertions",
+]
+
+[[package]]
+name = "lexical-write-float"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862"
+dependencies = [
+ "lexical-util",
+ "lexical-write-integer",
+ "static_assertions",
+]
+
+[[package]]
+name = "lexical-write-integer"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446"
+dependencies = [
+ "lexical-util",
+ "static_assertions",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.132"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5"
+
+[[package]]
+name = "libm"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "292a948cd991e376cf75541fe5b97a1081d713c618b4f1b9500f8844e49eb565"
+
+[[package]]
+name = "libmimalloc-sys"
+version = "0.1.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11ca136052550448f55df7898c6dbe651c6b574fe38a0d9ea687a9f8088a2e2c"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "lock_api"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53"
+dependencies = [
+ "autocfg",
+ "scopeguard",
+]
+
+[[package]]
+name = "log"
+version = "0.4.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "md-5"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "658646b21e0b72f7866c7038ab086d3d5e1cd6271f060fd37defb241949d0582"
+dependencies = [
+ "digest",
+]
+
+[[package]]
+name = "memchr"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
+
+[[package]]
+name = "memoffset"
+version = "0.6.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "mimalloc"
+version = "0.1.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f64ad83c969af2e732e907564deb0d0ed393cec4af80776f77dd77a1a427698"
+dependencies = [
+ "libmimalloc-sys",
+]
+
+[[package]]
+name = "multiversion"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "025c962a3dd3cc5e0e520aa9c612201d127dcdf28616974961a649dca64f5373"
+dependencies = [
+ "multiversion-macros",
+]
+
+[[package]]
+name = "multiversion-macros"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8a3e2bde382ebf960c1f3e79689fa5941625fe9bf694a1cb64af3e85faff3af"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "num"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606"
+dependencies = [
+ "num-bigint",
+ "num-complex",
+ "num-integer",
+ "num-iter",
+ "num-rational",
+ "num-traits",
+]
+
+[[package]]
+name = "num-bigint"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-complex"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7ae39348c8bc5fbd7f40c727a9925f03517afd2ab27d46702108b6a7e5414c19"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-integer"
+version = "0.1.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
+dependencies = [
+ "autocfg",
+ "num-traits",
+]
+
+[[package]]
+name = "num-iter"
+version = "0.1.43"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-rational"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0"
+dependencies = [
+ "autocfg",
+ "num-bigint",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
+dependencies = [
+ "autocfg",
+ "libm",
+]
+
+[[package]]
+name = "num_cpus"
+version = "1.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1"
+dependencies = [
+ "hermit-abi",
+ "libc",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "074864da206b4973b84eb91683020dbefd6a8c3f0f38e054d93954e891935e4e"
+
+[[package]]
+name = "ordered-float"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96bcbab4bfea7a59c2c0fe47211a1ac4e3e96bea6eb446d704f310bc5c732ae2"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "parking_lot"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
+dependencies = [
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-sys",
+]
+
+[[package]]
+name = "paste"
+version = "1.0.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9423e2b32f7a043629287a536f21951e8c6a82482d0acb1eeebfc90bc2225b22"
+
+[[package]]
+name = "pin-project-lite"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
+
+[[package]]
+name = "proc-macro-hack"
+version = "0.5.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.43"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "pyo3"
+version = "0.17.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "12f72538a0230791398a0986a6518ebd88abc3fded89007b506ed072acc831e1"
+dependencies = [
+ "cfg-if",
+ "indoc",
+ "libc",
+ "memoffset",
+ "parking_lot",
+ "pyo3-build-config",
+ "pyo3-ffi",
+ "pyo3-macros",
+ "unindent",
+]
+
+[[package]]
+name = "pyo3-build-config"
+version = "0.17.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc4cf18c20f4f09995f3554e6bcf9b09bd5e4d6b67c562fdfaafa644526ba479"
+dependencies = [
+ "once_cell",
+ "target-lexicon",
+]
+
+[[package]]
+name = "pyo3-ffi"
+version = "0.17.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a41877f28d8ebd600b6aa21a17b40c3b0fc4dfe73a27b6e81ab3d895e401b0e9"
+dependencies = [
+ "libc",
+ "pyo3-build-config",
+]
+
+[[package]]
+name = "pyo3-macros"
+version = "0.17.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e81c8d4bcc2f216dc1b665412df35e46d12ee8d3d046b381aad05f1fcf30547"
+dependencies = [
+ "proc-macro2",
+ "pyo3-macros-backend",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "pyo3-macros-backend"
+version = "0.17.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85752a767ee19399a78272cc2ab625cd7d373b2e112b4b13db28de71fa892784"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rand"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
+dependencies = [
+ "getrandom 0.1.16",
+ "libc",
+ "rand_chacha 0.2.2",
+ "rand_core 0.5.1",
+ "rand_hc",
+]
+
+[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha 0.3.1",
+ "rand_core 0.6.3",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.5.1",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.6.3",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
+dependencies = [
+ "getrandom 0.1.16",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
+dependencies = [
+ "getrandom 0.2.7",
+]
+
+[[package]]
+name = "rand_hc"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
+dependencies = [
+ "rand_core 0.5.1",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "regex"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244"
+
+[[package]]
+name = "rustversion"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97477e48b4cf8603ad5f7aaf897467cf42ab4218a38ef76fb14c2d6773a6d6a8"
+
+[[package]]
+name = "ryu"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
+
+[[package]]
+name = "scopeguard"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
+
+[[package]]
+name = "serde"
+version = "1.0.144"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f747710de3dcd43b88c9168773254e809d8ddbdf9653b84e2554ab219f17860"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.144"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94ed3a816fb1d101812f83e789f888322c34e291f894f19590dc310963e87a00"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.85"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44"
+dependencies = [
+ "itoa 1.0.3",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "sha2"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest",
+]
+
+[[package]]
+name = "smallvec"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1"
+
+[[package]]
+name = "sqlparser"
+version = "0.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0beb13adabbdda01b63d595f38c8bfd19a361e697fd94ce0098a634077bc5b25"
+dependencies = [
+ "log",
+]
+
+[[package]]
+name = "static_assertions"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+
+[[package]]
+name = "strum"
+version = "0.24.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
+
+[[package]]
+name = "strum_macros"
+version = "0.24.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "rustversion",
+ "syn",
+]
+
+[[package]]
+name = "subtle"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
+
+[[package]]
+name = "syn"
+version = "1.0.99"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "target-lexicon"
+version = "0.12.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c02424087780c9b71cc96799eaeddff35af2bc513278cda5c99fc1f5d026d3c1"
+
+[[package]]
+name = "termcolor"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "thiserror"
+version = "1.0.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f5f6586b7f764adc0231f4c79be7b920e766bb2f3e51b3661cdb263828f19994"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "12bafc5b54507e0149cdf1b145a5d80ab80a90bcd9275df43d4fff68460f6c21"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "tiny-keccak"
+version = "2.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
+dependencies = [
+ "crunchy",
+]
+
+[[package]]
+name = "tokio"
+version = "1.20.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a8325f63a7d4774dd041e363b2409ed1c5cbbd0f867795e661df066b2b0a581"
+dependencies = [
+ "autocfg",
+ "num_cpus",
+ "once_cell",
+ "parking_lot",
+ "pin-project-lite",
+ "tokio-macros",
+]
+
+[[package]]
+name = "tokio-macros"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "typenum"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf"
+
+[[package]]
+name = "unicode-segmentation"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99"
+
+[[package]]
+name = "unicode-width"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973"
+
+[[package]]
+name = "unindent"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "58ee9362deb4a96cef4d437d1ad49cffc9b9e92d202b6995674e928ce684f112"
+
+[[package]]
+name = "uuid"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
+dependencies = [
+ "getrandom 0.2.7",
+]
+
+[[package]]
+name = "version_check"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+
+[[package]]
+name = "wasi"
+version = "0.9.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
+
+[[package]]
+name = "wasi"
+version = "0.11.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.82"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc7652e3f6c4706c8d9cd54832c4a4ccb9b5336e2c3bd154d5cccfbf1c1f5f7d"
+dependencies = [
+ "cfg-if",
+ "wasm-bindgen-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-backend"
+version = "0.2.82"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "662cd44805586bd52971b9586b1df85cdbbd9112e4ef4d8f41559c334dc6ac3f"
+dependencies = [
+ "bumpalo",
+ "log",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.82"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b260f13d3012071dfb1512849c033b1925038373aea48ced3012c09df952c602"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.82"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5be8e654bdd9b79216c2929ab90721aa82faf65c48cdf08bdc4e7f51357b80da"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-backend",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.82"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6598dd0bd3c7d51095ff6531a5b23e02acdc81804e30d8f07afb77b7215a140a"
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "windows-sys"
+version = "0.36.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
+dependencies = [
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_msvc",
+]
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.36.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.36.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.36.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.36.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.36.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
diff --git a/dask_planner/Cargo.toml b/dask_planner/Cargo.toml
new file mode 100644
index 000000000..51826ab96
--- /dev/null
+++ b/dask_planner/Cargo.toml
@@ -0,0 +1,28 @@
+[package]
+name = "dask_planner"
+repository = "https://github.com/dask-contrib/dask-sql"
+version = "0.1.0"
+description = "Bindings for DataFusion used by Dask-SQL"
+readme = "README.md"
+license = "Apache-2.0"
+edition = "2021"
+rust-version = "1.62"
+
+[dependencies]
+arrow = { version = "23.0.0", features = ["prettyprint"] }
+async-trait = "0.1.41"
+datafusion-common = { git = "https://github.com/apache/arrow-datafusion/", rev = "1261741af2a5e142fa0c7916e759859cc18ea59a" }
+datafusion-expr = { git = "https://github.com/apache/arrow-datafusion/", rev = "1261741af2a5e142fa0c7916e759859cc18ea59a" }
+datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion/", rev = "1261741af2a5e142fa0c7916e759859cc18ea59a" }
+datafusion-sql = { git = "https://github.com/apache/arrow-datafusion/", rev = "1261741af2a5e142fa0c7916e759859cc18ea59a" }
+env_logger = "0.9"
+log = "^0.4"
+mimalloc = { version = "*", default-features = false }
+parking_lot = "0.12"
+pyo3 = { version = "0.17.1", features = ["extension-module", "abi3", "abi3-py38"] }
+rand = "0.7"
+tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
+uuid = { version = "0.8", features = ["v4"] }
+
+[lib]
+crate-type = ["cdylib"]
diff --git a/dask_planner/MANIFEST.in b/dask_planner/MANIFEST.in
new file mode 100644
index 000000000..7c68298bd
--- /dev/null
+++ b/dask_planner/MANIFEST.in
@@ -0,0 +1,2 @@
+include Cargo.toml
+recursive-include src *
diff --git a/dask_planner/README.md b/dask_planner/README.md
new file mode 100644
index 000000000..e69de29bb
diff --git a/dask_planner/pyproject.toml b/dask_planner/pyproject.toml
new file mode 100644
index 000000000..f153e3f5a
--- /dev/null
+++ b/dask_planner/pyproject.toml
@@ -0,0 +1,11 @@
+[build-system]
+requires = ["setuptools", "wheel", "setuptools-rust"]
+
+[project]
+name = "datafusion_planner"
+requires-python = ">=3.8"
+classifiers = [
+ "Programming Language :: Rust",
+ "Programming Language :: Python :: Implementation :: CPython",
+ "Programming Language :: Python :: Implementation :: PyPy",
+]
diff --git a/dask_planner/rust-toolchain.toml b/dask_planner/rust-toolchain.toml
new file mode 100644
index 000000000..5d56faf9a
--- /dev/null
+++ b/dask_planner/rust-toolchain.toml
@@ -0,0 +1,2 @@
+[toolchain]
+channel = "nightly"
diff --git a/dask_planner/src/dialect.rs b/dask_planner/src/dialect.rs
new file mode 100644
index 000000000..492f4aca3
--- /dev/null
+++ b/dask_planner/src/dialect.rs
@@ -0,0 +1,40 @@
+use core::iter::Peekable;
+use core::str::Chars;
+use datafusion_sql::sqlparser::dialect::Dialect;
+
+#[derive(Debug)]
+pub struct DaskDialect {}
+
+impl Dialect for DaskDialect {
+ fn is_identifier_start(&self, ch: char) -> bool {
+ // See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
+ // We don't yet support identifiers beginning with "letters with
+ // diacritical marks and non-Latin letters"
+ ('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
+ }
+
+ fn is_identifier_part(&self, ch: char) -> bool {
+ ('a'..='z').contains(&ch)
+ || ('A'..='Z').contains(&ch)
+ || ('0'..='9').contains(&ch)
+ || ch == '$'
+ || ch == '_'
+ }
+
+ /// Determine if a character starts a quoted identifier. The default
+ /// implementation, accepting "double quoted" ids is both ANSI-compliant
+ /// and appropriate for most dialects (with the notable exception of
+ /// MySQL, MS SQL, and sqlite). You can accept one of characters listed
+ /// in `Word::matching_end_quote` here
+ fn is_delimited_identifier_start(&self, ch: char) -> bool {
+ ch == '"'
+ }
+ /// Determine if quoted characters are proper for identifier
+ fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable<Chars<'_>>) -> bool {
+ true
+ }
+ /// Determine if FILTER (WHERE ...) filters are allowed during aggregations
+ fn supports_filter_during_aggregation(&self) -> bool {
+ true
+ }
+}
diff --git a/dask_planner/src/expression.rs b/dask_planner/src/expression.rs
new file mode 100644
index 000000000..9c85b5324
--- /dev/null
+++ b/dask_planner/src/expression.rs
@@ -0,0 +1,844 @@
+use crate::sql::exceptions::{py_runtime_err, py_type_err};
+use crate::sql::logical;
+use crate::sql::types::RexType;
+use arrow::datatypes::DataType;
+use datafusion_common::{Column, DFField, DFSchema, Result, ScalarValue};
+use datafusion_expr::Operator;
+use datafusion_expr::{lit, utils::exprlist_to_fields, BuiltinScalarFunction, Expr, LogicalPlan};
+use pyo3::prelude::*;
+use std::convert::From;
+use std::sync::Arc;
+
+/// A PyExpr that can be used on a DataFrame
+#[pyclass(name = "Expression", module = "datafusion", subclass)]
+#[derive(Debug, Clone)]
+pub struct PyExpr {
+ pub expr: Expr,
+ // Why a Vec here? Because BinaryExpr on Join might have multiple LogicalPlans
+ pub input_plan: Option<Vec<Arc<LogicalPlan>>>,
+}
+
+impl From<PyExpr> for Expr {
+ fn from(expr: PyExpr) -> Expr {
+ expr.expr
+ }
+}
+
+#[pyclass(name = "ScalarValue", module = "datafusion", subclass)]
+#[derive(Debug, Clone)]
+pub struct PyScalarValue {
+ pub scalar_value: ScalarValue,
+}
+
+impl From<PyScalarValue> for ScalarValue {
+ fn from(pyscalar: PyScalarValue) -> ScalarValue {
+ pyscalar.scalar_value
+ }
+}
+
+impl From<ScalarValue> for PyScalarValue {
+ fn from(scalar_value: ScalarValue) -> PyScalarValue {
+ PyScalarValue { scalar_value }
+ }
+}
+
+/// Convert a list of DataFusion Expr to PyExpr
+pub fn py_expr_list(input: &Arc<LogicalPlan>, expr: &[Expr]) -> PyResult<Vec<PyExpr>> {
+ Ok(expr
+ .iter()
+ .map(|e| PyExpr::from(e.clone(), Some(vec![input.clone()])))
+ .collect())
+}
+
+impl PyExpr {
+ /// Generally we would implement the `From` trait offered by Rust
+ /// However in this case Expr does not contain the contextual
+ /// `LogicalPlan` instance that we need so we need to make an instance
+ /// function to take and create the PyExpr.
+ pub fn from(expr: Expr, input: Option<Vec<Arc<LogicalPlan>>>) -> PyExpr {
+ PyExpr {
+ input_plan: input,
+ expr,
+ }
+ }
+
+ /// Determines the name of the `Expr` instance by examining the LogicalPlan
+ pub fn _column_name(&self, plan: &LogicalPlan) -> Result<String> {
+ let field = expr_to_field(&self.expr, plan)?;
+ Ok(field.qualified_column().flat_name())
+ }
+
+ fn _rex_type(&self, expr: &Expr) -> RexType {
+ match expr {
+ Expr::Alias(..)
+ | Expr::Column(..)
+ | Expr::QualifiedWildcard { .. }
+ | Expr::GetIndexedField { .. } => RexType::Reference,
+ Expr::ScalarVariable(..) | Expr::Literal(..) => RexType::Literal,
+ Expr::BinaryExpr { .. }
+ | Expr::Not(..)
+ | Expr::IsNotNull(..)
+ | Expr::Negative(..)
+ | Expr::IsNull(..)
+ | Expr::Like { .. }
+ | Expr::ILike { .. }
+ | Expr::SimilarTo { .. }
+ | Expr::Between { .. }
+ | Expr::Case { .. }
+ | Expr::Cast { .. }
+ | Expr::TryCast { .. }
+ | Expr::Sort { .. }
+ | Expr::ScalarFunction { .. }
+ | Expr::AggregateFunction { .. }
+ | Expr::WindowFunction { .. }
+ | Expr::AggregateUDF { .. }
+ | Expr::InList { .. }
+ | Expr::Wildcard
+ | Expr::ScalarUDF { .. }
+ | Expr::Exists { .. }
+ | Expr::InSubquery { .. }
+ | Expr::GroupingSet(..)
+ | Expr::IsTrue(..)
+ | Expr::IsFalse(..)
+ | Expr::IsUnknown(_)
+ | Expr::IsNotTrue(..)
+ | Expr::IsNotFalse(..)
+ | Expr::IsNotUnknown(_) => RexType::Call,
+ Expr::ScalarSubquery(..) => RexType::SubqueryAlias,
+ }
+ }
+}
+
+#[pymethods]
+impl PyExpr {
+ #[staticmethod]
+ pub fn literal(value: PyScalarValue) -> PyExpr {
+ PyExpr::from(lit(value.scalar_value), None)
+ }
+
+ /// Extracts the LogicalPlan from a Subquery, or supported Subquery sub-type, from
+ /// the expression instance
+ #[pyo3(name = "getSubqueryLogicalPlan")]
+ pub fn subquery_plan(&self) -> PyResult<logical::PyLogicalPlan> {
+ match &self.expr {
+ Expr::ScalarSubquery(subquery) => Ok(subquery.subquery.as_ref().clone().into()),
+ _ => Err(py_type_err(format!(
+ "Attempted to extract a LogicalPlan instance from invalid Expr {:?}.
+ Only Subquery and related variants are supported for this operation.",
+ &self.expr
+ ))),
+ }
+ }
+
+ /// If this Expression instance references an existing
+ /// Column in the SQL parse tree or not
+ #[pyo3(name = "isInputReference")]
+ pub fn is_input_reference(&self) -> PyResult<bool> {
+ Ok(matches!(&self.expr, Expr::Column(_col)))
+ }
+
+ #[pyo3(name = "toString")]
+ pub fn to_string(&self) -> PyResult<String> {
+ Ok(format!("{}", &self.expr))
+ }
+
+ /// Gets the positional index of the Expr instance from the LogicalPlan DFSchema
+ #[pyo3(name = "getIndex")]
+ pub fn index(&self) -> PyResult<usize> {
+ let input: &Option<Vec<Arc<LogicalPlan>>> = &self.input_plan;
+ match input {
+ Some(input_plans) if !input_plans.is_empty() => {
+ let mut schema: DFSchema = (**input_plans[0].schema()).clone();
+ for plan in input_plans.iter().skip(1) {
+ schema.merge(plan.schema().as_ref());
+ }
+ get_expr_name(&self.expr)
+ .and_then(|fq_name| {
+ schema.index_of_column(&Column::from_qualified_name(&fq_name))
+ })
+ .map_err(py_runtime_err)
+ }
+ _ => Err(py_runtime_err(
+ "We need a valid LogicalPlan instance to get the Expr's index in the schema",
+ )),
+ }
+ }
+
+ /// Examine the current/"self" PyExpr and return its "type"
+ /// In this context a "type" is what Dask-SQL Python
+ /// RexConverter plugin instance should be invoked to handle
+ /// the Rex conversion
+ #[pyo3(name = "getExprType")]
+ pub fn get_expr_type(&self) -> PyResult<String> {
+ Ok(String::from(match &self.expr {
+ Expr::Alias(..)
+ | Expr::Column(..)
+ | Expr::Literal(..)
+ | Expr::BinaryExpr { .. }
+ | Expr::Between { .. }
+ | Expr::Cast { .. }
+ | Expr::Sort { .. }
+ | Expr::ScalarFunction { .. }
+ | Expr::AggregateFunction { .. }
+ | Expr::InList { .. }
+ | Expr::InSubquery { .. }
+ | Expr::ScalarUDF { .. }
+ | Expr::AggregateUDF { .. }
+ | Expr::Exists { .. }
+ | Expr::ScalarSubquery(..)
+ | Expr::QualifiedWildcard { .. }
+ | Expr::Not(..)
+ | Expr::GroupingSet(..) => self.expr.variant_name(),
+ Expr::ScalarVariable(..)
+ | Expr::IsNotNull(..)
+ | Expr::Negative(..)
+ | Expr::GetIndexedField { .. }
+ | Expr::IsNull(..)
+ | Expr::IsTrue(_)
+ | Expr::IsFalse(_)
+ | Expr::IsUnknown(_)
+ | Expr::IsNotTrue(_)
+ | Expr::IsNotFalse(_)
+ | Expr::Like { .. }
+ | Expr::ILike { .. }
+ | Expr::SimilarTo { .. }
+ | Expr::IsNotUnknown(_)
+ | Expr::Case { .. }
+ | Expr::TryCast { .. }
+ | Expr::WindowFunction { .. }
+ | Expr::Wildcard => {
+ return Err(py_type_err(format!(
+ "Encountered unsupported expression type: {}",
+ &self.expr.variant_name()
+ )))
+ }
+ }))
+ }
+
+ /// Determines the type of this Expr based on its variant
+ #[pyo3(name = "getRexType")]
+ pub fn rex_type(&self) -> PyResult<RexType> {
+ Ok(self._rex_type(&self.expr))
+ }
+
+ /// Python friendly shim code to get the name of a column referenced by an expression
+ pub fn column_name(&self, mut plan: logical::PyLogicalPlan) -> PyResult<String> {
+ self._column_name(&plan.current_node())
+ .map_err(py_runtime_err)
+ }
+
+ /// Row expressions, Rex(s), operate on the concept of operands. This maps to expressions that are used in
+ /// the "call" logic of the Dask-SQL python codebase. Different variants of Expressions, Expr(s),
+ /// store those operands in different data structures. This function examines the Expr variant and returns
+ /// the operands to the calling logic as a Vec of PyExpr instances.
+ #[pyo3(name = "getOperands")]
+ pub fn get_operands(&self) -> PyResult<Vec<PyExpr>> {
+ match &self.expr {
+ // Expr variants that are themselves the operand to return
+ Expr::Column(..) | Expr::ScalarVariable(..) | Expr::Literal(..) => {
+ Ok(vec![PyExpr::from(
+ self.expr.clone(),
+ self.input_plan.clone(),
+ )])
+ }
+
+ // Expr(s) that house the Expr instance to return in their bounded params
+ Expr::Alias(expr, ..)
+ | Expr::Not(expr)
+ | Expr::IsNull(expr)
+ | Expr::IsNotNull(expr)
+ | Expr::IsTrue(expr)
+ | Expr::IsFalse(expr)
+ | Expr::IsUnknown(expr)
+ | Expr::IsNotTrue(expr)
+ | Expr::IsNotFalse(expr)
+ | Expr::IsNotUnknown(expr)
+ | Expr::Negative(expr)
+ | Expr::GetIndexedField { expr, .. }
+ | Expr::Cast { expr, .. }
+ | Expr::TryCast { expr, .. }
+ | Expr::Sort { expr, .. }
+ | Expr::InSubquery { expr, .. } => {
+ Ok(vec![PyExpr::from(*expr.clone(), self.input_plan.clone())])
+ }
+
+ // Expr variants containing a collection of Expr(s) for operands
+ Expr::AggregateFunction { args, .. }
+ | Expr::AggregateUDF { args, .. }
+ | Expr::ScalarFunction { args, .. }
+ | Expr::ScalarUDF { args, .. }
+ | Expr::WindowFunction { args, .. } => Ok(args
+ .iter()
+ .map(|arg| PyExpr::from(arg.clone(), self.input_plan.clone()))
+ .collect()),
+
+ // Expr(s) that require more specific processing
+ Expr::Case {
+ expr,
+ when_then_expr,
+ else_expr,
+ } => {
+ let mut operands: Vec<PyExpr> = Vec::new();
+
+ if let Some(e) = expr {
+ operands.push(PyExpr::from(*e.clone(), self.input_plan.clone()));
+ };
+
+ for (when, then) in when_then_expr {
+ operands.push(PyExpr::from(*when.clone(), self.input_plan.clone()));
+ operands.push(PyExpr::from(*then.clone(), self.input_plan.clone()));
+ }
+
+ if let Some(e) = else_expr {
+ operands.push(PyExpr::from(*e.clone(), self.input_plan.clone()));
+ };
+
+ Ok(operands)
+ }
+ Expr::InList { expr, list, .. } => {
+ let mut operands: Vec<PyExpr> =
+ vec![PyExpr::from(*expr.clone(), self.input_plan.clone())];
+ for list_elem in list {
+ operands.push(PyExpr::from(list_elem.clone(), self.input_plan.clone()));
+ }
+
+ Ok(operands)
+ }
+ Expr::BinaryExpr { left, right, .. } => Ok(vec![
+ PyExpr::from(*left.clone(), self.input_plan.clone()),
+ PyExpr::from(*right.clone(), self.input_plan.clone()),
+ ]),
+ Expr::Like { expr, pattern, .. } => Ok(vec![
+ PyExpr::from(*expr.clone(), self.input_plan.clone()),
+ PyExpr::from(*pattern.clone(), self.input_plan.clone()),
+ ]),
+ Expr::ILike { expr, pattern, .. } => Ok(vec![
+ PyExpr::from(*expr.clone(), self.input_plan.clone()),
+ PyExpr::from(*pattern.clone(), self.input_plan.clone()),
+ ]),
+ Expr::SimilarTo { expr, pattern, .. } => Ok(vec![
+ PyExpr::from(*expr.clone(), self.input_plan.clone()),
+ PyExpr::from(*pattern.clone(), self.input_plan.clone()),
+ ]),
+ Expr::Between {
+ expr,
+ negated: _,
+ low,
+ high,
+ } => Ok(vec![
+ PyExpr::from(*expr.clone(), self.input_plan.clone()),
+ PyExpr::from(*low.clone(), self.input_plan.clone()),
+ PyExpr::from(*high.clone(), self.input_plan.clone()),
+ ]),
+
+ // Currently unsupported/unimplemented Expr types for Rex Call operations
+ Expr::GroupingSet(..)
+ | Expr::Wildcard
+ | Expr::QualifiedWildcard { .. }
+ | Expr::ScalarSubquery(..)
+ | Expr::Exists { .. } => Err(py_runtime_err(format!(
+ "Unimplemented Expr type: {}",
+ self.expr
+ ))),
+ }
+ }
+
+ #[pyo3(name = "getOperatorName")]
+ pub fn get_operator_name(&self) -> PyResult<String> {
+ Ok(match &self.expr {
+ Expr::BinaryExpr {
+ left: _,
+ op,
+ right: _,
+ } => format!("{}", op),
+ Expr::ScalarFunction { fun, args: _ } => format!("{}", fun),
+ Expr::ScalarUDF { fun, .. } => fun.name.clone(),
+ Expr::Cast { .. } => "cast".to_string(),
+ Expr::Between { .. } => "between".to_string(),
+ Expr::Case { .. } => "case".to_string(),
+ Expr::IsNull(..) => "is null".to_string(),
+ Expr::IsNotNull(..) => "is not null".to_string(),
+ Expr::IsTrue(_) => "is true".to_string(),
+ Expr::IsFalse(_) => "is false".to_string(),
+ Expr::IsUnknown(_) => "is unknown".to_string(),
+ Expr::IsNotTrue(_) => "is not true".to_string(),
+ Expr::IsNotFalse(_) => "is not false".to_string(),
+ Expr::IsNotUnknown(_) => "is not unknown".to_string(),
+ Expr::InList { .. } => "in list".to_string(),
+ Expr::Negative(..) => "negative".to_string(),
+ Expr::Not(..) => "not".to_string(),
+ Expr::Like { negated, .. } => {
+ if *negated {
+ "not like".to_string()
+ } else {
+ "like".to_string()
+ }
+ }
+ Expr::ILike { negated, .. } => {
+ if *negated {
+ "not ilike".to_string()
+ } else {
+ "ilike".to_string()
+ }
+ }
+ Expr::SimilarTo { negated, .. } => {
+ if *negated {
+ "not similar to".to_string()
+ } else {
+ "similar to".to_string()
+ }
+ }
+ _ => {
+ return Err(py_type_err(format!(
+ "Catch all triggered in get_operator_name: {:?}",
+ &self.expr
+ )))
+ }
+ })
+ }
+
+    /// Returns the name of the data type this expression evaluates to:
+    /// SQL-style names for binary operators and casts, Arrow/ScalarValue
+    /// variant names for literals.
+    ///
+    /// Errors for variants whose result type cannot be determined without
+    /// schema information (e.g. bitwise operators, uncategorized Exprs).
+    #[pyo3(name = "getType")]
+    pub fn get_type(&self) -> PyResult<String> {
+        Ok(String::from(match &self.expr {
+            Expr::BinaryExpr { op, .. } => match op {
+                // Comparison, logical and regex operators always yield booleans
+                Operator::Eq
+                | Operator::NotEq
+                | Operator::Lt
+                | Operator::LtEq
+                | Operator::Gt
+                | Operator::GtEq
+                | Operator::And
+                | Operator::Or
+                | Operator::Like
+                | Operator::NotLike
+                | Operator::IsDistinctFrom
+                | Operator::IsNotDistinctFrom
+                | Operator::RegexMatch
+                | Operator::RegexIMatch
+                | Operator::RegexNotMatch
+                | Operator::RegexNotIMatch => "BOOLEAN",
+                // NOTE(review): arithmetic is reported as BIGINT/FLOAT without
+                // consulting operand types — presumably a coarse placeholder;
+                // exact typing would need the input schema.
+                Operator::Plus | Operator::Minus | Operator::Multiply | Operator::Modulo => {
+                    "BIGINT"
+                }
+                Operator::Divide => "FLOAT",
+                Operator::StringConcat => "VARCHAR",
+                Operator::BitwiseShiftLeft
+                | Operator::BitwiseShiftRight
+                | Operator::BitwiseXor
+                | Operator::BitwiseAnd
+                | Operator::BitwiseOr => {
+                    // the type here should be the same as the type of the left
+                    // expression, but we can only compute that if we have the
+                    // schema available
+                    return Err(py_type_err(
+                        "Bitwise operators unsupported in get_type".to_string(),
+                    ));
+                }
+            },
+            // Literals report the ScalarValue variant name verbatim
+            Expr::Literal(scalar_value) => match scalar_value {
+                ScalarValue::Boolean(_value) => "Boolean",
+                ScalarValue::Float32(_value) => "Float32",
+                ScalarValue::Float64(_value) => "Float64",
+                ScalarValue::Decimal128(_value, ..) => "Decimal128",
+                ScalarValue::Dictionary(..) => "Dictionary",
+                ScalarValue::Int8(_value) => "Int8",
+                ScalarValue::Int16(_value) => "Int16",
+                ScalarValue::Int32(_value) => "Int32",
+                ScalarValue::Int64(_value) => "Int64",
+                ScalarValue::UInt8(_value) => "UInt8",
+                ScalarValue::UInt16(_value) => "UInt16",
+                ScalarValue::UInt32(_value) => "UInt32",
+                ScalarValue::UInt64(_value) => "UInt64",
+                ScalarValue::Utf8(_value) => "Utf8",
+                ScalarValue::LargeUtf8(_value) => "LargeUtf8",
+                ScalarValue::Binary(_value) => "Binary",
+                ScalarValue::LargeBinary(_value) => "LargeBinary",
+                ScalarValue::Date32(_value) => "Date32",
+                ScalarValue::Date64(_value) => "Date64",
+                ScalarValue::Time64(_value) => "Time64",
+                ScalarValue::Null => "Null",
+                ScalarValue::TimestampSecond(..) => "TimestampSecond",
+                ScalarValue::TimestampMillisecond(..) => "TimestampMillisecond",
+                ScalarValue::TimestampMicrosecond(..) => "TimestampMicrosecond",
+                ScalarValue::TimestampNanosecond(..) => "TimestampNanosecond",
+                ScalarValue::IntervalYearMonth(..) => "IntervalYearMonth",
+                ScalarValue::IntervalDayTime(..) => "IntervalDayTime",
+                ScalarValue::IntervalMonthDayNano(..) => "IntervalMonthDayNano",
+                ScalarValue::List(..) => "List",
+                ScalarValue::Struct(..) => "Struct",
+            },
+            // Only functions with a fixed, schema-independent result are mapped
+            Expr::ScalarFunction { fun, .. } => match fun {
+                BuiltinScalarFunction::Abs => "Abs",
+                BuiltinScalarFunction::DatePart => "DatePart",
+                _ => {
+                    return Err(py_type_err(format!(
+                        "Catch all triggered for ScalarFunction in get_type; {:?}",
+                        fun
+                    )))
+                }
+            },
+            // Casts report the SQL name of the target Arrow DataType
+            Expr::Cast { data_type, .. } => match data_type {
+                DataType::Null => "NULL",
+                DataType::Boolean => "BOOLEAN",
+                DataType::Int8 | DataType::UInt8 => "TINYINT",
+                DataType::Int16 | DataType::UInt16 => "SMALLINT",
+                DataType::Int32 | DataType::UInt32 => "INTEGER",
+                DataType::Int64 | DataType::UInt64 => "BIGINT",
+                DataType::Float32 => "FLOAT",
+                DataType::Float64 => "DOUBLE",
+                DataType::Timestamp { .. } => "TIMESTAMP",
+                DataType::Date32 | DataType::Date64 => "DATE",
+                DataType::Time32(..) => "TIME32",
+                DataType::Time64(..) => "TIME64",
+                DataType::Duration(..) => "DURATION",
+                DataType::Interval(..) => "INTERVAL",
+                DataType::Binary => "BINARY",
+                DataType::FixedSizeBinary(..) => "FIXEDSIZEBINARY",
+                DataType::LargeBinary => "LARGEBINARY",
+                DataType::Utf8 => "VARCHAR",
+                DataType::LargeUtf8 => "BIGVARCHAR",
+                DataType::List(..) => "LIST",
+                DataType::FixedSizeList(..) => "FIXEDSIZELIST",
+                DataType::LargeList(..) => "LARGELIST",
+                DataType::Struct(..) => "STRUCT",
+                DataType::Union(..) => "UNION",
+                DataType::Dictionary(..) => "DICTIONARY",
+                DataType::Decimal128(..) | DataType::Decimal256(..) => "DECIMAL",
+                DataType::Map(..) => "MAP",
+                _ => {
+                    return Err(py_type_err(format!(
+                        "Catch all triggered for Cast in get_type; {:?}",
+                        data_type
+                    )))
+                }
+            },
+            _ => {
+                return Err(py_type_err(format!(
+                    "Catch all triggered in get_type; {:?}",
+                    &self.expr
+                )))
+            }
+        }))
+    }
+
+ #[pyo3(name = "getFilterExpr")]
+ pub fn get_filter_expr(&self) -> PyResult