diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/Cargo.lock b/Cargo.lock index 4e4d95c..47e8672 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -93,29 +93,6 @@ dependencies = [ "libloading", ] -[[package]] -name = "cosette-prover" -version = "0.1.0" -dependencies = [ - "anyhow", - "crossbeam", - "env_logger", - "imbl", - "indenter", - "isoperm", - "itertools", - "log", - "num", - "paste", - "permutation", - "scopeguard", - "serde", - "serde-enum-str", - "serde_json", - "walkdir", - "z3", -] - [[package]] name = "crossbeam" version = "0.8.2" @@ -507,6 +484,29 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "qed-prover" +version = "0.1.0" +dependencies = [ + "anyhow", + "crossbeam", + "env_logger", + "imbl", + "indenter", + "isoperm", + "itertools", + "log", + "num", + "paste", + "permutation", + "scopeguard", + "serde", + "serde-enum-str", + "serde_json", + "walkdir", + "z3", +] + [[package]] name = "quote" version = "1.0.33" diff --git a/Cargo.toml b/Cargo.toml index 8a4203e..11b05d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "cosette-prover" +name = "qed-prover" version = "0.1.0" authors = ["Shuxian Wang "] edition = "2018" diff --git a/LICENSE b/LICENSE index 985b15a..7c17874 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2021 The Cosette Team +Copyright (c) 2021 The QED Team Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 61154d9..2511bfb 100644 --- a/README.md +++ b/README.md @@ -1,44 +1,66 @@ -# The Cosette Prover +# The QED Prover -This is a reimplementation of the Cosette prover in Rust aiming for high performance and better SQL feature coverage. -The theory behind the prover is described in [this paper](https://www.vldb.org/pvldb/vol11/p1482-chu.pdf). -It currently expect input generated from [this parser](https://github.com/cosette-solver/cosette-parser). +This is a SQL query equivalence prover in Rust aiming for high performance and large SQL feature coverage. +The project starts as a successor of the Cosette prover, which is described in [this paper](https://www.vldb.org/pvldb/vol11/p1482-chu.pdf). -## Usage +## Install + +### Build from source manually -After downloading the source, build the source with +The project relies on a nightly Rust compiler toolchain, +and additionally, `libclang` and header files for `z3` are needed as build-time dependencies. +Build the prover with ```sh cargo build --release ``` -Currently the build-time dependency are `libclang` and header files for `z3`. -Also, a nightly Rust compiler toolchain is required. -The build should produce an executable in `target/release/`, or you can use `cargo` to run: +would produce an executable `./target/release/qed-prover`. +The current setup of the prover has the Z3 *and* CVC5 solver as a runtime dependency, +so you need to have both the `z3` *and* `cvc5` executable in the `PATH` when running `qed-solver`. + +### Build with Nix + +Alternatively, if one has Nix and Nix flake in their environment, this project contains a flake for easy reproducibility. +Simply use ```sh -cargo run --release -- +nix run github:qed-solver/prover -- ``` -where the `` are paths to input files or directories containing the files. -The current setup of the solver has the Z3 *and* CVC5 solver as a runtime dependency, -so you need to have both the `z3` *and* `cvc5` executable in the `PATH` when running Cosette. -The results will be simply printed out, with the name of each file and their result (provable/not provable). -You can set the environment variable to `RUST_LOG=info` when running and get a (very) verbose output. +to run the prover with arguments ``. + +To have a development environment with all the build-time and runtime dependencies, run `nix shell .` in the project's root. +For `direnv` users, a `.envrc` file is also prepared, so simply `cd` into the project directory, and `direnv` will try to reproduce the development environment. +(NOTICE: For first-time use, you need to explicitly trust the `.envrc` file, by executing `direnv allow .`) + +## Usage + +Normally, one invoke the parser as +```sh +qed-prover [input-path]... +``` +where every `input-path` is a JSON file that represents a case to prove, or a directory containing such files. +Currently one can generate conforming JSON files using [this parser](https://github.com/qed-solver/parser) by passing in SQL statements. + +During execution, the results will simply be printed out, with the names of every files followed by their result (provable/not provable). +One can use the environment variable `QED_SMT_TIMEOUT` to set a timeout for each SMT request the prover makes in milliseconds, which is defaulted to 10,000. +Setting the environment variable `RUST_LOG=info` when running would give a (very) verbose output. This is useful for debugging as it prints out how expressions are simplified in the solver at various stages. -Then input files should be generated by the parser. -They are all in JSON format. -The directory `tests/RelOptRulesTest/` contains such files that are directly extracted from the [Calcite](https://calcite.apache.org/) project. +### Test cases + +For convience, the directory `tests/calcite/` contains JSON files representing the query rewrite pairs use in the optimizer test suite of the [Calcite](https://calcite.apache.org/) project. You may try out running these inputs by ```sh -cargo run --release -- tests/RelOptRulesTest/ +qed-prover tests/calcite/ ``` -WARNING: many test cases in this folder contain features that we haven't support yet. -Now ~200 out of all ~380 cases are actually provable. +WARNING: some test cases in this folder contain features that we haven't support yet. +Now ~290 out of all ~440 cases are actually provable. ## Feature coverage ### Supported features + - Basic `SELECT-FROM-WHERE` queries -- Set operations (`UNION`, `UNION ALL`, `EXCEPT`, and `EXCEPT ALL`) -- Joins (`INNER JOIN`, `LEFT`/`RIGHT`/`FULL` `OUTER JOIN`, `SEMI`/`ANTI` `JOIN`, and correlated join) +- Set operations (`UNION`, `UNION ALL`, `INTERSECT` (but not `INTERSECT ALL`), `EXCEPT`, and `EXCEPT ALL`) +- Joins (`INNER JOIN`, `LEFT`/`RIGHT`/`FULL` `OUTER JOIN`, `SEMI`/`ANTI` `JOIN`, and lateral/correlated join) - `DISTINCT` - `VALUES` - Aggretation (as uninterpreted functions) @@ -48,55 +70,11 @@ Now ~200 out of all ~380 cases are actually provable. - Arbitrary `CHECK` constraints ### Planned features + - Foreign key constraint - Law of excluded middle ($A + \neg A = 1$) -- `INTERSECT` (but not `INTERSECT ALL`) ### Unsupported features + - Semantics of aggretations, such as understanding them as algebras over a monad - Semantics of `ORDER BY` and `LIMIT` (requires temporarily modelling a table as list?) - -## Reproducible environment - -### With Nix - -The project contains a Nix flake for easy reproducibility. -Simply use `nix run . -- ` to run the prover. -For `direnv` user, create the `.envrc` file in the project root directory with the content -``` -use flake -``` -(NOTICE: For first-time use, you need to explicitly trust the `.envrc` file, and execute `direnv allow .` to proceed with the build) -Now simply `cd` into the project directory, and `direnv` will try to reproduce the development environment. - -### With GNU Guix - -All the development dependencies are declared in `manifest.scm`, with packages drawn from `channels.scm`. -The `channels.lock` file is then derived from `channels.scm` to pin down channels to their exact Git commit hashes. -Finally, one can use `direnv` to automatically reproduce the declared environment after entering the project directory. - -To run with this setup, add the following to `~/.config/direnv/direnvrc`. -```bash -use_guix-shell() { - local profile=./.profile - local channels=./channels.lock - [ -L $profile ] && rm $profile - if [ -f $channels ]; then - eval "$(guix time-machine -C "$channels" -- shell -r "$profile" "$@" --search-paths)" - else - eval "$(guix shell -r "$profile" "$@" --search-paths)" - fi -} -``` -And create the `.envrc` file under the project root directory with content -``` -use guix-shell -``` -(NOTICE: For first-time use, you need to explicitly trust the `.envrc` file, and execute `direnv allow .` to proceed with the build) -Now simply `cd` into the project directory, and `direnv` will try to reproduce the development environment. - -Within the development envrionment created by `direnv`, one can then use the convensional -``` -cargo run --release -- -``` -to run the Cosette prover. diff --git a/flake.nix b/flake.nix index a4fac89..609743f 100644 --- a/flake.nix +++ b/flake.nix @@ -30,16 +30,16 @@ ++ lib.optionals stdenv.isDarwin [ libiconv ]; }; cargoArtifacts = craneLib.buildDepsOnly packageDef; - cosette-prover = craneLib.buildPackage (packageDef // { + qed-prover = craneLib.buildPackage (packageDef // { inherit cargoArtifacts; doNotLinkInheritedArtifacts = true; - postInstall = with pkgs; "wrapProgram $out/bin/cosette-prover --set PATH ${lib.makeBinPath [ cvc5 z3_4_12 ]}"; + postInstall = with pkgs; "wrapProgram $out/bin/qed-prover --set PATH ${lib.makeBinPath [ cvc5 z3_4_12 ]}"; }); in { - packages.default = cosette-prover; + packages.default = qed-prover; devShells.default = pkgs.mkShell { - inputsFrom = [ cosette-prover ]; + inputsFrom = [ qed-prover ]; packages = with pkgs; [ rust-analyzer julia-bin jless ] ++ lib.optionals stdenv.isLinux [ linuxPackages.perf ]; }; }); diff --git a/src/pipeline/shared.rs b/src/pipeline/shared.rs index c0d664f..76fea7d 100644 --- a/src/pipeline/shared.rs +++ b/src/pipeline/shared.rs @@ -513,9 +513,7 @@ impl<'c> Ctx<'c> { } pub fn timeout() -> Duration { - if let Some(t) = - std::env::var("COSETTE_SMT_TIMEOUT").ok().and_then(|t| t.parse::().ok()) - { + if let Some(t) = std::env::var("QED_SMT_TIMEOUT").ok().and_then(|t| t.parse::().ok()) { Duration::from_millis(t) } else { Duration::from_secs(10)