diff --git a/Cargo.toml b/Cargo.toml index c01654790e9f1..be99180947c48 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ members = [ ] [workspace.package] -version = "21.1.0" +version = "22.0.0" edition = "2021" readme = "README.md" authors = ["Apache Arrow "] diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 1f1d9792d050c..b41e54e19f000 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-benchmarks" description = "DataFusion Benchmarks" -version = "21.1.0" +version = "22.0.0" edition = "2021" authors = ["Apache Arrow "] homepage = "https://github.com/apache/arrow-datafusion" @@ -34,7 +34,7 @@ snmalloc = ["snmalloc-rs"] [dependencies] arrow = { workspace = true } -datafusion = { path = "../datafusion/core", version = "21.1.0", features = ["scheduler"] } +datafusion = { path = "../datafusion/core", version = "22.0.0", features = ["scheduler"] } env_logger = "0.10" futures = "0.3" mimalloc = { version = "0.1", optional = true, default-features = false } @@ -48,4 +48,4 @@ test-utils = { path = "../test-utils/", version = "0.1.0" } tokio = { version = "^1.0", features = ["macros", "rt", "rt-multi-thread", "parking_lot"] } [dev-dependencies] -datafusion-proto = { path = "../datafusion/proto", version = "21.1.0" } +datafusion-proto = { path = "../datafusion/proto", version = "22.0.0" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index e385f9f6483b0..0f26e5720c043 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -301,7 +301,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -576,9 +576,9 @@ checksum = "13418e745008f7349ec7e449155f419a61b92b58a99cc3616942b926825ec76b" [[package]] name = "core-foundation-sys" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" @@ -659,7 +659,7 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -676,7 +676,7 @@ checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -694,7 +694,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "21.1.0" +version = "22.0.0" dependencies = [ "ahash", "arrow", @@ -740,7 +740,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "21.1.0" +version = "22.0.0" dependencies = [ "arrow", "async-trait", @@ -758,7 +758,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "21.1.0" +version = "22.0.0" dependencies = [ "arrow", "arrow-array", @@ -771,7 +771,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "21.1.0" +version = "22.0.0" dependencies = [ "dashmap", "datafusion-common", @@ -787,7 +787,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "21.1.0" +version = "22.0.0" dependencies = [ "ahash", "arrow", @@ -797,7 +797,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "21.1.0" +version = "22.0.0" dependencies = [ "arrow", "async-trait", @@ -813,7 +813,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "21.1.0" +version = "22.0.0" dependencies = [ "ahash", "arrow", @@ -843,7 +843,7 @@ dependencies = [ [[package]] name = "datafusion-row" -version = "21.1.0" +version = "22.0.0" dependencies = [ "arrow", "datafusion-common", @@ -853,7 +853,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "21.1.0" +version = "22.0.0" dependencies = [ "arrow", "arrow-schema", @@ -963,7 +963,7 @@ checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0" dependencies = 
[ "errno-dragonfly", "libc", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -997,13 +997,13 @@ dependencies = [ [[package]] name = "fd-lock" -version = "3.0.11" +version = "3.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9799aefb4a2e4a01cc47610b1dd47c18ab13d991f27bbcaed9296f5a53d5cbad" +checksum = "39ae6b3d9530211fb3b12a95374b8b0823be812f53d09e18c5675c0146b09642" dependencies = [ "cfg-if", "rustix", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -1103,7 +1103,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -1148,9 +1148,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" dependencies = [ "cfg-if", "libc", @@ -1316,9 +1316,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.54" +version = "0.1.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c17cc76786e99f8d2f055c11159e7f0091c42474dcc3189fbab96072e873e6d" +checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1375,13 +1375,13 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "io-lifetimes" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb" +checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" dependencies = [ "hermit-abi 0.3.1", "libc", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -1495,9 +1495,9 @@ dependencies 
= [ [[package]] name = "libc" -version = "0.2.140" +version = "0.2.141" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" +checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" [[package]] name = "libm" @@ -1507,9 +1507,9 @@ checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" [[package]] name = "libmimalloc-sys" -version = "0.1.30" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8c7cbf8b89019683667e347572e6d55a7df7ea36b0c4ce69961b0cde67b174" +checksum = "43a558e3d911bc3c7bfc8c78bc580b404d6e51c1cefbf656e176a94b49b0df40" dependencies = [ "cc", "libc", @@ -1597,9 +1597,9 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "mimalloc" -version = "0.1.34" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dcb174b18635f7561a0c6c9fc2ce57218ac7523cf72c50af80e2d79ab8f3ba1" +checksum = "3d88dad3f985ec267a3fcb7a1726f5cb1a7e8cad8b646e70a84f967210df23da" dependencies = [ "libmimalloc-sys", ] @@ -1628,7 +1628,7 @@ dependencies = [ "libc", "log", "wasi", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -1741,9 +1741,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1ea8f683b4f89a64181393742c041520a1a87e9775e6b4c0dd5a3281af05fc6" +checksum = "ec9cd6ca25e796a49fa242876d1c4de36a24a6da5258e9f0bc062dbf5e81c53b" dependencies = [ "async-trait", "base64", @@ -1808,7 +1808,7 @@ dependencies = [ "libc", "redox_syscall 0.2.16", "smallvec", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -1969,18 +1969,18 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.54" +version = "1.0.56" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534" +checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" dependencies = [ "unicode-ident", ] [[package]] name = "quick-xml" -version = "0.27.1" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffc053f057dd768a56f62cd7e434c42c831d296968997e9ac1f76ea7c2d14c41" +checksum = "e5c1a97b1bc42b1d550bfb48d4262153fe400a12bab1511821736f7eac76d7e2" dependencies = [ "memchr", "serde", @@ -2148,16 +2148,16 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.5" +version = "0.37.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e78cc525325c06b4a7ff02db283472f3c042b7ff0c391f96c6d5ac6f4f91b75" +checksum = "1aef160324be24d31a62147fae491c14d2204a3865c7ca8c3b0d7f7bcb3ea635" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -2276,7 +2276,7 @@ checksum = "4c614d17805b093df4b147b51339e7e44bf05ef59fba1e45d83500bcfb4d8585" dependencies = [ "proc-macro2", "quote", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -2455,9 +2455,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.12" +version = "2.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79d9531f94112cfc3e4c8f5f02cb2b58f72c97b7efd85f70203cc6d8efda5927" +checksum = "4c9da457c5285ac1f936ebd076af6dac17a61cfe7826f2076b4d015cf47bc8ec" dependencies = [ "proc-macro2", "quote", @@ -2474,7 +2474,7 @@ dependencies = [ "fastrand", "redox_syscall 0.3.5", "rustix", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -2509,7 +2509,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -2562,7 +2562,7 @@ dependencies = [ "pin-project-lite", "socket2", 
"tokio-macros", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -2573,7 +2573,7 @@ checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" dependencies = [ "proc-macro2", "quote", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -2910,11 +2910,11 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows" -version = "0.46.0" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdacb41e6a96a052c6cb63a144f24900236121c6f63f4f8219fef5977ecb0c25" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" dependencies = [ - "windows-targets", + "windows-targets 0.48.0", ] [[package]] @@ -2923,7 +2923,16 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows-targets", + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.0", ] [[package]] @@ -2932,13 +2941,28 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", ] [[package]] @@ -2947,42 +2971,84 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + [[package]] name = "windows_aarch64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + [[package]] name = "windows_i686_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + [[package]] name = "windows_i686_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + [[package]] name = "windows_x86_64_gnu" 
version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + [[package]] name = "windows_x86_64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + [[package]] name = "winreg" version = "0.10.1" @@ -3016,7 +3082,7 @@ version = "0.12.3+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" dependencies = [ - "zstd-safe 6.0.4+zstd.1.5.4", + "zstd-safe 6.0.5+zstd.1.5.4", ] [[package]] @@ -3031,9 +3097,9 @@ dependencies = [ [[package]] name = "zstd-safe" -version = "6.0.4+zstd.1.5.4" +version = "6.0.5+zstd.1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7afb4b54b8910cf5447638cb54bf4e8a65cbedd783af98b98c62ffe91f185543" +checksum = "d56d9e60b4b1758206c238a10165fbcae3ca37b01744e394c463463f6529d23b" dependencies = [ "libc", "zstd-sys", @@ -3041,9 +3107,9 @@ dependencies = [ [[package]] name = "zstd-sys" 
-version = "2.0.7+zstd.1.5.4" +version = "2.0.8+zstd.1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94509c3ba2fe55294d752b79842c530ccfab760192521df74a081a78d2b3c7f5" +checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" dependencies = [ "cc", "libc", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index f1758f49d1b5d..e3ae99f962f90 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-cli" description = "Command Line Client for DataFusion query engine." -version = "21.1.0" +version = "22.0.0" authors = ["Apache Arrow "] edition = "2021" keywords = ["arrow", "datafusion", "query", "sql"] @@ -32,7 +32,7 @@ readme = "README.md" arrow = "36.0.0" async-trait = "0.1.41" clap = { version = "3", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "21.1.0" } +datafusion = { path = "../datafusion/core", version = "22.0.0" } dirs = "4.0.0" env_logger = "0.9" mimalloc = { version = "0.1", default-features = false } diff --git a/datafusion/CHANGELOG.md b/datafusion/CHANGELOG.md index 017ec30ea1c0f..b574084a9c5c4 100644 --- a/datafusion/CHANGELOG.md +++ b/datafusion/CHANGELOG.md @@ -19,3897 +19,24 @@ # Changelog -## [21.1.0](https://github.com/apache/arrow-datafusion/tree/21.1.0) (2023-03-24) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/21.0.0...21.1.0) - - -**Fixed bugs:** - -- fix: Use consistent arrow version (do not use both `arrow 34.0.0` and `arrow-array 35.0.0`) [#5765](https://github.com/apache/arrow-datafusion/pull/5765) - - -## [21.0.0](https://github.com/apache/arrow-datafusion/tree/21.0.0) (2023-03-24) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/20.0.0...21.0.0) - -**Breaking changes:** - -- Support arbitrary user defined partition column in `ListingTable` (rather than assuming they are always Dictionary encoded) 
[#5545](https://github.com/apache/arrow-datafusion/pull/5545) (crepererum) -- Use TableReference for TableScan [#5615](https://github.com/apache/arrow-datafusion/pull/5615) (alamb) -- Update the type of `param_values` to `&[ScalarValue]` in function `replace_params_with_values` [#5640](https://github.com/apache/arrow-datafusion/pull/5640) (HaoYang670) - -**Implemented enhancements:** - -- feat: extract (epoch from col) [#5555](https://github.com/apache/arrow-datafusion/pull/5555) (Weijun-H) -- INSERT INTO support for MemTable [#5520](https://github.com/apache/arrow-datafusion/pull/5520) (metesynnada) -- Memory limited nested-loop join [#5564](https://github.com/apache/arrow-datafusion/pull/5564) (korowa) -- Timestamp subtraction and interval operations for `ScalarValue` [#5603](https://github.com/apache/arrow-datafusion/pull/5603) (berkaysynnada) -- Substrait: Add cast expression with bool, integers and decimal128 support [#5137](https://github.com/apache/arrow-datafusion/pull/5137) (nseekhao) -- Support `date_bin` with 2 arguments [#5643](https://github.com/apache/arrow-datafusion/pull/5643) (Weijun-H) -- improve: support combining multiple grouping expressions [#5559](https://github.com/apache/arrow-datafusion/pull/5559) (yukkit) -- Substrait: Add support for WindowFunction [#5653](https://github.com/apache/arrow-datafusion/pull/5653) (nseekhao) -- feat: `date_bin` supports MonthDayNano, microsecond and nanosecond units [#5698](https://github.com/apache/arrow-datafusion/pull/5698) (stuartcarnie) -- Handle serialization of TryCast [#5692](https://github.com/apache/arrow-datafusion/pull/5692) (thinkharderdev) - -**Fixed bugs:** - -- fix: failed to execute sql with subquery [#5542](https://github.com/apache/arrow-datafusion/pull/5542) (MichaelScofield) -- fix: cast literal to timestamp [#5517](https://github.com/apache/arrow-datafusion/pull/5517) (Weijun-H) -- fix dataframe only boolean/binary column got error on describe 
[#5585](https://github.com/apache/arrow-datafusion/pull/5585) (jiangzhx) -- Median returns null on empty input instead of error [#5624](https://github.com/apache/arrow-datafusion/pull/5624) (toppyy) -- add CountWildcardRule to fix error on Count(Expr:Wildcard) with DataFrame API [#5627](https://github.com/apache/arrow-datafusion/pull/5627) (jiangzhx) -- fix: correct CountWildcardRule and move analyzer into a new directory. [#5671](https://github.com/apache/arrow-datafusion/pull/5671) (jackwener) - -**Documentation updates:** - -- Minor: improve docstrings for `ObjectStoreRegistry` and `ObjectStoreProvider` [#5577](https://github.com/apache/arrow-datafusion/pull/5577) (alamb) -- Clarify differences of DataFusion with other systems in README.md [#5578](https://github.com/apache/arrow-datafusion/pull/5578) (alamb) -- Minor: Document docs build process [#5687](https://github.com/apache/arrow-datafusion/pull/5687) (alamb) - -**Merged pull requests:** - -- Refactor DecorrelateWhereExists and add back Distinct if needs [#5345](https://github.com/apache/arrow-datafusion/pull/5345) (ygf11) -- Simplify simplify test cases, support `^`, `&`, `|`, `<<` and `>>` operators for building exprs [#5511](https://github.com/apache/arrow-datafusion/pull/5511) (alamb) -- minor: improve sqllogictest docs [#5553](https://github.com/apache/arrow-datafusion/pull/5553) (alamb) -- Remove unused dependencies found by cargo-machete [#5552](https://github.com/apache/arrow-datafusion/pull/5552) (Jefffrey) -- make AggregateStatistics return the same result whether optimizer disabled or enabled [#5485](https://github.com/apache/arrow-datafusion/pull/5485) (jiangzhx) -- Avoid circular(ish) dependency parquet-test-utils on datafusion, try 2 [#5536](https://github.com/apache/arrow-datafusion/pull/5536) (alamb) -- Enforce ambiguity check whilst normalizing columns [#5509](https://github.com/apache/arrow-datafusion/pull/5509) (Jefffrey) -- Generated changelog for 20.0.0 
[#5563](https://github.com/apache/arrow-datafusion/pull/5563) (andygrove) -- fix: failed to execute sql with subquery [#5542](https://github.com/apache/arrow-datafusion/pull/5542) (MichaelScofield) -- Revert describe count() workaround [#5556](https://github.com/apache/arrow-datafusion/pull/5556) (Jefffrey) -- fix: cast literal to timestamp [#5517](https://github.com/apache/arrow-datafusion/pull/5517) (Weijun-H) -- feat: extract (epoch from col) [#5555](https://github.com/apache/arrow-datafusion/pull/5555) (Weijun-H) -- Minor: improve docstrings for `ObjectStoreRegistry` and `ObjectStoreProvider` [#5577](https://github.com/apache/arrow-datafusion/pull/5577) (alamb) -- Minor: Move RuntimeEnv to `datafusion_execution` [#5580](https://github.com/apache/arrow-datafusion/pull/5580) (alamb) -- INSERT INTO support for MemTable [#5520](https://github.com/apache/arrow-datafusion/pull/5520) (metesynnada) -- Minor: restore explicit match to help avoid subtle bugs in the future when new `Expr` variants are added [#5579](https://github.com/apache/arrow-datafusion/pull/5579) (alamb) -- refactor: add more error info when array is empty [#5560](https://github.com/apache/arrow-datafusion/pull/5560) (Weijun-H) -- Memory limited nested-loop join [#5564](https://github.com/apache/arrow-datafusion/pull/5564) (korowa) -- Support catalog.schema.table.column in SQL SELECT and WHERE [#5343](https://github.com/apache/arrow-datafusion/pull/5343) (Jefffrey) -- Minor: clean up aggregates.slt tests [#5599](https://github.com/apache/arrow-datafusion/pull/5599) (alamb) -- Minor: Port more aggregate tests to sqllogictests [#5574](https://github.com/apache/arrow-datafusion/pull/5574) (alamb) -- Add a utility function to get all of the PartitionedFile for an ExecutionPlan [#5572](https://github.com/apache/arrow-datafusion/pull/5572) (yahoNanJing) -- minor: port some join tests to sqllogictests [#5567](https://github.com/apache/arrow-datafusion/pull/5567) (ygf11) -- Support arbitrary user defined 
partition column in `ListingTable` (rather than assuming they are always Dictionary encoded) [#5545](https://github.com/apache/arrow-datafusion/pull/5545) (crepererum) -- feat: add the similar optimization function for bitwise negative [#5516](https://github.com/apache/arrow-datafusion/pull/5516) (izveigor) -- Clarify differences of DataFusion with other systems in README.md [#5578](https://github.com/apache/arrow-datafusion/pull/5578) (alamb) -- Minor: Add more documentation about table_partition_columns [#5576](https://github.com/apache/arrow-datafusion/pull/5576) (alamb) -- Add Analyzer phase to DataFusion , add basic validation logic to Subquery Plans and Expressions [#5570](https://github.com/apache/arrow-datafusion/pull/5570) (mingmwang) -- Use TableReference for TableScan [#5615](https://github.com/apache/arrow-datafusion/pull/5615) (alamb) -- Preserve casts in rewrite_sort_cols_by_aggs [#5611](https://github.com/apache/arrow-datafusion/pull/5611) (mpurins-coralogix) -- Miscellaneous ArrayData Cleanup [#5612](https://github.com/apache/arrow-datafusion/pull/5612) (tustvold) -- Update substrait requirement from 0.4 to 0.5 [#5620](https://github.com/apache/arrow-datafusion/pull/5620) (dependabot[bot]) -- Do not break pipeline for window queries with GROUPS [#5587](https://github.com/apache/arrow-datafusion/pull/5587) (mustafasrepo) -- fix dataframe only boolean/binary column got error on describe [#5585](https://github.com/apache/arrow-datafusion/pull/5585) (jiangzhx) -- Minor: Add Documentation and Examples to `TableReference` [#5616](https://github.com/apache/arrow-datafusion/pull/5616) (alamb) -- [FOLLOWUP] eliminate the duplicated sort keys in Order By clause [#5607](https://github.com/apache/arrow-datafusion/pull/5607) (mingmwang) -- Update default behaviour of compression algorithms (support multistreams) [#5629](https://github.com/apache/arrow-datafusion/pull/5629) (metesynnada) -- Timestamp subtraction and interval operations for `ScalarValue` 
[#5603](https://github.com/apache/arrow-datafusion/pull/5603) (berkaysynnada) -- Use modulus dyn kernels for arithmetic expressions [#5634](https://github.com/apache/arrow-datafusion/pull/5634) (viirya) -- Minor: reduce cloning in `infer_placeholder_types` [#5638](https://github.com/apache/arrow-datafusion/pull/5638) (alamb) -- Move `SessionConfig` to `datafusion_execution` [#5581](https://github.com/apache/arrow-datafusion/pull/5581) (alamb) -- Update the type of `param_values` to `&[ScalarValue]` in function `replace_params_with_values` [#5640](https://github.com/apache/arrow-datafusion/pull/5640) (HaoYang670) -- WITH ORDER support on CREATE EXTERNAL TABLE [#5618](https://github.com/apache/arrow-datafusion/pull/5618) (metesynnada) -- Median returns null on empty input instead of error [#5624](https://github.com/apache/arrow-datafusion/pull/5624) (toppyy) -- feat: Memory limited merge join [#5632](https://github.com/apache/arrow-datafusion/pull/5632) (korowa) -- Update rstest requirement from 0.16.0 to 0.17.0 [#5648](https://github.com/apache/arrow-datafusion/pull/5648) (dependabot[bot]) -- add CountWildcardRule to fix error on Count(Expr:Wildcard) with DataFrame API [#5627](https://github.com/apache/arrow-datafusion/pull/5627) (jiangzhx) -- Add OuterReferenceColumn to Expr to represent correlated expression [#5593](https://github.com/apache/arrow-datafusion/pull/5593) (mingmwang) -- Minor: Simplify `Result` [#5659](https://github.com/apache/arrow-datafusion/pull/5659) (comphead) -- minor: remove redundant `DataFusionError` and fix `clippy` [#5669](https://github.com/apache/arrow-datafusion/pull/5669) (jackwener) -- Substrait: Add cast expression with bool, integers and decimal128 support [#5137](https://github.com/apache/arrow-datafusion/pull/5137) (nseekhao) -- Support `date_bin` with 2 arguments [#5643](https://github.com/apache/arrow-datafusion/pull/5643) (Weijun-H) -- Add LogicalPlanSignature and use in the optimizer loop 
[#5623](https://github.com/apache/arrow-datafusion/pull/5623) (mslapek) -- fix: correct CountWildcardRule and move analyzer into a new directory. [#5671](https://github.com/apache/arrow-datafusion/pull/5671) (jackwener) -- refactoring: added tests and fixed comments in "math_expressions" [#5656](https://github.com/apache/arrow-datafusion/pull/5656) (izveigor) -- improve: support combining multiple grouping expressions [#5559](https://github.com/apache/arrow-datafusion/pull/5559) (yukkit) -- community: polish issue template [#5668](https://github.com/apache/arrow-datafusion/pull/5668) (jackwener) -- minor: correct issue template [#5679](https://github.com/apache/arrow-datafusion/pull/5679) (jackwener) -- Change ObjectStoreRegistry from struct to trait to provide polymorphism [#5543](https://github.com/apache/arrow-datafusion/pull/5543) (yahoNanJing) -- Minor: Add `Extensions::new()` [#5676](https://github.com/apache/arrow-datafusion/pull/5676) (alamb) -- minor: add with_plan for Subquery [#5680](https://github.com/apache/arrow-datafusion/pull/5680) (jackwener) -- minor: reduce replication in `date_bin` implementation [#5673](https://github.com/apache/arrow-datafusion/pull/5673) (alamb) -- Fixes #5500 - Add a GitHub Actions workflow that builds the docs [#5670](https://github.com/apache/arrow-datafusion/pull/5670) (martin-g) -- Minor: port some content to the docs [#5684](https://github.com/apache/arrow-datafusion/pull/5684) (alamb) -- Docs: Add logo back to sidebar [#5688](https://github.com/apache/arrow-datafusion/pull/5688) (alamb) -- Substrait: Add support for WindowFunction [#5653](https://github.com/apache/arrow-datafusion/pull/5653) (nseekhao) -- Add -o option to all e2e benches [#5658](https://github.com/apache/arrow-datafusion/pull/5658) (jaylmiller) -- create table default to null [#5606](https://github.com/apache/arrow-datafusion/pull/5606) (Weijun-H) -- Minor: Document docs build process [#5687](https://github.com/apache/arrow-datafusion/pull/5687) 
(alamb) -- Minor: change doc formatting to force a republish [#5702](https://github.com/apache/arrow-datafusion/pull/5702) (alamb) -- Move `TaskContext` to datafusion-execution [#5677](https://github.com/apache/arrow-datafusion/pull/5677) (alamb) -- feat: `date_bin` supports MonthDayNano, microsecond and nanosecond units [#5698](https://github.com/apache/arrow-datafusion/pull/5698) (stuartcarnie) -- Return plan error when adding utf8 and timestamp [#5696](https://github.com/apache/arrow-datafusion/pull/5696) (Weijun-H) -- Handle serialization of TryCast [#5692](https://github.com/apache/arrow-datafusion/pull/5692) (thinkharderdev) -- analyzer: move InlineTableScan into Analyzer. [#5683](https://github.com/apache/arrow-datafusion/pull/5683) (jackwener) -- minor: Add doc comments to clarify what Analyzer is for [#5705](https://github.com/apache/arrow-datafusion/pull/5705) (alamb) - -## [20.0.0](https://github.com/apache/arrow-datafusion/tree/20.0.0) (2023-03-10) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/19.0.0...20.0.0 - -**Breaking changes:** - -- Minor: Move TableProviderFactories up out of `RuntimeEnv` and into `SessionState` [#5477](https://github.com/apache/arrow-datafusion/pull/5477) (alamb) -- chore: Remove references from SessionState from physical_plan [#5455](https://github.com/apache/arrow-datafusion/pull/5455) (alamb) -- Implement `Debug` for `ExecutionProps` and `VarProvider` [#5489](https://github.com/apache/arrow-datafusion/pull/5489) (alamb) - -**Implemented enhancements:** - -- Add UserDefinedLogicalNodeCore [#5521](https://github.com/apache/arrow-datafusion/pull/5521) (mslapek) -- feat: add `arrow_cast` function to support supports arbitrary arrow types [#5166](https://github.com/apache/arrow-datafusion/pull/5166) (alamb) -- feat: interval add timestamp [#5491](https://github.com/apache/arrow-datafusion/pull/5491) (Weijun-H) -- feat: `ParquetExec` predicate preservation 
[#5495](https://github.com/apache/arrow-datafusion/pull/5495) (crepererum) -- feat: add optimization rules for bitwise operations [#5423](https://github.com/apache/arrow-datafusion/pull/5423) (izveigor) -- feat: Support bitwise operations for unsigned integer types [#5476](https://github.com/apache/arrow-datafusion/pull/5476) (izveigor) -- feat: eliminate the duplicated sort keys in Order By clause [#5462](https://github.com/apache/arrow-datafusion/pull/5462) (jackwener) -- feat: add name() method to UserDefinedLogicalNode [#5450](https://github.com/apache/arrow-datafusion/pull/5450) (waynexia) -- feat: express unsigned literal in substrait [#5448](https://github.com/apache/arrow-datafusion/pull/5448) (waynexia) -- feat: `extensions_options` macro [#5442](https://github.com/apache/arrow-datafusion/pull/5442) (crepererum) -- [feat]:fast check has column [#5328](https://github.com/apache/arrow-datafusion/pull/5328) (suxiaogang223) -- feat: eliminate unnecessary projection. [#5366](https://github.com/apache/arrow-datafusion/pull/5366) (jackwener) - -**Fixed bugs:** - -- revert accidently deleted size code in count_distinct [#5533](https://github.com/apache/arrow-datafusion/pull/5533) (comphead) -- fix: return schema of ExtensionPlan instead of its children's [#5514](https://github.com/apache/arrow-datafusion/pull/5514) (waynexia) -- fix: logical merge conflict -- hash_join tests with passing boolean by value [#5531](https://github.com/apache/arrow-datafusion/pull/5531) (korowa) -- fix: build union schema with child has same column name but qualifier… [#5452](https://github.com/apache/arrow-datafusion/pull/5452) (yukkit) -- Fix is_distinct from for float NaN values [#5446](https://github.com/apache/arrow-datafusion/pull/5446) (comphead) -- Bug/union wrong casting [#5342](https://github.com/apache/arrow-datafusion/pull/5342) (berkaysynnada) -- fix nested loop join with literal join filter [#5431](https://github.com/apache/arrow-datafusion/pull/5431) (ygf11) -- Fix 
filter pushdown for extension plans [#5425](https://github.com/apache/arrow-datafusion/pull/5425) (thinkharderdev) -- Bug fix: Window frame range value outside the type range [#5384](https://github.com/apache/arrow-datafusion/pull/5384) (mustafasrepo) -- fix: misc phys. expression display bugs [#5387](https://github.com/apache/arrow-datafusion/pull/5387) (crepererum) - -**Documentation updates:** - -- Minor: Improve docs for UserDefinedLogicalNode `dyn_eq` and `dyn_hash` [#5515](https://github.com/apache/arrow-datafusion/pull/5515) (alamb) -- chore: add known project ZincObserve [#5376](https://github.com/apache/arrow-datafusion/pull/5376) (hengfeiyang) -- docs: clarify spark [#5391](https://github.com/apache/arrow-datafusion/pull/5391) (hyoklee) - -**Merged pull requests:** - -- Manual changelog for 20.0.0 [#5551](https://github.com/apache/arrow-datafusion/pull/5551) (andygrove) -- Prepare for 20.0.0 release [Part 1] [#5539](https://github.com/apache/arrow-datafusion/pull/5539) (andygrove) -- chore: deduplicate workspace fields in Cargo.toml [#5519](https://github.com/apache/arrow-datafusion/pull/5519) (waynexia) -- Add necessary features to optimizer [#5540](https://github.com/apache/arrow-datafusion/pull/5540) (viirya) -- Minor: add the concise way for matching numerics [#5537](https://github.com/apache/arrow-datafusion/pull/5537) (izveigor) -- Add UserDefinedLogicalNodeCore [#5521](https://github.com/apache/arrow-datafusion/pull/5521) (mslapek) -- revert accidently deleted size code in count_distinct [#5533](https://github.com/apache/arrow-datafusion/pull/5533) (comphead) -- fix: return schema of ExtensionPlan instead of its children's [#5514](https://github.com/apache/arrow-datafusion/pull/5514) (waynexia) -- Minor: Move `ObjectStoreRegistry` to datafusion_execution crate [#5478](https://github.com/apache/arrow-datafusion/pull/5478) (alamb) -- Minor: Add db-benchmark URL to db-benchmark readme [#5503](https://github.com/apache/arrow-datafusion/pull/5503) 
(alamb) -- minor: fix clippy problem in new version. [#5532](https://github.com/apache/arrow-datafusion/pull/5532) (jackwener) -- fix: logical merge conflict -- hash_join tests with passing boolean by value [#5531](https://github.com/apache/arrow-datafusion/pull/5531) (korowa) -- Memory limited hash join [#5490](https://github.com/apache/arrow-datafusion/pull/5490) (korowa) -- minor: improve error style [#5510](https://github.com/apache/arrow-datafusion/pull/5510) (alamb) -- feat: add `arrow_cast` function to support supports arbitrary arrow types [#5166](https://github.com/apache/arrow-datafusion/pull/5166) (alamb) -- build(deps): update sqlparser requirement from 0.30 to 0.32 w/ API update [#5457](https://github.com/apache/arrow-datafusion/pull/5457) (alamb) -- Allow setting config extensions for TaskContext [#5497](https://github.com/apache/arrow-datafusion/pull/5497) (mpurins-coralogix) -- Minor: Improve docs for UserDefinedLogicalNode `dyn_eq` and `dyn_hash` [#5515](https://github.com/apache/arrow-datafusion/pull/5515) (alamb) -- feat: interval add timestamp [#5491](https://github.com/apache/arrow-datafusion/pull/5491) (Weijun-H) -- Pass booleans by value instead of by reference [#5487](https://github.com/apache/arrow-datafusion/pull/5487) (maxburke) -- Minor: Move TableProviderFactories up out of `RuntimeEnv` and into `SessionState` [#5477](https://github.com/apache/arrow-datafusion/pull/5477) (alamb) -- feat: `ParquetExec` predicate preservation [#5495](https://github.com/apache/arrow-datafusion/pull/5495) (crepererum) -- feat: add optimization rules for bitwise operations [#5423](https://github.com/apache/arrow-datafusion/pull/5423) (izveigor) -- chore: Remove references from SessionState from physical_plan [#5455](https://github.com/apache/arrow-datafusion/pull/5455) (alamb) -- Implement `Debug` for `ExecutionProps` and `VarProvider` [#5489](https://github.com/apache/arrow-datafusion/pull/5489) (alamb) -- feat: Support bitwise operations for unsigned 
integer types [#5476](https://github.com/apache/arrow-datafusion/pull/5476) (izveigor) -- Apply workaround for #5444 to `DataFrame::describe` [#5468](https://github.com/apache/arrow-datafusion/pull/5468) (jiangzhx) -- feat: eliminate the duplicated sort keys in Order By clause [#5462](https://github.com/apache/arrow-datafusion/pull/5462) (jackwener) -- Propagate timezone to created arrays [#5481](https://github.com/apache/arrow-datafusion/pull/5481) (maxburke) -- refactor: make GeometricMean not to have update and merge [#5469](https://github.com/apache/arrow-datafusion/pull/5469) (Weijun-H) -- feat: add name() method to UserDefinedLogicalNode [#5450](https://github.com/apache/arrow-datafusion/pull/5450) (waynexia) -- Comment out description text in issue templates [#5482](https://github.com/apache/arrow-datafusion/pull/5482) (Jefffrey) -- feat: express unsigned literal in substrait [#5448](https://github.com/apache/arrow-datafusion/pull/5448) (waynexia) -- fix: build union schema with child has same column name but qualifier… [#5452](https://github.com/apache/arrow-datafusion/pull/5452) (yukkit) -- refactor: make sum_distinct not to have update and merge [#5474](https://github.com/apache/arrow-datafusion/pull/5474) (Weijun-H) -- `compute_decimal_op_dyn_scalar` should not cast lhs array to decimal array [#5465](https://github.com/apache/arrow-datafusion/pull/5465) (viirya) -- feat: `extensions_options` macro [#5442](https://github.com/apache/arrow-datafusion/pull/5442) (crepererum) -- Enable hash joins on FixedSizeBinary columns [#5461](https://github.com/apache/arrow-datafusion/pull/5461) (maxburke) -- Fix is_distinct from for float NaN values [#5446](https://github.com/apache/arrow-datafusion/pull/5446) (comphead) -- Implement/fix Eq and Hash for Expr and LogicalPlan [#5421](https://github.com/apache/arrow-datafusion/pull/5421) (mslapek) -- [feat]:fast check has column [#5328](https://github.com/apache/arrow-datafusion/pull/5328) (suxiaogang223) -- Parquet 
sorting benchmark [#5433](https://github.com/apache/arrow-datafusion/pull/5433) (jaylmiller) -- refactor count_distinct to not to have update and merge [#5408](https://github.com/apache/arrow-datafusion/pull/5408) (Weijun-H) -- build(deps): update zstd requirement from 0.11 to 0.12 [#5458](https://github.com/apache/arrow-datafusion/pull/5458) (alamb) -- Upgrade bytes to 1.4 [#5460](https://github.com/apache/arrow-datafusion/pull/5460) (viirya) -- add std,median result to describe method [#5445](https://github.com/apache/arrow-datafusion/pull/5445) (jiangzhx) -- minor: Port more window tests to sqlogictests [#5434](https://github.com/apache/arrow-datafusion/pull/5434) (alamb) -- Use compute_op_dyn_scalar for datatime [#5315](https://github.com/apache/arrow-datafusion/pull/5315) (viirya) -- add a unit test that cover cast bug. [#5443](https://github.com/apache/arrow-datafusion/pull/5443) (jackwener) -- create new `datafusion-execution` crate, start splitting code out [#5432](https://github.com/apache/arrow-datafusion/pull/5432) (alamb) -- minor: fix clippy in nightly. [#5440](https://github.com/apache/arrow-datafusion/pull/5440) (jackwener) -- Support for Sliding Windows Joins with Symmetric Hash Join (SHJ) [#5322](https://github.com/apache/arrow-datafusion/pull/5322) (metesynnada) -- refactor: ParquetExec logical expr. => phys. expr. [#5419](https://github.com/apache/arrow-datafusion/pull/5419) (crepererum) -- Update README.md fix [DataFusion] links [#5438](https://github.com/apache/arrow-datafusion/pull/5438) (jiangzhx) -- add mean result for describe method [#5435](https://github.com/apache/arrow-datafusion/pull/5435) (jiangzhx) -- add expr_fn::median [#5437](https://github.com/apache/arrow-datafusion/pull/5437) (jiangzhx) -- Bug/union wrong casting [#5342](https://github.com/apache/arrow-datafusion/pull/5342) (berkaysynnada) -- reimplement `push_down_projection` and `prune_column`. 
[#4465](https://github.com/apache/arrow-datafusion/pull/4465) (jackwener) -- Add `expr_fn::stddev` [#5409](https://github.com/apache/arrow-datafusion/pull/5409) (jiangzhx) -- fix nested loop join with literal join filter [#5431](https://github.com/apache/arrow-datafusion/pull/5431) (ygf11) -- add a describe method on DataFrame like Polars [#5226](https://github.com/apache/arrow-datafusion/pull/5226) (jiangzhx) -- Memory reservation & metrics for cross join [#5339](https://github.com/apache/arrow-datafusion/pull/5339) (korowa) -- Optimize count_distinct.size [#5377](https://github.com/apache/arrow-datafusion/pull/5377) (comphead) -- Fix filter pushdown for extension plans [#5425](https://github.com/apache/arrow-datafusion/pull/5425) (thinkharderdev) -- Also push down all filters in TableProvider [#5420](https://github.com/apache/arrow-datafusion/pull/5420) (avantgardnerio) -- Update arrow 34 [#5375](https://github.com/apache/arrow-datafusion/pull/5375) (tustvold) -- Parquet limit pushdown (#5404) [#5416](https://github.com/apache/arrow-datafusion/pull/5416) (tustvold) -- Move file format config.rs to live with the rest of the datasource code [#5406](https://github.com/apache/arrow-datafusion/pull/5406) (alamb) -- Support Zstd compressed files [#5397](https://github.com/apache/arrow-datafusion/pull/5397) (dennybritz) -- Add example of catalog API usage (#5291) [#5326](https://github.com/apache/arrow-datafusion/pull/5326) (jaylmiller) -- Add support for protobuf serialisation of Arrow Map type [#5359](https://github.com/apache/arrow-datafusion/pull/5359) (ahmedriza) -- minor: port window tests to slt (part 2) [#5399](https://github.com/apache/arrow-datafusion/pull/5399) (alamb) -- fix(docs): fix typos [#5403](https://github.com/apache/arrow-datafusion/pull/5403) (WenyXu) -- Try to push down full filter before break-up [#5367](https://github.com/apache/arrow-datafusion/pull/5367) (avantgardnerio) -- enhance: remove more projection. 
[#5402](https://github.com/apache/arrow-datafusion/pull/5402) (jackwener) -- refactor `push_down_filter` to fix dead-loop and use optimizer_recurse. [#5337](https://github.com/apache/arrow-datafusion/pull/5337) (jackwener) -- feat: eliminate unnecessary projection. [#5366](https://github.com/apache/arrow-datafusion/pull/5366) (jackwener) -- minor: add forgotten large_utf8 [#5393](https://github.com/apache/arrow-datafusion/pull/5393) (jackwener) -- Minor: add tests for subquery to join [#5363](https://github.com/apache/arrow-datafusion/pull/5363) (ygf11) -- bugfix: fix master `bors` problem. [#5395](https://github.com/apache/arrow-datafusion/pull/5395) (jackwener) -- Rule ReplaceDistinctWithAggregate [#5354](https://github.com/apache/arrow-datafusion/pull/5354) (mingmwang) -- chore: add known project ZincObserve [#5376](https://github.com/apache/arrow-datafusion/pull/5376) (hengfeiyang) -- refactor: parquet pruning simplifications [#5386](https://github.com/apache/arrow-datafusion/pull/5386) (crepererum) -- Minor: intersect expressions optimization [#5388](https://github.com/apache/arrow-datafusion/pull/5388) (izveigor) -- docs: clarify spark [#5391](https://github.com/apache/arrow-datafusion/pull/5391) (hyoklee) -- UDF zero params #5378 [#5380](https://github.com/apache/arrow-datafusion/pull/5380) (jaylmiller) -- Minor: added some tests for coercion type [#5389](https://github.com/apache/arrow-datafusion/pull/5389) (izveigor) -- minor: make table resolution an independent function ... [#5373](https://github.com/apache/arrow-datafusion/pull/5373) (MichaelScofield) -- minor: port predicates tests to sqllogictests [#5374](https://github.com/apache/arrow-datafusion/pull/5374) (jackwener) -- Bug fix: Window frame range value outside the type range [#5384](https://github.com/apache/arrow-datafusion/pull/5384) (mustafasrepo) -- Fixed small typos in files of the optimizer [#5356](https://github.com/apache/arrow-datafusion/pull/5356) (izveigor) -- fix: misc phys. 
expression display bugs [#5387](https://github.com/apache/arrow-datafusion/pull/5387) (crepererum) -- Prepare for 19.0.0 release [#5381](https://github.com/apache/arrow-datafusion/pull/5381) (andygrove) -- minor: disable tpcds-q41 due to not support decorrelate disjunction subquery [#5369](https://github.com/apache/arrow-datafusion/pull/5369) (jackwener) - -## [19.0.0](https://github.com/apache/arrow-datafusion/tree/19.0.0) (2023-02-24) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/18.0.0...19.0.0) - -**Breaking changes:** - -- Use DataFusionError instead of ArrowError in SendableRecordBatchStream [\#5101](https://github.com/apache/arrow-datafusion/pull/5101) ([comphead](https://github.com/comphead)) -- Update to arrow 32 and Switch to RawDecoder for JSON [\#5056](https://github.com/apache/arrow-datafusion/pull/5056) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Allow `SessionContext::read_csv`, etc to read multiple files [\#4908](https://github.com/apache/arrow-datafusion/pull/4908) ([saikrishna1-bidgely](https://github.com/saikrishna1-bidgely)) - -**Implemented enhancements:** - -- Ignore Arrow in dependabot [\#5340](https://github.com/apache/arrow-datafusion/issues/5340) -- Provide access to internal fields of SessionContext [\#5317](https://github.com/apache/arrow-datafusion/issues/5317) -- Investigate performance drop for DISTINCT queries [\#5313](https://github.com/apache/arrow-datafusion/issues/5313) -- \[DOC\] Update math expression documentation [\#5312](https://github.com/apache/arrow-datafusion/issues/5312) -- Replace merge\_batches with concat\_batches [\#5297](https://github.com/apache/arrow-datafusion/issues/5297) -- Support for some of the window frame range queries [\#5275](https://github.com/apache/arrow-datafusion/issues/5275) -- Make `log` function to be in sync with PostgresSql [\#5259](https://github.com/apache/arrow-datafusion/issues/5259) -- 
\[SQLLogicTest\] Make schema validation ignore nullable and metadata attributes [\#5231](https://github.com/apache/arrow-datafusion/issues/5231) -- Add support for linear groups search [\#5213](https://github.com/apache/arrow-datafusion/issues/5213) -- Add SQL function overload `LOG(base, x)` for logarithm of x to base [\#5206](https://github.com/apache/arrow-datafusion/issues/5206) -- `all_schema()` will get schema of child of child of .... [\#5192](https://github.com/apache/arrow-datafusion/issues/5192) -- Enable parquet parallel scans by default [\#5125](https://github.com/apache/arrow-datafusion/issues/5125) -- Don't repartition ProjectionExec when it does not compute anything [\#4968](https://github.com/apache/arrow-datafusion/issues/4968) -- Support non-tuple expression for Exists Subquery to Join [\#4934](https://github.com/apache/arrow-datafusion/issues/4934) -- Read multiple files/folders using `read_csv` [\#4909](https://github.com/apache/arrow-datafusion/issues/4909) - -**Fixed bugs:** - -- Make inline\_table\_scan optimize whole plan during first optimization stage. [\#5364](https://github.com/apache/arrow-datafusion/issues/5364) -- tpcds\_logical\_q8 ambiguous name. 
[\#5334](https://github.com/apache/arrow-datafusion/issues/5334) -- Protobuf serialisation is missing for GetIndexedFieldExpr [\#5323](https://github.com/apache/arrow-datafusion/issues/5323) -- Indexing a nested list with 0 or an index larger than list size is not handled correctly [\#5310](https://github.com/apache/arrow-datafusion/issues/5310) -- Protobuf serialization drops `preserve_partitioning` from `SortExec` [\#5305](https://github.com/apache/arrow-datafusion/issues/5305) -- data file without suffix can't be read correctly [\#5301](https://github.com/apache/arrow-datafusion/issues/5301) -- Idk [\#5298](https://github.com/apache/arrow-datafusion/issues/5298) -- Error with query that has DISTINCT with ORDER BY and aliased select list [\#5293](https://github.com/apache/arrow-datafusion/issues/5293) -- Optimizer prunes UnnestExec on aggregate count [\#5281](https://github.com/apache/arrow-datafusion/issues/5281) -- Strange Behaviour on RepartitionExec with CoalescePartitionsExec. [\#5278](https://github.com/apache/arrow-datafusion/issues/5278) -- Error "For SELECT DISTINCT, ORDER BY expressions id must appear in select list" may be over eager [\#5255](https://github.com/apache/arrow-datafusion/issues/5255) -- SQL allows SORT BY keyword [\#5247](https://github.com/apache/arrow-datafusion/issues/5247) -- test `sort_on_window_null_string` failed after disable `skip_fail`. [\#5233](https://github.com/apache/arrow-datafusion/issues/5233) -- Dataframe API adds ?table? 
qualifier [\#5187](https://github.com/apache/arrow-datafusion/issues/5187) -- Re-ordering Projections in scan are not working anymore \(since DF15\) [\#5146](https://github.com/apache/arrow-datafusion/issues/5146) -- parquet page level skipping \(page index pruning\) doesn't work with evolved schemas [\#5104](https://github.com/apache/arrow-datafusion/issues/5104) -- Incorrect results on queries with `distinct` and orderby [\#5065](https://github.com/apache/arrow-datafusion/issues/5065) -- NestedLoopJoin will panic when right child contains RepartitionExec [\#5022](https://github.com/apache/arrow-datafusion/issues/5022) -- JSON projection only work when the index is in ascending order [\#4832](https://github.com/apache/arrow-datafusion/issues/4832) -- Stack overflows when planning tpcds 22 in debug mode [\#4786](https://github.com/apache/arrow-datafusion/issues/4786) -- Failed to create Left anti join physical plan due to SchemaError::FieldNotFound [\#4366](https://github.com/apache/arrow-datafusion/issues/4366) -- Filters/limit are not pushdown druing optimalization for table with alias [\#2270](https://github.com/apache/arrow-datafusion/issues/2270) - -**Documentation updates:** - -- Update README.md fix \[welcoming community\] links [\#5232](https://github.com/apache/arrow-datafusion/pull/5232) ([jiangzhx](https://github.com/jiangzhx)) -- Update README.md update blaze-rs link to https://github.com/blaze-init/blaze [\#5190](https://github.com/apache/arrow-datafusion/pull/5190) ([jiangzhx](https://github.com/jiangzhx)) -- Typo of greptimedb [\#5103](https://github.com/apache/arrow-datafusion/pull/5103) ([fengjiachun](https://github.com/fengjiachun)) -- chore: change `DataBend` to `Databend` [\#5096](https://github.com/apache/arrow-datafusion/pull/5096) ([xudong963](https://github.com/xudong963)) - -**Closed issues:** - -- Implement column number / column type verification for sqllogictest [\#4499](https://github.com/apache/arrow-datafusion/issues/4499) - -**Merged 
pull requests:** - -- generate new projection plan in inline\_table\_scan instead of discarding [\#5371](https://github.com/apache/arrow-datafusion/pull/5371) ([jackwener](https://github.com/jackwener)) -- minor: fix rule name and comment. [\#5370](https://github.com/apache/arrow-datafusion/pull/5370) ([jackwener](https://github.com/jackwener)) -- minor: port limit tests to sqllogictests [\#5355](https://github.com/apache/arrow-datafusion/pull/5355) ([jackwener](https://github.com/jackwener)) -- feat: add rule to merge projection. [\#5349](https://github.com/apache/arrow-datafusion/pull/5349) ([jackwener](https://github.com/jackwener)) -- Ignore Arrow in dependabot [\#5341](https://github.com/apache/arrow-datafusion/pull/5341) ([iajoiner](https://github.com/iajoiner)) -- minor: remove useless `.get()` [\#5336](https://github.com/apache/arrow-datafusion/pull/5336) ([jackwener](https://github.com/jackwener)) -- bugfix: fix tpcds\_logical\_q8 ambiguous name. [\#5335](https://github.com/apache/arrow-datafusion/pull/5335) ([jackwener](https://github.com/jackwener)) -- minor: disable tpcds\_logical\_q10/q35 [\#5333](https://github.com/apache/arrow-datafusion/pull/5333) ([jackwener](https://github.com/jackwener)) -- minor: port intersection sql tests to sqllogictests [\#5331](https://github.com/apache/arrow-datafusion/pull/5331) ([alamb](https://github.com/alamb)) -- minor: port more window tests to sqllogictests [\#5330](https://github.com/apache/arrow-datafusion/pull/5330) ([alamb](https://github.com/alamb)) -- MINOR: nicer error messages for cli, use display format rather than debug [\#5329](https://github.com/apache/arrow-datafusion/pull/5329) ([kmitchener](https://github.com/kmitchener)) -- Add missing protobuf serialisation functionality GetIndexedFieldExpr. 
[\#5324](https://github.com/apache/arrow-datafusion/pull/5324) ([ahmedriza](https://github.com/ahmedriza)) -- chore: small typo in the example README [\#5319](https://github.com/apache/arrow-datafusion/pull/5319) ([gianarb](https://github.com/gianarb)) -- feat: add accessor to SessionContext fields for ContextProvider impl [\#5318](https://github.com/apache/arrow-datafusion/pull/5318) ([sunng87](https://github.com/sunng87)) -- \[DOC\] Update math expression documentation [\#5316](https://github.com/apache/arrow-datafusion/pull/5316) ([comphead](https://github.com/comphead)) -- Fix nested list indexing when the index is 0 or larger than the list size [\#5311](https://github.com/apache/arrow-datafusion/pull/5311) ([ahmedriza](https://github.com/ahmedriza)) -- Fix SortExec bench case and Add SortExec input cases to bench for SortPreservingMergeExec [\#5308](https://github.com/apache/arrow-datafusion/pull/5308) ([jaylmiller](https://github.com/jaylmiller)) -- Allow DISTINCT with ORDER BY and an aliased select list [\#5307](https://github.com/apache/arrow-datafusion/pull/5307) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Serialize preserve\_partitioning in SortExec [\#5306](https://github.com/apache/arrow-datafusion/pull/5306) ([thinkharderdev](https://github.com/thinkharderdev)) -- fix: correct plan builder when test `scalar_subquery_project_expr` [\#5304](https://github.com/apache/arrow-datafusion/pull/5304) ([jackwener](https://github.com/jackwener)) -- Make SQL query consistent with API syntax expression in code examples [\#5303](https://github.com/apache/arrow-datafusion/pull/5303) ([ongchi](https://github.com/ongchi)) -- enable tpcds-64 test [\#5302](https://github.com/apache/arrow-datafusion/pull/5302) ([jackwener](https://github.com/jackwener)) -- Feature/merge batches removal [\#5300](https://github.com/apache/arrow-datafusion/pull/5300) ([berkaysynnada](https://github.com/berkaysynnada)) -- fix: add 
yield point to `RepartitionExec` [\#5299](https://github.com/apache/arrow-datafusion/pull/5299) ([crepererum](https://github.com/crepererum)) -- `datafusion.optimizer.repartition_file_scans` enabled by default [\#5295](https://github.com/apache/arrow-datafusion/pull/5295) ([korowa](https://github.com/korowa)) -- minor: derive Ord/PartialOrd/Eq/PartialEq traits for `ObjectStoreUrl` [\#5288](https://github.com/apache/arrow-datafusion/pull/5288) ([crepererum](https://github.com/crepererum)) -- Fix the potential bug of check\_all\_column\_from\_schema [\#5287](https://github.com/apache/arrow-datafusion/pull/5287) ([ygf11](https://github.com/ygf11)) -- Linear search support for Window Group queries [\#5286](https://github.com/apache/arrow-datafusion/pull/5286) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mustafasrepo](https://github.com/mustafasrepo)) -- Prevent optimizer from pruning UnnestExec. [\#5282](https://github.com/apache/arrow-datafusion/pull/5282) ([vincev](https://github.com/vincev)) -- Minor: Add fetch to SortExec display [\#5279](https://github.com/apache/arrow-datafusion/pull/5279) ([thinkharderdev](https://github.com/thinkharderdev)) -- Set `catalog_list` from outside for `SessionState`. 
[\#5277](https://github.com/apache/arrow-datafusion/pull/5277) ([MichaelScofield](https://github.com/MichaelScofield)) -- Support page skipping / page\_index pushdown for evolved schemas [\#5268](https://github.com/apache/arrow-datafusion/pull/5268) ([alamb](https://github.com/alamb)) -- Use upstream newline\_delimited\_stream [\#5267](https://github.com/apache/arrow-datafusion/pull/5267) ([tustvold](https://github.com/tustvold)) -- Support non-tuple expression for exists-subquery to join [\#5264](https://github.com/apache/arrow-datafusion/pull/5264) ([ygf11](https://github.com/ygf11)) -- minor: Fix cargo fmt [\#5263](https://github.com/apache/arrow-datafusion/pull/5263) ([alamb](https://github.com/alamb)) -- minor: replace `unwrap()` with `?` [\#5262](https://github.com/apache/arrow-datafusion/pull/5262) ([jackwener](https://github.com/jackwener)) -- Preserve `TableScan.projection` order in `push_down_projection` optimizer rule [\#5261](https://github.com/apache/arrow-datafusion/pull/5261) ([korowa](https://github.com/korowa)) -- Minor: refactor ParquetExec roundtrip tests [\#5260](https://github.com/apache/arrow-datafusion/pull/5260) ([alamb](https://github.com/alamb)) -- \[fix\]\[plan\] relax the check for distinct, order by for dataframe [\#5258](https://github.com/apache/arrow-datafusion/pull/5258) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xiaoyong-z](https://github.com/xiaoyong-z)) -- enhance the checking of type errors in the test `window_frame_creation` [\#5257](https://github.com/apache/arrow-datafusion/pull/5257) ([HaoYang670](https://github.com/HaoYang670)) -- SQL planning benchmarks for very wide tables [\#5256](https://github.com/apache/arrow-datafusion/pull/5256) ([alamb](https://github.com/alamb)) -- Minor: Add negative test for SORT BY [\#5254](https://github.com/apache/arrow-datafusion/pull/5254) ([alamb](https://github.com/alamb)) -- \[sqllogictest\] Define output types and check them in tests 
[\#5253](https://github.com/apache/arrow-datafusion/pull/5253) ([melgenek](https://github.com/melgenek)) -- Minor: port some explain test to sqllogictest, add filename normalization [\#5252](https://github.com/apache/arrow-datafusion/pull/5252) ([alamb](https://github.com/alamb)) -- Disallow SORT BY in SQL [\#5249](https://github.com/apache/arrow-datafusion/pull/5249) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- \[SQLLogicTest\] Make schema validation ignore nullable and metadata attributes [\#5246](https://github.com/apache/arrow-datafusion/pull/5246) ([comphead](https://github.com/comphead)) -- Add SQL function overload LOG\(base, x\) for logarithm of x to base [\#5245](https://github.com/apache/arrow-datafusion/pull/5245) ([comphead](https://github.com/comphead)) -- Update sqllogictest requirement from 0.11.1 to 0.12.0 \#5237 [\#5244](https://github.com/apache/arrow-datafusion/pull/5244) ([alamb](https://github.com/alamb)) -- Test case for NDJsonExec with randomly ordered projection [\#5243](https://github.com/apache/arrow-datafusion/pull/5243) ([korowa](https://github.com/korowa)) -- Update to arrow `33.0.0` [\#5241](https://github.com/apache/arrow-datafusion/pull/5241) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- DataFusion 18.0.0 Release [\#5240](https://github.com/apache/arrow-datafusion/pull/5240) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- fix clippy in nightly [\#5238](https://github.com/apache/arrow-datafusion/pull/5238) ([jackwener](https://github.com/jackwener)) -- refactor: correct the implementation of `all_schemas()` [\#5236](https://github.com/apache/arrow-datafusion/pull/5236) ([jackwener](https://github.com/jackwener)) -- bugfix: fix error when `get_coerced_window_frame` meet `utf8` [\#5234](https://github.com/apache/arrow-datafusion/pull/5234) 
([jackwener](https://github.com/jackwener)) -- Feature/sort enforcement refactor [\#5228](https://github.com/apache/arrow-datafusion/pull/5228) ([mustafasrepo](https://github.com/mustafasrepo)) -- Minor: Fix doc links and typos [\#5225](https://github.com/apache/arrow-datafusion/pull/5225) ([Jefffrey](https://github.com/Jefffrey)) -- fix: correct expected error in test [\#5224](https://github.com/apache/arrow-datafusion/pull/5224) ([jackwener](https://github.com/jackwener)) -- bugfix: fix propagating empty\_relation generates an illegal plan [\#5219](https://github.com/apache/arrow-datafusion/pull/5219) ([yukkit](https://github.com/yukkit)) -- Replace placeholders in ScalarSubqueries [\#5216](https://github.com/apache/arrow-datafusion/pull/5216) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- Dataframe join\_on method [\#5210](https://github.com/apache/arrow-datafusion/pull/5210) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- bugfix: fix eval `nullalbe()` in `simplify_exprs` [\#5208](https://github.com/apache/arrow-datafusion/pull/5208) ([jackwener](https://github.com/jackwener)) -- minor: remove unnecessary clone [\#5207](https://github.com/apache/arrow-datafusion/pull/5207) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- minor: extract `merge_schema()` function. 
[\#5203](https://github.com/apache/arrow-datafusion/pull/5203) ([jackwener](https://github.com/jackwener)) -- minor: remove unnecessary `continue` [\#5200](https://github.com/apache/arrow-datafusion/pull/5200) ([xiaoyong-z](https://github.com/xiaoyong-z)) -- Minor: Begin porting some window tests to sqllogictests [\#5199](https://github.com/apache/arrow-datafusion/pull/5199) ([alamb](https://github.com/alamb)) -- fix\(MemTable\): make it cancel-safe and fix parallelism [\#5197](https://github.com/apache/arrow-datafusion/pull/5197) ([DDtKey](https://github.com/DDtKey)) -- fix: make `write_csv/json/parquet` cancel-safe [\#5196](https://github.com/apache/arrow-datafusion/pull/5196) ([DDtKey](https://github.com/DDtKey)) -- Support arithmetic operation on DictionaryArray [\#5194](https://github.com/apache/arrow-datafusion/pull/5194) ([viirya](https://github.com/viirya)) -- sqllogicaltest: add cleanup and use rowsort. [\#5189](https://github.com/apache/arrow-datafusion/pull/5189) ([jackwener](https://github.com/jackwener)) -- bugfix: fix `TableScan` may contain fields not included in `schema` [\#5188](https://github.com/apache/arrow-datafusion/pull/5188) ([jackwener](https://github.com/jackwener)) -- Create disk manager spill folder if doesn't exist [\#5185](https://github.com/apache/arrow-datafusion/pull/5185) ([comphead](https://github.com/comphead)) -- Parse identifiers properly for TableReferences [\#5183](https://github.com/apache/arrow-datafusion/pull/5183) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- Fix decimal scalar dyn kernels [\#5179](https://github.com/apache/arrow-datafusion/pull/5179) ([viirya](https://github.com/viirya)) -- Patch git Safe Paths in CI [\#5177](https://github.com/apache/arrow-datafusion/pull/5177) ([tustvold](https://github.com/tustvold)) -- Add initial support for serializing physical plans with Substrait [\#5176](https://github.com/apache/arrow-datafusion/pull/5176) 
([andygrove](https://github.com/andygrove)) -- Bump tokio from 1.24.1 to 1.24.2 in /datafusion-cli [\#5172](https://github.com/apache/arrow-datafusion/pull/5172) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Make EnforceSorting global sort aware, fix sort mis-optimizations involving unions, support parallel sort + merge transformations [\#5171](https://github.com/apache/arrow-datafusion/pull/5171) ([mustafasrepo](https://github.com/mustafasrepo)) -- Update substrait README.md [\#5168](https://github.com/apache/arrow-datafusion/pull/5168) ([jiangzhx](https://github.com/jiangzhx)) -- Switch to use sum kernel from arrow-rs for Decimal128 [\#5167](https://github.com/apache/arrow-datafusion/pull/5167) ([sunchao](https://github.com/sunchao)) -- FileStream: Open next file in parallel while decoding [\#5161](https://github.com/apache/arrow-datafusion/pull/5161) ([thinkharderdev](https://github.com/thinkharderdev)) -- Fix FairSpillPool try\_grow for non-spillable consumers [\#5160](https://github.com/apache/arrow-datafusion/pull/5160) ([tustvold](https://github.com/tustvold)) -- fix: treat unsupported SQL plans as "not implemented" [\#5159](https://github.com/apache/arrow-datafusion/pull/5159) ([crepererum](https://github.com/crepererum)) -- Compare NULL types [\#5158](https://github.com/apache/arrow-datafusion/pull/5158) ([melgenek](https://github.com/melgenek)) -- Always wrapping OnceAsync for the inner table side in NestedLoopJoinExec [\#5156](https://github.com/apache/arrow-datafusion/pull/5156) ([ygf11](https://github.com/ygf11)) -- chore: add object\_name\_to\_table\_reference in SqlToRel [\#5155](https://github.com/apache/arrow-datafusion/pull/5155) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) -- Ambiguity check for where selection [\#5153](https://github.com/apache/arrow-datafusion/pull/5153) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] 
([Jefffrey](https://github.com/Jefffrey)) -- feat: Type coercion for Dictionary\(\_, \_\) to Utf8 for regex conditions [\#5152](https://github.com/apache/arrow-datafusion/pull/5152) ([stuartcarnie](https://github.com/stuartcarnie)) -- Support arithmetic scalar operation with DictionaryArray [\#5151](https://github.com/apache/arrow-datafusion/pull/5151) ([viirya](https://github.com/viirya)) -- \[sqllogictest\] Support `pg_typeof` [\#5148](https://github.com/apache/arrow-datafusion/pull/5148) ([melgenek](https://github.com/melgenek)) -- Date to Timestamp cast [\#5140](https://github.com/apache/arrow-datafusion/pull/5140) ([comphead](https://github.com/comphead)) -- add example for Flight SQL server that supports JDBC driver [\#5138](https://github.com/apache/arrow-datafusion/pull/5138) ([kmitchener](https://github.com/kmitchener)) -- Add in-list test [\#5135](https://github.com/apache/arrow-datafusion/pull/5135) ([nseekhao](https://github.com/nseekhao)) -- \[BugFix\] abort plan if order by column not in select list [\#5132](https://github.com/apache/arrow-datafusion/pull/5132) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xiaoyong-z](https://github.com/xiaoyong-z)) -- Bug fix: Empty Record Batch handling [\#5131](https://github.com/apache/arrow-datafusion/pull/5131) ([mustafasrepo](https://github.com/mustafasrepo)) -- Add option to control whether to normalize ident [\#5124](https://github.com/apache/arrow-datafusion/pull/5124) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) -- Make `parse_physical_expr` public [\#5118](https://github.com/apache/arrow-datafusion/pull/5118) ([comphead](https://github.com/comphead)) -- Support coercing `utf8` to `interval` and `timestamp` \(including arguments to `date_bin`\) [\#5117](https://github.com/apache/arrow-datafusion/pull/5117) ([alamb](https://github.com/alamb)) -- Fix release issues [\#5116](https://github.com/apache/arrow-datafusion/pull/5116) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- minor: port date\_bin tests to sqllogictests [\#5115](https://github.com/apache/arrow-datafusion/pull/5115) ([alamb](https://github.com/alamb)) -- Minor: reduce code duplication using `rewrite_expr` [\#5114](https://github.com/apache/arrow-datafusion/pull/5114) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Replace &Option\ with Option\<&T\> [\#5113](https://github.com/apache/arrow-datafusion/pull/5113) ([gaoxinge](https://github.com/gaoxinge)) -- Improve `get_meet_of_orderings` to check for common prefixes [\#5111](https://github.com/apache/arrow-datafusion/pull/5111) ([ozankabak](https://github.com/ozankabak)) -- \[sqllogictest\] Apply rowsort when there is no explicit order by [\#5110](https://github.com/apache/arrow-datafusion/pull/5110) ([melgenek](https://github.com/melgenek)) -- Add unnest\_column to DataFrame [\#5106](https://github.com/apache/arrow-datafusion/pull/5106) ([vincev](https://github.com/vincev)) -- Minor: reduce indent level in page filter pruning code [\#5105](https://github.com/apache/arrow-datafusion/pull/5105) ([alamb](https://github.com/alamb)) -- Replace &Option\ with Option\<&T\> [\#5102](https://github.com/apache/arrow-datafusion/pull/5102) ([gaoxinge](https://github.com/gaoxinge)) -- Minor: remove unused methods in datafusion/optimizer/src/utils.rs [\#5098](https://github.com/apache/arrow-datafusion/pull/5098) ([ygf11](https://github.com/ygf11)) -- ci: don't trigger rust ci for doc changes [\#5097](https://github.com/apache/arrow-datafusion/pull/5097) ([xudong963](https://github.com/xudong963)) -- sqllogicaltest: fix unstable slt case. 
[\#5095](https://github.com/apache/arrow-datafusion/pull/5095) ([jackwener](https://github.com/jackwener)) -- chore: update cranelift-module [\#5094](https://github.com/apache/arrow-datafusion/pull/5094) ([jackwener](https://github.com/jackwener)) -- refactor: Add `rewrite_expr` convenience method for rewriting `Expr`s [\#5092](https://github.com/apache/arrow-datafusion/pull/5092) ([alamb](https://github.com/alamb)) -- Minor: extract sort col rewrite into its own module, add unit tests [\#5088](https://github.com/apache/arrow-datafusion/pull/5088) ([alamb](https://github.com/alamb)) -- \[sqllogictest\] Move `decimal.rs` tests [\#5086](https://github.com/apache/arrow-datafusion/pull/5086) ([melgenek](https://github.com/melgenek)) -- Insert target columns empty fix [\#5079](https://github.com/apache/arrow-datafusion/pull/5079) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) -- sqllogicaltest: move union.rs [\#5075](https://github.com/apache/arrow-datafusion/pull/5075) ([jackwener](https://github.com/jackwener)) -- \[Enhancement\] Don't repartition ProjectionExec when it does not compute anything [\#5074](https://github.com/apache/arrow-datafusion/pull/5074) ([xiaoyong-z](https://github.com/xiaoyong-z)) -- Support ORDER BY an aliased column [\#5067](https://github.com/apache/arrow-datafusion/pull/5067) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Parquet parallel scan [\#5057](https://github.com/apache/arrow-datafusion/pull/5057) ([korowa](https://github.com/korowa)) -- \[BugFix\] fix file stream time scanning metrics bug [\#5020](https://github.com/apache/arrow-datafusion/pull/5020) ([xiaoyong-z](https://github.com/xiaoyong-z)) -- Show optimization errors in explain [\#4819](https://github.com/apache/arrow-datafusion/pull/4819) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) - - -## 
[18.0.0](https://github.com/apache/arrow-datafusion/tree/18.0.0) (2023-02-10) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/17.0.0...18.0.0) - -**Breaking changes:** - -- Use DataFusionError instead of ArrowError in SendableRecordBatchStream [\#5101](https://github.com/apache/arrow-datafusion/pull/5101) ([comphead](https://github.com/comphead)) -- Update to arrow 32 and Switch to RawDecoder for JSON [\#5056](https://github.com/apache/arrow-datafusion/pull/5056) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) - -**Implemented enhancements:** - -- DiskManager to create a spill folder if doesn't exist [\#5186](https://github.com/apache/arrow-datafusion/issues/5186) -- cast expression may cause duplicate column name error [\#5174](https://github.com/apache/arrow-datafusion/issues/5174) -- Add type coercion from Dictionary to string for regular expressions [\#5154](https://github.com/apache/arrow-datafusion/issues/5154) -- Unnecessary `Filter` on Parquet datasources [\#5149](https://github.com/apache/arrow-datafusion/issues/5149) -- \[sqllogictest\] Support `pg_typeof` for Postgres compatibility tests [\#5147](https://github.com/apache/arrow-datafusion/issues/5147) -- Supporting Grafana global variables [\#5144](https://github.com/apache/arrow-datafusion/issues/5144) -- add example for standalone DataFusion server which supports Arrow Flight SQL JDBC driver [\#5139](https://github.com/apache/arrow-datafusion/issues/5139) -- Support for InList in datafusion-substrait [\#5134](https://github.com/apache/arrow-datafusion/issues/5134) -- Pipeline file opening in `FileStream` [\#5129](https://github.com/apache/arrow-datafusion/issues/5129) -- Make `parse_physical_expr` public [\#5107](https://github.com/apache/arrow-datafusion/issues/5107) -- Use DataFusionError in SendableRecordBatchStream [\#5039](https://github.com/apache/arrow-datafusion/issues/5039) -- Interval coercion:`date_bin('1 
hour',...)` does not work but `date_bin(interval '1 hour', ...` does [\#4853](https://github.com/apache/arrow-datafusion/issues/4853) -- `Explain ` should not fail if meeting errors when optimizing the query [\#4766](https://github.com/apache/arrow-datafusion/issues/4766) -- Add option to determine whether to convert identifiers [\#4551](https://github.com/apache/arrow-datafusion/issues/4551) -- Replace `&Option` with `Option<&T>`. [\#4424](https://github.com/apache/arrow-datafusion/issues/4424) -- Error type in `RecordBatchStream` [\#4172](https://github.com/apache/arrow-datafusion/issues/4172) -- Support non-equi join \(e.g. `ON` clause\) in Dataframe API [\#1254](https://github.com/apache/arrow-datafusion/issues/1254) -- Allow ParquetExec to parallelize work based on row groups [\#137](https://github.com/apache/arrow-datafusion/issues/137) - -**Fixed bugs:** - -- Confusing schema errors when using window partition [\#5229](https://github.com/apache/arrow-datafusion/issues/5229) -- Propagating empty\_relation generate an illegal plan [\#5218](https://github.com/apache/arrow-datafusion/issues/5218) -- The test `in_list_types_struct_literal` fails when setting `skip_failed_rules` as `false` [\#5217](https://github.com/apache/arrow-datafusion/issues/5217) -- Placeholder values are not replaced in ScalarSubqueries [\#5215](https://github.com/apache/arrow-datafusion/issues/5215) -- Querying against delta lake table does not seem to work [\#5202](https://github.com/apache/arrow-datafusion/issues/5202) -- Arithmetic operation doesn't work with DictionaryArray [\#5193](https://github.com/apache/arrow-datafusion/issues/5193) -- simplify\_expr\(\) invoke nullable\(\) exist bug [\#5191](https://github.com/apache/arrow-datafusion/issues/5191) -- CI is currently broken on git diff: Not a git repository [\#5180](https://github.com/apache/arrow-datafusion/issues/5180) -- `write_csv/json/parquet` isn't cancel safe [\#5178](https://github.com/apache/arrow-datafusion/issues/5178) 
-- no hyperlink to blaze-rs \[doc: README-"Use Cases"\] [\#5175](https://github.com/apache/arrow-datafusion/issues/5175) -- Arithmetic scalar operation doesn't work with DictionaryArray [\#5150](https://github.com/apache/arrow-datafusion/issues/5150) -- Sort operator disappear in physical\_plan [\#5100](https://github.com/apache/arrow-datafusion/issues/5100) -- Window function error: InvalidArgumentError\("number of columns\(27\) must match number of fields\(35\) in schema" [\#5090](https://github.com/apache/arrow-datafusion/issues/5090) -- `INSERT` statements without target column list are not working [\#5078](https://github.com/apache/arrow-datafusion/issues/5078) -- fix file stream time scanning metrics bug [\#5019](https://github.com/apache/arrow-datafusion/issues/5019) -- Date before `1678` causes panic [\#4875](https://github.com/apache/arrow-datafusion/issues/4875) -- Can not ORDER BY an aliased group column [\#4854](https://github.com/apache/arrow-datafusion/issues/4854) -- The `filters` expressions in `TableScan` may contain fields not included in `schema`. 
[\#4793](https://github.com/apache/arrow-datafusion/issues/4793) -- Comparing a `Timestamp` to a `Date32` fails [\#4644](https://github.com/apache/arrow-datafusion/issues/4644) -- String --\> TableReference parsing does not properly handle `"` and `.` [\#4532](https://github.com/apache/arrow-datafusion/issues/4532) -- can't compare NULL type with NULL type [\#4335](https://github.com/apache/arrow-datafusion/issues/4335) -- Add ambiguous check when generate selection plan [\#4196](https://github.com/apache/arrow-datafusion/issues/4196) -- Internal error in CAST from Timestamp\[us\] [\#3922](https://github.com/apache/arrow-datafusion/issues/3922) -- Run median expr on parquet file column got error [\#3805](https://github.com/apache/arrow-datafusion/issues/3805) -- aliasing a field renders it missing in the order by clause [\#669](https://github.com/apache/arrow-datafusion/issues/669) -- Querying datetime data in DataFusion with an embedded timezone always fails [\#153](https://github.com/apache/arrow-datafusion/issues/153) - -**Documentation updates:** - -- Update README.md fix \[welcoming community\] links [\#5232](https://github.com/apache/arrow-datafusion/pull/5232) ([jiangzhx](https://github.com/jiangzhx)) -- Update README.md update blaze-rs link to https://github.com/blaze-init/blaze [\#5190](https://github.com/apache/arrow-datafusion/pull/5190) ([jiangzhx](https://github.com/jiangzhx)) -- Typo of greptimedb [\#5103](https://github.com/apache/arrow-datafusion/pull/5103) ([fengjiachun](https://github.com/fengjiachun)) -- chore: change `DataBend` to `Databend` [\#5096](https://github.com/apache/arrow-datafusion/pull/5096) ([xudong963](https://github.com/xudong963)) - -**Closed issues:** - -- Change coerced type for comparison between timestamp with date to timestamp [\#4761](https://github.com/apache/arrow-datafusion/issues/4761) - -**Merged pull requests:** - -- fix: correct expected error in test [\#5224](https://github.com/apache/arrow-datafusion/pull/5224) 
([jackwener](https://github.com/jackwener)) -- bugfix: fix propagating empty\_relation generates an illegal plan [\#5219](https://github.com/apache/arrow-datafusion/pull/5219) ([yukkit](https://github.com/yukkit)) -- Replace placeholders in ScalarSubqueries [\#5216](https://github.com/apache/arrow-datafusion/pull/5216) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- Dataframe join\_on method [\#5210](https://github.com/apache/arrow-datafusion/pull/5210) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- bugfix: fix eval `nullalbe()` in `simplify_exprs` [\#5208](https://github.com/apache/arrow-datafusion/pull/5208) ([jackwener](https://github.com/jackwener)) -- minor: remove unnecessary clone [\#5207](https://github.com/apache/arrow-datafusion/pull/5207) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- minor: extract `merge_schema()` function. [\#5203](https://github.com/apache/arrow-datafusion/pull/5203) ([jackwener](https://github.com/jackwener)) -- minor: remove unnecessary `continue` [\#5200](https://github.com/apache/arrow-datafusion/pull/5200) ([xiaoyong-z](https://github.com/xiaoyong-z)) -- fix\(MemTable\): make it cancel-safe and fix parallelism [\#5197](https://github.com/apache/arrow-datafusion/pull/5197) ([DDtKey](https://github.com/DDtKey)) -- fix: make `write_csv/json/parquet` cancel-safe [\#5196](https://github.com/apache/arrow-datafusion/pull/5196) ([DDtKey](https://github.com/DDtKey)) -- Support arithmetic operation on DictionaryArray [\#5194](https://github.com/apache/arrow-datafusion/pull/5194) ([viirya](https://github.com/viirya)) -- sqllogicaltest: add cleanup and use rowsort. 
[\#5189](https://github.com/apache/arrow-datafusion/pull/5189) ([jackwener](https://github.com/jackwener)) -- bugfix: fix `TableScan` may contain fields not included in `schema` [\#5188](https://github.com/apache/arrow-datafusion/pull/5188) ([jackwener](https://github.com/jackwener)) -- Create disk manager spill folder if doesn't exist [\#5185](https://github.com/apache/arrow-datafusion/pull/5185) ([comphead](https://github.com/comphead)) -- Parse identifiers properly for TableReferences [\#5183](https://github.com/apache/arrow-datafusion/pull/5183) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- Fix decimal scalar dyn kernels [\#5179](https://github.com/apache/arrow-datafusion/pull/5179) ([viirya](https://github.com/viirya)) -- Patch git Safe Paths in CI [\#5177](https://github.com/apache/arrow-datafusion/pull/5177) ([tustvold](https://github.com/tustvold)) -- Add initial support for serializing physical plans with Substrait [\#5176](https://github.com/apache/arrow-datafusion/pull/5176) ([andygrove](https://github.com/andygrove)) -- Bump tokio from 1.24.1 to 1.24.2 in /datafusion-cli [\#5172](https://github.com/apache/arrow-datafusion/pull/5172) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Make EnforceSorting global sort aware, fix sort mis-optimizations involving unions, support parallel sort + merge transformations [\#5171](https://github.com/apache/arrow-datafusion/pull/5171) ([mustafasrepo](https://github.com/mustafasrepo)) -- Update substrait README.md [\#5168](https://github.com/apache/arrow-datafusion/pull/5168) ([jiangzhx](https://github.com/jiangzhx)) -- Switch to use sum kernel from arrow-rs for Decimal128 [\#5167](https://github.com/apache/arrow-datafusion/pull/5167) ([sunchao](https://github.com/sunchao)) -- FileStream: Open next file in parallel while decoding [\#5161](https://github.com/apache/arrow-datafusion/pull/5161) ([thinkharderdev](https://github.com/thinkharderdev)) -- 
Fix FairSpillPool try\_grow for non-spillable consumers [\#5160](https://github.com/apache/arrow-datafusion/pull/5160) ([tustvold](https://github.com/tustvold)) -- fix: treat unsupported SQL plans as "not implemented" [\#5159](https://github.com/apache/arrow-datafusion/pull/5159) ([crepererum](https://github.com/crepererum)) -- Compare NULL types [\#5158](https://github.com/apache/arrow-datafusion/pull/5158) ([melgenek](https://github.com/melgenek)) -- chore: add object\_name\_to\_table\_reference in SqlToRel [\#5155](https://github.com/apache/arrow-datafusion/pull/5155) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) -- Ambiguity check for where selection [\#5153](https://github.com/apache/arrow-datafusion/pull/5153) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- feat: Type coercion for Dictionary\(\_, \_\) to Utf8 for regex conditions [\#5152](https://github.com/apache/arrow-datafusion/pull/5152) ([stuartcarnie](https://github.com/stuartcarnie)) -- Support arithmetic scalar operation with DictionaryArray [\#5151](https://github.com/apache/arrow-datafusion/pull/5151) ([viirya](https://github.com/viirya)) -- \[sqllogictest\] Support `pg_typeof` [\#5148](https://github.com/apache/arrow-datafusion/pull/5148) ([melgenek](https://github.com/melgenek)) -- Date to Timestamp cast [\#5140](https://github.com/apache/arrow-datafusion/pull/5140) ([comphead](https://github.com/comphead)) -- add example for Flight SQL server that supports JDBC driver [\#5138](https://github.com/apache/arrow-datafusion/pull/5138) ([kmitchener](https://github.com/kmitchener)) -- Add in-list test [\#5135](https://github.com/apache/arrow-datafusion/pull/5135) ([nseekhao](https://github.com/nseekhao)) -- Bug fix: Empty Record Batch handling [\#5131](https://github.com/apache/arrow-datafusion/pull/5131) ([mustafasrepo](https://github.com/mustafasrepo)) -- Add option to control 
whether to normalize ident [\#5124](https://github.com/apache/arrow-datafusion/pull/5124) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) -- Make `parse_physical_expr` public [\#5118](https://github.com/apache/arrow-datafusion/pull/5118) ([comphead](https://github.com/comphead)) -- Support coercing `utf8` to `interval` and `timestamp` \(including arguments to `date_bin`\) [\#5117](https://github.com/apache/arrow-datafusion/pull/5117) ([alamb](https://github.com/alamb)) -- Fix release issues [\#5116](https://github.com/apache/arrow-datafusion/pull/5116) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- minor: port date\_bin tests to sqllogictests [\#5115](https://github.com/apache/arrow-datafusion/pull/5115) ([alamb](https://github.com/alamb)) -- Minor: reduce code duplication using `rewrite_expr` [\#5114](https://github.com/apache/arrow-datafusion/pull/5114) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Replace &Option\ with Option\<&T\> [\#5113](https://github.com/apache/arrow-datafusion/pull/5113) ([gaoxinge](https://github.com/gaoxinge)) -- Improve `get_meet_of_orderings` to check for common prefixes [\#5111](https://github.com/apache/arrow-datafusion/pull/5111) ([ozankabak](https://github.com/ozankabak)) -- \[sqllogictest\] Apply rowsort when there is no explicit order by [\#5110](https://github.com/apache/arrow-datafusion/pull/5110) ([melgenek](https://github.com/melgenek)) -- Add unnest\_column to DataFrame [\#5106](https://github.com/apache/arrow-datafusion/pull/5106) ([vincev](https://github.com/vincev)) -- Minor: reduce indent level in page filter pruning code [\#5105](https://github.com/apache/arrow-datafusion/pull/5105) ([alamb](https://github.com/alamb)) -- Replace &Option\ with Option\<&T\> [\#5102](https://github.com/apache/arrow-datafusion/pull/5102) 
([gaoxinge](https://github.com/gaoxinge)) -- Minor: remove unused methods in datafusion/optimizer/src/utils.rs [\#5098](https://github.com/apache/arrow-datafusion/pull/5098) ([ygf11](https://github.com/ygf11)) -- ci: don't trigger rust ci for doc changes [\#5097](https://github.com/apache/arrow-datafusion/pull/5097) ([xudong963](https://github.com/xudong963)) -- sqllogicaltest: fix unstable slt case. [\#5095](https://github.com/apache/arrow-datafusion/pull/5095) ([jackwener](https://github.com/jackwener)) -- chore: update cranelift-module [\#5094](https://github.com/apache/arrow-datafusion/pull/5094) ([jackwener](https://github.com/jackwener)) -- refactor: Add `rewrite_expr` convenience method for rewriting `Expr`s [\#5092](https://github.com/apache/arrow-datafusion/pull/5092) ([alamb](https://github.com/alamb)) -- Minor: extract sort col rewrite into its own module, add unit tests [\#5088](https://github.com/apache/arrow-datafusion/pull/5088) ([alamb](https://github.com/alamb)) -- \[sqllogictest\] Move `decimal.rs` tests [\#5086](https://github.com/apache/arrow-datafusion/pull/5086) ([melgenek](https://github.com/melgenek)) -- Insert target columns empty fix [\#5079](https://github.com/apache/arrow-datafusion/pull/5079) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) -- sqllogicaltest: move union.rs [\#5075](https://github.com/apache/arrow-datafusion/pull/5075) ([jackwener](https://github.com/jackwener)) -- Support ORDER BY an aliased column [\#5067](https://github.com/apache/arrow-datafusion/pull/5067) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Parquet parallel scan [\#5057](https://github.com/apache/arrow-datafusion/pull/5057) ([korowa](https://github.com/korowa)) -- \[BugFix\] fix file stream time scanning metrics bug [\#5020](https://github.com/apache/arrow-datafusion/pull/5020) ([xiaoyong-z](https://github.com/xiaoyong-z)) -- Show optimization 
errors in explain [\#4819](https://github.com/apache/arrow-datafusion/pull/4819) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) - - -## [17.0.0](https://github.com/apache/arrow-datafusion/tree/17.0.0) (2023-01-27) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/17.0.0-rc1...17.0.0) - -**Breaking changes:** - -- Implemented a ReadOptions trait for cleaner code. [\#5025](https://github.com/apache/arrow-datafusion/pull/5025) ([saikrishna1-bidgely](https://github.com/saikrishna1-bidgely)) - -**Implemented enhancements:** - -- Add null-equals-null JOIN support in Substrait producer/consumer [\#5084](https://github.com/apache/arrow-datafusion/issues/5084) -- Cleaner code for Read Options in reader methdos. [\#5024](https://github.com/apache/arrow-datafusion/issues/5024) -- Substrait donation follow-on work [\#4897](https://github.com/apache/arrow-datafusion/issues/4897) -- Add `len` method to `DataFrame` [\#1926](https://github.com/apache/arrow-datafusion/issues/1926) - -**Fixed bugs:** - -- Clippy failures in master branch and in PRs \(due to new nightly Rust\) [\#5080](https://github.com/apache/arrow-datafusion/issues/5080) - -**Merged pull requests:** - -- Add null-equals-null join support [\#5085](https://github.com/apache/arrow-datafusion/pull/5085) ([nseekhao](https://github.com/nseekhao)) -- Optimize returned plan in roundtrip\_fill\_na function [\#5083](https://github.com/apache/arrow-datafusion/pull/5083) ([nseekhao](https://github.com/nseekhao)) -- fix clippy failures [\#5081](https://github.com/apache/arrow-datafusion/pull/5081) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Add NULL literal support for decimal and integers [\#5077](https://github.com/apache/arrow-datafusion/pull/5077) ([nseekhao](https://github.com/nseekhao)) -- DataFrame count method [\#5071](https://github.com/apache/arrow-datafusion/pull/5071) 
([Jefffrey](https://github.com/Jefffrey)) -- \[sqllogictests\] Port orderby.rs to sqllogictests [\#5062](https://github.com/apache/arrow-datafusion/pull/5062) ([alamb](https://github.com/alamb)) - -## [17.0.0-rc1](https://github.com/apache/arrow-datafusion/tree/17.0.0-rc1) (2023-01-26) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/16.1.0...17.0.0-rc1) - -**Breaking changes:** - -- Change ExecutionPlan::maintains\_input\_order to return vector \(to support multi children executors better\) [\#5035](https://github.com/apache/arrow-datafusion/pull/5035) ([mustafasrepo](https://github.com/mustafasrepo)) -- Allow overriding error type in DataFusion Result [\#5000](https://github.com/apache/arrow-datafusion/pull/5000) ([tustvold](https://github.com/tustvold)) -- Add dictionary\_expresions feature \(\#4386\) [\#4999](https://github.com/apache/arrow-datafusion/pull/4999) ([tustvold](https://github.com/tustvold)) - -**Implemented enhancements:** - -- Retain the ordering of fields in the table schema when creating the projection for an update plan [\#5052](https://github.com/apache/arrow-datafusion/issues/5052) -- \[sqllogictest\] Remove `integration-tests` directory [\#5011](https://github.com/apache/arrow-datafusion/issues/5011) -- \[sqllogictest\] Consolidate normalization code for the postgres and non-postgres paths [\#5010](https://github.com/apache/arrow-datafusion/issues/5010) -- \[sqllogictest\] Don't orchestrate the postgres containers with rust / docker [\#5009](https://github.com/apache/arrow-datafusion/issues/5009) -- check external table exist before creating a table [\#4997](https://github.com/apache/arrow-datafusion/issues/4997) -- Implement `std::error::Error` for DataFusionError [\#4991](https://github.com/apache/arrow-datafusion/issues/4991) -- Return Vec\ instead of bool in ExecutionPlan::maintains\_input\_order [\#4980](https://github.com/apache/arrow-datafusion/issues/4980) -- Add support for linear range search 
[\#4979](https://github.com/apache/arrow-datafusion/issues/4979) -- Add support for bounded execution when window query involves UNBOUNDED PRECEDING [\#4978](https://github.com/apache/arrow-datafusion/issues/4978) -- Infer prepared statement parameter types for insert queries with values clauses [\#4976](https://github.com/apache/arrow-datafusion/issues/4976) -- The filter of outer table happens multiple time after optimizing in-subquery to join [\#4914](https://github.com/apache/arrow-datafusion/issues/4914) -- Support Describe FILE in datafusion-cli [\#4913](https://github.com/apache/arrow-datafusion/issues/4913) -- Release DataFusion 16 [\#4776](https://github.com/apache/arrow-datafusion/issues/4776) -- Support writing lists in the arrow csv writer [\#4502](https://github.com/apache/arrow-datafusion/issues/4502) -- Replace python based integration test with sqllogictest [\#4462](https://github.com/apache/arrow-datafusion/issues/4462) -- Support CREATE TABLE table\_name\(...schema\_fields\) [\#4396](https://github.com/apache/arrow-datafusion/issues/4396) -- Make Binary Dictionary Operations Optional [\#4386](https://github.com/apache/arrow-datafusion/issues/4386) -- Improve / Cleanup DataFusion CI [\#3045](https://github.com/apache/arrow-datafusion/issues/3045) -- More frequent DataFusion releases to crates.io \(discussion\) [\#2327](https://github.com/apache/arrow-datafusion/issues/2327) - -**Fixed bugs:** - -- UPDATE statment for non existent column doesn't error out [\#5068](https://github.com/apache/arrow-datafusion/issues/5068) -- Limit doesn't drop on first batch when limit size == fetch size. [\#5064](https://github.com/apache/arrow-datafusion/issues/5064) -- Performance regressions since DataFusion 15.x [\#5060](https://github.com/apache/arrow-datafusion/issues/5060) -- Quoted schema and table names result in double-quoted names in logical plan. 
[\#5058](https://github.com/apache/arrow-datafusion/issues/5058) -- Homebrew release script has the amount of arguments being incorrect [\#5043](https://github.com/apache/arrow-datafusion/issues/5043) -- CI Failing with Out of Disk [\#5040](https://github.com/apache/arrow-datafusion/issues/5040) -- Doc links to LogicalPlan in the core package need updating. [\#5036](https://github.com/apache/arrow-datafusion/issues/5036) -- explain analyze can not see csvexec execution time metrics [\#5014](https://github.com/apache/arrow-datafusion/issues/5014) -- AVG\(nulls\) returns 0 rather than NULL [\#5007](https://github.com/apache/arrow-datafusion/issues/5007) -- Invalid Placeholders return internal error \(rather than Plan error\) [\#5005](https://github.com/apache/arrow-datafusion/issues/5005) -- select \* from csv error [\#4996](https://github.com/apache/arrow-datafusion/issues/4996) -- Incorrect nested error wrapped to `ArrowError:External` variant for joins [\#4981](https://github.com/apache/arrow-datafusion/issues/4981) - -**Documentation updates:** - -- MINOR: Add Substrait to feature list in README [\#4955](https://github.com/apache/arrow-datafusion/pull/4955) ([andygrove](https://github.com/andygrove)) -- Minor: comma engineering in Readme [\#4954](https://github.com/apache/arrow-datafusion/pull/4954) ([alamb](https://github.com/alamb)) -- Update main DataFusion README [\#4903](https://github.com/apache/arrow-datafusion/pull/4903) ([alamb](https://github.com/alamb)) -- Docs: Add known user - Kamu [\#4899](https://github.com/apache/arrow-datafusion/pull/4899) ([sergiimk](https://github.com/sergiimk)) - -**Closed issues:** - -- Support sub directories in sqllogictest runner [\#4709](https://github.com/apache/arrow-datafusion/issues/4709) -- Bug displaying fractional seconds in `IntervalMonthDayNano` [\#4220](https://github.com/apache/arrow-datafusion/issues/4220) - -**Merged pull requests:** - -- Add `release-crates.sh` script 
[\#5070](https://github.com/apache/arrow-datafusion/pull/5070) ([iajoiner](https://github.com/iajoiner)) -- Validate assignment target column existence for UPDATE statements [\#5069](https://github.com/apache/arrow-datafusion/pull/5069) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) -- Fix limit when size of batch to poll == skip/fetch value [\#5066](https://github.com/apache/arrow-datafusion/pull/5066) ([Dandandan](https://github.com/Dandandan)) -- Fix CREATE SCHEMA schema name double quoting issue. [\#5059](https://github.com/apache/arrow-datafusion/pull/5059) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([neumark](https://github.com/neumark)) -- Minor: Move some aggregate error tests to sqllogictests [\#5055](https://github.com/apache/arrow-datafusion/pull/5055) ([alamb](https://github.com/alamb)) -- Add decimal support to substrait serde [\#5054](https://github.com/apache/arrow-datafusion/pull/5054) ([andygrove](https://github.com/andygrove)) -- Retain schema order in projection [\#5053](https://github.com/apache/arrow-datafusion/pull/5053) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- Improve join type support in substrait [\#5051](https://github.com/apache/arrow-datafusion/pull/5051) ([andygrove](https://github.com/andygrove)) -- \[Substrait\] ReadRel. 
Get column names from TableScan source [\#5050](https://github.com/apache/arrow-datafusion/pull/5050) ([andygrove](https://github.com/andygrove)) -- Ensure insert projections are of correct type [\#5049](https://github.com/apache/arrow-datafusion/pull/5049) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- Remove unnecessary pyo3 dependency from datafusion crate [\#5048](https://github.com/apache/arrow-datafusion/pull/5048) ([tustvold](https://github.com/tustvold)) -- Cleanup CI \(\#5040\) [\#5047](https://github.com/apache/arrow-datafusion/pull/5047) ([tustvold](https://github.com/tustvold)) -- Fix homebrew publish script [\#5044](https://github.com/apache/arrow-datafusion/pull/5044) ([iajoiner](https://github.com/iajoiner)) -- Update docs links to logical plans module. [\#5037](https://github.com/apache/arrow-datafusion/pull/5037) ([vincev](https://github.com/vincev)) -- \[sqllogictest\] Read subdirectories in `test_files` [\#5033](https://github.com/apache/arrow-datafusion/pull/5033) ([melgenek](https://github.com/melgenek)) -- minor: Fix docs for create\_default\_catalog\_and\_schema [\#5032](https://github.com/apache/arrow-datafusion/pull/5032) ([alamb](https://github.com/alamb)) -- Remove python based posgres comparsion `integration-test` [\#5031](https://github.com/apache/arrow-datafusion/pull/5031) ([alamb](https://github.com/alamb)) -- \[sqllogictest\] Create empty tables [\#5026](https://github.com/apache/arrow-datafusion/pull/5026) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([melgenek](https://github.com/melgenek)) -- Simplify the `PushDownLimit`. 
[\#5021](https://github.com/apache/arrow-datafusion/pull/5021) ([HaoYang670](https://github.com/HaoYang670)) -- \[BugFix\] fix explain csv/json/avro exec can not see metrics bug [\#5018](https://github.com/apache/arrow-datafusion/pull/5018) ([xiaoyong-z](https://github.com/xiaoyong-z)) -- Check placeholder \_\_timeTo and return Datafusion::Plan error [\#5017](https://github.com/apache/arrow-datafusion/pull/5017) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthias-Q](https://github.com/matthias-Q)) -- \[sqllogictets\] Remove postgres container orchestration [\#5015](https://github.com/apache/arrow-datafusion/pull/5015) ([alamb](https://github.com/alamb)) -- Sqllogictest: use the same normalization for all tests [\#5013](https://github.com/apache/arrow-datafusion/pull/5013) ([melgenek](https://github.com/melgenek)) -- Minor: Remove invalid comments [\#5012](https://github.com/apache/arrow-datafusion/pull/5012) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- AVG\(null\) is NULL \(not zero\) [\#5008](https://github.com/apache/arrow-datafusion/pull/5008) ([alamb](https://github.com/alamb)) -- Minor: improve internal error message [\#5006](https://github.com/apache/arrow-datafusion/pull/5006) ([alamb](https://github.com/alamb)) -- Support for bounded execution when window frame involves UNBOUNDED PRECEDING [\#5003](https://github.com/apache/arrow-datafusion/pull/5003) ([mustafasrepo](https://github.com/mustafasrepo)) -- Bump sqllogictest to v0.11.1 [\#5002](https://github.com/apache/arrow-datafusion/pull/5002) ([xudong963](https://github.com/xudong963)) -- Minor: Document how to create `ListingTables` [\#5001](https://github.com/apache/arrow-datafusion/pull/5001) ([alamb](https://github.com/alamb)) -- \[Enhancement\] early check table exist before create [\#4998](https://github.com/apache/arrow-datafusion/pull/4998) ([xiaoyong-z](https://github.com/xiaoyong-z)) -- \[Feature\] support 
describe file [\#4995](https://github.com/apache/arrow-datafusion/pull/4995) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xiaoyong-z](https://github.com/xiaoyong-z)) -- Implement `std::error::Error::source()` for `DataFusionError`, make `DataFusionError::find_root` more generic [\#4992](https://github.com/apache/arrow-datafusion/pull/4992) ([alamb](https://github.com/alamb)) -- Add support for linear range calculation in WINDOW functions [\#4989](https://github.com/apache/arrow-datafusion/pull/4989) ([mustafasrepo](https://github.com/mustafasrepo)) -- re-export substrait crate [\#4988](https://github.com/apache/arrow-datafusion/pull/4988) ([jdye64](https://github.com/jdye64)) -- minor: Update data type support documentation [\#4984](https://github.com/apache/arrow-datafusion/pull/4984) ([alamb](https://github.com/alamb)) -- fix\(4981\): incorrect error wrapping in `OnceFut` [\#4983](https://github.com/apache/arrow-datafusion/pull/4983) ([DDtKey](https://github.com/DDtKey)) -- Infer values for inserts [\#4977](https://github.com/apache/arrow-datafusion/pull/4977) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- Simplify GroupByHash implementation \(to prepare for more work\) [\#4972](https://github.com/apache/arrow-datafusion/pull/4972) ([alamb](https://github.com/alamb)) -- Add DataFusionError::Substrait variant to DataFusionError enum [\#4971](https://github.com/apache/arrow-datafusion/pull/4971) ([jdye64](https://github.com/jdye64)) -- refactor: display input partitions for `RepartitionExec` [\#4969](https://github.com/apache/arrow-datafusion/pull/4969) ([crepererum](https://github.com/crepererum)) -- Upgrade to Substrait 0.4.0 [\#4966](https://github.com/apache/arrow-datafusion/pull/4966) ([mbrobbel](https://github.com/mbrobbel)) -- Expose `sql_to_statement` and `statement_to_plan` on `SessionState` [\#4958](https://github.com/apache/arrow-datafusion/pull/4958) 
([avantgardnerio](https://github.com/avantgardnerio)) -- Minor: Make messages consistent for LogicalPlan::Dml [\#4953](https://github.com/apache/arrow-datafusion/pull/4953) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Do not resort inputs to `UnionExec` if they are already sorted [\#4946](https://github.com/apache/arrow-datafusion/pull/4946) ([alamb](https://github.com/alamb)) -- Minor: Reduce even more redundancy creating window\_agg in sort\_enforcement tests [\#4945](https://github.com/apache/arrow-datafusion/pull/4945) ([alamb](https://github.com/alamb)) -- Only add outer filter once when transforming exists/in subquery to join [\#4944](https://github.com/apache/arrow-datafusion/pull/4944) ([ygf11](https://github.com/ygf11)) -- fix: `FieldNotFound` error message without valid fields [\#4942](https://github.com/apache/arrow-datafusion/pull/4942) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([DDtKey](https://github.com/DDtKey)) -- Propagate planning error back to user [\#4940](https://github.com/apache/arrow-datafusion/pull/4940) ([fsdvh](https://github.com/fsdvh)) -- Make it able to specify a session id for SessionState [\#4933](https://github.com/apache/arrow-datafusion/pull/4933) ([yahoNanJing](https://github.com/yahoNanJing)) -- SUPPORT SEMI/ANTI JOIN SQL syntax in DataFusion [\#4932](https://github.com/apache/arrow-datafusion/pull/4932) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mingmwang](https://github.com/mingmwang)) -- Support gs:// as GCS schema [\#4930](https://github.com/apache/arrow-datafusion/pull/4930) ([jychen7](https://github.com/jychen7)) -- Upgrade object\_store from 0.5.0 to 0.5.3 [\#4929](https://github.com/apache/arrow-datafusion/pull/4929) ([jychen7](https://github.com/jychen7)) -- Reduce redundancy in sort\_enforcement tests [\#4928](https://github.com/apache/arrow-datafusion/pull/4928) ([alamb](https://github.com/alamb)) -- Update to arrow 31 
[\#4927](https://github.com/apache/arrow-datafusion/pull/4927) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Unify Row hash and hash implementation [\#4924](https://github.com/apache/arrow-datafusion/pull/4924) ([mustafasrepo](https://github.com/mustafasrepo)) -- Support join-filter pushdown for semi/anti join [\#4923](https://github.com/apache/arrow-datafusion/pull/4923) ([ygf11](https://github.com/ygf11)) -- Minor add ticket link to broken test [\#4919](https://github.com/apache/arrow-datafusion/pull/4919) ([alamb](https://github.com/alamb)) -- Improve documentation for ExprVisitor, port simple uses to new walking function [\#4916](https://github.com/apache/arrow-datafusion/pull/4916) ([alamb](https://github.com/alamb)) -- Add substrait label to PRs [\#4915](https://github.com/apache/arrow-datafusion/pull/4915) ([alamb](https://github.com/alamb)) -- Executing ProjectionExec with no column should not return an Err [\#4912](https://github.com/apache/arrow-datafusion/pull/4912) ([viirya](https://github.com/viirya)) -- Refactor: `Add LogicalPlan::observe_expressions` to walk expressions [\#4906](https://github.com/apache/arrow-datafusion/pull/4906) ([alamb](https://github.com/alamb)) -- Minor: Port information schema tests to sqllogictest [\#4905](https://github.com/apache/arrow-datafusion/pull/4905) ([alamb](https://github.com/alamb)) -- Add insert/update/delete to LogicalPlan and add SQL planner support [\#4902](https://github.com/apache/arrow-datafusion/pull/4902) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- fix: Visit subqueries in `Expr::Alias` [\#4900](https://github.com/apache/arrow-datafusion/pull/4900) ([askoa](https://github.com/askoa)) -- \[Substrait\] Change API to return LogicalPlan instead of DataFrame [\#4896](https://github.com/apache/arrow-datafusion/pull/4896) ([andygrove](https://github.com/andygrove)) -- Upgrade 
to substrait 0.3 [\#4895](https://github.com/apache/arrow-datafusion/pull/4895) ([andygrove](https://github.com/andygrove)) -- Add datafusion-substrait crate to workspace [\#4893](https://github.com/apache/arrow-datafusion/pull/4893) ([andygrove](https://github.com/andygrove)) -- refactor and add simple function to deserialize and serialize proto b… [\#4892](https://github.com/apache/arrow-datafusion/pull/4892) ([jdye64](https://github.com/jdye64)) -- Update `optimize_children` to return `Result<Option<LogicalPlan>>` [\#4888](https://github.com/apache/arrow-datafusion/pull/4888) ([HaoYang670](https://github.com/HaoYang670)) -- Do not repartition inputs whose sort order is required [\#4885](https://github.com/apache/arrow-datafusion/pull/4885) ([alamb](https://github.com/alamb)) -- Minor: Add docstrings to UnionExec [\#4884](https://github.com/apache/arrow-datafusion/pull/4884) ([alamb](https://github.com/alamb)) -- Update datafusion-substrait crate to build against repo version of DataFusion [\#4879](https://github.com/apache/arrow-datafusion/pull/4879) ([andygrove](https://github.com/andygrove)) -- Fix column indices in EnforceDistribution optimizer in Partial AggregateMode [\#4878](https://github.com/apache/arrow-datafusion/pull/4878) ([jonmmease](https://github.com/jonmmease)) -- refactor: improve repartition buffering [\#4867](https://github.com/apache/arrow-datafusion/pull/4867) ([crepererum](https://github.com/crepererum)) -- Rewrite coerce\_plan\_expr\_for\_schema to fix union type coercion [\#4862](https://github.com/apache/arrow-datafusion/pull/4862) ([ygf11](https://github.com/ygf11)) -- \(\#4462\) Postgres compatibility tests using sqllogictest [\#4834](https://github.com/apache/arrow-datafusion/pull/4834) ([melgenek](https://github.com/melgenek)) -- Support non-tuple expression for in-subquery to join [\#4826](https://github.com/apache/arrow-datafusion/pull/4826) ([ygf11](https://github.com/ygf11)) -- Update to arrow `30.0.1`
[\#4818](https://github.com/apache/arrow-datafusion/pull/4818) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Refine the statistics estimation for the limit and aggregate operator [\#4716](https://github.com/apache/arrow-datafusion/pull/4716) ([yahoNanJing](https://github.com/yahoNanJing)) -- Infer prepared statement parameter types [\#4701](https://github.com/apache/arrow-datafusion/pull/4701) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- Add datafusion-substrait crate [\#4543](https://github.com/apache/arrow-datafusion/pull/4543) ([andygrove](https://github.com/andygrove)) -- Refactor loser tree code in SortPreservingMerge per PR comments [\#4407](https://github.com/apache/arrow-datafusion/pull/4407) ([alamb](https://github.com/alamb)) - - -## [16.1.0](https://github.com/apache/arrow-datafusion/tree/16.1.0) (2023-01-19) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/16.0.0...16.1.0) - -**Merged pull requests:** - -- Fix column indices in EnforceDistribution optimizer in Partial AggregateMode \(\#4878\) [\#4959](https://github.com/apache/arrow-datafusion/pull/4959) -- Make it able to specify a session id for SessionState \(\#4933\) [\#4951](https://github.com/apache/arrow-datafusion/pull/4951) - -## [16.0.0](https://github.com/apache/arrow-datafusion/tree/16.0.0) (2023-01-12) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/16.0.0-rc1...16.0.0) - -**Breaking changes:** - -- Remove unused ExecutionPlan::relies\_input\_order \(has been replaced with `required_input_ordering`\) [\#4856](https://github.com/apache/arrow-datafusion/pull/4856) ([alamb](https://github.com/alamb)) -- Add DataFrame::into\_view instead of implementing TableProvider \(\#2659\) [\#4778](https://github.com/apache/arrow-datafusion/pull/4778) ([tustvold](https://github.com/tustvold)) - -**Implemented
enhancements:** - -- Support custom window frame with AVG aggregate function [\#4845](https://github.com/apache/arrow-datafusion/issues/4845) -- add sqllogicaltest for tpch and remove some duplicated test. [\#4801](https://github.com/apache/arrow-datafusion/issues/4801) -- Catalog Snapshot Isolation [\#4697](https://github.com/apache/arrow-datafusion/issues/4697) -- Support `select .. FROM 'parquet.file'` in datafusion-cli [\#4580](https://github.com/apache/arrow-datafusion/issues/4580) - -**Fixed bugs:** - -- Regression: `write_csv` result has incorrect formatting [\#4876](https://github.com/apache/arrow-datafusion/issues/4876) -- Incorrect results for join condition against current master branch [\#4844](https://github.com/apache/arrow-datafusion/issues/4844) -- Match Postgres for stddev and variance on less than 3 values [\#4843](https://github.com/apache/arrow-datafusion/issues/4843) -- `JOIN ... USING (columns)` works incorrectly with multiple columns \(joined-over columns are missing in the output\) [\#4674](https://github.com/apache/arrow-datafusion/issues/4674) -- ROW\_NUMBER window function inconsistent across partitions in multi-threaded runtime [\#4673](https://github.com/apache/arrow-datafusion/issues/4673) -- `SELECT ... FROM (tbl1 UNION tbl2)` wrongly works like `SELECT DISTINCT ... 
FROM (tbl1 UNION tbl2)` [\#4667](https://github.com/apache/arrow-datafusion/issues/4667) -- DataFrame TableProvider Circular Reference [\#2659](https://github.com/apache/arrow-datafusion/issues/2659) - -**Documentation updates:** - -- Add Synnada to known uses [\#4857](https://github.com/apache/arrow-datafusion/pull/4857) ([ozankabak](https://github.com/ozankabak)) - -**Closed issues:** - -- Remove tests from `sql_integration` that were ported to `sqllogictest` [\#4498](https://github.com/apache/arrow-datafusion/issues/4498) -- How to register a http url to the `object_store` [\#4491](https://github.com/apache/arrow-datafusion/issues/4491) -- optimizer: support `unsigned <-> decimal` for unwrap\_cast\_in\_comparion rule [\#4287](https://github.com/apache/arrow-datafusion/issues/4287) -- Add SQL support for NATURAL JOIN [\#117](https://github.com/apache/arrow-datafusion/issues/117) -- \[Datafusion\] Datafusion queries involving a column name that begins with a number produces unexpected results [\#108](https://github.com/apache/arrow-datafusion/issues/108) - -**Merged pull requests:** - -- docs: improve `Column::normalize_with_schemas` docs [\#4871](https://github.com/apache/arrow-datafusion/pull/4871) ([crepererum](https://github.com/crepererum)) -- Skip EliminateCrossJoin rule when meet non-empty join filter [\#4869](https://github.com/apache/arrow-datafusion/pull/4869) ([ygf11](https://github.com/ygf11)) -- Support for SQL Natural Join [\#4863](https://github.com/apache/arrow-datafusion/pull/4863) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- Minor: Move test data into `datafusion/core/tests/data` [\#4855](https://github.com/apache/arrow-datafusion/pull/4855) ([alamb](https://github.com/alamb)) -- Covariance single row input & null skipping [\#4852](https://github.com/apache/arrow-datafusion/pull/4852) ([korowa](https://github.com/korowa)) -- Document ability to select directly from files in 
datafusion-cli [\#4851](https://github.com/apache/arrow-datafusion/pull/4851) ([alamb](https://github.com/alamb)) -- Fix push\_down\_projection through a distinct [\#4849](https://github.com/apache/arrow-datafusion/pull/4849) ([Jefffrey](https://github.com/Jefffrey)) -- Support using var/var\_pop/stddev/stddev\_pop in window expressions with custom frames [\#4848](https://github.com/apache/arrow-datafusion/pull/4848) ([jonmmease](https://github.com/jonmmease)) -- Update variance/stddev to work with single values [\#4847](https://github.com/apache/arrow-datafusion/pull/4847) ([jonmmease](https://github.com/jonmmease)) -- Implement retract\_batch for AvgAccumulator [\#4846](https://github.com/apache/arrow-datafusion/pull/4846) ([jonmmease](https://github.com/jonmmease)) -- Support wildcard select on multiple column using joins [\#4840](https://github.com/apache/arrow-datafusion/pull/4840) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- Orthogonalize distribution and sort enforcement rules into `EnforceDistribution` and `EnforceSorting` [\#4839](https://github.com/apache/arrow-datafusion/pull/4839) ([mustafasrepo](https://github.com/mustafasrepo)) -- support `select .. 
FROM 'parquet.file'` in datafusion-cli [\#4838](https://github.com/apache/arrow-datafusion/pull/4838) ([unconsolable](https://github.com/unconsolable)) -- Remove tests from sql\_integration that were ported to sqllogictest [\#4836](https://github.com/apache/arrow-datafusion/pull/4836) ([matthewwillian](https://github.com/matthewwillian)) -- add tpch sqllogicaltest and remove some duplicated test [\#4802](https://github.com/apache/arrow-datafusion/pull/4802) ([jackwener](https://github.com/jackwener)) - -## [16.0.0-rc1](https://github.com/apache/arrow-datafusion/tree/16.0.0-rc1) (2023-01-07) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/15.0.0...16.0.0-rc1) - -**Breaking changes:** - -- Enable PhysicalOptimizerRule lazily \(\#4806\) [\#4807](https://github.com/apache/arrow-datafusion/pull/4807) ([tustvold](https://github.com/tustvold)) -- Move ConfigOptions to core [\#4803](https://github.com/apache/arrow-datafusion/pull/4803) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- remove Operator::{Like,NotLike,ILike,NotILike} [\#4792](https://github.com/apache/arrow-datafusion/pull/4792) ([unconsolable](https://github.com/unconsolable)) -- Move subquery alias assignment onto rules [\#4767](https://github.com/apache/arrow-datafusion/pull/4767) ([tustvold](https://github.com/tustvold)) -- Make SessionState members private [\#4764](https://github.com/apache/arrow-datafusion/pull/4764) ([tustvold](https://github.com/tustvold)) -- Deprecate SessionContext physical plan methods \(\#4617\) [\#4751](https://github.com/apache/arrow-datafusion/pull/4751) ([tustvold](https://github.com/tustvold)) -- Decouple physical optimizer from SessionConfig \(\#3887\) [\#4749](https://github.com/apache/arrow-datafusion/pull/4749) ([tustvold](https://github.com/tustvold)) -- Don't share ConfigOptions \(\#3886\) [\#4712](https://github.com/apache/arrow-datafusion/pull/4712) 
([tustvold](https://github.com/tustvold)) -- Push SessionState into FileFormat \(\#4349\) [\#4699](https://github.com/apache/arrow-datafusion/pull/4699) ([tustvold](https://github.com/tustvold)) -- Make SessionContext members private [\#4698](https://github.com/apache/arrow-datafusion/pull/4698) ([tustvold](https://github.com/tustvold)) -- Make OptimizerConfig a trait \(\#4631\) \(\#4638\) [\#4645](https://github.com/apache/arrow-datafusion/pull/4645) ([tustvold](https://github.com/tustvold)) -- DataFrame owned SessionState [\#4633](https://github.com/apache/arrow-datafusion/pull/4633) ([tustvold](https://github.com/tustvold)) -- Make LogicalPlanBuilder consuming \(\#4622\) [\#4632](https://github.com/apache/arrow-datafusion/pull/4632) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Make DataFrame API consuming \(\#4621\) [\#4624](https://github.com/apache/arrow-datafusion/pull/4624) ([tustvold](https://github.com/tustvold)) -- Make execute\_stream functions sync [\#4608](https://github.com/apache/arrow-datafusion/pull/4608) ([tustvold](https://github.com/tustvold)) -- Remove ObjectStore from FileStream \(\#4533\) [\#4601](https://github.com/apache/arrow-datafusion/pull/4601) ([tustvold](https://github.com/tustvold)) -- Remove `AggregateState` wrapper [\#4582](https://github.com/apache/arrow-datafusion/pull/4582) ([alamb](https://github.com/alamb)) -- Fix querying and defining table / view names with period [\#4530](https://github.com/apache/arrow-datafusion/pull/4530) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- refactor code about `subquery_alias` and `expr-alias`. 
[\#4451](https://github.com/apache/arrow-datafusion/pull/4451) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) - -**Implemented enhancements:** - -- Move the ExtractEquijoinPredicate behind the SubqueryFilterToJoin [\#4759](https://github.com/apache/arrow-datafusion/issues/4759) -- Remove the config `datafusion.execution.coalesce_target_batch_size` [\#4756](https://github.com/apache/arrow-datafusion/issues/4756) -- SimplifyExpressions will fail when rebuild equijoin with alias [\#4754](https://github.com/apache/arrow-datafusion/issues/4754) -- Provide a constructor for the ConfigOptions with HashMap\ [\#4752](https://github.com/apache/arrow-datafusion/issues/4752) -- Non-deprecated support for planning SQL without DDL [\#4720](https://github.com/apache/arrow-datafusion/issues/4720) -- Add regression tests for planning TPC-DS queries [\#4718](https://github.com/apache/arrow-datafusion/issues/4718) -- Move the extracting join keys logic to optimizer [\#4710](https://github.com/apache/arrow-datafusion/issues/4710) -- Support compression in `IPCWriter` [\#4708](https://github.com/apache/arrow-datafusion/issues/4708) -- Support prepared statement parameter type inference [\#4700](https://github.com/apache/arrow-datafusion/issues/4700) -- PruningPredicate Use Physical not Logical Predicate [\#4695](https://github.com/apache/arrow-datafusion/issues/4695) -- Support for executing infinite files [\#4692](https://github.com/apache/arrow-datafusion/issues/4692) -- Add a sort rule to remove unnecessary SortExecs from physical plan [\#4686](https://github.com/apache/arrow-datafusion/issues/4686) -- Install `protoc` automatically when building `datafusion/proto` crate [\#4684](https://github.com/apache/arrow-datafusion/issues/4684) -- Make DfSchema wrap SchemaRef [\#4680](https://github.com/apache/arrow-datafusion/issues/4680) -- Reorder the physical plan optimizer rules 
[\#4678](https://github.com/apache/arrow-datafusion/issues/4678) -- Inconsistent behavior with PostgreSQL to decide Window Expressions ordering [\#4641](https://github.com/apache/arrow-datafusion/issues/4641) -- Returns error too late when parsing invalid file compression type. [\#4636](https://github.com/apache/arrow-datafusion/issues/4636) -- Make OptimizerConfig a Trait [\#4631](https://github.com/apache/arrow-datafusion/issues/4631) -- Move Optimize onto DataFrame [\#4626](https://github.com/apache/arrow-datafusion/issues/4626) -- Make LogicalPlanBuilder Consuming [\#4622](https://github.com/apache/arrow-datafusion/issues/4622) -- Make DataFrame Consuming [\#4621](https://github.com/apache/arrow-datafusion/issues/4621) -- rules don't need to recursion inside themselves [\#4613](https://github.com/apache/arrow-datafusion/issues/4613) -- \[window function\] support min max with self define sliding window. [\#4603](https://github.com/apache/arrow-datafusion/issues/4603) -- Add `try_optimize` for all\_rules [\#4598](https://github.com/apache/arrow-datafusion/issues/4598) -- Refine the physical plan serialization and deserialization [\#4597](https://github.com/apache/arrow-datafusion/issues/4597) -- Normalize datafusion configuration names [\#4595](https://github.com/apache/arrow-datafusion/issues/4595) -- Add need\_data\_exchange in the ExecutionPlan to indicate whether a physical operator needs data exchange [\#4585](https://github.com/apache/arrow-datafusion/issues/4585) -- Bump Datafusion sql-parser dependency to 0.28 [\#4573](https://github.com/apache/arrow-datafusion/issues/4573) -- tpch test exist duplicated [\#4563](https://github.com/apache/arrow-datafusion/issues/4563) -- user-defined aggregate function as window function [\#4552](https://github.com/apache/arrow-datafusion/issues/4552) -- Convert a Prepare Logical Plan into a Logical Plan with all parameters replaced with values [\#4550](https://github.com/apache/arrow-datafusion/issues/4550) -- FileStream 
requires fake ObjectStore when ParquetFileReaderFactory is used [\#4533](https://github.com/apache/arrow-datafusion/issues/4533) -- Avoid reading the entire file in ChunkedStore [\#4524](https://github.com/apache/arrow-datafusion/issues/4524) -- Enrich filter statistics predictions with estimated column boundaries [\#4518](https://github.com/apache/arrow-datafusion/issues/4518) -- Show window frame info in physical plan [\#4509](https://github.com/apache/arrow-datafusion/issues/4509) -- Add sqllogictest auto labeler [\#4507](https://github.com/apache/arrow-datafusion/issues/4507) -- Optimize `is_distinct_from` / `is_not_distinct_from` [\#4482](https://github.com/apache/arrow-datafusion/issues/4482) -- Add window func related logic plan to proto ability. [\#4480](https://github.com/apache/arrow-datafusion/issues/4480) -- Make window function related struct public. [\#4479](https://github.com/apache/arrow-datafusion/issues/4479) -- Improve partition file explain plan display to show groupings [\#4466](https://github.com/apache/arrow-datafusion/issues/4466) -- Add support for non-column key for equijoin when eliminating cross join to inner join [\#4442](https://github.com/apache/arrow-datafusion/issues/4442) -- Remove the schema checking from `CrossJoinExec::try_new` [\#4431](https://github.com/apache/arrow-datafusion/issues/4431) -- Initial support for prepared statement [\#4426](https://github.com/apache/arrow-datafusion/issues/4426) -- Add support for NTILE built-in Window Function [\#4403](https://github.com/apache/arrow-datafusion/issues/4403) -- Add Support for MIN, MAX Aggregate Functions when run with custom window frames [\#4402](https://github.com/apache/arrow-datafusion/issues/4402) -- Support `INSERT INTO` statement [\#4397](https://github.com/apache/arrow-datafusion/issues/4397) -- Enhancement: split the SQL `planner` into smaller modules [\#4392](https://github.com/apache/arrow-datafusion/issues/4392) -- Proposal: Improve the join keys of logical plan 
[\#4389](https://github.com/apache/arrow-datafusion/issues/4389) -- Add `MergeSubqueryAlias` rule [\#4383](https://github.com/apache/arrow-datafusion/issues/4383) -- Optimizer rule support `subqueryAlias` [\#4381](https://github.com/apache/arrow-datafusion/issues/4381) -- Rewrite simple regex expressions [\#4370](https://github.com/apache/arrow-datafusion/issues/4370) -- Revisit get\_statistics\_with\_limit\(\) method in datasource mod [\#4323](https://github.com/apache/arrow-datafusion/issues/4323) -- Support for type coercion for a \(Timestamp, Utf8\) pair [\#4311](https://github.com/apache/arrow-datafusion/issues/4311) -- replace the operation about decimal to the arrow-rs kernel [\#4289](https://github.com/apache/arrow-datafusion/issues/4289) -- change `date_part` return types to f64 [\#3997](https://github.com/apache/arrow-datafusion/issues/3997) -- Better api for setting `ConfigOptions` from SessionContext [\#3908](https://github.com/apache/arrow-datafusion/issues/3908) -- Make `ConfigOptions` easier to work with [\#3886](https://github.com/apache/arrow-datafusion/issues/3886) -- An asynchronous version of `CatalogList`/`CatalogProvider`/`SchemaProvider` [\#3777](https://github.com/apache/arrow-datafusion/issues/3777) -- Allow configs to be set with string values [\#3500](https://github.com/apache/arrow-datafusion/issues/3500) -- support scientific notation for SQL literals [\#3448](https://github.com/apache/arrow-datafusion/issues/3448) -- Adopt physical plan serde from arrow-ballista [\#3257](https://github.com/apache/arrow-datafusion/issues/3257) -- Improve codebase readability and error messages by and consistently handle downcasting [\#3152](https://github.com/apache/arrow-datafusion/issues/3152) -- Re-enable where\_clauses\_object\_safety [\#3081](https://github.com/apache/arrow-datafusion/issues/3081) -- optimize/simplify the literal data type and remove unnecessary cast, try\_cast [\#3031](https://github.com/apache/arrow-datafusion/issues/3031) -- Move
`datafusion-substrait` crate into `arrow-datafusion` repo [\#2646](https://github.com/apache/arrow-datafusion/issues/2646) -- \[enhancement\] rules don't need to recursion inside themselves [\#2620](https://github.com/apache/arrow-datafusion/issues/2620) -- Add support for `GROUPING SETS` syntax in SQL planner [\#2469](https://github.com/apache/arrow-datafusion/issues/2469) -- Optimize EXISTS subquery expressions by rewriting as semi-join [\#2351](https://github.com/apache/arrow-datafusion/issues/2351) -- Add Delta Lake TableProvider [\#525](https://github.com/apache/arrow-datafusion/issues/525) -- Support window functions with window frame [\#361](https://github.com/apache/arrow-datafusion/issues/361) - -**Fixed bugs:** - -- PushdownFilter rule exist bug will cause filter change wrong [\#4822](https://github.com/apache/arrow-datafusion/issues/4822) -- Unlimited memory consumption in `RepartitionExec` [\#4816](https://github.com/apache/arrow-datafusion/issues/4816) -- Physical Optimizer Config Mutation Doesn't Take Effect [\#4806](https://github.com/apache/arrow-datafusion/issues/4806) -- cargo test failed ``error: linking with `cc` failed: exit status: 1`` [\#4790](https://github.com/apache/arrow-datafusion/issues/4790) -- Parquet files generated by DataFusion cannot be read by Apache Spark [\#4782](https://github.com/apache/arrow-datafusion/issues/4782) -- datafusion-physical-expr doesn't compile when blake3/traits-preview is enabled [\#4781](https://github.com/apache/arrow-datafusion/issues/4781) -- Multiple ways to express `like` / `ilike` / `not like` / `not ilike` [\#4765](https://github.com/apache/arrow-datafusion/issues/4765) -- SessionState::optimize and SessionState::create\_physical\_plan Don't Update Query Start Time [\#4747](https://github.com/apache/arrow-datafusion/issues/4747) -- Page Filtering Incorrectly Handles Pages with Different Row Counts [\#4744](https://github.com/apache/arrow-datafusion/issues/4744) -- cargo test failing on master due to
tpcds\_logical\_q41 stackoverflow [\#4728](https://github.com/apache/arrow-datafusion/issues/4728) -- PruningPredicate Different Evaluation Context from Query [\#4693](https://github.com/apache/arrow-datafusion/issues/4693) -- Skipping optimizer rule due to create\_name not supporting wildcard [\#4681](https://github.com/apache/arrow-datafusion/issues/4681) -- Create physical plan bug: got Arrow schema with 1 and DataFusion schema with 0 [\#4677](https://github.com/apache/arrow-datafusion/issues/4677) -- Timestamp \<-\> Date32 compare doesn't work [\#4672](https://github.com/apache/arrow-datafusion/issues/4672) -- Wrongly use the function `clamp` [\#4654](https://github.com/apache/arrow-datafusion/issues/4654) -- Fix the clippy errors [\#4653](https://github.com/apache/arrow-datafusion/issues/4653) -- Filter Null Keys Update Not Taking Effect [\#4638](https://github.com/apache/arrow-datafusion/issues/4638) -- Should not generate duplicate sort keys from Window expr's partition by keys [\#4635](https://github.com/apache/arrow-datafusion/issues/4635) -- `common_sub_expression_eliminate` exists bug [\#4575](https://github.com/apache/arrow-datafusion/issues/4575) -- Confusing "Bare" in doesn't exist messages [\#4571](https://github.com/apache/arrow-datafusion/issues/4571) -- `having` shouldn't include alias in projection [\#4556](https://github.com/apache/arrow-datafusion/issues/4556) -- wrong comment about having [\#4554](https://github.com/apache/arrow-datafusion/issues/4554) -- `drop view t1, t2, ...` and `drop table t1, t2, ...` silently ignores arguments past the first [\#4531](https://github.com/apache/arrow-datafusion/issues/4531) -- Extract from timestamp doesn't support nanosecond [\#4528](https://github.com/apache/arrow-datafusion/issues/4528) -- `prepare_select_exprs` don't need `outer_query_schema` [\#4526](https://github.com/apache/arrow-datafusion/issues/4526) -- Table names with periods are not handled correctly 
[\#4513](https://github.com/apache/arrow-datafusion/issues/4513) -- `Push_down_projection` push redundant column. [\#4486](https://github.com/apache/arrow-datafusion/issues/4486) -- Planner don't generate `SubqueryAlias` [\#4483](https://github.com/apache/arrow-datafusion/issues/4483) -- Planner generate replicated `Projection` | `SubqueryAlias` [\#4481](https://github.com/apache/arrow-datafusion/issues/4481) -- `apply_table_alias` will ignore alias\_name when columns is empty. [\#4454](https://github.com/apache/arrow-datafusion/issues/4454) -- Fix output\_ordering of WindowAggExec [\#4438](https://github.com/apache/arrow-datafusion/issues/4438) -- Incorrect error for plus/minus operations over timestamps and dates [\#4420](https://github.com/apache/arrow-datafusion/issues/4420) -- Optimization rule `filter_push_down` causes `FieldNotFound` error [\#4401](https://github.com/apache/arrow-datafusion/issues/4401) -- Should not convert a normal non-inner join to Cross Join when there are non-equal Join conditions [\#4363](https://github.com/apache/arrow-datafusion/issues/4363) -- MemoryConsumer::try\_grow Underflow [\#4328](https://github.com/apache/arrow-datafusion/issues/4328) -- Potential MemoryManager Deadlock [\#4325](https://github.com/apache/arrow-datafusion/issues/4325) -- `create external table` should fail to parse if syntax is incorrect [\#4262](https://github.com/apache/arrow-datafusion/issues/4262) -- Nullif func states support for Boolean type, but fails if this is attempted [\#4205](https://github.com/apache/arrow-datafusion/issues/4205) -- `ProjectionPushDown` rule don't consider the alias in projection. 
[\#4174](https://github.com/apache/arrow-datafusion/issues/4174) -- Stack overflow planning complex query [\#4065](https://github.com/apache/arrow-datafusion/issues/4065) -- Can not use `extract ` on the value of `now()` [\#3980](https://github.com/apache/arrow-datafusion/issues/3980) -- Bug with intervals and logical and/or [\#3944](https://github.com/apache/arrow-datafusion/issues/3944) -- CoalesceBatches doesn't provide correct elapsed\_compute info in metrics [\#3894](https://github.com/apache/arrow-datafusion/issues/3894) -- Paniced at to\_timestamp\_micros function when the timestamp is too large. [\#3832](https://github.com/apache/arrow-datafusion/issues/3832) -- Optimizer casts decimals to different values on different platforms [\#3791](https://github.com/apache/arrow-datafusion/issues/3791) -- CSV inference reads in the whole file to memory, regardless of row limit [\#3658](https://github.com/apache/arrow-datafusion/issues/3658) -- after type coercion `CommonSubexprEliminate` will produce invalid projection [\#3635](https://github.com/apache/arrow-datafusion/issues/3635) -- panic at `attempt to multiply with overflow` when doing math on Decimal128 columns [\#3437](https://github.com/apache/arrow-datafusion/issues/3437) -- Precedence bug with date comparison to date plus interval [\#3408](https://github.com/apache/arrow-datafusion/issues/3408) -- Median aggregation using DataFrame panics: "AggregateState is not a scalar aggregate" [\#3105](https://github.com/apache/arrow-datafusion/issues/3105) -- `date_part` does't work for `now()` [\#3096](https://github.com/apache/arrow-datafusion/issues/3096) -- hash\_join panics when join keys have different data types [\#2877](https://github.com/apache/arrow-datafusion/issues/2877) -- Memory manager triggers unnecessary spills [\#2829](https://github.com/apache/arrow-datafusion/issues/2829) -- Address performance/execution plan of TPCH query 9 [\#77](https://github.com/apache/arrow-datafusion/issues/77) - 
-**Documentation updates:** - -- Add a new open source project that is use DataFusion as query engine [\#4768](https://github.com/apache/arrow-datafusion/pull/4768) ([francis-du](https://github.com/francis-du)) - -**Closed issues:** - -- move the tests in planner [\#4798](https://github.com/apache/arrow-datafusion/issues/4798) -- Make it easier to update sqltestlogic test expected output \("test script completion mode"\) [\#4570](https://github.com/apache/arrow-datafusion/issues/4570) -- Make ConfigOption names into an Enum [\#4517](https://github.com/apache/arrow-datafusion/issues/4517) -- Implement null / empty string handling for sqllogictest [\#4500](https://github.com/apache/arrow-datafusion/issues/4500) -- Write a blog about parquet predicate pushdown [\#3464](https://github.com/apache/arrow-datafusion/issues/3464) -- Ensure column names are equivalent with or without optimization [\#1123](https://github.com/apache/arrow-datafusion/issues/1123) - -**Merged pull requests:** - -- Bump tokio from 1.23.0 to 1.23.1 in /datafusion-cli [\#4835](https://github.com/apache/arrow-datafusion/pull/4835) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Fix a few links in `roadmap.md` [\#4833](https://github.com/apache/arrow-datafusion/pull/4833) ([romanz](https://github.com/romanz)) -- DataFusion 16.0.0 release prep: Update version + add changelog [\#4831](https://github.com/apache/arrow-datafusion/pull/4831) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- feat: use arrow row format for hash-group-by [\#4830](https://github.com/apache/arrow-datafusion/pull/4830) ([crepererum](https://github.com/crepererum)) -- refactor: split relation of planner into one part. 
[\#4829](https://github.com/apache/arrow-datafusion/pull/4829) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- bugfix: remove cnf\_rewrite in push\_down\_filter [\#4825](https://github.com/apache/arrow-datafusion/pull/4825) ([jackwener](https://github.com/jackwener)) -- minor: add some comments to row group pruning tests [\#4823](https://github.com/apache/arrow-datafusion/pull/4823) ([alamb](https://github.com/alamb)) -- Handle trailing tbl column in TPCH benchmarks [\#4821](https://github.com/apache/arrow-datafusion/pull/4821) ([tustvold](https://github.com/tustvold)) -- fix: account for memory in `RepartitionExec` [\#4820](https://github.com/apache/arrow-datafusion/pull/4820) ([crepererum](https://github.com/crepererum)) -- Fix clippy [\#4817](https://github.com/apache/arrow-datafusion/pull/4817) ([tustvold](https://github.com/tustvold)) -- Add test cases: row group filter with missing statistics for decimal data type [\#4810](https://github.com/apache/arrow-datafusion/pull/4810) ([liukun4515](https://github.com/liukun4515)) -- Move default catalog and schema onto ConfigOptions \(\#3887\) [\#4805](https://github.com/apache/arrow-datafusion/pull/4805) ([tustvold](https://github.com/tustvold)) -- remove duplicated test [\#4800](https://github.com/apache/arrow-datafusion/pull/4800) ([jackwener](https://github.com/jackwener)) -- Update sqlparser requirement from 0.29 to 0.30 [\#4799](https://github.com/apache/arrow-datafusion/pull/4799) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- rewrite the function `ensure_any_column_reference_is_unambiguous` [\#4797](https://github.com/apache/arrow-datafusion/pull/4797) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Uncomment nanoseconds tests after sql parser upgrade [\#4789](https://github.com/apache/arrow-datafusion/pull/4789) 
([comphead](https://github.com/comphead)) -- fix: ListingSchemaProvider directory paths \(related: \#4204\) [\#4788](https://github.com/apache/arrow-datafusion/pull/4788) ([cfraz89](https://github.com/cfraz89)) -- Minimize stack space required to plan deeply nested binary expressions [\#4787](https://github.com/apache/arrow-datafusion/pull/4787) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Minor: Refactor some sql planning code into functions [\#4785](https://github.com/apache/arrow-datafusion/pull/4785) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Make datafusion-physical-expr compatible with blake3/traits-preview feature. [\#4784](https://github.com/apache/arrow-datafusion/pull/4784) ([BoredPerson](https://github.com/BoredPerson)) -- refactor: split expression pf planner into one part. [\#4783](https://github.com/apache/arrow-datafusion/pull/4783) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Fix Stack overflow in sql planning in debug builds [\#4779](https://github.com/apache/arrow-datafusion/pull/4779) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Pipeline-friendly Bounded Memory Window Executor [\#4777](https://github.com/apache/arrow-datafusion/pull/4777) ([mustafasrepo](https://github.com/mustafasrepo)) -- Implement OptimizerConfig for SessionState [\#4775](https://github.com/apache/arrow-datafusion/pull/4775) ([tustvold](https://github.com/tustvold)) -- refactor: extract `parse_value` [\#4774](https://github.com/apache/arrow-datafusion/pull/4774) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Structify ConfigOptions \(\#4517\) [\#4771](https://github.com/apache/arrow-datafusion/pull/4771) ([tustvold](https://github.com/tustvold)) -- Update sqlparser to `29.0.0` 
[\#4770](https://github.com/apache/arrow-datafusion/pull/4770) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Refactor extract\_join\_keys and move the ExtractEquijoinPredicate rule [\#4760](https://github.com/apache/arrow-datafusion/pull/4760) ([ygf11](https://github.com/ygf11)) -- Remove the config datafusion.execution.coalesce\_target\_batch\_size and use datafusion.execution.batch\_size instead [\#4757](https://github.com/apache/arrow-datafusion/pull/4757) ([yahoNanJing](https://github.com/yahoNanJing)) -- Add alias check for equijoin in from\_plan [\#4755](https://github.com/apache/arrow-datafusion/pull/4755) ([ygf11](https://github.com/ygf11)) -- Take the top level `schema` into account when creating `UnionExec` [\#4753](https://github.com/apache/arrow-datafusion/pull/4753) ([HaoYang670](https://github.com/HaoYang670)) -- Set query\_execution\_start\_time on snapshot from SessionContext \(\#4747\) [\#4750](https://github.com/apache/arrow-datafusion/pull/4750) ([tustvold](https://github.com/tustvold)) -- minor: Improve docstrings [\#4748](https://github.com/apache/arrow-datafusion/pull/4748) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Append generated column to the schema instead of prepending for WindowAggExec [\#4746](https://github.com/apache/arrow-datafusion/pull/4746) ([mustafasrepo](https://github.com/mustafasrepo)) -- Minor: comments about coercion in physical planner [\#4745](https://github.com/apache/arrow-datafusion/pull/4745) ([alamb](https://github.com/alamb)) -- Simplify parquet filter predicate test, fix Page Filtering Incorrectly Handles Pages with Different Row Counts [\#4743](https://github.com/apache/arrow-datafusion/pull/4743) ([tustvold](https://github.com/tustvold)) -- support byte array for decimal in parquet page and row group filters [\#4742](https://github.com/apache/arrow-datafusion/pull/4742) 
([liukun4515](https://github.com/liukun4515)) -- revert some code for \#4726 / remove unnecessary coercion in physical plans [\#4741](https://github.com/apache/arrow-datafusion/pull/4741) ([liukun4515](https://github.com/liukun4515)) -- Cleanup InformationSchema plumbing [\#4740](https://github.com/apache/arrow-datafusion/pull/4740) ([tustvold](https://github.com/tustvold)) -- Minor: use a common method to check the validate of equijoin predicate [\#4739](https://github.com/apache/arrow-datafusion/pull/4739) ([ygf11](https://github.com/ygf11)) -- minor: Support more data type for `null_counts` in the `PruningStatistics` [\#4738](https://github.com/apache/arrow-datafusion/pull/4738) ([liukun4515](https://github.com/liukun4515)) -- Extended datatypes & signatures support for `NULLIF` function [\#4737](https://github.com/apache/arrow-datafusion/pull/4737) ([korowa](https://github.com/korowa)) -- minor: improve debug logging for pruning predicates [\#4736](https://github.com/apache/arrow-datafusion/pull/4736) ([alamb](https://github.com/alamb)) -- refactor: parallelize `parquet_exec` test case `single_file` [\#4735](https://github.com/apache/arrow-datafusion/pull/4735) ([waynexia](https://github.com/waynexia)) -- fix: add one more projection to recover output schema [\#4733](https://github.com/apache/arrow-datafusion/pull/4733) ([waynexia](https://github.com/waynexia)) -- remove `SubqueryFilterToJoin` [\#4731](https://github.com/apache/arrow-datafusion/pull/4731) ([jackwener](https://github.com/jackwener)) -- Create writer with `arrow::ipc::IPCWriteOptions` [\#4730](https://github.com/apache/arrow-datafusion/pull/4730) ([askoa](https://github.com/askoa)) -- Implement cast between Date and Timestamp [\#4726](https://github.com/apache/arrow-datafusion/pull/4726) ([comphead](https://github.com/comphead)) -- Dynamic information\_schema configuration and port more tests [\#4722](https://github.com/apache/arrow-datafusion/pull/4722) ([alamb](https://github.com/alamb)) -- Add 
TPC-DS query planning regression tests [\#4719](https://github.com/apache/arrow-datafusion/pull/4719) ([andygrove](https://github.com/andygrove)) -- Minor: refactor streaming CSV inference code [\#4717](https://github.com/apache/arrow-datafusion/pull/4717) ([alamb](https://github.com/alamb)) -- Reorder the physical plan optimizer rules, extract `GlobalSortSelection`, make `Repartition` optional [\#4714](https://github.com/apache/arrow-datafusion/pull/4714) ([yahoNanJing](https://github.com/yahoNanJing)) -- Eagerly construct PagePruningPredicate [\#4713](https://github.com/apache/arrow-datafusion/pull/4713) ([tustvold](https://github.com/tustvold)) -- Move the extract\_join\_keys to optimizer [\#4711](https://github.com/apache/arrow-datafusion/pull/4711) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- Avoid to bypass `try_new/new()` to build plan directly and cleanup filter [\#4702](https://github.com/apache/arrow-datafusion/pull/4702) ([jackwener](https://github.com/jackwener)) -- MINOR: Remove where\_clause\_object\_safety clippy ignore \(\#3081\) [\#4696](https://github.com/apache/arrow-datafusion/pull/4696) ([tustvold](https://github.com/tustvold)) -- Support for executing infinite files and boundedness-aware join reordering rule [\#4694](https://github.com/apache/arrow-datafusion/pull/4694) ([metesynnada](https://github.com/metesynnada)) -- Unnecessary SortExec removal rule from Physical Plan [\#4691](https://github.com/apache/arrow-datafusion/pull/4691) ([mustafasrepo](https://github.com/mustafasrepo)) -- minor: rename the github actions [\#4689](https://github.com/apache/arrow-datafusion/pull/4689) ([jackwener](https://github.com/jackwener)) -- FOLLOWUP: remove more recursion in optimizer rules. 
[\#4687](https://github.com/apache/arrow-datafusion/pull/4687) ([jackwener](https://github.com/jackwener)) -- Add line that prevents display\_name from being called on Wildcard [\#4682](https://github.com/apache/arrow-datafusion/pull/4682) ([andre-cc-natzka](https://github.com/andre-cc-natzka)) -- Deprecate SessionContext::create\_logical\_plan \(\#4617\) [\#4679](https://github.com/apache/arrow-datafusion/pull/4679) ([tustvold](https://github.com/tustvold)) -- Support `NTILE` window function [\#4676](https://github.com/apache/arrow-datafusion/pull/4676) ([berkaycpp](https://github.com/berkaycpp)) -- Support min max aggregates in window functions with sliding windows [\#4675](https://github.com/apache/arrow-datafusion/pull/4675) ([berkaycpp](https://github.com/berkaycpp)) -- Refactor Expr::AggregateFunction and Expr::WindowFunction to use struct [\#4671](https://github.com/apache/arrow-datafusion/pull/4671) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- Support type coercion for equijoin [\#4666](https://github.com/apache/arrow-datafusion/pull/4666) ([ygf11](https://github.com/ygf11)) -- Add `--complete` auto completion mode to `sqllogictests` [\#4665](https://github.com/apache/arrow-datafusion/pull/4665) ([alamb](https://github.com/alamb)) -- Fix CoalesceBatches elasped\_compute metric [\#4664](https://github.com/apache/arrow-datafusion/pull/4664) ([Jefffrey](https://github.com/Jefffrey)) -- Refactor Expr::Sort to use struct [\#4663](https://github.com/apache/arrow-datafusion/pull/4663) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) -- More descriptive error for plus/minus between timestamps/dates [\#4662](https://github.com/apache/arrow-datafusion/pull/4662) ([Jefffrey](https://github.com/Jefffrey)) -- Stream CSV file during schema inference [\#4661](https://github.com/apache/arrow-datafusion/pull/4661) ([Jefffrey](https://github.com/Jefffrey)) 
-- Refine the logical and physical plan serialization and deserialization [\#4659](https://github.com/apache/arrow-datafusion/pull/4659) ([yahoNanJing](https://github.com/yahoNanJing)) -- Use thiserror in sqllogictest erorr [\#4657](https://github.com/apache/arrow-datafusion/pull/4657) ([xudong963](https://github.com/xudong963)) -- fix `cargo clippy` warning [\#4652](https://github.com/apache/arrow-datafusion/pull/4652) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Improve group by hash performance: avoid group-key/-state clones for hash-groupby [\#4651](https://github.com/apache/arrow-datafusion/pull/4651) ([crepererum](https://github.com/crepererum)) -- remove recursion in optimizer rules [\#4650](https://github.com/apache/arrow-datafusion/pull/4650) ([jackwener](https://github.com/jackwener)) -- replace the arithmetic op for decimal array op decimal array using arrow kernel [\#4648](https://github.com/apache/arrow-datafusion/pull/4648) ([liukun4515](https://github.com/liukun4515)) -- simplify regex expressions [\#4646](https://github.com/apache/arrow-datafusion/pull/4646) ([crepererum](https://github.com/crepererum)) -- Avoid generate duplicate sort Keys from Window Expressions, fix bug when decide Window Expressions ordering [\#4643](https://github.com/apache/arrow-datafusion/pull/4643) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mingmwang](https://github.com/mingmwang)) -- Refactor Expr::TryCast to use a struct [\#4642](https://github.com/apache/arrow-datafusion/pull/4642) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- add `ILIKE` support [\#4639](https://github.com/apache/arrow-datafusion/pull/4639) ([crepererum](https://github.com/crepererum)) -- Detect invalid \(unsupported\) compression types when parsing [\#4637](https://github.com/apache/arrow-datafusion/pull/4637) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- unwrap\_cast\_in\_comparison.rs: support unint \<-\> decimal [\#4634](https://github.com/apache/arrow-datafusion/pull/4634) ([liukun4515](https://github.com/liukun4515)) -- MINOR: Fix incorrect config definitions [\#4623](https://github.com/apache/arrow-datafusion/pull/4623) ([andygrove](https://github.com/andygrove)) -- FOLLOWUP: remove `optimize()` [\#4619](https://github.com/apache/arrow-datafusion/pull/4619) ([jackwener](https://github.com/jackwener)) -- Optimizer: avoid every rule must recursive children in optimizer [\#4618](https://github.com/apache/arrow-datafusion/pull/4618) ([jackwener](https://github.com/jackwener)) -- fix: run logical optimizer rules for `TableScan` expressions [\#4614](https://github.com/apache/arrow-datafusion/pull/4614) ([crepererum](https://github.com/crepererum)) -- refactor: relax the signature of register\_\* in SessionContext [\#4612](https://github.com/apache/arrow-datafusion/pull/4612) ([waynexia](https://github.com/waynexia)) -- Remove the function `consume_token` from the parser [\#4609](https://github.com/apache/arrow-datafusion/pull/4609) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Make SchemaProvider::table async [\#4607](https://github.com/apache/arrow-datafusion/pull/4607) ([tustvold](https://github.com/tustvold)) -- Lazy system tables [\#4606](https://github.com/apache/arrow-datafusion/pull/4606) ([tustvold](https://github.com/tustvold)) -- Refactor: Change equijoin keys from column to expression in logical join [\#4602](https://github.com/apache/arrow-datafusion/pull/4602) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- refactor: extract `assert_optimized_plan_eq` from UT. 
[\#4600](https://github.com/apache/arrow-datafusion/pull/4600) ([jackwener](https://github.com/jackwener)) -- add `try_optimize()` for all rules. [\#4599](https://github.com/apache/arrow-datafusion/pull/4599) ([jackwener](https://github.com/jackwener)) -- Normalize datafusion configuration names [\#4596](https://github.com/apache/arrow-datafusion/pull/4596) ([yahoNanJing](https://github.com/yahoNanJing)) -- Fix the bugs in parsing `COMPRESSION TYPE` [\#4590](https://github.com/apache/arrow-datafusion/pull/4590) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Minor: Remove datafusion-core dev dependency from datafusion-sql [\#4589](https://github.com/apache/arrow-datafusion/pull/4589) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Improve error handling for array downcasting [\#4588](https://github.com/apache/arrow-datafusion/pull/4588) ([retikulum](https://github.com/retikulum)) -- Update to arrow v29 [\#4587](https://github.com/apache/arrow-datafusion/pull/4587) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Add need\_data\_exchange in the ExecutionPlan to indicate whether a physical operator needs data exchange [\#4586](https://github.com/apache/arrow-datafusion/pull/4586) ([yahoNanJing](https://github.com/yahoNanJing)) -- Move subset of select tests to sqllogic [\#4583](https://github.com/apache/arrow-datafusion/pull/4583) ([ajayaa](https://github.com/ajayaa)) -- bugfix: just allow having use expr in `groupby` or `aggr` [\#4579](https://github.com/apache/arrow-datafusion/pull/4579) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Output sqllogictests with arrow display rather than CSV writer [\#4578](https://github.com/apache/arrow-datafusion/pull/4578) ([alamb](https://github.com/alamb)) -- Minor: Add test case for reduce 
cross join [\#4577](https://github.com/apache/arrow-datafusion/pull/4577) ([ygf11](https://github.com/ygf11)) -- refactor: remove redundant `outer_query_schema` [\#4576](https://github.com/apache/arrow-datafusion/pull/4576) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Preserve the TryCast expression in columnize\_expr [\#4574](https://github.com/apache/arrow-datafusion/pull/4574) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([byteink](https://github.com/byteink)) -- Remove Confusing "Bare" in does not exist messages [\#4572](https://github.com/apache/arrow-datafusion/pull/4572) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Minor: Add tests for date interval predicate handling [\#4569](https://github.com/apache/arrow-datafusion/pull/4569) ([alamb](https://github.com/alamb)) -- Update sqlparser requirement from 0.27 to 0.28 [\#4568](https://github.com/apache/arrow-datafusion/pull/4568) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Avoid materializing local varaibles when creating `sortMergeJoinExec` [\#4566](https://github.com/apache/arrow-datafusion/pull/4566) ([HaoYang670](https://github.com/HaoYang670)) -- Minor: Fix logical conflict [\#4565](https://github.com/apache/arrow-datafusion/pull/4565) ([alamb](https://github.com/alamb)) -- feat: support nested loop join with the initial version [\#4562](https://github.com/apache/arrow-datafusion/pull/4562) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- feat: prepare logical plan to logical plan without params/placeholders [\#4561](https://github.com/apache/arrow-datafusion/pull/4561) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([NGA-TRAN](https://github.com/NGA-TRAN)) -- Write faster kernel for is\_distinct 
[\#4560](https://github.com/apache/arrow-datafusion/pull/4560) ([comphead](https://github.com/comphead)) -- refactor code about `query -> plan` for subqueries [\#4559](https://github.com/apache/arrow-datafusion/pull/4559) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- fix: remove wrong comment about `having` [\#4555](https://github.com/apache/arrow-datafusion/pull/4555) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- feat: user-defined aggregate function\(UDAF\) as window function [\#4553](https://github.com/apache/arrow-datafusion/pull/4553) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([MichaelScofield](https://github.com/MichaelScofield)) -- Fix date\_part/extract functions to support now\(\) [\#4548](https://github.com/apache/arrow-datafusion/pull/4548) ([comphead](https://github.com/comphead)) -- bump sqllogictest to 0.9.0 [\#4547](https://github.com/apache/arrow-datafusion/pull/4547) ([xxchan](https://github.com/xxchan)) -- minor: Remove more clones from the planner [\#4546](https://github.com/apache/arrow-datafusion/pull/4546) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Add tests for coercion of timestamps to strings [\#4545](https://github.com/apache/arrow-datafusion/pull/4545) ([alamb](https://github.com/alamb)) -- MINOR: move sqllogictest to dev-dependencies [\#4544](https://github.com/apache/arrow-datafusion/pull/4544) ([alamb](https://github.com/alamb)) -- MINOR: add some comments about intended use of ChunkedStore [\#4541](https://github.com/apache/arrow-datafusion/pull/4541) ([alamb](https://github.com/alamb)) -- fix: remove TODOs linked to arrow\#3147 [\#4540](https://github.com/apache/arrow-datafusion/pull/4540) ([crepererum](https://github.com/crepererum)) -- refactor: remove redundant `build_join_schema()` 
[\#4538](https://github.com/apache/arrow-datafusion/pull/4538) ([jackwener](https://github.com/jackwener)) -- Move some create/drop tests to `ddl.slt` [\#4535](https://github.com/apache/arrow-datafusion/pull/4535) ([alamb](https://github.com/alamb)) -- Minor: Avoid cloning as many `Ident` during SQL planning [\#4534](https://github.com/apache/arrow-datafusion/pull/4534) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- shouldn't add `outer_query_schema` in `sql_select_to_rex` [\#4527](https://github.com/apache/arrow-datafusion/pull/4527) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Avoid reading the entire file in ChunkedStore [\#4525](https://github.com/apache/arrow-datafusion/pull/4525) ([metesynnada](https://github.com/metesynnada)) -- Simplify MemoryManager [\#4522](https://github.com/apache/arrow-datafusion/pull/4522) ([tustvold](https://github.com/tustvold)) -- Fix limited statistic collection accross files with no stats [\#4521](https://github.com/apache/arrow-datafusion/pull/4521) ([isidentical](https://github.com/isidentical)) -- refactor: make Ctes a struct to also store data types provided by prepare stmt [\#4520](https://github.com/apache/arrow-datafusion/pull/4520) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([NGA-TRAN](https://github.com/NGA-TRAN)) -- Enrich filter statistics with known column boundaries [\#4519](https://github.com/apache/arrow-datafusion/pull/4519) ([isidentical](https://github.com/isidentical)) -- Remove Option from window frame [\#4516](https://github.com/apache/arrow-datafusion/pull/4516) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mustafasrepo](https://github.com/mustafasrepo)) -- Make nightly clippy happy [\#4515](https://github.com/apache/arrow-datafusion/pull/4515) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- 
Remove interior mutability of `MemTable` [\#4514](https://github.com/apache/arrow-datafusion/pull/4514) ([xudong963](https://github.com/xudong963)) -- Make window function related struct public for ballista. [\#4511](https://github.com/apache/arrow-datafusion/pull/4511) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- minor: rename `push_down_limit` [\#4510](https://github.com/apache/arrow-datafusion/pull/4510) ([jackwener](https://github.com/jackwener)) -- Add get\_window\_frame in window\_expr, show frame info in window\_agg\_exec [\#4508](https://github.com/apache/arrow-datafusion/pull/4508) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add sqllogictest auto labeler [\#4506](https://github.com/apache/arrow-datafusion/pull/4506) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- Add some more aggregate sqllogictests and remove rust tests [\#4505](https://github.com/apache/arrow-datafusion/pull/4505) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- Remove sqllogictests CI run [\#4504](https://github.com/apache/arrow-datafusion/pull/4504) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- Refactor code for `insert` in sqllogictest [\#4503](https://github.com/apache/arrow-datafusion/pull/4503) ([xudong963](https://github.com/xudong963)) -- Add empty string normalization to sqllogictests [\#4501](https://github.com/apache/arrow-datafusion/pull/4501) ([alamb](https://github.com/alamb)) -- sqllogictest: A logging and command line filter [\#4497](https://github.com/apache/arrow-datafusion/pull/4497) ([alamb](https://github.com/alamb)) -- Support `insert into` statement in sqllogictest [\#4496](https://github.com/apache/arrow-datafusion/pull/4496) ([xudong963](https://github.com/xudong963)) -- Improve error handling for array downcasting [\#4493](https://github.com/apache/arrow-datafusion/pull/4493) ([retikulum](https://github.com/retikulum)) -- Unify most of `SessionConfig` settings into `ConfigOptions` 
[\#4492](https://github.com/apache/arrow-datafusion/pull/4492) ([alamb](https://github.com/alamb)) -- feat: support prepare statement [\#4490](https://github.com/apache/arrow-datafusion/pull/4490) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([NGA-TRAN](https://github.com/NGA-TRAN)) -- Minor: Update docstrings and comments to aggregate code [\#4489](https://github.com/apache/arrow-datafusion/pull/4489) ([alamb](https://github.com/alamb)) -- Fix panic in median "AggregateState is not a scalar aggregate" [\#4488](https://github.com/apache/arrow-datafusion/pull/4488) ([alamb](https://github.com/alamb)) -- fix `push_down_projection` push redundant columns. [\#4487](https://github.com/apache/arrow-datafusion/pull/4487) ([jackwener](https://github.com/jackwener)) -- Add window func related logic plan to proto ability. [\#4485](https://github.com/apache/arrow-datafusion/pull/4485) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- fix `Planner` don't generate `SubqueryAlias` and generate duplicated `SubqueryAlias` [\#4484](https://github.com/apache/arrow-datafusion/pull/4484) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Improve parquet partition\_file output display [\#4467](https://github.com/apache/arrow-datafusion/pull/4467) ([alamb](https://github.com/alamb)) -- minor: remove redundant `unwrap()` [\#4463](https://github.com/apache/arrow-datafusion/pull/4463) ([jackwener](https://github.com/jackwener)) -- Fix `Cte` in `from` clause with duplicated cte name [\#4461](https://github.com/apache/arrow-datafusion/pull/4461) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Replace `&Option` with `Option<&T>` part 2 [\#4458](https://github.com/apache/arrow-datafusion/pull/4458) ([askoa](https://github.com/askoa)) -- Fix output\_partitioning\(\), output\_ordering\(\), equivalence\_properties\(\) in WindowAggExec, shift the Column indexes 
[\#4455](https://github.com/apache/arrow-datafusion/pull/4455) ([mingmwang](https://github.com/mingmwang)) -- fix `push_down_filter` for pushing filters on grouping columns rather than aggregate columns [\#4447](https://github.com/apache/arrow-datafusion/pull/4447) ([jackwener](https://github.com/jackwener)) -- Add support for non-column key for equijoin when eliminating cross join to inner join [\#4443](https://github.com/apache/arrow-datafusion/pull/4443) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- Remove the schema checking when creating `CrossJoinExec` [\#4432](https://github.com/apache/arrow-datafusion/pull/4432) ([HaoYang670](https://github.com/HaoYang670)) -- `date_part` support fractions of second [\#4385](https://github.com/apache/arrow-datafusion/pull/4385) ([comphead](https://github.com/comphead)) -- Minor: use upstream RowSelection code from arrow `intersect_row_selection` [\#4340](https://github.com/apache/arrow-datafusion/pull/4340) ([alamb](https://github.com/alamb)) -- Support type coercion for timestamp and utf8 [\#4312](https://github.com/apache/arrow-datafusion/pull/4312) ([andre-cc-natzka](https://github.com/andre-cc-natzka)) - - -## [15.0.0](https://github.com/apache/arrow-datafusion/tree/15.0.0) (2022-12-01) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/14.0.0-rc1...15.0.0) - -**Breaking changes:** - -- Expose remaining parquet config options into ConfigOptions \(try 2\) [\#4427](https://github.com/apache/arrow-datafusion/pull/4427) ([alamb](https://github.com/alamb)) -- Config Cleanup: Remove TaskProperties and KV structure, keep key=value serialization [\#4382](https://github.com/apache/arrow-datafusion/pull/4382) ([alamb](https://github.com/alamb)) -- add `{TDigest,ScalarValue,Accumulator}::size` [\#4342](https://github.com/apache/arrow-datafusion/pull/4342) ([crepererum](https://github.com/crepererum)) -- API-break: Support `SubqueryAlias` and remove `Alias 
in Projection` [\#4333](https://github.com/apache/arrow-datafusion/pull/4333) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- split `try_new_with_schema_alias` from original code [\#4284](https://github.com/apache/arrow-datafusion/pull/4284) ([jackwener](https://github.com/jackwener)) -- Collapse statistics in normal explain plan [\#4157](https://github.com/apache/arrow-datafusion/pull/4157) ([alamb](https://github.com/alamb)) -- Linearize binary expressions to reduce proto tree complexity [\#4115](https://github.com/apache/arrow-datafusion/pull/4115) ([isidentical](https://github.com/isidentical)) -- support `SET Timezone` [\#4107](https://github.com/apache/arrow-datafusion/pull/4107) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) - -**Implemented enhancements:** - -- Refactor Built-in, Aggregate window functions to increase code reuse. [\#4440](https://github.com/apache/arrow-datafusion/issues/4440) -- Helper to get "root" error [\#4435](https://github.com/apache/arrow-datafusion/issues/4435) -- Do NOT convert intermediate/source errors to strings. 
[\#4434](https://github.com/apache/arrow-datafusion/issues/4434) -- Estimate the `total_byte_size` of the filter expression's result when selectivity is available [\#4374](https://github.com/apache/arrow-datafusion/issues/4374) -- refactor the code of the `HashJoin` [\#4356](https://github.com/apache/arrow-datafusion/issues/4356) -- `CoalesceBatchesExec` reports no ordering [\#4331](https://github.com/apache/arrow-datafusion/issues/4331) -- Introduce tournament tree to achieve better k-way sort-merging [\#4300](https://github.com/apache/arrow-datafusion/issues/4300) -- Add a checker to confirm physical optimizer rules will keep the physical plan schema immutable [\#4299](https://github.com/apache/arrow-datafusion/issues/4299) -- Remove the macro rule `unary_scalar_expr` from `expr_fn.rs` [\#4298](https://github.com/apache/arrow-datafusion/issues/4298) -- Remove Alias-in-Projection, replace it with `SubqueryAlias` [\#4291](https://github.com/apache/arrow-datafusion/issues/4291) -- reimplement `reduce_outer_join` [\#4270](https://github.com/apache/arrow-datafusion/issues/4270) -- Reimplement `filter_push_down` [\#4266](https://github.com/apache/arrow-datafusion/issues/4266) -- Reimplement `eliminate_limit` [\#4264](https://github.com/apache/arrow-datafusion/issues/4264) -- Reimplement `limit_push_down` [\#4263](https://github.com/apache/arrow-datafusion/issues/4263) -- Make a data driven SQL testing tool \(so we can reuse duckdb test suite, example\) [\#4248](https://github.com/apache/arrow-datafusion/issues/4248) -- upgrade chrono to 0.4.23 [\#4224](https://github.com/apache/arrow-datafusion/issues/4224) -- support scan non-string columns partitioned parquet files [\#4218](https://github.com/apache/arrow-datafusion/issues/4218) -- Allow optimizer rules to skip optimizing plans [\#4209](https://github.com/apache/arrow-datafusion/issues/4209) -- Supporting specifying schema when create tables [\#4183](https://github.com/apache/arrow-datafusion/issues/4183) -- Improve 
ergonomics of creating `ListingOptions` [\#4178](https://github.com/apache/arrow-datafusion/issues/4178) -- Add ability to specify external sort information for ParquetExec [\#4169](https://github.com/apache/arrow-datafusion/issues/4169) -- Add another method to collect referenced columns from an expression [\#4152](https://github.com/apache/arrow-datafusion/issues/4152) -- Improve `EXPLAIN ANALYZE` output for parquet exec [\#4144](https://github.com/apache/arrow-datafusion/issues/4144) -- `TableProviderFactory::create` should have `Optional` parameter [\#4142](https://github.com/apache/arrow-datafusion/issues/4142) -- Support more expressions in equality join [\#4140](https://github.com/apache/arrow-datafusion/issues/4140) -- JoinSelection Rule to choose physical join implementation: HashJoin\(Partitioned or CollectLeft\) or SortMergeJoin base on Stats [\#4139](https://github.com/apache/arrow-datafusion/issues/4139) -- Allow TPCH tooling to create a combined result for easier processing by outside tools [\#4127](https://github.com/apache/arrow-datafusion/issues/4127) -- Allow additional options when creating an external table [\#4125](https://github.com/apache/arrow-datafusion/issues/4125) -- reuse code utils::optimize\_children instead of redundant implementation [\#4120](https://github.com/apache/arrow-datafusion/issues/4120) -- Add test field to PR template [\#4113](https://github.com/apache/arrow-datafusion/issues/4113) -- Allow for automatic registration of `ListingTables` [\#4111](https://github.com/apache/arrow-datafusion/issues/4111) -- Add CI check that configs.md is up-to-date [\#4108](https://github.com/apache/arrow-datafusion/issues/4108) -- Support `SET` timezone to non-UTC time zone [\#4106](https://github.com/apache/arrow-datafusion/issues/4106) -- Parquet predicates contains `and true` expressions [\#4091](https://github.com/apache/arrow-datafusion/issues/4091) -- Replace RwLock\<HashMap\> and Mutex\<HashMap\> by using DashMap
[\#4077](https://github.com/apache/arrow-datafusion/issues/4077) -- add support for `.xz` compressed files [\#4074](https://github.com/apache/arrow-datafusion/issues/4074) -- add a feature gate to make support for compressed files optional [\#4073](https://github.com/apache/arrow-datafusion/issues/4073) -- Support serializing more deeply nested AND / OR expressions [\#4066](https://github.com/apache/arrow-datafusion/issues/4066) -- Use f64::total\_cmp instead of OrderedFloat [\#4051](https://github.com/apache/arrow-datafusion/issues/4051) -- Add documentation to make it clear that decimal support is still experimental [\#4036](https://github.com/apache/arrow-datafusion/issues/4036) -- Simplify Pushed Down Predicates [\#4020](https://github.com/apache/arrow-datafusion/issues/4020) -- Improve HashJoinExec metrics [\#4009](https://github.com/apache/arrow-datafusion/issues/4009) -- Move physical plan serde from Ballista to DataFusion [\#3949](https://github.com/apache/arrow-datafusion/issues/3949) -- Support `SubqueryAlias` better in planner [\#3927](https://github.com/apache/arrow-datafusion/issues/3927) -- A framework for expression boundary analysis \(and statistics\) [\#3898](https://github.com/apache/arrow-datafusion/issues/3898) -- Replace `Filter: Boolean(false)` with `EmptyRelation` [\#3864](https://github.com/apache/arrow-datafusion/issues/3864) -- Implement statistics estimation for `FilterExec` [\#3845](https://github.com/apache/arrow-datafusion/issues/3845) -- Support parquet page filtering for more types: String, Binary\(Decimal\), Int96 [\#3833](https://github.com/apache/arrow-datafusion/issues/3833) -- Allow configuring parquet filter pushdown dynamically [\#3821](https://github.com/apache/arrow-datafusion/issues/3821) -- Unable to register tables in non-cloud S3 servers [\#3640](https://github.com/apache/arrow-datafusion/issues/3640) -- support more data type in prune for cast/try\_cast [\#3442](https://github.com/apache/arrow-datafusion/issues/3442) -- 
Disable spill to disk globally [\#3264](https://github.com/apache/arrow-datafusion/issues/3264) -- Consider to categorize Operator [\#3216](https://github.com/apache/arrow-datafusion/issues/3216) -- Replace Projection.alias with SubqueryAlias [\#2212](https://github.com/apache/arrow-datafusion/issues/2212) -- \[Optimizer\] Eliminate the distinct [\#2045](https://github.com/apache/arrow-datafusion/issues/2045) -- beautify datafusion's site: https://arrow.apache.org/datafusion/ [\#1819](https://github.com/apache/arrow-datafusion/issues/1819) -- split datafusion-logical-plan sub-module [\#1755](https://github.com/apache/arrow-datafusion/issues/1755) -- convert `outer join` to `inner join` to improve performance [\#1585](https://github.com/apache/arrow-datafusion/issues/1585) -- Add sqllogictest for datafusion [\#1453](https://github.com/apache/arrow-datafusion/issues/1453) -- Add additional simplification rules [\#1406](https://github.com/apache/arrow-datafusion/issues/1406) -- support more subqueries [\#1209](https://github.com/apache/arrow-datafusion/issues/1209) -- Add baseline metrics for remaining execution plan nodes [\#1019](https://github.com/apache/arrow-datafusion/issues/1019) -- Make `ExecutionPlan` implementations immutable [\#987](https://github.com/apache/arrow-datafusion/issues/987) -- Architecture overview may be insufficient in README [\#980](https://github.com/apache/arrow-datafusion/issues/980) -- Add a separate configuration setting for parallelism of scanning parquet files [\#924](https://github.com/apache/arrow-datafusion/issues/924) -- Support hash repartition elimination [\#41](https://github.com/apache/arrow-datafusion/issues/41) - -**Fixed bugs:** - -- `pyarrow` CI failed [\#4448](https://github.com/apache/arrow-datafusion/issues/4448) -- `UnwrapCastInComparison` exist bug [\#4430](https://github.com/apache/arrow-datafusion/issues/4430) -- The CLI panics when passing an invalid `explain` query
[\#4378](https://github.com/apache/arrow-datafusion/issues/4378) -- HashJoin should return Err when the right side input stream produce Err [\#4362](https://github.com/apache/arrow-datafusion/issues/4362) -- Optimizer check errors if resulting schema has different metadata [\#4346](https://github.com/apache/arrow-datafusion/issues/4346) -- Panic with function `to_hex` [\#4339](https://github.com/apache/arrow-datafusion/issues/4339) -- `LimitPushDown` pushdown into limit, result is wrong [\#4308](https://github.com/apache/arrow-datafusion/issues/4308) -- DESCRIBE statement issue with qualified table references [\#4303](https://github.com/apache/arrow-datafusion/issues/4303) -- Panic with window function LAST\_VALUE [\#4297](https://github.com/apache/arrow-datafusion/issues/4297) -- CI failed in `Compare to postgres` [\#4294](https://github.com/apache/arrow-datafusion/issues/4294) -- Field alias can't work in where clause [\#4288](https://github.com/apache/arrow-datafusion/issues/4288) -- Some valid filters are not pushed down to parquet scan [\#4282](https://github.com/apache/arrow-datafusion/issues/4282) -- The type renaming `pub type NullColumnarValue = ColumnarValue` makes no sense [\#4271](https://github.com/apache/arrow-datafusion/issues/4271) -- Current `limit_push_down` can't support cross\_join [\#4256](https://github.com/apache/arrow-datafusion/issues/4256) -- Cargo test fail [\#4253](https://github.com/apache/arrow-datafusion/issues/4253) -- RightSemi/RightAnti HashJoin has bug, the left\_indices is never populated, causing failure to apply join filters. 
[\#4247](https://github.com/apache/arrow-datafusion/issues/4247) -- Clippy failures [\#4245](https://github.com/apache/arrow-datafusion/issues/4245) -- Cannot query s3 data from datafusion-cli [\#4239](https://github.com/apache/arrow-datafusion/issues/4239) -- Bug parsing interval with negative values [\#4237](https://github.com/apache/arrow-datafusion/issues/4237) -- `cargo test` reports errors on the master branch. [\#4236](https://github.com/apache/arrow-datafusion/issues/4236) -- Doc of the expression function`log2` is incorrect [\#4231](https://github.com/apache/arrow-datafusion/issues/4231) -- HashJoin with mode PartitionMode:CollectLeft has bug and can produce wrong result [\#4230](https://github.com/apache/arrow-datafusion/issues/4230) -- Add ambiguous check when generate projection plan [\#4210](https://github.com/apache/arrow-datafusion/issues/4210) -- What happened for NDJSON support on CLI? [\#4198](https://github.com/apache/arrow-datafusion/issues/4198) -- Add ambiguous check when generate join plan [\#4197](https://github.com/apache/arrow-datafusion/issues/4197) -- Clippy failing on master : error: use of deprecated associated function `chrono::NaiveDate::from_ymd`: use `from_ymd_opt()` instead [\#4187](https://github.com/apache/arrow-datafusion/issues/4187) -- Reimplement the `eliminate_cross_join` [\#4176](https://github.com/apache/arrow-datafusion/issues/4176) -- Incorrect handling of column names [\#4166](https://github.com/apache/arrow-datafusion/issues/4166) -- Update release scripts to support datafusion-benchmarks [\#4134](https://github.com/apache/arrow-datafusion/issues/4134) -- Bug in interpreting correctly parsed SQL with aliases [\#4123](https://github.com/apache/arrow-datafusion/issues/4123) -- The percentile argument for ApproxPercentileCont must be Float64, not Decimal128\(2, 1\) [\#4103](https://github.com/apache/arrow-datafusion/issues/4103) -- Panic when using array\_agg 
[\#4080](https://github.com/apache/arrow-datafusion/issues/4080) -- Wrong result for FIRST\_VALUE AND LAST\_VALUE window functions [\#4076](https://github.com/apache/arrow-datafusion/issues/4076) -- Round error when casting float to decimal [\#4071](https://github.com/apache/arrow-datafusion/issues/4071) -- Predicate still has cast when comparing Timestamp\(Nano, None\) to a timestamp literal, so can't be pushed down or used for pruning [\#3938](https://github.com/apache/arrow-datafusion/issues/3938) -- Revisit required\_child\_distribution\(\), output\_partitioning\(\), output\_ordering\(\) implementations in ExecutionPlan's implementations [\#3653](https://github.com/apache/arrow-datafusion/issues/3653) -- Can't push down projection after do type coercion [\#3583](https://github.com/apache/arrow-datafusion/issues/3583) -- In some circumstances cast expression is not working [\#3499](https://github.com/apache/arrow-datafusion/issues/3499) -- output\_partitioning\(\) and output\_ordering\(\) implementations are wrong in some physical plan implementations with alias [\#3400](https://github.com/apache/arrow-datafusion/issues/3400) -- Interval Literal doesn't work for timeunit less than millisecond [\#3204](https://github.com/apache/arrow-datafusion/issues/3204) -- `INTERVAL` literal with duplicated interval types should raise error [\#3183](https://github.com/apache/arrow-datafusion/issues/3183) -- Error occurs when only using partition columns in query [\#1999](https://github.com/apache/arrow-datafusion/issues/1999) -- regex\_match does not compile using the `g` flag [\#1429](https://github.com/apache/arrow-datafusion/issues/1429) -- `between` with NULL literals does not work: can't be evaluated because there isn't a common type to coerce the types to [\#1193](https://github.com/apache/arrow-datafusion/issues/1193) -- \[Datafusion\] Error with CAST: Unsupported SQL type Time [\#193](https://github.com/apache/arrow-datafusion/issues/193) - -**Closed issues:** - -- 
SQL level coverage for when memory limit is exceeded [\#4404](https://github.com/apache/arrow-datafusion/issues/4404) -- Throw error \(not `panic`\) if a listing table specifies an missing partition column [\#4350](https://github.com/apache/arrow-datafusion/issues/4350) -- Page index pruning fail on complex\_expr [\#4317](https://github.com/apache/arrow-datafusion/issues/4317) -- optimize `limit-full join` in the limit push down rule [\#4275](https://github.com/apache/arrow-datafusion/issues/4275) -- `infer_schema` function is not working with s3 Urls or http endpoints [\#4269](https://github.com/apache/arrow-datafusion/issues/4269) -- Add support binary boolean operators with nulls [\#4241](https://github.com/apache/arrow-datafusion/issues/4241) -- Add additional testing to parquet predicate pushdown integration tests [\#4087](https://github.com/apache/arrow-datafusion/issues/4087) -- Add metrics for parquet page level skipping [\#4086](https://github.com/apache/arrow-datafusion/issues/4086) -- Add parquet page index pushdown metrics [\#4058](https://github.com/apache/arrow-datafusion/issues/4058) -- Throw a runtime error if the memory allocated to GroupByHash exceeds a limit [\#3940](https://github.com/apache/arrow-datafusion/issues/3940) -- support unsigned numeric data type in UnwrapCastInBinaryComparison rule [\#3702](https://github.com/apache/arrow-datafusion/issues/3702) -- Support type cast in union [\#2125](https://github.com/apache/arrow-datafusion/issues/2125) -- \[EPIC\] Memory Limited Sort \(Externalized / Spill\) [\#1568](https://github.com/apache/arrow-datafusion/issues/1568) -- Maintain partition information in Union [\#189](https://github.com/apache/arrow-datafusion/issues/189) -- Add coercion support for `NULL` literals [\#185](https://github.com/apache/arrow-datafusion/issues/185) - -**Merged pull requests:** - -- Make `datafusion-sql` depend on `arrow-schema` instead of `arrow` [\#4456](https://github.com/apache/arrow-datafusion/pull/4456) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mbrobbel](https://github.com/mbrobbel)) -- replace the comparator for `decimal array op scalar` using arrow kernel [\#4453](https://github.com/apache/arrow-datafusion/pull/4453) ([liukun4515](https://github.com/liukun4515)) -- Fix pyarrow test [\#4450](https://github.com/apache/arrow-datafusion/pull/4450) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- Replace `&Option<T>` with `Option<&T>` [\#4446](https://github.com/apache/arrow-datafusion/pull/4446) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([askoa](https://github.com/askoa)) -- Improve error handling for array downcasting [\#4445](https://github.com/apache/arrow-datafusion/pull/4445) ([retikulum](https://github.com/retikulum)) -- Refactor Builtin Window Function Implementation [\#4441](https://github.com/apache/arrow-datafusion/pull/4441) ([mustafasrepo](https://github.com/mustafasrepo)) -- feat: `DataFusionError::find_root` [\#4437](https://github.com/apache/arrow-datafusion/pull/4437) ([crepererum](https://github.com/crepererum)) -- fix: do NOT convert errors to strings but keep the type [\#4436](https://github.com/apache/arrow-datafusion/pull/4436) ([crepererum](https://github.com/crepererum)) -- The CLI panics when passing an invalid explain query [\#4429](https://github.com/apache/arrow-datafusion/pull/4429) ([comphead](https://github.com/comphead)) -- \[minor\] use arrow kernel concat\_batches instead combine\_batches [\#4423](https://github.com/apache/arrow-datafusion/pull/4423) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- fix panic on to\_hex function for negative numbers [\#4422](https://github.com/apache/arrow-datafusion/pull/4422) ([retikulum](https://github.com/retikulum)) -- Optimize filter executor in pull-based executor [\#4421](https://github.com/apache/arrow-datafusion/pull/4421) ([xudong963](https://github.com/xudong963)) -- optimize limit push for join case
[\#4411](https://github.com/apache/arrow-datafusion/pull/4411) ([liukun4515](https://github.com/liukun4515)) -- Add integration test for erroring when memory limits are hit [\#4406](https://github.com/apache/arrow-datafusion/pull/4406) ([alamb](https://github.com/alamb)) -- feat: `ResourceExhausted` for memory limit in `AggregateStream` [\#4405](https://github.com/apache/arrow-datafusion/pull/4405) ([crepererum](https://github.com/crepererum)) -- Update to arrow 28 [\#4400](https://github.com/apache/arrow-datafusion/pull/4400) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Update rstest requirement from 0.15.0 to 0.16.0 [\#4399](https://github.com/apache/arrow-datafusion/pull/4399) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Add sqllogictests \(v0\) [\#4395](https://github.com/apache/arrow-datafusion/pull/4395) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- improve hashjoin execution metrics [\#4394](https://github.com/apache/arrow-datafusion/pull/4394) ([AssHero](https://github.com/AssHero)) -- Add `with_new_inputs` for LogicalPlan [\#4393](https://github.com/apache/arrow-datafusion/pull/4393) ([jackwener](https://github.com/jackwener)) -- Clean the code in `limit.rs`. 
[\#4391](https://github.com/apache/arrow-datafusion/pull/4391) ([HaoYang670](https://github.com/HaoYang670)) -- Move physical plan serde from Ballista to DataFusion [\#4390](https://github.com/apache/arrow-datafusion/pull/4390) ([Kikkon](https://github.com/Kikkon)) -- Fix page index pruning fail on complex\_expr [\#4387](https://github.com/apache/arrow-datafusion/pull/4387) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add check for nested types in equivalent names and types [\#4380](https://github.com/apache/arrow-datafusion/pull/4380) ([alamb](https://github.com/alamb)) -- refine the code of build schema for ambiguous check, factor this out into a function [\#4379](https://github.com/apache/arrow-datafusion/pull/4379) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) -- Refactor the Hash Join [\#4377](https://github.com/apache/arrow-datafusion/pull/4377) ([liukun4515](https://github.com/liukun4515)) -- Minor: Fix typos in the documentation [\#4376](https://github.com/apache/arrow-datafusion/pull/4376) ([martin-g](https://github.com/martin-g)) -- Include byte size estimates in the filter statistics [\#4375](https://github.com/apache/arrow-datafusion/pull/4375) ([isidentical](https://github.com/isidentical)) -- HashJoin should return Err when the right side input stream produce Err, add more join UTs to cover different join types [\#4373](https://github.com/apache/arrow-datafusion/pull/4373) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mingmwang](https://github.com/mingmwang)) -- feat: `ResourceExhausted` for memory limit in `GroupedHashAggregateStream` [\#4371](https://github.com/apache/arrow-datafusion/pull/4371) ([crepererum](https://github.com/crepererum)) -- Use limit\(\) function instead of show\_limit\(\) in the first example [\#4369](https://github.com/apache/arrow-datafusion/pull/4369) ([martin-g](https://github.com/martin-g)) -- Update env\_logger requirement from 0.9 to 0.10 
[\#4367](https://github.com/apache/arrow-datafusion/pull/4367) ([dependabot[bot]](https://github.com/apps/dependabot)) -- reimplement `push_down_filter` to remove global-state [\#4365](https://github.com/apache/arrow-datafusion/pull/4365) ([jackwener](https://github.com/jackwener)) -- Support to use Schedular in tpch benchmark [\#4361](https://github.com/apache/arrow-datafusion/pull/4361) ([xudong963](https://github.com/xudong963)) -- Adding more dataframe example to read csv files [\#4360](https://github.com/apache/arrow-datafusion/pull/4360) ([DataPsycho](https://github.com/DataPsycho)) -- minor: correct name and typo [\#4359](https://github.com/apache/arrow-datafusion/pull/4359) ([jackwener](https://github.com/jackwener)) -- Do not log error if page index can not be evaluated [\#4358](https://github.com/apache/arrow-datafusion/pull/4358) ([alamb](https://github.com/alamb)) -- Clean the `expr_fn` - use `scalar_expr` to create unary scalar expr functions, remove macro `unary_scalar_functions` [\#4357](https://github.com/apache/arrow-datafusion/pull/4357) ([HaoYang670](https://github.com/HaoYang670)) -- Throw error \(not `panic`\) if a listing table specifies an missing partition column [\#4354](https://github.com/apache/arrow-datafusion/pull/4354) ([doki23](https://github.com/doki23)) -- Improve error handling and add some more types for proper downcasting [\#4352](https://github.com/apache/arrow-datafusion/pull/4352) ([retikulum](https://github.com/retikulum)) -- Add check to avoid underflow in memory manager [\#4351](https://github.com/apache/arrow-datafusion/pull/4351) ([askoa](https://github.com/askoa)) -- Improve error messages when memory is exhausted while sorting [\#4348](https://github.com/apache/arrow-datafusion/pull/4348) ([alamb](https://github.com/alamb)) -- Do not error in optimizer if resulting schema has different metadata [\#4347](https://github.com/apache/arrow-datafusion/pull/4347) ([alamb](https://github.com/alamb)) -- minor: improve optimizer 
logging and do not repeat rule name [\#4345](https://github.com/apache/arrow-datafusion/pull/4345) ([alamb](https://github.com/alamb)) -- minor: fix typos in test names [\#4344](https://github.com/apache/arrow-datafusion/pull/4344) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Minor: Add docstrings to `EliminateOuterJoins` optimizer pass [\#4343](https://github.com/apache/arrow-datafusion/pull/4343) ([alamb](https://github.com/alamb)) -- Minor: refactor: isolate common memory accounting utils [\#4341](https://github.com/apache/arrow-datafusion/pull/4341) ([crepererum](https://github.com/crepererum)) -- minor: make `plan_from_tables` return one plan instead of `Vec` [\#4336](https://github.com/apache/arrow-datafusion/pull/4336) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- enhancement: when fetch == 0, pushdown limit 0 instead skip+fetch. [\#4334](https://github.com/apache/arrow-datafusion/pull/4334) ([jackwener](https://github.com/jackwener)) -- Teach optimizer that `CoalesceBatchesExec` does not destroy output order [\#4332](https://github.com/apache/arrow-datafusion/pull/4332) ([alamb](https://github.com/alamb)) -- Add ability to disable DiskManager [\#4330](https://github.com/apache/arrow-datafusion/pull/4330) ([tustvold](https://github.com/tustvold)) -- Update cli.md [\#4329](https://github.com/apache/arrow-datafusion/pull/4329) ([psvri](https://github.com/psvri)) -- fix bug: right semi join can't support the filter [\#4327](https://github.com/apache/arrow-datafusion/pull/4327) ([liukun4515](https://github.com/liukun4515)) -- reimplment `eliminate_limit` to remove `global-state`. 
[\#4324](https://github.com/apache/arrow-datafusion/pull/4324) ([jackwener](https://github.com/jackwener)) -- Refine Err propagation and avoid unwrap in transform closures [\#4318](https://github.com/apache/arrow-datafusion/pull/4318) ([mingmwang](https://github.com/mingmwang)) -- Add a checker to confirm physical optimizer rules will keep the physical plan schema immutable [\#4316](https://github.com/apache/arrow-datafusion/pull/4316) ([mingmwang](https://github.com/mingmwang)) -- Refactor downcasting functions with downcastvalue macro and improve error handling of `ListArray` downcasting [\#4313](https://github.com/apache/arrow-datafusion/pull/4313) ([retikulum](https://github.com/retikulum)) -- minor: add another test case to cover join ambiguous check [\#4305](https://github.com/apache/arrow-datafusion/pull/4305) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- Fix DESCRIBE statement qualified table issue [\#4304](https://github.com/apache/arrow-datafusion/pull/4304) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) -- Use tournament loser tree for k-way sort-merging, increase merge speed by 50% [\#4301](https://github.com/apache/arrow-datafusion/pull/4301) ([richox](https://github.com/richox)) -- Pin Python `setuptools` in the CI to fix integration tests [\#4296](https://github.com/apache/arrow-datafusion/pull/4296) ([isidentical](https://github.com/isidentical)) -- Support `SubqueryAlias` in optimizer, physcial planner. 
[\#4293](https://github.com/apache/arrow-datafusion/pull/4293) ([jackwener](https://github.com/jackwener)) -- minor: avoid a clone into string when checking ambiguous [\#4292](https://github.com/apache/arrow-datafusion/pull/4292) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- replace the comparison op for decimal array op using the arrow-rs kernel [\#4290](https://github.com/apache/arrow-datafusion/pull/4290) ([liukun4515](https://github.com/liukun4515)) -- MINOR: replace `{..}` with `(_)`, typo, remove outdated TODO [\#4286](https://github.com/apache/arrow-datafusion/pull/4286) ([jackwener](https://github.com/jackwener)) -- Reduce Expr copies in `ParquetExec` [\#4283](https://github.com/apache/arrow-datafusion/pull/4283) ([alamb](https://github.com/alamb)) -- Fix issue in filter pushdown with overloaded projection index [\#4281](https://github.com/apache/arrow-datafusion/pull/4281) ([thinkharderdev](https://github.com/thinkharderdev)) -- Skip useless pruning predicates in `ParquetExec` [\#4280](https://github.com/apache/arrow-datafusion/pull/4280) ([alamb](https://github.com/alamb)) -- Push down more predicates into `ParquetExec` [\#4279](https://github.com/apache/arrow-datafusion/pull/4279) ([alamb](https://github.com/alamb)) -- Fix EXPLAIN plan for ParquetExec to show pruning\_predicate [\#4278](https://github.com/apache/arrow-datafusion/pull/4278) ([alamb](https://github.com/alamb)) -- reimplement `limit_push_down` to remove global-state, enhance optimize and simplify code. 
[\#4276](https://github.com/apache/arrow-datafusion/pull/4276) ([jackwener](https://github.com/jackwener)) -- Bump actions/labeler from 4.0.2 to 4.1.0 [\#4274](https://github.com/apache/arrow-datafusion/pull/4274) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Remove the type alias `NullColumnarValue` [\#4273](https://github.com/apache/arrow-datafusion/pull/4273) ([HaoYang670](https://github.com/HaoYang670)) -- reimplement `eliminate_outer_join` [\#4272](https://github.com/apache/arrow-datafusion/pull/4272) ([jackwener](https://github.com/jackwener)) -- Fix bugs in parsing `with header row` and `partitioned by` [\#4268](https://github.com/apache/arrow-datafusion/pull/4268) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- improve error messages while downcasting `UInt32Array`, `UInt64Array` and `BooleanArray` [\#4261](https://github.com/apache/arrow-datafusion/pull/4261) ([retikulum](https://github.com/retikulum)) -- add ambiguous check for projection [\#4260](https://github.com/apache/arrow-datafusion/pull/4260) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) -- Add ambiguous check for join [\#4258](https://github.com/apache/arrow-datafusion/pull/4258) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- support cross\_join in `limit_push_down` [\#4257](https://github.com/apache/arrow-datafusion/pull/4257) ([jackwener](https://github.com/jackwener)) -- Support parquet page filtering on min\_max for `decimal128` and `string` columns [\#4255](https://github.com/apache/arrow-datafusion/pull/4255) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- fix conflict and UT, cleanup redundant legacy code [\#4252](https://github.com/apache/arrow-datafusion/pull/4252) ([jackwener](https://github.com/jackwener)) -- Minor: remove unecessary clone\(\) in planner 
[\#4249](https://github.com/apache/arrow-datafusion/pull/4249) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Fix nightly clippy failures [\#4246](https://github.com/apache/arrow-datafusion/pull/4246) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- Improve Error Handling and Readibility for downcasting `Float32Array`, `Float64Array`, `StringArray` [\#4244](https://github.com/apache/arrow-datafusion/pull/4244) ([retikulum](https://github.com/retikulum)) -- Use defaults for ListingOptions builder [\#4243](https://github.com/apache/arrow-datafusion/pull/4243) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- Support binary boolean operators with nulls [\#4242](https://github.com/apache/arrow-datafusion/pull/4242) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Fixing doc of the expression [\#4240](https://github.com/apache/arrow-datafusion/pull/4240) ([Creampanda](https://github.com/Creampanda)) -- Fix negative interval parsing bug [\#4238](https://github.com/apache/arrow-datafusion/pull/4238) ([Jefffrey](https://github.com/Jefffrey)) -- remove duplicate or redundant code [\#4235](https://github.com/apache/arrow-datafusion/pull/4235) ([jackwener](https://github.com/jackwener)) -- add a checker to confirm optimizer can keep plan schema immutable. 
[\#4233](https://github.com/apache/arrow-datafusion/pull/4233) ([jackwener](https://github.com/jackwener)) -- Fix the percentile argument for ApproxPercentileCont must be Float64, not Decimal128\(2, 1\) [\#4228](https://github.com/apache/arrow-datafusion/pull/4228) ([comphead](https://github.com/comphead)) -- refactor how we create listing tables [\#4227](https://github.com/apache/arrow-datafusion/pull/4227) ([timvw](https://github.com/timvw)) -- Update sqlparser requirement from 0.26 to 0.27 [\#4226](https://github.com/apache/arrow-datafusion/pull/4226) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- upgrade required chrono version to 0.4.23 [\#4225](https://github.com/apache/arrow-datafusion/pull/4225) ([waitingkuo](https://github.com/waitingkuo)) -- Support types other than String for partition columns on ListingTables [\#4221](https://github.com/apache/arrow-datafusion/pull/4221) ([doki23](https://github.com/doki23)) -- \[CBO\] JoinSelection Rule, select HashJoin Partition Mode based on the Join Type and available statistics, option for SortMergeJoin [\#4219](https://github.com/apache/arrow-datafusion/pull/4219) ([mingmwang](https://github.com/mingmwang)) -- Remove alias in Union [\#4212](https://github.com/apache/arrow-datafusion/pull/4212) ([jackwener](https://github.com/jackwener)) -- Add try\_optimize method [\#4208](https://github.com/apache/arrow-datafusion/pull/4208) ([andygrove](https://github.com/andygrove)) -- Provide a builder for ListingOptions with fixups [\#4207](https://github.com/apache/arrow-datafusion/pull/4207) ([alamb](https://github.com/alamb)) -- Avoid error with empty iterators used for `ScalarValue::iter_to_array` [\#4206](https://github.com/apache/arrow-datafusion/pull/4206) ([GrandChaman](https://github.com/GrandChaman)) -- Improve error message for regexp\_match 'g' flag [\#4203](https://github.com/apache/arrow-datafusion/pull/4203) ([Jefffrey](https://github.com/Jefffrey)) -- 
Return `ResourceExhausted` errors when memory limit is exceed in `GroupedHashAggregateStreamV2` \(Row Hash\) [\#4202](https://github.com/apache/arrow-datafusion/pull/4202) ([crepererum](https://github.com/crepererum)) -- Add additional expr boolean simplification rules [\#4200](https://github.com/apache/arrow-datafusion/pull/4200) ([Jefffrey](https://github.com/Jefffrey)) -- Update to arrow and parquet 27.0.0 [\#4199](https://github.com/apache/arrow-datafusion/pull/4199) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Support `create table` with explicit column definitions [\#4194](https://github.com/apache/arrow-datafusion/pull/4194) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) -- Support all equality predicates in equality join [\#4193](https://github.com/apache/arrow-datafusion/pull/4193) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- add `propagate_empty_relation` optimizer rule [\#4192](https://github.com/apache/arrow-datafusion/pull/4192) ([jackwener](https://github.com/jackwener)) -- fix clippy [\#4190](https://github.com/apache/arrow-datafusion/pull/4190) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Fix clippy by avoiding deprecated functions in chrono [\#4189](https://github.com/apache/arrow-datafusion/pull/4189) ([alamb](https://github.com/alamb)) -- Disallow duplicate interval types during parsing [\#4188](https://github.com/apache/arrow-datafusion/pull/4188) ([Jefffrey](https://github.com/Jefffrey)) -- Parse nanoseconds for intervals [\#4186](https://github.com/apache/arrow-datafusion/pull/4186) ([Jefffrey](https://github.com/Jefffrey)) -- Add rule to reimplement `Eliminate cross join` and remove it in planner [\#4185](https://github.com/apache/arrow-datafusion/pull/4185) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- \[FOLLOWUP\] Enforcement Rule: resolve review comments, refactor adjust\_input\_keys\_ordering\(\) [\#4184](https://github.com/apache/arrow-datafusion/pull/4184) ([mingmwang](https://github.com/mingmwang)) -- Simplify boolean parquet pushdown predicate [\#4182](https://github.com/apache/arrow-datafusion/pull/4182) ([Jefffrey](https://github.com/Jefffrey)) -- Minor: consolidate parquet `custom_reader` integration test into parquet\_exec [\#4175](https://github.com/apache/arrow-datafusion/pull/4175) ([alamb](https://github.com/alamb)) -- minor: remove redundant println and cleanup [\#4173](https://github.com/apache/arrow-datafusion/pull/4173) ([jackwener](https://github.com/jackwener)) -- Add ability to specify external sort information for ListingTables [\#4170](https://github.com/apache/arrow-datafusion/pull/4170) ([alamb](https://github.com/alamb)) -- Improve Error Handling and Readibility for downcasting `Decimal128Array` [\#4168](https://github.com/apache/arrow-datafusion/pull/4168) ([retikulum](https://github.com/retikulum)) -- Minor: Remove completed comment on parquet row group pruning [\#4167](https://github.com/apache/arrow-datafusion/pull/4167) ([alamb](https://github.com/alamb)) -- Update hashbrown requirement from 0.12 to 0.13 [\#4164](https://github.com/apache/arrow-datafusion/pull/4164) ([dependabot[bot]](https://github.com/apps/dependabot)) -- MINOR: enable `dyn_cmp_dict` feature on arrow for physical expr crate [\#4163](https://github.com/apache/arrow-datafusion/pull/4163) ([isidentical](https://github.com/isidentical)) -- Derive filter statistic estimates from the predicate expression [\#4162](https://github.com/apache/arrow-datafusion/pull/4162) ([isidentical](https://github.com/isidentical)) -- Minor: pass `ParquetFileMetrics` to `build_row_filter` in parquet [\#4161](https://github.com/apache/arrow-datafusion/pull/4161) 
([alamb](https://github.com/alamb)) -- Minor: Extract parquet row group pruning code into its own module [\#4160](https://github.com/apache/arrow-datafusion/pull/4160) ([alamb](https://github.com/alamb)) -- Full support for time32 and time64 literal values \(`ScalarValue`\) [\#4156](https://github.com/apache/arrow-datafusion/pull/4156) ([andre-cc-natzka](https://github.com/andre-cc-natzka)) -- Window frame GROUPS mode support [\#4155](https://github.com/apache/arrow-datafusion/pull/4155) ([zembunia](https://github.com/zembunia)) -- Improve error messages while downcasting Int64Array [\#4154](https://github.com/apache/arrow-datafusion/pull/4154) ([retikulum](https://github.com/retikulum)) -- Add another method to collect referenced columns from an expression [\#4153](https://github.com/apache/arrow-datafusion/pull/4153) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- Remove BoxedAsyncFileReader [\#4150](https://github.com/apache/arrow-datafusion/pull/4150) ([tustvold](https://github.com/tustvold)) -- Support unsigned integers in `unwrap_cast_in_comparison` Optimizer rule [\#4149](https://github.com/apache/arrow-datafusion/pull/4149) ([alamb](https://github.com/alamb)) -- Add support for `DataType::Timestamp` casts in `unwrap_cast_in_comparison` optimizer pass [\#4148](https://github.com/apache/arrow-datafusion/pull/4148) ([alamb](https://github.com/alamb)) -- Add additional testing for `unwrap_cast_in_comparison` [\#4147](https://github.com/apache/arrow-datafusion/pull/4147) ([alamb](https://github.com/alamb)) -- improve error messages while downcasting Int32Array [\#4146](https://github.com/apache/arrow-datafusion/pull/4146) ([retikulum](https://github.com/retikulum)) -- Minor: Update docstring on unwrap\_cast\_in\_comparison [\#4145](https://github.com/apache/arrow-datafusion/pull/4145) ([alamb](https://github.com/alamb)) -- add schema parameter to table provider factory create method 
[\#4143](https://github.com/apache/arrow-datafusion/pull/4143) ([milenkovicm](https://github.com/milenkovicm)) -- fix: shouldn't pass alias through into subquery. [\#4141](https://github.com/apache/arrow-datafusion/pull/4141) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Preserve the `Cast` expression in `columnize_expr` [\#4137](https://github.com/apache/arrow-datafusion/pull/4137) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Set versions to dependencies with path in benchmarks Cargo.toml file [\#4136](https://github.com/apache/arrow-datafusion/pull/4136) ([ArkashaJavelin](https://github.com/ArkashaJavelin)) -- Fix links [\#4135](https://github.com/apache/arrow-datafusion/pull/4135) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- Use f64::total\_cmp instead of OrderedFloat [\#4133](https://github.com/apache/arrow-datafusion/pull/4133) ([comphead](https://github.com/comphead)) -- Add parquet integration tests for explicitly smaller page sizes, page pruning [\#4131](https://github.com/apache/arrow-datafusion/pull/4131) ([alamb](https://github.com/alamb)) -- Consolidate `ParquetExec` tests in `parquet_exec` integration test [\#4130](https://github.com/apache/arrow-datafusion/pull/4130) ([alamb](https://github.com/alamb)) -- Minor: Use upstream `BooleanArray::true_count` [\#4129](https://github.com/apache/arrow-datafusion/pull/4129) ([alamb](https://github.com/alamb)) -- Combined TPCH runs & uniformed summaries for benchmarks [\#4128](https://github.com/apache/arrow-datafusion/pull/4128) ([isidentical](https://github.com/isidentical)) -- Enable TableProviderFactories to receive additional options when creating an external table [\#4126](https://github.com/apache/arrow-datafusion/pull/4126) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([timvw](https://github.com/timvw)) -- Add CI check that configs.md is 
up-to-date [\#4124](https://github.com/apache/arrow-datafusion/pull/4124) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) -- \[Part3\] Partition and Sort Enforcement, Enforcement rule implementation [\#4122](https://github.com/apache/arrow-datafusion/pull/4122) ([mingmwang](https://github.com/mingmwang)) -- reuse code `utils::optimize_children` but affect inline. [\#4121](https://github.com/apache/arrow-datafusion/pull/4121) ([jackwener](https://github.com/jackwener)) -- reuse code `utils::optimize_children` instead of redundant implementation [\#4119](https://github.com/apache/arrow-datafusion/pull/4119) ([jackwener](https://github.com/jackwener)) -- Allow listing tables to be created via TableFactories [\#4112](https://github.com/apache/arrow-datafusion/pull/4112) ([avantgardnerio](https://github.com/avantgardnerio)) -- Update SQL reference to state that decimal support is currently experimental [\#4109](https://github.com/apache/arrow-datafusion/pull/4109) ([andygrove](https://github.com/andygrove)) -- Add metrics for parquet page level skipping [\#4105](https://github.com/apache/arrow-datafusion/pull/4105) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add parser option for parsing SQL numeric literals as decimal [\#4102](https://github.com/apache/arrow-datafusion/pull/4102) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Replace RwLock\<HashMap\> and Mutex\<HashMap\> by using DashMap [\#4079](https://github.com/apache/arrow-datafusion/pull/4079) ([yahoNanJing](https://github.com/yahoNanJing)) -- Custom window frame support extended to built-in window functions [\#4078](https://github.com/apache/arrow-datafusion/pull/4078) ([mustafasrepo](https://github.com/mustafasrepo)) -- Enable tests for page index filtering in parquet filter pushdown test [\#4062](https://github.com/apache/arrow-datafusion/pull/4062) ([alamb](https://github.com/alamb)) -- \[Part2\] Partition and Sort Enforcement, ExecutionPlan enhancement
[\#4043](https://github.com/apache/arrow-datafusion/pull/4043) ([mingmwang](https://github.com/mingmwang)) -- add support for xz file compression and `compression` feature [\#3993](https://github.com/apache/arrow-datafusion/pull/3993) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) -- Expression boundary analysis framework [\#3912](https://github.com/apache/arrow-datafusion/pull/3912) ([isidentical](https://github.com/isidentical)) - -## [14.0.0-rc1](https://github.com/apache/arrow-datafusion/tree/14.0.0-rc1) (2022-11-04) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/14.0.0...14.0.0-rc1) - - -## [14.0.0](https://github.com/apache/arrow-datafusion/tree/14.0.0) (2022-11-04) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/13.0.0-rc1...14.0.0) - -**Breaking changes:** - -- Improve FieldNotFound errors [\#4084](https://github.com/apache/arrow-datafusion/pull/4084) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Refactor: move `simplify_expression.rs` and `expr_simplifier.rs` to a new mod `simplify_expressions` [\#3951](https://github.com/apache/arrow-datafusion/pull/3951) ([HaoYang670](https://github.com/HaoYang670)) -- Support for non-u64 types for Window Bound [\#3916](https://github.com/apache/arrow-datafusion/pull/3916) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mustafasrepo](https://github.com/mustafasrepo)) -- Expose parquet reader settings using normal DataFusion `ConfigOptions` [\#3822](https://github.com/apache/arrow-datafusion/pull/3822) ([alamb](https://github.com/alamb)) -- Add `Filter::try_new` with validation [\#3796](https://github.com/apache/arrow-datafusion/pull/3796) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Change public simplify API and add a public coerce API 
[\#3758](https://github.com/apache/arrow-datafusion/pull/3758) ([alamb](https://github.com/alamb)) - -**Implemented enhancements:** - -- Automatically register tables if ObjectStore root is configured [\#4094](https://github.com/apache/arrow-datafusion/issues/4094) -- Simplify small `InList` expressions [\#4089](https://github.com/apache/arrow-datafusion/issues/4089) -- Support `SET` command [\#4067](https://github.com/apache/arrow-datafusion/issues/4067) -- add uuid\(\) function to generate unique uuid per row [\#4045](https://github.com/apache/arrow-datafusion/issues/4045) -- Publish benchmark crate so that it can be used as a library in Ballista [\#4016](https://github.com/apache/arrow-datafusion/issues/4016) -- Add statistics methods to `TableProvider` trait for use in cost-based optimizations in the logical plan [\#3983](https://github.com/apache/arrow-datafusion/issues/3983) -- Implement `current_time` Function [\#3982](https://github.com/apache/arrow-datafusion/issues/3982) -- Implement `current_date` Function [\#3981](https://github.com/apache/arrow-datafusion/issues/3981) -- Put common code used for testing code into datafusion/test\_utils.rs [\#3960](https://github.com/apache/arrow-datafusion/issues/3960) -- Print the configurations of ConfigOptions in an ordered way so that we can directly compare the equality of two ConfigOptions by their debug strings [\#3952](https://github.com/apache/arrow-datafusion/issues/3952) -- Don't make dependants install protoc [\#3947](https://github.com/apache/arrow-datafusion/issues/3947) -- Implement right anti join and support it in HashBuildProbeOrder [\#3946](https://github.com/apache/arrow-datafusion/issues/3946) -- Implement right semi join and support it in HashBuildProbeOrder [\#3945](https://github.com/apache/arrow-datafusion/issues/3945) -- Refactor `simplify_expressions` and `expr_simplifier` [\#3934](https://github.com/apache/arrow-datafusion/issues/3934) -- Implement serialization for 
`ScalarValue::FixedSizeBinary` [\#3928](https://github.com/apache/arrow-datafusion/issues/3928) -- Support inlining view / dataframes logical plan [\#3913](https://github.com/apache/arrow-datafusion/issues/3913) -- Plans with tables from `TableProviderFactory`s can't be serialized [\#3906](https://github.com/apache/arrow-datafusion/issues/3906) -- Simplify `a AND a` and `a OR a`. [\#3895](https://github.com/apache/arrow-datafusion/issues/3895) -- Allow configuring statistics on TPC-H benchmarks [\#3888](https://github.com/apache/arrow-datafusion/issues/3888) -- CI checks stuck in queued mode [\#3883](https://github.com/apache/arrow-datafusion/issues/3883) -- Multiple optimizer passes [\#3879](https://github.com/apache/arrow-datafusion/issues/3879) -- datafusion-proto does not support view table scan [\#3874](https://github.com/apache/arrow-datafusion/issues/3874) -- TableProviderFactories need to be async and return a Result to be useful [\#3866](https://github.com/apache/arrow-datafusion/issues/3866) -- Factorize common AND factors out of OR predicates to support filterPushDown as possible [\#3858](https://github.com/apache/arrow-datafusion/issues/3858) -- Replace `concat_ws` with `concat` when the delimiter is empty string [\#3857](https://github.com/apache/arrow-datafusion/issues/3857) -- Concatenate contiguous literal arguments of `concat_ws` when doing the expression simplification [\#3856](https://github.com/apache/arrow-datafusion/issues/3856) -- Partition and Sort Enforcement [\#3854](https://github.com/apache/arrow-datafusion/issues/3854) -- Enable mimalloc by default in benchmarks [\#3851](https://github.com/apache/arrow-datafusion/issues/3851) -- Add collect statistics configuration [\#3847](https://github.com/apache/arrow-datafusion/issues/3847) -- \[SQL\] - Support cache/uncache table syntax [\#3842](https://github.com/apache/arrow-datafusion/issues/3842) -- Filter pushdown doesn't seem to apply for filter on TPC-H Q17 
[\#3839](https://github.com/apache/arrow-datafusion/issues/3839) -- Support pushdown multi-columns in PageIndex pruning. [\#3834](https://github.com/apache/arrow-datafusion/issues/3834) -- Consolidate `Expr` manipulation code so it is more discoverable and make it easier to use [\#3808](https://github.com/apache/arrow-datafusion/issues/3808) -- Leverage input array's null buffer for regex replace to optimize sparse arrays [\#3803](https://github.com/apache/arrow-datafusion/issues/3803) -- Improve join cardinality estimation when there is no overlap in the min/max values [\#3802](https://github.com/apache/arrow-datafusion/issues/3802) -- datafusion-cli up to date check is failing on master [\#3798](https://github.com/apache/arrow-datafusion/issues/3798) -- Optimize benchmark q2 subquery filter [\#3789](https://github.com/apache/arrow-datafusion/issues/3789) -- Benchmark should infer schema when running against Parquet [\#3776](https://github.com/apache/arrow-datafusion/issues/3776) -- Allow specialized physical functions to provide hints for the array adapter [\#3762](https://github.com/apache/arrow-datafusion/issues/3762) -- \[User Guide\] Add `EXPLAIN` to SQL reference [\#3755](https://github.com/apache/arrow-datafusion/issues/3755) -- move `type coercion` for agg/agg udf [\#3752](https://github.com/apache/arrow-datafusion/issues/3752) -- Prevent Cargo.lock for datafusion-cli being out-of-date [\#3744](https://github.com/apache/arrow-datafusion/issues/3744) -- Add example of expr apis including simplification and coercion [\#3740](https://github.com/apache/arrow-datafusion/issues/3740) -- support `type coercion` for ScalarFunction expr in the logical phase [\#3731](https://github.com/apache/arrow-datafusion/issues/3731) -- Add support for DISTINCT projections in `decorrelate_where_exists` [\#3724](https://github.com/apache/arrow-datafusion/issues/3724) -- Add type coercion rule for `CONCAT` and `CONCAT_WS` 
[\#3720](https://github.com/apache/arrow-datafusion/issues/3720) -- Expose and document a simpler public API for simplify expressions [\#3709](https://github.com/apache/arrow-datafusion/issues/3709) -- Expose + document the type coercion API publicly [\#3708](https://github.com/apache/arrow-datafusion/issues/3708) -- Concatenate contiguous literal arguments of `CONCAT` during the expression simplification. [\#3683](https://github.com/apache/arrow-datafusion/issues/3683) -- DataFusion 13.0.0 Release [\#3671](https://github.com/apache/arrow-datafusion/issues/3671) -- Add division by `0` rules in the expression simplification [\#3663](https://github.com/apache/arrow-datafusion/issues/3663) -- Compressed CSV/JSON Read [\#3641](https://github.com/apache/arrow-datafusion/issues/3641) -- remove type coercion for agg [\#3623](https://github.com/apache/arrow-datafusion/issues/3623) -- extract or clause as predicate for join rels [\#3577](https://github.com/apache/arrow-datafusion/issues/3577) -- Improve performance of `regex_replace` [\#3518](https://github.com/apache/arrow-datafusion/issues/3518) -- Add benchmarks for parquet queries with filter pushdown enabled [\#3457](https://github.com/apache/arrow-datafusion/issues/3457) -- Make type coercion rule more robust [\#3390](https://github.com/apache/arrow-datafusion/issues/3390) -- `ViewTable::scan` ignores filters and limits [\#3249](https://github.com/apache/arrow-datafusion/issues/3249) -- Add `CREATE VIEW` documentation to user guide [\#3211](https://github.com/apache/arrow-datafusion/issues/3211) -- Push additional parquet filtering into the parquet scan \[EPIC\] [\#3147](https://github.com/apache/arrow-datafusion/issues/3147) -- Remove `core/logical_plan` module [\#2683](https://github.com/apache/arrow-datafusion/issues/2683) -- Datafusion Optimizer Enhancement [\#2255](https://github.com/apache/arrow-datafusion/issues/2255) -- \[Optimizer\] Eliminate self compare self 
[\#2252](https://github.com/apache/arrow-datafusion/issues/2252) -- Break datafusion crate into smaller crates [\#1750](https://github.com/apache/arrow-datafusion/issues/1750) -- Benchmark `constellation-rs/amadeus`'s parquet implementation [\#1341](https://github.com/apache/arrow-datafusion/issues/1341) -- Use `parquet2` async reader in `physical_plan/parquet` [\#1058](https://github.com/apache/arrow-datafusion/issues/1058) -- Table Scan Enhancement Plan [\#944](https://github.com/apache/arrow-datafusion/issues/944) -- Implement parquet page-level skipping with column index, using min/max stats [\#847](https://github.com/apache/arrow-datafusion/issues/847) -- Support min/max statistics in ParquetTable and ParquetExec [\#537](https://github.com/apache/arrow-datafusion/issues/537) - -**Fixed bugs:** - -- Clippy failing on master [\#4100](https://github.com/apache/arrow-datafusion/issues/4100) -- Panic when the number of partitions of the pipeline that throws the exception is inconsistent with the number of partitions output by the query [\#4096](https://github.com/apache/arrow-datafusion/issues/4096) -- FieldNotFound when field is available [\#4083](https://github.com/apache/arrow-datafusion/issues/4083) -- SingleDistinctToGroupBy being applied too broadly [\#4082](https://github.com/apache/arrow-datafusion/issues/4082) -- single\_distinct\_to\_groupby strips qualifiers from group-by expressions [\#4049](https://github.com/apache/arrow-datafusion/issues/4049) -- Another Internal error when parquet predicate pushdown is enabled "Error evaluating filter predicate: [\#4046](https://github.com/apache/arrow-datafusion/issues/4046) -- Decimal multiplied by Float produces incorrect results [\#4035](https://github.com/apache/arrow-datafusion/issues/4035) -- Cannot query external table - TableScan replaced with EmptyExec [\#4027](https://github.com/apache/arrow-datafusion/issues/4027) -- benchmark q17 produces incorrect result 
[\#4026](https://github.com/apache/arrow-datafusion/issues/4026) -- benchmark q14 produces incorrect result [\#4025](https://github.com/apache/arrow-datafusion/issues/4025) -- benchmark q11 producing incorrect results [\#4023](https://github.com/apache/arrow-datafusion/issues/4023) -- Internal error when parquet predicate pushdown is enabled "Error evaluating filter predicate:" [\#4006](https://github.com/apache/arrow-datafusion/issues/4006) -- Incorrect results with parquet filtering pushdown enabled [\#4005](https://github.com/apache/arrow-datafusion/issues/4005) -- Wrong results when parquet page index filtering is enabled [\#4002](https://github.com/apache/arrow-datafusion/issues/4002) -- Output schema of semi join has invalid projection added after HashBuildProbeOrder [\#4001](https://github.com/apache/arrow-datafusion/issues/4001) -- `async` deserialization functions are unintuitive and possibly insecure [\#3977](https://github.com/apache/arrow-datafusion/issues/3977) -- `Expr::to_bytes` can produce output that hits `Expr::from_bytes` recursion limit [\#3968](https://github.com/apache/arrow-datafusion/issues/3968) -- Bug on propagating arrow field metadata [\#3964](https://github.com/apache/arrow-datafusion/issues/3964) -- Predicate still has cast when comparing Timestamp\(Nano, None\) to a timestamp literal, so can't be pushed down or used for pruning [\#3938](https://github.com/apache/arrow-datafusion/issues/3938) -- Error using `IN` list on dictionary encoded data: `InList does not support datatype Dictionary(Int32, Utf8).` [\#3936](https://github.com/apache/arrow-datafusion/issues/3936) -- Internal error in CAST from Timestamp\[us\] [\#3922](https://github.com/apache/arrow-datafusion/issues/3922) -- ScalarValue not implemented for FixedSizeBinary types [\#3910](https://github.com/apache/arrow-datafusion/issues/3910) -- \[DOC\] - There are unsupported DDL in the official documentation [\#3904](https://github.com/apache/arrow-datafusion/issues/3904) -- 
datafusion-proto deserialize with Substring\(str \[from int\] \[for int\]\) fails [\#3901](https://github.com/apache/arrow-datafusion/issues/3901) -- `count(Literal)` gives wrong column name [\#3891](https://github.com/apache/arrow-datafusion/issues/3891) -- `projection_push_down` adds duplicate projections with multiple passes [\#3881](https://github.com/apache/arrow-datafusion/issues/3881) -- Default physical planner generates empty relation for DROP TABLE, CREATE MEMORY TABLE, etc [\#3873](https://github.com/apache/arrow-datafusion/issues/3873) -- Binary expression canonical names are incorrect in some cases [\#3865](https://github.com/apache/arrow-datafusion/issues/3865) -- Using the window function lag causes panic. [\#3830](https://github.com/apache/arrow-datafusion/issues/3830) -- chrono crate : specify 0.4.22 as the minimum version due to spurious build failures [\#3827](https://github.com/apache/arrow-datafusion/issues/3827) -- datafusion-proto deserialize with q16 sql fails [\#3820](https://github.com/apache/arrow-datafusion/issues/3820) -- Filter predicates should not be aliased [\#3795](https://github.com/apache/arrow-datafusion/issues/3795) -- Write csv not save all lines of dataframe [\#3783](https://github.com/apache/arrow-datafusion/issues/3783) -- Regression in simplifying expressions in subqueries [\#3760](https://github.com/apache/arrow-datafusion/issues/3760) -- DataFusionError\(Internal\("The size of the sorted batch is larger than the size of the input batch: 2120 \> 2312"\)\) [\#3747](https://github.com/apache/arrow-datafusion/issues/3747) -- "labeler" PR check is broken [\#3743](https://github.com/apache/arrow-datafusion/issues/3743) -- `DataFrame::select_columns` doesn't work with names containing "." 
[\#3733](https://github.com/apache/arrow-datafusion/issues/3733) -- TPC-H Query 1 has regressed [\#3729](https://github.com/apache/arrow-datafusion/issues/3729) -- \[RUST\]\[Datafusion\] What causes "Error: Execution\("file size of 4 is less than footer"\)" error? [\#3800](https://github.com/apache/arrow-datafusion/issues/3800) -- Field names containing periods such as f.c cannot work [\#3682](https://github.com/apache/arrow-datafusion/issues/3682) -- TableProvider implementation for DataFrame does not support filter pushdown [\#3681](https://github.com/apache/arrow-datafusion/issues/3681) -- using Decimal\(0\) make system panicked [\#3665](https://github.com/apache/arrow-datafusion/issues/3665) -- Cannot query some parquet files in S3, but they work locally [\#3633](https://github.com/apache/arrow-datafusion/issues/3633) -- ` col / col` returns `1` when `col = 0` [\#3615](https://github.com/apache/arrow-datafusion/issues/3615) -- register\_csv allow space in table\_path [\#3589](https://github.com/apache/arrow-datafusion/issues/3589) -- Hardcoded u64 for WindowFrameBound fields [\#3571](https://github.com/apache/arrow-datafusion/issues/3571) -- `docs.rs` cannot build `datafusion-proto` crate [\#3538](https://github.com/apache/arrow-datafusion/issues/3538) -- Row Hash loads whole aggregation state to memory before sending [\#3460](https://github.com/apache/arrow-datafusion/issues/3460) -- approx\_percentile\_cont return wrong result when scan multi parquet files. 
[\#3140](https://github.com/apache/arrow-datafusion/issues/3140) -- User guide is incorrect regarding using CLI to register CSV files using schema inference [\#3001](https://github.com/apache/arrow-datafusion/issues/3001) -- Exception: Internal error, Exception: Schema error [\#2938](https://github.com/apache/arrow-datafusion/issues/2938) -- Version 0.6.0 Panic error during SQL execution [\#2738](https://github.com/apache/arrow-datafusion/issues/2738) -- wrong result when operation parquet [\#2044](https://github.com/apache/arrow-datafusion/issues/2044) -- Local object store accepts file:/// as base path, but LocalStore returns meta without the prefix. [\#1923](https://github.com/apache/arrow-datafusion/issues/1923) -- Reading nested parquet files results in `index out of bounds` [\#1383](https://github.com/apache/arrow-datafusion/issues/1383) -- `-` \(negation\) with NULL literals does not work: can't be evaluated because the expression's type is Utf8, not signed [\#1192](https://github.com/apache/arrow-datafusion/issues/1192) -- Inconsistent cast behavior [\#957](https://github.com/apache/arrow-datafusion/issues/957) -- single\_distinct\_to\_groupby no longer drops qualifiers [\#4050](https://github.com/apache/arrow-datafusion/pull/4050) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) - -**Documentation updates:** - -- Clarify in docs that Identifiers are made lower-case in SQL query [\#2374](https://github.com/apache/arrow-datafusion/issues/2374) -- Fix broken links in contributor guide [\#3956](https://github.com/apache/arrow-datafusion/pull/3956) ([Jefffrey](https://github.com/Jefffrey)) -- add create view explanation [\#3925](https://github.com/apache/arrow-datafusion/pull/3925) ([retikulum](https://github.com/retikulum)) -- Update `datafusion-examples` README [\#3814](https://github.com/apache/arrow-datafusion/pull/3814) ([alamb](https://github.com/alamb)) -- Add Seafowl to list of projects using 
DataFusion [\#3792](https://github.com/apache/arrow-datafusion/pull/3792) ([mildbyte](https://github.com/mildbyte)) - -**Closed issues:** - -- \[QUESTION\] How many times should be the function `create_name` called when executing a query? [\#3900](https://github.com/apache/arrow-datafusion/issues/3900) -- Improve the `Expr` string format [\#3878](https://github.com/apache/arrow-datafusion/issues/3878) -- Simplify division by zero \(division by one / multiplication by zero / multiplication by one\) for Decimal types as well [\#3643](https://github.com/apache/arrow-datafusion/issues/3643) -- InList: merge check branch [\#2833](https://github.com/apache/arrow-datafusion/issues/2833) -- Optimization InList: compare the float data type using OrderedFloat\ [\#2831](https://github.com/apache/arrow-datafusion/issues/2831) -- Outdated section of the add function of the contribution guide [\#2560](https://github.com/apache/arrow-datafusion/issues/2560) -- Optimize InList implementation with native types rather than ScalarValue [\#2165](https://github.com/apache/arrow-datafusion/issues/2165) -- Improve testing of optimizers using EXPLAIN [\#1118](https://github.com/apache/arrow-datafusion/issues/1118) -- Crash on parsing sql query with Cyrillic letters [\#184](https://github.com/apache/arrow-datafusion/issues/184) -- \[EPIC\] Support all TPC-H queries in benchmark [\#158](https://github.com/apache/arrow-datafusion/issues/158) -- Implement optional second argument to ltrim and rtrim functions [\#144](https://github.com/apache/arrow-datafusion/issues/144) -- Benchmark crate does not have a SIMD feature [\#124](https://github.com/apache/arrow-datafusion/issues/124) -- ColumnarValue::into\_array should not require batch [\#113](https://github.com/apache/arrow-datafusion/issues/113) -- \[Rust\] Parquet data source does not support complex types [\#83](https://github.com/apache/arrow-datafusion/issues/83) - -**Merged pull requests:** - -- Appease new clippy 
[\#4101](https://github.com/apache/arrow-datafusion/pull/4101) ([alamb](https://github.com/alamb)) -- minor: Split parquet reader up into smaller modules [\#4099](https://github.com/apache/arrow-datafusion/pull/4099) ([alamb](https://github.com/alamb)) -- \[MINOR\] Update `SET` in cli.md [\#4098](https://github.com/apache/arrow-datafusion/pull/4098) ([waitingkuo](https://github.com/waitingkuo)) -- fix: Scheduler panic routing errors [\#4097](https://github.com/apache/arrow-datafusion/pull/4097) ([yukkit](https://github.com/yukkit)) -- Automatically register tables if ObjectStore root is configured [\#4095](https://github.com/apache/arrow-datafusion/pull/4095) ([avantgardnerio](https://github.com/avantgardnerio)) -- minor: Use Operator::swap [\#4092](https://github.com/apache/arrow-datafusion/pull/4092) ([alamb](https://github.com/alamb)) -- Simplify small InListExpr [\#4090](https://github.com/apache/arrow-datafusion/pull/4090) ([Dandandan](https://github.com/Dandandan)) -- Minor: Add arrow-rs ticket reference and turn some comments into docstrings [\#4088](https://github.com/apache/arrow-datafusion/pull/4088) ([alamb](https://github.com/alamb)) -- Support Dictionary in InListExpr [\#4070](https://github.com/apache/arrow-datafusion/pull/4070) ([tustvold](https://github.com/tustvold)) -- support `SET` variable [\#4069](https://github.com/apache/arrow-datafusion/pull/4069) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) -- Add in list bench [\#4068](https://github.com/apache/arrow-datafusion/pull/4068) ([tustvold](https://github.com/tustvold)) -- Improve Error Handling and Readibility for downcasting `StructArray` [\#4061](https://github.com/apache/arrow-datafusion/pull/4061) ([retikulum](https://github.com/retikulum)) -- Build tests separately from running [\#4060](https://github.com/apache/arrow-datafusion/pull/4060) ([alamb](https://github.com/alamb)) -- Simplify InListExpr ~20-70% Faster 
[\#4057](https://github.com/apache/arrow-datafusion/pull/4057) ([tustvold](https://github.com/tustvold)) -- MINOR: Print unoptimized logical plan in execute\_query of tpch benchmark [\#4056](https://github.com/apache/arrow-datafusion/pull/4056) ([viirya](https://github.com/viirya)) -- Minor: clean the code in `eliminate_filter` [\#4055](https://github.com/apache/arrow-datafusion/pull/4055) ([HaoYang670](https://github.com/HaoYang670)) -- Implement `current_time` scalar function [\#4054](https://github.com/apache/arrow-datafusion/pull/4054) ([naosense](https://github.com/naosense)) -- Cleanup hash\_utils adding support for decimal256 and f16 [\#4053](https://github.com/apache/arrow-datafusion/pull/4053) ([tustvold](https://github.com/tustvold)) -- Fix multicolumn parquet predicate pushdown \(\#4046\) [\#4048](https://github.com/apache/arrow-datafusion/pull/4048) ([tustvold](https://github.com/tustvold)) -- Add CI checks that we can serde all benchmark queries [\#4047](https://github.com/apache/arrow-datafusion/pull/4047) ([andygrove](https://github.com/andygrove)) -- Enable more benchmark verification tests [\#4044](https://github.com/apache/arrow-datafusion/pull/4044) ([andygrove](https://github.com/andygrove)) -- Extract common parquet testing code to `parquet-test-util` crate [\#4042](https://github.com/apache/arrow-datafusion/pull/4042) ([alamb](https://github.com/alamb)) -- add uuid\(\) function [\#4041](https://github.com/apache/arrow-datafusion/pull/4041) ([Jimexist](https://github.com/Jimexist)) -- Update to arrow 26, change timezones [\#4039](https://github.com/apache/arrow-datafusion/pull/4039) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Fix Decimal and Floating type coerce rule [\#4038](https://github.com/apache/arrow-datafusion/pull/4038) ([viirya](https://github.com/viirya)) -- Reserve the literal expression of `Count` function [\#4031](https://github.com/apache/arrow-datafusion/pull/4031) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Implement current\_date scalar function [\#4022](https://github.com/apache/arrow-datafusion/pull/4022) ([comphead](https://github.com/comphead)) -- Fix predicate pushdown bugs: project columns within DatafusionArrowPredicate \(\#4005\) \(\#4006\) [\#4021](https://github.com/apache/arrow-datafusion/pull/4021) ([tustvold](https://github.com/tustvold)) -- minor: remove redundant code/TODO [\#4019](https://github.com/apache/arrow-datafusion/pull/4019) ([jackwener](https://github.com/jackwener)) -- Add CI check to verify that benchmark queries return the expected results [\#4015](https://github.com/apache/arrow-datafusion/pull/4015) ([andygrove](https://github.com/andygrove)) -- Minor: Add TODO and tracking ticket reference [\#4012](https://github.com/apache/arrow-datafusion/pull/4012) ([alamb](https://github.com/alamb)) -- Add right anti join support and support it in HashBuildProbeOrder [\#4011](https://github.com/apache/arrow-datafusion/pull/4011) ([Dandandan](https://github.com/Dandandan)) -- MINOR: Generate expected benchmark query results [\#4010](https://github.com/apache/arrow-datafusion/pull/4010) ([andygrove](https://github.com/andygrove)) -- Minor: remove unecessary clippy allow [\#4008](https://github.com/apache/arrow-datafusion/pull/4008) ([alamb](https://github.com/alamb)) -- Minor: Do what clippy says and clean up some code [\#4007](https://github.com/apache/arrow-datafusion/pull/4007) ([alamb](https://github.com/alamb)) -- Improve Error Handling and Readibility for downcasting `Date32Array` [\#4004](https://github.com/apache/arrow-datafusion/pull/4004) ([retikulum](https://github.com/retikulum)) -- Don't add projection for semi joins in HashBuildProbeOrder [\#4000](https://github.com/apache/arrow-datafusion/pull/4000) ([Dandandan](https://github.com/Dandandan)) -- Minor: use `DataType::is_nested` 
[\#3995](https://github.com/apache/arrow-datafusion/pull/3995) ([alamb](https://github.com/alamb)) -- \[minor\] bump prettier version [\#3992](https://github.com/apache/arrow-datafusion/pull/3992) ([Jimexist](https://github.com/Jimexist)) -- Add parquet predicate pushdown metrics [\#3989](https://github.com/apache/arrow-datafusion/pull/3989) ([alamb](https://github.com/alamb)) -- Pin datafusion-proto build dependencies [\#3987](https://github.com/apache/arrow-datafusion/pull/3987) ([tustvold](https://github.com/tustvold)) -- Add TableProvider.statistics method [\#3986](https://github.com/apache/arrow-datafusion/pull/3986) ([andygrove](https://github.com/andygrove)) -- Add Pull Request guidelines to contributor guide [\#3985](https://github.com/apache/arrow-datafusion/pull/3985) ([alamb](https://github.com/alamb)) -- Update protos [\#3979](https://github.com/apache/arrow-datafusion/pull/3979) ([tustvold](https://github.com/tustvold)) -- Revert async changes but keep deltalake working [\#3978](https://github.com/apache/arrow-datafusion/pull/3978) ([avantgardnerio](https://github.com/avantgardnerio)) -- Correctness integration test for parquet filter pushdown [\#3976](https://github.com/apache/arrow-datafusion/pull/3976) ([alamb](https://github.com/alamb)) -- MINOR: Stop pretty printing batches in benchmark when there are no results [\#3974](https://github.com/apache/arrow-datafusion/pull/3974) ([andygrove](https://github.com/andygrove)) -- MINOR: Re-export Cast struct [\#3971](https://github.com/apache/arrow-datafusion/pull/3971) ([andygrove](https://github.com/andygrove)) -- fix: check recursion limit in `Expr::to_bytes` [\#3970](https://github.com/apache/arrow-datafusion/pull/3970) ([crepererum](https://github.com/crepererum)) -- \[Part1\] Partition and Sort Enforcement, PhysicalExpr enhancement [\#3969](https://github.com/apache/arrow-datafusion/pull/3969) ([mingmwang](https://github.com/mingmwang)) -- Support pushdown multi-columns in PageIndex pruning. 
[\#3967](https://github.com/apache/arrow-datafusion/pull/3967) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Fix benchmarks README formatting [\#3966](https://github.com/apache/arrow-datafusion/pull/3966) ([Jefffrey](https://github.com/Jefffrey)) -- Bug fix on DFField to Field conversion: preserve metadata [\#3965](https://github.com/apache/arrow-datafusion/pull/3965) ([metesynnada](https://github.com/metesynnada)) -- Informative Error Message for LAG and LEAD functions [\#3963](https://github.com/apache/arrow-datafusion/pull/3963) ([mustafasrepo](https://github.com/mustafasrepo)) -- Minor: Add some docstrings to `FileScanConfig` and `RuntimeEnv` [\#3962](https://github.com/apache/arrow-datafusion/pull/3962) ([alamb](https://github.com/alamb)) -- Move common code used for testing code into datafusion/test\_utils [\#3961](https://github.com/apache/arrow-datafusion/pull/3961) ([alamb](https://github.com/alamb)) -- Update minimum chrono dependency to 0.4.22 [\#3959](https://github.com/apache/arrow-datafusion/pull/3959) ([alamb](https://github.com/alamb)) -- Implement right semi join and support in HashBuildProbeorder [\#3958](https://github.com/apache/arrow-datafusion/pull/3958) ([Dandandan](https://github.com/Dandandan)) -- Print the configurations of ConfigOptions in an ordered way so that we can directly compare the equality of two ConfigOptions by their debug strings [\#3953](https://github.com/apache/arrow-datafusion/pull/3953) ([yahoNanJing](https://github.com/yahoNanJing)) -- Vendor Generated Protobuf Code \(\#3947\) [\#3950](https://github.com/apache/arrow-datafusion/pull/3950) ([tustvold](https://github.com/tustvold)) -- Implement serialization for ScalarValue::FixedSizeBinary [\#3943](https://github.com/apache/arrow-datafusion/pull/3943) ([retikulum](https://github.com/retikulum)) -- Consolidate physical join code into `datafusion/core/src/physical_plan/joins` [\#3942](https://github.com/apache/arrow-datafusion/pull/3942) ([alamb](https://github.com/alamb)) 
-- Add optimizer test for simplifying predicates on timestamps [\#3939](https://github.com/apache/arrow-datafusion/pull/3939) ([alamb](https://github.com/alamb)) -- Add test for querying predicate on dictionary [\#3937](https://github.com/apache/arrow-datafusion/pull/3937) ([alamb](https://github.com/alamb)) -- fix: return error for unsupported SQL [\#3933](https://github.com/apache/arrow-datafusion/pull/3933) ([Kikkon](https://github.com/Kikkon)) -- doc: fix doc about `CREATE TABLE IF NOT EXISTS` [\#3932](https://github.com/apache/arrow-datafusion/pull/3932) ([jackwener](https://github.com/jackwener)) -- Refactor Expr::Cast to use a struct. [\#3931](https://github.com/apache/arrow-datafusion/pull/3931) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- minor: fix some typo. [\#3930](https://github.com/apache/arrow-datafusion/pull/3930) ([jackwener](https://github.com/jackwener)) -- chore: update cranelift-related dependencies [\#3926](https://github.com/apache/arrow-datafusion/pull/3926) ([xudong963](https://github.com/xudong963)) -- Change cast error from Internal to NotImplemented [\#3924](https://github.com/apache/arrow-datafusion/pull/3924) ([alamb](https://github.com/alamb)) -- Support inlining view / dataframes logical plan [\#3923](https://github.com/apache/arrow-datafusion/pull/3923) ([Dandandan](https://github.com/Dandandan)) -- Add test for Simplify redundant predicates [\#3915](https://github.com/apache/arrow-datafusion/pull/3915) ([src255](https://github.com/src255)) -- Implement ScalarValue for FixedSizeBinary [\#3911](https://github.com/apache/arrow-datafusion/pull/3911) ([maxburke](https://github.com/maxburke)) -- Add serde for plans with tables from `TableProviderFactory`s [\#3907](https://github.com/apache/arrow-datafusion/pull/3907) ([avantgardnerio](https://github.com/avantgardnerio)) -- Support filter/limit pushdown for views/dataframes 
[\#3905](https://github.com/apache/arrow-datafusion/pull/3905) ([Dandandan](https://github.com/Dandandan)) -- Factorize common AND factors out of OR predicates to support filterPu… [\#3903](https://github.com/apache/arrow-datafusion/pull/3903) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add `Substring(str [from int] [for int])` support in `datafusion-proto` [\#3902](https://github.com/apache/arrow-datafusion/pull/3902) ([r4ntix](https://github.com/r4ntix)) -- Revert "Factorize common AND factors out of OR predicates to supportfilter Pu… \(\#3859\)" [\#3897](https://github.com/apache/arrow-datafusion/pull/3897) ([alamb](https://github.com/alamb)) -- MINOR: Add notes on Apache Reporter [\#3893](https://github.com/apache/arrow-datafusion/pull/3893) ([andygrove](https://github.com/andygrove)) -- Allow configuring collection of statistics during TPC-H benchmarks [\#3889](https://github.com/apache/arrow-datafusion/pull/3889) ([isidentical](https://github.com/isidentical)) -- Improve formatting of binary expressions [\#3884](https://github.com/apache/arrow-datafusion/pull/3884) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Multiple optimizer passes [\#3880](https://github.com/apache/arrow-datafusion/pull/3880) ([andygrove](https://github.com/andygrove)) -- \[MINOR\] Update docs with newly added configuration values [\#3877](https://github.com/apache/arrow-datafusion/pull/3877) ([alamb](https://github.com/alamb)) -- \[MINOR\] Add a hint about how to resolve the `Cargo.lock` CI check [\#3876](https://github.com/apache/arrow-datafusion/pull/3876) ([alamb](https://github.com/alamb)) -- Add `LogicalPlan::ViewTable` support in `datafusion-proto` [\#3875](https://github.com/apache/arrow-datafusion/pull/3875) ([r4ntix](https://github.com/r4ntix)) -- Optimize the `concat_ws` function [\#3869](https://github.com/apache/arrow-datafusion/pull/3869) ([HaoYang670](https://github.com/HaoYang670)) -- Implement 
foundational filter selectivity analysis [\#3868](https://github.com/apache/arrow-datafusion/pull/3868) ([isidentical](https://github.com/isidentical)) -- Update `TableProviderFactory` trait to support real-world use-cases [\#3867](https://github.com/apache/arrow-datafusion/pull/3867) ([avantgardnerio](https://github.com/avantgardnerio)) -- put subquery's equal clause into join on clauses instead of filter cl… [\#3862](https://github.com/apache/arrow-datafusion/pull/3862) ([AssHero](https://github.com/AssHero)) -- Factorize common AND factors out of OR predicates to support filterPu… [\#3859](https://github.com/apache/arrow-datafusion/pull/3859) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Enable mimalloc by default in benchmark [\#3853](https://github.com/apache/arrow-datafusion/pull/3853) ([Dandandan](https://github.com/Dandandan)) -- Refactor `Expr::Between` to use a struct [\#3850](https://github.com/apache/arrow-datafusion/pull/3850) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([b41sh](https://github.com/b41sh)) -- Handle cardinality estimation for disjoint inner and outer joins [\#3848](https://github.com/apache/arrow-datafusion/pull/3848) ([isidentical](https://github.com/isidentical)) -- Add setting for statistics collection [\#3846](https://github.com/apache/arrow-datafusion/pull/3846) ([Dandandan](https://github.com/Dandandan)) -- Update to arrow 25.0.0 [\#3844](https://github.com/apache/arrow-datafusion/pull/3844) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) -- Tweak list of optimization rules [\#3841](https://github.com/apache/arrow-datafusion/pull/3841) ([Dandandan](https://github.com/Dandandan)) -- Refactor Expr::GetIndexedField to use a struct [\#3838](https://github.com/apache/arrow-datafusion/pull/3838) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) -- Infer the count of maximum distinct values from min/max 
[\#3837](https://github.com/apache/arrow-datafusion/pull/3837) ([isidentical](https://github.com/isidentical)) -- Refactor `Expr::Like`, `Expr::ILike`, `Expr::SimilarTo` to use a struct [\#3836](https://github.com/apache/arrow-datafusion/pull/3836) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([b41sh](https://github.com/b41sh)) -- Refactor Expr::BinaryExpr to use a struct [\#3835](https://github.com/apache/arrow-datafusion/pull/3835) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([zhoudongyan](https://github.com/zhoudongyan)) -- update postgres version to 15 in integration test [\#3831](https://github.com/apache/arrow-datafusion/pull/3831) ([Jimexist](https://github.com/Jimexist)) -- Fix the panic when lpad/rpad parameter is negative [\#3829](https://github.com/apache/arrow-datafusion/pull/3829) ([ZuoTiJia](https://github.com/ZuoTiJia)) -- MINOR: Document SHOW ALL in the users guide [\#3826](https://github.com/apache/arrow-datafusion/pull/3826) ([alamb](https://github.com/alamb)) -- MINOR: Add datafusion-cli documentation on showing configuration [\#3825](https://github.com/apache/arrow-datafusion/pull/3825) ([alamb](https://github.com/alamb)) -- Add/Remove Division Rules [\#3824](https://github.com/apache/arrow-datafusion/pull/3824) ([retikulum](https://github.com/retikulum)) -- Minor: Sort the output of SHOW ALL by config name [\#3823](https://github.com/apache/arrow-datafusion/pull/3823) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Add `precision != 0` check when making decimal type [\#3818](https://github.com/apache/arrow-datafusion/pull/3818) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Infer schema when running benchmarks against parquet [\#3817](https://github.com/apache/arrow-datafusion/pull/3817) ([andygrove](https://github.com/andygrove)) -- Finish removing deprecated `datafusion::logical_plan` 
module [\#3816](https://github.com/apache/arrow-datafusion/pull/3816) ([andygrove](https://github.com/andygrove)) -- Clarify initial example with respect to capitalization [\#3815](https://github.com/apache/arrow-datafusion/pull/3815) ([alamb](https://github.com/alamb)) -- Improve expression simplification by running it twice [\#3811](https://github.com/apache/arrow-datafusion/pull/3811) ([alamb](https://github.com/alamb)) -- Make expression manipulation consistent and easier to use: `combine/split filter` `conjunction`, etc [\#3810](https://github.com/apache/arrow-datafusion/pull/3810) ([alamb](https://github.com/alamb)) -- Consolidate expression manipulation functions into `datafusion_optimizer` [\#3809](https://github.com/apache/arrow-datafusion/pull/3809) ([alamb](https://github.com/alamb)) -- Optimize `regexp_replace` when the input is a sparse array [\#3804](https://github.com/apache/arrow-datafusion/pull/3804) ([isidentical](https://github.com/isidentical)) -- Stop ignoring errors when writing DataFrame to csv, parquet, json [\#3801](https://github.com/apache/arrow-datafusion/pull/3801) ([andygrove](https://github.com/andygrove)) -- Update datafusion-cli Cargo.lock to fix CI check on master [\#3799](https://github.com/apache/arrow-datafusion/pull/3799) ([alamb](https://github.com/alamb)) -- MINOR: Benchmark regression tests [\#3790](https://github.com/apache/arrow-datafusion/pull/3790) ([andygrove](https://github.com/andygrove)) -- MINOR: Optimizer example and docs, deprecate `Expr::name` [\#3788](https://github.com/apache/arrow-datafusion/pull/3788) ([andygrove](https://github.com/andygrove)) -- Join cardinality computation for cost-based nested join optimizations [\#3787](https://github.com/apache/arrow-datafusion/pull/3787) ([isidentical](https://github.com/isidentical)) -- Optimizer now simplifies multiplication, division, module arg is a literal Decimal zero or one [\#3782](https://github.com/apache/arrow-datafusion/pull/3782) 
([drrtuy](https://github.com/drrtuy)) -- Implement parquet page-level skipping with column index, using min/ma… [\#3780](https://github.com/apache/arrow-datafusion/pull/3780) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Bump actions/labeler from 4.0.1 to 4.0.2 [\#3779](https://github.com/apache/arrow-datafusion/pull/3779) ([dependabot[bot]](https://github.com/apps/dependabot)) -- MINOR: correct `ListingOptions.try_new` docs to include the enabled stat collection [\#3775](https://github.com/apache/arrow-datafusion/pull/3775) ([isidentical](https://github.com/isidentical)) -- Teach a negative NULL expression to return NULL instead of an error [\#3771](https://github.com/apache/arrow-datafusion/pull/3771) ([drrtuy](https://github.com/drrtuy)) -- Add benchmarks for testing row filtering [\#3769](https://github.com/apache/arrow-datafusion/pull/3769) ([thinkharderdev](https://github.com/thinkharderdev)) -- move type coercion of agg and agg\_udaf to logical phase [\#3768](https://github.com/apache/arrow-datafusion/pull/3768) ([liukun4515](https://github.com/liukun4515)) -- User Guide: Add `EXPLAIN` to SQL reference [\#3767](https://github.com/apache/arrow-datafusion/pull/3767) ([unvalley](https://github.com/unvalley)) -- Allow specialized implementations to produce hints for the array adapter [\#3765](https://github.com/apache/arrow-datafusion/pull/3765) ([isidentical](https://github.com/isidentical)) -- Fix optimizer regression with simplifying expressions in subquery filters [\#3764](https://github.com/apache/arrow-datafusion/pull/3764) ([andygrove](https://github.com/andygrove)) -- Run all `datafusion-examples` in CI tests [\#3761](https://github.com/apache/arrow-datafusion/pull/3761) ([alamb](https://github.com/alamb)) -- MINOR: Remove deprecated module `datafusion::logical_plan::plan` [\#3759](https://github.com/apache/arrow-datafusion/pull/3759) ([andygrove](https://github.com/andygrove)) -- Refactor `Expr::Case` to use a struct 
[\#3757](https://github.com/apache/arrow-datafusion/pull/3757) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Do not run labeler CI check if it would fail due to permissions [\#3756](https://github.com/apache/arrow-datafusion/pull/3756) ([alamb](https://github.com/alamb)) -- MINOR: Improvements to `scalar_subquery_to_join` error handling [\#3754](https://github.com/apache/arrow-datafusion/pull/3754) ([andygrove](https://github.com/andygrove)) -- Always track the final size of the in-mem sorted arrays [\#3753](https://github.com/apache/arrow-datafusion/pull/3753) ([isidentical](https://github.com/isidentical)) -- Fix DataFrame::select\_columns to handle column names with a period [\#3751](https://github.com/apache/arrow-datafusion/pull/3751) ([zhoudongyan](https://github.com/zhoudongyan)) -- Fix `ListingTableUrl` to decode percent [\#3750](https://github.com/apache/arrow-datafusion/pull/3750) ([unvalley](https://github.com/unvalley)) -- remove `type coercion` for physical ScalarFunction [\#3749](https://github.com/apache/arrow-datafusion/pull/3749) ([liukun4515](https://github.com/liukun4515)) -- CI: Add a new run to check whether `datafusion-cli` lock file is up-to-date [\#3745](https://github.com/apache/arrow-datafusion/pull/3745) ([isidentical](https://github.com/isidentical)) -- Add datafusion example of expression apis [\#3741](https://github.com/apache/arrow-datafusion/pull/3741) ([alamb](https://github.com/alamb)) -- fix subquery where exists distinct [\#3732](https://github.com/apache/arrow-datafusion/pull/3732) ([b41sh](https://github.com/b41sh)) -- Remove some uneeded code in `CommonSubexprEliminate` [\#3730](https://github.com/apache/arrow-datafusion/pull/3730) ([alamb](https://github.com/alamb)) -- Consolidate and better tests for expression re-rewriting / aliasing [\#3727](https://github.com/apache/arrow-datafusion/pull/3727) ([alamb](https://github.com/alamb)) -- Fix output schema generated 
by CommonSubExprEliminate [\#3726](https://github.com/apache/arrow-datafusion/pull/3726) ([alex-natzka](https://github.com/alex-natzka)) -- Add type coercion rule for `concat` and `concat_ws` [\#3721](https://github.com/apache/arrow-datafusion/pull/3721) ([HaoYang670](https://github.com/HaoYang670)) -- Expose and document a simpler public API for simplify expressions [\#3719](https://github.com/apache/arrow-datafusion/pull/3719) ([ygf11](https://github.com/ygf11)) -- Remove dead code in `UnwrapCastExprRewriter` that may mask errors [\#3703](https://github.com/apache/arrow-datafusion/pull/3703) ([alamb](https://github.com/alamb)) -- Fix `DataFrame::with_column` to handle creating column names with a period [\#3700](https://github.com/apache/arrow-datafusion/pull/3700) ([alamb](https://github.com/alamb)) -- Add simplification rules for the `CONCAT` function [\#3684](https://github.com/apache/arrow-datafusion/pull/3684) ([HaoYang670](https://github.com/HaoYang670)) -- Compressed CSV/JSON support [\#3642](https://github.com/apache/arrow-datafusion/pull/3642) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Licht-T](https://github.com/Licht-T)) -- Simplify serialization by removing redundant `PrimitiveScalarValue` [\#3612](https://github.com/apache/arrow-datafusion/pull/3612) ([alamb](https://github.com/alamb)) -- Pushdown single column predicates from ON join clauses [\#3578](https://github.com/apache/arrow-datafusion/pull/3578) ([AssHero](https://github.com/AssHero)) -- Simplify the serialization of `ScalarValue::List` [\#3547](https://github.com/apache/arrow-datafusion/pull/3547) ([alamb](https://github.com/alamb)) -- Generate hash aggregation output in smaller record batches [\#3461](https://github.com/apache/arrow-datafusion/pull/3461) ([milenkovicm](https://github.com/milenkovicm)) -- Improve doc on lowercase treatment of columns on SQL [\#3385](https://github.com/apache/arrow-datafusion/pull/3385) ([nanicpc](https://github.com/nanicpc)) - -## 
[13.0.0-rc1](https://github.com/apache/arrow-datafusion/tree/13.0.0-rc1) (2022-10-07) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/13.0.0...13.0.0-rc1) - - -## [13.0.0](https://github.com/apache/arrow-datafusion/tree/13.0.0) (2022-10-06) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/12.0.0...13.0.0) - -**Breaking changes:** - -- Make ObjectStoreProvider fallible \(return `Result` rather than `Option`\) [\#3584](https://github.com/apache/arrow-datafusion/pull/3584) ([tustvold](https://github.com/tustvold)) -- Make `OptimizerConfig` a builder style API [\#3525](https://github.com/apache/arrow-datafusion/pull/3525) ([alamb](https://github.com/alamb)) - -**Implemented enhancements:** - -- remove `type coercion` for ScalarUDF in the physical phase [\#3734](https://github.com/apache/arrow-datafusion/issues/3734) -- Allow with statements to specify their columns alongside their expression names [\#3716](https://github.com/apache/arrow-datafusion/issues/3716) -- Support SQLDataType::Timestamp\(TimezoneInfo\) [\#3693](https://github.com/apache/arrow-datafusion/issues/3693) -- support `type coercion` for case when expr [\#3673](https://github.com/apache/arrow-datafusion/issues/3673) -- Add simplification rules for the `Modulo` operator [\#3664](https://github.com/apache/arrow-datafusion/issues/3664) -- Add TIMESTAMPTZ [\#3659](https://github.com/apache/arrow-datafusion/issues/3659) -- Simplify `A * 0` and `A * null`. 
[\#3626](https://github.com/apache/arrow-datafusion/issues/3626) -- change rule of `PreCastLitInComparisonExpressions` to unwrap cast rule after \#3582 [\#3622](https://github.com/apache/arrow-datafusion/issues/3622) -- Optimize regex\_replace with a known pattern / replacement [\#3613](https://github.com/apache/arrow-datafusion/issues/3613) -- Simplify `CONCAT_WS(NULL, ..)` to `NULL` [\#3607](https://github.com/apache/arrow-datafusion/issues/3607) -- Add OctoSQL to list of systems powered by DataFusion [\#3605](https://github.com/apache/arrow-datafusion/issues/3605) -- Prevent over-allocation \(and spills\) on TopK queries [\#3596](https://github.com/apache/arrow-datafusion/issues/3596) -- Allow ObjectStoreProvider to return None \(return Result\ rather than Result\) [\#3594](https://github.com/apache/arrow-datafusion/issues/3594) -- simplify between expr should consider the data type [\#3587](https://github.com/apache/arrow-datafusion/issues/3587) -- make type coercion simple and remove the evaluate logic [\#3585](https://github.com/apache/arrow-datafusion/issues/3585) -- ReduceOuterJoin optimizer support `cast or try_cast` expr. 
[\#3565](https://github.com/apache/arrow-datafusion/issues/3565) -- Support type coercion for subquery [\#3557](https://github.com/apache/arrow-datafusion/issues/3557) -- Make `ParquetScanOptions` public and expose a reference to the scan options from `ParquetExec` [\#3550](https://github.com/apache/arrow-datafusion/issues/3550) -- Use `fetch` limit in `get_sorted_iter` [\#3544](https://github.com/apache/arrow-datafusion/issues/3544) -- Push limit to sort [\#3528](https://github.com/apache/arrow-datafusion/issues/3528) -- Execute sorts in parallel when limit is used after sort [\#3526](https://github.com/apache/arrow-datafusion/issues/3526) -- Consolidate optimizer passes in optimizer module for better testing [\#3524](https://github.com/apache/arrow-datafusion/issues/3524) -- Support Top-K query optimization for `ORDER BY \ \[ASC [\#3515](https://github.com/apache/arrow-datafusion/issues/3515) -- support the type coercion for `like` `unlike` `istrue` `isfalse` `isunknown` [\#3509](https://github.com/apache/arrow-datafusion/issues/3509) -- Automate the pushing of releases to Homebrew [\#3506](https://github.com/apache/arrow-datafusion/issues/3506) -- Add extra DATE\_PART units that are already supported in arrow-rs [\#3502](https://github.com/apache/arrow-datafusion/issues/3502) -- Release datafusion-cli 12.0.0 on Homebrew [\#3501](https://github.com/apache/arrow-datafusion/issues/3501) -- Make `from_proto_binary_op` public [\#3489](https://github.com/apache/arrow-datafusion/issues/3489) -- coercion between decimal and other types lacking, compared to other numeric types [\#3479](https://github.com/apache/arrow-datafusion/issues/3479) -- move type coercion for inlist from physical phase to logical phase [\#3468](https://github.com/apache/arrow-datafusion/issues/3468) -- Make `datafusion::physical_plan::file_format::file_strean::FileStream` public [\#3466](https://github.com/apache/arrow-datafusion/issues/3466) -- Support using offset index in 
`ParquetRecordBatchStream` when pushing down `RowFilter` [\#3456](https://github.com/apache/arrow-datafusion/issues/3456) -- Support timestamp data type in In\_list node [\#3449](https://github.com/apache/arrow-datafusion/issues/3449) -- Evaluate expressions after type coercion [\#3431](https://github.com/apache/arrow-datafusion/issues/3431) -- Make a convenience function to register a single `RecordBatch` as a table from SessionContext [\#3426](https://github.com/apache/arrow-datafusion/issues/3426) -- add datafusion-cli support of external table locations that object\_store supports [\#3424](https://github.com/apache/arrow-datafusion/issues/3424) -- pruning support cast/try\_cast expr [\#3414](https://github.com/apache/arrow-datafusion/issues/3414) -- Add documentation on querying against files in object store such as S3 [\#3399](https://github.com/apache/arrow-datafusion/issues/3399) -- Remove type-coercion from physical planner [\#3388](https://github.com/apache/arrow-datafusion/issues/3388) -- support `Statement::ShowVariable` to show session configs [\#3364](https://github.com/apache/arrow-datafusion/issues/3364) -- Support `RowFilter` in `ParquetExec` [\#3360](https://github.com/apache/arrow-datafusion/issues/3360) -- Apply `TypeCoercion` rule before `FilterPushDown` [\#3289](https://github.com/apache/arrow-datafusion/issues/3289) -- Add support for `get` / `show` timezone [\#3255](https://github.com/apache/arrow-datafusion/issues/3255) -- Consider adding DataFusion to ClickBench benchmarks [\#2902](https://github.com/apache/arrow-datafusion/issues/2902) -- `filter_push_down` panics on semi/anti join with join filters [\#2888](https://github.com/apache/arrow-datafusion/issues/2888) -- Migrate the `cross join -> inner join optimization` from the planner to the optimizer [\#2859](https://github.com/apache/arrow-datafusion/issues/2859) -- ObjectStore write support [\#2185](https://github.com/apache/arrow-datafusion/issues/2185) -- DataFusion should scan Parquet 
statistics once per query [\#871](https://github.com/apache/arrow-datafusion/issues/871) -- Extend & generalize constant folding / evaluation in logical optimizer [\#237](https://github.com/apache/arrow-datafusion/issues/237) - -**Fixed bugs:** - -- `projection_push_down` produces invalid aggregate plans in some cases [\#3738](https://github.com/apache/arrow-datafusion/issues/3738) -- `Time With Time Zone` should raise error until `DataType::Time64` support tz [\#3715](https://github.com/apache/arrow-datafusion/issues/3715) -- SQL Planner doesn't distinguish normal CTEs from the recursive ones. [\#3713](https://github.com/apache/arrow-datafusion/issues/3713) -- Fix inconsistency between column name formats [\#3711](https://github.com/apache/arrow-datafusion/issues/3711) -- Optimizer rule 'projection\_push\_down' failed due to unexpected error: Error during planning: Aggregate schema has wrong number of fields. Expected 3 got 8 [\#3704](https://github.com/apache/arrow-datafusion/issues/3704) -- Optimizer regressions in `unwrap_cast_in_comparison` [\#3690](https://github.com/apache/arrow-datafusion/issues/3690) -- Internal error when evaluating a predicate = "The type of Dictionary\(Int16, Utf8\) = Int64 of binary physical should be same" [\#3685](https://github.com/apache/arrow-datafusion/issues/3685) -- Specialized regexp\_replace should early-abort when the the input arrays are empty [\#3647](https://github.com/apache/arrow-datafusion/issues/3647) -- Internal error: Failed to coerce types Decimal128\(10, 2\) and Boolean in BETWEEN expression [\#3646](https://github.com/apache/arrow-datafusion/issues/3646) -- Internal error: Failed to coerce types Decimal128\(10, 2\) and Boolean in BETWEEN expression [\#3645](https://github.com/apache/arrow-datafusion/issues/3645) -- Type coercion error: The type of Boolean AND Decimal128\(10, 2\) of binary physical should be same [\#3644](https://github.com/apache/arrow-datafusion/issues/3644) -- LEFT JOIN not working as expected, 
error message is confusing [\#3639](https://github.com/apache/arrow-datafusion/issues/3639) -- `INTERSECT` and `EXCEPT` don't return an error when 2 sets have the different number of columns [\#3632](https://github.com/apache/arrow-datafusion/issues/3632) -- The datafusion-cli panics when `union` 2 table with different number of columns. [\#3630](https://github.com/apache/arrow-datafusion/issues/3630) -- The expression `col(a) / null` is not optimized. [\#3624](https://github.com/apache/arrow-datafusion/issues/3624) -- `s3_build_error` test may fail in some environments [\#3601](https://github.com/apache/arrow-datafusion/issues/3601) -- New clippy errors appears to be break the CI on the master [\#3597](https://github.com/apache/arrow-datafusion/issues/3597) -- `StringConcat` gives inconsistent result with `concat` when containing `null` [\#3569](https://github.com/apache/arrow-datafusion/issues/3569) -- simplify\_expressions don't support different data type for binary [\#3556](https://github.com/apache/arrow-datafusion/issues/3556) -- Broken logical plan serialization for aggregation queries [\#3555](https://github.com/apache/arrow-datafusion/issues/3555) -- Aggregate filters do not get pushed down to table scan [\#3546](https://github.com/apache/arrow-datafusion/issues/3546) -- `docs.rs` cannot build `datafusion-proto` crate [\#3538](https://github.com/apache/arrow-datafusion/issues/3538) -- DataFusion serialization doesn't handle `ScalarValue::Dictionary, Binary, LargeBinary, Time64, IntervalMonthDayNano, Struct` [\#3531](https://github.com/apache/arrow-datafusion/issues/3531) -- What should be returned when trying to get a config in invalid format? 
[\#3505](https://github.com/apache/arrow-datafusion/issues/3505) -- Dividing decimal type gives wrong error: "170141183460469231731687303715884105727 is too large to store in a Decimal128 [\#3498](https://github.com/apache/arrow-datafusion/issues/3498) -- Add BitwiseXor in function `from_proto_binary_op` [\#3495](https://github.com/apache/arrow-datafusion/issues/3495) -- comparison operations with a scalar null and decimal array panics [\#3487](https://github.com/apache/arrow-datafusion/issues/3487) -- Union columns with different types [\#3467](https://github.com/apache/arrow-datafusion/issues/3467) -- Can't get the right logical plan after optimizer [\#3421](https://github.com/apache/arrow-datafusion/issues/3421) -- Fix conflict between simplify\_expression rule and CAST expressions [\#3409](https://github.com/apache/arrow-datafusion/issues/3409) -- Empty array giving error [\#2439](https://github.com/apache/arrow-datafusion/issues/2439) -- Internal error: Unsupported data type in hasher: FixedSizeBinary\(16\) [\#1516](https://github.com/apache/arrow-datafusion/issues/1516) -- Predicates on to\_timestamp do not work as expected with "naive" timestamp strings [\#765](https://github.com/apache/arrow-datafusion/issues/765) -- Address performance/execution plan of TPCH query 19 [\#78](https://github.com/apache/arrow-datafusion/issues/78) -- Bug fix: expr\_visitor was not visiting aggregate filter expressions [\#3548](https://github.com/apache/arrow-datafusion/pull/3548) ([andygrove](https://github.com/andygrove)) - -**Documentation updates:** - -- Publish 8.0.0 user guide [\#2558](https://github.com/apache/arrow-datafusion/issues/2558) -- MINOR: Add Dask SQL to list of projects powered by DataFusion [\#3581](https://github.com/apache/arrow-datafusion/pull/3581) ([andygrove](https://github.com/andygrove)) -- Add Parseable as Datafusion user [\#3471](https://github.com/apache/arrow-datafusion/pull/3471) ([nitisht](https://github.com/nitisht)) - -**Closed issues:** - -- 
Upgrade to Arrow 24.0.0 [\#3689](https://github.com/apache/arrow-datafusion/issues/3689) -- what's the best practice to get a single value from arrow array? [\#3497](https://github.com/apache/arrow-datafusion/issues/3497) -- The data type of predicate in the row filter should be same in the binary expr [\#3469](https://github.com/apache/arrow-datafusion/issues/3469) -- Extend constant folding and parquet filtering support [\#188](https://github.com/apache/arrow-datafusion/issues/188) -- Add FORMAT to explain plan and an easy to visualize format [\#96](https://github.com/apache/arrow-datafusion/issues/96) - -**Merged pull requests:** - -- Build aggregate schema in Aggregate::try\_new [\#3739](https://github.com/apache/arrow-datafusion/pull/3739) ([andygrove](https://github.com/andygrove)) -- delete type coercion for scalar udf in the physical phase [\#3735](https://github.com/apache/arrow-datafusion/pull/3735) ([liukun4515](https://github.com/liukun4515)) -- Consolidate coercion code in `datafusion_expr::type_coercion` and submodules [\#3728](https://github.com/apache/arrow-datafusion/pull/3728) ([alamb](https://github.com/alamb)) -- Skip filter push down on semi/anti joins [\#3723](https://github.com/apache/arrow-datafusion/pull/3723) ([andygrove](https://github.com/andygrove)) -- Raise `Unsupported SQL type` for `Time(WithTimeZone)` and `Time(Tz)` [\#3718](https://github.com/apache/arrow-datafusion/pull/3718) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) -- Support column aliases specified by `WITH` statements [\#3717](https://github.com/apache/arrow-datafusion/pull/3717) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([isidentical](https://github.com/isidentical)) -- Reject recursive CTEs before processing the sub-expressions [\#3714](https://github.com/apache/arrow-datafusion/pull/3714) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] 
([isidentical](https://github.com/isidentical)) -- Make column name consistent between Expr::name and Display/Debug [\#3712](https://github.com/apache/arrow-datafusion/pull/3712) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Fix aggregate type coercion bug [\#3710](https://github.com/apache/arrow-datafusion/pull/3710) ([alamb](https://github.com/alamb)) -- MINOR: Add `Expr::canonical_name` and improve docs on `Expr::name` [\#3706](https://github.com/apache/arrow-datafusion/pull/3706) ([andygrove](https://github.com/andygrove)) -- Remove type coercions from ScalarValue and aggregation function code [\#3705](https://github.com/apache/arrow-datafusion/pull/3705) ([ozankabak](https://github.com/ozankabak)) -- `unwrap_cast_in_comparison`: fix bug which can find the field for the schema [\#3699](https://github.com/apache/arrow-datafusion/pull/3699) ([liukun4515](https://github.com/liukun4515)) -- bump sql-parser 0.25 [\#3698](https://github.com/apache/arrow-datafusion/pull/3698) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Move optimizer init to optimizer crate [\#3692](https://github.com/apache/arrow-datafusion/pull/3692) ([andygrove](https://github.com/andygrove)) -- Upgrade `arrow` `parquet` and `arrow-flight` to 24.0.0 [\#3691](https://github.com/apache/arrow-datafusion/pull/3691) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Fix bug in dictionary coercion and allow better coercion [\#3688](https://github.com/apache/arrow-datafusion/pull/3688) ([alamb](https://github.com/alamb)) -- \[MINOR\] Improve docstrings in binary\_rule.rs [\#3687](https://github.com/apache/arrow-datafusion/pull/3687) ([alamb](https://github.com/alamb)) -- \[MINOR\] Add `ScalarValue::new_utf8`, clean up creation of literals in casting tests [\#3680](https://github.com/apache/arrow-datafusion/pull/3680) 
([alamb](https://github.com/alamb)) -- Disable code coverage until we figure out why it is broken [\#3679](https://github.com/apache/arrow-datafusion/pull/3679) ([alamb](https://github.com/alamb)) -- move `type coercion` for case when expr [\#3676](https://github.com/apache/arrow-datafusion/pull/3676) ([liukun4515](https://github.com/liukun4515)) -- Update sqlparser to 0.24.0 [\#3675](https://github.com/apache/arrow-datafusion/pull/3675) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Fail if field lengths are not same in INTERSECT and EXPECT [\#3674](https://github.com/apache/arrow-datafusion/pull/3674) ([askoa](https://github.com/askoa)) -- Simplification Rules for Modulo Operator [\#3669](https://github.com/apache/arrow-datafusion/pull/3669) ([askoa](https://github.com/askoa)) -- change pre\_cast\_lit\_in\_comparison to unwrap\_cast\_in\_comparison [\#3662](https://github.com/apache/arrow-datafusion/pull/3662) ([liukun4515](https://github.com/liukun4515)) -- restore optimization for `between` in simplify expression rule [\#3661](https://github.com/apache/arrow-datafusion/pull/3661) ([liukun4515](https://github.com/liukun4515)) -- add timestamptz [\#3660](https://github.com/apache/arrow-datafusion/pull/3660) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) -- remove the type coercion in the simplify\_expressions rule [\#3657](https://github.com/apache/arrow-datafusion/pull/3657) ([liukun4515](https://github.com/liukun4515)) -- Cache collected file statistics [\#3649](https://github.com/apache/arrow-datafusion/pull/3649) ([mateuszkj](https://github.com/mateuszkj)) -- make regexp\_replace early abort with empty input [\#3648](https://github.com/apache/arrow-datafusion/pull/3648) ([isidentical](https://github.com/isidentical)) -- Check each query has same number of columns when building the UNION plan 
[\#3638](https://github.com/apache/arrow-datafusion/pull/3638) ([HaoYang670](https://github.com/HaoYang670)) -- move the `type coercion` to the beginning of the optimizer rule and support type coercion for subquery [\#3636](https://github.com/apache/arrow-datafusion/pull/3636) ([liukun4515](https://github.com/liukun4515)) -- Add documentation for querying S3 data with CLI [\#3631](https://github.com/apache/arrow-datafusion/pull/3631) ([andygrove](https://github.com/andygrove)) -- Simplify multiplication by `0` and by `null` [\#3627](https://github.com/apache/arrow-datafusion/pull/3627) ([HaoYang670](https://github.com/HaoYang670)) -- Simplify null division. [\#3625](https://github.com/apache/arrow-datafusion/pull/3625) ([HaoYang670](https://github.com/HaoYang670)) -- support cast/try\_cast expr in reduceOuterJoin [\#3621](https://github.com/apache/arrow-datafusion/pull/3621) ([AssHero](https://github.com/AssHero)) -- MINOR: fix TPC-H conversion function to not miss a row of data [\#3620](https://github.com/apache/arrow-datafusion/pull/3620) ([kmitchener](https://github.com/kmitchener)) -- Document ObjectStoreProvider [\#3619](https://github.com/apache/arrow-datafusion/pull/3619) ([tustvold](https://github.com/tustvold)) -- \[feat\] Support using offset index in ParquetRecordBatchStream when pu… [\#3616](https://github.com/apache/arrow-datafusion/pull/3616) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Optimize `regex_replace` for scalar patterns [\#3614](https://github.com/apache/arrow-datafusion/pull/3614) ([isidentical](https://github.com/isidentical)) -- Simplify `concat_ws(null, ..)` to `null` [\#3608](https://github.com/apache/arrow-datafusion/pull/3608) ([HaoYang670](https://github.com/HaoYang670)) -- MINOR: improve docstrings on SessionContext [\#3603](https://github.com/apache/arrow-datafusion/pull/3603) ([alamb](https://github.com/alamb)) -- Merge s3\_success and s3\_build\_error tests into one test 
[\#3602](https://github.com/apache/arrow-datafusion/pull/3602) ([Licht-T](https://github.com/Licht-T)) -- add `register_batch` and `read_batch` to `SessionContext` to register a single RecordBatch as a table [\#3600](https://github.com/apache/arrow-datafusion/pull/3600) ([BaymaxHWY](https://github.com/BaymaxHWY)) -- \[CI\] Fix the newly added linting errors to make clippy happy [\#3598](https://github.com/apache/arrow-datafusion/pull/3598) ([isidentical](https://github.com/isidentical)) -- Prevent over-allocations \(and spills\) on sorts with a fixed limit [\#3593](https://github.com/apache/arrow-datafusion/pull/3593) ([isidentical](https://github.com/isidentical)) -- update datafusion cli deps [\#3588](https://github.com/apache/arrow-datafusion/pull/3588) ([Jimexist](https://github.com/Jimexist)) -- Update cranelift\* dependencies `0.87` --\> `0.88` [\#3586](https://github.com/apache/arrow-datafusion/pull/3586) ([alamb](https://github.com/alamb)) -- Fix docs.rs [\#3580](https://github.com/apache/arrow-datafusion/pull/3580) ([avantgardnerio](https://github.com/avantgardnerio)) -- Fix build [\#3576](https://github.com/apache/arrow-datafusion/pull/3576) ([alamb](https://github.com/alamb)) -- Use consistent name for `TimeUnit::Millisecond` [\#3575](https://github.com/apache/arrow-datafusion/pull/3575) ([alamb](https://github.com/alamb)) -- Fix logical plan serialization [\#3574](https://github.com/apache/arrow-datafusion/pull/3574) ([thinkharderdev](https://github.com/thinkharderdev)) -- Custom window frame logic \(support `ROWS`, `RANGE`, `PRECEDING` and `FOLLOWING` for window functions\) [\#3570](https://github.com/apache/arrow-datafusion/pull/3570) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([metesynnada](https://github.com/metesynnada)) -- fix comparison of decimal array with null scalar [\#3567](https://github.com/apache/arrow-datafusion/pull/3567) ([kmitchener](https://github.com/kmitchener)) -- Reduce dependencies of `datafusion-sql` crate 
[\#3566](https://github.com/apache/arrow-datafusion/pull/3566) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mbrobbel](https://github.com/mbrobbel)) -- Update pbjson-types requirement from 0.3 to 0.5 [\#3560](https://github.com/apache/arrow-datafusion/pull/3560) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Update pbjson requirement from 0.3 to 0.5 [\#3559](https://github.com/apache/arrow-datafusion/pull/3559) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Update pbjson-build requirement from 0.3 to 0.5 [\#3558](https://github.com/apache/arrow-datafusion/pull/3558) ([dependabot[bot]](https://github.com/apps/dependabot)) -- MINOR: enable q19 in TPCH [\#3553](https://github.com/apache/arrow-datafusion/pull/3553) ([kmitchener](https://github.com/kmitchener)) -- MINOR: remove out-of-date is\_dictionary checks from binary\_rule.rs [\#3552](https://github.com/apache/arrow-datafusion/pull/3552) ([kmitchener](https://github.com/kmitchener)) -- Make ParquetScanOptions public and add method to get a reference from… [\#3551](https://github.com/apache/arrow-datafusion/pull/3551) ([thinkharderdev](https://github.com/thinkharderdev)) -- fix coercion of null for decimal math in binary\_rules [\#3549](https://github.com/apache/arrow-datafusion/pull/3549) ([kmitchener](https://github.com/kmitchener)) -- Use `fetch` limit in get\_sorted\_iter [\#3545](https://github.com/apache/arrow-datafusion/pull/3545) ([Dandandan](https://github.com/Dandandan)) -- feat: allow object store registration from datafusion-cli [\#3540](https://github.com/apache/arrow-datafusion/pull/3540) ([turbo1912](https://github.com/turbo1912)) -- Actually test that `ScalarValue`s are the same after round trip serialization [\#3537](https://github.com/apache/arrow-datafusion/pull/3537) ([alamb](https://github.com/alamb)) -- Add serialization of `ScalarValue::Struct` [\#3536](https://github.com/apache/arrow-datafusion/pull/3536) ([alamb](https://github.com/alamb)) -- Add 
serialization of `ScalarValue::IntervalMonthDayNano` [\#3535](https://github.com/apache/arrow-datafusion/pull/3535) ([alamb](https://github.com/alamb)) -- Add serialization of `ScalarValue::Binary` and `ScalarValue::LargeBinary`, `ScalarValue::Time64` [\#3534](https://github.com/apache/arrow-datafusion/pull/3534) ([alamb](https://github.com/alamb)) -- MINOR: Impl `Debug` for TableReference and ResolvedTableReference [\#3533](https://github.com/apache/arrow-datafusion/pull/3533) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Add support for serializing `ScalarValue::Dictionary` to datafusion-proto [\#3532](https://github.com/apache/arrow-datafusion/pull/3532) ([alamb](https://github.com/alamb)) -- Push down limit to sort [\#3530](https://github.com/apache/arrow-datafusion/pull/3530) ([Dandandan](https://github.com/Dandandan)) -- Execute sort in parallel when a limit is used after sort [\#3527](https://github.com/apache/arrow-datafusion/pull/3527) ([Dandandan](https://github.com/Dandandan)) -- Config support type conversion [\#3522](https://github.com/apache/arrow-datafusion/pull/3522) ([comphead](https://github.com/comphead)) -- MINOR: Add more execs to list of supported execs [\#3519](https://github.com/apache/arrow-datafusion/pull/3519) ([andygrove](https://github.com/andygrove)) -- fix divide by zero not throwing proper error for decimal [\#3517](https://github.com/apache/arrow-datafusion/pull/3517) ([kmitchener](https://github.com/kmitchener)) -- Make FileStream and FileOpener public [\#3514](https://github.com/apache/arrow-datafusion/pull/3514) ([thinkharderdev](https://github.com/thinkharderdev)) -- feat: Union types coercion [\#3513](https://github.com/apache/arrow-datafusion/pull/3513) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gandronchik](https://github.com/gandronchik)) -- \[DataFrame\] - Add cache function for DataFrame 
[\#3512](https://github.com/apache/arrow-datafusion/pull/3512) ([francis-du](https://github.com/francis-du)) -- type coercion: support is/is\_not\_`bool`/like/unknown expr [\#3510](https://github.com/apache/arrow-datafusion/pull/3510) ([liukun4515](https://github.com/liukun4515)) -- MINOR: remove unused dependencies [\#3508](https://github.com/apache/arrow-datafusion/pull/3508) ([waynexia](https://github.com/waynexia)) -- Automate postrelease publishing to Homebrew [\#3507](https://github.com/apache/arrow-datafusion/pull/3507) ([iajoiner](https://github.com/iajoiner)) -- Add additional DATE\_PART units [\#3503](https://github.com/apache/arrow-datafusion/pull/3503) ([jonmmease](https://github.com/jonmmease)) -- Add BitwiseXor in function from\_proto\_binary\_op [\#3496](https://github.com/apache/arrow-datafusion/pull/3496) ([askoa](https://github.com/askoa)) -- Make the function from\_proto\_binary\_op public [\#3490](https://github.com/apache/arrow-datafusion/pull/3490) ([askoa](https://github.com/askoa)) -- minor: fix bug in `downcast_value!` macro \(`T` --\> `$T`\) [\#3486](https://github.com/apache/arrow-datafusion/pull/3486) ([alamb](https://github.com/alamb)) -- add time\_zone into ConfigOptions [\#3485](https://github.com/apache/arrow-datafusion/pull/3485) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) -- \[MINOR\] Change `downcast_value!` macro so it does not need to use `use std::any::type_name;` [\#3484](https://github.com/apache/arrow-datafusion/pull/3484) ([alamb](https://github.com/alamb)) -- Convert more cross joins to inner joins \(Address performance/execution plan of TPCH query 19\) [\#3482](https://github.com/apache/arrow-datafusion/pull/3482) ([DhamoPS](https://github.com/DhamoPS)) -- \[minor\] Remove unused arg in macro in Inlist [\#3474](https://github.com/apache/arrow-datafusion/pull/3474) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- inlist: move type coercion to logical phase 
[\#3472](https://github.com/apache/arrow-datafusion/pull/3472) ([liukun4515](https://github.com/liukun4515)) -- Use the column data type as the NULL data type in the row filter [\#3470](https://github.com/apache/arrow-datafusion/pull/3470) ([liukun4515](https://github.com/liukun4515)) -- apply type coercion before filter pushdown [\#3459](https://github.com/apache/arrow-datafusion/pull/3459) ([liukun4515](https://github.com/liukun4515)) -- add FixedSizeBinary support to create\_hashes [\#3458](https://github.com/apache/arrow-datafusion/pull/3458) ([mcassels](https://github.com/mcassels)) -- Support ShowVariable Statement [\#3455](https://github.com/apache/arrow-datafusion/pull/3455) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) -- Add additional pruning tests with casts, handle unsupported predicates better [\#3454](https://github.com/apache/arrow-datafusion/pull/3454) ([alamb](https://github.com/alamb)) -- Add `InList` support for timestamp type. 
\(\#3449\) [\#3450](https://github.com/apache/arrow-datafusion/pull/3450) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Evaluate expressions after type coercion [\#3444](https://github.com/apache/arrow-datafusion/pull/3444) ([Dandandan](https://github.com/Dandandan)) -- remove type coercion in the binary physical expr [\#3396](https://github.com/apache/arrow-datafusion/pull/3396) ([liukun4515](https://github.com/liukun4515)) -- Use arrow row format in SortPreservingMerge ~50-70% faster [\#3386](https://github.com/apache/arrow-datafusion/pull/3386) ([tustvold](https://github.com/tustvold)) -- Pushdown `RowFilter` in `ParquetExec` [\#3380](https://github.com/apache/arrow-datafusion/pull/3380) ([thinkharderdev](https://github.com/thinkharderdev)) - - -## [12.0.0](https://github.com/apache/arrow-datafusion/tree/12.0.0) (2022-09-12) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/11.0.0...12.0.0) - -**Breaking changes:** - -- Pass `return_type` to `AccumulatorFunctionImplementation ` for user defined aggregates [\#3428](https://github.com/apache/arrow-datafusion/pull/3428) ([alamb](https://github.com/alamb)) -- Use `usize` rather than `Option` to represent `Limit::skip`and `Limit::offset` [\#3374](https://github.com/apache/arrow-datafusion/pull/3374) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Deprecate legacy datafusion::logical\_plan module [\#3338](https://github.com/apache/arrow-datafusion/pull/3338) ([andygrove](https://github.com/andygrove)) -- Update signature for Expr.name so that schema is no longer required [\#3336](https://github.com/apache/arrow-datafusion/pull/3336) ([andygrove](https://github.com/andygrove)) -- MINOR: rename optimizer rule to ScalarSubqueryToJoin [\#3306](https://github.com/apache/arrow-datafusion/pull/3306) ([kmitchener](https://github.com/kmitchener)) -- Add top-level `Like`, `ILike`, `SimilarTo` expressions in logical plan 
[\#3298](https://github.com/apache/arrow-datafusion/pull/3298) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Upgrade to sqlparser 0.22 [\#3278](https://github.com/apache/arrow-datafusion/pull/3278) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- `Expr` variants for boolean operations [\#3275](https://github.com/apache/arrow-datafusion/pull/3275) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([sarahyurick](https://github.com/sarahyurick)) -- Upgrade to sqlparser 0.21 [\#3200](https://github.com/apache/arrow-datafusion/pull/3200) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Add SQL planner support for `Like`, `ILike` and `SimilarTo`, with optional escape character [\#3101](https://github.com/apache/arrow-datafusion/pull/3101) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) - -**Implemented enhancements:** - -- support `cast` inside `values` [\#3446](https://github.com/apache/arrow-datafusion/issues/3446) -- update TPCH test schemas to use Decimal128 from Float [\#3435](https://github.com/apache/arrow-datafusion/issues/3435) -- Include Bitwise operators in the documentation [\#3434](https://github.com/apache/arrow-datafusion/issues/3434) -- How to read excel file with datafusion? 
[\#3433](https://github.com/apache/arrow-datafusion/issues/3433) -- Pass return type to the accumulator state factory in aggregates [\#3427](https://github.com/apache/arrow-datafusion/issues/3427) -- Support bitwise XOR operator \(`#`\) [\#3420](https://github.com/apache/arrow-datafusion/issues/3420) -- support InList with datatype Date32 [\#3412](https://github.com/apache/arrow-datafusion/issues/3412) -- add simplification for `between` expression during logical plan optimization [\#3402](https://github.com/apache/arrow-datafusion/issues/3402) -- Replace From trait with TryFrom trait for datafusion-proto crate [\#3401](https://github.com/apache/arrow-datafusion/issues/3401) -- update TPC-H benchmark to Decimal types from Float [\#3392](https://github.com/apache/arrow-datafusion/issues/3392) -- Use `usize` to represent `Limit::skip` [\#3369](https://github.com/apache/arrow-datafusion/issues/3369) -- Avoid coping in `LogicalPlan::expressions` [\#3368](https://github.com/apache/arrow-datafusion/issues/3368) -- Upgrade to Arrow 22 [\#3362](https://github.com/apache/arrow-datafusion/issues/3362) -- Eliminate `OFFSET 0` in the logical plan optimization [\#3355](https://github.com/apache/arrow-datafusion/issues/3355) -- Add ability to get unoptimized logical plan from DataFrame [\#3340](https://github.com/apache/arrow-datafusion/issues/3340) -- Allow IDEs to recognize generated code [\#3332](https://github.com/apache/arrow-datafusion/issues/3332) -- `CAST` should not change the name of an expression [\#3326](https://github.com/apache/arrow-datafusion/issues/3326) -- add SQL support for unsigned integers [\#3325](https://github.com/apache/arrow-datafusion/issues/3325) -- Review use of panic in `datafusion-proto` crate [\#3318](https://github.com/apache/arrow-datafusion/issues/3318) -- Review use of panic in `datafusion-sql` crate [\#3315](https://github.com/apache/arrow-datafusion/issues/3315) -- Review use of panic in `datafusion-optimizer` crate 
[\#3314](https://github.com/apache/arrow-datafusion/issues/3314) -- Review use of panic in `datafusion-expr` crate [\#3312](https://github.com/apache/arrow-datafusion/issues/3312) -- Support registration of custom TableProviders through SQL [\#3310](https://github.com/apache/arrow-datafusion/issues/3310) -- Support binary data in sha hash functions [\#3308](https://github.com/apache/arrow-datafusion/issues/3308) -- add SQL support for tinyint and unsigned versions of all INTs [\#3307](https://github.com/apache/arrow-datafusion/issues/3307) -- Support binary types in InList expression [\#3300](https://github.com/apache/arrow-datafusion/issues/3300) -- Physical planner should map `IsTrue` and similar expressions to `IsDistinctFrom` [\#3288](https://github.com/apache/arrow-datafusion/issues/3288) -- Introduce physical plan version of `Operator` enum [\#3269](https://github.com/apache/arrow-datafusion/issues/3269) -- Introduce `Expr` variants for `IS [NOT] TRUE / FALSE / UNKNOWN` [\#3268](https://github.com/apache/arrow-datafusion/issues/3268) -- Add support for non-correlated subqueries [\#3266](https://github.com/apache/arrow-datafusion/issues/3266) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] -- \(Re-\)add support for glob patterns in ListingTableUrl [\#3261](https://github.com/apache/arrow-datafusion/issues/3261) -- `PreCastLitInComparisonExpressions` should use ExprRewriter and supported nested expressions [\#3259](https://github.com/apache/arrow-datafusion/issues/3259) -- implement `DROP VIEW` [\#3251](https://github.com/apache/arrow-datafusion/issues/3251) -- Upgrade to Arrow 21 [\#3224](https://github.com/apache/arrow-datafusion/issues/3224) -- Add TypeCoercion optimizer rule [\#3221](https://github.com/apache/arrow-datafusion/issues/3221) -- Create bench for approx\_percentile\_cont aggregate [\#3217](https://github.com/apache/arrow-datafusion/issues/3217) -- Add SQL query planner support for `DISTRIBUTED BY` 
[\#3207](https://github.com/apache/arrow-datafusion/issues/3207) -- Support "IS \[NOT\] UNKNOWN" syntax [\#3195](https://github.com/apache/arrow-datafusion/issues/3195) -- sqlparser 0.21 upgrade [\#3192](https://github.com/apache/arrow-datafusion/issues/3192) -- Re-implement parsing/planning for SHOW TABLES due to sqlparser changes [\#3188](https://github.com/apache/arrow-datafusion/issues/3188) -- Support `SUM` `AVG`, `MIN`, `MAX` on `Time` columns. [\#3166](https://github.com/apache/arrow-datafusion/issues/3166) -- Support "IS TRUE/FALSE" syntax [\#3159](https://github.com/apache/arrow-datafusion/issues/3159) -- Support number of histogram bins in approx\_percentile\_cont [\#3145](https://github.com/apache/arrow-datafusion/issues/3145) -- Support create ApproxPercentileAccumulator with TDigest max\_size [\#3142](https://github.com/apache/arrow-datafusion/issues/3142) -- Remove support for `array` function and only support `array[]` style postgres syntax [\#3115](https://github.com/apache/arrow-datafusion/issues/3115) -- Allow inline column aliases for create view [\#3108](https://github.com/apache/arrow-datafusion/issues/3108) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] -- Add support for Postgres `SIMILAR TO` and `ILIKE` syntax [\#3099](https://github.com/apache/arrow-datafusion/issues/3099) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] -- Update SQL reference in user guide to cover all supported syntax [\#3091](https://github.com/apache/arrow-datafusion/issues/3091) -- DataFusion prelude should import all logical expression functions [\#3068](https://github.com/apache/arrow-datafusion/issues/3068) -- Proposal: Add similar to operator [\#3016](https://github.com/apache/arrow-datafusion/issues/3016) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] -- Release DataFusion 11.0.0 [\#3012](https://github.com/apache/arrow-datafusion/issues/3012) -- Implement "SHOW CREATE TABLE" for external tables 
[\#2848](https://github.com/apache/arrow-datafusion/issues/2848) -- Change java package names in protobuf files [\#2513](https://github.com/apache/arrow-datafusion/issues/2513) -- When creating `DFField` from `Expr` we should provide input plan not input schema [\#2456](https://github.com/apache/arrow-datafusion/issues/2456) -- Support "IS NOT TRUE/FALSE" syntax [\#2265](https://github.com/apache/arrow-datafusion/issues/2265) -- RFC: Spill-To-Disk Object Storage Download [\#2205](https://github.com/apache/arrow-datafusion/issues/2205) -- Support for BitwiseAnd `&`, BitOr `|` binary operators [\#1619](https://github.com/apache/arrow-datafusion/issues/1619) -- \[Question\] Usage of async object store APIs in consuming code [\#1313](https://github.com/apache/arrow-datafusion/issues/1313) -- Allow User Defined Aggregates to return multiple values / structs [\#600](https://github.com/apache/arrow-datafusion/issues/600) -- Implement vectorized hashing for dictionary types [\#331](https://github.com/apache/arrow-datafusion/issues/331) - -**Fixed bugs:** - -- Intermittent build error when changing selected features [\#3366](https://github.com/apache/arrow-datafusion/issues/3366) -- `sql::timestamp::timestamp_add_interval_months` failing since September 1st [\#3327](https://github.com/apache/arrow-datafusion/issues/3327) -- `sql::timestamp::timestamp_add_interval_months` test fails [\#3322](https://github.com/apache/arrow-datafusion/issues/3322) -- test case `timestamp_add_interval_months` failed on master branch [\#3321](https://github.com/apache/arrow-datafusion/issues/3321) -- datafusion-proto does not support untyped null scalar values [\#3302](https://github.com/apache/arrow-datafusion/issues/3302) -- `ConfigOptions` creation is slow [\#3295](https://github.com/apache/arrow-datafusion/issues/3295) -- FilterPushDown optimization through UNION ALL results in SchemaError [\#3281](https://github.com/apache/arrow-datafusion/issues/3281) -- Execute LogicalPlans after 
building for TPCH Benchmarks [\#3273](https://github.com/apache/arrow-datafusion/issues/3273) -- `CREATE TABLE` should return empty DataFrame [\#3265](https://github.com/apache/arrow-datafusion/issues/3265) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] -- `CREATE EXTERNAL TABLE` from CSV creates a table with no columns if there is just a header row [\#3263](https://github.com/apache/arrow-datafusion/issues/3263) -- View TableProvider ignores projections, resulting in invalid plans [\#3240](https://github.com/apache/arrow-datafusion/issues/3240) -- CREATE VIEW should return an empty dataframe on success [\#3236](https://github.com/apache/arrow-datafusion/issues/3236) -- `DISTRIBUTE BY` expressions get removed during optimization [\#3234](https://github.com/apache/arrow-datafusion/issues/3234) -- datafusion cannot recognize Chinese characters. [\#3203](https://github.com/apache/arrow-datafusion/issues/3203) -- Panicked at 'byte index 1 is out of bounds on invalid query [\#3190](https://github.com/apache/arrow-datafusion/issues/3190) -- `like_nlike_with_null_lt` fails with latest sqlparser code [\#3187](https://github.com/apache/arrow-datafusion/issues/3187) -- Interval Literal output inconsistent date\_type [\#3180](https://github.com/apache/arrow-datafusion/issues/3180) -- `array` function allows different data types [\#3123](https://github.com/apache/arrow-datafusion/issues/3123) -- eq operator doesn't work on binary data [\#3117](https://github.com/apache/arrow-datafusion/issues/3117) -- incorrect `where` clause comparison while using table alias [\#3073](https://github.com/apache/arrow-datafusion/issues/3073) -- Some functions are incorrectly declared as unary [\#3069](https://github.com/apache/arrow-datafusion/issues/3069) -- once now\(\) is called in a statement, it forever returns the same value [\#3057](https://github.com/apache/arrow-datafusion/issues/3057) -- single\_distinct\_to\_groupby panic when group by expr is a binaryExpr
[\#2994](https://github.com/apache/arrow-datafusion/issues/2994) -- Cannot have `order by` expression that references complex `group by` expression [\#2360](https://github.com/apache/arrow-datafusion/issues/2360) -- Fix some bugs in TypeCoercion rule [\#3407](https://github.com/apache/arrow-datafusion/pull/3407) ([andygrove](https://github.com/andygrove)) -- MINOR: Stop ignoring `AggregateFunction::distinct` in protobuf serde code [\#3250](https://github.com/apache/arrow-datafusion/pull/3250) ([andygrove](https://github.com/andygrove)) -- Add assertion for invariant in `create_physical_expression` and fix ViewTable projection [\#3242](https://github.com/apache/arrow-datafusion/pull/3242) ([andygrove](https://github.com/andygrove)) -- Fix bug where optimizer was removing `Partitioning::DistributeBy` expressions [\#3229](https://github.com/apache/arrow-datafusion/pull/3229) ([andygrove](https://github.com/andygrove)) - -**Documentation updates:** - -- \[minor\] add Coverage Status in readme [\#3220](https://github.com/apache/arrow-datafusion/pull/3220) ([Ted-Jiang](https://github.com/Ted-Jiang)) - -**Closed issues:** - -- Add `\i` command to datafusion-cli [\#1906](https://github.com/apache/arrow-datafusion/issues/1906) -- TPC-H Query 15 [\#166](https://github.com/apache/arrow-datafusion/issues/166) - -**Merged pull requests:** - -- minor: fix some typo. 
[\#3453](https://github.com/apache/arrow-datafusion/pull/3453) ([jackwener](https://github.com/jackwener)) -- Update criterion requirement from 0.3 to 0.4 [\#3452](https://github.com/apache/arrow-datafusion/pull/3452) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Update object\_store requirement from 0.4.0 to 0.5.0 [\#3451](https://github.com/apache/arrow-datafusion/pull/3451) ([dependabot[bot]](https://github.com/apps/dependabot)) -- add `cast` support inside `values` [\#3447](https://github.com/apache/arrow-datafusion/pull/3447) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) -- Use hash repartitioning for aggregates on dictionaries [\#3445](https://github.com/apache/arrow-datafusion/pull/3445) ([isidentical](https://github.com/isidentical)) -- Review `unwrap` and `panic` from the `aggregate` directory of `datafusion-physical-expr` [\#3443](https://github.com/apache/arrow-datafusion/pull/3443) ([iajoiner](https://github.com/iajoiner)) -- MINOR: Implement protobuf serde for all binary operators [\#3441](https://github.com/apache/arrow-datafusion/pull/3441) ([andygrove](https://github.com/andygrove)) -- MINOR: Add accessor methods to DateTimeIntervalExpr [\#3440](https://github.com/apache/arrow-datafusion/pull/3440) ([andygrove](https://github.com/andygrove)) -- update TPCH-mimicking tests to Decimal data type from Float, matching the benchmark [\#3438](https://github.com/apache/arrow-datafusion/pull/3438) ([kmitchener](https://github.com/kmitchener)) -- Include Bitwise operators in the documentation [\#3436](https://github.com/apache/arrow-datafusion/pull/3436) ([askoa](https://github.com/askoa)) -- minor: make sql number parsing slightly more efficient + functional [\#3432](https://github.com/apache/arrow-datafusion/pull/3432) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Implement bitwise XOR operator \(`#`\) 
[\#3430](https://github.com/apache/arrow-datafusion/pull/3430) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([askoa](https://github.com/askoa)) -- Replace From trait with TryFrom trait for datafusion-proto crate \#3401 [\#3429](https://github.com/apache/arrow-datafusion/pull/3429) ([comphead](https://github.com/comphead)) -- Tests showing user defined aggregate returning a struct [\#3425](https://github.com/apache/arrow-datafusion/pull/3425) ([alamb](https://github.com/alamb)) -- MINOR: update optimizer rule names to be consistent style as the rest [\#3415](https://github.com/apache/arrow-datafusion/pull/3415) ([kmitchener](https://github.com/kmitchener)) -- Support date32 and date 64 in inlist node [\#3413](https://github.com/apache/arrow-datafusion/pull/3413) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Update sqlparser requirement from 0.22 to 0.23 [\#3411](https://github.com/apache/arrow-datafusion/pull/3411) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- simplify the `between` expr during logical plan optimization [\#3404](https://github.com/apache/arrow-datafusion/pull/3404) ([kmitchener](https://github.com/kmitchener)) -- MINOR: Improve optimizer error [\#3403](https://github.com/apache/arrow-datafusion/pull/3403) ([andygrove](https://github.com/andygrove)) -- Review panics in the sql crate [\#3397](https://github.com/apache/arrow-datafusion/pull/3397) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- changed TPC-H benchmark to use Decimal types [\#3393](https://github.com/apache/arrow-datafusion/pull/3393) ([kmitchener](https://github.com/kmitchener)) -- minor: remove redundant code. 
[\#3389](https://github.com/apache/arrow-datafusion/pull/3389) ([jackwener](https://github.com/jackwener)) -- Add dictionary cases to merge bench [\#3384](https://github.com/apache/arrow-datafusion/pull/3384) ([tustvold](https://github.com/tustvold)) -- Implement Eq trait for Expr and nested types [\#3381](https://github.com/apache/arrow-datafusion/pull/3381) ([jdye64](https://github.com/jdye64)) -- Minor: Improvements to type coercion rule [\#3379](https://github.com/apache/arrow-datafusion/pull/3379) ([alamb](https://github.com/alamb)) -- MINOR: Note that most communication happens on github [\#3375](https://github.com/apache/arrow-datafusion/pull/3375) ([alamb](https://github.com/alamb)) -- minor fix: clean data type for negative operation [\#3370](https://github.com/apache/arrow-datafusion/pull/3370) ([liukun4515](https://github.com/liukun4515)) -- Fix code generation for json feature [\#3367](https://github.com/apache/arrow-datafusion/pull/3367) ([avantgardnerio](https://github.com/avantgardnerio)) -- Review use of panic in datafusion-proto crate [\#3365](https://github.com/apache/arrow-datafusion/pull/3365) ([comphead](https://github.com/comphead)) -- Upgrade to arrow 22 [\#3363](https://github.com/apache/arrow-datafusion/pull/3363) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- return empty dataframe on create table, remove a duplicate optimize call [\#3361](https://github.com/apache/arrow-datafusion/pull/3361) ([kmitchener](https://github.com/kmitchener)) -- Add SQL support for `tinyint` , `smallint`, and `unsigned int variants` [\#3359](https://github.com/apache/arrow-datafusion/pull/3359) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) -- Minor: add hint in README of example [\#3358](https://github.com/apache/arrow-datafusion/pull/3358) ([jackwener](https://github.com/jackwener)) -- Collect to `HashSet` directly in 
`in_list` [\#3356](https://github.com/apache/arrow-datafusion/pull/3356) ([HaoYang670](https://github.com/HaoYang670)) -- MINOR: Add comments about rewrite\_disjunctive\_predicate [\#3351](https://github.com/apache/arrow-datafusion/pull/3351) ([alamb](https://github.com/alamb)) -- \[MINOR\] Add debug logging to plan teardown [\#3350](https://github.com/apache/arrow-datafusion/pull/3350) ([alamb](https://github.com/alamb)) -- MINOR: add df.to\_unoptimized\_plan\(\) to docs, remove erroneous comment [\#3348](https://github.com/apache/arrow-datafusion/pull/3348) ([kmitchener](https://github.com/kmitchener)) -- Replace `unwrap` in `convert_to_ordered_float` and add `downcast_value` [\#3347](https://github.com/apache/arrow-datafusion/pull/3347) ([iajoiner](https://github.com/iajoiner)) -- Remove panics from `common_subexpr_eliminate` [\#3346](https://github.com/apache/arrow-datafusion/pull/3346) ([andygrove](https://github.com/andygrove)) -- Remove Result.unwrap from single\_distinct\_to\_groupby [\#3345](https://github.com/apache/arrow-datafusion/pull/3345) ([andygrove](https://github.com/andygrove)) -- Add to\_unoptimized\_plan [\#3344](https://github.com/apache/arrow-datafusion/pull/3344) ([iajoiner](https://github.com/iajoiner)) -- Remove panics from simplify\_expressions optimizer rule [\#3343](https://github.com/apache/arrow-datafusion/pull/3343) ([andygrove](https://github.com/andygrove)) -- Remove `unreachable!` from filter push down rule [\#3342](https://github.com/apache/arrow-datafusion/pull/3342) ([andygrove](https://github.com/andygrove)) -- Replace panic in `datafusion-expr` crate [\#3341](https://github.com/apache/arrow-datafusion/pull/3341) ([iajoiner](https://github.com/iajoiner)) -- Re-implement ExprIdentifierVisitor::desc\_expr to use Expr::Display [\#3339](https://github.com/apache/arrow-datafusion/pull/3339) ([andygrove](https://github.com/andygrove)) -- Fix the test`timestamp_add_interval_months` 
[\#3337](https://github.com/apache/arrow-datafusion/pull/3337) ([HaoYang670](https://github.com/HaoYang670)) -- Bump lz4-sys from 1.9.3 to 1.9.4 in /datafusion-cli [\#3335](https://github.com/apache/arrow-datafusion/pull/3335) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Make binary operator formatting consistent between logical and physical plans [\#3331](https://github.com/apache/arrow-datafusion/pull/3331) ([andygrove](https://github.com/andygrove)) -- Fix build: Ignore failing test [\#3329](https://github.com/apache/arrow-datafusion/pull/3329) ([andygrove](https://github.com/andygrove)) -- Add `InList` support for binary type. [\#3324](https://github.com/apache/arrow-datafusion/pull/3324) ([HaoYang670](https://github.com/HaoYang670)) -- MINOR: add github action trigger [\#3323](https://github.com/apache/arrow-datafusion/pull/3323) ([waynexia](https://github.com/waynexia)) -- add explain sql test for optimizer rule PreCastLitInComparisonExpressions [\#3320](https://github.com/apache/arrow-datafusion/pull/3320) ([liukun4515](https://github.com/liukun4515)) -- Custom / Dynamic table provider factories [\#3311](https://github.com/apache/arrow-datafusion/pull/3311) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- fix: alias group\_by exprs in single\_distinct\_to\_groupby optimizer [\#3305](https://github.com/apache/arrow-datafusion/pull/3305) ([waynexia](https://github.com/waynexia)) -- Add support for serializing null scalar values [\#3303](https://github.com/apache/arrow-datafusion/pull/3303) ([andygrove](https://github.com/andygrove)) -- Finish integrating `Expr::Is[Not]True` and similar expressions [\#3301](https://github.com/apache/arrow-datafusion/pull/3301) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- MINOR: Remove `unwrap` calls from `single_distinct_to_groupby optimizer` rule 
[\#3299](https://github.com/apache/arrow-datafusion/pull/3299) ([andygrove](https://github.com/andygrove)) -- docs: update the Python library repository [\#3297](https://github.com/apache/arrow-datafusion/pull/3297) ([haoxins](https://github.com/haoxins)) -- fix: speed up `ConfigOptions` creation [\#3296](https://github.com/apache/arrow-datafusion/pull/3296) ([crepererum](https://github.com/crepererum)) -- Execute LogicalPlans after building for TPCH Benchmarks [\#3290](https://github.com/apache/arrow-datafusion/pull/3290) ([DaltonModlin](https://github.com/DaltonModlin)) -- support for non-correlated subqueries [\#3287](https://github.com/apache/arrow-datafusion/pull/3287) ([kmitchener](https://github.com/kmitchener)) -- Add `Aggregate::try new` with validation checks [\#3286](https://github.com/apache/arrow-datafusion/pull/3286) ([andygrove](https://github.com/andygrove)) -- Fix SchemaError in FilterPushDown optimization with UNION ALL [\#3282](https://github.com/apache/arrow-datafusion/pull/3282) ([jonmmease](https://github.com/jonmmease)) -- Allow sorting by aggregated groups [\#3280](https://github.com/apache/arrow-datafusion/pull/3280) ([isidentical](https://github.com/isidentical)) -- Add show external tables [\#3279](https://github.com/apache/arrow-datafusion/pull/3279) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([psvri](https://github.com/psvri)) -- Return from task execution if send fails as there is nothing more to do \(faster cancel / limit\) [\#3276](https://github.com/apache/arrow-datafusion/pull/3276) ([nvartolomei](https://github.com/nvartolomei)) -- Let prelude import all expression functions [\#3274](https://github.com/apache/arrow-datafusion/pull/3274) ([sadilet](https://github.com/sadilet)) -- Fix no schema when CSV is only header [\#3272](https://github.com/apache/arrow-datafusion/pull/3272) ([comphead](https://github.com/comphead)) -- support inlist for pre cast literal expression 
[\#3270](https://github.com/apache/arrow-datafusion/pull/3270) ([liukun4515](https://github.com/liukun4515)) -- implement `drop view` [\#3267](https://github.com/apache/arrow-datafusion/pull/3267) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) -- Use `ExprRewriter` in `pre_cast_lit_in_comparison` [\#3260](https://github.com/apache/arrow-datafusion/pull/3260) ([andygrove](https://github.com/andygrove)) -- Add type coercion for UDFs in logical plan [\#3254](https://github.com/apache/arrow-datafusion/pull/3254) ([andygrove](https://github.com/andygrove)) -- Support "IS NOT TRUE/FALSE" syntax [\#3252](https://github.com/apache/arrow-datafusion/pull/3252) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([sarahyurick](https://github.com/sarahyurick)) -- Implement `IS UNKNOWN`/`IS NOT UNKNOWN` operators [\#3246](https://github.com/apache/arrow-datafusion/pull/3246) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([isidentical](https://github.com/isidentical)) -- support decimal data type for the optimizer rule of PreCastLitInComparisonExpressions [\#3245](https://github.com/apache/arrow-datafusion/pull/3245) ([liukun4515](https://github.com/liukun4515)) -- chore: update cranelifts to 0.87.0 [\#3243](https://github.com/apache/arrow-datafusion/pull/3243) ([yjshen](https://github.com/yjshen)) -- Moved nullif out of unary functions [\#3241](https://github.com/apache/arrow-datafusion/pull/3241) ([comphead](https://github.com/comphead)) -- MINOR: documentation updates [\#3239](https://github.com/apache/arrow-datafusion/pull/3239) ([kmitchener](https://github.com/kmitchener)) -- MINOR: Add bounds check to Column physical expression [\#3238](https://github.com/apache/arrow-datafusion/pull/3238) ([andygrove](https://github.com/andygrove)) -- CREATE VIEW should return empty dataframe [\#3237](https://github.com/apache/arrow-datafusion/pull/3237) ([kmitchener](https://github.com/kmitchener)) 
-- Support "IS TRUE/FALSE" syntax \(redo\) [\#3235](https://github.com/apache/arrow-datafusion/pull/3235) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([sarahyurick](https://github.com/sarahyurick)) -- Fix propagation of optimized predicates on nested projections [\#3228](https://github.com/apache/arrow-datafusion/pull/3228) ([isidentical](https://github.com/isidentical)) -- Add more trim test cases [\#3226](https://github.com/apache/arrow-datafusion/pull/3226) ([ayushdg](https://github.com/ayushdg)) -- Upgrade to arrow 21 [\#3225](https://github.com/apache/arrow-datafusion/pull/3225) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- Add optimizer rule for type coercion \(binary operations only\) [\#3222](https://github.com/apache/arrow-datafusion/pull/3222) ([andygrove](https://github.com/andygrove)) -- \[Improve\] Use arrow::compute::sort in approx\_percentile\_cont [\#3219](https://github.com/apache/arrow-datafusion/pull/3219) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- \[minor\] fix bench aggregate\_query\_sql meta [\#3218](https://github.com/apache/arrow-datafusion/pull/3218) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- minor: refactor simplify negate [\#3213](https://github.com/apache/arrow-datafusion/pull/3213) ([jackwener](https://github.com/jackwener)) -- MINOR: update cargo.lock and rust-version for datafusion-cli [\#3212](https://github.com/apache/arrow-datafusion/pull/3212) ([kmitchener](https://github.com/kmitchener)) -- fix issue with now\(\) returning same value across statements [\#3210](https://github.com/apache/arrow-datafusion/pull/3210) ([kmitchener](https://github.com/kmitchener)) -- Add support for inline column alias in CREATE VIEW [\#3209](https://github.com/apache/arrow-datafusion/pull/3209) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([DaltonModlin](https://github.com/DaltonModlin)) -- Add SQL query planner support for 
`DISTRIBUTE BY` [\#3208](https://github.com/apache/arrow-datafusion/pull/3208) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- minor: remove test code that's in the arrow library now [\#3206](https://github.com/apache/arrow-datafusion/pull/3206) ([kmitchener](https://github.com/kmitchener)) -- Use .get\(\) to avoid panic [\#3201](https://github.com/apache/arrow-datafusion/pull/3201) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jklamer](https://github.com/jklamer)) -- \[Minor\] Reduce code duplication creating ScalarValue::List [\#3197](https://github.com/apache/arrow-datafusion/pull/3197) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Clean up CI workflows by removing "matrix" strategy, simplifying names [\#3196](https://github.com/apache/arrow-datafusion/pull/3196) ([alamb](https://github.com/alamb)) -- optimizer: add framework for the rule of pre-add cast to the literal in comparison binary [\#3185](https://github.com/apache/arrow-datafusion/pull/3185) ([liukun4515](https://github.com/liukun4515)) -- Fix clippy [\#3182](https://github.com/apache/arrow-datafusion/pull/3182) ([alamb](https://github.com/alamb)) -- MINOR: Add notes on writing release blog posts [\#3179](https://github.com/apache/arrow-datafusion/pull/3179) ([andygrove](https://github.com/andygrove)) -- add min/max for time [\#3178](https://github.com/apache/arrow-datafusion/pull/3178) ([waitingkuo](https://github.com/waitingkuo)) -- Recursively apply remove filter rule if filter is a true scalar value [\#3175](https://github.com/apache/arrow-datafusion/pull/3175) ([byteink](https://github.com/byteink)) -- Update `ahash` requirement from 0.7 to 0.8 [\#3161](https://github.com/apache/arrow-datafusion/pull/3161) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Support number of centroids in approx\_percentile\_cont 
[\#3146](https://github.com/apache/arrow-datafusion/pull/3146) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Introduce `\i` command to execute from a file [\#3136](https://github.com/apache/arrow-datafusion/pull/3136) ([turbo1912](https://github.com/turbo1912)) -- impl binary ops between binary arrays and scalars [\#3124](https://github.com/apache/arrow-datafusion/pull/3124) ([ozgrakkurt](https://github.com/ozgrakkurt)) - - -## [11.0.0](https://github.com/apache/arrow-datafusion/tree/11.0.0) (2022-08-16) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/10.0.0-rc1...11.0.0) - -**Breaking changes:** - -- Implement exact median, add `AggregateState` [\#3009](https://github.com/apache/arrow-datafusion/pull/3009) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) - -**Implemented enhancements:** - -- Make RowAccumulator public [\#3138](https://github.com/apache/arrow-datafusion/issues/3138) -- docs: proposal for consolidating docs into a Contributor Guide [\#3127](https://github.com/apache/arrow-datafusion/issues/3127) -- feat: support Timestamp +/- Interval [\#3103](https://github.com/apache/arrow-datafusion/issues/3103) -- a `arrow_typeof` like posgresql's `pg_typeof` [\#3095](https://github.com/apache/arrow-datafusion/issues/3095) -- Add DataFrame section to user guide [\#3066](https://github.com/apache/arrow-datafusion/issues/3066) -- Document all scalar SQL functions in user guide [\#3065](https://github.com/apache/arrow-datafusion/issues/3065) -- Simplify implementation of approx\_median so that it can be exposed in Python [\#3063](https://github.com/apache/arrow-datafusion/issues/3063) -- Support double quoted literal strings for dialects\(such as mysql,bigquery\) [\#3055](https://github.com/apache/arrow-datafusion/issues/3055) -- Simplify / speed up implementation of character\_length to unicode points [\#3049](https://github.com/apache/arrow-datafusion/issues/3049) -- Follow-up on 
Clickbench benchmark [\#3048](https://github.com/apache/arrow-datafusion/issues/3048) -- Why the PhysicalPlanner is an async trait ? [\#3032](https://github.com/apache/arrow-datafusion/issues/3032) -- Optimize file stream metrics. [\#3024](https://github.com/apache/arrow-datafusion/issues/3024) -- Proposal: Enable typed strings expressions for VALUES clause [\#3017](https://github.com/apache/arrow-datafusion/issues/3017) -- Proposal: Add `date_bin` function [\#3015](https://github.com/apache/arrow-datafusion/issues/3015) -- The upcoming release of Arrow \(20?\) breaks datafusion [\#3006](https://github.com/apache/arrow-datafusion/issues/3006) -- Can I select some files for query based on the filtering rules in the directory? [\#2993](https://github.com/apache/arrow-datafusion/issues/2993) -- Rename FormatReader to FileOpener [\#2990](https://github.com/apache/arrow-datafusion/issues/2990) -- Derive `Hash` trait for `JoinType` [\#2971](https://github.com/apache/arrow-datafusion/issues/2971) -- CAST from Utf8 to Boolean [\#2967](https://github.com/apache/arrow-datafusion/issues/2967) -- Add baseline\_metrics for FileStream to record metrics like elapsed time, record output, etc [\#2961](https://github.com/apache/arrow-datafusion/issues/2961) -- Example to show how to convert query result into rust struct [\#2959](https://github.com/apache/arrow-datafusion/issues/2959) -- simplify not clause [\#2957](https://github.com/apache/arrow-datafusion/issues/2957) -- Implement Debug for ColumnarValue [\#2950](https://github.com/apache/arrow-datafusion/issues/2950) -- Parallel fetching of column chunks when reading parquet files [\#2949](https://github.com/apache/arrow-datafusion/issues/2949) -- Extension mechanism for `SessionConfig` [\#2939](https://github.com/apache/arrow-datafusion/issues/2939) -- Streaming CSV/JSON Object Store Read [\#2935](https://github.com/apache/arrow-datafusion/issues/2935) -- Support CSV Limit Pushdown to Object Storage 
[\#2930](https://github.com/apache/arrow-datafusion/issues/2930) -- Add support for `pow` scalar function [\#2926](https://github.com/apache/arrow-datafusion/issues/2926) -- Add support for exact `median` aggregate function [\#2925](https://github.com/apache/arrow-datafusion/issues/2925) -- Support `mean` as synonym for `avg` [\#2922](https://github.com/apache/arrow-datafusion/issues/2922) -- Rename a column name [\#2919](https://github.com/apache/arrow-datafusion/issues/2919) -- Move `ScalarValue` tests alongside implementation, move `from_slice` to `core` [\#2913](https://github.com/apache/arrow-datafusion/issues/2913) -- Fail gracefully if optimization rule fails [\#2908](https://github.com/apache/arrow-datafusion/issues/2908) -- Make ObjectStoreRegistry as a trait which can allow Ballista to introduce a self registry ObjectStoreRegistry [\#2905](https://github.com/apache/arrow-datafusion/issues/2905) -- Remove datafusion-data-access crate [\#2903](https://github.com/apache/arrow-datafusion/issues/2903) -- Improve formatting of logical plans containing subquery expressions [\#2898](https://github.com/apache/arrow-datafusion/issues/2898) -- Atan2 added to built-in functions [\#2897](https://github.com/apache/arrow-datafusion/issues/2897) -- The explain statements only print logical plans for debug/other purpose. 
[\#2894](https://github.com/apache/arrow-datafusion/issues/2894) -- JSON version of `display_indent()` [\#2889](https://github.com/apache/arrow-datafusion/issues/2889) -- It would be nice to have a way to generate unique IDs in optimizer rules [\#2886](https://github.com/apache/arrow-datafusion/issues/2886) -- Add support for `TIME` literal values [\#2883](https://github.com/apache/arrow-datafusion/issues/2883) -- Add h2o benchmark [\#2879](https://github.com/apache/arrow-datafusion/issues/2879) -- Implement `from_unixtime` function [\#2871](https://github.com/apache/arrow-datafusion/issues/2871) -- Add `cast` function for creating logical cast expression [\#2870](https://github.com/apache/arrow-datafusion/issues/2870) -- Release DataFusion 10.0.0 [\#2862](https://github.com/apache/arrow-datafusion/issues/2862) -- Implement `information_schema.views` [\#2857](https://github.com/apache/arrow-datafusion/issues/2857) -- Migrate from avro\_rs to apache\_avro [\#2783](https://github.com/apache/arrow-datafusion/issues/2783) -- Add optimizer rule to remove `OFFSET 0` [\#2584](https://github.com/apache/arrow-datafusion/issues/2584) -- Preserve Element Name in ScalarValue::List [\#2450](https://github.com/apache/arrow-datafusion/issues/2450) -- Add EXISTS subquery support to Ballista [\#2338](https://github.com/apache/arrow-datafusion/issues/2338) -- Add documentation on supported functions to datafusion website [\#1487](https://github.com/apache/arrow-datafusion/issues/1487) -- documentations for datafusion-cli can be consolidated a bit more [\#1352](https://github.com/apache/arrow-datafusion/issues/1352) -- Optimizer: Predicate Rewrite pass for TPCH Q19 [\#217](https://github.com/apache/arrow-datafusion/issues/217) -- feat: add optimize rule `rewrite_disjunctive_predicate` [\#2858](https://github.com/apache/arrow-datafusion/pull/2858) ([xudong963](https://github.com/xudong963)) - -**Fixed bugs:** - -- Regression in SQL support for `ORDER BY` and aliased expressions 
[\#3160](https://github.com/apache/arrow-datafusion/issues/3160) -- panic when dealing with `@` operator [\#3137](https://github.com/apache/arrow-datafusion/issues/3137) -- Incorrect type coercion rule for date + interval [\#3093](https://github.com/apache/arrow-datafusion/issues/3093) -- Cast string to timestamp crash while we input time before 1970 with floating number second [\#3082](https://github.com/apache/arrow-datafusion/issues/3082) -- INTEGER type doesn't work while importing csv [\#3059](https://github.com/apache/arrow-datafusion/issues/3059) -- Cannot GROUP BY Binary [\#3050](https://github.com/apache/arrow-datafusion/issues/3050) -- incorrect i32 coercion for `to_timestamp` [\#3046](https://github.com/apache/arrow-datafusion/issues/3046) -- Error pruning `IsNull` expressions: Column 'instance\_null\_count' is declared as non-nullable but contains null values [\#3042](https://github.com/apache/arrow-datafusion/issues/3042) -- I want to query some files in a directory. Is there any way?
[\#3013](https://github.com/apache/arrow-datafusion/issues/3013) -- The expression to get an indexed field is only valid for `List` types \(`common_sub_expression_eliminate`\) [\#3002](https://github.com/apache/arrow-datafusion/issues/3002) -- Double to\_timestamp\_seconds produces abnormal result [\#2998](https://github.com/apache/arrow-datafusion/issues/2998) -- External parquet table fails when schema contains differing key / value metadata [\#2982](https://github.com/apache/arrow-datafusion/issues/2982) -- SELECT on column with uppercase column name fails with FieldNotFound error [\#2978](https://github.com/apache/arrow-datafusion/issues/2978) -- panic reading AWS-generated parquet file [\#2963](https://github.com/apache/arrow-datafusion/issues/2963) -- Can't filter rowgroup for parquet prune for some data type [\#2962](https://github.com/apache/arrow-datafusion/issues/2962) -- CI test is failing with ` final link failed: No space left on device` [\#2947](https://github.com/apache/arrow-datafusion/issues/2947) -- bug: new ObjectStore breaks backward compatibility with contrib plugins [\#2931](https://github.com/apache/arrow-datafusion/issues/2931) -- bug: file types handled wrong [\#2929](https://github.com/apache/arrow-datafusion/issues/2929) -- bug: changing the number of partitions does not increase concurrency [\#2928](https://github.com/apache/arrow-datafusion/issues/2928) -- csv\_explain fails on RC verifier [\#2916](https://github.com/apache/arrow-datafusion/issues/2916) -- index out of range error from datafusion\_row::write::write\_field [\#2910](https://github.com/apache/arrow-datafusion/issues/2910) -- Optimization rule `CommonSubexprEliminate` creates invalid projections [\#2907](https://github.com/apache/arrow-datafusion/issues/2907) -- serde\_json requires that either `std` \(default\) or `alloc` feature is enabled [\#2896](https://github.com/apache/arrow-datafusion/issues/2896) -- Inconsistent type coercion rules with comparison expressions 
[\#2890](https://github.com/apache/arrow-datafusion/issues/2890) -- Doc Error: the test directory link 404 which is in CONTRIBUTING.md [\#2880](https://github.com/apache/arrow-datafusion/issues/2880) -- Round trips through `ScalarValue`'s sometimes don't preserve types \(e.g. change types from `DictionaryArray`\) [\#2874](https://github.com/apache/arrow-datafusion/issues/2874) -- Error with CASE and DictionaryArrays: `ArrowError(InvalidArgumentError("arguments need to have the same data type"))` [\#2873](https://github.com/apache/arrow-datafusion/issues/2873) -- window functions not supported in expressions [\#2869](https://github.com/apache/arrow-datafusion/issues/2869) -- Unable to work with month intervals [\#2796](https://github.com/apache/arrow-datafusion/issues/2796) -- Discord invite link in communication page has expired [\#2743](https://github.com/apache/arrow-datafusion/issues/2743) -- Test \(path normalization\) failures while verifying release candidate 9.0.0 RC1 [\#2719](https://github.com/apache/arrow-datafusion/issues/2719) -- Reading parquet with \(pre-release\) arrow fails with "out of order projection is not supported" [\#2543](https://github.com/apache/arrow-datafusion/issues/2543) -- Fix SQL planner bug when resolving columns with same name as a relation [\#3003](https://github.com/apache/arrow-datafusion/pull/3003) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- fix `RowWriter` index out of bounds error [\#2968](https://github.com/apache/arrow-datafusion/pull/2968) ([comphead](https://github.com/comphead)) -- fix: support decimal statistic for row group prune [\#2966](https://github.com/apache/arrow-datafusion/pull/2966) ([liukun4515](https://github.com/liukun4515)) -- Fix invalid projection in `CommonSubexprEliminate` [\#2915](https://github.com/apache/arrow-datafusion/pull/2915) ([andygrove](https://github.com/andygrove)) - -**Documentation updates:** - -- MINOR: Fix broken links 
in contrib guide [\#3135](https://github.com/apache/arrow-datafusion/pull/3135) ([andygrove](https://github.com/andygrove)) -- MINOR: User Guide: Move expressions to top-level page [\#3134](https://github.com/apache/arrow-datafusion/pull/3134) ([andygrove](https://github.com/andygrove)) -- User Guide: Combine CLI pages [\#3133](https://github.com/apache/arrow-datafusion/pull/3133) ([andygrove](https://github.com/andygrove)) -- User Guide: Add documentation for JOIN syntax [\#3130](https://github.com/apache/arrow-datafusion/pull/3130) ([andygrove](https://github.com/andygrove)) -- separate contributors guide [\#3128](https://github.com/apache/arrow-datafusion/pull/3128) ([kmitchener](https://github.com/kmitchener)) -- minor: remove python docs, now they're in another project [\#3119](https://github.com/apache/arrow-datafusion/pull/3119) ([kmitchener](https://github.com/kmitchener)) -- minor: doc fixes: fix link to datafusion-python project and add link to slides for rece… [\#3118](https://github.com/apache/arrow-datafusion/pull/3118) ([kmitchener](https://github.com/kmitchener)) -- Add all scalar SQL functions to user guide [\#3090](https://github.com/apache/arrow-datafusion/pull/3090) ([andygrove](https://github.com/andygrove)) -- Add DataFrame reference to the user guide [\#3067](https://github.com/apache/arrow-datafusion/pull/3067) ([andygrove](https://github.com/andygrove)) -- MINOR: Add CeresDB to list of products using DataFusion [\#3060](https://github.com/apache/arrow-datafusion/pull/3060) ([andygrove](https://github.com/andygrove)) -- Minor: improve some docstrings about pruning [\#3041](https://github.com/apache/arrow-datafusion/pull/3041) ([alamb](https://github.com/alamb)) -- doc: add a new video link about datafusion [\#3025](https://github.com/apache/arrow-datafusion/pull/3025) ([xudong963](https://github.com/xudong963)) -- Update README.md to add CnosDB into the Known Uses [\#2933](https://github.com/apache/arrow-datafusion/pull/2933) 
([cnoshb](https://github.com/cnoshb)) - -**Performance improvements:** - -- Use code points instead of grapheme clusters for string functions [\#3054](https://github.com/apache/arrow-datafusion/pull/3054) ([Dandandan](https://github.com/Dandandan)) - -**Closed issues:** - -- Rename `do_data_time_math()` to `do_date_time_math()` [\#3172](https://github.com/apache/arrow-datafusion/issues/3172) -- Automatic version updates for github actions with dependabot [\#3106](https://github.com/apache/arrow-datafusion/issues/3106) -- \[EPIC\] Proposal for Date/Time enhancement [\#3100](https://github.com/apache/arrow-datafusion/issues/3100) -- Upgrade prost/tonic everywhere [\#3028](https://github.com/apache/arrow-datafusion/issues/3028) -- \[Question\] interested in helping with documentation [\#2866](https://github.com/apache/arrow-datafusion/issues/2866) -- Introducing a new optimizer framework for datafusion. [\#2633](https://github.com/apache/arrow-datafusion/issues/2633) -- Enable discussion tab? 
[\#2350](https://github.com/apache/arrow-datafusion/issues/2350) -- Add support for AVG\(Timestamp\) types [\#200](https://github.com/apache/arrow-datafusion/issues/200) -- TPC-H Query 22 [\#175](https://github.com/apache/arrow-datafusion/issues/175) -- TPC-H Query 21 [\#172](https://github.com/apache/arrow-datafusion/issues/172) -- TPC-H Query 20 [\#171](https://github.com/apache/arrow-datafusion/issues/171) -- TPC-H Query 17 [\#168](https://github.com/apache/arrow-datafusion/issues/168) -- TPC-H Query 11 [\#163](https://github.com/apache/arrow-datafusion/issues/163) -- TPC-H Query 4 [\#160](https://github.com/apache/arrow-datafusion/issues/160) -- TPC-H Query 2 [\#159](https://github.com/apache/arrow-datafusion/issues/159) -- \[Datafusion\] Optimize literal expression evaluation [\#106](https://github.com/apache/arrow-datafusion/issues/106) - -**Merged pull requests:** - -- Rename do\_data\_time\_math\(\) to do\_date\_time\_math\(\) [\#3173](https://github.com/apache/arrow-datafusion/pull/3173) ([JasonLi-cn](https://github.com/JasonLi-cn)) -- \[Minor\] Remove some redundant code [\#3169](https://github.com/apache/arrow-datafusion/pull/3169) ([alamb](https://github.com/alamb)) -- Support `INTEGER` again in addition to `INT` in `CREATE TABLE` and `CAST` statements [\#3167](https://github.com/apache/arrow-datafusion/pull/3167) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Fix regression in SQL parser related to resolution of aliased expressions [\#3165](https://github.com/apache/arrow-datafusion/pull/3165) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- update cargo lock [\#3164](https://github.com/apache/arrow-datafusion/pull/3164) ([waitingkuo](https://github.com/waitingkuo)) -- add test case for cast\_timestamp\_before\_1970 [\#3163](https://github.com/apache/arrow-datafusion/pull/3163) ([waitingkuo](https://github.com/waitingkuo)) -- Return 
proper error message for ill formed variable reference [\#3162](https://github.com/apache/arrow-datafusion/pull/3162) ([alamb](https://github.com/alamb)) -- Remove outdated license text left over from arrow repo [\#3154](https://github.com/apache/arrow-datafusion/pull/3154) ([alamb](https://github.com/alamb)) -- Expose RowAccumulator in physical\_plan [\#3151](https://github.com/apache/arrow-datafusion/pull/3151) ([iajoiner](https://github.com/iajoiner)) -- Rename `DateIntervalExpr` to `DateTimeIntervalExpr` [\#3150](https://github.com/apache/arrow-datafusion/pull/3150) ([alamb](https://github.com/alamb)) -- Bump actions/labeler from 4.0.0 to 4.0.1 [\#3144](https://github.com/apache/arrow-datafusion/pull/3144) ([dependabot[bot]](https://github.com/apps/dependabot)) -- User Guide: Add documentation for subquery syntax [\#3132](https://github.com/apache/arrow-datafusion/pull/3132) ([andygrove](https://github.com/andygrove)) -- MINOR: User Guide: Move Data Types and Information Schema to their own pages [\#3131](https://github.com/apache/arrow-datafusion/pull/3131) ([andygrove](https://github.com/andygrove)) -- Minor: Clean up `array` test [\#3121](https://github.com/apache/arrow-datafusion/pull/3121) ([alamb](https://github.com/alamb)) -- add arrow\_typeof [\#3120](https://github.com/apache/arrow-datafusion/pull/3120) ([waitingkuo](https://github.com/waitingkuo)) -- Bump actions/labeler from 2.2.0 to 4.0.0 [\#3114](https://github.com/apache/arrow-datafusion/pull/3114) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Bump actions/checkout from 2 to 3 [\#3113](https://github.com/apache/arrow-datafusion/pull/3113) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Bump actions/setup-node from 2 to 3 [\#3112](https://github.com/apache/arrow-datafusion/pull/3112) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Bump actions/setup-python from 3 to 4 [\#3111](https://github.com/apache/arrow-datafusion/pull/3111) 
([dependabot[bot]](https://github.com/apps/dependabot)) -- Feature/support timestamp plus minus interval [\#3110](https://github.com/apache/arrow-datafusion/pull/3110) ([JasonLi-cn](https://github.com/JasonLi-cn)) -- docs: fix typo [\#3109](https://github.com/apache/arrow-datafusion/pull/3109) ([dzvon](https://github.com/dzvon)) -- Remove offset if its zero [\#3102](https://github.com/apache/arrow-datafusion/pull/3102) ([turbo1912](https://github.com/turbo1912)) -- Hash binary values [\#3098](https://github.com/apache/arrow-datafusion/pull/3098) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) -- Update to object\_store 0.4 [\#3089](https://github.com/apache/arrow-datafusion/pull/3089) ([tustvold](https://github.com/tustvold)) -- Add cast function for creating cast expression [\#3084](https://github.com/apache/arrow-datafusion/pull/3084) ([turbo1912](https://github.com/turbo1912)) -- Upgrade to arrow 20.0.0 \(but no change to object\_store\), including `prost`, and `tonic` [\#3083](https://github.com/apache/arrow-datafusion/pull/3083) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) -- impl Debug for ColumnarValue, add some docs [\#3076](https://github.com/apache/arrow-datafusion/pull/3076) ([alamb](https://github.com/alamb)) -- \[Minor\] run cargo update in datafusion-cli directory [\#3075](https://github.com/apache/arrow-datafusion/pull/3075) ([alamb](https://github.com/alamb)) -- update cargo.lock in `datafusion-cli` [\#3074](https://github.com/apache/arrow-datafusion/pull/3074) ([waitingkuo](https://github.com/waitingkuo)) -- Update sql parser to v0.20.0 [\#3072](https://github.com/apache/arrow-datafusion/pull/3072) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) -- Add opening, scanning, processing metrics in file stream 
[\#3070](https://github.com/apache/arrow-datafusion/pull/3070) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Simplify `approx_median` implementation, expose via `DataFrame` API [\#3064](https://github.com/apache/arrow-datafusion/pull/3064) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- docs: fix PruningStatistics example and some typos [\#3062](https://github.com/apache/arrow-datafusion/pull/3062) ([roeap](https://github.com/roeap)) -- feat: support double quoted literal strings for dialects\(such as mysql,bigquery,spark\) [\#3056](https://github.com/apache/arrow-datafusion/pull/3056) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Rachelint](https://github.com/Rachelint)) -- Allow Overriding AsyncFileReader used by ParquetExec [\#3051](https://github.com/apache/arrow-datafusion/pull/3051) ([Cheappie](https://github.com/Cheappie)) -- to\_timestamp i32 coerced to i64 [\#3047](https://github.com/apache/arrow-datafusion/pull/3047) ([waitingkuo](https://github.com/waitingkuo)) -- Fix `IsNull` pruning expression generation without null\_count statistics [\#3044](https://github.com/apache/arrow-datafusion/pull/3044) ([alamb](https://github.com/alamb)) -- feat: Support `week`, `decade`, `century` for Interval literal [\#3038](https://github.com/apache/arrow-datafusion/pull/3038) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ovr](https://github.com/ovr)) -- feat: Support Binary bitwise shift operators \(\<\< and \>\>\) [\#3037](https://github.com/apache/arrow-datafusion/pull/3037) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ovr](https://github.com/ovr)) -- Use concat\_elements\_utf8 from arrow rather than custom kernel [\#3036](https://github.com/apache/arrow-datafusion/pull/3036) ([alamb](https://github.com/alamb)) -- minor: update minimal rust version to 1.62, matching arrow-rs [\#3035](https://github.com/apache/arrow-datafusion/pull/3035) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) -- feat: Add `date_bin` built-in function [\#3034](https://github.com/apache/arrow-datafusion/pull/3034) ([stuartcarnie](https://github.com/stuartcarnie)) -- Split `binary_expr.rs` into smaller modules [\#3026](https://github.com/apache/arrow-datafusion/pull/3026) ([alamb](https://github.com/alamb)) -- feat: Enable typed strings expressions for VALUES clause [\#3018](https://github.com/apache/arrow-datafusion/pull/3018) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([stuartcarnie](https://github.com/stuartcarnie)) -- fix typo for PR3003 [\#3011](https://github.com/apache/arrow-datafusion/pull/3011) ([waitingkuo](https://github.com/waitingkuo)) -- feat: Add support for TIME literal values [\#3010](https://github.com/apache/arrow-datafusion/pull/3010) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([stuartcarnie](https://github.com/stuartcarnie)) -- add TimeUnit::Second as signature for ToTimestampSeconds [\#3004](https://github.com/apache/arrow-datafusion/pull/3004) ([waitingkuo](https://github.com/waitingkuo)) -- Rename FileReader to FileOpener \(\#2990\) [\#2991](https://github.com/apache/arrow-datafusion/pull/2991) ([tustvold](https://github.com/tustvold)) -- minor: collation the prune test [\#2986](https://github.com/apache/arrow-datafusion/pull/2986) ([liukun4515](https://github.com/liukun4515)) -- Optionally skip metadata from schema when merging parquet files [\#2985](https://github.com/apache/arrow-datafusion/pull/2985) ([alamb](https://github.com/alamb)) -- \[Minor\] Extract interval parsing logic, add unit tests [\#2984](https://github.com/apache/arrow-datafusion/pull/2984) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Update sqlparser to 0.19 [\#2981](https://github.com/apache/arrow-datafusion/pull/2981) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- test: add file/SQL level test for pruning parquet row group with decimal data type. [\#2977](https://github.com/apache/arrow-datafusion/pull/2977) ([liukun4515](https://github.com/liukun4515)) -- Derive Hash for JoinType [\#2972](https://github.com/apache/arrow-datafusion/pull/2972) ([liurenjie1024](https://github.com/liurenjie1024)) -- Example that shows how to convert query result into rust struct \#2959 [\#2969](https://github.com/apache/arrow-datafusion/pull/2969) ([thomas-k-cameron](https://github.com/thomas-k-cameron)) -- Add baseline\_metrics for FileStream to record metrics like elapsed ti… [\#2965](https://github.com/apache/arrow-datafusion/pull/2965) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- test: add test for decimal and pruning for decimal column [\#2960](https://github.com/apache/arrow-datafusion/pull/2960) ([liukun4515](https://github.com/liukun4515)) -- Simplify expressions with `NOT` clause [\#2958](https://github.com/apache/arrow-datafusion/pull/2958) ([AssHero](https://github.com/AssHero)) -- chore: update jit-related dependencies [\#2956](https://github.com/apache/arrow-datafusion/pull/2956) ([xudong963](https://github.com/xudong963)) -- Update to arrow `19.0.0` [\#2955](https://github.com/apache/arrow-datafusion/pull/2955) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Remove CI Caching to preserve diskspace [\#2948](https://github.com/apache/arrow-datafusion/pull/2948) ([alamb](https://github.com/alamb)) -- Add metadata\_size\_hint for optimistic fetching of parquet metadata [\#2946](https://github.com/apache/arrow-datafusion/pull/2946) ([thinkharderdev](https://github.com/thinkharderdev)) -- Minor: Remove left over debugging statement [\#2944](https://github.com/apache/arrow-datafusion/pull/2944) ([alamb](https://github.com/alamb)) -- add Atan2 
[\#2942](https://github.com/apache/arrow-datafusion/pull/2942) ([waitingkuo](https://github.com/waitingkuo)) -- Use `Arc` and remove ObjectStoreRegistry::clone [\#2941](https://github.com/apache/arrow-datafusion/pull/2941) ([tustvold](https://github.com/tustvold)) -- add extension system to `SessionConfig` [\#2940](https://github.com/apache/arrow-datafusion/pull/2940) ([crepererum](https://github.com/crepererum)) -- Update prost-build requirement from 0.7 to 0.10 [\#2937](https://github.com/apache/arrow-datafusion/pull/2937) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Add streaming JSON and CSV reading, `NewlineDelimitedStream' \(\#2935\) [\#2936](https://github.com/apache/arrow-datafusion/pull/2936) ([tustvold](https://github.com/tustvold)) -- feat\(catalog\): Implement information\_schema.views [\#2934](https://github.com/apache/arrow-datafusion/pull/2934) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([BaymaxHWY](https://github.com/BaymaxHWY)) -- Support `window` functions in expressions by re-write projection after building window plan [\#2932](https://github.com/apache/arrow-datafusion/pull/2932) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) -- Add pow as synonym for power [\#2927](https://github.com/apache/arrow-datafusion/pull/2927) ([andygrove](https://github.com/andygrove)) -- Add `from_unixtime` function [\#2924](https://github.com/apache/arrow-datafusion/pull/2924) ([waitingkuo](https://github.com/waitingkuo)) -- fix\(aggregate\): support mean as synonym avg [\#2923](https://github.com/apache/arrow-datafusion/pull/2923) ([BaymaxHWY](https://github.com/BaymaxHWY)) -- Add `DataFrame::with_column_renamed` [\#2920](https://github.com/apache/arrow-datafusion/pull/2920) ([andygrove](https://github.com/andygrove)) -- Run clippy with optional features [\#2918](https://github.com/apache/arrow-datafusion/pull/2918) ([tustvold](https://github.com/tustvold)) -- Fix release 
verification script by not overriding `ARROW_TEST_DATA` or `PARQUET_TEST_DATA` [\#2917](https://github.com/apache/arrow-datafusion/pull/2917) ([alamb](https://github.com/alamb)) -- Move `ScalarValue` tests alongside implementation, move `from_slice` to `datafusion_core` [\#2914](https://github.com/apache/arrow-datafusion/pull/2914) ([alamb](https://github.com/alamb)) -- Optimizer should have option to skip failing rules [\#2909](https://github.com/apache/arrow-datafusion/pull/2909) ([andygrove](https://github.com/andygrove)) -- Introduce ObjectStoreProvider to create an object store based on the url [\#2906](https://github.com/apache/arrow-datafusion/pull/2906) ([yahoNanJing](https://github.com/yahoNanJing)) -- Remove datafusion-data-access crate [\#2904](https://github.com/apache/arrow-datafusion/pull/2904) ([yahoNanJing](https://github.com/yahoNanJing)) -- Combine all comparison coercion rules [\#2901](https://github.com/apache/arrow-datafusion/pull/2901) ([andygrove](https://github.com/andygrove)) -- Add `Projection::try_new` and `Projection::try_new_with_schema` [\#2900](https://github.com/apache/arrow-datafusion/pull/2900) ([andygrove](https://github.com/andygrove)) -- Improve formatting of logical plans containing subqueries [\#2899](https://github.com/apache/arrow-datafusion/pull/2899) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- add session option 'datafusion.explain.logical\_plan'. when set to true, the explain statement will only print logical plans. 
[\#2895](https://github.com/apache/arrow-datafusion/pull/2895) ([AssHero](https://github.com/AssHero)) -- Preserve field name in `ScalarValue::List` [\#2893](https://github.com/apache/arrow-datafusion/pull/2893) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([comphead](https://github.com/comphead)) -- Adds optional serde support to datafusion-proto [\#2892](https://github.com/apache/arrow-datafusion/pull/2892) ([tustvold](https://github.com/tustvold)) -- Implement `ScalarValue::Dictionary` and preserve type through conversion back/forth to Array [\#2891](https://github.com/apache/arrow-datafusion/pull/2891) ([alamb](https://github.com/alamb)) -- Add an ID generator in preparation for PR 2885 [\#2887](https://github.com/apache/arrow-datafusion/pull/2887) ([avantgardnerio](https://github.com/avantgardnerio)) -- Add support for correlated subqueries & fix all related TPC-H benchmark issues [\#2885](https://github.com/apache/arrow-datafusion/pull/2885) ([avantgardnerio](https://github.com/avantgardnerio)) -- fix\(doc\): update test directory link in CONTRIBUTING.md [\#2882](https://github.com/apache/arrow-datafusion/pull/2882) ([BaymaxHWY](https://github.com/BaymaxHWY)) -- Add h2o bench groupby queries [\#2881](https://github.com/apache/arrow-datafusion/pull/2881) ([andygrove](https://github.com/andygrove)) -- Add support for month & year intervals [\#2797](https://github.com/apache/arrow-datafusion/pull/2797) ([avantgardnerio](https://github.com/avantgardnerio)) -- Migrate from avro\_rs \(0.13\) to apache\_avro \(0.14\) [\#2784](https://github.com/apache/arrow-datafusion/pull/2784) ([martin-g](https://github.com/martin-g)) - -## [10.0.0-rc1](https://github.com/apache/arrow-datafusion/tree/10.0.0-rc1) (2022-07-12) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/10.0.0...10.0.0-rc1) - - -## [10.0.0](https://github.com/apache/arrow-datafusion/tree/10.0.0) (2022-07-12) - -[Full 
Changelog](https://github.com/apache/arrow-datafusion/compare/9.0.0...10.0.0) - -**Breaking changes:** - -- Convert batch\_size to config option [\#2771](https://github.com/apache/arrow-datafusion/pull/2771) ([andygrove](https://github.com/andygrove)) -- MINOR: Remove Offset struct [\#2734](https://github.com/apache/arrow-datafusion/pull/2734) ([andygrove](https://github.com/andygrove)) -- feat: async extension planner [\#2713](https://github.com/apache/arrow-datafusion/pull/2713) ([waynexia](https://github.com/waynexia)) -- Switch to object\_store crate \(\#2489\) [\#2677](https://github.com/apache/arrow-datafusion/pull/2677) ([tustvold](https://github.com/tustvold)) - -**Implemented enhancements:** - -- update documentation, fix styling to match main Arrow project [\#2864](https://github.com/apache/arrow-datafusion/issues/2864) -- Update top-level README [\#2850](https://github.com/apache/arrow-datafusion/issues/2850) -- \[Question\]How to call an async function in `ExecutionPlan::exec` method? 
[\#2847](https://github.com/apache/arrow-datafusion/issues/2847) -- Add `DataFrame::with_column` [\#2844](https://github.com/apache/arrow-datafusion/issues/2844) -- Improve ergonomics of physical expr `lit` [\#2827](https://github.com/apache/arrow-datafusion/issues/2827) -- Add Python examples for reading CSV and query by SQL in Doc [\#2824](https://github.com/apache/arrow-datafusion/issues/2824) -- eliminate multi limit-offset nodes to EmptyRelation if possible [\#2822](https://github.com/apache/arrow-datafusion/issues/2822) -- Make `LogicalPlan::Union` be consistent with other plans [\#2816](https://github.com/apache/arrow-datafusion/issues/2816) -- Use coerced data type from value and list expressions during planning inlist expression [\#2793](https://github.com/apache/arrow-datafusion/issues/2793) -- Add configuration option to enable/disalbe `CoalesceBatchesExec` [\#2790](https://github.com/apache/arrow-datafusion/issues/2790) -- Simplify FilterNullJoinKeys rule [\#2780](https://github.com/apache/arrow-datafusion/issues/2780) -- Allow configuration settings to be specified with environment variables [\#2776](https://github.com/apache/arrow-datafusion/issues/2776) -- Automatically update `configs.md` in user guide [\#2770](https://github.com/apache/arrow-datafusion/issues/2770) -- Support multiple paths for ListingTableScanNode [\#2768](https://github.com/apache/arrow-datafusion/issues/2768) -- Reduce outer joins [\#2757](https://github.com/apache/arrow-datafusion/issues/2757) -- support data type coerced and decimal in INLIST expr [\#2755](https://github.com/apache/arrow-datafusion/issues/2755) -- Change ExtensionPlanner::plan\_extension\(\) to an async function [\#2749](https://github.com/apache/arrow-datafusion/issues/2749) -- Add `IsNotNull` filter to join inputs if one side of join condition does not allow null [\#2739](https://github.com/apache/arrow-datafusion/issues/2739) -- Sort preserving MergeJoin 
[\#2698](https://github.com/apache/arrow-datafusion/issues/2698) -- Improve readability of table scan projections in query plans [\#2697](https://github.com/apache/arrow-datafusion/issues/2697) -- DataFusion 9.0.0 Release [\#2676](https://github.com/apache/arrow-datafusion/issues/2676) -- Improve UX for `UNION` vs `UNION ALL` \(introduce a LogicalPlan::Distinct\) [\#2573](https://github.com/apache/arrow-datafusion/issues/2573) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] -- Implement some way to show the sql used to create a view [\#2529](https://github.com/apache/arrow-datafusion/issues/2529) -- Consider adopting IOx ObjectStore abstraction [\#2489](https://github.com/apache/arrow-datafusion/issues/2489) -- Support `sum0` as a built-in agg function [\#2067](https://github.com/apache/arrow-datafusion/issues/2067) -- implement grouping sets, cubes, and rollups [\#1327](https://github.com/apache/arrow-datafusion/issues/1327) -- Ruby bindings [\#1114](https://github.com/apache/arrow-datafusion/issues/1114) -- Support dates in hash join [\#2746](https://github.com/apache/arrow-datafusion/pull/2746) ([andygrove](https://github.com/andygrove)) - -**Fixed bugs:** - -- Docker Error [\#2851](https://github.com/apache/arrow-datafusion/issues/2851) -- Anti join ignores join filters [\#2842](https://github.com/apache/arrow-datafusion/issues/2842) -- Can't test or compile sub-model code after upgrade to arrow-rs 17.0.0 [\#2835](https://github.com/apache/arrow-datafusion/issues/2835) -- Not evaluate the set expr in the InList for the optimization [\#2820](https://github.com/apache/arrow-datafusion/issues/2820) -- CASE When: result type should be coercible to a common type [\#2818](https://github.com/apache/arrow-datafusion/issues/2818) -- IN/NOT IN List: NULL is not equal to NULL [\#2817](https://github.com/apache/arrow-datafusion/issues/2817) -- panic when case statement returns null [\#2798](https://github.com/apache/arrow-datafusion/issues/2798) -- InList: 
Can't cast the list expr data type to value expr data type directly [\#2774](https://github.com/apache/arrow-datafusion/issues/2774) -- InList Expr: expr and list values must can be converted to a same data type [\#2759](https://github.com/apache/arrow-datafusion/issues/2759) -- tpchgen docker syntax change prevents volume from binding [\#2751](https://github.com/apache/arrow-datafusion/issues/2751) -- Cannot join on date columns \(Unsupported data type in hasher: Date32\) [\#2744](https://github.com/apache/arrow-datafusion/issues/2744) -- `rewrite_expression` does not properly handle `Exists` and `ScalarSubquery` [\#2736](https://github.com/apache/arrow-datafusion/issues/2736) -- LocalFileSystem Not sorted by file name, As a result, the data lines queried in multiple files are out of order. [\#2730](https://github.com/apache/arrow-datafusion/issues/2730) -- Filter push down need consider alias columns [\#2725](https://github.com/apache/arrow-datafusion/issues/2725) -- Recent API change in `GlobalLimitExec` breaks compatibility with Ballista [\#2720](https://github.com/apache/arrow-datafusion/issues/2720) -- Common Subexpression Eliminiation pass errors if run twice on some plans: Schema contains duplicate unqualified field name 'IsNull-Column-sys.host' [\#2712](https://github.com/apache/arrow-datafusion/issues/2712) -- The data type is not compatible with other system, for example spark or PG database [\#1379](https://github.com/apache/arrow-datafusion/issues/1379) - -**Documentation updates:** - -- Fix docs styling [\#2865](https://github.com/apache/arrow-datafusion/pull/2865) ([kmitchener](https://github.com/kmitchener)) -- Various updates to top-level README [\#2854](https://github.com/apache/arrow-datafusion/pull/2854) ([andygrove](https://github.com/andygrove)) -- MINOR: Add documentation for running integration tests [\#2839](https://github.com/apache/arrow-datafusion/pull/2839) ([andygrove](https://github.com/andygrove)) -- add csv registration and sql 
query to examples [\#2825](https://github.com/apache/arrow-datafusion/pull/2825) ([waitingkuo](https://github.com/waitingkuo)) -- \[minor\] refine doc [\#2753](https://github.com/apache/arrow-datafusion/pull/2753) ([Ted-Jiang](https://github.com/Ted-Jiang)) - -**Closed issues:** - -- Consider adding a prominent note in the readme about ballista [\#2853](https://github.com/apache/arrow-datafusion/issues/2853) -- support decimal in \(NULL\) [\#2800](https://github.com/apache/arrow-datafusion/issues/2800) -- InList: Don't treat Null as UTF8\(None\) [\#2782](https://github.com/apache/arrow-datafusion/issues/2782) -- InList: don't need to treat Null as UTF8 data type [\#2773](https://github.com/apache/arrow-datafusion/issues/2773) -- Implement extensible configuration mechanism [\#138](https://github.com/apache/arrow-datafusion/issues/138) - -**Merged pull requests:** - -- Update CONTRIBUTING.md [\#2876](https://github.com/apache/arrow-datafusion/pull/2876) ([waitingkuo](https://github.com/waitingkuo)) -- Make LogicalPlan::Union be consistent with other plans [\#2868](https://github.com/apache/arrow-datafusion/pull/2868) ([comphead](https://github.com/comphead)) -- minor: remove unneeded files from project root [\#2863](https://github.com/apache/arrow-datafusion/pull/2863) ([kmitchener](https://github.com/kmitchener)) -- chore: make cargo clippy happy in nigtly [\#2860](https://github.com/apache/arrow-datafusion/pull/2860) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Update to arrow 18.0.0 [\#2856](https://github.com/apache/arrow-datafusion/pull/2856) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- chore: remove ballista-related docker-compose file [\#2852](https://github.com/apache/arrow-datafusion/pull/2852) ([xudong963](https://github.com/xudong963)) -- Adding dataframe with\_column function 
[\#2849](https://github.com/apache/arrow-datafusion/pull/2849) ([comphead](https://github.com/comphead)) -- anti joins now respect join filters [\#2843](https://github.com/apache/arrow-datafusion/pull/2843) ([andygrove](https://github.com/andygrove)) -- MINOR: make name meaningful and clean up code [\#2841](https://github.com/apache/arrow-datafusion/pull/2841) ([liukun4515](https://github.com/liukun4515)) -- Make `lit` implementation more concise [\#2838](https://github.com/apache/arrow-datafusion/pull/2838) ([alamb](https://github.com/alamb)) -- InList: set/list value must be evaluated to get the values [\#2834](https://github.com/apache/arrow-datafusion/pull/2834) ([liukun4515](https://github.com/liukun4515)) -- Add SHOW CREATE TABLE with initial support for views [\#2830](https://github.com/apache/arrow-datafusion/pull/2830) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mrob95](https://github.com/mrob95)) -- Improve ergonomics of physical expr `lit` [\#2828](https://github.com/apache/arrow-datafusion/pull/2828) ([alamb](https://github.com/alamb)) -- Eliminate multi limit-offset nodes to emptyRelation [\#2823](https://github.com/apache/arrow-datafusion/pull/2823) ([AssHero](https://github.com/AssHero)) -- Fix the ci [\#2821](https://github.com/apache/arrow-datafusion/pull/2821) ([liukun4515](https://github.com/liukun4515)) -- CaseWhen: coerce the all then and else data type to a common data type [\#2819](https://github.com/apache/arrow-datafusion/pull/2819) ([liukun4515](https://github.com/liukun4515)) -- Fix `ScalarValue::isNull` calculation [\#2815](https://github.com/apache/arrow-datafusion/pull/2815) ([alamb](https://github.com/alamb)) -- Fix nullability calculation for `CASE` expressions [\#2814](https://github.com/apache/arrow-datafusion/pull/2814) ([alamb](https://github.com/alamb)) -- Bump numpy from 1.21.3 to 1.22.0 in /integration-tests [\#2811](https://github.com/apache/arrow-datafusion/pull/2811) 
([xudong963](https://github.com/xudong963)) -- Fix data type calculation for `CaseExpr` s with `NULLs` [\#2810](https://github.com/apache/arrow-datafusion/pull/2810) ([AssHero](https://github.com/AssHero)) -- InList: fix bug for comparing with Null in the list using the set optimization [\#2809](https://github.com/apache/arrow-datafusion/pull/2809) ([liukun4515](https://github.com/liukun4515)) -- Use specialized dictionary kernels \(\#1178\) [\#2808](https://github.com/apache/arrow-datafusion/pull/2808) ([tustvold](https://github.com/tustvold)) -- fix schema nullability for `information_schema` schema [\#2804](https://github.com/apache/arrow-datafusion/pull/2804) ([alamb](https://github.com/alamb)) -- fix: correctly calculate join output schema nullability [\#2803](https://github.com/apache/arrow-datafusion/pull/2803) ([alamb](https://github.com/alamb)) -- Correct schema nullability declaration in tests [\#2802](https://github.com/apache/arrow-datafusion/pull/2802) ([alamb](https://github.com/alamb)) -- Don't treat Null as UTF8\(None\) and change error info. 
[\#2801](https://github.com/apache/arrow-datafusion/pull/2801) ([liukun4515](https://github.com/liukun4515)) -- MINOR: Remove reference to docker image that is no longer available [\#2795](https://github.com/apache/arrow-datafusion/pull/2795) ([andygrove](https://github.com/andygrove)) -- Use coerced type in inlist expr planning [\#2794](https://github.com/apache/arrow-datafusion/pull/2794) ([viirya](https://github.com/viirya)) -- Add LogicalPlan::Distinct [\#2792](https://github.com/apache/arrow-datafusion/pull/2792) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mrob95](https://github.com/mrob95)) -- Add config option for coalesce\_batches physical optimization rule, make optional [\#2791](https://github.com/apache/arrow-datafusion/pull/2791) ([andygrove](https://github.com/andygrove)) -- Improve readability of table scan projections in query plans \(remove `Some` and `None`\) [\#2789](https://github.com/apache/arrow-datafusion/pull/2789) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([comphead](https://github.com/comphead)) -- Simplify FilterNullJoinKeys rule [\#2781](https://github.com/apache/arrow-datafusion/pull/2781) ([andygrove](https://github.com/andygrove)) -- MINOR: re-export sqlparser from datafusion-sql crate [\#2779](https://github.com/apache/arrow-datafusion/pull/2779) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) -- Update to arrow 17.0.0 [\#2778](https://github.com/apache/arrow-datafusion/pull/2778) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Support multiple paths for ListingTableScanNode [\#2775](https://github.com/apache/arrow-datafusion/pull/2775) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Remove expr\_sub\_expressions and rewrite\_expression functions [\#2772](https://github.com/apache/arrow-datafusion/pull/2772) ([mrob95](https://github.com/mrob95)) -- minor: update cranelift related 
dependencies [\#2769](https://github.com/apache/arrow-datafusion/pull/2769) ([xudong963](https://github.com/xudong963)) -- minor: panic rather than fail silently on bad dictionary in hash join [\#2767](https://github.com/apache/arrow-datafusion/pull/2767) ([alamb](https://github.com/alamb)) -- MINOR: make `prettier` use consistent between CI and contributing guide [\#2766](https://github.com/apache/arrow-datafusion/pull/2766) ([andygrove](https://github.com/andygrove)) -- Rewrite subexpressions of InSubquery in rewrite\_expression [\#2765](https://github.com/apache/arrow-datafusion/pull/2765) ([mrob95](https://github.com/mrob95)) -- Support `DataType::Decimal` for `IN` and `NOT IN` expressions [\#2764](https://github.com/apache/arrow-datafusion/pull/2764) ([liukun4515](https://github.com/liukun4515)) -- Implement extensible configuration mechanism [\#2754](https://github.com/apache/arrow-datafusion/pull/2754) ([andygrove](https://github.com/andygrove)) -- Remove redundant docker argument [\#2752](https://github.com/apache/arrow-datafusion/pull/2752) ([avantgardnerio](https://github.com/avantgardnerio)) -- Add optimizer pass to reduce `left`/`right`/`full` joins to `inner` join if possible [\#2750](https://github.com/apache/arrow-datafusion/pull/2750) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) -- MINOR: Remove legacy CLI context enum [\#2748](https://github.com/apache/arrow-datafusion/pull/2748) ([andygrove](https://github.com/andygrove)) -- CSE unit test for duplicate fields [\#2747](https://github.com/apache/arrow-datafusion/pull/2747) ([waynexia](https://github.com/waynexia)) -- MINOR: Improve unsupported data type error message [\#2745](https://github.com/apache/arrow-datafusion/pull/2745) ([andygrove](https://github.com/andygrove)) -- Add optimizer rule to filter out null keys before a join [\#2740](https://github.com/apache/arrow-datafusion/pull/2740) ([andygrove](https://github.com/andygrove)) -- 
Sort file names in a directory \#2730 [\#2735](https://github.com/apache/arrow-datafusion/pull/2735) ([yourenawo](https://github.com/yourenawo)) -- fix: filter push down with `InList` expressions [\#2729](https://github.com/apache/arrow-datafusion/pull/2729) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- \[Minor\] add debug info in optimizer.rs [\#2726](https://github.com/apache/arrow-datafusion/pull/2726) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add public API for GlobalLimitExec and LocalLimitExec [\#2722](https://github.com/apache/arrow-datafusion/pull/2722) ([andygrove](https://github.com/andygrove)) -- Add additional data types are supported in hash join [\#2721](https://github.com/apache/arrow-datafusion/pull/2721) ([AssHero](https://github.com/AssHero)) -- Upgrade to arrow `16.0.0` [\#2718](https://github.com/apache/arrow-datafusion/pull/2718) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Fix clippy warnings with toolchain 1.63 [\#2717](https://github.com/apache/arrow-datafusion/pull/2717) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waynexia](https://github.com/waynexia)) -- Support for GROUPING SETS/CUBE/ROLLUP [\#2716](https://github.com/apache/arrow-datafusion/pull/2716) ([thinkharderdev](https://github.com/thinkharderdev)) -- fix: check redundant fields while building projection plan [\#2715](https://github.com/apache/arrow-datafusion/pull/2715) ([waynexia](https://github.com/waynexia)) -- Sort preserving `SortMergeJoin` [\#2699](https://github.com/apache/arrow-datafusion/pull/2699) ([korowa](https://github.com/korowa)) -- fix: union schema fix [\#2688](https://github.com/apache/arrow-datafusion/pull/2688) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gandronchik](https://github.com/gandronchik)) -- Support default precision and scale to`CAST AS DECIMAL` [\#2680](https://github.com/apache/arrow-datafusion/pull/2680) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gandronchik](https://github.com/gandronchik)) - - -## [9.0.0](https://github.com/apache/arrow-datafusion/tree/9.0.0) (2022-06-10) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/8.0.0-rc2...9.0.0) - -**Breaking changes:** - -- MINOR: Move `simplify_expression` rule to `datafusion-optimizer` crate [\#2686](https://github.com/apache/arrow-datafusion/pull/2686) ([andygrove](https://github.com/andygrove)) -- Move physical expression planning to `datafusion-physical-expr` crate [\#2682](https://github.com/apache/arrow-datafusion/pull/2682) ([andygrove](https://github.com/andygrove)) -- Create new `datafusion-optimizer` crate for logical optimizer rules [\#2675](https://github.com/apache/arrow-datafusion/pull/2675) ([andygrove](https://github.com/andygrove)) -- Remove `ExecutionProps` dependency from `OptimizerRule` [\#2666](https://github.com/apache/arrow-datafusion/pull/2666) ([andygrove](https://github.com/andygrove)) -- Remove ObjectStoreSchemaProvider \(\#2656\) [\#2665](https://github.com/apache/arrow-datafusion/pull/2665) ([tustvold](https://github.com/tustvold)) -- Move `LogicalPlanBuilder` to `datafusion-expr` crate [\#2576](https://github.com/apache/arrow-datafusion/pull/2576) ([andygrove](https://github.com/andygrove)) -- `LogicalPlanBuilder` now uses `TableSource` instead of `TableProvider` [\#2569](https://github.com/apache/arrow-datafusion/pull/2569) ([andygrove](https://github.com/andygrove)) -- Remove `scan_empty` method from `LogicalPlanBuilder` [\#2568](https://github.com/apache/arrow-datafusion/pull/2568) ([andygrove](https://github.com/andygrove)) -- MINOR: Move expression utils from sql module to expr crate [\#2553](https://github.com/apache/arrow-datafusion/pull/2553) ([andygrove](https://github.com/andygrove)) -- Remove `scan_json` methods from `LogicalPlanBuilder` [\#2541](https://github.com/apache/arrow-datafusion/pull/2541) 
([andygrove](https://github.com/andygrove)) -- Remove `scan_avro` methods from `LogicalPlanBuilder` [\#2540](https://github.com/apache/arrow-datafusion/pull/2540) ([andygrove](https://github.com/andygrove)) -- Remove `scan_parquet` methods from `LogicalPlanBuilder` [\#2539](https://github.com/apache/arrow-datafusion/pull/2539) ([andygrove](https://github.com/andygrove)) -- MINOR: Move `ExprVisitable` and `exprlist_to_columns` to datafusion-expr crate [\#2538](https://github.com/apache/arrow-datafusion/pull/2538) ([andygrove](https://github.com/andygrove)) -- Remove `scan_csv` methods from `LogicalPlanBuilder` [\#2537](https://github.com/apache/arrow-datafusion/pull/2537) ([andygrove](https://github.com/andygrove)) -- Fix Redundant ScalarValue Boxed Collection [\#2523](https://github.com/apache/arrow-datafusion/pull/2523) ([comphead](https://github.com/comphead)) -- Support for OFFSET in LogicalPlan [\#2521](https://github.com/apache/arrow-datafusion/pull/2521) ([jdye64](https://github.com/jdye64)) - -**Implemented enhancements:** - -- \[EPIC\] JIT support for `DataFusion` [\#2703](https://github.com/apache/arrow-datafusion/issues/2703) -- Show column names instead of column indices in query plans [\#2689](https://github.com/apache/arrow-datafusion/issues/2689) -- Proposal: remove automated ballista CI checks from DataFusion [\#2679](https://github.com/apache/arrow-datafusion/issues/2679) -- Pass SessionState to TableProvider [\#2658](https://github.com/apache/arrow-datafusion/issues/2658) -- Is ObjectStoreSchemaProvider Still Needed? 
[\#2656](https://github.com/apache/arrow-datafusion/issues/2656) -- Add logical plan support to `datafusion-proto` [\#2630](https://github.com/apache/arrow-datafusion/issues/2630) -- Like, NotLike expressions work with literal `NULL` [\#2626](https://github.com/apache/arrow-datafusion/issues/2626) -- Move `JOIN ON` predicates push down logic from planner to optimizer [\#2619](https://github.com/apache/arrow-datafusion/issues/2619) -- Remove `ExecutionProps` from `OptimizerRule` trait [\#2614](https://github.com/apache/arrow-datafusion/issues/2614) -- Add, Minus, Multiply, divide, Modulo operator work with literal `NULL` [\#2609](https://github.com/apache/arrow-datafusion/issues/2609) -- Support `DESCRIBE <table>` to show table schemas [\#2606](https://github.com/apache/arrow-datafusion/issues/2606) -- Support `CREATE OR REPLACE TABLE` [\#2605](https://github.com/apache/arrow-datafusion/issues/2605) -- filter\_push\_down tests should not rely on TableProvider and ExecutionPlan [\#2600](https://github.com/apache/arrow-datafusion/issues/2600) -- Move logical optimizer rules out of the core datafusion crate [\#2599](https://github.com/apache/arrow-datafusion/issues/2599) -- Push Limit through outer Join [\#2579](https://github.com/apache/arrow-datafusion/issues/2579) -- `datafusion_proto` crate should have exhaustive match statements for handling `Expr` [\#2565](https://github.com/apache/arrow-datafusion/issues/2565) -- String representation of Expr variant [\#2563](https://github.com/apache/arrow-datafusion/issues/2563) -- File URI Scheme Interpretation [\#2562](https://github.com/apache/arrow-datafusion/issues/2562) -- Implement physical plan for OFFSET [\#2551](https://github.com/apache/arrow-datafusion/issues/2551) -- Update limit pushdown rule to support offsets [\#2550](https://github.com/apache/arrow-datafusion/issues/2550) -- Move `LogicalPlanBuilder` to `datafusion-expr` crate [\#2536](https://github.com/apache/arrow-datafusion/issues/2536) -- Logical optimizer rule
"simplify expressions" should not depend on the core datafusion crate [\#2535](https://github.com/apache/arrow-datafusion/issues/2535) -- Support optional filter in Join [\#2509](https://github.com/apache/arrow-datafusion/issues/2509) -- Improve SQL planner & logical plan support for JOIN conditions [\#2496](https://github.com/apache/arrow-datafusion/issues/2496) -- Numeric, String, Boolean comparisons with literal `NULL` [\#2482](https://github.com/apache/arrow-datafusion/issues/2482) -- Redundant ScalarValue Boxed Collection [\#2449](https://github.com/apache/arrow-datafusion/issues/2449) -- ObjectStore Directory Semantics [\#2445](https://github.com/apache/arrow-datafusion/issues/2445) -- Add support for `OFFSET` in SQL query planner + logical plan [\#2377](https://github.com/apache/arrow-datafusion/issues/2377) -- SQL planner should use `TableSource` not `TableProvider` [\#2346](https://github.com/apache/arrow-datafusion/issues/2346) -- Move SQL query planning to new crate [\#2345](https://github.com/apache/arrow-datafusion/issues/2345) -- Update LogicalPlan rustdoc code to not use LogicalPlanBuilder [\#2308](https://github.com/apache/arrow-datafusion/issues/2308) -- \[Optimizer\] Refactor `convert join` [\#2256](https://github.com/apache/arrow-datafusion/issues/2256) -- \[Optimizer\] Infer is not null predicate from `where clause` [\#2254](https://github.com/apache/arrow-datafusion/issues/2254) -- Support ArrayIndex for ScalarValue\(List\) [\#2207](https://github.com/apache/arrow-datafusion/issues/2207) -- \[Ballista\] Fill functional gaps between datafusion and ballista [\#2062](https://github.com/apache/arrow-datafusion/issues/2062) -- \[Ballista\] support datafusion built\_in UDAF work in ballista cluster [\#1985](https://github.com/apache/arrow-datafusion/issues/1985) -- Export C API [\#1113](https://github.com/apache/arrow-datafusion/issues/1113) - -**Fixed bugs:** - -- Fix Typos in Docs [\#2695](https://github.com/apache/arrow-datafusion/issues/2695) -- 
Unable to build a docker image [\#2691](https://github.com/apache/arrow-datafusion/issues/2691) -- Optimization pass `AggregateStatistics` changes type of output from `Int64` to `UInt64` [\#2673](https://github.com/apache/arrow-datafusion/issues/2673) -- ViewTable Circular Reference [\#2657](https://github.com/apache/arrow-datafusion/issues/2657) -- `ScalarValue::to_array_of_size` panics computing statistics for nested parquet file [\#2653](https://github.com/apache/arrow-datafusion/issues/2653) -- The result type of count/count\_distinct [\#2635](https://github.com/apache/arrow-datafusion/issues/2635) -- limit\_push\_down is not working properly with OFFSET [\#2624](https://github.com/apache/arrow-datafusion/issues/2624) -- Avro Tests Fail To Compile [\#2570](https://github.com/apache/arrow-datafusion/issues/2570) -- Unused Window functions expression is wrongly removed from LogicalPlan during optimization [\#2542](https://github.com/apache/arrow-datafusion/issues/2542) -- Bug: ObjectStoreRegistry get\_by\_uri does not return correct path when "scheme" is provided [\#2525](https://github.com/apache/arrow-datafusion/issues/2525) -- There are duplicate and inconsistent copies of `datafusion.proto` [\#2514](https://github.com/apache/arrow-datafusion/issues/2514) -- Projection pushdown produces incorrect results when column names are reused [\#2462](https://github.com/apache/arrow-datafusion/issues/2462) -- Incorrect Parquet Projection For Nested Types [\#2453](https://github.com/apache/arrow-datafusion/issues/2453) -- LogicalPlanBuilder::scan\_csv creates scans with invalid table names [\#2278](https://github.com/apache/arrow-datafusion/issues/2278) -- Inner join incorrectly pushdown predicate with OR operation [\#2271](https://github.com/apache/arrow-datafusion/issues/2271) -- Ignored alias for columns with aggregate function and incorrect results when collecting statistics is enabled [\#2176](https://github.com/apache/arrow-datafusion/issues/2176) -- Join on path
partitioned columns fails with error [\#2145](https://github.com/apache/arrow-datafusion/issues/2145) - -**Documentation updates:** - -- Fix Ballista link [\#2654](https://github.com/apache/arrow-datafusion/pull/2654) ([dsaxton](https://github.com/dsaxton)) -- MINOR: Add Blaze as a project using DataFusion [\#2618](https://github.com/apache/arrow-datafusion/pull/2618) ([yjshen](https://github.com/yjshen)) -- \[MINOR\] remove datafusion-cli's ballista feature from docs [\#2612](https://github.com/apache/arrow-datafusion/pull/2612) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- chore\(doc\) remove ballista from datafusion-cli readme [\#2604](https://github.com/apache/arrow-datafusion/pull/2604) ([ming535](https://github.com/ming535)) - -**Closed issues:** - -- \[Question\] Converting TableSource to custom TableProvider [\#2644](https://github.com/apache/arrow-datafusion/issues/2644) -- \[Question\] Why DataFusion is shipped with arrow version 9.1.0 on crates.io ? [\#2474](https://github.com/apache/arrow-datafusion/issues/2474) - -**Merged pull requests:** - -- Test optional features in CI [\#2708](https://github.com/apache/arrow-datafusion/pull/2708) ([tustvold](https://github.com/tustvold)) -- support indexed fields proto [\#2707](https://github.com/apache/arrow-datafusion/pull/2707) ([nl5887](https://github.com/nl5887)) -- Update sqlparser-rs to 0.18.0 [\#2705](https://github.com/apache/arrow-datafusion/pull/2705) ([alamb](https://github.com/alamb)) -- \[MINOR\]: Add documentation to `datafusion-row` modules [\#2704](https://github.com/apache/arrow-datafusion/pull/2704) ([alamb](https://github.com/alamb)) -- Make sure that the data types are supported in hashjoin before genera… [\#2702](https://github.com/apache/arrow-datafusion/pull/2702) ([AssHero](https://github.com/AssHero)) -- Move remaining code out of legacy `core/logical_plan` module [\#2701](https://github.com/apache/arrow-datafusion/pull/2701) ([andygrove](https://github.com/andygrove)) -- Move some tests 
from core to expr [\#2700](https://github.com/apache/arrow-datafusion/pull/2700) ([andygrove](https://github.com/andygrove)) -- MINOR: Improve Docs Readability [\#2696](https://github.com/apache/arrow-datafusion/pull/2696) ([ryanrussell](https://github.com/ryanrussell)) -- Combine limit and offset to `fetch` and `skip` and implement physical plan support [\#2694](https://github.com/apache/arrow-datafusion/pull/2694) ([ming535](https://github.com/ming535)) -- MINOR: Add datafusion-sql example [\#2693](https://github.com/apache/arrow-datafusion/pull/2693) ([andygrove](https://github.com/andygrove)) -- Remove Ballista related lines from Dockerfile [\#2692](https://github.com/apache/arrow-datafusion/pull/2692) ([mocknen](https://github.com/mocknen)) -- Show column names instead of indices in query plans [\#2690](https://github.com/apache/arrow-datafusion/pull/2690) ([andygrove](https://github.com/andygrove)) -- MINOR: Remove uses of TryClone for Parquet [\#2681](https://github.com/apache/arrow-datafusion/pull/2681) ([tustvold](https://github.com/tustvold)) -- Fix `AggregateStatistics` optimization so it doesn't change output type [\#2674](https://github.com/apache/arrow-datafusion/pull/2674) ([alamb](https://github.com/alamb)) -- If statistics of column Max/Min value does not exists in parquet file, sent Min/Max to None [\#2671](https://github.com/apache/arrow-datafusion/pull/2671) ([AssHero](https://github.com/AssHero)) -- MINOR: Move more expression code to `datafusion-expr` crate [\#2669](https://github.com/apache/arrow-datafusion/pull/2669) ([andygrove](https://github.com/andygrove)) -- MINOR: Rewrite imports in optimizer moduler [\#2667](https://github.com/apache/arrow-datafusion/pull/2667) ([andygrove](https://github.com/andygrove)) -- Update snmalloc-rs requirement from 0.2 to 0.3 [\#2663](https://github.com/apache/arrow-datafusion/pull/2663) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Add module doc for RuntimeEnv, SessionContext, TaskContext, 
etc... [\#2655](https://github.com/apache/arrow-datafusion/pull/2655) ([tustvold](https://github.com/tustvold)) -- Prune unused dependencies from datafusion-proto [\#2651](https://github.com/apache/arrow-datafusion/pull/2651) ([tustvold](https://github.com/tustvold)) -- MINOR: Implement serde for join filter [\#2649](https://github.com/apache/arrow-datafusion/pull/2649) ([andygrove](https://github.com/andygrove)) -- pushdown support for predicates in `ON` clause of joins [\#2647](https://github.com/apache/arrow-datafusion/pull/2647) ([korowa](https://github.com/korowa)) -- Move `SortKeyCursor` and `RowIndex` into modules, add `sort_key_cursor` test [\#2645](https://github.com/apache/arrow-datafusion/pull/2645) ([alamb](https://github.com/alamb)) -- Implement DESCRIBE \<table\> [\#2642](https://github.com/apache/arrow-datafusion/pull/2642) ([LiuYuHui](https://github.com/LiuYuHui)) -- Implement `LogicalPlan` serde in `datafusion-proto` [\#2639](https://github.com/apache/arrow-datafusion/pull/2639) ([andygrove](https://github.com/andygrove)) -- Fix limit + offset pushdown [\#2638](https://github.com/apache/arrow-datafusion/pull/2638) ([ming535](https://github.com/ming535)) -- change result type of count/count\_distinct from uint64 to int64 [\#2636](https://github.com/apache/arrow-datafusion/pull/2636) ([liukun4515](https://github.com/liukun4515)) -- if none columns in window expr are needed, remove the window exprs [\#2634](https://github.com/apache/arrow-datafusion/pull/2634) ([AssHero](https://github.com/AssHero)) -- Like, NotLike expressions work with literal `NULL` [\#2627](https://github.com/apache/arrow-datafusion/pull/2627) ([WinkerDu](https://github.com/WinkerDu)) -- MINOR: Refactor `datafusion-proto` dependencies and imports [\#2623](https://github.com/apache/arrow-datafusion/pull/2623) ([andygrove](https://github.com/andygrove)) -- MINOR: add optimizer struct [\#2616](https://github.com/apache/arrow-datafusion/pull/2616) ([jackwener](https://github.com/jackwener))
-- Remove FilterPushDown dependency on physical plan [\#2615](https://github.com/apache/arrow-datafusion/pull/2615) ([andygrove](https://github.com/andygrove)) -- Support CREATE OR REPLACE TABLE [\#2613](https://github.com/apache/arrow-datafusion/pull/2613) ([AssHero](https://github.com/AssHero)) -- Support binary mathematical operators work with `NULL` literals [\#2610](https://github.com/apache/arrow-datafusion/pull/2610) ([WinkerDu](https://github.com/WinkerDu)) -- chore: try fix CI coverage [\#2608](https://github.com/apache/arrow-datafusion/pull/2608) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- MINOR: Rename benchmark crate [\#2607](https://github.com/apache/arrow-datafusion/pull/2607) ([andygrove](https://github.com/andygrove)) -- chore\(dep\): bump cranelift to 0.84.0 [\#2598](https://github.com/apache/arrow-datafusion/pull/2598) ([waynexia](https://github.com/waynexia)) -- fix some typos [\#2597](https://github.com/apache/arrow-datafusion/pull/2597) ([ming535](https://github.com/ming535)) -- Support limit pushdown through left right outer join [\#2596](https://github.com/apache/arrow-datafusion/pull/2596) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Unignore rustdoc code examples in `datafusion-expr` crate [\#2590](https://github.com/apache/arrow-datafusion/pull/2590) ([andygrove](https://github.com/andygrove)) -- Evaluate JIT'd expression over arrays [\#2587](https://github.com/apache/arrow-datafusion/pull/2587) ([waynexia](https://github.com/waynexia)) -- \[minor\]Fix ci clippy for unused import [\#2586](https://github.com/apache/arrow-datafusion/pull/2586) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- \[Doc\]add doc for enable SIMD need `cargo nightly` [\#2577](https://github.com/apache/arrow-datafusion/pull/2577) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add DataFrame `union_distinct` and fix documentation for `distinct` [\#2574](https://github.com/apache/arrow-datafusion/pull/2574) ([andygrove](https://github.com/andygrove)) -- Fix avro 
tests \(\#2570\) [\#2571](https://github.com/apache/arrow-datafusion/pull/2571) ([tustvold](https://github.com/tustvold)) -- Make datafusion-proto match exhaustive [\#2567](https://github.com/apache/arrow-datafusion/pull/2567) ([andygrove](https://github.com/andygrove)) -- Support limit push down for offset\_plan [\#2566](https://github.com/apache/arrow-datafusion/pull/2566) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Introduce Expr.variant\_name\(\) function [\#2564](https://github.com/apache/arrow-datafusion/pull/2564) ([jdye64](https://github.com/jdye64)) -- Fix some 404 links in the contribution guide [\#2561](https://github.com/apache/arrow-datafusion/pull/2561) ([hi-rustin](https://github.com/hi-rustin)) -- Update datafusion-cli readme cli version [\#2559](https://github.com/apache/arrow-datafusion/pull/2559) ([hi-rustin](https://github.com/hi-rustin)) -- MINOR: Move `expr_rewriter.rs` to `datafusion-expr` crate [\#2552](https://github.com/apache/arrow-datafusion/pull/2552) ([andygrove](https://github.com/andygrove)) -- Fix `JOIN`s with complex predicates in ON \(split ON expressions only by AND operator\) [\#2534](https://github.com/apache/arrow-datafusion/pull/2534) ([korowa](https://github.com/korowa)) -- Reduce duplication in file scan tests [\#2533](https://github.com/apache/arrow-datafusion/pull/2533) ([tustvold](https://github.com/tustvold)) -- Fix size\_of\_scalar test [\#2531](https://github.com/apache/arrow-datafusion/pull/2531) ([alamb](https://github.com/alamb)) -- Update to arrow-rs 14.0.0 [\#2528](https://github.com/apache/arrow-datafusion/pull/2528) ([alamb](https://github.com/alamb)) -- ObjectStoreRegistry get\_by\_uri now returns correct path when "scheme" is provided [\#2526](https://github.com/apache/arrow-datafusion/pull/2526) ([timvw](https://github.com/timvw)) -- MINOR: Add ORDER BY clause to test [\#2524](https://github.com/apache/arrow-datafusion/pull/2524) ([andygrove](https://github.com/andygrove)) -- Remove unused 
`binary_array_op_scalar!` in binary.rs [\#2512](https://github.com/apache/arrow-datafusion/pull/2512) ([alamb](https://github.com/alamb)) -- fix `NULL column` evaluation, tests for same [\#2510](https://github.com/apache/arrow-datafusion/pull/2510) ([alamb](https://github.com/alamb)) -- Fix projection pushdown produces incorrect results when column names are reused [\#2463](https://github.com/apache/arrow-datafusion/pull/2463) ([jonmmease](https://github.com/jonmmease)) -- Benchmark for sort preserving merge [\#2431](https://github.com/apache/arrow-datafusion/pull/2431) ([alamb](https://github.com/alamb)) -- Support GetIndexedFieldExpr for ScalarValue [\#2196](https://github.com/apache/arrow-datafusion/pull/2196) ([ovr](https://github.com/ovr)) - -## [8.0.0](https://github.com/apache/arrow-datafusion/tree/8.0.0) (2022-05-12) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/7.1.0-rc1...8.0.0) - -**Breaking changes:** - -- Add SQL planner support for `ROLLUP` and `CUBE` grouping set expressions [\#2446](https://github.com/apache/arrow-datafusion/pull/2446) ([andygrove](https://github.com/andygrove)) -- Make `ExecutionPlan::execute` Sync [\#2434](https://github.com/apache/arrow-datafusion/pull/2434) ([tustvold](https://github.com/tustvold)) -- Introduce new `DataFusionError::SchemaError` type [\#2371](https://github.com/apache/arrow-datafusion/pull/2371) ([andygrove](https://github.com/andygrove)) -- Add `Expr::InSubquery` and `Expr::ScalarSubquery` [\#2342](https://github.com/apache/arrow-datafusion/pull/2342) ([andygrove](https://github.com/andygrove)) -- Add `Expr::Exists` to represent EXISTS subquery expression [\#2339](https://github.com/apache/arrow-datafusion/pull/2339) ([andygrove](https://github.com/andygrove)) -- Move `LogicalPlan` enum to `datafusion-expr` crate [\#2294](https://github.com/apache/arrow-datafusion/pull/2294) ([andygrove](https://github.com/andygrove)) -- Remove dependency from `LogicalPlan::TableScan` to `ExecutionPlan` 
[\#2284](https://github.com/apache/arrow-datafusion/pull/2284) ([andygrove](https://github.com/andygrove)) -- Move logical expression type-coercion code from `physical-expr` crate to `expr` crate [\#2257](https://github.com/apache/arrow-datafusion/pull/2257) ([andygrove](https://github.com/andygrove)) -- feat: 2061 create external table ddl table partition cols [\#2099](https://github.com/apache/arrow-datafusion/pull/2099) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jychen7](https://github.com/jychen7)) -- Reorganize the project folders [\#2081](https://github.com/apache/arrow-datafusion/pull/2081) ([yahoNanJing](https://github.com/yahoNanJing)) -- Support more ScalarFunction in Ballista [\#2008](https://github.com/apache/arrow-datafusion/pull/2008) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Merge dataframe and dataframe imp [\#1998](https://github.com/apache/arrow-datafusion/pull/1998) ([vchag](https://github.com/vchag)) -- Rename `ExecutionContext` to `SessionContext`, `ExecutionContextState` to `SessionState`, add `TaskContext` to support multi-tenancy configurations - Part 1 [\#1987](https://github.com/apache/arrow-datafusion/pull/1987) ([mingmwang](https://github.com/mingmwang)) -- Add Coalesce function [\#1969](https://github.com/apache/arrow-datafusion/pull/1969) ([msathis](https://github.com/msathis)) -- Add Create Schema functionality in SQL [\#1959](https://github.com/apache/arrow-datafusion/pull/1959) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) -- omit some clone when converting sql to logical plan [\#1945](https://github.com/apache/arrow-datafusion/pull/1945) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) -- \[split/16\] move physical plan expressions folder to datafusion-physical-expr crate [\#1889](https://github.com/apache/arrow-datafusion/pull/1889) ([Jimexist](https://github.com/Jimexist)) -- remove 
sync constraint of SendableRecordBatchStream [\#1884](https://github.com/apache/arrow-datafusion/pull/1884) ([doki23](https://github.com/doki23)) -- \[split/15\] move built in window expr and partition evaluator [\#1865](https://github.com/apache/arrow-datafusion/pull/1865) ([Jimexist](https://github.com/Jimexist)) - -**Implemented enhancements:** - -- Include `Expr` to `datafusion::prelude` [\#2347](https://github.com/apache/arrow-datafusion/issues/2347) -- Implement `Serialization` API for DataFusion [\#2340](https://github.com/apache/arrow-datafusion/issues/2340) -- Implement `power` function [\#1493](https://github.com/apache/arrow-datafusion/issues/1493) -- allow `lit` python function to support `boolean` and other types [\#1136](https://github.com/apache/arrow-datafusion/issues/1136) -- Automate dependency updates [\#37](https://github.com/apache/arrow-datafusion/issues/37) -- Add `CREATE VIEW` [\#2279](https://github.com/apache/arrow-datafusion/pull/2279) ([matthewmturner](https://github.com/matthewmturner)) -- \[Ballista\] Support Union in ballista. 
[\#2098](https://github.com/apache/arrow-datafusion/pull/2098) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Change the DataFusion explain plans to make it clearer in the predicate/filter [\#2063](https://github.com/apache/arrow-datafusion/pull/2063) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add `write_json`, `read_json`, `register_json`, and `JsonFormat` to `CREATE EXTERNAL TABLE` functionality [\#2023](https://github.com/apache/arrow-datafusion/pull/2023) ([matthewmturner](https://github.com/matthewmturner)) -- Qualified wildcard [\#2012](https://github.com/apache/arrow-datafusion/pull/2012) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) -- support bitwise or/'|' operation [\#1876](https://github.com/apache/arrow-datafusion/pull/1876) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- Introduce JIT code generation [\#1849](https://github.com/apache/arrow-datafusion/pull/1849) ([yjshen](https://github.com/yjshen)) - -**Fixed bugs:** - -- CASE expr with NULL literals panics `'WHEN expression did not return a BooleanArray'` [\#1189](https://github.com/apache/arrow-datafusion/issues/1189) -- Function calls with NULL literals do not work [\#1188](https://github.com/apache/arrow-datafusion/issues/1188) -- Add SQL planner support for calling `round` function with two arguments [\#2503](https://github.com/apache/arrow-datafusion/pull/2503) ([andygrove](https://github.com/andygrove)) -- nested query fix [\#2402](https://github.com/apache/arrow-datafusion/pull/2402) ([comphead](https://github.com/comphead)) -- fix issue\#2058 file\_format/json.rs attempt to subtract with overflow [\#2066](https://github.com/apache/arrow-datafusion/pull/2066) ([silence-coding](https://github.com/silence-coding)) -- fix bug the optimizer rule filter push down [\#2039](https://github.com/apache/arrow-datafusion/pull/2039) ([jackwener](https://github.com/jackwener)) -- fix: 
replace `ExecutionContex` and `ExecutionConfig` with `SessionContext` and `SessionConfig` [\#2030](https://github.com/apache/arrow-datafusion/pull/2030) ([xudong963](https://github.com/xudong963)) -- Fixed parquet path partitioning when only selecting partitioned columns [\#2000](https://github.com/apache/arrow-datafusion/pull/2000) ([pjmore](https://github.com/pjmore)) -- Fix ambiguous reference error in filter plan [\#1925](https://github.com/apache/arrow-datafusion/pull/1925) ([jonmmease](https://github.com/jonmmease)) -- platform aware partition parsing [\#1867](https://github.com/apache/arrow-datafusion/pull/1867) ([korowa](https://github.com/korowa)) -- Fix incorrect aggregation in case that GROUP BY contains duplicate column names [\#1855](https://github.com/apache/arrow-datafusion/pull/1855) ([alex-natzka](https://github.com/alex-natzka)) - -**Documentation updates:** - -- MINOR: Make crate READMEs consistent [\#2437](https://github.com/apache/arrow-datafusion/pull/2437) ([andygrove](https://github.com/andygrove)) -- minor: Improve documentation for DFSchema join and merge functions [\#2367](https://github.com/apache/arrow-datafusion/pull/2367) ([andygrove](https://github.com/andygrove)) -- Change the code location and add annotation [\#2037](https://github.com/apache/arrow-datafusion/pull/2037) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) -- Fix typos \(Datafusion -\> DataFusion\) [\#1993](https://github.com/apache/arrow-datafusion/pull/1993) ([andygrove](https://github.com/andygrove)) -- Add examples to use MemTable and TableProvider \(\#1864\) [\#1946](https://github.com/apache/arrow-datafusion/pull/1946) ([PierreZ](https://github.com/PierreZ)) -- Add doc for building `datafusion-cli` when connect the ballista [\#1866](https://github.com/apache/arrow-datafusion/pull/1866) ([liukun4515](https://github.com/liukun4515)) -- Add benchmarks section to DEVELOPERS.md 
[\#1838](https://github.com/apache/arrow-datafusion/pull/1838) ([tustvold](https://github.com/tustvold)) - -**Performance improvements:** - -- Avoid an Arc::clone per row in benchmark [\#1975](https://github.com/apache/arrow-datafusion/pull/1975) ([jhorstmann](https://github.com/jhorstmann)) -- Update datafusion-cli allocator [\#1878](https://github.com/apache/arrow-datafusion/pull/1878) ([matthewmturner](https://github.com/matthewmturner)) - -**Closed issues:** - -- Make expected result string in unit tests more readable [\#2412](https://github.com/apache/arrow-datafusion/issues/2412) -- remove duplicated `fn aggregate()` in aggregate expression tests [\#2399](https://github.com/apache/arrow-datafusion/issues/2399) -- split `distinct_expression.rs` into `count_distinct.rs` and `array_agg_distinct.rs` [\#2385](https://github.com/apache/arrow-datafusion/issues/2385) -- move sql tests in `context.rs` to corresponding test files in `datafustion/core/tests/sql` [\#2328](https://github.com/apache/arrow-datafusion/issues/2328) -- Date32/Date64 as join keys for merge join [\#2314](https://github.com/apache/arrow-datafusion/issues/2314) -- Error precision and scale for decimal coercion in logic comparison [\#2232](https://github.com/apache/arrow-datafusion/issues/2232) -- Support Multiple row layout [\#2188](https://github.com/apache/arrow-datafusion/issues/2188) -- TPC-H Query 18 [\#169](https://github.com/apache/arrow-datafusion/issues/169) -- TPC-H Query 16 [\#167](https://github.com/apache/arrow-datafusion/issues/167) -- Implement Sort-Merge Join [\#141](https://github.com/apache/arrow-datafusion/issues/141) -- Split logical expressions out into separate source files [\#114](https://github.com/apache/arrow-datafusion/issues/114) - -**Merged pull requests:** - -- Minor: remove code that is now included in arrow-rs [\#2511](https://github.com/apache/arrow-datafusion/pull/2511) ([alamb](https://github.com/alamb)) -- MINOR: Enable multi-statement benchmark queries 
[\#2507](https://github.com/apache/arrow-datafusion/pull/2507) ([andygrove](https://github.com/andygrove)) -- MINOR: Add ignored tests for all remaining benchmark queries [\#2506](https://github.com/apache/arrow-datafusion/pull/2506) ([andygrove](https://github.com/andygrove)) -- Update to `sqlparser` `0.17.0` [\#2500](https://github.com/apache/arrow-datafusion/pull/2500) ([alamb](https://github.com/alamb)) -- Add metrics for ParquetExec [\#2499](https://github.com/apache/arrow-datafusion/pull/2499) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Limit cpu cores used when generating changelog [\#2494](https://github.com/apache/arrow-datafusion/pull/2494) ([andygrove](https://github.com/andygrove)) -- Optimize MergeJoin by storing joined indices instead of creating small record batches for each match [\#2492](https://github.com/apache/arrow-datafusion/pull/2492) ([richox](https://github.com/richox)) -- Add SQL planner support for `grouping()` aggregate expressions [\#2486](https://github.com/apache/arrow-datafusion/pull/2486) ([andygrove](https://github.com/andygrove)) -- MINOR: Parameterize changelog script [\#2484](https://github.com/apache/arrow-datafusion/pull/2484) ([jychen7](https://github.com/jychen7)) -- Numeric, String, Boolean comparisons with literal `NULL` [\#2481](https://github.com/apache/arrow-datafusion/pull/2481) ([WinkerDu](https://github.com/WinkerDu)) -- Adds unit test cases of mathematical expressions working with `null` literal [\#2478](https://github.com/apache/arrow-datafusion/pull/2478) ([WinkerDu](https://github.com/WinkerDu)) -- Minor: Move test code from `context.rs` into `sql_integration` [\#2473](https://github.com/apache/arrow-datafusion/pull/2473) ([alamb](https://github.com/alamb)) -- Minor: Use ExprVisitor to find columns referenced by expr [\#2471](https://github.com/apache/arrow-datafusion/pull/2471) ([alamb](https://github.com/alamb)) -- minor: remove expr dependency from the row crate, update crate-deps.dot/svg 
[\#2470](https://github.com/apache/arrow-datafusion/pull/2470) ([yjshen](https://github.com/yjshen)) -- Fix `read_from_registered_table_with_glob_path` fails if path contains // \#2465 [\#2468](https://github.com/apache/arrow-datafusion/pull/2468) ([timvw](https://github.com/timvw)) -- Add support for list\_dir\(\) on local fs [\#2467](https://github.com/apache/arrow-datafusion/pull/2467) ([wjones127](https://github.com/wjones127)) -- MINOR: Partial fix for SQL aggregate queries with aliases [\#2464](https://github.com/apache/arrow-datafusion/pull/2464) ([andygrove](https://github.com/andygrove)) -- minor: move struct definition out of `aggregate/mod.rs`, etc [\#2458](https://github.com/apache/arrow-datafusion/pull/2458) ([WinkerDu](https://github.com/WinkerDu)) -- Fix bugs in SQL planner with GROUP BY scalar function and alias [\#2457](https://github.com/apache/arrow-datafusion/pull/2457) ([andygrove](https://github.com/andygrove)) -- feat: Support CompoundIdentifier as GetIndexedField access [\#2454](https://github.com/apache/arrow-datafusion/pull/2454) ([ovr](https://github.com/ovr)) -- Table provider error propagation [\#2438](https://github.com/apache/arrow-datafusion/pull/2438) ([jdye64](https://github.com/jdye64)) -- MINOR: Improve error messages for GROUP BY / HAVING queries [\#2435](https://github.com/apache/arrow-datafusion/pull/2435) ([andygrove](https://github.com/andygrove)) -- minor: remove redundant code [\#2432](https://github.com/apache/arrow-datafusion/pull/2432) ([jackwener](https://github.com/jackwener)) -- minor: update versions and paths in changelog scripts [\#2429](https://github.com/apache/arrow-datafusion/pull/2429) ([andygrove](https://github.com/andygrove)) -- Fix Ballista executing during plan [\#2428](https://github.com/apache/arrow-datafusion/pull/2428) ([tustvold](https://github.com/tustvold)) -- minor: format table result vec & remove some unnecessary semicolons [\#2425](https://github.com/apache/arrow-datafusion/pull/2425) 
([WinkerDu](https://github.com/WinkerDu)) -- Basic support for `IN` and `NOT IN` Subqueries by rewriting them to `SEMI` / `ANTI` Join [\#2421](https://github.com/apache/arrow-datafusion/pull/2421) ([korowa](https://github.com/korowa)) -- Allow subqueries without aliases [\#2418](https://github.com/apache/arrow-datafusion/pull/2418) ([andygrove](https://github.com/andygrove)) -- Fix bug in subquery join filters referencing outer query [\#2416](https://github.com/apache/arrow-datafusion/pull/2416) ([andygrove](https://github.com/andygrove)) -- MINOR: remove duplicated function `format_state_name()` [\#2414](https://github.com/apache/arrow-datafusion/pull/2414) ([WinkerDu](https://github.com/WinkerDu)) -- Make expected result string in unit tests more readable [\#2413](https://github.com/apache/arrow-datafusion/pull/2413) ([WinkerDu](https://github.com/WinkerDu)) -- `sum(distinct)` support [\#2405](https://github.com/apache/arrow-datafusion/pull/2405) ([WinkerDu](https://github.com/WinkerDu)) -- Update ordered-float requirement from 2.10 to 3.0 [\#2403](https://github.com/apache/arrow-datafusion/pull/2403) ([dependabot[bot]](https://github.com/apps/dependabot)) -- remove duplicated `fn aggregate()` in aggregate expression tests [\#2400](https://github.com/apache/arrow-datafusion/pull/2400) ([WinkerDu](https://github.com/WinkerDu)) -- Support type-coercion from Decimal to Float64 [\#2396](https://github.com/apache/arrow-datafusion/pull/2396) ([comphead](https://github.com/comphead)) -- minor: SchemaError code cleanup and improvements [\#2391](https://github.com/apache/arrow-datafusion/pull/2391) ([andygrove](https://github.com/andygrove)) -- Support struct\_expr generate struct in sql [\#2389](https://github.com/apache/arrow-datafusion/pull/2389) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Re-organize and rename aggregates physical plan [\#2388](https://github.com/apache/arrow-datafusion/pull/2388) ([yjshen](https://github.com/yjshen)) -- refactor 
`distinct_expressions.rs` and split into `count_distinct.rs` and `array_agg_distinct.rs` [\#2386](https://github.com/apache/arrow-datafusion/pull/2386) ([WinkerDu](https://github.com/WinkerDu)) -- Allow CTEs to be referenced from subquery expressions [\#2384](https://github.com/apache/arrow-datafusion/pull/2384) ([andygrove](https://github.com/andygrove)) -- Upgrade to arrow 13 [\#2382](https://github.com/apache/arrow-datafusion/pull/2382) ([alamb](https://github.com/alamb)) -- Grouped Aggregate in row format [\#2375](https://github.com/apache/arrow-datafusion/pull/2375) ([yjshen](https://github.com/yjshen)) -- Fix bugs with CTE aliasing and normalize all identifiers in the SQL planner [\#2373](https://github.com/apache/arrow-datafusion/pull/2373) ([andygrove](https://github.com/andygrove)) -- Stop optimizing queries twice [\#2369](https://github.com/apache/arrow-datafusion/pull/2369) ([andygrove](https://github.com/andygrove)) -- feat: Support casting to arrays to primitive type [\#2366](https://github.com/apache/arrow-datafusion/pull/2366) ([ovr](https://github.com/ovr)) -- Add proper support for `null` literal by introducing `ScalarValue::Null` [\#2364](https://github.com/apache/arrow-datafusion/pull/2364) ([WinkerDu](https://github.com/WinkerDu)) -- minor: fix duplicate column bug in subquery support [\#2362](https://github.com/apache/arrow-datafusion/pull/2362) ([andygrove](https://github.com/andygrove)) -- Normalize subquery aliases [\#2359](https://github.com/apache/arrow-datafusion/pull/2359) ([andygrove](https://github.com/andygrove)) -- Implement physical planner support for DATE +/- INTERVAL [\#2357](https://github.com/apache/arrow-datafusion/pull/2357) ([andygrove](https://github.com/andygrove)) -- Add SQL query planner support for Scalar Subqueries [\#2354](https://github.com/apache/arrow-datafusion/pull/2354) ([andygrove](https://github.com/andygrove)) -- Add SQL query planner support for IN subqueries 
[\#2352](https://github.com/apache/arrow-datafusion/pull/2352) ([andygrove](https://github.com/andygrove)) -- Add `Expr` to prelude [\#2348](https://github.com/apache/arrow-datafusion/pull/2348) ([alamb](https://github.com/alamb)) -- Add SQL planner support for EXISTS subqueries [\#2344](https://github.com/apache/arrow-datafusion/pull/2344) ([andygrove](https://github.com/andygrove)) -- Add public Serialization/Deserialization API for `Expr` to/from bytes [\#2341](https://github.com/apache/arrow-datafusion/pull/2341) ([alamb](https://github.com/alamb)) -- Support for date32 and date64 in sort merge join [\#2336](https://github.com/apache/arrow-datafusion/pull/2336) ([hntd187](https://github.com/hntd187)) -- \[physical-expr\] move aggregate exprs and window exprs to their own modules [\#2335](https://github.com/apache/arrow-datafusion/pull/2335) ([yjshen](https://github.com/yjshen)) -- fix: union schema [\#2334](https://github.com/apache/arrow-datafusion/pull/2334) ([gandronchik](https://github.com/gandronchik)) -- Improve sql integration test organization [\#2333](https://github.com/apache/arrow-datafusion/pull/2333) ([alamb](https://github.com/alamb)) -- Support scalar values for func Array [\#2332](https://github.com/apache/arrow-datafusion/pull/2332) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- move sql tests from `context.rs` to corresponding test files in `tests/sql` [\#2329](https://github.com/apache/arrow-datafusion/pull/2329) ([WinkerDu](https://github.com/WinkerDu)) -- deprecate `index_of` and make `index_of_column_by_name` public [\#2320](https://github.com/apache/arrow-datafusion/pull/2320) ([jdye64](https://github.com/jdye64)) -- Fix HashJoin evaluating during plan [\#2317](https://github.com/apache/arrow-datafusion/pull/2317) ([tustvold](https://github.com/tustvold)) -- minor: remove two source files that only had re-exports [\#2313](https://github.com/apache/arrow-datafusion/pull/2313) ([andygrove](https://github.com/andygrove)) -- Don't sort 
batches during plan [\#2312](https://github.com/apache/arrow-datafusion/pull/2312) ([tustvold](https://github.com/tustvold)) -- Move case/when expressions to datafusion-expr crate [\#2311](https://github.com/apache/arrow-datafusion/pull/2311) ([andygrove](https://github.com/andygrove)) -- Fix CrossJoinExec evaluating during plan [\#2310](https://github.com/apache/arrow-datafusion/pull/2310) ([tustvold](https://github.com/tustvold)) -- Make SortPreservingMerge Usable Outside Tokio \(\#2201\) [\#2305](https://github.com/apache/arrow-datafusion/pull/2305) ([tustvold](https://github.com/tustvold)) -- chore: update cranelift to 0.83.0 [\#2304](https://github.com/apache/arrow-datafusion/pull/2304) ([yjshen](https://github.com/yjshen)) -- Always increment timer on record [\#2298](https://github.com/apache/arrow-datafusion/pull/2298) ([tustvold](https://github.com/tustvold)) -- Remove unnecessary env var for parquet\_sql example [\#2297](https://github.com/apache/arrow-datafusion/pull/2297) ([sergey-melnychuk](https://github.com/sergey-melnychuk)) -- Simplify sort streams [\#2296](https://github.com/apache/arrow-datafusion/pull/2296) ([tustvold](https://github.com/tustvold)) -- MINOR: beautify code with neat idents [\#2295](https://github.com/apache/arrow-datafusion/pull/2295) ([WinkerDu](https://github.com/WinkerDu)) -- Move FileType enum from sql module to logical\_plan module [\#2290](https://github.com/apache/arrow-datafusion/pull/2290) ([andygrove](https://github.com/andygrove)) -- Remove Parquet Empty Projection Workaround [\#2289](https://github.com/apache/arrow-datafusion/pull/2289) ([tustvold](https://github.com/tustvold)) -- Add BatchPartitioner \(\#2285\) [\#2287](https://github.com/apache/arrow-datafusion/pull/2287) ([tustvold](https://github.com/tustvold)) -- Make row its crate to make it accessible from physical-expr [\#2283](https://github.com/apache/arrow-datafusion/pull/2283) ([yjshen](https://github.com/yjshen)) -- Enable filter pushdown when using 
In\_list on parquet [\#2282](https://github.com/apache/arrow-datafusion/pull/2282) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Update uuid requirement from 0.8 to 1.0 [\#2280](https://github.com/apache/arrow-datafusion/pull/2280) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Add bytes scanned metric to ParquetExec [\#2273](https://github.com/apache/arrow-datafusion/pull/2273) ([thinkharderdev](https://github.com/thinkharderdev)) -- Fix outer join output with all-null indices on empty batch [\#2272](https://github.com/apache/arrow-datafusion/pull/2272) ([yjshen](https://github.com/yjshen)) -- Re-export DataFusion crates [\#2264](https://github.com/apache/arrow-datafusion/pull/2264) ([andygrove](https://github.com/andygrove)) -- rewrite approx\_median to approx\_percentile\_cont while planning phase [\#2262](https://github.com/apache/arrow-datafusion/pull/2262) ([korowa](https://github.com/korowa)) -- Introduce RowLayout to represent rows for different purposes [\#2261](https://github.com/apache/arrow-datafusion/pull/2261) ([yjshen](https://github.com/yjshen)) -- fix string coercion missing in Eq/NotEq operator [\#2258](https://github.com/apache/arrow-datafusion/pull/2258) ([WinkerDu](https://github.com/WinkerDu)) -- Update to Arrow 12.0.0, update tonic and prost [\#2253](https://github.com/apache/arrow-datafusion/pull/2253) ([alamb](https://github.com/alamb)) -- minor: move field\_util from `physical-expr` crate to `expr` crate [\#2250](https://github.com/apache/arrow-datafusion/pull/2250) ([andygrove](https://github.com/andygrove)) -- Move identifer case tests to `sql_integ`, add negative cases, Debug for `DataFrame` [\#2243](https://github.com/apache/arrow-datafusion/pull/2243) ([alamb](https://github.com/alamb)) -- Implement sort-merge join [\#2242](https://github.com/apache/arrow-datafusion/pull/2242) ([richox](https://github.com/richox)) -- fix: find the right wider decimal datatype for comparison operation 
[\#2241](https://github.com/apache/arrow-datafusion/pull/2241) ([liukun4515](https://github.com/liukun4515)) -- Fix join without constraints [\#2240](https://github.com/apache/arrow-datafusion/pull/2240) ([Dandandan](https://github.com/Dandandan)) -- Add type coercion rule for date + interval [\#2235](https://github.com/apache/arrow-datafusion/pull/2235) ([andygrove](https://github.com/andygrove)) -- support array with scalar arithmetic operation for decimal data type [\#2233](https://github.com/apache/arrow-datafusion/pull/2233) ([liukun4515](https://github.com/liukun4515)) -- chore: add `debug!` log in some execution operators [\#2231](https://github.com/apache/arrow-datafusion/pull/2231) ([NGA-TRAN](https://github.com/NGA-TRAN)) -- Introduce new optional scheduler, using Morsel-driven Parallelism + rayon \(\#2199\) [\#2226](https://github.com/apache/arrow-datafusion/pull/2226) ([tustvold](https://github.com/tustvold)) -- minor: add editor config file [\#2224](https://github.com/apache/arrow-datafusion/pull/2224) ([jackwener](https://github.com/jackwener)) -- minor: Refactor to avoid repeated code in replace\_qualifier [\#2222](https://github.com/apache/arrow-datafusion/pull/2222) ([andygrove](https://github.com/andygrove)) -- update cli readme [\#2220](https://github.com/apache/arrow-datafusion/pull/2220) ([liukun4515](https://github.com/liukun4515)) -- Use `filter` \(filter\_record\_batch\) instead of `take` to avoid using indices [\#2218](https://github.com/apache/arrow-datafusion/pull/2218) ([Dandandan](https://github.com/Dandandan)) -- Add single line description of ExecutionPlan \(\#2216\) [\#2217](https://github.com/apache/arrow-datafusion/pull/2217) ([tustvold](https://github.com/tustvold)) -- Remove tokio::spawn from HashAggregateExec \(\#2201\) [\#2215](https://github.com/apache/arrow-datafusion/pull/2215) ([tustvold](https://github.com/tustvold)) -- Remove tokio::spawn from WindowAggExec \(\#2201\) 
[\#2203](https://github.com/apache/arrow-datafusion/pull/2203) ([tustvold](https://github.com/tustvold)) -- Make ParquetExec usable outside of a tokio runtime \(\#2201\) [\#2202](https://github.com/apache/arrow-datafusion/pull/2202) ([tustvold](https://github.com/tustvold)) -- add sql level test for decimal data type [\#2200](https://github.com/apache/arrow-datafusion/pull/2200) ([liukun4515](https://github.com/liukun4515)) -- `case when` supports `NULL` constant [\#2197](https://github.com/apache/arrow-datafusion/pull/2197) ([WinkerDu](https://github.com/WinkerDu)) -- feat: Support simple Arrays with Literals [\#2194](https://github.com/apache/arrow-datafusion/pull/2194) ([ovr](https://github.com/ovr)) -- \[Ballista\] Enable ApproxPercentileWithWeight in Ballista and fill UT [\#2192](https://github.com/apache/arrow-datafusion/pull/2192) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- refactor: simplify `prepare_select_exprs` [\#2190](https://github.com/apache/arrow-datafusion/pull/2190) ([jackwener](https://github.com/jackwener)) -- Multiple row-layout support, part-1: Restructure code for clearness [\#2189](https://github.com/apache/arrow-datafusion/pull/2189) ([yjshen](https://github.com/yjshen)) -- make nightly clippy happy [\#2186](https://github.com/apache/arrow-datafusion/pull/2186) ([xudong963](https://github.com/xudong963)) -- \[Ballista\]Make PhysicalAggregateExprNode has repeated PhysicalExprNode [\#2184](https://github.com/apache/arrow-datafusion/pull/2184) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- MINOR: handle `NULL` in advance to avoid value copy in `string_concat` [\#2183](https://github.com/apache/arrow-datafusion/pull/2183) ([WinkerDu](https://github.com/WinkerDu)) -- fix: Sort with a lot of repetition values [\#2182](https://github.com/apache/arrow-datafusion/pull/2182) ([yjshen](https://github.com/yjshen)) -- cli: update lockfile [\#2178](https://github.com/apache/arrow-datafusion/pull/2178) ([happysalada](https://github.com/happysalada)) -- 
Add LogicalPlan::SubqueryAlias [\#2172](https://github.com/apache/arrow-datafusion/pull/2172) ([andygrove](https://github.com/andygrove)) -- minor: Avoid per cell evaluation in Coalesce, use zip in CaseWhen [\#2171](https://github.com/apache/arrow-datafusion/pull/2171) ([yjshen](https://github.com/yjshen)) -- Handle merged schemas in parquet pruning [\#2170](https://github.com/apache/arrow-datafusion/pull/2170) ([thinkharderdev](https://github.com/thinkharderdev)) -- Implement fast path of with\_new\_children\(\) in ExecutionPlan [\#2168](https://github.com/apache/arrow-datafusion/pull/2168) ([mingmwang](https://github.com/mingmwang)) -- enable explain for ballista [\#2163](https://github.com/apache/arrow-datafusion/pull/2163) ([doki23](https://github.com/doki23)) -- Add delimiter for create external table [\#2162](https://github.com/apache/arrow-datafusion/pull/2162) ([matthewmturner](https://github.com/matthewmturner)) -- \[MINOR\] enable `EXTRACT week` and add test \(after sqlparser update to 0.16\) [\#2157](https://github.com/apache/arrow-datafusion/pull/2157) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Optimize the evaluation of `IN` for large lists using InSet [\#2156](https://github.com/apache/arrow-datafusion/pull/2156) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Update sqlparser requirement from 0.15 to 0.16 [\#2152](https://github.com/apache/arrow-datafusion/pull/2152) ([dependabot[bot]](https://github.com/apps/dependabot)) -- fix `not(null)` with constant `null` [\#2144](https://github.com/apache/arrow-datafusion/pull/2144) ([WinkerDu](https://github.com/WinkerDu)) -- Add IF NOT EXISTS to `CREATE TABLE` and `CREATE EXTERNAL TABLE` [\#2143](https://github.com/apache/arrow-datafusion/pull/2143) ([matthewmturner](https://github.com/matthewmturner)) -- implement 'StringConcat' operator to support sql like "select 'aa' || 'b' " [\#2142](https://github.com/apache/arrow-datafusion/pull/2142) ([WinkerDu](https://github.com/WinkerDu)) -- \#2109 By default, 
use only 1000 rows to infer the schema [\#2139](https://github.com/apache/arrow-datafusion/pull/2139) ([jychen7](https://github.com/jychen7)) -- \[CLI\] Add show tables in ballista for datafusion-cli [\#2137](https://github.com/apache/arrow-datafusion/pull/2137) ([gaojun2048](https://github.com/gaojun2048)) -- fix: incorrect memory usage track for sort [\#2135](https://github.com/apache/arrow-datafusion/pull/2135) ([yjshen](https://github.com/yjshen)) -- Update quarterly roadmap for Q2 [\#2133](https://github.com/apache/arrow-datafusion/pull/2133) ([matthewmturner](https://github.com/matthewmturner)) -- Reduce SortExec memory usage by void constructing single huge batch [\#2132](https://github.com/apache/arrow-datafusion/pull/2132) ([yjshen](https://github.com/yjshen)) -- MINOR: fix concat\_ws corner bug [\#2128](https://github.com/apache/arrow-datafusion/pull/2128) ([WinkerDu](https://github.com/WinkerDu)) -- Minor add clarifying comment in parquet [\#2127](https://github.com/apache/arrow-datafusion/pull/2127) ([alamb](https://github.com/alamb)) -- Minor: make disk\_manager public [\#2126](https://github.com/apache/arrow-datafusion/pull/2126) ([yjshen](https://github.com/yjshen)) -- JIT-compille DataFusion expression with column name [\#2124](https://github.com/apache/arrow-datafusion/pull/2124) ([Dandandan](https://github.com/Dandandan)) -- minor: replace array\_equals in case evaluation with eq\_dyn from arrow-rs [\#2121](https://github.com/apache/arrow-datafusion/pull/2121) ([alamb](https://github.com/alamb)) -- Serialize timezone in timestamp scalar values [\#2120](https://github.com/apache/arrow-datafusion/pull/2120) ([thinkharderdev](https://github.com/thinkharderdev)) -- minor: fix some clippy warnings from nightly rust [\#2119](https://github.com/apache/arrow-datafusion/pull/2119) ([alamb](https://github.com/alamb)) -- Fix case evaluation with NULLs [\#2118](https://github.com/apache/arrow-datafusion/pull/2118) ([alamb](https://github.com/alamb)) -- 
issue\#1967 ignore channel close [\#2113](https://github.com/apache/arrow-datafusion/pull/2113) ([silence-coding](https://github.com/silence-coding)) -- cli: add cargo.lock [\#2112](https://github.com/apache/arrow-datafusion/pull/2112) ([happysalada](https://github.com/happysalada)) -- doc: update release schedule [\#2110](https://github.com/apache/arrow-datafusion/pull/2110) ([jychen7](https://github.com/jychen7)) -- fix df union all bug [\#2108](https://github.com/apache/arrow-datafusion/pull/2108) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([WinkerDu](https://github.com/WinkerDu)) -- Reduce repetition in Decimal binary kernels, upgrade to arrow 11.1 [\#2107](https://github.com/apache/arrow-datafusion/pull/2107) ([alamb](https://github.com/alamb)) -- update zlib version to 1.2.12 [\#2106](https://github.com/apache/arrow-datafusion/pull/2106) ([waitingkuo](https://github.com/waitingkuo)) -- Create jit-expression from datafusion expression [\#2103](https://github.com/apache/arrow-datafusion/pull/2103) ([Dandandan](https://github.com/Dandandan)) -- Add CREATE DATABASE command to SQL [\#2094](https://github.com/apache/arrow-datafusion/pull/2094) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) -- Refactor SessionContext, BallistaContext to support multi-tenancy configurations - Part 3 [\#2091](https://github.com/apache/arrow-datafusion/pull/2091) ([mingmwang](https://github.com/mingmwang)) -- minor: remove duplicate test [\#2089](https://github.com/apache/arrow-datafusion/pull/2089) ([jackwener](https://github.com/jackwener)) -- minor: remove repeated test [\#2085](https://github.com/apache/arrow-datafusion/pull/2085) ([jackwener](https://github.com/jackwener)) -- Fix lost filters and projections in ParquetExec, CSVExec etc [\#2077](https://github.com/apache/arrow-datafusion/pull/2077) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Remove dependency of common for the storage crate 
[\#2076](https://github.com/apache/arrow-datafusion/pull/2076) ([yahoNanJing](https://github.com/yahoNanJing)) -- [MINOR] fix doc in `EXTRACT\(field FROM source\) [\#2074](https://github.com/apache/arrow-datafusion/pull/2074) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- \[Bug\]\[Datafusion\] fix TaskContext session\_config bug [\#2070](https://github.com/apache/arrow-datafusion/pull/2070) ([gaojun2048](https://github.com/gaojun2048)) -- Short-circuit evaluation for `CaseWhen` [\#2068](https://github.com/apache/arrow-datafusion/pull/2068) ([yjshen](https://github.com/yjshen)) -- split datafusion-object-store module [\#2065](https://github.com/apache/arrow-datafusion/pull/2065) ([yahoNanJing](https://github.com/yahoNanJing)) -- Allow `CatalogProvider::register_catalog` to return an error [\#2052](https://github.com/apache/arrow-datafusion/pull/2052) ([alamb](https://github.com/alamb)) -- Add test in register\_catalog and change to use named symbolic constants [\#2050](https://github.com/apache/arrow-datafusion/pull/2050) ([alamb](https://github.com/alamb)) -- Update to arrow/parquet 11.0 [\#2048](https://github.com/apache/arrow-datafusion/pull/2048) ([alamb](https://github.com/alamb)) -- minor: format comments \(`//` to `// `\) [\#2047](https://github.com/apache/arrow-datafusion/pull/2047) ([jackwener](https://github.com/jackwener)) -- use cargo-tomlfmt to check Cargo.toml formatting in CI [\#2033](https://github.com/apache/arrow-datafusion/pull/2033) ([WinkerDu](https://github.com/WinkerDu)) -- feat: \#2004 approx percentile with weight [\#2031](https://github.com/apache/arrow-datafusion/pull/2031) ([jychen7](https://github.com/jychen7)) -- Refactor SessionContext, SessionState and SessionConfig to support multi-tenancy configurations - Part 2 [\#2029](https://github.com/apache/arrow-datafusion/pull/2029) ([mingmwang](https://github.com/mingmwang)) -- Simplify prerequisites for running examples [\#2028](https://github.com/apache/arrow-datafusion/pull/2028) 
([doki23](https://github.com/doki23)) -- Replace usage of `println!` with logger macros [\#2020](https://github.com/apache/arrow-datafusion/pull/2020) ([silence-coding](https://github.com/silence-coding)) -- Automatically test examples in user guide [\#2018](https://github.com/apache/arrow-datafusion/pull/2018) ([vchag](https://github.com/vchag)) -- return VecDeque for DFParser::parse\_sql [\#2017](https://github.com/apache/arrow-datafusion/pull/2017) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) -- Eliminate the scalar value filter [\#2002](https://github.com/apache/arrow-datafusion/pull/2002) ([jackwener](https://github.com/jackwener)) -- Fixing a typo in documentation [\#1997](https://github.com/apache/arrow-datafusion/pull/1997) ([psvri](https://github.com/psvri)) -- Correct documentation of ExprVisitor [\#1996](https://github.com/apache/arrow-datafusion/pull/1996) ([alamb](https://github.com/alamb)) -- Make it possible to only scan part of a parquet file in a partition [\#1990](https://github.com/apache/arrow-datafusion/pull/1990) ([yjshen](https://github.com/yjshen)) -- Update Dockerfile to fix integration tests [\#1982](https://github.com/apache/arrow-datafusion/pull/1982) ([andygrove](https://github.com/andygrove)) -- Remove some more unecessary cloning in sql\_expr\_to\_logical\_expr [\#1981](https://github.com/apache/arrow-datafusion/pull/1981) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Add ticket reference to clippy allow [\#1978](https://github.com/apache/arrow-datafusion/pull/1978) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Implement EXTRACT expression with week, month, day, hour [\#1974](https://github.com/apache/arrow-datafusion/pull/1974) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Address typo in ExprVisitable trait documentation 
[\#1970](https://github.com/apache/arrow-datafusion/pull/1970) ([jdye64](https://github.com/jdye64)) -- Update sqlparser requirement from 0.14 to 0.15 [\#1966](https://github.com/apache/arrow-datafusion/pull/1966) ([dependabot[bot]](https://github.com/apps/dependabot)) -- PruningPredicate should take owned Expr [\#1960](https://github.com/apache/arrow-datafusion/pull/1960) ([thinkharderdev](https://github.com/thinkharderdev)) -- Update to arrow 10.0.0, pyo3 0.16 [\#1957](https://github.com/apache/arrow-datafusion/pull/1957) ([alamb](https://github.com/alamb)) -- update jit-related dependencies [\#1953](https://github.com/apache/arrow-datafusion/pull/1953) ([xudong963](https://github.com/xudong963)) -- minor code refinement: `if_exists` name change, wildcard field for logical plan, etc. [\#1951](https://github.com/apache/arrow-datafusion/pull/1951) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Allow different types of query variables \(`@@var`\) rather than just string [\#1943](https://github.com/apache/arrow-datafusion/pull/1943) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([maxburke](https://github.com/maxburke)) -- Pruning serialization [\#1941](https://github.com/apache/arrow-datafusion/pull/1941) ([thinkharderdev](https://github.com/thinkharderdev)) -- Add write\_parquet to `DataFrame` [\#1940](https://github.com/apache/arrow-datafusion/pull/1940) ([matthewmturner](https://github.com/matthewmturner)) -- Fix select from EmptyExec always return 0 row after optimizer passes [\#1938](https://github.com/apache/arrow-datafusion/pull/1938) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add debug log when waiting for spilling on other consumers [\#1933](https://github.com/apache/arrow-datafusion/pull/1933) ([viirya](https://github.com/viirya)) -- Add db benchmark script [\#1928](https://github.com/apache/arrow-datafusion/pull/1928) ([matthewmturner](https://github.com/matthewmturner)) -- 
Add write\_csv to DataFrame [\#1922](https://github.com/apache/arrow-datafusion/pull/1922) ([matthewmturner](https://github.com/matthewmturner)) -- \[MINOR\] Update copyright year in Docs [\#1918](https://github.com/apache/arrow-datafusion/pull/1918) ([alamb](https://github.com/alamb)) -- add metadata to DFSchema, close \#1806. [\#1914](https://github.com/apache/arrow-datafusion/pull/1914) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) -- Clippy fix on nightly [\#1907](https://github.com/apache/arrow-datafusion/pull/1907) ([yjshen](https://github.com/yjshen)) -- Updated Rust version to 1.59 in all the files [\#1903](https://github.com/apache/arrow-datafusion/pull/1903) ([NaincyKumariKnoldus](https://github.com/NaincyKumariKnoldus)) -- support extract second and minute in expr. [\#1901](https://github.com/apache/arrow-datafusion/pull/1901) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Update crate descriptions [\#1899](https://github.com/apache/arrow-datafusion/pull/1899) ([alamb](https://github.com/alamb)) -- Remove unneeded Mutex in Ballista Client [\#1898](https://github.com/apache/arrow-datafusion/pull/1898) ([alamb](https://github.com/alamb)) -- \[split/17\] move the rest of physical expr to datafusion-physical-expr crate [\#1892](https://github.com/apache/arrow-datafusion/pull/1892) ([Jimexist](https://github.com/Jimexist)) -- Avoid unnecessary branching in row read/write if schema is null-free [\#1891](https://github.com/apache/arrow-datafusion/pull/1891) ([yjshen](https://github.com/yjshen)) -- Make parquet support optional for datafusion-common crate [\#1886](https://github.com/apache/arrow-datafusion/pull/1886) ([jonmmease](https://github.com/jonmmease)) -- Fix clippy lints [\#1885](https://github.com/apache/arrow-datafusion/pull/1885) ([HaoYang670](https://github.com/HaoYang670)) -- Add support for `~/.datafusionrc` and cli option for overriding it to datafusion-cli 
[\#1875](https://github.com/apache/arrow-datafusion/pull/1875) ([matthewmturner](https://github.com/matthewmturner)) -- \[Minor\] Clean up DecimalArray API Usage [\#1869](https://github.com/apache/arrow-datafusion/pull/1869) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Changes after went through "Datafusion as a library section" [\#1868](https://github.com/apache/arrow-datafusion/pull/1868) ([nonontb](https://github.com/nonontb)) -- Enhance MemorySchemaProvider to support `register_listing_table` [\#1863](https://github.com/apache/arrow-datafusion/pull/1863) ([matthewmturner](https://github.com/matthewmturner)) -- Increase default partition column type from Dict\(UInt8\) to Dict\(UInt16\) [\#1860](https://github.com/apache/arrow-datafusion/pull/1860) ([Igosuki](https://github.com/Igosuki)) -- Update to arrow 9.1.0 [\#1851](https://github.com/apache/arrow-datafusion/pull/1851) ([alamb](https://github.com/alamb)) -- move some tests out of context and into sql [\#1846](https://github.com/apache/arrow-datafusion/pull/1846) ([alamb](https://github.com/alamb)) -- \[split/14\] create `datafusion-physical-expr` module [\#1843](https://github.com/apache/arrow-datafusion/pull/1843) ([Jimexist](https://github.com/Jimexist)) -- Return `Error` when parquet reader fails rather than no data with `println!` [\#1837](https://github.com/apache/arrow-datafusion/pull/1837) ([alamb](https://github.com/alamb)) -- determine build side in hash join by `total_byte_size` instead of `num_rows` [\#1831](https://github.com/apache/arrow-datafusion/pull/1831) ([xudong963](https://github.com/xudong963)) -- Make ballista support an optional feature to datafusion-cli [\#1816](https://github.com/apache/arrow-datafusion/pull/1816) ([alamb](https://github.com/alamb)) -- Update documentation example for change in API [\#1812](https://github.com/apache/arrow-datafusion/pull/1812) ([alamb](https://github.com/alamb)) -- rename references of expr in 
physical plan module after datafusion-expr split [\#1798](https://github.com/apache/arrow-datafusion/pull/1798) ([Jimexist](https://github.com/Jimexist)) -- DataFusion + Conbench Integration [\#1791](https://github.com/apache/arrow-datafusion/pull/1791) ([dianaclarke](https://github.com/dianaclarke)) -- The returned path value of get\_by\_uri should be self-described with entire path [\#1779](https://github.com/apache/arrow-datafusion/pull/1779) ([yahoNanJing](https://github.com/yahoNanJing)) -- Use `eq_dyn`, `neq_dyn`, `lt_dyn`, `lt_eq_dyn`, `gt_dyn`, `gt_eq_dyn` kernels from arrow [\#1475](https://github.com/apache/arrow-datafusion/pull/1475) ([alamb](https://github.com/alamb)) - - -## [7.1.0](https://github.com/apache/arrow-datafusion/tree/7.1.0) (2022-04-10) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/7.0.0...7.1.0) - -**Fixed bugs:** - -- By default, use only 1000 rows to infer the schema [\#2159](https://github.com/apache/arrow-datafusion/pull/2159) - -## [7.0.0](https://github.com/apache/arrow-datafusion/tree/7.0.0) (2022-02-14) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/6.0.0...7.0.0) - -**Breaking changes:** - -- Consolidate various configurations options, remove unrelated `batch_size` [\#1565](https://github.com/apache/arrow-datafusion/issues/1565) -- Extract logical plans in LogicalPlan as independent struct [\#1228](https://github.com/apache/arrow-datafusion/issues/1228) -- Update `ExecutionPlan` to know about sortedness and repartitioning optimizer pass respect the invariants [\#1776](https://github.com/apache/arrow-datafusion/pull/1776) ([alamb](https://github.com/alamb)) -- Update to `arrow 8.0.0` [\#1673](https://github.com/apache/arrow-datafusion/pull/1673) ([alamb](https://github.com/alamb)) -- Remove non idiomatic `DataFusionError::into_arrow_external_error` in favor of From conversion [\#1645](https://github.com/apache/arrow-datafusion/pull/1645) ([alamb](https://github.com/alamb)) -- Remove 
`Accumulator::update` and `Accumulator::merge` [\#1582](https://github.com/apache/arrow-datafusion/pull/1582) ([Jimexist](https://github.com/Jimexist)) -- implement `Hash` for various types and replace `PartialOrd` [\#1580](https://github.com/apache/arrow-datafusion/pull/1580) ([Jimexist](https://github.com/Jimexist)) -- Replace `DatafusionError` with `GenericError` in `ObjectStore` interface [\#1541](https://github.com/apache/arrow-datafusion/pull/1541) ([matthewmturner](https://github.com/matthewmturner)) -- Make `FLOAT` SQL type map to `Float32` rather than `Float64` [\#1423](https://github.com/apache/arrow-datafusion/pull/1423) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- Map `REAL` SQL type to `Float32` rather than `Float64` to be consistent with pg [\#1390](https://github.com/apache/arrow-datafusion/pull/1390) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([hntd187](https://github.com/hntd187)) - -**Implemented enhancements:** - -- Create new `datafusion_expr` crate [\#1753](https://github.com/apache/arrow-datafusion/issues/1753) -- Create new `datafusion_common` crate [\#1752](https://github.com/apache/arrow-datafusion/issues/1752) -- API to get Expr's type and nullability without a `DFSchema` [\#1725](https://github.com/apache/arrow-datafusion/issues/1725) -- Cleaner API to create `Expr::ScalarFunction` programmatically [\#1718](https://github.com/apache/arrow-datafusion/issues/1718) -- Introduce a `Vec` based row-wise representation for DataFusion [\#1708](https://github.com/apache/arrow-datafusion/issues/1708) -- Simplify creating new `ListingTable` [\#1705](https://github.com/apache/arrow-datafusion/issues/1705) -- Implement TableProvider for DataFrameImpl to allow registration of logical plans [\#1698](https://github.com/apache/arrow-datafusion/issues/1698) -- Public Expr simplification API [\#1694](https://github.com/apache/arrow-datafusion/issues/1694) -- Query 
Optimizer: Add OUTER --\> INNER join conversion [\#1670](https://github.com/apache/arrow-datafusion/issues/1670) -- Support reading from CSV, Avro and Json files that have mergeable/compatible, but not identical schemas [\#1669](https://github.com/apache/arrow-datafusion/issues/1669) -- Remove `DataFusionError::into_arrow_external_error` in favor of `From` conversion [\#1644](https://github.com/apache/arrow-datafusion/issues/1644) -- Include join type in display implementation for logical plan [\#1620](https://github.com/apache/arrow-datafusion/issues/1620) -- Switch datafusion to using `eq_dyn_scalar`, etc kernels [\#1610](https://github.com/apache/arrow-datafusion/issues/1610) -- Proposal: Remove `Accumulator::update` and `Accumulator::merge` [\#1549](https://github.com/apache/arrow-datafusion/issues/1549) -- Replace DataFusionError/Result with impl Error for ObjectStore and Reader [\#1540](https://github.com/apache/arrow-datafusion/issues/1540) -- Add `approx_quantile` support [\#1538](https://github.com/apache/arrow-datafusion/issues/1538) -- support sorting decimal data type [\#1522](https://github.com/apache/arrow-datafusion/issues/1522) -- Keep all datafusion's packages up to date with Dependabot [\#1472](https://github.com/apache/arrow-datafusion/issues/1472) -- ExecutionContext support init ExecutionContextState with `new(state: Arc<Mutex<ExecutionContextState>>)` method [\#1439](https://github.com/apache/arrow-datafusion/issues/1439) -- support the decimal scalar value [\#1393](https://github.com/apache/arrow-datafusion/issues/1393) -- Documentation for using scalar functions with the DataFrame API [\#1364](https://github.com/apache/arrow-datafusion/issues/1364) -- Support `boolean == boolean` and `boolean != boolean` operators [\#1159](https://github.com/apache/arrow-datafusion/issues/1159) -- Support DataType::Decimal\(15, 2\) in TPC-H benchmark [\#174](https://github.com/apache/arrow-datafusion/issues/174) -- Make `MemoryStream` public 
[\#150](https://github.com/apache/arrow-datafusion/issues/150) -- Add support for Parquet schema merging [\#132](https://github.com/apache/arrow-datafusion/issues/132) -- Add SQL support for IN expression [\#118](https://github.com/apache/arrow-datafusion/issues/118) -- Add logging to datafusion-cli [\#1789](https://github.com/apache/arrow-datafusion/pull/1789) ([alamb](https://github.com/alamb)) -- Add `approx_median()` aggregate function [\#1729](https://github.com/apache/arrow-datafusion/pull/1729) ([realno](https://github.com/realno)) -- Add join type for logical plan display [\#1674](https://github.com/apache/arrow-datafusion/pull/1674) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Fix null comparison for Parquet pruning predicate [\#1595](https://github.com/apache/arrow-datafusion/pull/1595) ([viirya](https://github.com/viirya)) -- Add `corr` aggregate function [\#1561](https://github.com/apache/arrow-datafusion/pull/1561) ([realno](https://github.com/realno)) -- Add `covar`, `covar_pop` and `covar_samp` aggregate functions [\#1551](https://github.com/apache/arrow-datafusion/pull/1551) ([realno](https://github.com/realno)) -- Add `approx_quantile()` aggregation function [\#1539](https://github.com/apache/arrow-datafusion/pull/1539) ([domodwyer](https://github.com/domodwyer)) -- Initial MemoryManager and DiskManager APIs for query execution + External Sort implementation [\#1526](https://github.com/apache/arrow-datafusion/pull/1526) ([yjshen](https://github.com/yjshen)) -- Add `stddev` and `variance` [\#1525](https://github.com/apache/arrow-datafusion/pull/1525) ([realno](https://github.com/realno)) -- Add `rem` operation for Expr [\#1467](https://github.com/apache/arrow-datafusion/pull/1467) ([liukun4515](https://github.com/liukun4515)) -- support decimal data type in create table [\#1431](https://github.com/apache/arrow-datafusion/pull/1431) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- Ordering by index in select expression [\#1419](https://github.com/apache/arrow-datafusion/pull/1419) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([hntd187](https://github.com/hntd187)) -- Add support for `ORDER BY` on unprojected columns [\#1415](https://github.com/apache/arrow-datafusion/pull/1415) ([viirya](https://github.com/viirya)) -- Support decimal for `min` and `max` aggregate [\#1407](https://github.com/apache/arrow-datafusion/pull/1407) ([liukun4515](https://github.com/liukun4515)) -- Consolidate `ConstantFolding` and `SimplifyExpression` [\#1375](https://github.com/apache/arrow-datafusion/pull/1375) ([alamb](https://github.com/alamb)) -- Datafusion cli quiet mode command to contain option bool [\#1345](https://github.com/apache/arrow-datafusion/pull/1345) ([Jimexist](https://github.com/Jimexist)) -- Implement `array_agg` aggregate function [\#1300](https://github.com/apache/arrow-datafusion/pull/1300) ([viirya](https://github.com/viirya)) -- Add a command to switch output format in cli [\#1284](https://github.com/apache/arrow-datafusion/pull/1284) ([capkurmagati](https://github.com/capkurmagati)) -- Support `=`, `<`, `<=`, `>`, `>=`, `!=`, `is distinct from`, `is not distinct from` for `BooleanArray` [\#1163](https://github.com/apache/arrow-datafusion/pull/1163) ([alamb](https://github.com/alamb)) - -**Fixed bugs:** - -- Unsupported data type in hasher: Timestamp\(Second, None\) [\#1768](https://github.com/apache/arrow-datafusion/issues/1768) -- SQL column identifiers should be converted to lowercase when unquoted [\#1746](https://github.com/apache/arrow-datafusion/issues/1746) -- Data type Dictionary\(Int32, Utf8\) not supported for binary operation 'eq' on dyn arrays [\#1605](https://github.com/apache/arrow-datafusion/issues/1605) -- datafusion doesn't process predicate pushdown correctly when there is outer join 
[\#1586](https://github.com/apache/arrow-datafusion/issues/1586) -- casting `Int64` to `Float64` unsuccessfully caused tpch8 to fail [\#1576](https://github.com/apache/arrow-datafusion/issues/1576) -- CTE/WITH .. UNION ALL confuses name resolution in WHERE [\#1509](https://github.com/apache/arrow-datafusion/issues/1509) -- ORDER BY min\(x\) results in error `Plan("No field named 'foo.x'. Valid fields are 'MIN(foo.x)'.")` [\#1479](https://github.com/apache/arrow-datafusion/issues/1479) -- Sort discards field metadata on the output schema [\#1476](https://github.com/apache/arrow-datafusion/issues/1476) -- Datafusion should not strip out timezone information from existing types [\#1454](https://github.com/apache/arrow-datafusion/issues/1454) -- Error on some queries: "column types must match schema types, expected XXX but found YYY" [\#1447](https://github.com/apache/arrow-datafusion/issues/1447) -- Query failing to return any results when filter is an equality check on strings \(bad statistics in parquet\) [\#1433](https://github.com/apache/arrow-datafusion/issues/1433) -- Field names containing period such as `f.c1` cannot be named in SQL query [\#1432](https://github.com/apache/arrow-datafusion/issues/1432) -- `Select *` returns an unexpected result [\#1412](https://github.com/apache/arrow-datafusion/issues/1412) -- Turn off unused default features of chrono and ahash [\#1398](https://github.com/apache/arrow-datafusion/issues/1398) -- real data type is float32 in PG database, but in the datafusion it is as float64 [\#1380](https://github.com/apache/arrow-datafusion/issues/1380) -- TPC-H q10 performance regression \(expression for filter with added alias is not pushed down\) [\#1367](https://github.com/apache/arrow-datafusion/issues/1367) -- ProjectionExec Loses Field Metadata [\#1361](https://github.com/apache/arrow-datafusion/issues/1361) -- Support Filter on unprojected columns [\#1351](https://github.com/apache/arrow-datafusion/issues/1351) -- NULLS ORDER is 
inconsistent with postgres [\#1343](https://github.com/apache/arrow-datafusion/issues/1343) -- Fix bug while merging `RecordBatch`, add `SortPreservingMerge` fuzz tester [\#1678](https://github.com/apache/arrow-datafusion/pull/1678) ([alamb](https://github.com/alamb)) -- fix a cte block with same name for many times [\#1639](https://github.com/apache/arrow-datafusion/pull/1639) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- fix: casting Int64 to Float64 unsuccessfully caused tpch8 to fail [\#1601](https://github.com/apache/arrow-datafusion/pull/1601) ([xudong963](https://github.com/xudong963)) -- Fix single\_distinct\_to\_groupby for arbitrary expressions [\#1519](https://github.com/apache/arrow-datafusion/pull/1519) ([james727](https://github.com/james727)) -- Fix SortExec discards field metadata on the output schema [\#1477](https://github.com/apache/arrow-datafusion/pull/1477) ([alamb](https://github.com/alamb)) -- fix calculate in many\_to\_many\_hash\_partition test. 
[\#1463](https://github.com/apache/arrow-datafusion/pull/1463) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add Timezone to Scalar::Time\* types, and better timezone awareness to Datafusion's time types [\#1455](https://github.com/apache/arrow-datafusion/pull/1455) ([maxburke](https://github.com/maxburke)) -- Support identifiers with `.` in them [\#1449](https://github.com/apache/arrow-datafusion/pull/1449) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Fixes for working with functions in dataframes, additional documentation [\#1430](https://github.com/apache/arrow-datafusion/pull/1430) ([tobyhede](https://github.com/tobyhede)) -- \[Minor\] Fix `send_time` metric for hash-repartition [\#1421](https://github.com/apache/arrow-datafusion/pull/1421) ([Dandandan](https://github.com/Dandandan)) -- fix: Select \* returns an unexpected result [\#1413](https://github.com/apache/arrow-datafusion/pull/1413) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Make cli handle multiple whitespaces [\#1388](https://github.com/apache/arrow-datafusion/pull/1388) ([capkurmagati](https://github.com/capkurmagati)) -- Metadata is kept in projections for non-derived columns [\#1378](https://github.com/apache/arrow-datafusion/pull/1378) ([hntd187](https://github.com/hntd187)) -- Fix Predicate Pushdown: split\_members should be able to split aliased predicate [\#1368](https://github.com/apache/arrow-datafusion/pull/1368) ([viirya](https://github.com/viirya)) -- Change the arg names and make parameters more meaningful [\#1357](https://github.com/apache/arrow-datafusion/pull/1357) ([liukun4515](https://github.com/liukun4515)) -- collect table stats by default for listing table [\#1347](https://github.com/apache/arrow-datafusion/pull/1347) ([houqp](https://github.com/houqp)) -- fix: make nulls-order consistent with postgres 
[\#1344](https://github.com/apache/arrow-datafusion/pull/1344) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Avoid changing expression names during constant folding [\#1319](https://github.com/apache/arrow-datafusion/pull/1319) ([viirya](https://github.com/viirya)) -- improve error message for invalid create table statement [\#1294](https://github.com/apache/arrow-datafusion/pull/1294) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([houqp](https://github.com/houqp)) -- Forbid creating the table with the same name [\#1288](https://github.com/apache/arrow-datafusion/pull/1288) ([liukun4515](https://github.com/liukun4515)) - -**Documentation updates:** - -- Clarify docs about `Accumulator::update` and `Accumulator::update_batch` [\#1542](https://github.com/apache/arrow-datafusion/pull/1542) ([alamb](https://github.com/alamb)) -- Fix duplicated `cargo run --example parquet_sql` [\#1482](https://github.com/apache/arrow-datafusion/pull/1482) ([sergey-melnychuk](https://github.com/sergey-melnychuk)) -- add documentation to Datafusion cli's new commands [\#1348](https://github.com/apache/arrow-datafusion/pull/1348) ([liukun4515](https://github.com/liukun4515)) -- fix some clippy warnings from nightly channel [\#1277](https://github.com/apache/arrow-datafusion/pull/1277) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) - -**Performance improvements:** - -- Parquet pruning predicate for `IS NULL` [\#1591](https://github.com/apache/arrow-datafusion/issues/1591) -- Fix predicate pushdown for outer joins [\#1618](https://github.com/apache/arrow-datafusion/pull/1618) ([james727](https://github.com/james727)) -- fix: sql planner creates cross join instead of inner join from select predicates [\#1566](https://github.com/apache/arrow-datafusion/pull/1566) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] 
([xudong963](https://github.com/xudong963)) -- Split fetch\_metadata into fetch\_statistics and fetch\_schema [\#1365](https://github.com/apache/arrow-datafusion/pull/1365) ([Dandandan](https://github.com/Dandandan)) -- Optimize the performance queries with a single distinct aggregate [\#1315](https://github.com/apache/arrow-datafusion/pull/1315) ([ic4y](https://github.com/ic4y)) -- Left join could use bitmap for left join instead of Vec\<bool\> [\#1291](https://github.com/apache/arrow-datafusion/pull/1291) ([boazberman](https://github.com/boazberman)) - -**Closed issues:** - -- Add `release compile` to CI [\#1728](https://github.com/apache/arrow-datafusion/issues/1728) -- DiskManager and TempFiles getting created several times per query [\#1690](https://github.com/apache/arrow-datafusion/issues/1690) -- Add a test for the `pyarrow` feature in CI [\#1635](https://github.com/apache/arrow-datafusion/issues/1635) -- SQL tests for when sorting exceeded available memory and had to spill to disk [\#1573](https://github.com/apache/arrow-datafusion/issues/1573) -- Consolidate the N-way merging code and `SortPreservingMergeStream` \(which has quite good tests of what is often quite tricky code, and it will be performance critical\) [\#1572](https://github.com/apache/arrow-datafusion/issues/1572) -- Consolidate the `SortExec` code \(so there is only a single sort operator that does in memory sorting if it has enough memory budget but then spills to disk if needed\). 
[\#1571](https://github.com/apache/arrow-datafusion/issues/1571) -- Track memory usage in Non Limited Operators [\#1569](https://github.com/apache/arrow-datafusion/issues/1569) -- \[Question\] Why does ballista store tables in the client instead of in the SchedulerServer [\#1473](https://github.com/apache/arrow-datafusion/issues/1473) -- Consolidate Projection for Schema and RecordBatch [\#1425](https://github.com/apache/arrow-datafusion/issues/1425) -- Support Sort on unprojected columns [\#1372](https://github.com/apache/arrow-datafusion/issues/1372) -- Unused code in hash\_aggregate [\#1362](https://github.com/apache/arrow-datafusion/issues/1362) -- Why use the expr types before coercion to get the result type? [\#1358](https://github.com/apache/arrow-datafusion/issues/1358) -- A problem about the projection\_push\_down optimizer gathers valid columns [\#1312](https://github.com/apache/arrow-datafusion/issues/1312) -- apply constant folding to `LogicalPlan::Values` [\#1170](https://github.com/apache/arrow-datafusion/issues/1170) -- reduce usage of `IntoIterator` in logical plan builder window fn [\#372](https://github.com/apache/arrow-datafusion/issues/372) -- Why does DataFusion throw a Tokio 0.2 runtime error? 
[\#176](https://github.com/apache/arrow-datafusion/issues/176) -- TPC-H Query 14 [\#165](https://github.com/apache/arrow-datafusion/issues/165) -- Length kernel returns bytes not character length [\#156](https://github.com/apache/arrow-datafusion/issues/156) -- Split the logical operators out into separate source files [\#115](https://github.com/apache/arrow-datafusion/issues/115) - -**Merged pull requests:** - -- Fixup some doc warnings [\#1811](https://github.com/apache/arrow-datafusion/pull/1811) ([alamb](https://github.com/alamb)) -- Ensure most of links in docs are correct [\#1808](https://github.com/apache/arrow-datafusion/pull/1808) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) -- Update CHANGELOG.md, update release scripts [\#1807](https://github.com/apache/arrow-datafusion/pull/1807) ([alamb](https://github.com/alamb)) -- Update versions for split crates [\#1803](https://github.com/apache/arrow-datafusion/pull/1803) ([matthewmturner](https://github.com/matthewmturner)) -- Improve the error message and UX of tpch benchmark program [\#1800](https://github.com/apache/arrow-datafusion/pull/1800) ([alamb](https://github.com/alamb)) -- rename references of expr in logical plan module after datafusion-expr split [\#1797](https://github.com/apache/arrow-datafusion/pull/1797) ([Jimexist](https://github.com/Jimexist)) -- Update to sqlparser 0.14 [\#1796](https://github.com/apache/arrow-datafusion/pull/1796) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- \[split/13\] move rest of expr to expr\_fn in datafusion-expr module [\#1794](https://github.com/apache/arrow-datafusion/pull/1794) ([Jimexist](https://github.com/Jimexist)) -- Update datafusion versions [\#1793](https://github.com/apache/arrow-datafusion/pull/1793) ([matthewmturner](https://github.com/matthewmturner)) -- Less verbose plans in debug logging 
[\#1787](https://github.com/apache/arrow-datafusion/pull/1787) ([alamb](https://github.com/alamb)) -- \[split/11\] split expr type and null info to be expr-schemable [\#1784](https://github.com/apache/arrow-datafusion/pull/1784) ([Jimexist](https://github.com/Jimexist)) -- Introduce `Row` format backed by raw bytes [\#1782](https://github.com/apache/arrow-datafusion/pull/1782) ([yjshen](https://github.com/yjshen)) -- rewrite predicates before pushing to union inputs [\#1781](https://github.com/apache/arrow-datafusion/pull/1781) ([korowa](https://github.com/korowa)) -- Update datafusion to use arrow 9.0.0 [\#1775](https://github.com/apache/arrow-datafusion/pull/1775) ([alamb](https://github.com/alamb)) -- \[split/10\] split up expr for rewriting, visiting, and simplification traits [\#1774](https://github.com/apache/arrow-datafusion/pull/1774) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) -- \#1768 Support TimeUnit::Second in hasher [\#1769](https://github.com/apache/arrow-datafusion/pull/1769) ([jychen7](https://github.com/jychen7)) -- TPC-H benchmark can optionally write JSON output file with benchmark summary [\#1766](https://github.com/apache/arrow-datafusion/pull/1766) ([andygrove](https://github.com/andygrove)) -- \[split/8\] move `Accumulator` and `ColumnarValue` to datafusion-expr [\#1765](https://github.com/apache/arrow-datafusion/pull/1765) ([Jimexist](https://github.com/Jimexist)) -- \[split/7\] move built-in scalar function to datafusion-expr [\#1764](https://github.com/apache/arrow-datafusion/pull/1764) ([Jimexist](https://github.com/Jimexist)) -- \[split/6\] move signature, type signature, volatility to datafusion-expr [\#1763](https://github.com/apache/arrow-datafusion/pull/1763) ([Jimexist](https://github.com/Jimexist)) -- \[split/9+12\] move udf, udaf, `Expr` to datafusion-expr module [\#1762](https://github.com/apache/arrow-datafusion/pull/1762) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) -- \[split/5\] move window frame and operator to datafusion-expr module [\#1761](https://github.com/apache/arrow-datafusion/pull/1761) ([Jimexist](https://github.com/Jimexist)) -- \[split/4\] move scalar value to datafusion-common [\#1760](https://github.com/apache/arrow-datafusion/pull/1760) ([Jimexist](https://github.com/Jimexist)) -- \[split/3\] split datafusion expr module and move aggregate and window function expr [\#1759](https://github.com/apache/arrow-datafusion/pull/1759) ([Jimexist](https://github.com/Jimexist)) -- \[split/2\] move column and dfschema to datafusion-common module [\#1758](https://github.com/apache/arrow-datafusion/pull/1758) ([Jimexist](https://github.com/Jimexist)) -- Use ordered-float 2.10 [\#1756](https://github.com/apache/arrow-datafusion/pull/1756) ([andygrove](https://github.com/andygrove)) -- \[split/1\] split datafusion-common module [\#1751](https://github.com/apache/arrow-datafusion/pull/1751) ([Jimexist](https://github.com/Jimexist)) -- use clap 3 style args parsing for datafusion cli [\#1749](https://github.com/apache/arrow-datafusion/pull/1749) ([Jimexist](https://github.com/Jimexist)) -- fix: Case insensitive unquoted identifiers in SQL [\#1747](https://github.com/apache/arrow-datafusion/pull/1747) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mkmik](https://github.com/mkmik)) -- Move more tests out of context.rs [\#1743](https://github.com/apache/arrow-datafusion/pull/1743) ([alamb](https://github.com/alamb)) -- Move optimize test out of context.rs [\#1742](https://github.com/apache/arrow-datafusion/pull/1742) ([alamb](https://github.com/alamb)) -- Fix typos in crate documentation [\#1739](https://github.com/apache/arrow-datafusion/pull/1739) ([r4ntix](https://github.com/r4ntix)) -- add `cargo check --release` to ci [\#1737](https://github.com/apache/arrow-datafusion/pull/1737) 
([xudong963](https://github.com/xudong963)) -- Update parking\_lot requirement from 0.11 to 0.12 [\#1735](https://github.com/apache/arrow-datafusion/pull/1735) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Create built-in scalar functions programmatically [\#1734](https://github.com/apache/arrow-datafusion/pull/1734) ([HaoYang670](https://github.com/HaoYang670)) -- Prevent repartitioning of certain operator's direct children \(\#1731\) [\#1732](https://github.com/apache/arrow-datafusion/pull/1732) ([tustvold](https://github.com/tustvold)) -- API to get Expr's type and nullability without a `DFSchema` [\#1726](https://github.com/apache/arrow-datafusion/pull/1726) ([alamb](https://github.com/alamb)) -- minor: fix `cargo run --release` error [\#1723](https://github.com/apache/arrow-datafusion/pull/1723) ([xudong963](https://github.com/xudong963)) -- substitute `parking_lot::Mutex` for `std::sync::Mutex` [\#1720](https://github.com/apache/arrow-datafusion/pull/1720) ([xudong963](https://github.com/xudong963)) -- Convert boolean case expressions to boolean logic [\#1719](https://github.com/apache/arrow-datafusion/pull/1719) ([tustvold](https://github.com/tustvold)) -- Add Expression Simplification API [\#1717](https://github.com/apache/arrow-datafusion/pull/1717) ([alamb](https://github.com/alamb)) -- Create ListingTableConfig which includes file format and schema inference [\#1715](https://github.com/apache/arrow-datafusion/pull/1715) ([matthewmturner](https://github.com/matthewmturner)) -- make `select_to_plan` clearer [\#1714](https://github.com/apache/arrow-datafusion/pull/1714) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Add upper bound for public function `signature` [\#1713](https://github.com/apache/arrow-datafusion/pull/1713) ([HaoYang670](https://github.com/HaoYang670)) -- Add tests and CI for optional pyarrow module [\#1711](https://github.com/apache/arrow-datafusion/pull/1711) 
([wjones127](https://github.com/wjones127)) -- Create SchemaAdapter trait to map table schema to file schemas [\#1709](https://github.com/apache/arrow-datafusion/pull/1709) ([thinkharderdev](https://github.com/thinkharderdev)) -- refine test in repartition.rs & coalesce\_batches.rs [\#1707](https://github.com/apache/arrow-datafusion/pull/1707) ([xudong963](https://github.com/xudong963)) -- Fuzz test for spillable sort [\#1706](https://github.com/apache/arrow-datafusion/pull/1706) ([yjshen](https://github.com/yjshen)) -- Support `create_physical_expr` and `ExecutionContextState` or `DefaultPhysicalPlanner` for faster speed [\#1700](https://github.com/apache/arrow-datafusion/pull/1700) ([alamb](https://github.com/alamb)) -- Implement TableProvider for DataFrameImpl [\#1699](https://github.com/apache/arrow-datafusion/pull/1699) ([cpcloud](https://github.com/cpcloud)) -- Move timestamp related tests out of context.rs and into sql integration test [\#1696](https://github.com/apache/arrow-datafusion/pull/1696) ([alamb](https://github.com/alamb)) -- Lazy TempDir creation in DiskManager [\#1695](https://github.com/apache/arrow-datafusion/pull/1695) ([alamb](https://github.com/alamb)) -- Add `MemTrackingMetrics` to ease memory tracking for non-limited memory consumers [\#1691](https://github.com/apache/arrow-datafusion/pull/1691) ([yjshen](https://github.com/yjshen)) -- \(minor\) Reduce memory manager and disk manager logs from `info!` to `debug!` [\#1689](https://github.com/apache/arrow-datafusion/pull/1689) ([alamb](https://github.com/alamb)) -- Make `SortPreservingMergeStream` stable on input stream order [\#1687](https://github.com/apache/arrow-datafusion/pull/1687) ([alamb](https://github.com/alamb)) -- Incorporate dyn scalar kernels [\#1685](https://github.com/apache/arrow-datafusion/pull/1685) ([matthewmturner](https://github.com/matthewmturner)) -- Move `information_schema` tests out of execution/context.rs to `sql_integration` tests 
[\#1684](https://github.com/apache/arrow-datafusion/pull/1684) ([alamb](https://github.com/alamb)) -- Add a new metric type: `Gauge` + `CurrentMemoryUsage` to metrics [\#1682](https://github.com/apache/arrow-datafusion/pull/1682) ([yjshen](https://github.com/yjshen)) -- refactor array\_agg to not to have `update` and `merge` [\#1681](https://github.com/apache/arrow-datafusion/pull/1681) ([Jimexist](https://github.com/Jimexist)) -- Use NamedTempFile rather than `String` in DiskManager [\#1680](https://github.com/apache/arrow-datafusion/pull/1680) ([alamb](https://github.com/alamb)) -- upgrade clap to version 3 [\#1672](https://github.com/apache/arrow-datafusion/pull/1672) ([Jimexist](https://github.com/Jimexist)) -- Improve configuration and resource use of `MemoryManager` and `DiskManager` [\#1668](https://github.com/apache/arrow-datafusion/pull/1668) ([alamb](https://github.com/alamb)) -- feat: Support quarter granularity in date\_trunc function [\#1667](https://github.com/apache/arrow-datafusion/pull/1667) ([ovr](https://github.com/ovr)) -- Fix can not load parquet table form spark in datafusion-cli. 
[\#1665](https://github.com/apache/arrow-datafusion/pull/1665) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Make `MemoryManager` and `MemoryStream` public [\#1664](https://github.com/apache/arrow-datafusion/pull/1664) ([yjshen](https://github.com/yjshen)) -- \[Cleanup\] Move `AggregatedMetricsSet` to `metrics` for further reuse [\#1663](https://github.com/apache/arrow-datafusion/pull/1663) ([yjshen](https://github.com/yjshen)) -- fix: substr - correct behaivour with negative start pos [\#1660](https://github.com/apache/arrow-datafusion/pull/1660) ([ovr](https://github.com/ovr)) -- suppport bitwise and as an example [\#1653](https://github.com/apache/arrow-datafusion/pull/1653) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- refine match pattern related code [\#1650](https://github.com/apache/arrow-datafusion/pull/1650) ([xudong963](https://github.com/xudong963)) -- update md-5, sha2, blake2 [\#1647](https://github.com/apache/arrow-datafusion/pull/1647) ([xudong963](https://github.com/xudong963)) -- Add `DataFusionError` -\> `ArrowError` conversion [\#1643](https://github.com/apache/arrow-datafusion/pull/1643) ([alamb](https://github.com/alamb)) -- Add `spill_count` and `spilled_bytes` to `BaselineMetrics`, test sort with spill [\#1641](https://github.com/apache/arrow-datafusion/pull/1641) ([yjshen](https://github.com/yjshen)) -- support hash decimal array and group by [\#1640](https://github.com/apache/arrow-datafusion/pull/1640) ([liukun4515](https://github.com/liukun4515)) -- Consolidate Schema and RecordBatch projection [\#1638](https://github.com/apache/arrow-datafusion/pull/1638) ([alamb](https://github.com/alamb)) -- Update hashbrown requirement from 0.11 to 0.12 [\#1631](https://github.com/apache/arrow-datafusion/pull/1631) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Update pyo3 requirement from 0.14 to 0.15 [\#1627](https://github.com/apache/arrow-datafusion/pull/1627) 
([dependabot[bot]](https://github.com/apps/dependabot)) -- Optimize `SortPreservingMergeStream` to avoid `SortKeyCursor` sharing [\#1624](https://github.com/apache/arrow-datafusion/pull/1624) ([yjshen](https://github.com/yjshen)) -- Handle merging of evolved schemas in ParquetExec [\#1622](https://github.com/apache/arrow-datafusion/pull/1622) ([thinkharderdev](https://github.com/thinkharderdev)) -- feat: Support Substring\(str \[from int\] \[for int\]\) [\#1621](https://github.com/apache/arrow-datafusion/pull/1621) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ovr](https://github.com/ovr)) -- feat: Support complex interval via IntervalMonthDayNano [\#1615](https://github.com/apache/arrow-datafusion/pull/1615) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ovr](https://github.com/ovr)) -- consolidate binary\_expr coercion rule code into `binary_rule.rs` module [\#1607](https://github.com/apache/arrow-datafusion/pull/1607) ([alamb](https://github.com/alamb)) -- Fix comparison of dictionary arrays [\#1606](https://github.com/apache/arrow-datafusion/pull/1606) ([alamb](https://github.com/alamb)) -- add test for decimal to decimal [\#1603](https://github.com/apache/arrow-datafusion/pull/1603) ([liukun4515](https://github.com/liukun4515)) -- update nightly version [\#1597](https://github.com/apache/arrow-datafusion/pull/1597) ([Jimexist](https://github.com/Jimexist)) -- Consolidate sort and external\_sort [\#1596](https://github.com/apache/arrow-datafusion/pull/1596) ([yjshen](https://github.com/yjshen)) -- support from\_slice for binary, string, and boolean array types [\#1589](https://github.com/apache/arrow-datafusion/pull/1589) ([Jimexist](https://github.com/Jimexist)) -- add from\_slice trait to ease arrow2 migration [\#1588](https://github.com/apache/arrow-datafusion/pull/1588) ([Jimexist](https://github.com/Jimexist)) -- Implement ARRAY\_AGG\(DISTINCT ...\) [\#1579](https://github.com/apache/arrow-datafusion/pull/1579) 
([james727](https://github.com/james727)) -- Rename sql integration tests from `mod` to `sql_integration` [\#1575](https://github.com/apache/arrow-datafusion/pull/1575) ([alamb](https://github.com/alamb)) -- minor: improve the benchmark readme [\#1567](https://github.com/apache/arrow-datafusion/pull/1567) ([xudong963](https://github.com/xudong963)) -- Consolidate `batch_size` configuration in `ExecutionConfig`, `RuntimeConfig` and `PhysicalPlanConfig` [\#1562](https://github.com/apache/arrow-datafusion/pull/1562) ([yjshen](https://github.com/yjshen)) -- Update to rust 1.58 [\#1557](https://github.com/apache/arrow-datafusion/pull/1557) ([xudong963](https://github.com/xudong963)) -- support mathematics operation for decimal data type [\#1554](https://github.com/apache/arrow-datafusion/pull/1554) ([liukun4515](https://github.com/liukun4515)) -- Address clippy warnings [\#1553](https://github.com/apache/arrow-datafusion/pull/1553) ([sergey-melnychuk](https://github.com/sergey-melnychuk)) -- enhance arithmetic operation for array with scalar [\#1552](https://github.com/apache/arrow-datafusion/pull/1552) ([liukun4515](https://github.com/liukun4515)) -- Remove unused `update` and `merge` implementations from Aggregates and supporting `ScalarValue` arithmetic [\#1550](https://github.com/apache/arrow-datafusion/pull/1550) ([alamb](https://github.com/alamb)) -- Add batch operations to stddev [\#1547](https://github.com/apache/arrow-datafusion/pull/1547) ([realno](https://github.com/realno)) -- Mark ARRAY\_AGG\(DISTINCT ...\) not implemented [\#1534](https://github.com/apache/arrow-datafusion/pull/1534) ([james727](https://github.com/james727)) -- Update to arrow-7.0.0 [\#1523](https://github.com/apache/arrow-datafusion/pull/1523) ([alamb](https://github.com/alamb)) -- Fix ORDER BY on aggregate [\#1506](https://github.com/apache/arrow-datafusion/pull/1506) ([viirya](https://github.com/viirya)) -- Add example on how to query multiple parquet files 
[\#1497](https://github.com/apache/arrow-datafusion/pull/1497) ([nitisht](https://github.com/nitisht)) -- Refactor testing modules [\#1491](https://github.com/apache/arrow-datafusion/pull/1491) ([hntd187](https://github.com/hntd187)) -- add rfcs for datafusion [\#1490](https://github.com/apache/arrow-datafusion/pull/1490) ([xudong963](https://github.com/xudong963)) -- support comparison for decimal data type and refactor the binary coercion rule [\#1483](https://github.com/apache/arrow-datafusion/pull/1483) ([liukun4515](https://github.com/liukun4515)) -- Minor: Rename `predicate_builder` --\> `pruning_predicate` for consistency [\#1481](https://github.com/apache/arrow-datafusion/pull/1481) ([alamb](https://github.com/alamb)) -- Tests for support try\_cast/cast decimal to numeric [\#1465](https://github.com/apache/arrow-datafusion/pull/1465) ([liukun4515](https://github.com/liukun4515)) -- Avoid send empty batches for Hash partitioning. [\#1459](https://github.com/apache/arrow-datafusion/pull/1459) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Planner code cleanup [\#1450](https://github.com/apache/arrow-datafusion/pull/1450) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Fix bug in projection: "column types must match schema types, expected XXX but found YYY" [\#1448](https://github.com/apache/arrow-datafusion/pull/1448) ([alamb](https://github.com/alamb)) -- Update arrow-rs to 6.4.0 and replace boolean comparison in datafusion with arrow compute kernel [\#1446](https://github.com/apache/arrow-datafusion/pull/1446) ([xudong963](https://github.com/xudong963)) -- support cast/try\_cast for decimal: signed numeric to decimal [\#1442](https://github.com/apache/arrow-datafusion/pull/1442) ([liukun4515](https://github.com/liukun4515)) -- Consolidate decimal error checking and improve error messages [\#1438](https://github.com/apache/arrow-datafusion/pull/1438) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- use 0.13 sql parser [\#1435](https://github.com/apache/arrow-datafusion/pull/1435) ([Jimexist](https://github.com/Jimexist)) -- Minor Code cleanups [\#1428](https://github.com/apache/arrow-datafusion/pull/1428) ([alamb](https://github.com/alamb)) -- Clarify communication on bi-weekly sync [\#1427](https://github.com/apache/arrow-datafusion/pull/1427) ([alamb](https://github.com/alamb)) -- support sum/avg agg for decimal, change sum\(float32\) --\> float64 [\#1408](https://github.com/apache/arrow-datafusion/pull/1408) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- Fix bugs with nullability during rewrites: Combine `simplify` and `Simplifier` [\#1401](https://github.com/apache/arrow-datafusion/pull/1401) ([alamb](https://github.com/alamb)) -- Minimize features [\#1399](https://github.com/apache/arrow-datafusion/pull/1399) ([carols10cents](https://github.com/carols10cents)) -- Update rust vesion to 1.57 [\#1395](https://github.com/apache/arrow-datafusion/pull/1395) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- support decimal scalar value [\#1394](https://github.com/apache/arrow-datafusion/pull/1394) ([liukun4515](https://github.com/liukun4515)) -- Add coercion rules for AggregateFunctions [\#1387](https://github.com/apache/arrow-datafusion/pull/1387) ([liukun4515](https://github.com/liukun4515)) -- upgrade the arrow-rs version [\#1385](https://github.com/apache/arrow-datafusion/pull/1385) ([liukun4515](https://github.com/liukun4515)) -- add array agg name [\#1382](https://github.com/apache/arrow-datafusion/pull/1382) ([liukun4515](https://github.com/liukun4515)) -- Make tests for `simplify` and `Simplifer` consistent [\#1376](https://github.com/apache/arrow-datafusion/pull/1376) ([alamb](https://github.com/alamb)) -- Refactor: Consolidate 
expression simplification code in `simplify_expression.rs` [\#1374](https://github.com/apache/arrow-datafusion/pull/1374) ([alamb](https://github.com/alamb)) -- remove unused code in hash\_aggregate [\#1370](https://github.com/apache/arrow-datafusion/pull/1370) ([ic4y](https://github.com/ic4y)) -- Use `BufReader` for LocalFileReader to revert performance regression in parquet reading [\#1366](https://github.com/apache/arrow-datafusion/pull/1366) ([Dandandan](https://github.com/Dandandan)) -- Add unit test for constant folding on values [\#1355](https://github.com/apache/arrow-datafusion/pull/1355) ([viirya](https://github.com/viirya)) -- Extract logical plan: rename the plan name \(follow up\) [\#1354](https://github.com/apache/arrow-datafusion/pull/1354) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- Moved aggr\_test\_schema to test\_utils [\#1338](https://github.com/apache/arrow-datafusion/pull/1338) ([rdettai](https://github.com/rdettai)) -- upgrade arrow-rs to 6.2.0 [\#1334](https://github.com/apache/arrow-datafusion/pull/1334) ([liukun4515](https://github.com/liukun4515)) -- Update release instructions [\#1331](https://github.com/apache/arrow-datafusion/pull/1331) ([alamb](https://github.com/alamb)) -- \#1268: allow datafusion-cli to toggle quiet flag within CLI [\#1330](https://github.com/apache/arrow-datafusion/pull/1330) ([jgoday](https://github.com/jgoday)) -- Extract Aggregate, Sort, and Join to struct from AggregatePlan [\#1326](https://github.com/apache/arrow-datafusion/pull/1326) ([matthewmturner](https://github.com/matthewmturner)) -- Extract `EmptyRelation`, `Limit`, `Values` from `LogicalPlan` [\#1325](https://github.com/apache/arrow-datafusion/pull/1325) ([liukun4515](https://github.com/liukun4515)) -- Extract CrossJoin, Repartition, Union in LogicalPlan [\#1322](https://github.com/apache/arrow-datafusion/pull/1322) ([liukun4515](https://github.com/liukun4515)) -- Fifth batch of 
updating sql tests to use assert\_batches\_eq [\#1318](https://github.com/apache/arrow-datafusion/pull/1318) ([matthewmturner](https://github.com/matthewmturner)) -- Extract Explain, Analyze, Extension in LogicalPlan as independent struct [\#1317](https://github.com/apache/arrow-datafusion/pull/1317) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Extract CreateMemoryTable, DropTable, CreateExternalTable in LogicalPlan as independent struct [\#1311](https://github.com/apache/arrow-datafusion/pull/1311) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) -- Extract Projection, Filter, Window in LogicalPlan as independent struct [\#1309](https://github.com/apache/arrow-datafusion/pull/1309) ([ic4y](https://github.com/ic4y)) -- Add PSQL comparison tests for except, intersect [\#1292](https://github.com/apache/arrow-datafusion/pull/1292) ([mrob95](https://github.com/mrob95)) -- Extract logical plans in LogicalPlan as independent struct: TableScan [\#1290](https://github.com/apache/arrow-datafusion/pull/1290) ([xudong963](https://github.com/xudong963)) -- Add statement helper command to cli [\#1285](https://github.com/apache/arrow-datafusion/pull/1285) ([matthewmturner](https://github.com/matthewmturner)) -- Python bindings for window functions [\#819](https://github.com/apache/arrow-datafusion/pull/819) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jgoday](https://github.com/jgoday)) - -## [6.0.0](https://github.com/apache/arrow-datafusion/tree/6.0.0) (2021-11-13) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/5.0.0...6.0.0) - -**Breaking changes:** - -- Removed deprecated with\_concurrency [\#1200](https://github.com/apache/arrow-datafusion/pull/1200) ([rdettai](https://github.com/rdettai)) -- File partitioning for ListingTable [\#1141](https://github.com/apache/arrow-datafusion/pull/1141) 
([rdettai](https://github.com/rdettai)) -- Add function volatility to Signature [\#1071](https://github.com/apache/arrow-datafusion/pull/1071) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([pjmore](https://github.com/pjmore)) -- fix: allow duplicate field names in table join, fix output with duplicated names [\#1023](https://github.com/apache/arrow-datafusion/pull/1023) ([houqp](https://github.com/houqp)) -- Make TableProvider.scan\(\) and PhysicalPlanner::create\_physical\_plan\(\) async [\#1013](https://github.com/apache/arrow-datafusion/pull/1013) ([rdettai](https://github.com/rdettai)) -- Reorganize table providers by table format [\#1010](https://github.com/apache/arrow-datafusion/pull/1010) ([rdettai](https://github.com/rdettai)) -- Make Metrics::labels\(\) public [\#999](https://github.com/apache/arrow-datafusion/pull/999) ([alamb](https://github.com/alamb)) -- Rename NthValue::{first\_value,last\_value,nth\_value} to satisfy clippy in Rust 1.55 [\#986](https://github.com/apache/arrow-datafusion/pull/986) ([alamb](https://github.com/alamb)) -- Move CBOs and Statistics to physical plan [\#965](https://github.com/apache/arrow-datafusion/pull/965) ([rdettai](https://github.com/rdettai)) -- Update to sqlparser v 0.10.0 [\#934](https://github.com/apache/arrow-datafusion/pull/934) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- FilePartition and PartitionedFile for scanning flexibility [\#932](https://github.com/apache/arrow-datafusion/pull/932) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([yjshen](https://github.com/yjshen)) -- Improve SQLMetric APIs, port existing metrics [\#908](https://github.com/apache/arrow-datafusion/pull/908) ([alamb](https://github.com/alamb)) -- Add support for EXPLAIN ANALYZE [\#858](https://github.com/apache/arrow-datafusion/pull/858) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Rename 
concurrency to target\_partitions [\#706](https://github.com/apache/arrow-datafusion/pull/706) ([andygrove](https://github.com/andygrove)) - -**Implemented enhancements:** - -- Add booleans support to the `CASE` statement [\#1156](https://github.com/apache/arrow-datafusion/issues/1156) -- Implement General Purpose Constant Folding with the Expression Evaluator [\#1070](https://github.com/apache/arrow-datafusion/issues/1070) -- Mark volatility categories of functions [\#1069](https://github.com/apache/arrow-datafusion/issues/1069) -- Add "show" support to DataFrame API [\#937](https://github.com/apache/arrow-datafusion/issues/937) -- Add support for TRIM BOTH/LEADING/TRAILING [\#935](https://github.com/apache/arrow-datafusion/issues/935) -- Add "baseline" metrics to all built in operators [\#866](https://github.com/apache/arrow-datafusion/issues/866) -- Add SQL support for referencing fields in structs [\#119](https://github.com/apache/arrow-datafusion/issues/119) -- add filename completer for create table statement [\#1278](https://github.com/apache/arrow-datafusion/pull/1278) ([Jimexist](https://github.com/Jimexist)) -- Add drop table support [\#1266](https://github.com/apache/arrow-datafusion/pull/1266) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([viirya](https://github.com/viirya)) -- Dataframe supports except and update readme [\#1261](https://github.com/apache/arrow-datafusion/pull/1261) ([xudong963](https://github.com/xudong963)) -- Implement EXCEPT & EXCEPT DISTINCT [\#1259](https://github.com/apache/arrow-datafusion/pull/1259) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Add DataFrame support for `INTERSECT` and update readme [\#1258](https://github.com/apache/arrow-datafusion/pull/1258) ([xudong963](https://github.com/xudong963)) -- use arrow 6.1.0 [\#1255](https://github.com/apache/arrow-datafusion/pull/1255) ([Jimexist](https://github.com/Jimexist)) -- fix 1250, add 
editor support for datafusion cli with validation [\#1251](https://github.com/apache/arrow-datafusion/pull/1251) ([Jimexist](https://github.com/Jimexist)) -- Add support for `create table as` via MemTable [\#1243](https://github.com/apache/arrow-datafusion/pull/1243) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) -- Add cli show columns command to describe tables [\#1231](https://github.com/apache/arrow-datafusion/pull/1231) ([Jimexist](https://github.com/Jimexist)) -- datafusion-cli to add list table command [\#1229](https://github.com/apache/arrow-datafusion/pull/1229) ([Jimexist](https://github.com/Jimexist)) -- datafusion cli to handle EoF and interrupt signal [\#1225](https://github.com/apache/arrow-datafusion/pull/1225) ([Jimexist](https://github.com/Jimexist)) -- add \q as quit command and add \? for help [\#1224](https://github.com/apache/arrow-datafusion/pull/1224) ([Jimexist](https://github.com/Jimexist)) -- Add algebraic simplifications to constant\_folding [\#1208](https://github.com/apache/arrow-datafusion/pull/1208) ([matthewmturner](https://github.com/matthewmturner)) -- Improve GetIndexedFieldExpr adding utf8 key based access for struct v… [\#1204](https://github.com/apache/arrow-datafusion/pull/1204) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Igosuki](https://github.com/Igosuki)) -- Fix `between` in select query [\#1202](https://github.com/apache/arrow-datafusion/pull/1202) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([capkurmagati](https://github.com/capkurmagati)) -- Move code to fold Stable functions like `now()` from `Simplifier` to `ConstEvaluator` [\#1176](https://github.com/apache/arrow-datafusion/pull/1176) ([alamb](https://github.com/alamb)) -- DataFrame supports window function [\#1167](https://github.com/apache/arrow-datafusion/pull/1167) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] 
([xudong963](https://github.com/xudong963)) -- add values list expression [\#1165](https://github.com/apache/arrow-datafusion/pull/1165) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) -- Add booleans support to the CASE statement [\#1161](https://github.com/apache/arrow-datafusion/pull/1161) ([xudong963](https://github.com/xudong963)) -- Improve error messages when operations are not supported [\#1158](https://github.com/apache/arrow-datafusion/pull/1158) ([alamb](https://github.com/alamb)) -- Generic constant expression evaluation [\#1153](https://github.com/apache/arrow-datafusion/pull/1153) ([alamb](https://github.com/alamb)) -- python `lit` function to support bool and byte vec [\#1152](https://github.com/apache/arrow-datafusion/pull/1152) ([Jimexist](https://github.com/Jimexist)) -- \[nit\] simplify datafusion optimizer module codes [\#1146](https://github.com/apache/arrow-datafusion/pull/1146) ([panarch](https://github.com/panarch)) -- Add ScalarValue support for arbitrary list elements [\#1142](https://github.com/apache/arrow-datafusion/pull/1142) ([jonmmease](https://github.com/jonmmease)) -- Multiple files per partitions for CSV Avro Json [\#1138](https://github.com/apache/arrow-datafusion/pull/1138) ([rdettai](https://github.com/rdettai)) -- Implement INTERSECT & INTERSECT DISTINCT [\#1135](https://github.com/apache/arrow-datafusion/pull/1135) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- Simplify file struct abstractions [\#1120](https://github.com/apache/arrow-datafusion/pull/1120) ([rdettai](https://github.com/rdettai)) -- Implement `is [not] distinct from` [\#1117](https://github.com/apache/arrow-datafusion/pull/1117) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) -- Clean up spawned task on drop for `RepartitionExec`, `SortPreservingMergeExec`, `WindowAggExec` 
[\#1112](https://github.com/apache/arrow-datafusion/pull/1112) ([crepererum](https://github.com/crepererum)) -- add hyperloglog implementation \(`add` and `count`\) [\#1095](https://github.com/apache/arrow-datafusion/pull/1095) ([Jimexist](https://github.com/Jimexist)) -- Add ScalarValue::Struct variant [\#1091](https://github.com/apache/arrow-datafusion/pull/1091) ([jonmmease](https://github.com/jonmmease)) -- add digest\(utf8, method\) function and refactor all current hash digest functions [\#1090](https://github.com/apache/arrow-datafusion/pull/1090) ([Jimexist](https://github.com/Jimexist)) -- \[crypto\] add `blake3` algorithm to `digest` function [\#1086](https://github.com/apache/arrow-datafusion/pull/1086) ([Jimexist](https://github.com/Jimexist)) -- \[crypto\] add blake2b and blake2s functions [\#1081](https://github.com/apache/arrow-datafusion/pull/1081) ([Jimexist](https://github.com/Jimexist)) -- \[nit\] make schema qualifier error message in field lookup more readable [\#1079](https://github.com/apache/arrow-datafusion/pull/1079) ([Jimexist](https://github.com/Jimexist)) -- \[window function\] add `percent_rank` window function [\#1077](https://github.com/apache/arrow-datafusion/pull/1077) ([Jimexist](https://github.com/Jimexist)) -- \[window function\] add `cume_dist` implementation [\#1076](https://github.com/apache/arrow-datafusion/pull/1076) ([Jimexist](https://github.com/Jimexist)) -- Add a LogicalPlanBuilder::schema\(\) function [\#1075](https://github.com/apache/arrow-datafusion/pull/1075) ([alamb](https://github.com/alamb)) -- Add support for UNION \[DISTINCT\] sql [\#1068](https://github.com/apache/arrow-datafusion/pull/1068) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- fix: fix joins on Float32/Float64 columns bug [\#1054](https://github.com/apache/arrow-datafusion/pull/1054) ([francis-du](https://github.com/francis-du)) -- Update sqlparser-rs to 0.11 
[\#1052](https://github.com/apache/arrow-datafusion/pull/1052) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) -- Support querying CSV files without providing the schema [\#1050](https://github.com/apache/arrow-datafusion/pull/1050) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- remove hard coded partition count in ballista logicalplan deserialization [\#1044](https://github.com/apache/arrow-datafusion/pull/1044) ([xudong963](https://github.com/xudong963)) -- feat: add lit\_timestamp\_nanosecond [\#1030](https://github.com/apache/arrow-datafusion/pull/1030) ([NGA-TRAN](https://github.com/NGA-TRAN)) -- Ignore metadata on schema merge [\#1024](https://github.com/apache/arrow-datafusion/pull/1024) ([Smurphy000](https://github.com/Smurphy000)) -- add ExecutionConfig.with\_optimizer\_rules [\#1022](https://github.com/apache/arrow-datafusion/pull/1022) ([seddonm1](https://github.com/seddonm1)) -- Add baseline execution stats to `WindowAggExec` and `UnionExec`, and fixup `CoalescePartitionsExec` [\#1018](https://github.com/apache/arrow-datafusion/pull/1018) ([alamb](https://github.com/alamb)) -- Derive PartialOrd for Expr [\#1015](https://github.com/apache/arrow-datafusion/pull/1015) ([alamb](https://github.com/alamb)) -- Indexed field access for List [\#1006](https://github.com/apache/arrow-datafusion/pull/1006) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Igosuki](https://github.com/Igosuki)) -- Add metrics for Limit and Projection, and CoalesceBatches [\#1004](https://github.com/apache/arrow-datafusion/pull/1004) ([alamb](https://github.com/alamb)) -- Update DataFusion to arrow 6.0 [\#984](https://github.com/apache/arrow-datafusion/pull/984) ([alamb](https://github.com/alamb)) -- Implement Display for Expr, improve operator display [\#971](https://github.com/apache/arrow-datafusion/pull/971) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) -- Add metrics for FilterExec [\#960](https://github.com/apache/arrow-datafusion/pull/960) ([alamb](https://github.com/alamb)) -- Change compound column field name rules [\#952](https://github.com/apache/arrow-datafusion/pull/952) ([waynexia](https://github.com/waynexia)) -- ObjectStore API to read from remote storage systems [\#950](https://github.com/apache/arrow-datafusion/pull/950) ([yjshen](https://github.com/yjshen)) -- Add baseline metrics to `SortPreservingMergeExec` [\#948](https://github.com/apache/arrow-datafusion/pull/948) ([alamb](https://github.com/alamb)) -- Add support for TRIM LEADING/TRAILING/BOTH syntax [\#947](https://github.com/apache/arrow-datafusion/pull/947) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([adsharma](https://github.com/adsharma)) -- fixes \#933 replace placeholder fmt\_as fr ExecutionPlan impls [\#939](https://github.com/apache/arrow-datafusion/pull/939) ([tiphaineruy](https://github.com/tiphaineruy)) -- Add metrics for SortExect + HashAggregateExec [\#938](https://github.com/apache/arrow-datafusion/pull/938) ([alamb](https://github.com/alamb)) -- Add some additional asserts in `utils::from_plan` [\#930](https://github.com/apache/arrow-datafusion/pull/930) ([alamb](https://github.com/alamb)) -- Avro Table Provider [\#910](https://github.com/apache/arrow-datafusion/pull/910) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Igosuki](https://github.com/Igosuki)) -- Add BaselineMetrics, Timestamp metrics, add for `CoalescePartitionsExec`, rename output\_time -\> elapsed\_compute [\#909](https://github.com/apache/arrow-datafusion/pull/909) ([alamb](https://github.com/alamb)) -- add cross join support to ballista [\#891](https://github.com/apache/arrow-datafusion/pull/891) ([houqp](https://github.com/houqp)) -- Add Ballista support to DataFusion CLI 
[\#889](https://github.com/apache/arrow-datafusion/pull/889) ([andygrove](https://github.com/andygrove)) -- support like on DictionaryArray [\#876](https://github.com/apache/arrow-datafusion/pull/876) ([b41sh](https://github.com/b41sh)) -- Register table based on known schema without file IO [\#872](https://github.com/apache/arrow-datafusion/pull/872) ([Dandandan](https://github.com/Dandandan)) -- Add support for PostgreSQL regex match [\#870](https://github.com/apache/arrow-datafusion/pull/870) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([b41sh](https://github.com/b41sh)) -- Include planning time in datafusion-cli printing [\#860](https://github.com/apache/arrow-datafusion/pull/860) ([Dandandan](https://github.com/Dandandan)) -- Implement basic common subexpression eliminate optimization [\#792](https://github.com/apache/arrow-datafusion/pull/792) ([waynexia](https://github.com/waynexia)) -- Impl `ops::Not` for `expr` [\#763](https://github.com/apache/arrow-datafusion/pull/763) ([Jimexist](https://github.com/Jimexist)) - -**Fixed bugs:** - -- Can not use `between` in the select list: [\#1196](https://github.com/apache/arrow-datafusion/issues/1196) -- ORDER BY does not work with literals: Sort operation is not applicable to scalar value 'foo' [\#1195](https://github.com/apache/arrow-datafusion/issues/1195) -- window functions with NULL literals in `partition by` and `order by` do not work: Internal\("Sort operation is not applicable to scalar value NULL"\) [\#1194](https://github.com/apache/arrow-datafusion/issues/1194) -- Operation name not included in internal errors -- Internal\("Data type Boolean not supported for binary operation on dyn arrays"\) [\#1157](https://github.com/apache/arrow-datafusion/issues/1157) -- Physical plan explain UNION query says "ExecutionPlan\(PlaceHolder\)" [\#933](https://github.com/apache/arrow-datafusion/issues/933) -- Can not use LIKE on DictionaryArray encoded strings 
[\#815](https://github.com/apache/arrow-datafusion/issues/815) -- physical\_plan::repartition::tests::repartition\_with\_dropping\_output\_stream failing locally [\#614](https://github.com/apache/arrow-datafusion/issues/614) -- Fix some `BuiltinScalarFunction` panics with zero arguments [\#1249](https://github.com/apache/arrow-datafusion/pull/1249) ([capkurmagati](https://github.com/capkurmagati)) -- fix: not do boolean folding on NULL and/or expr [\#1245](https://github.com/apache/arrow-datafusion/pull/1245) ([NGA-TRAN](https://github.com/NGA-TRAN)) -- ignore case of `with header row` in sql when creating external table [\#1237](https://github.com/apache/arrow-datafusion/pull/1237) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([lichuan6](https://github.com/lichuan6)) -- fix: Min/Max aggregation data type should not be dictionary [\#1235](https://github.com/apache/arrow-datafusion/pull/1235) ([NGA-TRAN](https://github.com/NGA-TRAN)) -- Fix build with `--no-default-features` [\#1219](https://github.com/apache/arrow-datafusion/pull/1219) ([alamb](https://github.com/alamb)) -- Prevent "future cannot be sent between threads safely" compilation error [\#1155](https://github.com/apache/arrow-datafusion/pull/1155) ([jonmmease](https://github.com/jonmmease)) -- Clean up spawned task on drop for `AnalyzeExec`, `CoalescePartitionsExec`, `HashAggregateExec` [\#1121](https://github.com/apache/arrow-datafusion/pull/1121) ([crepererum](https://github.com/crepererum)) -- Clean up spawned task on `SortStream` drop [\#1105](https://github.com/apache/arrow-datafusion/pull/1105) ([crepererum](https://github.com/crepererum)) -- fix UNION ALL bug: thread 'main' panicked at 'index out of bounds: the len is 1 but the index is 1', ./src/datatypes/schema.rs:165:10 [\#1088](https://github.com/apache/arrow-datafusion/pull/1088) ([xudong963](https://github.com/xudong963)) -- python: fix generated table name in dataframe creation 
[\#1078](https://github.com/apache/arrow-datafusion/pull/1078) ([houqp](https://github.com/houqp)) -- fix subquery alias [\#1067](https://github.com/apache/arrow-datafusion/pull/1067) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) -- fix pattern handling in regexp\_match function [\#1065](https://github.com/apache/arrow-datafusion/pull/1065) ([houqp](https://github.com/houqp)) -- fix: joins on Timestamp columns [\#1055](https://github.com/apache/arrow-datafusion/pull/1055) ([francis-du](https://github.com/francis-du)) -- Fix metric name typo [\#943](https://github.com/apache/arrow-datafusion/pull/943) ([alamb](https://github.com/alamb)) -- EXPLAIN ANALYZE should run all Optimizer passes [\#929](https://github.com/apache/arrow-datafusion/pull/929) ([alamb](https://github.com/alamb)) - -**Documentation updates:** - -- update docs to fix DataFusion User Guide link [\#1238](https://github.com/apache/arrow-datafusion/pull/1238) ([jiangzhx](https://github.com/jiangzhx)) -- \[docs\] datafusion cli run via homebrew [\#1198](https://github.com/apache/arrow-datafusion/pull/1198) ([Jimexist](https://github.com/Jimexist)) -- add support for unary and binary values in values list, update docs [\#1172](https://github.com/apache/arrow-datafusion/pull/1172) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) -- Add additional docstring comments to `from_plan` [\#1168](https://github.com/apache/arrow-datafusion/pull/1168) ([alamb](https://github.com/alamb)) -- \[nit\] fix document issue for `approx_distinct` [\#1110](https://github.com/apache/arrow-datafusion/pull/1110) ([Jimexist](https://github.com/Jimexist)) -- implement `approx_distinct` function using HyperLogLog [\#1087](https://github.com/apache/arrow-datafusion/pull/1087) ([Jimexist](https://github.com/Jimexist)) -- Remove unused `use` statements from examples 
[\#1032](https://github.com/apache/arrow-datafusion/pull/1032) ([alamb](https://github.com/alamb)) -- consolidate datafusion docs with sphinx [\#993](https://github.com/apache/arrow-datafusion/pull/993) ([houqp](https://github.com/houqp)) -- Updated user-guide library docs with optimized config [\#976](https://github.com/apache/arrow-datafusion/pull/976) ([matthewmturner](https://github.com/matthewmturner)) -- Improve User Guide [\#954](https://github.com/apache/arrow-datafusion/pull/954) ([andygrove](https://github.com/andygrove)) -- \[MINOR\] Fix typos in doc comments [\#945](https://github.com/apache/arrow-datafusion/pull/945) ([alamb](https://github.com/alamb)) -- \[DataFusion\] - Add show and show\_limit function for DataFrame [\#923](https://github.com/apache/arrow-datafusion/pull/923) ([francis-du](https://github.com/francis-du)) -- Typo fix in DataFusion crate documentation [\#914](https://github.com/apache/arrow-datafusion/pull/914) ([antoinewdg](https://github.com/antoinewdg)) - -**Performance improvements:** - -- Improve avro reader performance by avoiding some cloning on avro\_rs::Value [\#1206](https://github.com/apache/arrow-datafusion/pull/1206) ([Igosuki](https://github.com/Igosuki)) -- optimize build profile for datafusion python binding, cli and ballista [\#1137](https://github.com/apache/arrow-datafusion/pull/1137) ([houqp](https://github.com/houqp)) -- Avoid stack overflow by reducing stack usage of `BinaryExpr::evaluate` in debug builds [\#1047](https://github.com/apache/arrow-datafusion/pull/1047) ([alamb](https://github.com/alamb)) -- Add ScalarValue::eq\_array optimized comparison function [\#844](https://github.com/apache/arrow-datafusion/pull/844) ([alamb](https://github.com/alamb)) -- Rework GroupByHash to for faster performance and support grouping by nulls [\#808](https://github.com/apache/arrow-datafusion/pull/808) ([alamb](https://github.com/alamb)) - -**Closed issues:** - -- InList expr with NULL literals do not work 
[\#1190](https://github.com/apache/arrow-datafusion/issues/1190) -- update the homepage README to include values, `approx_distinct`, etc. [\#1171](https://github.com/apache/arrow-datafusion/issues/1171) -- \[Python\]: Inconsistencies with Python package name [\#1011](https://github.com/apache/arrow-datafusion/issues/1011) -- Wanting to contribute to project where to start? [\#983](https://github.com/apache/arrow-datafusion/issues/983) -- delete redundant code [\#973](https://github.com/apache/arrow-datafusion/issues/973) -- How to build DataFusion python wheel [\#853](https://github.com/apache/arrow-datafusion/issues/853) -- Add support for partition pruning [\#204](https://github.com/apache/arrow-datafusion/issues/204) -- \[Datafusion\] Support joins on TimestampMillisecond columns [\#187](https://github.com/apache/arrow-datafusion/issues/187) -- TPC-H Query 21 [\#173](https://github.com/apache/arrow-datafusion/issues/173) -- TPC-H Query 13 [\#164](https://github.com/apache/arrow-datafusion/issues/164) -- TPC-H Query 8 [\#162](https://github.com/apache/arrow-datafusion/issues/162) -- implement split\_part\(string, delimiter, position\) [\#157](https://github.com/apache/arrow-datafusion/issues/157) -- Join Statement: Schema contains duplicate unqualified field name [\#155](https://github.com/apache/arrow-datafusion/issues/155) -- ParquetTable should avoid scanning all files twice [\#136](https://github.com/apache/arrow-datafusion/issues/136) -- Add support for reading partitioned Parquet files [\#133](https://github.com/apache/arrow-datafusion/issues/133) -- Add support for Parquet schema merging [\#132](https://github.com/apache/arrow-datafusion/issues/132) -- Catalog abstraction [\#126](https://github.com/apache/arrow-datafusion/issues/126) -- Optimizer rules should work with qualified column names [\#125](https://github.com/apache/arrow-datafusion/issues/125) -- Add optional qualifier to Expr::Column [\#121](https://github.com/apache/arrow-datafusion/issues/121) 
-- Implement modulus expression [\#99](https://github.com/apache/arrow-datafusion/issues/99) -- \[Rust\] Add constant folding to expressions during logically planning [\#98](https://github.com/apache/arrow-datafusion/issues/98) -- \[Rust\] Implement pretty print for physical query plan [\#93](https://github.com/apache/arrow-datafusion/issues/93) -- Can not group by boolean columns \(add boolean to valid keys of groupBy\) [\#91](https://github.com/apache/arrow-datafusion/issues/91) -- improve performance of building literal arrays [\#90](https://github.com/apache/arrow-datafusion/issues/90) -- \[rust\]\[datafusion\] optimize count\(\*\) queries on parquet sources [\#89](https://github.com/apache/arrow-datafusion/issues/89) -- Produce a design for a metrics framework [\#21](https://github.com/apache/arrow-datafusion/issues/21) - -**Merged pull requests:** - -- Add timezone string to stabilize test [\#1265](https://github.com/apache/arrow-datafusion/pull/1265) ([viirya](https://github.com/viirya)) -- numerical\_coercion pattern match optimize [\#1256](https://github.com/apache/arrow-datafusion/pull/1256) ([Jimexist](https://github.com/Jimexist)) -- fix and update window function sql tests [\#1059](https://github.com/apache/arrow-datafusion/pull/1059) ([Jimexist](https://github.com/Jimexist)) -- reduce ScalarValue from trait boilerplate with macro [\#989](https://github.com/apache/arrow-datafusion/pull/989) ([houqp](https://github.com/houqp)) - - -For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/arrow/blob/master/CHANGELOG.md) - -## [5.0.0](https://github.com/apache/arrow-datafusion/tree/5.0.0) (2021-08-10) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/4.0.0...5.0.0) - -**Breaking changes:** - -- Box ScalarValue:Lists, reduce size by half size [\#788](https://github.com/apache/arrow-datafusion/pull/788) ([alamb](https://github.com/alamb)) -- JOIN conditions are order dependent 
[\#778](https://github.com/apache/arrow-datafusion/pull/778) ([seddonm1](https://github.com/seddonm1)) -- Show the result of all optimizer passes in EXPLAIN VERBOSE [\#759](https://github.com/apache/arrow-datafusion/pull/759) ([alamb](https://github.com/alamb)) -- \#723 Datafusion add option in ExecutionConfig to enable/disable parquet pruning [\#749](https://github.com/apache/arrow-datafusion/pull/749) ([lvheyang](https://github.com/lvheyang)) -- Update API for extension planning to include logical plan [\#643](https://github.com/apache/arrow-datafusion/pull/643) ([alamb](https://github.com/alamb)) -- Rename MergeExec to CoalescePartitionsExec [\#635](https://github.com/apache/arrow-datafusion/pull/635) ([andygrove](https://github.com/andygrove)) -- fix 593, reduce cloning by taking ownership in logical planner's `from` fn [\#610](https://github.com/apache/arrow-datafusion/pull/610) ([Jimexist](https://github.com/Jimexist)) -- fix join column handling logic for `On` and `Using` constraints [\#605](https://github.com/apache/arrow-datafusion/pull/605) ([houqp](https://github.com/houqp)) -- Rewrite pruning logic in terms of PruningStatistics using Array trait \(option 2\) [\#426](https://github.com/apache/arrow-datafusion/pull/426) ([alamb](https://github.com/alamb)) -- Support reading from NdJson formatted data sources [\#404](https://github.com/apache/arrow-datafusion/pull/404) ([heymind](https://github.com/heymind)) -- Add metrics to RepartitionExec [\#398](https://github.com/apache/arrow-datafusion/pull/398) ([andygrove](https://github.com/andygrove)) -- Use 4.x arrow-rs from crates.io rather than git sha [\#395](https://github.com/apache/arrow-datafusion/pull/395) ([alamb](https://github.com/alamb)) -- Return Vec\<bool\> from PredicateBuilder rather than an `Fn` [\#370](https://github.com/apache/arrow-datafusion/pull/370) ([alamb](https://github.com/alamb)) -- Refactor: move RowGroupPredicateBuilder into its own module, rename to PruningPredicateBuilder 
[\#365](https://github.com/apache/arrow-datafusion/pull/365) ([alamb](https://github.com/alamb)) -- \[Datafusion\] NOW\(\) function support [\#288](https://github.com/apache/arrow-datafusion/pull/288) ([msathis](https://github.com/msathis)) -- Implement select distinct [\#262](https://github.com/apache/arrow-datafusion/pull/262) ([Dandandan](https://github.com/Dandandan)) -- Refactor datafusion/src/physical\_plan/common.rs build\_file\_list to take less param and reuse code [\#253](https://github.com/apache/arrow-datafusion/pull/253) ([Jimexist](https://github.com/Jimexist)) -- Support qualified columns in queries [\#55](https://github.com/apache/arrow-datafusion/pull/55) ([houqp](https://github.com/houqp)) -- Read CSV format text from stdin or memory [\#54](https://github.com/apache/arrow-datafusion/pull/54) ([heymind](https://github.com/heymind)) -- Use atomics for SQLMetric implementation, remove unused name field [\#25](https://github.com/apache/arrow-datafusion/pull/25) ([returnString](https://github.com/returnString)) - -**Implemented enhancements:** - -- Allow extension nodes to correctly plan physical expressions with relations [\#642](https://github.com/apache/arrow-datafusion/issues/642) -- Filters aren't passed down to table scans in a union [\#557](https://github.com/apache/arrow-datafusion/issues/557) -- Support pruning for `boolean` columns [\#490](https://github.com/apache/arrow-datafusion/issues/490) -- Implement SQLMetrics for RepartitionExec [\#397](https://github.com/apache/arrow-datafusion/issues/397) -- DataFusion benchmarks should show executed plan with metrics after query completes [\#396](https://github.com/apache/arrow-datafusion/issues/396) -- Use published versions of arrow rather than github shas [\#393](https://github.com/apache/arrow-datafusion/issues/393) -- Add Compare to GroupByScalar [\#364](https://github.com/apache/arrow-datafusion/issues/364) -- Reusable "row group pruning" logic 
[\#363](https://github.com/apache/arrow-datafusion/issues/363) -- Add an Order Preserving merge operator [\#362](https://github.com/apache/arrow-datafusion/issues/362) -- Implement Postgres compatible `now()` function [\#251](https://github.com/apache/arrow-datafusion/issues/251) -- COUNT DISTINCT does not support dictionary types [\#249](https://github.com/apache/arrow-datafusion/issues/249) -- Use standard make\_null\_array for CASE [\#222](https://github.com/apache/arrow-datafusion/issues/222) -- Implement date\_trunc\(\) function [\#203](https://github.com/apache/arrow-datafusion/issues/203) -- COUNT DISTINCT does not support for `Float64` [\#199](https://github.com/apache/arrow-datafusion/issues/199) -- Update SQLMetric to use atomics rather than a Mutex [\#30](https://github.com/apache/arrow-datafusion/issues/30) -- Implement PartialOrd for ScalarValue [\#838](https://github.com/apache/arrow-datafusion/pull/838) ([viirya](https://github.com/viirya)) -- Support date datatypes in max/min [\#820](https://github.com/apache/arrow-datafusion/pull/820) ([viirya](https://github.com/viirya)) -- Implement vectorized hashing for DictionaryArray types [\#812](https://github.com/apache/arrow-datafusion/pull/812) ([alamb](https://github.com/alamb)) -- Convert unsupported conditions in left right join to filters [\#796](https://github.com/apache/arrow-datafusion/pull/796) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) -- Implement streaming versions of Dataframe.collect methods [\#789](https://github.com/apache/arrow-datafusion/pull/789) ([andygrove](https://github.com/andygrove)) -- impl from str for column and scalar [\#762](https://github.com/apache/arrow-datafusion/pull/762) ([Jimexist](https://github.com/Jimexist)) -- impl fmt::Display for PlanType [\#752](https://github.com/apache/arrow-datafusion/pull/752) ([Jimexist](https://github.com/Jimexist)) -- Remove unnecessary projection in logical plan optimization 
phase [\#747](https://github.com/apache/arrow-datafusion/pull/747) ([waynexia](https://github.com/waynexia)) -- Support table columns alias [\#735](https://github.com/apache/arrow-datafusion/pull/735) ([Dandandan](https://github.com/Dandandan)) -- Derive PartialEq for datasource enums [\#734](https://github.com/apache/arrow-datafusion/pull/734) ([alamb](https://github.com/alamb)) -- Allow filetype to be lowercase, Implement FromStr for FileType [\#728](https://github.com/apache/arrow-datafusion/pull/728) ([Jimexist](https://github.com/Jimexist)) -- Update to use arrow 5.0 [\#721](https://github.com/apache/arrow-datafusion/pull/721) ([alamb](https://github.com/alamb)) -- \#554: Lead/lag window function with offset and default value arguments [\#687](https://github.com/apache/arrow-datafusion/pull/687) ([jgoday](https://github.com/jgoday)) -- dedup using join column in wildcard expansion [\#678](https://github.com/apache/arrow-datafusion/pull/678) ([houqp](https://github.com/houqp)) -- Implement metrics for HashJoinExec [\#664](https://github.com/apache/arrow-datafusion/pull/664) ([andygrove](https://github.com/andygrove)) -- Show physical plan with metrics in benchmark [\#662](https://github.com/apache/arrow-datafusion/pull/662) ([andygrove](https://github.com/andygrove)) -- Allow non-equijoin filters in join condition [\#660](https://github.com/apache/arrow-datafusion/pull/660) ([Dandandan](https://github.com/Dandandan)) -- Add End-to-end test for parquet pruning + metrics for ParquetExec [\#657](https://github.com/apache/arrow-datafusion/pull/657) ([alamb](https://github.com/alamb)) -- Add support for leading field in interval [\#647](https://github.com/apache/arrow-datafusion/pull/647) ([Dandandan](https://github.com/Dandandan)) -- Remove hard-coded PartitionMode from Ballista serde [\#637](https://github.com/apache/arrow-datafusion/pull/637) ([andygrove](https://github.com/andygrove)) -- Ballista: Implement scalable distributed joins 
[\#634](https://github.com/apache/arrow-datafusion/pull/634) ([andygrove](https://github.com/andygrove)) -- implement rank and dense\_rank function and refactor built-in window function evaluation [\#631](https://github.com/apache/arrow-datafusion/pull/631) ([Jimexist](https://github.com/Jimexist)) -- Improve "field not found" error messages [\#625](https://github.com/apache/arrow-datafusion/pull/625) ([andygrove](https://github.com/andygrove)) -- Support modulus op [\#577](https://github.com/apache/arrow-datafusion/pull/577) ([gangliao](https://github.com/gangliao)) -- implement `std::default::Default` for execution config [\#570](https://github.com/apache/arrow-datafusion/pull/570) ([Jimexist](https://github.com/Jimexist)) -- `to_timestamp_millis()`, `to_timestamp_micros()`, `to_timestamp_seconds()` [\#567](https://github.com/apache/arrow-datafusion/pull/567) ([velvia](https://github.com/velvia)) -- Filter push down for Union [\#559](https://github.com/apache/arrow-datafusion/pull/559) ([Dandandan](https://github.com/Dandandan)) -- Implement window functions with `partition_by` clause [\#558](https://github.com/apache/arrow-datafusion/pull/558) ([Jimexist](https://github.com/Jimexist)) -- support table alias in join clause [\#547](https://github.com/apache/arrow-datafusion/pull/547) ([houqp](https://github.com/houqp)) -- Not equal predicate in physical\_planning pruning [\#544](https://github.com/apache/arrow-datafusion/pull/544) ([jgoday](https://github.com/jgoday)) -- add error handling and boundary checking for window frames [\#530](https://github.com/apache/arrow-datafusion/pull/530) ([Jimexist](https://github.com/Jimexist)) -- Implement window functions with `order_by` clause [\#520](https://github.com/apache/arrow-datafusion/pull/520) ([Jimexist](https://github.com/Jimexist)) -- support group by column positions [\#519](https://github.com/apache/arrow-datafusion/pull/519) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] 
([jychen7](https://github.com/jychen7)) -- Implement constant folding for CAST [\#513](https://github.com/apache/arrow-datafusion/pull/513) ([msathis](https://github.com/msathis)) -- Add window frame constructs - alternative [\#506](https://github.com/apache/arrow-datafusion/pull/506) ([Jimexist](https://github.com/Jimexist)) -- Add `partition by` constructs in window functions and modify logical planning [\#501](https://github.com/apache/arrow-datafusion/pull/501) ([Jimexist](https://github.com/Jimexist)) -- Add support for boolean columns in pruning logic [\#500](https://github.com/apache/arrow-datafusion/pull/500) ([alamb](https://github.com/alamb)) -- \#215 resolve aliases for group by exprs [\#485](https://github.com/apache/arrow-datafusion/pull/485) ([jychen7](https://github.com/jychen7)) -- Support anti join [\#482](https://github.com/apache/arrow-datafusion/pull/482) ([Dandandan](https://github.com/Dandandan)) -- Support semi join [\#470](https://github.com/apache/arrow-datafusion/pull/470) ([Dandandan](https://github.com/Dandandan)) -- add `order by` construct in window function and logical plans [\#463](https://github.com/apache/arrow-datafusion/pull/463) ([Jimexist](https://github.com/Jimexist)) -- Remove redundant filters \(e.g. 
c\> 5 AND c\>5 --\> c\>5\) [\#436](https://github.com/apache/arrow-datafusion/pull/436) ([jgoday](https://github.com/jgoday)) -- fix: display the content of debug explain [\#434](https://github.com/apache/arrow-datafusion/pull/434) ([NGA-TRAN](https://github.com/NGA-TRAN)) -- implement lead and lag built-in window function [\#429](https://github.com/apache/arrow-datafusion/pull/429) ([Jimexist](https://github.com/Jimexist)) -- add support for ndjson for datafusion-cli [\#427](https://github.com/apache/arrow-datafusion/pull/427) ([Jimexist](https://github.com/Jimexist)) -- add `first_value`, `last_value`, and `nth_value` built-in window functions [\#403](https://github.com/apache/arrow-datafusion/pull/403) ([Jimexist](https://github.com/Jimexist)) -- export both `now` and `random` functions [\#389](https://github.com/apache/arrow-datafusion/pull/389) ([Jimexist](https://github.com/Jimexist)) -- Function to create `ArrayRef` from an iterator of ScalarValues [\#381](https://github.com/apache/arrow-datafusion/pull/381) ([alamb](https://github.com/alamb)) -- Sort preserving merge \(\#362\) [\#379](https://github.com/apache/arrow-datafusion/pull/379) ([tustvold](https://github.com/tustvold)) -- Add support for multiple partitions with SortExec \(\#362\) [\#378](https://github.com/apache/arrow-datafusion/pull/378) ([tustvold](https://github.com/tustvold)) -- add window expression stream, delegated window aggregation to aggregate functions, and implement `row_number` [\#375](https://github.com/apache/arrow-datafusion/pull/375) ([Jimexist](https://github.com/Jimexist)) -- Add PartialOrd and Ord to GroupByScalar \(\#364\) [\#368](https://github.com/apache/arrow-datafusion/pull/368) ([tustvold](https://github.com/tustvold)) -- Implement readable explain plans for physical plans [\#337](https://github.com/apache/arrow-datafusion/pull/337) ([alamb](https://github.com/alamb)) -- Add window expression part 1 - logical and physical planning, structure, to/from proto, and explain, 
for empty over clause only [\#334](https://github.com/apache/arrow-datafusion/pull/334) ([Jimexist](https://github.com/Jimexist)) -- Use NullArray to Pass row count to ScalarFunctions that take 0 arguments [\#328](https://github.com/apache/arrow-datafusion/pull/328) ([Jimexist](https://github.com/Jimexist)) -- add --quiet/-q flag and allow timing info to be turned on/off [\#323](https://github.com/apache/arrow-datafusion/pull/323) ([Jimexist](https://github.com/Jimexist)) -- Implement hash partitioned aggregation [\#320](https://github.com/apache/arrow-datafusion/pull/320) ([Dandandan](https://github.com/Dandandan)) -- Support COUNT\(DISTINCT timestamps\) [\#319](https://github.com/apache/arrow-datafusion/pull/319) ([charlibot](https://github.com/charlibot)) -- add random SQL function [\#303](https://github.com/apache/arrow-datafusion/pull/303) ([Jimexist](https://github.com/Jimexist)) -- allow datafusion cli to take -- comments [\#296](https://github.com/apache/arrow-datafusion/pull/296) ([Jimexist](https://github.com/Jimexist)) -- Add json print format mode to datafusion cli [\#295](https://github.com/apache/arrow-datafusion/pull/295) ([Jimexist](https://github.com/Jimexist)) -- Add print format param with support for tsv print format to datafusion cli [\#292](https://github.com/apache/arrow-datafusion/pull/292) ([Jimexist](https://github.com/Jimexist)) -- Add print format param and support for csv print format to datafusion cli [\#289](https://github.com/apache/arrow-datafusion/pull/289) ([Jimexist](https://github.com/Jimexist)) -- allow datafusion-cli to take a file param [\#285](https://github.com/apache/arrow-datafusion/pull/285) ([Jimexist](https://github.com/Jimexist)) -- add param validation for datafusion-cli [\#284](https://github.com/apache/arrow-datafusion/pull/284) ([Jimexist](https://github.com/Jimexist)) -- \[breaking change\] fix 265, log should be log10, and add ln [\#271](https://github.com/apache/arrow-datafusion/pull/271) 
([Jimexist](https://github.com/Jimexist)) -- Implement count distinct for dictionary arrays [\#256](https://github.com/apache/arrow-datafusion/pull/256) ([alamb](https://github.com/alamb)) -- Count distinct floats [\#252](https://github.com/apache/arrow-datafusion/pull/252) ([pjmore](https://github.com/pjmore)) -- Add rule to eliminate `LIMIT 0` and replace it with an `EmptyRelation` [\#213](https://github.com/apache/arrow-datafusion/pull/213) ([Dandandan](https://github.com/Dandandan)) -- Allow table providers to indicate their type for catalog metadata [\#205](https://github.com/apache/arrow-datafusion/pull/205) ([returnString](https://github.com/returnString)) -- Use arrow eq kernels in CaseWhen expression evaluation [\#52](https://github.com/apache/arrow-datafusion/pull/52) ([Dandandan](https://github.com/Dandandan)) -- Re-export Arrow and Parquet crates from DataFusion [\#39](https://github.com/apache/arrow-datafusion/pull/39) ([returnString](https://github.com/returnString)) -- \[DataFusion\] Optimize hash join inner workings, null handling fix [\#24](https://github.com/apache/arrow-datafusion/pull/24) ([Dandandan](https://github.com/Dandandan)) -- \[ARROW-12441\] \[DataFusion\] Cross join implementation [\#11](https://github.com/apache/arrow-datafusion/pull/11) ([Dandandan](https://github.com/Dandandan)) - -**Fixed bugs:** - -- Projection pushdown removes unqualified column names even when they are used [\#617](https://github.com/apache/arrow-datafusion/issues/617) -- Panic while running join datatypes/schema.rs:165:10 [\#601](https://github.com/apache/arrow-datafusion/issues/601) -- Indentation is incorrect for joins in formatted physical plans [\#345](https://github.com/apache/arrow-datafusion/issues/345) -- Error while running `COUNT DISTINCT (timestamp)`: 'Unexpected DataType for list [\#314](https://github.com/apache/arrow-datafusion/issues/314) -- When joining two tables, get Error: Plan\("Schema contains duplicate unqualified field name \'xxx\'"\) 
[\#311](https://github.com/apache/arrow-datafusion/issues/311) -- Incorrect answers with SELECT DISTINCT queries [\#250](https://github.com/apache/arrow-datafusion/issues/250) -- Intermitent failure in CI join\_with\_hash\_collision [\#227](https://github.com/apache/arrow-datafusion/issues/227) -- `Concat` from Dataframe API no longer accepts multiple expressions [\#226](https://github.com/apache/arrow-datafusion/issues/226) -- Fix right, full join handling when having multiple non-matching rows at the left side [\#845](https://github.com/apache/arrow-datafusion/pull/845) ([Dandandan](https://github.com/Dandandan)) -- Qualified field resolution too strict [\#810](https://github.com/apache/arrow-datafusion/pull/810) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([seddonm1](https://github.com/seddonm1)) -- Better join order resolution logic [\#797](https://github.com/apache/arrow-datafusion/pull/797) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([seddonm1](https://github.com/seddonm1)) -- Produce correct answers for Group BY NULL \(Option 1\) [\#793](https://github.com/apache/arrow-datafusion/pull/793) ([alamb](https://github.com/alamb)) -- Use consistent version of string\_to\_timestamp\_nanos in DataFusion [\#767](https://github.com/apache/arrow-datafusion/pull/767) ([alamb](https://github.com/alamb)) -- \#723 limit pruning rule to simple expression [\#764](https://github.com/apache/arrow-datafusion/pull/764) ([lvheyang](https://github.com/lvheyang)) -- \#699 fix return type conflict when calling builtin math fuctions [\#716](https://github.com/apache/arrow-datafusion/pull/716) ([lvheyang](https://github.com/lvheyang)) -- Fix Date32 and Date64 parquet row group pruning [\#690](https://github.com/apache/arrow-datafusion/pull/690) ([alamb](https://github.com/alamb)) -- Remove qualifiers on pushed down predicates / Fix parquet pruning [\#689](https://github.com/apache/arrow-datafusion/pull/689) ([alamb](https://github.com/alamb)) -- 
use `Weak` ptr to break catalog list \<\> info schema cyclic reference [\#681](https://github.com/apache/arrow-datafusion/pull/681) ([crepererum](https://github.com/crepererum)) -- honor table name for csv/parquet scan in ballista plan serde [\#629](https://github.com/apache/arrow-datafusion/pull/629) ([houqp](https://github.com/houqp)) -- fix 621, where unnamed window functions shall be differentiated by partition and order by clause [\#622](https://github.com/apache/arrow-datafusion/pull/622) ([Jimexist](https://github.com/Jimexist)) -- RFC: Do not prune out unnecessary columns with unqualified references [\#619](https://github.com/apache/arrow-datafusion/pull/619) ([alamb](https://github.com/alamb)) -- \[fix\] select \* on empty table [\#613](https://github.com/apache/arrow-datafusion/pull/613) ([rdettai](https://github.com/rdettai)) -- fix 592, support alias in window functions [\#607](https://github.com/apache/arrow-datafusion/pull/607) ([Jimexist](https://github.com/Jimexist)) -- RepartitionExec should not error if output has hung up [\#576](https://github.com/apache/arrow-datafusion/pull/576) ([alamb](https://github.com/alamb)) -- Fix pruning on not equal predicate [\#561](https://github.com/apache/arrow-datafusion/pull/561) ([alamb](https://github.com/alamb)) -- hash float arrays using primitive usigned integer type [\#556](https://github.com/apache/arrow-datafusion/pull/556) ([houqp](https://github.com/houqp)) -- Return errors properly from RepartitionExec [\#521](https://github.com/apache/arrow-datafusion/pull/521) ([alamb](https://github.com/alamb)) -- refactor sort exec stream and combine batches [\#515](https://github.com/apache/arrow-datafusion/pull/515) ([Jimexist](https://github.com/Jimexist)) -- Fix display of execution time in datafusion-cli [\#514](https://github.com/apache/arrow-datafusion/pull/514) ([Dandandan](https://github.com/Dandandan)) -- Wrong aggregation arguments error. 
[\#505](https://github.com/apache/arrow-datafusion/pull/505) ([jgoday](https://github.com/jgoday)) -- fix window aggregation with alias and add integration test case [\#454](https://github.com/apache/arrow-datafusion/pull/454) ([Jimexist](https://github.com/Jimexist)) -- fix: don't duplicate existing filters [\#409](https://github.com/apache/arrow-datafusion/pull/409) ([e-dard](https://github.com/e-dard)) -- Fixed incorrect logical type in GroupByScalar. [\#391](https://github.com/apache/arrow-datafusion/pull/391) ([jorgecarleitao](https://github.com/jorgecarleitao)) -- Fix indented display for multi-child nodes [\#358](https://github.com/apache/arrow-datafusion/pull/358) ([alamb](https://github.com/alamb)) -- Fix SQL planner to support multibyte column names [\#357](https://github.com/apache/arrow-datafusion/pull/357) ([agatan](https://github.com/agatan)) -- Fix wrong projection 'optimization' [\#268](https://github.com/apache/arrow-datafusion/pull/268) ([Dandandan](https://github.com/Dandandan)) -- Fix Left join implementation is incorrect for 0 or multiple batches on the right side [\#238](https://github.com/apache/arrow-datafusion/pull/238) ([Dandandan](https://github.com/Dandandan)) -- Count distinct boolean [\#230](https://github.com/apache/arrow-datafusion/pull/230) ([pjmore](https://github.com/pjmore)) -- Fix Filter / where clause without column names is removed in optimization pass [\#225](https://github.com/apache/arrow-datafusion/pull/225) ([Dandandan](https://github.com/Dandandan)) - -**Documentation updates:** - -- No way to get to the examples from docs.rs [\#186](https://github.com/apache/arrow-datafusion/issues/186) -- Update docs to use vendored version of arrow [\#772](https://github.com/apache/arrow-datafusion/pull/772) ([alamb](https://github.com/alamb)) -- Fix typo in DEVELOPERS.md [\#692](https://github.com/apache/arrow-datafusion/pull/692) ([lvheyang](https://github.com/lvheyang)) -- update stale documentations related to window functions 
[\#598](https://github.com/apache/arrow-datafusion/pull/598) ([Jimexist](https://github.com/Jimexist)) -- update readme to reflect work on window functions [\#471](https://github.com/apache/arrow-datafusion/pull/471) ([Jimexist](https://github.com/Jimexist)) -- Add examples section to datafusion crate doc [\#457](https://github.com/apache/arrow-datafusion/pull/457) ([mluts](https://github.com/mluts)) -- add invariants spec [\#443](https://github.com/apache/arrow-datafusion/pull/443) ([houqp](https://github.com/houqp)) -- add output field name rfc [\#422](https://github.com/apache/arrow-datafusion/pull/422) ([houqp](https://github.com/houqp)) -- Update more docs and also the developer.md doc [\#414](https://github.com/apache/arrow-datafusion/pull/414) ([Jimexist](https://github.com/Jimexist)) -- use prettier to format md files [\#367](https://github.com/apache/arrow-datafusion/pull/367) ([Jimexist](https://github.com/Jimexist)) -- Add new logo svg with white background [\#313](https://github.com/apache/arrow-datafusion/pull/313) ([parthsarthy](https://github.com/parthsarthy)) -- Add projects \(Squirtle and Tensorbase\) to list in readme [\#312](https://github.com/apache/arrow-datafusion/pull/312) ([parthsarthy](https://github.com/parthsarthy)) -- docs - fix the ballista link [\#274](https://github.com/apache/arrow-datafusion/pull/274) ([haoxins](https://github.com/haoxins)) -- misc\(README\): Replace Cube.js with Cube Store [\#248](https://github.com/apache/arrow-datafusion/pull/248) ([ovr](https://github.com/ovr)) -- Initial docs for SQL syntax [\#242](https://github.com/apache/arrow-datafusion/pull/242) ([Dandandan](https://github.com/Dandandan)) -- Deduplicate README.md [\#79](https://github.com/apache/arrow-datafusion/pull/79) ([msathis](https://github.com/msathis)) - -**Performance improvements:** - -- Speed up inlist for strings and primitives [\#813](https://github.com/apache/arrow-datafusion/pull/813) ([Dandandan](https://github.com/Dandandan)) -- perf: 
improve performance of `SortPreservingMergeExec` operator [\#722](https://github.com/apache/arrow-datafusion/pull/722) ([e-dard](https://github.com/e-dard)) -- Optimize min/max queries with table statistics [\#719](https://github.com/apache/arrow-datafusion/pull/719) ([b41sh](https://github.com/b41sh)) -- perf: Improve materialisation performance of SortPreservingMergeExec [\#691](https://github.com/apache/arrow-datafusion/pull/691) ([e-dard](https://github.com/e-dard)) -- Optimize count\(\*\) with table statistics [\#620](https://github.com/apache/arrow-datafusion/pull/620) ([Dandandan](https://github.com/Dandandan)) -- optimize window function's `find_ranges_in_range` [\#595](https://github.com/apache/arrow-datafusion/pull/595) ([Jimexist](https://github.com/Jimexist)) -- Collapse sort into window expr and do sort within logical phase [\#571](https://github.com/apache/arrow-datafusion/pull/571) ([Jimexist](https://github.com/Jimexist)) -- Use repartition in window functions to speed up [\#569](https://github.com/apache/arrow-datafusion/pull/569) ([Jimexist](https://github.com/Jimexist)) -- Constant fold / optimize `to_timestamp` function during planning [\#387](https://github.com/apache/arrow-datafusion/pull/387) ([msathis](https://github.com/msathis)) -- Speed up `create_batch_from_map` [\#339](https://github.com/apache/arrow-datafusion/pull/339) ([Dandandan](https://github.com/Dandandan)) -- Simplify math expression code \(use unary kernel\) [\#309](https://github.com/apache/arrow-datafusion/pull/309) ([Dandandan](https://github.com/Dandandan)) - -**Closed issues:** - -- Confirm git tagging strategy for releases [\#770](https://github.com/apache/arrow-datafusion/issues/770) -- arrow::util::pretty::pretty\_format\_batches missing [\#769](https://github.com/apache/arrow-datafusion/issues/769) -- move the `assert_batches_eq!` macros to a non part of datafusion [\#745](https://github.com/apache/arrow-datafusion/issues/745) -- fix an issue where aliases are not 
respected in generating downstream schemas in window expr [\#592](https://github.com/apache/arrow-datafusion/issues/592) -- make the planner to print more succinct and useful information in window function explain clause [\#526](https://github.com/apache/arrow-datafusion/issues/526) -- move window frame module to be in `logical_plan` [\#517](https://github.com/apache/arrow-datafusion/issues/517) -- use a more rust idiomatic way of handling nth\_value [\#448](https://github.com/apache/arrow-datafusion/issues/448) -- create a test with more than one partition for window functions [\#435](https://github.com/apache/arrow-datafusion/issues/435) -- COUNT DISTINCT does not support for `Boolean` [\#202](https://github.com/apache/arrow-datafusion/issues/202) -- Read CSV format text from stdin or memory [\#198](https://github.com/apache/arrow-datafusion/issues/198) -- Fix null handling hash join [\#195](https://github.com/apache/arrow-datafusion/issues/195) -- Allow TableProviders to indicate their type for the information schema [\#191](https://github.com/apache/arrow-datafusion/issues/191) -- Make DataFrame extensible [\#190](https://github.com/apache/arrow-datafusion/issues/190) -- TPC-H Query 19 [\#170](https://github.com/apache/arrow-datafusion/issues/170) -- TPC-H Query 7 [\#161](https://github.com/apache/arrow-datafusion/issues/161) -- Upgrade hashbrown to 0.10 [\#151](https://github.com/apache/arrow-datafusion/issues/151) -- Implement vectorized hashing for hash aggregate [\#149](https://github.com/apache/arrow-datafusion/issues/149) -- More efficient LEFT join implementation [\#143](https://github.com/apache/arrow-datafusion/issues/143) -- Implement vectorized hashing [\#142](https://github.com/apache/arrow-datafusion/issues/142) -- RFC Roadmap for 2021 \(DataFusion\) [\#140](https://github.com/apache/arrow-datafusion/issues/140) -- Implement hash partitioning [\#131](https://github.com/apache/arrow-datafusion/issues/131) -- Grouping by column position 
[\#110](https://github.com/apache/arrow-datafusion/issues/110) -- \[Datafusion\] GROUP BY with a high cardinality doesn't seem to finish [\#107](https://github.com/apache/arrow-datafusion/issues/107) -- \[Rust\] Add support for JSON data sources [\#103](https://github.com/apache/arrow-datafusion/issues/103) -- \[Rust\] Implement metrics framework [\#95](https://github.com/apache/arrow-datafusion/issues/95) -- Publically export Arrow crate from datafusion [\#36](https://github.com/apache/arrow-datafusion/issues/36) -- Implement hash-partitioned hash aggregate [\#27](https://github.com/apache/arrow-datafusion/issues/27) -- Consider using GitHub pages for DataFusion/Ballista documentation [\#18](https://github.com/apache/arrow-datafusion/issues/18) -- Update "repository" in Cargo.toml [\#16](https://github.com/apache/arrow-datafusion/issues/16) - -**Merged pull requests:** - -- Use `RawTable` API in hash join [\#827](https://github.com/apache/arrow-datafusion/pull/827) ([Dandandan](https://github.com/Dandandan)) -- Add test for window functions on dictionary [\#823](https://github.com/apache/arrow-datafusion/pull/823) ([alamb](https://github.com/alamb)) -- Update dependencies: prost to 0.8 and tonic to 0.5 [\#818](https://github.com/apache/arrow-datafusion/pull/818) ([alamb](https://github.com/alamb)) -- Move `hash_array` into hash\_utils.rs [\#807](https://github.com/apache/arrow-datafusion/pull/807) ([alamb](https://github.com/alamb)) -- Remove GroupByScalar and use ScalarValue in preparation for supporting null values in GroupBy [\#786](https://github.com/apache/arrow-datafusion/pull/786) ([alamb](https://github.com/alamb)) -- fix 226, make `concat`, `concat_ws`, and `random` work with `Python` crate [\#761](https://github.com/apache/arrow-datafusion/pull/761) ([Jimexist](https://github.com/Jimexist)) -- Test for parquet pruning disabling [\#754](https://github.com/apache/arrow-datafusion/pull/754) ([alamb](https://github.com/alamb)) -- Add explain verbose with 
limit push down [\#751](https://github.com/apache/arrow-datafusion/pull/751) ([Jimexist](https://github.com/Jimexist)) -- Move assert\_batches\_eq! macros to test\_utils.rs [\#746](https://github.com/apache/arrow-datafusion/pull/746) ([alamb](https://github.com/alamb)) -- Show optimized physical and logical plans in EXPLAIN [\#744](https://github.com/apache/arrow-datafusion/pull/744) ([alamb](https://github.com/alamb)) -- update `python` crate to support latest pyo3 syntax and gil sematics [\#741](https://github.com/apache/arrow-datafusion/pull/741) ([Jimexist](https://github.com/Jimexist)) -- update `python` crate dependencies [\#740](https://github.com/apache/arrow-datafusion/pull/740) ([Jimexist](https://github.com/Jimexist)) -- provide more details on required .parquet file extension error message [\#729](https://github.com/apache/arrow-datafusion/pull/729) ([Jimexist](https://github.com/Jimexist)) -- split up windows functions into a dedicated module with separate files [\#724](https://github.com/apache/arrow-datafusion/pull/724) ([Jimexist](https://github.com/Jimexist)) -- Use pytest in integration test [\#715](https://github.com/apache/arrow-datafusion/pull/715) ([Jimexist](https://github.com/Jimexist)) -- replace once iter chain with array::IntoIter [\#704](https://github.com/apache/arrow-datafusion/pull/704) ([houqp](https://github.com/houqp)) -- avoid iterator materialization in column index lookup [\#703](https://github.com/apache/arrow-datafusion/pull/703) ([houqp](https://github.com/houqp)) -- Fix build with 1.52.1 [\#696](https://github.com/apache/arrow-datafusion/pull/696) ([alamb](https://github.com/alamb)) -- Fix test output due to logical merge conflict [\#694](https://github.com/apache/arrow-datafusion/pull/694) ([alamb](https://github.com/alamb)) -- add more integration tests [\#668](https://github.com/apache/arrow-datafusion/pull/668) ([Jimexist](https://github.com/Jimexist)) -- Bump arrow and parquet versions to 4.4 
[\#654](https://github.com/apache/arrow-datafusion/pull/654) ([toddtreece](https://github.com/toddtreece)) -- Add query 15 to TPC-H queries [\#645](https://github.com/apache/arrow-datafusion/pull/645) ([Dandandan](https://github.com/Dandandan)) -- Improve error message and comments [\#641](https://github.com/apache/arrow-datafusion/pull/641) ([alamb](https://github.com/alamb)) -- add integration tests for rank, dense\_rank, fix last\_value evaluation with rank [\#638](https://github.com/apache/arrow-datafusion/pull/638) ([Jimexist](https://github.com/Jimexist)) -- round trip TPCH queries in tests [\#630](https://github.com/apache/arrow-datafusion/pull/630) ([houqp](https://github.com/houqp)) -- use Into\ as argument type wherever applicable [\#615](https://github.com/apache/arrow-datafusion/pull/615) ([houqp](https://github.com/houqp)) -- reuse alias map in aggregate logical planning and refactor position resolution [\#606](https://github.com/apache/arrow-datafusion/pull/606) ([Jimexist](https://github.com/Jimexist)) -- fix clippy warnings [\#581](https://github.com/apache/arrow-datafusion/pull/581) ([Jimexist](https://github.com/Jimexist)) -- Add benchmarks to window function queries [\#564](https://github.com/apache/arrow-datafusion/pull/564) ([Jimexist](https://github.com/Jimexist)) -- reuse code for now function expr creation [\#548](https://github.com/apache/arrow-datafusion/pull/548) ([houqp](https://github.com/houqp)) -- turn on clippy rule for needless borrow [\#545](https://github.com/apache/arrow-datafusion/pull/545) ([Jimexist](https://github.com/Jimexist)) -- Refactor hash aggregates's planner building code [\#539](https://github.com/apache/arrow-datafusion/pull/539) ([Jimexist](https://github.com/Jimexist)) -- Cleanup Repartition Exec code [\#538](https://github.com/apache/arrow-datafusion/pull/538) ([alamb](https://github.com/alamb)) -- reuse datafusion physical planner in ballista building from protobuf 
[\#532](https://github.com/apache/arrow-datafusion/pull/532) ([Jimexist](https://github.com/Jimexist)) -- remove redundant `into_iter()` calls [\#527](https://github.com/apache/arrow-datafusion/pull/527) ([Jimexist](https://github.com/Jimexist)) -- Fix 517 - move `window_frames` module to `logical_plan` [\#518](https://github.com/apache/arrow-datafusion/pull/518) ([Jimexist](https://github.com/Jimexist)) -- Refactor window aggregation, simplify batch processing logic [\#516](https://github.com/apache/arrow-datafusion/pull/516) ([Jimexist](https://github.com/Jimexist)) -- Add datafusion::test\_util, resolve test data paths without env vars [\#498](https://github.com/apache/arrow-datafusion/pull/498) ([mluts](https://github.com/mluts)) -- Avoid warnings in tests when compiling without default features [\#489](https://github.com/apache/arrow-datafusion/pull/489) ([alamb](https://github.com/alamb)) -- update cargo.toml in python crate and fix unit test due to hash joins [\#483](https://github.com/apache/arrow-datafusion/pull/483) ([Jimexist](https://github.com/Jimexist)) -- use prettier check in CI [\#453](https://github.com/apache/arrow-datafusion/pull/453) ([Jimexist](https://github.com/Jimexist)) -- Optimize `nth_value`, remove `first_value`, `last_value` structs and use idiomatic rust style [\#452](https://github.com/apache/arrow-datafusion/pull/452) ([Jimexist](https://github.com/Jimexist)) -- Fixed typo / logical merge conflict [\#433](https://github.com/apache/arrow-datafusion/pull/433) ([jorgecarleitao](https://github.com/jorgecarleitao)) -- include test data and add aggregation tests in integration test [\#425](https://github.com/apache/arrow-datafusion/pull/425) ([Jimexist](https://github.com/Jimexist)) -- Add some padding around the logo [\#411](https://github.com/apache/arrow-datafusion/pull/411) ([parthsarthy](https://github.com/parthsarthy)) -- Benchmark subcommand to distinguish between DataFusion and Ballista 
[\#402](https://github.com/apache/arrow-datafusion/pull/402) ([jgoday](https://github.com/jgoday)) -- refactor datafusion/`scalar_value` to use more macro and avoid dup code [\#392](https://github.com/apache/arrow-datafusion/pull/392) ([Jimexist](https://github.com/Jimexist)) -- Update TPC-H benchmark to show physical plan when debug mode is enabled [\#386](https://github.com/apache/arrow-datafusion/pull/386) ([andygrove](https://github.com/andygrove)) -- Update arrow dependencies again [\#341](https://github.com/apache/arrow-datafusion/pull/341) ([alamb](https://github.com/alamb)) -- Update arrow-rs deps [\#317](https://github.com/apache/arrow-datafusion/pull/317) ([alamb](https://github.com/alamb)) -- Update PR template by commenting out instructions [\#315](https://github.com/apache/arrow-datafusion/pull/315) ([alamb](https://github.com/alamb)) -- fix clippy warning [\#286](https://github.com/apache/arrow-datafusion/pull/286) ([Jimexist](https://github.com/Jimexist)) -- add integration test to compare datafusion-cli against psql [\#281](https://github.com/apache/arrow-datafusion/pull/281) ([Jimexist](https://github.com/Jimexist)) -- Update arrow deps [\#269](https://github.com/apache/arrow-datafusion/pull/269) ([alamb](https://github.com/alamb)) -- Use multi-stage build dockerfile in datafusion-cli and reduce image size from 2.16GB to 89.9MB [\#266](https://github.com/apache/arrow-datafusion/pull/266) ([Jimexist](https://github.com/Jimexist)) -- Enable redundant\_field\_names clippy lint [\#261](https://github.com/apache/arrow-datafusion/pull/261) ([Dandandan](https://github.com/Dandandan)) -- fix clippy lint [\#259](https://github.com/apache/arrow-datafusion/pull/259) ([alamb](https://github.com/alamb)) -- Move datafusion-cli to new crate [\#231](https://github.com/apache/arrow-datafusion/pull/231) ([Dandandan](https://github.com/Dandandan)) -- Make test join\_with\_hash\_collision deterministic [\#229](https://github.com/apache/arrow-datafusion/pull/229) 
([Dandandan](https://github.com/Dandandan)) -- Update arrow-rs deps \(to fix build due to flatbuffers update\) [\#224](https://github.com/apache/arrow-datafusion/pull/224) ([alamb](https://github.com/alamb)) -- Use standard make\_null\_array for CASE [\#223](https://github.com/apache/arrow-datafusion/pull/223) ([alamb](https://github.com/alamb)) -- update arrow-rs deps to latest master [\#216](https://github.com/apache/arrow-datafusion/pull/216) ([alamb](https://github.com/alamb)) -- MINOR: Remove empty rust dir [\#61](https://github.com/apache/arrow-datafusion/pull/61) ([andygrove](https://github.com/andygrove)) - - - -\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)* +- [22.0.0](../dev/changelog/22.0.0.md) +- [21.1.0](../dev/changelog/21.1.0.md) +- [21.0.0](../dev/changelog/21.0.0.md) +- [20.0.0](../dev/changelog/20.0.0.md) +- [19.0.0](../dev/changelog/19.0.0.md) +- [18.0.0](../dev/changelog/18.0.0.md) +- [17.0.0](../dev/changelog/17.0.0.md) +- [16.1.0](../dev/changelog/16.1.0.md) +- [16.0.0](../dev/changelog/16.0.0.md) +- [15.0.0](../dev/changelog/15.0.0.md) +- [14.0.0](../dev/changelog/14.0.0.md) +- [13.0.0](../dev/changelog/13.0.0.md) +- [12.0.0](../dev/changelog/12.0.0.md) +- [11.0.0](../dev/changelog/11.0.0.md) +- [10.0.0](../dev/changelog/10.0.0.md) +- [9.0.0](../dev/changelog/9.0.0.md) +- [8.0.0](../dev/changelog/8.0.0.md) +- [7.1.0](../dev/changelog/7.1.0.md) +- [7.0.0](../dev/changelog/7.0.0.md) +- [6.0.0](../dev/changelog/6.0.0.md) +- [5.0.0](../dev/changelog/5.0.0.md) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 846c2bc9441bb..ed3b23a9af5ac 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -63,14 +63,14 @@ bytes = "1.4" bzip2 = { version = "0.4.3", optional = true } chrono = { version = "0.4.23", default-features = false } dashmap = "5.4.0" -datafusion-common = { path = "../common", version = 
"21.1.0", features = ["parquet", "object_store"] } -datafusion-execution = { path = "../execution", version = "21.1.0" } -datafusion-expr = { path = "../expr", version = "21.1.0" } -datafusion-jit = { path = "../jit", version = "21.1.0", optional = true } -datafusion-optimizer = { path = "../optimizer", version = "21.1.0", default-features = false } -datafusion-physical-expr = { path = "../physical-expr", version = "21.1.0", default-features = false } -datafusion-row = { path = "../row", version = "21.1.0" } -datafusion-sql = { path = "../sql", version = "21.1.0" } +datafusion-common = { path = "../common", version = "22.0.0", features = ["parquet", "object_store"] } +datafusion-execution = { path = "../execution", version = "22.0.0" } +datafusion-expr = { path = "../expr", version = "22.0.0" } +datafusion-jit = { path = "../jit", version = "22.0.0", optional = true } +datafusion-optimizer = { path = "../optimizer", version = "22.0.0", default-features = false } +datafusion-physical-expr = { path = "../physical-expr", version = "22.0.0", default-features = false } +datafusion-row = { path = "../row", version = "22.0.0" } +datafusion-sql = { path = "../sql", version = "22.0.0" } flate2 = { version = "1.0.24", optional = true } futures = "0.3" glob = "0.3.0" diff --git a/datafusion/execution/Cargo.toml b/datafusion/execution/Cargo.toml index 9e92e7ab8f471..19cb0b1e0e008 100644 --- a/datafusion/execution/Cargo.toml +++ b/datafusion/execution/Cargo.toml @@ -34,8 +34,8 @@ path = "src/lib.rs" [dependencies] dashmap = "5.4.0" -datafusion-common = { path = "../common", version = "21.1.0" } -datafusion-expr = { path = "../expr", version = "21.1.0" } +datafusion-common = { path = "../common", version = "22.0.0" } +datafusion-expr = { path = "../expr", version = "22.0.0" } hashbrown = { version = "0.13", features = ["raw"] } log = "^0.4" object_store = "0.5.4" diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 4f8c2bb05bdcc..f3e2cec2cceb0 100644 --- 
a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -37,7 +37,7 @@ path = "src/lib.rs" [dependencies] ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } arrow = { workspace = true } -datafusion-common = { path = "../common", version = "21.1.0" } +datafusion-common = { path = "../common", version = "22.0.0" } sqlparser = "0.32" [dev-dependencies] diff --git a/datafusion/jit/Cargo.toml b/datafusion/jit/Cargo.toml index e6694a35e4058..040394469fea3 100644 --- a/datafusion/jit/Cargo.toml +++ b/datafusion/jit/Cargo.toml @@ -41,7 +41,7 @@ cranelift = "0.89.0" cranelift-jit = "0.89.0" cranelift-module = "0.89.0" cranelift-native = "0.89.0" -datafusion-common = { path = "../common", version = "21.1.0", features = ["jit"] } -datafusion-expr = { path = "../expr", version = "21.1.0" } +datafusion-common = { path = "../common", version = "22.0.0", features = ["jit"] } +datafusion-expr = { path = "../expr", version = "22.0.0" } parking_lot = "0.12" diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index b86de81aa5904..056d1ee58753b 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -43,9 +43,9 @@ unicode_expressions = ["datafusion-physical-expr/unicode_expressions"] arrow = { workspace = true } async-trait = "0.1.41" chrono = { version = "0.4.23", default-features = false } -datafusion-common = { path = "../common", version = "21.1.0" } -datafusion-expr = { path = "../expr", version = "21.1.0" } -datafusion-physical-expr = { path = "../physical-expr", version = "21.1.0", default-features = false } +datafusion-common = { path = "../common", version = "22.0.0" } +datafusion-expr = { path = "../expr", version = "22.0.0" } +datafusion-physical-expr = { path = "../physical-expr", version = "22.0.0", default-features = false } hashbrown = { version = "0.13", features = ["raw"] } itertools = "0.10" log = "^0.4" @@ -53,5 +53,5 @@ regex-syntax = "0.6.28" [dev-dependencies] ctor = 
"0.2.0" -datafusion-sql = { path = "../sql", version = "21.1.0" } +datafusion-sql = { path = "../sql", version = "22.0.0" } env_logger = "0.10.0" diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 1d0152d49abec..b28ad534fbd28 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -50,9 +50,9 @@ arrow-schema = { workspace = true } blake2 = { version = "^0.10.2", optional = true } blake3 = { version = "1.0", optional = true } chrono = { version = "0.4.23", default-features = false } -datafusion-common = { path = "../common", version = "21.1.0" } -datafusion-expr = { path = "../expr", version = "21.1.0" } -datafusion-row = { path = "../row", version = "21.1.0" } +datafusion-common = { path = "../common", version = "22.0.0" } +datafusion-expr = { path = "../expr", version = "22.0.0" } +datafusion-row = { path = "../row", version = "22.0.0" } half = { version = "2.1", default-features = false } hashbrown = { version = "0.13", features = ["raw"] } indexmap = "1.9.2" diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index d68d27cd72537..73d70b61d90b8 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -42,9 +42,9 @@ json = ["pbjson", "serde", "serde_json"] [dependencies] arrow = { workspace = true } chrono = { version = "0.4", default-features = false } -datafusion = { path = "../core", version = "21.1.0" } -datafusion-common = { path = "../common", version = "21.1.0" } -datafusion-expr = { path = "../expr", version = "21.1.0" } +datafusion = { path = "../core", version = "22.0.0" } +datafusion-common = { path = "../common", version = "22.0.0" } +datafusion-expr = { path = "../expr", version = "22.0.0" } object_store = { version = "0.5.4" } pbjson = { version = "0.5", optional = true } prost = "0.11.0" diff --git a/datafusion/row/Cargo.toml b/datafusion/row/Cargo.toml index 2d44d308c7586..6556e5629b6cb 100644 --- a/datafusion/row/Cargo.toml +++ 
b/datafusion/row/Cargo.toml @@ -38,7 +38,7 @@ jit = ["datafusion-jit"] [dependencies] arrow = { workspace = true } -datafusion-common = { path = "../common", version = "21.1.0" } -datafusion-jit = { path = "../jit", version = "21.1.0", optional = true } +datafusion-common = { path = "../common", version = "22.0.0" } +datafusion-jit = { path = "../jit", version = "22.0.0", optional = true } paste = "^1.0" rand = "0.8" diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index 819af49dd32a2..465d8331c7018 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -39,8 +39,8 @@ unicode_expressions = [] [dependencies] arrow = { workspace = true } arrow-schema = { workspace = true } -datafusion-common = { path = "../common", version = "21.1.0" } -datafusion-expr = { path = "../expr", version = "21.1.0" } +datafusion-common = { path = "../common", version = "22.0.0" } +datafusion-expr = { path = "../expr", version = "22.0.0" } log = "^0.4" sqlparser = "0.32" diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 14619d9cfc258..08c1ed8a4ea4b 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -30,7 +30,7 @@ rust-version = { workspace = true } [dependencies] async-recursion = "1.0" chrono = "0.4.23" -datafusion = { version = "21.1.0", path = "../core" } +datafusion = { version = "22.0.0", path = "../core" } itertools = "0.10.5" object_store = "0.5.4" prost = "0.11" diff --git a/dev/changelog/10.0.0.md b/dev/changelog/10.0.0.md new file mode 100644 index 0000000000000..d86ac0b0cc75b --- /dev/null +++ b/dev/changelog/10.0.0.md @@ -0,0 +1,159 @@ + + +## [10.0.0](https://github.com/apache/arrow-datafusion/tree/10.0.0) (2022-07-12) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/9.0.0...10.0.0) + +**Breaking changes:** + +- Convert batch_size to config option [\#2771](https://github.com/apache/arrow-datafusion/pull/2771) 
([andygrove](https://github.com/andygrove)) +- MINOR: Remove Offset struct [\#2734](https://github.com/apache/arrow-datafusion/pull/2734) ([andygrove](https://github.com/andygrove)) +- feat: async extension planner [\#2713](https://github.com/apache/arrow-datafusion/pull/2713) ([waynexia](https://github.com/waynexia)) +- Switch to object_store crate \(\#2489\) [\#2677](https://github.com/apache/arrow-datafusion/pull/2677) ([tustvold](https://github.com/tustvold)) + +**Implemented enhancements:** + +- update documentation, fix styling to match main Arrow project [\#2864](https://github.com/apache/arrow-datafusion/issues/2864) +- Update top-level README [\#2850](https://github.com/apache/arrow-datafusion/issues/2850) +- \[Question\]How to call an async function in `ExecutionPlan::exec` method? [\#2847](https://github.com/apache/arrow-datafusion/issues/2847) +- Add `DataFrame::with_column` [\#2844](https://github.com/apache/arrow-datafusion/issues/2844) +- Improve ergonomics of physical expr `lit` [\#2827](https://github.com/apache/arrow-datafusion/issues/2827) +- Add Python examples for reading CSV and query by SQL in Doc [\#2824](https://github.com/apache/arrow-datafusion/issues/2824) +- eliminate multi limit-offset nodes to EmptyRelation if possible [\#2822](https://github.com/apache/arrow-datafusion/issues/2822) +- Make `LogicalPlan::Union` be consistent with other plans [\#2816](https://github.com/apache/arrow-datafusion/issues/2816) +- Use coerced data type from value and list expressions during planning inlist expression [\#2793](https://github.com/apache/arrow-datafusion/issues/2793) +- Add configuration option to enable/disalbe `CoalesceBatchesExec` [\#2790](https://github.com/apache/arrow-datafusion/issues/2790) +- Simplify FilterNullJoinKeys rule [\#2780](https://github.com/apache/arrow-datafusion/issues/2780) +- Allow configuration settings to be specified with environment variables [\#2776](https://github.com/apache/arrow-datafusion/issues/2776) +- 
Automatically update `configs.md` in user guide [\#2770](https://github.com/apache/arrow-datafusion/issues/2770) +- Support multiple paths for ListingTableScanNode [\#2768](https://github.com/apache/arrow-datafusion/issues/2768) +- Reduce outer joins [\#2757](https://github.com/apache/arrow-datafusion/issues/2757) +- support data type coerced and decimal in INLIST expr [\#2755](https://github.com/apache/arrow-datafusion/issues/2755) +- Change ExtensionPlanner::plan_extension\(\) to an async function [\#2749](https://github.com/apache/arrow-datafusion/issues/2749) +- Add `IsNotNull` filter to join inputs if one side of join condition does not allow null [\#2739](https://github.com/apache/arrow-datafusion/issues/2739) +- Sort preserving MergeJoin [\#2698](https://github.com/apache/arrow-datafusion/issues/2698) +- Improve readability of table scan projections in query plans [\#2697](https://github.com/apache/arrow-datafusion/issues/2697) +- DataFusion 9.0.0 Release [\#2676](https://github.com/apache/arrow-datafusion/issues/2676) +- Improve UX for `UNION` vs `UNION ALL` \(introduce a LogicalPlan::Distinct\) [\#2573](https://github.com/apache/arrow-datafusion/issues/2573) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] +- Implement some way to show the sql used to create a view [\#2529](https://github.com/apache/arrow-datafusion/issues/2529) +- Consider adopting IOx ObjectStore abstraction [\#2489](https://github.com/apache/arrow-datafusion/issues/2489) +- Support `sum0` as a built-in agg function [\#2067](https://github.com/apache/arrow-datafusion/issues/2067) +- implement grouping sets, cubes, and rollups [\#1327](https://github.com/apache/arrow-datafusion/issues/1327) +- Ruby bindings [\#1114](https://github.com/apache/arrow-datafusion/issues/1114) +- Support dates in hash join [\#2746](https://github.com/apache/arrow-datafusion/pull/2746) ([andygrove](https://github.com/andygrove)) + +**Fixed bugs:** + +- Docker Error 
[\#2851](https://github.com/apache/arrow-datafusion/issues/2851) +- Anti join ignores join filters [\#2842](https://github.com/apache/arrow-datafusion/issues/2842) +- Can't test or compile sub-model code after upgrade to arrow-rs 17.0.0 [\#2835](https://github.com/apache/arrow-datafusion/issues/2835) +- Not evaluate the set expr in the InList for the optimization [\#2820](https://github.com/apache/arrow-datafusion/issues/2820) +- CASE When: result type should be coercible to a common type [\#2818](https://github.com/apache/arrow-datafusion/issues/2818) +- IN/NOT IN List: NULL is not equal to NULL [\#2817](https://github.com/apache/arrow-datafusion/issues/2817) +- panic when case statement returns null [\#2798](https://github.com/apache/arrow-datafusion/issues/2798) +- InList: Can't cast the list expr data type to value expr data type directly [\#2774](https://github.com/apache/arrow-datafusion/issues/2774) +- InList Expr: expr and list values must can be converted to a same data type [\#2759](https://github.com/apache/arrow-datafusion/issues/2759) +- tpchgen docker syntax change prevents volume from binding [\#2751](https://github.com/apache/arrow-datafusion/issues/2751) +- Cannot join on date columns \(Unsupported data type in hasher: Date32\) [\#2744](https://github.com/apache/arrow-datafusion/issues/2744) +- `rewrite_expression` does not properly handle `Exists` and `ScalarSubquery` [\#2736](https://github.com/apache/arrow-datafusion/issues/2736) +- LocalFileSystem Not sorted by file name, As a result, the data lines queried in multiple files are out of order. 
[\#2730](https://github.com/apache/arrow-datafusion/issues/2730) +- Filter push down need consider alias columns [\#2725](https://github.com/apache/arrow-datafusion/issues/2725) +- Recent API change in `GlobalLimitExec` breaks compatibility with Ballista [\#2720](https://github.com/apache/arrow-datafusion/issues/2720) +- Common Subexpression Elimination pass errors if run twice on some plans: Schema contains duplicate unqualified field name 'IsNull-Column-sys.host' [\#2712](https://github.com/apache/arrow-datafusion/issues/2712) +- The data type is not compatible with other system, for example spark or PG database [\#1379](https://github.com/apache/arrow-datafusion/issues/1379) + +**Documentation updates:** + +- Fix docs styling [\#2865](https://github.com/apache/arrow-datafusion/pull/2865) ([kmitchener](https://github.com/kmitchener)) +- Various updates to top-level README [\#2854](https://github.com/apache/arrow-datafusion/pull/2854) ([andygrove](https://github.com/andygrove)) +- MINOR: Add documentation for running integration tests [\#2839](https://github.com/apache/arrow-datafusion/pull/2839) ([andygrove](https://github.com/andygrove)) +- add csv registration and sql query to examples [\#2825](https://github.com/apache/arrow-datafusion/pull/2825) ([waitingkuo](https://github.com/waitingkuo)) +- \[minor\] refine doc [\#2753](https://github.com/apache/arrow-datafusion/pull/2753) ([Ted-Jiang](https://github.com/Ted-Jiang)) + +**Closed issues:** + +- Consider adding a prominent note in the readme about ballista [\#2853](https://github.com/apache/arrow-datafusion/issues/2853) +- support decimal in \(NULL\) [\#2800](https://github.com/apache/arrow-datafusion/issues/2800) +- InList: Don't treat Null as UTF8\(None\) [\#2782](https://github.com/apache/arrow-datafusion/issues/2782) +- InList: don't need to treat Null as UTF8 data type [\#2773](https://github.com/apache/arrow-datafusion/issues/2773) +- Implement extensible configuration mechanism
[\#138](https://github.com/apache/arrow-datafusion/issues/138) + +**Merged pull requests:** + +- Update CONTRIBUTING.md [\#2876](https://github.com/apache/arrow-datafusion/pull/2876) ([waitingkuo](https://github.com/waitingkuo)) +- Make LogicalPlan::Union be consistent with other plans [\#2868](https://github.com/apache/arrow-datafusion/pull/2868) ([comphead](https://github.com/comphead)) +- minor: remove unneeded files from project root [\#2863](https://github.com/apache/arrow-datafusion/pull/2863) ([kmitchener](https://github.com/kmitchener)) +- chore: make cargo clippy happy in nightly [\#2860](https://github.com/apache/arrow-datafusion/pull/2860) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Update to arrow 18.0.0 [\#2856](https://github.com/apache/arrow-datafusion/pull/2856) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- chore: remove ballista-related docker-compose file [\#2852](https://github.com/apache/arrow-datafusion/pull/2852) ([xudong963](https://github.com/xudong963)) +- Adding dataframe with_column function [\#2849](https://github.com/apache/arrow-datafusion/pull/2849) ([comphead](https://github.com/comphead)) +- anti joins now respect join filters [\#2843](https://github.com/apache/arrow-datafusion/pull/2843) ([andygrove](https://github.com/andygrove)) +- MINOR: make name meaningful and clean up code [\#2841](https://github.com/apache/arrow-datafusion/pull/2841) ([liukun4515](https://github.com/liukun4515)) +- Make `lit` implementation more concise [\#2838](https://github.com/apache/arrow-datafusion/pull/2838) ([alamb](https://github.com/alamb)) +- InList: set/list value must be evaluated to get the values [\#2834](https://github.com/apache/arrow-datafusion/pull/2834) ([liukun4515](https://github.com/liukun4515)) +- Add SHOW CREATE TABLE with initial support for views [\#2830](https://github.com/apache/arrow-datafusion/pull/2830)
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mrob95](https://github.com/mrob95)) +- Improve ergonomics of physical expr `lit` [\#2828](https://github.com/apache/arrow-datafusion/pull/2828) ([alamb](https://github.com/alamb)) +- Eliminate multi limit-offset nodes to emptyRelation [\#2823](https://github.com/apache/arrow-datafusion/pull/2823) ([AssHero](https://github.com/AssHero)) +- Fix the ci [\#2821](https://github.com/apache/arrow-datafusion/pull/2821) ([liukun4515](https://github.com/liukun4515)) +- CaseWhen: coerce the all then and else data type to a common data type [\#2819](https://github.com/apache/arrow-datafusion/pull/2819) ([liukun4515](https://github.com/liukun4515)) +- Fix `ScalarValue::isNull` calculation [\#2815](https://github.com/apache/arrow-datafusion/pull/2815) ([alamb](https://github.com/alamb)) +- Fix nullability calculation for `CASE` expressions [\#2814](https://github.com/apache/arrow-datafusion/pull/2814) ([alamb](https://github.com/alamb)) +- Bump numpy from 1.21.3 to 1.22.0 in /integration-tests [\#2811](https://github.com/apache/arrow-datafusion/pull/2811) ([xudong963](https://github.com/xudong963)) +- Fix data type calculation for `CaseExpr` s with `NULLs` [\#2810](https://github.com/apache/arrow-datafusion/pull/2810) ([AssHero](https://github.com/AssHero)) +- InList: fix bug for comparing with Null in the list using the set optimization [\#2809](https://github.com/apache/arrow-datafusion/pull/2809) ([liukun4515](https://github.com/liukun4515)) +- Use specialized dictionary kernels \(\#1178\) [\#2808](https://github.com/apache/arrow-datafusion/pull/2808) ([tustvold](https://github.com/tustvold)) +- fix schema nullability for `information_schema` schema [\#2804](https://github.com/apache/arrow-datafusion/pull/2804) ([alamb](https://github.com/alamb)) +- fix: correctly calculate join output schema nullability [\#2803](https://github.com/apache/arrow-datafusion/pull/2803) ([alamb](https://github.com/alamb)) +- Correct 
schema nullability declaration in tests [\#2802](https://github.com/apache/arrow-datafusion/pull/2802) ([alamb](https://github.com/alamb)) +- Don't treat Null as UTF8\(None\) and change error info. [\#2801](https://github.com/apache/arrow-datafusion/pull/2801) ([liukun4515](https://github.com/liukun4515)) +- MINOR: Remove reference to docker image that is no longer available [\#2795](https://github.com/apache/arrow-datafusion/pull/2795) ([andygrove](https://github.com/andygrove)) +- Use coerced type in inlist expr planning [\#2794](https://github.com/apache/arrow-datafusion/pull/2794) ([viirya](https://github.com/viirya)) +- Add LogicalPlan::Distinct [\#2792](https://github.com/apache/arrow-datafusion/pull/2792) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mrob95](https://github.com/mrob95)) +- Add config option for coalesce_batches physical optimization rule, make optional [\#2791](https://github.com/apache/arrow-datafusion/pull/2791) ([andygrove](https://github.com/andygrove)) +- Improve readability of table scan projections in query plans \(remove `Some` and `None`\) [\#2789](https://github.com/apache/arrow-datafusion/pull/2789) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([comphead](https://github.com/comphead)) +- Simplify FilterNullJoinKeys rule [\#2781](https://github.com/apache/arrow-datafusion/pull/2781) ([andygrove](https://github.com/andygrove)) +- MINOR: re-export sqlparser from datafusion-sql crate [\#2779](https://github.com/apache/arrow-datafusion/pull/2779) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Update to arrow 17.0.0 [\#2778](https://github.com/apache/arrow-datafusion/pull/2778) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Support multiple paths for ListingTableScanNode [\#2775](https://github.com/apache/arrow-datafusion/pull/2775) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Remove 
expr_sub_expressions and rewrite_expression functions [\#2772](https://github.com/apache/arrow-datafusion/pull/2772) ([mrob95](https://github.com/mrob95)) +- minor: update cranelift related dependencies [\#2769](https://github.com/apache/arrow-datafusion/pull/2769) ([xudong963](https://github.com/xudong963)) +- minor: panic rather than fail silently on bad dictionary in hash join [\#2767](https://github.com/apache/arrow-datafusion/pull/2767) ([alamb](https://github.com/alamb)) +- MINOR: make `prettier` use consistent between CI and contributing guide [\#2766](https://github.com/apache/arrow-datafusion/pull/2766) ([andygrove](https://github.com/andygrove)) +- Rewrite subexpressions of InSubquery in rewrite_expression [\#2765](https://github.com/apache/arrow-datafusion/pull/2765) ([mrob95](https://github.com/mrob95)) +- Support `DataType::Decimal` for `IN` and `NOT IN` expressions [\#2764](https://github.com/apache/arrow-datafusion/pull/2764) ([liukun4515](https://github.com/liukun4515)) +- Implement extensible configuration mechanism [\#2754](https://github.com/apache/arrow-datafusion/pull/2754) ([andygrove](https://github.com/andygrove)) +- Remove redundant docker argument [\#2752](https://github.com/apache/arrow-datafusion/pull/2752) ([avantgardnerio](https://github.com/avantgardnerio)) +- Add optimizer pass to reduce `left`/`right`/`full` joins to `inner` join if possible [\#2750](https://github.com/apache/arrow-datafusion/pull/2750) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) +- MINOR: Remove legacy CLI context enum [\#2748](https://github.com/apache/arrow-datafusion/pull/2748) ([andygrove](https://github.com/andygrove)) +- CSE unit test for duplicate fields [\#2747](https://github.com/apache/arrow-datafusion/pull/2747) ([waynexia](https://github.com/waynexia)) +- MINOR: Improve unsupported data type error message [\#2745](https://github.com/apache/arrow-datafusion/pull/2745) 
([andygrove](https://github.com/andygrove)) +- Add optimizer rule to filter out null keys before a join [\#2740](https://github.com/apache/arrow-datafusion/pull/2740) ([andygrove](https://github.com/andygrove)) +- Sort file names in a directory \#2730 [\#2735](https://github.com/apache/arrow-datafusion/pull/2735) ([yourenawo](https://github.com/yourenawo)) +- fix: filter push down with `InList` expressions [\#2729](https://github.com/apache/arrow-datafusion/pull/2729) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- \[Minor\] add debug info in optimizer.rs [\#2726](https://github.com/apache/arrow-datafusion/pull/2726) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add public API for GlobalLimitExec and LocalLimitExec [\#2722](https://github.com/apache/arrow-datafusion/pull/2722) ([andygrove](https://github.com/andygrove)) +- Add additional data types are supported in hash join [\#2721](https://github.com/apache/arrow-datafusion/pull/2721) ([AssHero](https://github.com/AssHero)) +- Upgrade to arrow `16.0.0` [\#2718](https://github.com/apache/arrow-datafusion/pull/2718) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fix clippy warnings with toolchain 1.63 [\#2717](https://github.com/apache/arrow-datafusion/pull/2717) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waynexia](https://github.com/waynexia)) +- Support for GROUPING SETS/CUBE/ROLLUP [\#2716](https://github.com/apache/arrow-datafusion/pull/2716) ([thinkharderdev](https://github.com/thinkharderdev)) +- fix: check redundant fields while building projection plan [\#2715](https://github.com/apache/arrow-datafusion/pull/2715) ([waynexia](https://github.com/waynexia)) +- Sort preserving `SortMergeJoin` [\#2699](https://github.com/apache/arrow-datafusion/pull/2699) ([korowa](https://github.com/korowa)) +- fix: union schema fix [\#2688](https://github.com/apache/arrow-datafusion/pull/2688) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gandronchik](https://github.com/gandronchik)) +- Support default precision and scale to`CAST AS DECIMAL` [\#2680](https://github.com/apache/arrow-datafusion/pull/2680) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gandronchik](https://github.com/gandronchik)) diff --git a/dev/changelog/11.0.0.md b/dev/changelog/11.0.0.md new file mode 100644 index 0000000000000..60fe648366ece --- /dev/null +++ b/dev/changelog/11.0.0.md @@ -0,0 +1,257 @@ + + +## [11.0.0](https://github.com/apache/arrow-datafusion/tree/11.0.0) (2022-08-16) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/10.0.0-rc1...11.0.0) + +**Breaking changes:** + +- Implement exact median, add `AggregateState` [\#3009](https://github.com/apache/arrow-datafusion/pull/3009) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) + +**Implemented enhancements:** + +- Make RowAccumulator public [\#3138](https://github.com/apache/arrow-datafusion/issues/3138) +- docs: proposal for consolidating docs into a Contributor Guide [\#3127](https://github.com/apache/arrow-datafusion/issues/3127) +- feat: support Timestamp +/- Interval [\#3103](https://github.com/apache/arrow-datafusion/issues/3103) +- a `arrow_typeof` like posgresql's `pg_typeof` [\#3095](https://github.com/apache/arrow-datafusion/issues/3095) +- Add DataFrame section to user guide [\#3066](https://github.com/apache/arrow-datafusion/issues/3066) +- Document all scalar SQL functions in user guide [\#3065](https://github.com/apache/arrow-datafusion/issues/3065) +- Simplify implementation of approx_median so that it can be exposed in Python [\#3063](https://github.com/apache/arrow-datafusion/issues/3063) +- Support double quoted literal strings for dialects\(such as mysql,bigquery\) [\#3055](https://github.com/apache/arrow-datafusion/issues/3055) +- Simplify / speed up implementation of character_length to 
unicode points [\#3049](https://github.com/apache/arrow-datafusion/issues/3049) +- Follow-up on Clickbench benchmark [\#3048](https://github.com/apache/arrow-datafusion/issues/3048) +- Why the PhysicalPlanner is an async trait ? [\#3032](https://github.com/apache/arrow-datafusion/issues/3032) +- Optimize file stream metrics. [\#3024](https://github.com/apache/arrow-datafusion/issues/3024) +- Proposal: Enable typed strings expressions for VALUES clause [\#3017](https://github.com/apache/arrow-datafusion/issues/3017) +- Proposal: Add `date_bin` function [\#3015](https://github.com/apache/arrow-datafusion/issues/3015) +- The upcoming release of Arrow \(20?\) breaks datafusion [\#3006](https://github.com/apache/arrow-datafusion/issues/3006) +- Can I select some files for query based on the filtering rules in the directory? [\#2993](https://github.com/apache/arrow-datafusion/issues/2993) +- Rename FormatReader to FileOpener [\#2990](https://github.com/apache/arrow-datafusion/issues/2990) +- Derive `Hash` trait for `JoinType` [\#2971](https://github.com/apache/arrow-datafusion/issues/2971) +- CAST from Utf8 to Boolean [\#2967](https://github.com/apache/arrow-datafusion/issues/2967) +- Add baseline_metrics for FileStream to record metrics like elapsed time, record output, etc [\#2961](https://github.com/apache/arrow-datafusion/issues/2961) +- Example to show how to convert query result into rust struct [\#2959](https://github.com/apache/arrow-datafusion/issues/2959) +- simplify not clause [\#2957](https://github.com/apache/arrow-datafusion/issues/2957) +- Implement Debug for ColumnarValue [\#2950](https://github.com/apache/arrow-datafusion/issues/2950) +- Parallel fetching of column chunks when reading parquet files [\#2949](https://github.com/apache/arrow-datafusion/issues/2949) +- Extension mechanism for `SessionConfig` [\#2939](https://github.com/apache/arrow-datafusion/issues/2939) +- Streaming CSV/JSON Object Store Read 
[\#2935](https://github.com/apache/arrow-datafusion/issues/2935) +- Support CSV Limit Pushdown to Object Storage [\#2930](https://github.com/apache/arrow-datafusion/issues/2930) +- Add support for `pow` scalar function [\#2926](https://github.com/apache/arrow-datafusion/issues/2926) +- Add support for exact `median` aggregate function [\#2925](https://github.com/apache/arrow-datafusion/issues/2925) +- Support `mean` as synonym for `avg` [\#2922](https://github.com/apache/arrow-datafusion/issues/2922) +- Rename a column name [\#2919](https://github.com/apache/arrow-datafusion/issues/2919) +- Move `ScalarValue` tests alongside implementation, move `from_slice` to `core` [\#2913](https://github.com/apache/arrow-datafusion/issues/2913) +- Fail gracefully if optimization rule fails [\#2908](https://github.com/apache/arrow-datafusion/issues/2908) +- Make ObjectStoreRegistry as a trait which can allow Ballista to introduce a self registry ObjectStoreRegistry [\#2905](https://github.com/apache/arrow-datafusion/issues/2905) +- Remove datafusion-data-access crate [\#2903](https://github.com/apache/arrow-datafusion/issues/2903) +- Improve formatting of logical plans containing subquery expressions [\#2898](https://github.com/apache/arrow-datafusion/issues/2898) +- Atan2 added to built-in functions [\#2897](https://github.com/apache/arrow-datafusion/issues/2897) +- The explain statements only print logical plans for debug/other purpose. 
[\#2894](https://github.com/apache/arrow-datafusion/issues/2894) +- JSON version of `display_indent()` [\#2889](https://github.com/apache/arrow-datafusion/issues/2889) +- It would be nice to have a way to generate unique IDs in optimizer rules [\#2886](https://github.com/apache/arrow-datafusion/issues/2886) +- Add support for `TIME` literal values [\#2883](https://github.com/apache/arrow-datafusion/issues/2883) +- Add h2o benchmark [\#2879](https://github.com/apache/arrow-datafusion/issues/2879) +- Implement `from_unixtime` function [\#2871](https://github.com/apache/arrow-datafusion/issues/2871) +- Add `cast` function for creating logical cast expression [\#2870](https://github.com/apache/arrow-datafusion/issues/2870) +- Release DataFusion 10.0.0 [\#2862](https://github.com/apache/arrow-datafusion/issues/2862) +- Implement `information_schema.views` [\#2857](https://github.com/apache/arrow-datafusion/issues/2857) +- Migrate from avro_rs to apache_avro [\#2783](https://github.com/apache/arrow-datafusion/issues/2783) +- Add optimizer rule to remove `OFFSET 0` [\#2584](https://github.com/apache/arrow-datafusion/issues/2584) +- Preserve Element Name in ScalarValue::List [\#2450](https://github.com/apache/arrow-datafusion/issues/2450) +- Add EXISTS subquery support to Ballista [\#2338](https://github.com/apache/arrow-datafusion/issues/2338) +- Add documentation on supported functions to datafusion website [\#1487](https://github.com/apache/arrow-datafusion/issues/1487) +- documentations for datafusion-cli can be consolidated a bit more [\#1352](https://github.com/apache/arrow-datafusion/issues/1352) +- Optimizer: Predicate Rewrite pass for TPCH Q19 [\#217](https://github.com/apache/arrow-datafusion/issues/217) +- feat: add optimize rule `rewrite_disjunctive_predicate` [\#2858](https://github.com/apache/arrow-datafusion/pull/2858) ([xudong963](https://github.com/xudong963)) + +**Fixed bugs:** + +- Regression in SQL support for `ORDER BY` and aliased expressions 
[\#3160](https://github.com/apache/arrow-datafusion/issues/3160) +- panic when deal with `@` operator [\#3137](https://github.com/apache/arrow-datafusion/issues/3137) +- Incorrect type coercion rule for date + interval [\#3093](https://github.com/apache/arrow-datafusion/issues/3093) +- Cast string to timestamp crash while we input time before 1970 with floating number second [\#3082](https://github.com/apache/arrow-datafusion/issues/3082) +- INTEGER type does't work while importing csv [\#3059](https://github.com/apache/arrow-datafusion/issues/3059) +- Cannot GROUP BY Binary [\#3050](https://github.com/apache/arrow-datafusion/issues/3050) +- incorrect i32 coercion for `to_timestamp` [\#3046](https://github.com/apache/arrow-datafusion/issues/3046) +- Error pruning `IsNull` expressions: Column 'instance_null_count' is declared as non-nullable but contains null values [\#3042](https://github.com/apache/arrow-datafusion/issues/3042) +- I want to query some files in a directory. Is there any way? 
[\#3013](https://github.com/apache/arrow-datafusion/issues/3013) +- The expression to get an indexed field is only valid for `List` types \(`common_sub_expression_eliminate`\) [\#3002](https://github.com/apache/arrow-datafusion/issues/3002) +- Double to_timestamp_seconds produces abnormal result [\#2998](https://github.com/apache/arrow-datafusion/issues/2998) +- External parquet table fails when schema contains differing key / value metadata [\#2982](https://github.com/apache/arrow-datafusion/issues/2982) +- SELECT on column with uppercase column name fails with FieldNotFound error [\#2978](https://github.com/apache/arrow-datafusion/issues/2978) +- panic reading AWS-generated parquet file [\#2963](https://github.com/apache/arrow-datafusion/issues/2963) +- Can't filter rowgroup for parquet prune for some data type [\#2962](https://github.com/apache/arrow-datafusion/issues/2962) +- CI test is failing with ` final link failed: No space left on device` [\#2947](https://github.com/apache/arrow-datafusion/issues/2947) +- bug: new ObjectStore breaks backward compatibility with contrib plugins [\#2931](https://github.com/apache/arrow-datafusion/issues/2931) +- bug: file types handled wrong [\#2929](https://github.com/apache/arrow-datafusion/issues/2929) +- bug: changing the number of partitions does not increase concurrency [\#2928](https://github.com/apache/arrow-datafusion/issues/2928) +- csv_explain fails on RC verifier [\#2916](https://github.com/apache/arrow-datafusion/issues/2916) +- index out of range error from datafusion_row::write::write_field [\#2910](https://github.com/apache/arrow-datafusion/issues/2910) +- Optimization rule `CommonSubexprEliminate` creates invalid projections [\#2907](https://github.com/apache/arrow-datafusion/issues/2907) +- serde_json requires that either `std` \(default\) or `alloc` feature is enabled [\#2896](https://github.com/apache/arrow-datafusion/issues/2896) +- Inconsistent type coercion rules with comparison expressions 
[\#2890](https://github.com/apache/arrow-datafusion/issues/2890) +- Doc Error: the test directory link 404 which is in CONTRIBUTING.md [\#2880](https://github.com/apache/arrow-datafusion/issues/2880) +- Round trips through `ScalarValue`'s sometimes don't preserve types \(e.g. change types from `DictionaryArray`\) [\#2874](https://github.com/apache/arrow-datafusion/issues/2874) +- Error with CASE and DictionaryArrays: `ArrowError(InvalidArgumentError("arguments need to have the same data type"))` [\#2873](https://github.com/apache/arrow-datafusion/issues/2873) +- window functions not supported in expressions [\#2869](https://github.com/apache/arrow-datafusion/issues/2869) +- Unable to work with month intervals [\#2796](https://github.com/apache/arrow-datafusion/issues/2796) +- Discord invite link in communication page has expired [\#2743](https://github.com/apache/arrow-datafusion/issues/2743) +- Test \(path normalization\) failures while verifying release candidate 9.0.0 RC1 [\#2719](https://github.com/apache/arrow-datafusion/issues/2719) +- Reading parquet with \(pre-release\) arrow fails with "out of order projection is not supported" [\#2543](https://github.com/apache/arrow-datafusion/issues/2543) +- Fix SQL planner bug when resolving columns with same name as a relation [\#3003](https://github.com/apache/arrow-datafusion/pull/3003) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- fix `RowWriter` index out of bounds error [\#2968](https://github.com/apache/arrow-datafusion/pull/2968) ([comphead](https://github.com/comphead)) +- fix: support decimal statistic for row group prune [\#2966](https://github.com/apache/arrow-datafusion/pull/2966) ([liukun4515](https://github.com/liukun4515)) +- Fix invalid projection in `CommonSubexprEliminate` [\#2915](https://github.com/apache/arrow-datafusion/pull/2915) ([andygrove](https://github.com/andygrove)) + +**Documentation updates:** + +- MINOR: Fix broken links 
in contrib guide [\#3135](https://github.com/apache/arrow-datafusion/pull/3135) ([andygrove](https://github.com/andygrove)) +- MINOR: User Guide: Move expressions to top-level page [\#3134](https://github.com/apache/arrow-datafusion/pull/3134) ([andygrove](https://github.com/andygrove)) +- User Guide: Combine CLI pages [\#3133](https://github.com/apache/arrow-datafusion/pull/3133) ([andygrove](https://github.com/andygrove)) +- User Guide: Add documentation for JOIN syntax [\#3130](https://github.com/apache/arrow-datafusion/pull/3130) ([andygrove](https://github.com/andygrove)) +- separate contributors guide [\#3128](https://github.com/apache/arrow-datafusion/pull/3128) ([kmitchener](https://github.com/kmitchener)) +- minor: remove python docs, now they're in another project [\#3119](https://github.com/apache/arrow-datafusion/pull/3119) ([kmitchener](https://github.com/kmitchener)) +- minor: doc fixes: fix link to datafusion-python project and add link to slides for rece… [\#3118](https://github.com/apache/arrow-datafusion/pull/3118) ([kmitchener](https://github.com/kmitchener)) +- Add all scalar SQL functions to user guide [\#3090](https://github.com/apache/arrow-datafusion/pull/3090) ([andygrove](https://github.com/andygrove)) +- Add DataFrame reference to the user guide [\#3067](https://github.com/apache/arrow-datafusion/pull/3067) ([andygrove](https://github.com/andygrove)) +- MINOR: Add CeresDB to list of products using DataFusion [\#3060](https://github.com/apache/arrow-datafusion/pull/3060) ([andygrove](https://github.com/andygrove)) +- Minor: improve some docstrings about pruning [\#3041](https://github.com/apache/arrow-datafusion/pull/3041) ([alamb](https://github.com/alamb)) +- doc: add a new video link about datafusion [\#3025](https://github.com/apache/arrow-datafusion/pull/3025) ([xudong963](https://github.com/xudong963)) +- Update README.md to add CnosDB into the Known Uses [\#2933](https://github.com/apache/arrow-datafusion/pull/2933) 
([cnoshb](https://github.com/cnoshb)) + +**Performance improvements:** + +- Use code points instead of grapheme clusters for string functions [\#3054](https://github.com/apache/arrow-datafusion/pull/3054) ([Dandandan](https://github.com/Dandandan)) + +**Closed issues:** + +- Rename `do_data_time_math()` to `do_date_time_math()` [\#3172](https://github.com/apache/arrow-datafusion/issues/3172) +- Automatic version updates for github actions with dependabot [\#3106](https://github.com/apache/arrow-datafusion/issues/3106) +- \[EPIC\] Proposal for Date/Time enhancement [\#3100](https://github.com/apache/arrow-datafusion/issues/3100) +- Upgrade prost/tonic everywhere [\#3028](https://github.com/apache/arrow-datafusion/issues/3028) +- \[Question\] interested in helping with documentation [\#2866](https://github.com/apache/arrow-datafusion/issues/2866) +- Introducing a new optimizer framework for datafusion. [\#2633](https://github.com/apache/arrow-datafusion/issues/2633) +- Enable discussion tab? 
[\#2350](https://github.com/apache/arrow-datafusion/issues/2350) +- Add support for AVG\(Timestamp\) types [\#200](https://github.com/apache/arrow-datafusion/issues/200) +- TPC-H Query 22 [\#175](https://github.com/apache/arrow-datafusion/issues/175) +- TPC-H Query 21 [\#172](https://github.com/apache/arrow-datafusion/issues/172) +- TPC-H Query 20 [\#171](https://github.com/apache/arrow-datafusion/issues/171) +- TPC-H Query 17 [\#168](https://github.com/apache/arrow-datafusion/issues/168) +- TPC-H Query 11 [\#163](https://github.com/apache/arrow-datafusion/issues/163) +- TPC-H Query 4 [\#160](https://github.com/apache/arrow-datafusion/issues/160) +- TPC-H Query 2 [\#159](https://github.com/apache/arrow-datafusion/issues/159) +- \[Datafusion\] Optimize literal expression evaluation [\#106](https://github.com/apache/arrow-datafusion/issues/106) + +**Merged pull requests:** + +- Rename do_data_time_math\(\) to do_date_time_math\(\) [\#3173](https://github.com/apache/arrow-datafusion/pull/3173) ([JasonLi-cn](https://github.com/JasonLi-cn)) +- \[Minor\] Remove some redundant code [\#3169](https://github.com/apache/arrow-datafusion/pull/3169) ([alamb](https://github.com/alamb)) +- Support `INTEGER` again in addition to `INT` in `CREATE TABLE` and `CAST` statements [\#3167](https://github.com/apache/arrow-datafusion/pull/3167) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fix regression in SQL parser related to resolution of aliased expressions [\#3165](https://github.com/apache/arrow-datafusion/pull/3165) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- update cargo lock [\#3164](https://github.com/apache/arrow-datafusion/pull/3164) ([waitingkuo](https://github.com/waitingkuo)) +- add test case for cast_timestamp_before_1970 [\#3163](https://github.com/apache/arrow-datafusion/pull/3163) ([waitingkuo](https://github.com/waitingkuo)) +- Return proper error 
message for ill formed variable reference [\#3162](https://github.com/apache/arrow-datafusion/pull/3162) ([alamb](https://github.com/alamb)) +- Remove outdated license text left over from arrow repo [\#3154](https://github.com/apache/arrow-datafusion/pull/3154) ([alamb](https://github.com/alamb)) +- Expose RowAccumulator in physical_plan [\#3151](https://github.com/apache/arrow-datafusion/pull/3151) ([iajoiner](https://github.com/iajoiner)) +- Rename `DateIntervalExpr` to `DateTimeIntervalExpr` [\#3150](https://github.com/apache/arrow-datafusion/pull/3150) ([alamb](https://github.com/alamb)) +- Bump actions/labeler from 4.0.0 to 4.0.1 [\#3144](https://github.com/apache/arrow-datafusion/pull/3144) ([dependabot[bot]](https://github.com/apps/dependabot)) +- User Guide: Add documentation for subquery syntax [\#3132](https://github.com/apache/arrow-datafusion/pull/3132) ([andygrove](https://github.com/andygrove)) +- MINOR: User Guide: Move Data Types and Information Schema to their own pages [\#3131](https://github.com/apache/arrow-datafusion/pull/3131) ([andygrove](https://github.com/andygrove)) +- Minor: Clean up `array` test [\#3121](https://github.com/apache/arrow-datafusion/pull/3121) ([alamb](https://github.com/alamb)) +- add arrow_typeof [\#3120](https://github.com/apache/arrow-datafusion/pull/3120) ([waitingkuo](https://github.com/waitingkuo)) +- Bump actions/labeler from 2.2.0 to 4.0.0 [\#3114](https://github.com/apache/arrow-datafusion/pull/3114) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump actions/checkout from 2 to 3 [\#3113](https://github.com/apache/arrow-datafusion/pull/3113) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump actions/setup-node from 2 to 3 [\#3112](https://github.com/apache/arrow-datafusion/pull/3112) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump actions/setup-python from 3 to 4 [\#3111](https://github.com/apache/arrow-datafusion/pull/3111) 
([dependabot[bot]](https://github.com/apps/dependabot)) +- Feature/support timestamp plus minus interval [\#3110](https://github.com/apache/arrow-datafusion/pull/3110) ([JasonLi-cn](https://github.com/JasonLi-cn)) +- docs: fix typo [\#3109](https://github.com/apache/arrow-datafusion/pull/3109) ([dzvon](https://github.com/dzvon)) +- Remove offset if its zero [\#3102](https://github.com/apache/arrow-datafusion/pull/3102) ([turbo1912](https://github.com/turbo1912)) +- Hash binary values [\#3098](https://github.com/apache/arrow-datafusion/pull/3098) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) +- Update to object_store 0.4 [\#3089](https://github.com/apache/arrow-datafusion/pull/3089) ([tustvold](https://github.com/tustvold)) +- Add cast function for creating cast expression [\#3084](https://github.com/apache/arrow-datafusion/pull/3084) ([turbo1912](https://github.com/turbo1912)) +- Upgrade to arrow 20.0.0 \(but no change to object_store\), including `prost`, and `tonic` [\#3083](https://github.com/apache/arrow-datafusion/pull/3083) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- impl Debug for ColumnarValue, add some docs [\#3076](https://github.com/apache/arrow-datafusion/pull/3076) ([alamb](https://github.com/alamb)) +- \[Minor\] run cargo update in datafusion-cli directory [\#3075](https://github.com/apache/arrow-datafusion/pull/3075) ([alamb](https://github.com/alamb)) +- update cargo.lock in `datafusion-cli` [\#3074](https://github.com/apache/arrow-datafusion/pull/3074) ([waitingkuo](https://github.com/waitingkuo)) +- Update sql parser to v0.20.0 [\#3072](https://github.com/apache/arrow-datafusion/pull/3072) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- Add opening, scanning, processing metrics in file stream 
[\#3070](https://github.com/apache/arrow-datafusion/pull/3070) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Simplify `approx_median` implementation, expose via `DataFrame` API [\#3064](https://github.com/apache/arrow-datafusion/pull/3064) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- docs: fix PruningStatistics example and some typos [\#3062](https://github.com/apache/arrow-datafusion/pull/3062) ([roeap](https://github.com/roeap)) +- feat: support double quoted literal strings for dialects\(such as mysql,bigquery,spark\) [\#3056](https://github.com/apache/arrow-datafusion/pull/3056) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Rachelint](https://github.com/Rachelint)) +- Allow Overriding AsyncFileReader used by ParquetExec [\#3051](https://github.com/apache/arrow-datafusion/pull/3051) ([Cheappie](https://github.com/Cheappie)) +- to_timestamp i32 coerced to i64 [\#3047](https://github.com/apache/arrow-datafusion/pull/3047) ([waitingkuo](https://github.com/waitingkuo)) +- Fix `IsNull` pruning expression generation without null_count statistics [\#3044](https://github.com/apache/arrow-datafusion/pull/3044) ([alamb](https://github.com/alamb)) +- feat: Support `week`, `decade`, `century` for Interval literal [\#3038](https://github.com/apache/arrow-datafusion/pull/3038) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ovr](https://github.com/ovr)) +- feat: Support Binary bitwise shift operators \(\<\< and \>\>\) [\#3037](https://github.com/apache/arrow-datafusion/pull/3037) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ovr](https://github.com/ovr)) +- Use concat_elements_utf8 from arrow rather than custom kernel [\#3036](https://github.com/apache/arrow-datafusion/pull/3036) ([alamb](https://github.com/alamb)) +- minor: update minimal rust version to 1.62, matching arrow-rs [\#3035](https://github.com/apache/arrow-datafusion/pull/3035) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) +- feat: Add `date_bin` built-in function [\#3034](https://github.com/apache/arrow-datafusion/pull/3034) ([stuartcarnie](https://github.com/stuartcarnie)) +- Split `binary_expr.rs` into smaller modules [\#3026](https://github.com/apache/arrow-datafusion/pull/3026) ([alamb](https://github.com/alamb)) +- feat: Enable typed strings expressions for VALUES clause [\#3018](https://github.com/apache/arrow-datafusion/pull/3018) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([stuartcarnie](https://github.com/stuartcarnie)) +- fix typo for PR3003 [\#3011](https://github.com/apache/arrow-datafusion/pull/3011) ([waitingkuo](https://github.com/waitingkuo)) +- feat: Add support for TIME literal values [\#3010](https://github.com/apache/arrow-datafusion/pull/3010) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([stuartcarnie](https://github.com/stuartcarnie)) +- add TimeUnit::Second as signature for ToTimestampSeconds [\#3004](https://github.com/apache/arrow-datafusion/pull/3004) ([waitingkuo](https://github.com/waitingkuo)) +- Rename FileReader to FileOpener \(\#2990\) [\#2991](https://github.com/apache/arrow-datafusion/pull/2991) ([tustvold](https://github.com/tustvold)) +- minor: collation the prune test [\#2986](https://github.com/apache/arrow-datafusion/pull/2986) ([liukun4515](https://github.com/liukun4515)) +- Optionally skip metadata from schema when merging parquet files [\#2985](https://github.com/apache/arrow-datafusion/pull/2985) ([alamb](https://github.com/alamb)) +- \[Minor\] Extract interval parsing logic, add unit tests [\#2984](https://github.com/apache/arrow-datafusion/pull/2984) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Update sqlparser to 0.19 [\#2981](https://github.com/apache/arrow-datafusion/pull/2981) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- test: add file/SQL level test for pruning parquet row group with decimal data type. [\#2977](https://github.com/apache/arrow-datafusion/pull/2977) ([liukun4515](https://github.com/liukun4515)) +- Derive Hash for JoinType [\#2972](https://github.com/apache/arrow-datafusion/pull/2972) ([liurenjie1024](https://github.com/liurenjie1024)) +- Example that shows how to convert query result into rust struct \#2959 [\#2969](https://github.com/apache/arrow-datafusion/pull/2969) ([thomas-k-cameron](https://github.com/thomas-k-cameron)) +- Add baseline_metrics for FileStream to record metrics like elapsed ti… [\#2965](https://github.com/apache/arrow-datafusion/pull/2965) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- test: add test for decimal and pruning for decimal column [\#2960](https://github.com/apache/arrow-datafusion/pull/2960) ([liukun4515](https://github.com/liukun4515)) +- Simplify expressions with `NOT` clause [\#2958](https://github.com/apache/arrow-datafusion/pull/2958) ([AssHero](https://github.com/AssHero)) +- chore: update jit-related dependencies [\#2956](https://github.com/apache/arrow-datafusion/pull/2956) ([xudong963](https://github.com/xudong963)) +- Update to arrow `19.0.0` [\#2955](https://github.com/apache/arrow-datafusion/pull/2955) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Remove CI Caching to preserve diskspace [\#2948](https://github.com/apache/arrow-datafusion/pull/2948) ([alamb](https://github.com/alamb)) +- Add metadata_size_hint for optimistic fetching of parquet metadata [\#2946](https://github.com/apache/arrow-datafusion/pull/2946) ([thinkharderdev](https://github.com/thinkharderdev)) +- Minor: Remove left over debugging statement [\#2944](https://github.com/apache/arrow-datafusion/pull/2944) ([alamb](https://github.com/alamb)) +- add Atan2 
[\#2942](https://github.com/apache/arrow-datafusion/pull/2942) ([waitingkuo](https://github.com/waitingkuo)) +- Use `Arc` and remove ObjectStoreRegistry::clone [\#2941](https://github.com/apache/arrow-datafusion/pull/2941) ([tustvold](https://github.com/tustvold)) +- add extension system to `SessionConfig` [\#2940](https://github.com/apache/arrow-datafusion/pull/2940) ([crepererum](https://github.com/crepererum)) +- Update prost-build requirement from 0.7 to 0.10 [\#2937](https://github.com/apache/arrow-datafusion/pull/2937) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add streaming JSON and CSV reading, `NewlineDelimitedStream' \(\#2935\) [\#2936](https://github.com/apache/arrow-datafusion/pull/2936) ([tustvold](https://github.com/tustvold)) +- feat\(catalog\): Implement information_schema.views [\#2934](https://github.com/apache/arrow-datafusion/pull/2934) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([BaymaxHWY](https://github.com/BaymaxHWY)) +- Support `window` functions in expressions by re-write projection after building window plan [\#2932](https://github.com/apache/arrow-datafusion/pull/2932) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) +- Add pow as synonym for power [\#2927](https://github.com/apache/arrow-datafusion/pull/2927) ([andygrove](https://github.com/andygrove)) +- Add `from_unixtime` function [\#2924](https://github.com/apache/arrow-datafusion/pull/2924) ([waitingkuo](https://github.com/waitingkuo)) +- fix\(aggregate\): support mean as synonym avg [\#2923](https://github.com/apache/arrow-datafusion/pull/2923) ([BaymaxHWY](https://github.com/BaymaxHWY)) +- Add `DataFrame::with_column_renamed` [\#2920](https://github.com/apache/arrow-datafusion/pull/2920) ([andygrove](https://github.com/andygrove)) +- Run clippy with optional features [\#2918](https://github.com/apache/arrow-datafusion/pull/2918) ([tustvold](https://github.com/tustvold)) +- Fix release 
verification script by not overriding `ARROW_TEST_DATA` or `PARQUET_TEST_DATA` [\#2917](https://github.com/apache/arrow-datafusion/pull/2917) ([alamb](https://github.com/alamb)) +- Move `ScalarValue` tests alongside implementation, move `from_slice` to `datafusion_core` [\#2914](https://github.com/apache/arrow-datafusion/pull/2914) ([alamb](https://github.com/alamb)) +- Optimizer should have option to skip failing rules [\#2909](https://github.com/apache/arrow-datafusion/pull/2909) ([andygrove](https://github.com/andygrove)) +- Introduce ObjectStoreProvider to create an object store based on the url [\#2906](https://github.com/apache/arrow-datafusion/pull/2906) ([yahoNanJing](https://github.com/yahoNanJing)) +- Remove datafusion-data-access crate [\#2904](https://github.com/apache/arrow-datafusion/pull/2904) ([yahoNanJing](https://github.com/yahoNanJing)) +- Combine all comparison coercion rules [\#2901](https://github.com/apache/arrow-datafusion/pull/2901) ([andygrove](https://github.com/andygrove)) +- Add `Projection::try_new` and `Projection::try_new_with_schema` [\#2900](https://github.com/apache/arrow-datafusion/pull/2900) ([andygrove](https://github.com/andygrove)) +- Improve formatting of logical plans containing subqueries [\#2899](https://github.com/apache/arrow-datafusion/pull/2899) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- add session option 'datafusion.explain.logical_plan'. when set to true, the explain statement will only print logical plans. 
[\#2895](https://github.com/apache/arrow-datafusion/pull/2895) ([AssHero](https://github.com/AssHero)) +- Preserve field name in `ScalarValue::List` [\#2893](https://github.com/apache/arrow-datafusion/pull/2893) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([comphead](https://github.com/comphead)) +- Adds optional serde support to datafusion-proto [\#2892](https://github.com/apache/arrow-datafusion/pull/2892) ([tustvold](https://github.com/tustvold)) +- Implement `ScalarValue::Dictionary` and preserve type through conversion back/forth to Array [\#2891](https://github.com/apache/arrow-datafusion/pull/2891) ([alamb](https://github.com/alamb)) +- Add an ID generator in preparation for PR 2885 [\#2887](https://github.com/apache/arrow-datafusion/pull/2887) ([avantgardnerio](https://github.com/avantgardnerio)) +- Add support for correlated subqueries & fix all related TPC-H benchmark issues [\#2885](https://github.com/apache/arrow-datafusion/pull/2885) ([avantgardnerio](https://github.com/avantgardnerio)) +- fix\(doc\): update test directory link in CONTRIBUTING.md [\#2882](https://github.com/apache/arrow-datafusion/pull/2882) ([BaymaxHWY](https://github.com/BaymaxHWY)) +- Add h2o bench groupby queries [\#2881](https://github.com/apache/arrow-datafusion/pull/2881) ([andygrove](https://github.com/andygrove)) +- Add support for month & year intervals [\#2797](https://github.com/apache/arrow-datafusion/pull/2797) ([avantgardnerio](https://github.com/avantgardnerio)) +- Migrate from avro_rs \(0.13\) to apache_avro \(0.14\) [\#2784](https://github.com/apache/arrow-datafusion/pull/2784) ([martin-g](https://github.com/martin-g)) diff --git a/dev/changelog/12.0.0.md b/dev/changelog/12.0.0.md new file mode 100644 index 0000000000000..c195a30cc9dc8 --- /dev/null +++ b/dev/changelog/12.0.0.md @@ -0,0 +1,246 @@ + + +## [12.0.0](https://github.com/apache/arrow-datafusion/tree/12.0.0) (2022-09-12) + +[Full 
Changelog](https://github.com/apache/arrow-datafusion/compare/11.0.0...12.0.0) + +**Breaking changes:** + +- Pass `return_type` to `AccumulatorFunctionImplementation ` for user defined aggregates [\#3428](https://github.com/apache/arrow-datafusion/pull/3428) ([alamb](https://github.com/alamb)) +- Use `usize` rather than `Option<usize>` to represent `Limit::skip` and `Limit::offset` [\#3374](https://github.com/apache/arrow-datafusion/pull/3374) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Deprecate legacy datafusion::logical_plan module [\#3338](https://github.com/apache/arrow-datafusion/pull/3338) ([andygrove](https://github.com/andygrove)) +- Update signature for Expr.name so that schema is no longer required [\#3336](https://github.com/apache/arrow-datafusion/pull/3336) ([andygrove](https://github.com/andygrove)) +- MINOR: rename optimizer rule to ScalarSubqueryToJoin [\#3306](https://github.com/apache/arrow-datafusion/pull/3306) ([kmitchener](https://github.com/kmitchener)) +- Add top-level `Like`, `ILike`, `SimilarTo` expressions in logical plan [\#3298](https://github.com/apache/arrow-datafusion/pull/3298) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Upgrade to sqlparser 0.22 [\#3278](https://github.com/apache/arrow-datafusion/pull/3278) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- `Expr` variants for boolean operations [\#3275](https://github.com/apache/arrow-datafusion/pull/3275) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([sarahyurick](https://github.com/sarahyurick)) +- Upgrade to sqlparser 0.21 [\#3200](https://github.com/apache/arrow-datafusion/pull/3200) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Add SQL planner support for `Like`, `ILike` and `SimilarTo`, with optional escape
character [\#3101](https://github.com/apache/arrow-datafusion/pull/3101) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) + +**Implemented enhancements:** + +- support `cast` inside `values` [\#3446](https://github.com/apache/arrow-datafusion/issues/3446) +- update TPCH test schemas to use Decimal128 from Float [\#3435](https://github.com/apache/arrow-datafusion/issues/3435) +- Include Bitwise operators in the documentation [\#3434](https://github.com/apache/arrow-datafusion/issues/3434) +- How to read excel file with datafusion? [\#3433](https://github.com/apache/arrow-datafusion/issues/3433) +- Pass return type to the accumulator state factory in aggregates [\#3427](https://github.com/apache/arrow-datafusion/issues/3427) +- Support bitwise XOR operator \(`#`\) [\#3420](https://github.com/apache/arrow-datafusion/issues/3420) +- support InList with datatype Date32 [\#3412](https://github.com/apache/arrow-datafusion/issues/3412) +- add simplification for `between` expression during logical plan optimization [\#3402](https://github.com/apache/arrow-datafusion/issues/3402) +- Replace From trait with TryFrom trait for datafusion-proto crate [\#3401](https://github.com/apache/arrow-datafusion/issues/3401) +- update TPC-H benchmark to Decimal types from Float [\#3392](https://github.com/apache/arrow-datafusion/issues/3392) +- Use `usize` to represent `Limit::skip` [\#3369](https://github.com/apache/arrow-datafusion/issues/3369) +- Avoid copying in `LogicalPlan::expressions` [\#3368](https://github.com/apache/arrow-datafusion/issues/3368) +- Upgrade to Arrow 22 [\#3362](https://github.com/apache/arrow-datafusion/issues/3362) +- Eliminate `OFFSET 0` in the logical plan optimization [\#3355](https://github.com/apache/arrow-datafusion/issues/3355) +- Add ability to get unoptimized logical plan from DataFrame [\#3340](https://github.com/apache/arrow-datafusion/issues/3340) +- Allow IDEs to recognize generated code
[\#3332](https://github.com/apache/arrow-datafusion/issues/3332) +- `CAST` should not change the name of an expression [\#3326](https://github.com/apache/arrow-datafusion/issues/3326) +- add SQL support for unsigned integers [\#3325](https://github.com/apache/arrow-datafusion/issues/3325) +- Review use of panic in `datafusion-proto` crate [\#3318](https://github.com/apache/arrow-datafusion/issues/3318) +- Review use of panic in `datafusion-sql` crate [\#3315](https://github.com/apache/arrow-datafusion/issues/3315) +- Review use of panic in `datafusion-optimizer` crate [\#3314](https://github.com/apache/arrow-datafusion/issues/3314) +- Review use of panic in `datafusion-expr` crate [\#3312](https://github.com/apache/arrow-datafusion/issues/3312) +- Support registration of custom TableProviders through SQL [\#3310](https://github.com/apache/arrow-datafusion/issues/3310) +- Support binary data in sha hash functions [\#3308](https://github.com/apache/arrow-datafusion/issues/3308) +- add SQL support for tinyint and unsigned versions of all INTs [\#3307](https://github.com/apache/arrow-datafusion/issues/3307) +- Support binary types in InList expression [\#3300](https://github.com/apache/arrow-datafusion/issues/3300) +- Physical planner should map `IsTrue` and similar expressions to `IsDistinctFrom` [\#3288](https://github.com/apache/arrow-datafusion/issues/3288) +- Introduce physical plan version of `Operator` enum [\#3269](https://github.com/apache/arrow-datafusion/issues/3269) +- Introduce `Expr` variants for `IS [NOT] TRUE / FALSE / UNKNOWN` [\#3268](https://github.com/apache/arrow-datafusion/issues/3268) +- Add support for non-correlated subqueries [\#3266](https://github.com/apache/arrow-datafusion/issues/3266) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] +- \(Re-\)add support for glob patterns in ListingTableUrl [\#3261](https://github.com/apache/arrow-datafusion/issues/3261) +- `PreCastLitInComparisonExpressions` should use ExprRewriter and 
supported nested expressions [\#3259](https://github.com/apache/arrow-datafusion/issues/3259) +- implement `DROP VIEW` [\#3251](https://github.com/apache/arrow-datafusion/issues/3251) +- Upgrade to Arrow 21 [\#3224](https://github.com/apache/arrow-datafusion/issues/3224) +- Add TypeCoercion optimizer rule [\#3221](https://github.com/apache/arrow-datafusion/issues/3221) +- Create bench for approx_percentile_cont aggregate [\#3217](https://github.com/apache/arrow-datafusion/issues/3217) +- Add SQL query planner support for `DISTRIBUTED BY` [\#3207](https://github.com/apache/arrow-datafusion/issues/3207) +- Support "IS \[NOT\] UNKNOWN" syntax [\#3195](https://github.com/apache/arrow-datafusion/issues/3195) +- sqlparser 0.21 upgrade [\#3192](https://github.com/apache/arrow-datafusion/issues/3192) +- Re-implement parsing/planning for SHOW TABLES due to sqlparser changes [\#3188](https://github.com/apache/arrow-datafusion/issues/3188) +- Support `SUM` `AVG`, `MIN`, `MAX` on `Time` columns. [\#3166](https://github.com/apache/arrow-datafusion/issues/3166) +- Support "IS TRUE/FALSE" syntax [\#3159](https://github.com/apache/arrow-datafusion/issues/3159) +- Support number of histogram bins in approx_percentile_cont [\#3145](https://github.com/apache/arrow-datafusion/issues/3145) +- Support create ApproxPercentileAccumulator with TDigest max_size [\#3142](https://github.com/apache/arrow-datafusion/issues/3142) +- Remove support for `array` function and only support `array[]` style postgres syntax [\#3115](https://github.com/apache/arrow-datafusion/issues/3115) +- Allow inline column aliases for create view [\#3108](https://github.com/apache/arrow-datafusion/issues/3108) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] +- Add support for Postgres `SIMILAR TO` and `ILIKE` syntax [\#3099](https://github.com/apache/arrow-datafusion/issues/3099) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] +- Update SQL reference in user guide to cover all supported 
syntax [\#3091](https://github.com/apache/arrow-datafusion/issues/3091) +- DataFusion prelude should import all logical expression functions [\#3068](https://github.com/apache/arrow-datafusion/issues/3068) +- Proposal: Add similar to operator [\#3016](https://github.com/apache/arrow-datafusion/issues/3016) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] +- Release DataFusion 11.0.0 [\#3012](https://github.com/apache/arrow-datafusion/issues/3012) +- Implement "SHOW CREATE TABLE" for external tables [\#2848](https://github.com/apache/arrow-datafusion/issues/2848) +- Change java package names in protobuf files [\#2513](https://github.com/apache/arrow-datafusion/issues/2513) +- When creating `DFField` from `Expr` we should provide input plan not input schema [\#2456](https://github.com/apache/arrow-datafusion/issues/2456) +- Support "IS NOT TRUE/FALSE" syntax [\#2265](https://github.com/apache/arrow-datafusion/issues/2265) +- RFC: Spill-To-Disk Object Storage Download [\#2205](https://github.com/apache/arrow-datafusion/issues/2205) +- Support for BitwiseAnd `&`, BitOr `|` binary operators [\#1619](https://github.com/apache/arrow-datafusion/issues/1619) +- \[Question\] Usage of async object store APIs in consuming code [\#1313](https://github.com/apache/arrow-datafusion/issues/1313) +- Allow User Defined Aggregates to return multiple values / structs [\#600](https://github.com/apache/arrow-datafusion/issues/600) +- Implement vectorized hashing for dictionary types [\#331](https://github.com/apache/arrow-datafusion/issues/331) + +**Fixed bugs:** + +- Intermittent build error when changing selected features [\#3366](https://github.com/apache/arrow-datafusion/issues/3366) +- `sql::timestamp::timestamp_add_interval_months` failing since September 1st [\#3327](https://github.com/apache/arrow-datafusion/issues/3327) +- `sql::timestamp::timestamp_add_interval_months` test fails [\#3322](https://github.com/apache/arrow-datafusion/issues/3322) +- test case 
`timestamp_add_interval_months` failed on master branch [\#3321](https://github.com/apache/arrow-datafusion/issues/3321) +- datafusion-proto does not support untyped null scalar values [\#3302](https://github.com/apache/arrow-datafusion/issues/3302) +- `ConfigOptions` creation is slow [\#3295](https://github.com/apache/arrow-datafusion/issues/3295) +- FilterPushDown optimization through UNION ALL results in SchemaError [\#3281](https://github.com/apache/arrow-datafusion/issues/3281) +- Execute LogicalPlans after building for TPCH Benchmarks [\#3273](https://github.com/apache/arrow-datafusion/issues/3273) +- `CREATE TABLE` should return empty DataFrame [\#3265](https://github.com/apache/arrow-datafusion/issues/3265) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] +- `CREATE EXTERNAL TABLE` from CSV creates a table with no columns if there is just a header row [\#3263](https://github.com/apache/arrow-datafusion/issues/3263) +- View TableProvider ignores projections, resulting in invalid plans [\#3240](https://github.com/apache/arrow-datafusion/issues/3240) +- CREATE VIEW should return an empty dataframe on success [\#3236](https://github.com/apache/arrow-datafusion/issues/3236) +- `DISTRIBUTE BY` expressions get removed during optimization [\#3234](https://github.com/apache/arrow-datafusion/issues/3234) +- datafusion cannot recognize Chinese characters.
[\#3203](https://github.com/apache/arrow-datafusion/issues/3203) +- Panicked at 'byte index 1 is out of bounds on invalid query [\#3190](https://github.com/apache/arrow-datafusion/issues/3190) +- `like_nlike_with_null_lt` fails with latest sqlparser code [\#3187](https://github.com/apache/arrow-datafusion/issues/3187) +- Interval Literal output inconsistent date_type [\#3180](https://github.com/apache/arrow-datafusion/issues/3180) +- `array` function allows different data types [\#3123](https://github.com/apache/arrow-datafusion/issues/3123) +- eq operator doesn't work on binary data [\#3117](https://github.com/apache/arrow-datafusion/issues/3117) +- incorrect `where` clause comparison while using table alias [\#3073](https://github.com/apache/arrow-datafusion/issues/3073) +- Some functions are incorrectly declared as unary [\#3069](https://github.com/apache/arrow-datafusion/issues/3069) +- once now\(\) is called in a statement, it forever returns the same value [\#3057](https://github.com/apache/arrow-datafusion/issues/3057) +- single_distinct_to_groupby panic when group by expr is a binaryExpr [\#2994](https://github.com/apache/arrow-datafusion/issues/2994) +- Cannot have `order by` expression that references complex `group by` expression [\#2360](https://github.com/apache/arrow-datafusion/issues/2360) +- Fix some bugs in TypeCoercion rule [\#3407](https://github.com/apache/arrow-datafusion/pull/3407) ([andygrove](https://github.com/andygrove)) +- MINOR: Stop ignoring `AggregateFunction::distinct` in protobuf serde code [\#3250](https://github.com/apache/arrow-datafusion/pull/3250) ([andygrove](https://github.com/andygrove)) +- Add assertion for invariant in `create_physical_expression` and fix ViewTable projection [\#3242](https://github.com/apache/arrow-datafusion/pull/3242) ([andygrove](https://github.com/andygrove)) +- Fix bug where optimizer was removing `Partitioning::DistributeBy` expressions [\#3229](https://github.com/apache/arrow-datafusion/pull/3229) 
([andygrove](https://github.com/andygrove)) + +**Documentation updates:** + +- \[minor\] add Coverage Status in readme [\#3220](https://github.com/apache/arrow-datafusion/pull/3220) ([Ted-Jiang](https://github.com/Ted-Jiang)) + +**Closed issues:** + +- Add `\i` command to datafusion-cli [\#1906](https://github.com/apache/arrow-datafusion/issues/1906) +- TPC-H Query 15 [\#166](https://github.com/apache/arrow-datafusion/issues/166) + +**Merged pull requests:** + +- minor: fix some typo. [\#3453](https://github.com/apache/arrow-datafusion/pull/3453) ([jackwener](https://github.com/jackwener)) +- Update criterion requirement from 0.3 to 0.4 [\#3452](https://github.com/apache/arrow-datafusion/pull/3452) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update object_store requirement from 0.4.0 to 0.5.0 [\#3451](https://github.com/apache/arrow-datafusion/pull/3451) ([dependabot[bot]](https://github.com/apps/dependabot)) +- add `cast` support inside `values` [\#3447](https://github.com/apache/arrow-datafusion/pull/3447) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) +- Use hash repartitioning for aggregates on dictionaries [\#3445](https://github.com/apache/arrow-datafusion/pull/3445) ([isidentical](https://github.com/isidentical)) +- Review `unwrap` and `panic` from the `aggregate` directory of `datafusion-physical-expr` [\#3443](https://github.com/apache/arrow-datafusion/pull/3443) ([iajoiner](https://github.com/iajoiner)) +- MINOR: Implement protobuf serde for all binary operators [\#3441](https://github.com/apache/arrow-datafusion/pull/3441) ([andygrove](https://github.com/andygrove)) +- MINOR: Add accessor methods to DateTimeIntervalExpr [\#3440](https://github.com/apache/arrow-datafusion/pull/3440) ([andygrove](https://github.com/andygrove)) +- update TPCH-mimicking tests to Decimal data type from Float, matching the benchmark [\#3438](https://github.com/apache/arrow-datafusion/pull/3438) 
([kmitchener](https://github.com/kmitchener)) +- Include Bitwise operators in the documentation [\#3436](https://github.com/apache/arrow-datafusion/pull/3436) ([askoa](https://github.com/askoa)) +- minor: make sql number parsing slightly more efficient + functional [\#3432](https://github.com/apache/arrow-datafusion/pull/3432) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Implement bitwise XOR operator \(`#`\) [\#3430](https://github.com/apache/arrow-datafusion/pull/3430) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([askoa](https://github.com/askoa)) +- Replace From trait with TryFrom trait for datafusion-proto crate \#3401 [\#3429](https://github.com/apache/arrow-datafusion/pull/3429) ([comphead](https://github.com/comphead)) +- Tests showing user defined aggregate returning a struct [\#3425](https://github.com/apache/arrow-datafusion/pull/3425) ([alamb](https://github.com/alamb)) +- MINOR: update optimizer rule names to be consistent style as the rest [\#3415](https://github.com/apache/arrow-datafusion/pull/3415) ([kmitchener](https://github.com/kmitchener)) +- Support date32 and date 64 in inlist node [\#3413](https://github.com/apache/arrow-datafusion/pull/3413) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Update sqlparser requirement from 0.22 to 0.23 [\#3411](https://github.com/apache/arrow-datafusion/pull/3411) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- simplify the `between` expr during logical plan optimization [\#3404](https://github.com/apache/arrow-datafusion/pull/3404) ([kmitchener](https://github.com/kmitchener)) +- MINOR: Improve optimizer error [\#3403](https://github.com/apache/arrow-datafusion/pull/3403) ([andygrove](https://github.com/andygrove)) +- Review panics in the sql crate [\#3397](https://github.com/apache/arrow-datafusion/pull/3397) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- changed TPC-H benchmark to use Decimal types [\#3393](https://github.com/apache/arrow-datafusion/pull/3393) ([kmitchener](https://github.com/kmitchener)) +- minor: remove redundant code. [\#3389](https://github.com/apache/arrow-datafusion/pull/3389) ([jackwener](https://github.com/jackwener)) +- Add dictionary cases to merge bench [\#3384](https://github.com/apache/arrow-datafusion/pull/3384) ([tustvold](https://github.com/tustvold)) +- Implement Eq trait for Expr and nested types [\#3381](https://github.com/apache/arrow-datafusion/pull/3381) ([jdye64](https://github.com/jdye64)) +- Minor: Improvements to type coercion rule [\#3379](https://github.com/apache/arrow-datafusion/pull/3379) ([alamb](https://github.com/alamb)) +- MINOR: Note that most communication happens on github [\#3375](https://github.com/apache/arrow-datafusion/pull/3375) ([alamb](https://github.com/alamb)) +- minor fix: clean data type for negative operation [\#3370](https://github.com/apache/arrow-datafusion/pull/3370) ([liukun4515](https://github.com/liukun4515)) +- Fix code generation for json feature [\#3367](https://github.com/apache/arrow-datafusion/pull/3367) ([avantgardnerio](https://github.com/avantgardnerio)) +- Review use of panic in datafusion-proto crate [\#3365](https://github.com/apache/arrow-datafusion/pull/3365) ([comphead](https://github.com/comphead)) +- Upgrade to arrow 22 [\#3363](https://github.com/apache/arrow-datafusion/pull/3363) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- return empty dataframe on create table, remove a duplicate optimize call [\#3361](https://github.com/apache/arrow-datafusion/pull/3361) ([kmitchener](https://github.com/kmitchener)) +- Add SQL support for `tinyint` , `smallint`, and `unsigned int variants` [\#3359](https://github.com/apache/arrow-datafusion/pull/3359) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) +- Minor: add hint in README of example [\#3358](https://github.com/apache/arrow-datafusion/pull/3358) ([jackwener](https://github.com/jackwener)) +- Collect to `HashSet` directly in `in_list` [\#3356](https://github.com/apache/arrow-datafusion/pull/3356) ([HaoYang670](https://github.com/HaoYang670)) +- MINOR: Add comments about rewrite_disjunctive_predicate [\#3351](https://github.com/apache/arrow-datafusion/pull/3351) ([alamb](https://github.com/alamb)) +- \[MINOR\] Add debug logging to plan teardown [\#3350](https://github.com/apache/arrow-datafusion/pull/3350) ([alamb](https://github.com/alamb)) +- MINOR: add df.to_unoptimized_plan\(\) to docs, remove erroneous comment [\#3348](https://github.com/apache/arrow-datafusion/pull/3348) ([kmitchener](https://github.com/kmitchener)) +- Replace `unwrap` in `convert_to_ordered_float` and add `downcast_value` [\#3347](https://github.com/apache/arrow-datafusion/pull/3347) ([iajoiner](https://github.com/iajoiner)) +- Remove panics from `common_subexpr_eliminate` [\#3346](https://github.com/apache/arrow-datafusion/pull/3346) ([andygrove](https://github.com/andygrove)) +- Remove Result.unwrap from single_distinct_to_groupby [\#3345](https://github.com/apache/arrow-datafusion/pull/3345) ([andygrove](https://github.com/andygrove)) +- Add to_unoptimized_plan [\#3344](https://github.com/apache/arrow-datafusion/pull/3344) ([iajoiner](https://github.com/iajoiner)) +- Remove panics from simplify_expressions optimizer rule [\#3343](https://github.com/apache/arrow-datafusion/pull/3343) ([andygrove](https://github.com/andygrove)) +- Remove `unreachable!` from filter push down rule [\#3342](https://github.com/apache/arrow-datafusion/pull/3342) ([andygrove](https://github.com/andygrove)) +- Replace panic in `datafusion-expr` crate [\#3341](https://github.com/apache/arrow-datafusion/pull/3341) 
([iajoiner](https://github.com/iajoiner)) +- Re-implement ExprIdentifierVisitor::desc_expr to use Expr::Display [\#3339](https://github.com/apache/arrow-datafusion/pull/3339) ([andygrove](https://github.com/andygrove)) +- Fix the test`timestamp_add_interval_months` [\#3337](https://github.com/apache/arrow-datafusion/pull/3337) ([HaoYang670](https://github.com/HaoYang670)) +- Bump lz4-sys from 1.9.3 to 1.9.4 in /datafusion-cli [\#3335](https://github.com/apache/arrow-datafusion/pull/3335) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Make binary operator formatting consistent between logical and physical plans [\#3331](https://github.com/apache/arrow-datafusion/pull/3331) ([andygrove](https://github.com/andygrove)) +- Fix build: Ignore failing test [\#3329](https://github.com/apache/arrow-datafusion/pull/3329) ([andygrove](https://github.com/andygrove)) +- Add `InList` support for binary type. [\#3324](https://github.com/apache/arrow-datafusion/pull/3324) ([HaoYang670](https://github.com/HaoYang670)) +- MINOR: add github action trigger [\#3323](https://github.com/apache/arrow-datafusion/pull/3323) ([waynexia](https://github.com/waynexia)) +- add explain sql test for optimizer rule PreCastLitInComparisonExpressions [\#3320](https://github.com/apache/arrow-datafusion/pull/3320) ([liukun4515](https://github.com/liukun4515)) +- Custom / Dynamic table provider factories [\#3311](https://github.com/apache/arrow-datafusion/pull/3311) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- fix: alias group_by exprs in single_distinct_to_groupby optimizer [\#3305](https://github.com/apache/arrow-datafusion/pull/3305) ([waynexia](https://github.com/waynexia)) +- Add support for serializing null scalar values [\#3303](https://github.com/apache/arrow-datafusion/pull/3303) ([andygrove](https://github.com/andygrove)) +- Finish integrating `Expr::Is[Not]True` and similar expressions 
[\#3301](https://github.com/apache/arrow-datafusion/pull/3301) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- MINOR: Remove `unwrap` calls from `single_distinct_to_groupby optimizer` rule [\#3299](https://github.com/apache/arrow-datafusion/pull/3299) ([andygrove](https://github.com/andygrove)) +- docs: update the Python library repository [\#3297](https://github.com/apache/arrow-datafusion/pull/3297) ([haoxins](https://github.com/haoxins)) +- fix: speed up `ConfigOptions` creation [\#3296](https://github.com/apache/arrow-datafusion/pull/3296) ([crepererum](https://github.com/crepererum)) +- Execute LogicalPlans after building for TPCH Benchmarks [\#3290](https://github.com/apache/arrow-datafusion/pull/3290) ([DaltonModlin](https://github.com/DaltonModlin)) +- support for non-correlated subqueries [\#3287](https://github.com/apache/arrow-datafusion/pull/3287) ([kmitchener](https://github.com/kmitchener)) +- Add `Aggregate::try new` with validation checks [\#3286](https://github.com/apache/arrow-datafusion/pull/3286) ([andygrove](https://github.com/andygrove)) +- Fix SchemaError in FilterPushDown optimization with UNION ALL [\#3282](https://github.com/apache/arrow-datafusion/pull/3282) ([jonmmease](https://github.com/jonmmease)) +- Allow sorting by aggregated groups [\#3280](https://github.com/apache/arrow-datafusion/pull/3280) ([isidentical](https://github.com/isidentical)) +- Add show external tables [\#3279](https://github.com/apache/arrow-datafusion/pull/3279) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([psvri](https://github.com/psvri)) +- Return from task execution if send fails as there is nothing more to do \(faster cancel / limit\) [\#3276](https://github.com/apache/arrow-datafusion/pull/3276) ([nvartolomei](https://github.com/nvartolomei)) +- Let prelude import all expression functions [\#3274](https://github.com/apache/arrow-datafusion/pull/3274) 
([sadilet](https://github.com/sadilet)) +- Fix no schema when CSV is only header [\#3272](https://github.com/apache/arrow-datafusion/pull/3272) ([comphead](https://github.com/comphead)) +- support inlist for pre cast literal expression [\#3270](https://github.com/apache/arrow-datafusion/pull/3270) ([liukun4515](https://github.com/liukun4515)) +- implement `drop view` [\#3267](https://github.com/apache/arrow-datafusion/pull/3267) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([kmitchener](https://github.com/kmitchener)) +- Use `ExprRewriter` in `pre_cast_lit_in_comparison` [\#3260](https://github.com/apache/arrow-datafusion/pull/3260) ([andygrove](https://github.com/andygrove)) +- Add type coercion for UDFs in logical plan [\#3254](https://github.com/apache/arrow-datafusion/pull/3254) ([andygrove](https://github.com/andygrove)) +- Support "IS NOT TRUE/FALSE" syntax [\#3252](https://github.com/apache/arrow-datafusion/pull/3252) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([sarahyurick](https://github.com/sarahyurick)) +- Implement `IS UNKNOWN`/`IS NOT UNKNOWN` operators [\#3246](https://github.com/apache/arrow-datafusion/pull/3246) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([isidentical](https://github.com/isidentical)) +- support decimal data type for the optimizer rule of PreCastLitInComparisonExpressions [\#3245](https://github.com/apache/arrow-datafusion/pull/3245) ([liukun4515](https://github.com/liukun4515)) +- chore: update cranelifts to 0.87.0 [\#3243](https://github.com/apache/arrow-datafusion/pull/3243) ([yjshen](https://github.com/yjshen)) +- Moved nullif out of unary functions [\#3241](https://github.com/apache/arrow-datafusion/pull/3241) ([comphead](https://github.com/comphead)) +- MINOR: documentation updates [\#3239](https://github.com/apache/arrow-datafusion/pull/3239) ([kmitchener](https://github.com/kmitchener)) +- MINOR: Add bounds check to Column physical expression 
[\#3238](https://github.com/apache/arrow-datafusion/pull/3238) ([andygrove](https://github.com/andygrove)) +- CREATE VIEW should return empty dataframe [\#3237](https://github.com/apache/arrow-datafusion/pull/3237) ([kmitchener](https://github.com/kmitchener)) +- Support "IS TRUE/FALSE" syntax \(redo\) [\#3235](https://github.com/apache/arrow-datafusion/pull/3235) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([sarahyurick](https://github.com/sarahyurick)) +- Fix propagation of optimized predicates on nested projections [\#3228](https://github.com/apache/arrow-datafusion/pull/3228) ([isidentical](https://github.com/isidentical)) +- Add more trim test cases [\#3226](https://github.com/apache/arrow-datafusion/pull/3226) ([ayushdg](https://github.com/ayushdg)) +- Upgrade to arrow 21 [\#3225](https://github.com/apache/arrow-datafusion/pull/3225) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Add optimizer rule for type coercion \(binary operations only\) [\#3222](https://github.com/apache/arrow-datafusion/pull/3222) ([andygrove](https://github.com/andygrove)) +- \[Improve\] Use arrow::compute::sort in approx_percentile_cont [\#3219](https://github.com/apache/arrow-datafusion/pull/3219) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- \[minor\] fix bench aggregate_query_sql meta [\#3218](https://github.com/apache/arrow-datafusion/pull/3218) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- minor: refactor simplify negate [\#3213](https://github.com/apache/arrow-datafusion/pull/3213) ([jackwener](https://github.com/jackwener)) +- MINOR: update cargo.lock and rust-version for datafusion-cli [\#3212](https://github.com/apache/arrow-datafusion/pull/3212) ([kmitchener](https://github.com/kmitchener)) +- fix issue with now\(\) returning same value across statements [\#3210](https://github.com/apache/arrow-datafusion/pull/3210) ([kmitchener](https://github.com/kmitchener)) +- Add support for 
inline column alias in CREATE VIEW [\#3209](https://github.com/apache/arrow-datafusion/pull/3209) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([DaltonModlin](https://github.com/DaltonModlin)) +- Add SQL query planner support for `DISTRIBUTE BY` [\#3208](https://github.com/apache/arrow-datafusion/pull/3208) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- minor: remove test code that's in the arrow library now [\#3206](https://github.com/apache/arrow-datafusion/pull/3206) ([kmitchener](https://github.com/kmitchener)) +- Use .get\(\) to avoid panic [\#3201](https://github.com/apache/arrow-datafusion/pull/3201) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jklamer](https://github.com/jklamer)) +- \[Minor\] Reduce code duplication creating ScalarValue::List [\#3197](https://github.com/apache/arrow-datafusion/pull/3197) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Clean up CI workflows by removing "matrix" strategy, simplifying names [\#3196](https://github.com/apache/arrow-datafusion/pull/3196) ([alamb](https://github.com/alamb)) +- optimizer: add framework for the rule of pre-add cast to the literal in comparison binary [\#3185](https://github.com/apache/arrow-datafusion/pull/3185) ([liukun4515](https://github.com/liukun4515)) +- Fix clippy [\#3182](https://github.com/apache/arrow-datafusion/pull/3182) ([alamb](https://github.com/alamb)) +- MINOR: Add notes on writing release blog posts [\#3179](https://github.com/apache/arrow-datafusion/pull/3179) ([andygrove](https://github.com/andygrove)) +- add min/max for time [\#3178](https://github.com/apache/arrow-datafusion/pull/3178) ([waitingkuo](https://github.com/waitingkuo)) +- Recursively apply remove filter rule if filter is a true scalar value [\#3175](https://github.com/apache/arrow-datafusion/pull/3175) ([byteink](https://github.com/byteink)) +- Update `ahash` 
requirement from 0.7 to 0.8 [\#3161](https://github.com/apache/arrow-datafusion/pull/3161) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Support number of centroids in approx_percentile_cont [\#3146](https://github.com/apache/arrow-datafusion/pull/3146) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Introduce `\i` command to execute from a file [\#3136](https://github.com/apache/arrow-datafusion/pull/3136) ([turbo1912](https://github.com/turbo1912)) +- impl binary ops between binary arrays and scalars [\#3124](https://github.com/apache/arrow-datafusion/pull/3124) ([ozgrakkurt](https://github.com/ozgrakkurt)) diff --git a/dev/changelog/13.0.0.md b/dev/changelog/13.0.0.md new file mode 100644 index 0000000000000..0f35903e26003 --- /dev/null +++ b/dev/changelog/13.0.0.md @@ -0,0 +1,233 @@ + + +## [13.0.0](https://github.com/apache/arrow-datafusion/tree/13.0.0) (2022-10-06) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/12.0.0...13.0.0) + +**Breaking changes:** + +- Make ObjectStoreProvider fallible \(return `Result` rather than `Option`\) [\#3584](https://github.com/apache/arrow-datafusion/pull/3584) ([tustvold](https://github.com/tustvold)) +- Make `OptimizerConfig` a builder style API [\#3525](https://github.com/apache/arrow-datafusion/pull/3525) ([alamb](https://github.com/alamb)) + +**Implemented enhancements:** + +- remove `type coercion` for ScalarUDF in the physical phase [\#3734](https://github.com/apache/arrow-datafusion/issues/3734) +- Allow with statements to specify their columns alongside their expression names [\#3716](https://github.com/apache/arrow-datafusion/issues/3716) +- Support SQLDataType::Timestamp\(TimezoneInfo\) [\#3693](https://github.com/apache/arrow-datafusion/issues/3693) +- support `type coercion` for case when expr [\#3673](https://github.com/apache/arrow-datafusion/issues/3673) +- Add simplification rules for the `Modulo` operator 
[\#3664](https://github.com/apache/arrow-datafusion/issues/3664) +- Add TIMESTAMPTZ [\#3659](https://github.com/apache/arrow-datafusion/issues/3659) +- Simplify `A * 0` and `A * null`. [\#3626](https://github.com/apache/arrow-datafusion/issues/3626) +- change rule of `PreCastLitInComparisonExpressions` to unwrap cast rule after \#3582 [\#3622](https://github.com/apache/arrow-datafusion/issues/3622) +- Optimize regex_replace with a known pattern / replacement [\#3613](https://github.com/apache/arrow-datafusion/issues/3613) +- Simplify `CONCAT_WS(NULL, ..)` to `NULL` [\#3607](https://github.com/apache/arrow-datafusion/issues/3607) +- Add OctoSQL to list of systems powered by DataFusion [\#3605](https://github.com/apache/arrow-datafusion/issues/3605) +- Prevent over-allocation \(and spills\) on TopK queries [\#3596](https://github.com/apache/arrow-datafusion/issues/3596) +- Allow ObjectStoreProvider to return None \(return `Result<Option>` rather than `Result`\) [\#3594](https://github.com/apache/arrow-datafusion/issues/3594) +- simplify between expr should consider the data type [\#3587](https://github.com/apache/arrow-datafusion/issues/3587) +- make type coercion simple and remove the evaluate logic [\#3585](https://github.com/apache/arrow-datafusion/issues/3585) +- ReduceOuterJoin optimizer support `cast or try_cast` expr.
[\#3565](https://github.com/apache/arrow-datafusion/issues/3565) +- Support type coercion for subquery [\#3557](https://github.com/apache/arrow-datafusion/issues/3557) +- Make `ParquetScanOptions` public and expose a reference to the scan options from `ParquetExec` [\#3550](https://github.com/apache/arrow-datafusion/issues/3550) +- Use `fetch` limit in `get_sorted_iter` [\#3544](https://github.com/apache/arrow-datafusion/issues/3544) +- Push limit to sort [\#3528](https://github.com/apache/arrow-datafusion/issues/3528) +- Execute sorts in parallel when limit is used after sort [\#3526](https://github.com/apache/arrow-datafusion/issues/3526) +- Consolidate optimizer passes in optimizer module for better testing [\#3524](https://github.com/apache/arrow-datafusion/issues/3524) +- Support Top-K query optimization for `ORDER BY <EXPR> [ASC|DESC] LIMIT <N>` [\#3515](https://github.com/apache/arrow-datafusion/issues/3515) +- support the type coercion for `like` `unlike` `istrue` `isfalse` `isunknown` [\#3509](https://github.com/apache/arrow-datafusion/issues/3509) +- Automate the pushing of releases to Homebrew [\#3506](https://github.com/apache/arrow-datafusion/issues/3506) +- Add extra DATE_PART units that are already supported in arrow-rs [\#3502](https://github.com/apache/arrow-datafusion/issues/3502) +- Release datafusion-cli 12.0.0 on Homebrew [\#3501](https://github.com/apache/arrow-datafusion/issues/3501) +- Make `from_proto_binary_op` public [\#3489](https://github.com/apache/arrow-datafusion/issues/3489) +- coercion between decimal and other types lacking, compared to other numeric types [\#3479](https://github.com/apache/arrow-datafusion/issues/3479) +- move type coercion for inlist from physical phase to logical phase [\#3468](https://github.com/apache/arrow-datafusion/issues/3468) +- Make `datafusion::physical_plan::file_format::file_stream::FileStream` public [\#3466](https://github.com/apache/arrow-datafusion/issues/3466) +- Support using offset index in
`ParquetRecordBatchStream` when pushing down `RowFilter` [\#3456](https://github.com/apache/arrow-datafusion/issues/3456) +- Support timestamp data type in In_list node [\#3449](https://github.com/apache/arrow-datafusion/issues/3449) +- Evaluate expressions after type coercion [\#3431](https://github.com/apache/arrow-datafusion/issues/3431) +- Make a convenience function to register a single `RecordBatch` as a table from SessionContext [\#3426](https://github.com/apache/arrow-datafusion/issues/3426) +- add datafusion-cli support of external table locations that object_store supports [\#3424](https://github.com/apache/arrow-datafusion/issues/3424) +- pruning support cast/try_cast expr [\#3414](https://github.com/apache/arrow-datafusion/issues/3414) +- Add documentation on querying against files in object store such as S3 [\#3399](https://github.com/apache/arrow-datafusion/issues/3399) +- Remove type-coercion from physical planner [\#3388](https://github.com/apache/arrow-datafusion/issues/3388) +- support `Statement::ShowVariable` to show session configs [\#3364](https://github.com/apache/arrow-datafusion/issues/3364) +- Support `RowFilter` in `ParquetExec` [\#3360](https://github.com/apache/arrow-datafusion/issues/3360) +- Apply `TypeCoercion` rule before `FilterPushDown` [\#3289](https://github.com/apache/arrow-datafusion/issues/3289) +- Add support for `get` / `show` timezone [\#3255](https://github.com/apache/arrow-datafusion/issues/3255) +- Consider adding DataFusion to ClickBench benchmarks [\#2902](https://github.com/apache/arrow-datafusion/issues/2902) +- `filter_push_down` panics on semi/anti join with join filters [\#2888](https://github.com/apache/arrow-datafusion/issues/2888) +- Migrate the `cross join -> inner join optimization` from the planner to the optimizer [\#2859](https://github.com/apache/arrow-datafusion/issues/2859) +- ObjectStore write support [\#2185](https://github.com/apache/arrow-datafusion/issues/2185) +- DataFusion should scan Parquet 
statistics once per query [\#871](https://github.com/apache/arrow-datafusion/issues/871) +- Extend & generalize constant folding / evaluation in logical optimizer [\#237](https://github.com/apache/arrow-datafusion/issues/237) + +**Fixed bugs:** + +- `projection_push_down` produces invalid aggregate plans in some cases [\#3738](https://github.com/apache/arrow-datafusion/issues/3738) +- `Time With Time Zone` should raise error until `DataType::Time64` support tz [\#3715](https://github.com/apache/arrow-datafusion/issues/3715) +- SQL Planner doesn't distinguish normal CTEs from the recursive ones. [\#3713](https://github.com/apache/arrow-datafusion/issues/3713) +- Fix inconsistency between column name formats [\#3711](https://github.com/apache/arrow-datafusion/issues/3711) +- Optimizer rule 'projection_push_down' failed due to unexpected error: Error during planning: Aggregate schema has wrong number of fields. Expected 3 got 8 [\#3704](https://github.com/apache/arrow-datafusion/issues/3704) +- Optimizer regressions in `unwrap_cast_in_comparison` [\#3690](https://github.com/apache/arrow-datafusion/issues/3690) +- Internal error when evaluating a predicate = "The type of Dictionary\(Int16, Utf8\) = Int64 of binary physical should be same" [\#3685](https://github.com/apache/arrow-datafusion/issues/3685) +- Specialized regexp_replace should early-abort when the input arrays are empty [\#3647](https://github.com/apache/arrow-datafusion/issues/3647) +- Internal error: Failed to coerce types Decimal128\(10, 2\) and Boolean in BETWEEN expression [\#3646](https://github.com/apache/arrow-datafusion/issues/3646) +- Internal error: Failed to coerce types Decimal128\(10, 2\) and Boolean in BETWEEN expression [\#3645](https://github.com/apache/arrow-datafusion/issues/3645) +- Type coercion error: The type of Boolean AND Decimal128\(10, 2\) of binary physical should be same [\#3644](https://github.com/apache/arrow-datafusion/issues/3644) +- LEFT JOIN not working as expected,
error message is confusing [\#3639](https://github.com/apache/arrow-datafusion/issues/3639) +- `INTERSECT` and `EXCEPT` don't return an error when 2 sets have the different number of columns [\#3632](https://github.com/apache/arrow-datafusion/issues/3632) +- The datafusion-cli panics when `union` 2 table with different number of columns. [\#3630](https://github.com/apache/arrow-datafusion/issues/3630) +- The expression `col(a) / null` is not optimized. [\#3624](https://github.com/apache/arrow-datafusion/issues/3624) +- `s3_build_error` test may fail in some environments [\#3601](https://github.com/apache/arrow-datafusion/issues/3601) +- New clippy errors appear to break the CI on master [\#3597](https://github.com/apache/arrow-datafusion/issues/3597) +- `StringConcat` gives inconsistent result with `concat` when containing `null` [\#3569](https://github.com/apache/arrow-datafusion/issues/3569) +- simplify_expressions don't support different data type for binary [\#3556](https://github.com/apache/arrow-datafusion/issues/3556) +- Broken logical plan serialization for aggregation queries [\#3555](https://github.com/apache/arrow-datafusion/issues/3555) +- Aggregate filters do not get pushed down to table scan [\#3546](https://github.com/apache/arrow-datafusion/issues/3546) +- `docs.rs` cannot build `datafusion-proto` crate [\#3538](https://github.com/apache/arrow-datafusion/issues/3538) +- DataFusion serialization doesn't handle `ScalarValue::Dictionary, Binary, LargeBinary, Time64, IntervalMonthDayNano, Struct` [\#3531](https://github.com/apache/arrow-datafusion/issues/3531) +- What should be returned when trying to get a config in invalid format?
[\#3505](https://github.com/apache/arrow-datafusion/issues/3505) +- Dividing decimal type gives wrong error: "170141183460469231731687303715884105727 is too large to store in a Decimal128 [\#3498](https://github.com/apache/arrow-datafusion/issues/3498) +- Add BitwiseXor in function `from_proto_binary_op` [\#3495](https://github.com/apache/arrow-datafusion/issues/3495) +- comparison operations with a scalar null and decimal array panics [\#3487](https://github.com/apache/arrow-datafusion/issues/3487) +- Union columns with different types [\#3467](https://github.com/apache/arrow-datafusion/issues/3467) +- Can't get the right logical plan after optimizer [\#3421](https://github.com/apache/arrow-datafusion/issues/3421) +- Fix conflict between simplify_expression rule and CAST expressions [\#3409](https://github.com/apache/arrow-datafusion/issues/3409) +- Empty array giving error [\#2439](https://github.com/apache/arrow-datafusion/issues/2439) +- Internal error: Unsupported data type in hasher: FixedSizeBinary\(16\) [\#1516](https://github.com/apache/arrow-datafusion/issues/1516) +- Predicates on to_timestamp do not work as expected with "naive" timestamp strings [\#765](https://github.com/apache/arrow-datafusion/issues/765) +- Address performance/execution plan of TPCH query 19 [\#78](https://github.com/apache/arrow-datafusion/issues/78) +- Bug fix: expr_visitor was not visiting aggregate filter expressions [\#3548](https://github.com/apache/arrow-datafusion/pull/3548) ([andygrove](https://github.com/andygrove)) + +**Documentation updates:** + +- Publish 8.0.0 user guide [\#2558](https://github.com/apache/arrow-datafusion/issues/2558) +- MINOR: Add Dask SQL to list of projects powered by DataFusion [\#3581](https://github.com/apache/arrow-datafusion/pull/3581) ([andygrove](https://github.com/andygrove)) +- Add Parseable as Datafusion user [\#3471](https://github.com/apache/arrow-datafusion/pull/3471) ([nitisht](https://github.com/nitisht)) + +**Closed issues:** + +- 
Upgrade to Arrow 24.0.0 [\#3689](https://github.com/apache/arrow-datafusion/issues/3689) +- what's the best practice to get a single value from arrow array? [\#3497](https://github.com/apache/arrow-datafusion/issues/3497) +- The data type of predicate in the row filter should be same in the binary expr [\#3469](https://github.com/apache/arrow-datafusion/issues/3469) +- Extend constant folding and parquet filtering support [\#188](https://github.com/apache/arrow-datafusion/issues/188) +- Add FORMAT to explain plan and an easy to visualize format [\#96](https://github.com/apache/arrow-datafusion/issues/96) + +**Merged pull requests:** + +- Build aggregate schema in Aggregate::try_new [\#3739](https://github.com/apache/arrow-datafusion/pull/3739) ([andygrove](https://github.com/andygrove)) +- delete type coercion for scalar udf in the physical phase [\#3735](https://github.com/apache/arrow-datafusion/pull/3735) ([liukun4515](https://github.com/liukun4515)) +- Consolidate coercion code in `datafusion_expr::type_coercion` and submodules [\#3728](https://github.com/apache/arrow-datafusion/pull/3728) ([alamb](https://github.com/alamb)) +- Skip filter push down on semi/anti joins [\#3723](https://github.com/apache/arrow-datafusion/pull/3723) ([andygrove](https://github.com/andygrove)) +- Raise `Unsupported SQL type` for `Time(WithTimeZone)` and `Time(Tz)` [\#3718](https://github.com/apache/arrow-datafusion/pull/3718) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- Support column aliases specified by `WITH` statements [\#3717](https://github.com/apache/arrow-datafusion/pull/3717) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([isidentical](https://github.com/isidentical)) +- Reject recursive CTEs before processing the sub-expressions [\#3714](https://github.com/apache/arrow-datafusion/pull/3714) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] 
([isidentical](https://github.com/isidentical)) +- Make column name consistent between Expr::name and Display/Debug [\#3712](https://github.com/apache/arrow-datafusion/pull/3712) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Fix aggregate type coercion bug [\#3710](https://github.com/apache/arrow-datafusion/pull/3710) ([alamb](https://github.com/alamb)) +- MINOR: Add `Expr::canonical_name` and improve docs on `Expr::name` [\#3706](https://github.com/apache/arrow-datafusion/pull/3706) ([andygrove](https://github.com/andygrove)) +- Remove type coercions from ScalarValue and aggregation function code [\#3705](https://github.com/apache/arrow-datafusion/pull/3705) ([ozankabak](https://github.com/ozankabak)) +- `unwrap_cast_in_comparison`: fix bug which can find the field for the schema [\#3699](https://github.com/apache/arrow-datafusion/pull/3699) ([liukun4515](https://github.com/liukun4515)) +- bump sql-parser 0.25 [\#3698](https://github.com/apache/arrow-datafusion/pull/3698) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Move optimizer init to optimizer crate [\#3692](https://github.com/apache/arrow-datafusion/pull/3692) ([andygrove](https://github.com/andygrove)) +- Upgrade `arrow` `parquet` and `arrow-flight` to 24.0.0 [\#3691](https://github.com/apache/arrow-datafusion/pull/3691) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fix bug in dictionary coercion and allow better coercion [\#3688](https://github.com/apache/arrow-datafusion/pull/3688) ([alamb](https://github.com/alamb)) +- \[MINOR\] Improve docstrings in binary_rule.rs [\#3687](https://github.com/apache/arrow-datafusion/pull/3687) ([alamb](https://github.com/alamb)) +- \[MINOR\] Add `ScalarValue::new_utf8`, clean up creation of literals in casting tests [\#3680](https://github.com/apache/arrow-datafusion/pull/3680) 
([alamb](https://github.com/alamb)) +- Disable code coverage until we figure out why it is broken [\#3679](https://github.com/apache/arrow-datafusion/pull/3679) ([alamb](https://github.com/alamb)) +- move `type coercion` for case when expr [\#3676](https://github.com/apache/arrow-datafusion/pull/3676) ([liukun4515](https://github.com/liukun4515)) +- Update sqlparser to 0.24.0 [\#3675](https://github.com/apache/arrow-datafusion/pull/3675) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fail if field lengths are not same in INTERSECT and EXCEPT [\#3674](https://github.com/apache/arrow-datafusion/pull/3674) ([askoa](https://github.com/askoa)) +- Simplification Rules for Modulo Operator [\#3669](https://github.com/apache/arrow-datafusion/pull/3669) ([askoa](https://github.com/askoa)) +- change pre_cast_lit_in_comparison to unwrap_cast_in_comparison [\#3662](https://github.com/apache/arrow-datafusion/pull/3662) ([liukun4515](https://github.com/liukun4515)) +- restore optimization for `between` in simplify expression rule [\#3661](https://github.com/apache/arrow-datafusion/pull/3661) ([liukun4515](https://github.com/liukun4515)) +- add timestamptz [\#3660](https://github.com/apache/arrow-datafusion/pull/3660) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- remove the type coercion in the simplify_expressions rule [\#3657](https://github.com/apache/arrow-datafusion/pull/3657) ([liukun4515](https://github.com/liukun4515)) +- Cache collected file statistics [\#3649](https://github.com/apache/arrow-datafusion/pull/3649) ([mateuszkj](https://github.com/mateuszkj)) +- make regexp_replace early abort with empty input [\#3648](https://github.com/apache/arrow-datafusion/pull/3648) ([isidentical](https://github.com/isidentical)) +- Check each query has same number of columns when building the UNION plan [\#3638](https://github.com/apache/arrow-datafusion/pull/3638)
([HaoYang670](https://github.com/HaoYang670)) +- move the `type coercion` to the beginning of the optimizer rule and support type coercion for subquery [\#3636](https://github.com/apache/arrow-datafusion/pull/3636) ([liukun4515](https://github.com/liukun4515)) +- Add documentation for querying S3 data with CLI [\#3631](https://github.com/apache/arrow-datafusion/pull/3631) ([andygrove](https://github.com/andygrove)) +- Simplify multiplication by `0` and by `null` [\#3627](https://github.com/apache/arrow-datafusion/pull/3627) ([HaoYang670](https://github.com/HaoYang670)) +- Simplify null division. [\#3625](https://github.com/apache/arrow-datafusion/pull/3625) ([HaoYang670](https://github.com/HaoYang670)) +- support cast/try_cast expr in reduceOuterJoin [\#3621](https://github.com/apache/arrow-datafusion/pull/3621) ([AssHero](https://github.com/AssHero)) +- MINOR: fix TPC-H conversion function to not miss a row of data [\#3620](https://github.com/apache/arrow-datafusion/pull/3620) ([kmitchener](https://github.com/kmitchener)) +- Document ObjectStoreProvider [\#3619](https://github.com/apache/arrow-datafusion/pull/3619) ([tustvold](https://github.com/tustvold)) +- \[feat\] Support using offset index in ParquetRecordBatchStream when pu… [\#3616](https://github.com/apache/arrow-datafusion/pull/3616) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Optimize `regex_replace` for scalar patterns [\#3614](https://github.com/apache/arrow-datafusion/pull/3614) ([isidentical](https://github.com/isidentical)) +- Simplify `concat_ws(null, ..)` to `null` [\#3608](https://github.com/apache/arrow-datafusion/pull/3608) ([HaoYang670](https://github.com/HaoYang670)) +- MINOR: improve docstrings on SessionContext [\#3603](https://github.com/apache/arrow-datafusion/pull/3603) ([alamb](https://github.com/alamb)) +- Merge s3_success and s3_build_error tests into one test [\#3602](https://github.com/apache/arrow-datafusion/pull/3602) ([Licht-T](https://github.com/Licht-T)) +- add 
`register_batch` and `read_batch` to `SessionContext` to register a single RecordBatch as a table [\#3600](https://github.com/apache/arrow-datafusion/pull/3600) ([BaymaxHWY](https://github.com/BaymaxHWY)) +- \[CI\] Fix the newly added linting errors to make clippy happy [\#3598](https://github.com/apache/arrow-datafusion/pull/3598) ([isidentical](https://github.com/isidentical)) +- Prevent over-allocations \(and spills\) on sorts with a fixed limit [\#3593](https://github.com/apache/arrow-datafusion/pull/3593) ([isidentical](https://github.com/isidentical)) +- update datafusion cli deps [\#3588](https://github.com/apache/arrow-datafusion/pull/3588) ([Jimexist](https://github.com/Jimexist)) +- Update cranelift\* dependencies `0.87` --\> `0.88` [\#3586](https://github.com/apache/arrow-datafusion/pull/3586) ([alamb](https://github.com/alamb)) +- Fix docs.rs [\#3580](https://github.com/apache/arrow-datafusion/pull/3580) ([avantgardnerio](https://github.com/avantgardnerio)) +- Fix build [\#3576](https://github.com/apache/arrow-datafusion/pull/3576) ([alamb](https://github.com/alamb)) +- Use consistent name for `TimeUnit::Millisecond` [\#3575](https://github.com/apache/arrow-datafusion/pull/3575) ([alamb](https://github.com/alamb)) +- Fix logical plan serialization [\#3574](https://github.com/apache/arrow-datafusion/pull/3574) ([thinkharderdev](https://github.com/thinkharderdev)) +- Custom window frame logic \(support `ROWS`, `RANGE`, `PRECEDING` and `FOLLOWING` for window functions\) [\#3570](https://github.com/apache/arrow-datafusion/pull/3570) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([metesynnada](https://github.com/metesynnada)) +- fix comparison of decimal array with null scalar [\#3567](https://github.com/apache/arrow-datafusion/pull/3567) ([kmitchener](https://github.com/kmitchener)) +- Reduce dependencies of `datafusion-sql` crate [\#3566](https://github.com/apache/arrow-datafusion/pull/3566) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mbrobbel](https://github.com/mbrobbel)) +- Update pbjson-types requirement from 0.3 to 0.5 [\#3560](https://github.com/apache/arrow-datafusion/pull/3560) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update pbjson requirement from 0.3 to 0.5 [\#3559](https://github.com/apache/arrow-datafusion/pull/3559) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update pbjson-build requirement from 0.3 to 0.5 [\#3558](https://github.com/apache/arrow-datafusion/pull/3558) ([dependabot[bot]](https://github.com/apps/dependabot)) +- MINOR: enable q19 in TPCH [\#3553](https://github.com/apache/arrow-datafusion/pull/3553) ([kmitchener](https://github.com/kmitchener)) +- MINOR: remove out-of-date is_dictionary checks from binary_rule.rs [\#3552](https://github.com/apache/arrow-datafusion/pull/3552) ([kmitchener](https://github.com/kmitchener)) +- Make ParquetScanOptions public and add method to get a reference from… [\#3551](https://github.com/apache/arrow-datafusion/pull/3551) ([thinkharderdev](https://github.com/thinkharderdev)) +- fix coercion of null for decimal math in binary_rules [\#3549](https://github.com/apache/arrow-datafusion/pull/3549) ([kmitchener](https://github.com/kmitchener)) +- Use `fetch` limit in get_sorted_iter [\#3545](https://github.com/apache/arrow-datafusion/pull/3545) ([Dandandan](https://github.com/Dandandan)) +- feat: allow object store registration from datafusion-cli [\#3540](https://github.com/apache/arrow-datafusion/pull/3540) ([turbo1912](https://github.com/turbo1912)) +- Actually test that `ScalarValue`s are the same after round trip serialization [\#3537](https://github.com/apache/arrow-datafusion/pull/3537) ([alamb](https://github.com/alamb)) +- Add serialization of `ScalarValue::Struct` [\#3536](https://github.com/apache/arrow-datafusion/pull/3536) ([alamb](https://github.com/alamb)) +- Add serialization of `ScalarValue::IntervalMonthDayNano` 
[\#3535](https://github.com/apache/arrow-datafusion/pull/3535) ([alamb](https://github.com/alamb)) +- Add serialization of `ScalarValue::Binary` and `ScalarValue::LargeBinary`, `ScalarValue::Time64` [\#3534](https://github.com/apache/arrow-datafusion/pull/3534) ([alamb](https://github.com/alamb)) +- MINOR: Impl `Debug` for TableReference and ResolvedTableReference [\#3533](https://github.com/apache/arrow-datafusion/pull/3533) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Add support for serializing `ScalarValue::Dictionary` to datafusion-proto [\#3532](https://github.com/apache/arrow-datafusion/pull/3532) ([alamb](https://github.com/alamb)) +- Push down limit to sort [\#3530](https://github.com/apache/arrow-datafusion/pull/3530) ([Dandandan](https://github.com/Dandandan)) +- Execute sort in parallel when a limit is used after sort [\#3527](https://github.com/apache/arrow-datafusion/pull/3527) ([Dandandan](https://github.com/Dandandan)) +- Config support type conversion [\#3522](https://github.com/apache/arrow-datafusion/pull/3522) ([comphead](https://github.com/comphead)) +- MINOR: Add more execs to list of supported execs [\#3519](https://github.com/apache/arrow-datafusion/pull/3519) ([andygrove](https://github.com/andygrove)) +- fix divide by zero not throwing proper error for decimal [\#3517](https://github.com/apache/arrow-datafusion/pull/3517) ([kmitchener](https://github.com/kmitchener)) +- Make FileStream and FileOpener public [\#3514](https://github.com/apache/arrow-datafusion/pull/3514) ([thinkharderdev](https://github.com/thinkharderdev)) +- feat: Union types coercion [\#3513](https://github.com/apache/arrow-datafusion/pull/3513) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gandronchik](https://github.com/gandronchik)) +- \[DataFrame\] - Add cache function for DataFrame [\#3512](https://github.com/apache/arrow-datafusion/pull/3512) 
([francis-du](https://github.com/francis-du)) +- type coercion: support is/is_not\_`bool`/like/unknown expr [\#3510](https://github.com/apache/arrow-datafusion/pull/3510) ([liukun4515](https://github.com/liukun4515)) +- MINOR: remove unused dependencies [\#3508](https://github.com/apache/arrow-datafusion/pull/3508) ([waynexia](https://github.com/waynexia)) +- Automate postrelease publishing to Homebrew [\#3507](https://github.com/apache/arrow-datafusion/pull/3507) ([iajoiner](https://github.com/iajoiner)) +- Add additional DATE_PART units [\#3503](https://github.com/apache/arrow-datafusion/pull/3503) ([jonmmease](https://github.com/jonmmease)) +- Add BitwiseXor in function from_proto_binary_op [\#3496](https://github.com/apache/arrow-datafusion/pull/3496) ([askoa](https://github.com/askoa)) +- Make the function from_proto_binary_op public [\#3490](https://github.com/apache/arrow-datafusion/pull/3490) ([askoa](https://github.com/askoa)) +- minor: fix bug in `downcast_value!` macro \(`T` --\> `$T`\) [\#3486](https://github.com/apache/arrow-datafusion/pull/3486) ([alamb](https://github.com/alamb)) +- add time_zone into ConfigOptions [\#3485](https://github.com/apache/arrow-datafusion/pull/3485) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- \[MINOR\] Change `downcast_value!` macro so it does not need to use `use std::any::type_name;` [\#3484](https://github.com/apache/arrow-datafusion/pull/3484) ([alamb](https://github.com/alamb)) +- Convert more cross joins to inner joins \(Address performance/execution plan of TPCH query 19\) [\#3482](https://github.com/apache/arrow-datafusion/pull/3482) ([DhamoPS](https://github.com/DhamoPS)) +- \[minor\] Remove unused arg in macro in Inlist [\#3474](https://github.com/apache/arrow-datafusion/pull/3474) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- inlist: move type coercion to logical phase [\#3472](https://github.com/apache/arrow-datafusion/pull/3472) 
([liukun4515](https://github.com/liukun4515)) +- Use the column data type as the NULL data type in the row filter [\#3470](https://github.com/apache/arrow-datafusion/pull/3470) ([liukun4515](https://github.com/liukun4515)) +- apply type coercion before filter pushdown [\#3459](https://github.com/apache/arrow-datafusion/pull/3459) ([liukun4515](https://github.com/liukun4515)) +- add FixedSizeBinary support to create_hashes [\#3458](https://github.com/apache/arrow-datafusion/pull/3458) ([mcassels](https://github.com/mcassels)) +- Support ShowVariable Statement [\#3455](https://github.com/apache/arrow-datafusion/pull/3455) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- Add additional pruning tests with casts, handle unsupported predicates better [\#3454](https://github.com/apache/arrow-datafusion/pull/3454) ([alamb](https://github.com/alamb)) +- Add `InList` support for timestamp type. \(\#3449\) [\#3450](https://github.com/apache/arrow-datafusion/pull/3450) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Evaluate expressions after type coercion [\#3444](https://github.com/apache/arrow-datafusion/pull/3444) ([Dandandan](https://github.com/Dandandan)) +- remove type coercion in the binary physical expr [\#3396](https://github.com/apache/arrow-datafusion/pull/3396) ([liukun4515](https://github.com/liukun4515)) +- Use arrow row format in SortPreservingMerge ~50-70% faster [\#3386](https://github.com/apache/arrow-datafusion/pull/3386) ([tustvold](https://github.com/tustvold)) +- Pushdown `RowFilter` in `ParquetExec` [\#3380](https://github.com/apache/arrow-datafusion/pull/3380) ([thinkharderdev](https://github.com/thinkharderdev)) diff --git a/dev/changelog/14.0.0.md b/dev/changelog/14.0.0.md new file mode 100644 index 0000000000000..00e296103a8bf --- /dev/null +++ b/dev/changelog/14.0.0.md @@ -0,0 +1,347 @@ + + +## [14.0.0](https://github.com/apache/arrow-datafusion/tree/14.0.0) (2022-11-04) + +[Full 
Changelog](https://github.com/apache/arrow-datafusion/compare/13.0.0-rc1...14.0.0) + +**Breaking changes:** + +- Improve FieldNotFound errors [\#4084](https://github.com/apache/arrow-datafusion/pull/4084) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Refactor: move `simplify_expression.rs` and `expr_simplifier.rs` to a new mod `simplify_expressions` [\#3951](https://github.com/apache/arrow-datafusion/pull/3951) ([HaoYang670](https://github.com/HaoYang670)) +- Support for non-u64 types for Window Bound [\#3916](https://github.com/apache/arrow-datafusion/pull/3916) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mustafasrepo](https://github.com/mustafasrepo)) +- Expose parquet reader settings using normal DataFusion `ConfigOptions` [\#3822](https://github.com/apache/arrow-datafusion/pull/3822) ([alamb](https://github.com/alamb)) +- Add `Filter::try_new` with validation [\#3796](https://github.com/apache/arrow-datafusion/pull/3796) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Change public simplify API and add a public coerce API [\#3758](https://github.com/apache/arrow-datafusion/pull/3758) ([alamb](https://github.com/alamb)) + +**Implemented enhancements:** + +- Automatically register tables if ObjectStore root is configured [\#4094](https://github.com/apache/arrow-datafusion/issues/4094) +- Simplify small `InList` expressions [\#4089](https://github.com/apache/arrow-datafusion/issues/4089) +- Support `SET` command [\#4067](https://github.com/apache/arrow-datafusion/issues/4067) +- add uuid\(\) function to generate unique uuid per row [\#4045](https://github.com/apache/arrow-datafusion/issues/4045) +- Publish benchmark crate so that it can be used as a library in Ballista [\#4016](https://github.com/apache/arrow-datafusion/issues/4016) +- Add statistics methods to `TableProvider` trait for use in cost-based optimizations 
in the logical plan [\#3983](https://github.com/apache/arrow-datafusion/issues/3983) +- Implement `current_time` Function [\#3982](https://github.com/apache/arrow-datafusion/issues/3982) +- Implement `current_date` Function [\#3981](https://github.com/apache/arrow-datafusion/issues/3981) +- Put common code used for testing code into datafusion/test_utils.rs [\#3960](https://github.com/apache/arrow-datafusion/issues/3960) +- Print the configurations of ConfigOptions in an ordered way so that we can directly compare the equality of two ConfigOptions by their debug strings [\#3952](https://github.com/apache/arrow-datafusion/issues/3952) +- Don't make dependants install protoc [\#3947](https://github.com/apache/arrow-datafusion/issues/3947) +- Implement right anti join and support it in HashBuildProbeOrder [\#3946](https://github.com/apache/arrow-datafusion/issues/3946) +- Implement right semi join and support it in HashBuildProbeOrder [\#3945](https://github.com/apache/arrow-datafusion/issues/3945) +- Refactor `simplify_expressions` and `expr_simplifier` [\#3934](https://github.com/apache/arrow-datafusion/issues/3934) +- Implement serialization for `ScalarValue::FixedSizeBinary` [\#3928](https://github.com/apache/arrow-datafusion/issues/3928) +- Support inlining view / dataframes logical plan [\#3913](https://github.com/apache/arrow-datafusion/issues/3913) +- Plans with tables from `TableProviderFactory`s can't be serialized [\#3906](https://github.com/apache/arrow-datafusion/issues/3906) +- Simplify `a AND a` and `a OR a`. 
[\#3895](https://github.com/apache/arrow-datafusion/issues/3895) +- Allow configuring statistics on TPC-H benchmarks [\#3888](https://github.com/apache/arrow-datafusion/issues/3888) +- CI checks stuck in queued mode [\#3883](https://github.com/apache/arrow-datafusion/issues/3883) +- Multiple optimizer passes [\#3879](https://github.com/apache/arrow-datafusion/issues/3879) +- datafusion-proto does not support view table scan [\#3874](https://github.com/apache/arrow-datafusion/issues/3874) +- TableProviderFactories need to be async and return a Result to be useful [\#3866](https://github.com/apache/arrow-datafusion/issues/3866) +- Factorize common AND factors out of OR predicates to support filterPushDown as possible [\#3858](https://github.com/apache/arrow-datafusion/issues/3858) +- Replace `concat_ws` with `concat` when the delimiter is empty string [\#3857](https://github.com/apache/arrow-datafusion/issues/3857) +- Concatenate contiguous literal arguments of `concat_ws` when doing the expression simplification [\#3856](https://github.com/apache/arrow-datafusion/issues/3856) +- Partition and Sort Enforcement [\#3854](https://github.com/apache/arrow-datafusion/issues/3854) +- Enable mimalloc by default in benchmarks [\#3851](https://github.com/apache/arrow-datafusion/issues/3851) +- Add collect statistics configuration [\#3847](https://github.com/apache/arrow-datafusion/issues/3847) +- \[SQL\] - Support cache/uncache table syntax [\#3842](https://github.com/apache/arrow-datafusion/issues/3842) +- Filter pushdown doesn't seem to apply for filter on TPC-H Q17 [\#3839](https://github.com/apache/arrow-datafusion/issues/3839) +- Support pushdown multi-columns in PageIndex pruning. 
[\#3834](https://github.com/apache/arrow-datafusion/issues/3834) +- Consolidate `Expr` manipulation code so it is more discoverable and make it easier to use [\#3808](https://github.com/apache/arrow-datafusion/issues/3808) +- Leverage input array's null buffer for regex replace to optimize sparse arrays [\#3803](https://github.com/apache/arrow-datafusion/issues/3803) +- Improve join cardinality estimation when there is no overlap in the min/max values [\#3802](https://github.com/apache/arrow-datafusion/issues/3802) +- datafusion-cli up to date check is failing on master [\#3798](https://github.com/apache/arrow-datafusion/issues/3798) +- Optimize benchmark q2 subquery filter [\#3789](https://github.com/apache/arrow-datafusion/issues/3789) +- Benchmark should infer schema when running against Parquet [\#3776](https://github.com/apache/arrow-datafusion/issues/3776) +- Allow specialized physical functions to provide hints for the array adapter [\#3762](https://github.com/apache/arrow-datafusion/issues/3762) +- \[User Guide\] Add `EXPLAIN` to SQL reference [\#3755](https://github.com/apache/arrow-datafusion/issues/3755) +- move `type coercion` for agg/agg udf [\#3752](https://github.com/apache/arrow-datafusion/issues/3752) +- Prevent Cargo.lock for datafusion-cli being out-of-date [\#3744](https://github.com/apache/arrow-datafusion/issues/3744) +- Add example of expr apis including simplification and coercion [\#3740](https://github.com/apache/arrow-datafusion/issues/3740) +- support `type coercion` for ScalarFunction expr in the logical phase [\#3731](https://github.com/apache/arrow-datafusion/issues/3731) +- Add support for DISTINCT projections in `decorrelate_where_exists` [\#3724](https://github.com/apache/arrow-datafusion/issues/3724) +- Add type coercion rule for `CONCAT` and `CONCAT_WS` [\#3720](https://github.com/apache/arrow-datafusion/issues/3720) +- Expose and document a simpler public API for simplify expressions 
[\#3709](https://github.com/apache/arrow-datafusion/issues/3709) +- Expose + document the type coercion API publicly [\#3708](https://github.com/apache/arrow-datafusion/issues/3708) +- Concatenate contiguous literal arguments of `CONCAT` during the expression simplification. [\#3683](https://github.com/apache/arrow-datafusion/issues/3683) +- DataFusion 13.0.0 Release [\#3671](https://github.com/apache/arrow-datafusion/issues/3671) +- Add division by `0` rules in the expression simplification [\#3663](https://github.com/apache/arrow-datafusion/issues/3663) +- Compressed CSV/JSON Read [\#3641](https://github.com/apache/arrow-datafusion/issues/3641) +- remove type coercion for agg [\#3623](https://github.com/apache/arrow-datafusion/issues/3623) +- extract or clause as predicate for join rels [\#3577](https://github.com/apache/arrow-datafusion/issues/3577) +- Improve performance of `regex_replace` [\#3518](https://github.com/apache/arrow-datafusion/issues/3518) +- Add benchmarks for parquet queries with filter pushdown enabled [\#3457](https://github.com/apache/arrow-datafusion/issues/3457) +- Make type coercion rule more robust [\#3390](https://github.com/apache/arrow-datafusion/issues/3390) +- `ViewTable::scan` ignores filters and limits [\#3249](https://github.com/apache/arrow-datafusion/issues/3249) +- Add `CREATE VIEW` documentation to user guide [\#3211](https://github.com/apache/arrow-datafusion/issues/3211) +- Push additional parquet filtering into the parquet scan \[EPIC\] [\#3147](https://github.com/apache/arrow-datafusion/issues/3147) +- Remove `core/logical_plan` module [\#2683](https://github.com/apache/arrow-datafusion/issues/2683) +- Datafusion Optimizer Enhancement [\#2255](https://github.com/apache/arrow-datafusion/issues/2255) +- \[Optimizer\] Eliminate self compare self [\#2252](https://github.com/apache/arrow-datafusion/issues/2252) +- Break datafusion crate into smaller crates [\#1750](https://github.com/apache/arrow-datafusion/issues/1750) +- 
Benchmark `constellation-rs/amadeus`'s parquet implementation [\#1341](https://github.com/apache/arrow-datafusion/issues/1341) +- Use `parquet2` async reader in `physical_plan/parquet` [\#1058](https://github.com/apache/arrow-datafusion/issues/1058) +- Table Scan Enhancement Plan [\#944](https://github.com/apache/arrow-datafusion/issues/944) +- Implement parquet page-level skipping with column index, using min/max stats [\#847](https://github.com/apache/arrow-datafusion/issues/847) +- Support min/max statistics in ParquetTable and ParquetExec [\#537](https://github.com/apache/arrow-datafusion/issues/537) + +**Fixed bugs:** + +- Clippy failing on master [\#4100](https://github.com/apache/arrow-datafusion/issues/4100) +- Panic when the number of partitions of the pipeline that throws the exception is inconsistent with the number of partitions output by the query [\#4096](https://github.com/apache/arrow-datafusion/issues/4096) +- FieldNotFound when field is available [\#4083](https://github.com/apache/arrow-datafusion/issues/4083) +- SingleDistinctToGroupBy being applied too broadly [\#4082](https://github.com/apache/arrow-datafusion/issues/4082) +- single_distinct_to_groupby strips qualifiers from group-by expressions [\#4049](https://github.com/apache/arrow-datafusion/issues/4049) +- Another Internal error when parquet predicate pushdown is enabled "Error evaluating filter predicate: [\#4046](https://github.com/apache/arrow-datafusion/issues/4046) +- Decimal multiplied by Float produces incorrect results [\#4035](https://github.com/apache/arrow-datafusion/issues/4035) +- Cannot query external table - TableScan replaced with EmptyExec [\#4027](https://github.com/apache/arrow-datafusion/issues/4027) +- benchmark q17 produces incorrect result [\#4026](https://github.com/apache/arrow-datafusion/issues/4026) +- benchmark q14 produces incorrect result [\#4025](https://github.com/apache/arrow-datafusion/issues/4025) +- benchmark q11 producing incorrect results 
[\#4023](https://github.com/apache/arrow-datafusion/issues/4023) +- Internal error when parquet predicate pushdown is enabled "Error evaluating filter predicate:" [\#4006](https://github.com/apache/arrow-datafusion/issues/4006) +- Incorrect results with parquet filtering pushdown enabled [\#4005](https://github.com/apache/arrow-datafusion/issues/4005) +- Wrong results when parquet page index filtering is enabled [\#4002](https://github.com/apache/arrow-datafusion/issues/4002) +- Output schema of semi join has invalid projection added after HashBuildProbeOrder [\#4001](https://github.com/apache/arrow-datafusion/issues/4001) +- `async` deserialization functions are unintuitive and possibly insecure [\#3977](https://github.com/apache/arrow-datafusion/issues/3977) +- `Expr::to_bytes` can produce output that hits `Expr::from_bytes` recursion limit [\#3968](https://github.com/apache/arrow-datafusion/issues/3968) +- Bug on propagating arrow field metadata [\#3964](https://github.com/apache/arrow-datafusion/issues/3964) +- Predicate still has cast when comparing Timestamp\(Nano, None\) to a timestamp literal, so can't be pushed down or used for pruning [\#3938](https://github.com/apache/arrow-datafusion/issues/3938) +- Error using `IN` list on dictionary encoded data: `InList does not support datatype Dictionary(Int32, Utf8).` [\#3936](https://github.com/apache/arrow-datafusion/issues/3936) +- Internal error in CAST from Timestamp\[us\] [\#3922](https://github.com/apache/arrow-datafusion/issues/3922) +- ScalarValue not implemented for FixedSizeBinary types [\#3910](https://github.com/apache/arrow-datafusion/issues/3910) +- \[DOC\] - There are unsupported DDL in the official documentation [\#3904](https://github.com/apache/arrow-datafusion/issues/3904) +- datafusion-proto deserialize with Substring\(str \[from int\] \[for int\]\) fails [\#3901](https://github.com/apache/arrow-datafusion/issues/3901) +- `count(Literal)` gives wrong column name 
[\#3891](https://github.com/apache/arrow-datafusion/issues/3891) +- `projection_push_down` adds duplicate projections with multiple passes [\#3881](https://github.com/apache/arrow-datafusion/issues/3881) +- Default physical planner generates empty relation for DROP TABLE, CREATE MEMORY TABLE, etc [\#3873](https://github.com/apache/arrow-datafusion/issues/3873) +- Binary expression canonical names are incorrect in some cases [\#3865](https://github.com/apache/arrow-datafusion/issues/3865) +- Using the window function lag causes panic. [\#3830](https://github.com/apache/arrow-datafusion/issues/3830) +- chrono crate : specify 0.4.22 as the minimum version due to spurious build failures [\#3827](https://github.com/apache/arrow-datafusion/issues/3827) +- datafusion-proto deserialize with q16 sql fails [\#3820](https://github.com/apache/arrow-datafusion/issues/3820) +- Filter predicates should not be aliased [\#3795](https://github.com/apache/arrow-datafusion/issues/3795) +- Write csv not save all lines of dataframe [\#3783](https://github.com/apache/arrow-datafusion/issues/3783) +- Regression in simplifying expressions in subqueries [\#3760](https://github.com/apache/arrow-datafusion/issues/3760) +- DataFusionError\(Internal\("The size of the sorted batch is larger than the size of the input batch: 2120 \> 2312"\)\) [\#3747](https://github.com/apache/arrow-datafusion/issues/3747) +- "labeler" PR check is broken [\#3743](https://github.com/apache/arrow-datafusion/issues/3743) +- `DataFrame::select_columns` doesn't work with names containing "." [\#3733](https://github.com/apache/arrow-datafusion/issues/3733) +- TPC-H Query 1 has regressed [\#3729](https://github.com/apache/arrow-datafusion/issues/3729) +- \[RUST\]\[Datafusion\] What causes "Error: Execution\("file size of 4 is less than footer"\)" error? 
[\#3800](https://github.com/apache/arrow-datafusion/issues/3800) +- Field names containing periods such as f.c cannot work [\#3682](https://github.com/apache/arrow-datafusion/issues/3682) +- TableProvider implementation for DataFrame does not support filter pushdown [\#3681](https://github.com/apache/arrow-datafusion/issues/3681) +- using Decimal\(0\) make system panicked [\#3665](https://github.com/apache/arrow-datafusion/issues/3665) +- Cannot query some parquet files in S3, but they work locally [\#3633](https://github.com/apache/arrow-datafusion/issues/3633) +- ` col / col` returns `1` when `col = 0` [\#3615](https://github.com/apache/arrow-datafusion/issues/3615) +- register_csv allow space in table_path [\#3589](https://github.com/apache/arrow-datafusion/issues/3589) +- Hardcoded u64 for WindowFrameBound fields [\#3571](https://github.com/apache/arrow-datafusion/issues/3571) +- `docs.rs` cannot build `datafusion-proto` crate [\#3538](https://github.com/apache/arrow-datafusion/issues/3538) +- Row Hash loads whole aggregation state to memory before sending [\#3460](https://github.com/apache/arrow-datafusion/issues/3460) +- approx_percentile_cont return wrong result when scan multi parquet files. [\#3140](https://github.com/apache/arrow-datafusion/issues/3140) +- User guide is incorrect regarding using CLI to register CSV files using schema inference [\#3001](https://github.com/apache/arrow-datafusion/issues/3001) +- Exception: Internal error, Exception: Schema error [\#2938](https://github.com/apache/arrow-datafusion/issues/2938) +- Version 0.6.0 Panic error during SQL execution [\#2738](https://github.com/apache/arrow-datafusion/issues/2738) +- wrong result when operation parquet [\#2044](https://github.com/apache/arrow-datafusion/issues/2044) +- Local object store accepts file:/// as base path, but LocalStore returns meta without the prefix. 
[\#1923](https://github.com/apache/arrow-datafusion/issues/1923) +- Reading nested parquet files results in `index out of bounds` [\#1383](https://github.com/apache/arrow-datafusion/issues/1383) +- `-` \(negation\) with NULL literals does not work: can't be evaluated because the expression's type is Utf8, not signed [\#1192](https://github.com/apache/arrow-datafusion/issues/1192) +- Inconsistent cast behavior [\#957](https://github.com/apache/arrow-datafusion/issues/957) +- single_distinct_to_groupby no longer drops qualifiers [\#4050](https://github.com/apache/arrow-datafusion/pull/4050) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) + +**Documentation updates:** + +- Clarify in docs that Identifiers are made lower-case in SQL query [\#2374](https://github.com/apache/arrow-datafusion/issues/2374) +- Fix broken links in contributor guide [\#3956](https://github.com/apache/arrow-datafusion/pull/3956) ([Jefffrey](https://github.com/Jefffrey)) +- add create view explanation [\#3925](https://github.com/apache/arrow-datafusion/pull/3925) ([retikulum](https://github.com/retikulum)) +- Update `datafusion-examples` README [\#3814](https://github.com/apache/arrow-datafusion/pull/3814) ([alamb](https://github.com/alamb)) +- Add Seafowl to list of projects using DataFusion [\#3792](https://github.com/apache/arrow-datafusion/pull/3792) ([mildbyte](https://github.com/mildbyte)) + +**Closed issues:** + +- \[QUESTION\] How many times should be the function `create_name` called when executing a query? 
[\#3900](https://github.com/apache/arrow-datafusion/issues/3900) +- Improve the `Expr` string format [\#3878](https://github.com/apache/arrow-datafusion/issues/3878) +- Simplify division by zero \(division by one / multiplication by zero / multiplication by one\) for Decimal types as well [\#3643](https://github.com/apache/arrow-datafusion/issues/3643) +- InList: merge check branch [\#2833](https://github.com/apache/arrow-datafusion/issues/2833) +- Optimization InList: compare the float data type using OrderedFloat\<T\> [\#2831](https://github.com/apache/arrow-datafusion/issues/2831) +- Outdated section of the add function of the contribution guide [\#2560](https://github.com/apache/arrow-datafusion/issues/2560) +- Optimize InList implementation with native types rather than ScalarValue [\#2165](https://github.com/apache/arrow-datafusion/issues/2165) +- Improve testing of optimizers using EXPLAIN [\#1118](https://github.com/apache/arrow-datafusion/issues/1118) +- Crash on parsing sql query with Cyrillic letters [\#184](https://github.com/apache/arrow-datafusion/issues/184) +- \[EPIC\] Support all TPC-H queries in benchmark [\#158](https://github.com/apache/arrow-datafusion/issues/158) +- Implement optional second argument to ltrim and rtrim functions [\#144](https://github.com/apache/arrow-datafusion/issues/144) +- Benchmark crate does not have a SIMD feature [\#124](https://github.com/apache/arrow-datafusion/issues/124) +- ColumnarValue::into_array should not require batch [\#113](https://github.com/apache/arrow-datafusion/issues/113) +- \[Rust\] Parquet data source does not support complex types [\#83](https://github.com/apache/arrow-datafusion/issues/83) + +**Merged pull requests:** + +- Appease new clippy [\#4101](https://github.com/apache/arrow-datafusion/pull/4101) ([alamb](https://github.com/alamb)) +- minor: Split parquet reader up into smaller modules [\#4099](https://github.com/apache/arrow-datafusion/pull/4099) ([alamb](https://github.com/alamb)) +-
\[MINOR\] Update `SET` in cli.md [\#4098](https://github.com/apache/arrow-datafusion/pull/4098) ([waitingkuo](https://github.com/waitingkuo)) +- fix: Scheduler panic routing errors [\#4097](https://github.com/apache/arrow-datafusion/pull/4097) ([yukkit](https://github.com/yukkit)) +- Automatically register tables if ObjectStore root is configured [\#4095](https://github.com/apache/arrow-datafusion/pull/4095) ([avantgardnerio](https://github.com/avantgardnerio)) +- minor: Use Operator::swap [\#4092](https://github.com/apache/arrow-datafusion/pull/4092) ([alamb](https://github.com/alamb)) +- Simplify small InListExpr [\#4090](https://github.com/apache/arrow-datafusion/pull/4090) ([Dandandan](https://github.com/Dandandan)) +- Minor: Add arrow-rs ticket reference and turn some comments into docstrings [\#4088](https://github.com/apache/arrow-datafusion/pull/4088) ([alamb](https://github.com/alamb)) +- Support Dictionary in InListExpr [\#4070](https://github.com/apache/arrow-datafusion/pull/4070) ([tustvold](https://github.com/tustvold)) +- support `SET` variable [\#4069](https://github.com/apache/arrow-datafusion/pull/4069) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- Add in list bench [\#4068](https://github.com/apache/arrow-datafusion/pull/4068) ([tustvold](https://github.com/tustvold)) +- Improve Error Handling and Readability for downcasting `StructArray` [\#4061](https://github.com/apache/arrow-datafusion/pull/4061) ([retikulum](https://github.com/retikulum)) +- Build tests separately from running [\#4060](https://github.com/apache/arrow-datafusion/pull/4060) ([alamb](https://github.com/alamb)) +- Simplify InListExpr ~20-70% Faster [\#4057](https://github.com/apache/arrow-datafusion/pull/4057) ([tustvold](https://github.com/tustvold)) +- MINOR: Print unoptimized logical plan in execute_query of tpch benchmark [\#4056](https://github.com/apache/arrow-datafusion/pull/4056)
([viirya](https://github.com/viirya)) +- Minor: clean the code in `eliminate_filter` [\#4055](https://github.com/apache/arrow-datafusion/pull/4055) ([HaoYang670](https://github.com/HaoYang670)) +- Implement `current_time` scalar function [\#4054](https://github.com/apache/arrow-datafusion/pull/4054) ([naosense](https://github.com/naosense)) +- Cleanup hash_utils adding support for decimal256 and f16 [\#4053](https://github.com/apache/arrow-datafusion/pull/4053) ([tustvold](https://github.com/tustvold)) +- Fix multicolumn parquet predicate pushdown \(\#4046\) [\#4048](https://github.com/apache/arrow-datafusion/pull/4048) ([tustvold](https://github.com/tustvold)) +- Add CI checks that we can serde all benchmark queries [\#4047](https://github.com/apache/arrow-datafusion/pull/4047) ([andygrove](https://github.com/andygrove)) +- Enable more benchmark verification tests [\#4044](https://github.com/apache/arrow-datafusion/pull/4044) ([andygrove](https://github.com/andygrove)) +- Extract common parquet testing code to `parquet-test-util` crate [\#4042](https://github.com/apache/arrow-datafusion/pull/4042) ([alamb](https://github.com/alamb)) +- add uuid\(\) function [\#4041](https://github.com/apache/arrow-datafusion/pull/4041) ([Jimexist](https://github.com/Jimexist)) +- Update to arrow 26, change timezones [\#4039](https://github.com/apache/arrow-datafusion/pull/4039) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Fix Decimal and Floating type coerce rule [\#4038](https://github.com/apache/arrow-datafusion/pull/4038) ([viirya](https://github.com/viirya)) +- Reserve the literal expression of `Count` function [\#4031](https://github.com/apache/arrow-datafusion/pull/4031) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Implement current_date scalar function [\#4022](https://github.com/apache/arrow-datafusion/pull/4022) 
([comphead](https://github.com/comphead)) +- Fix predicate pushdown bugs: project columns within DatafusionArrowPredicate \(\#4005\) \(\#4006\) [\#4021](https://github.com/apache/arrow-datafusion/pull/4021) ([tustvold](https://github.com/tustvold)) +- minor: remove redundant code/TODO [\#4019](https://github.com/apache/arrow-datafusion/pull/4019) ([jackwener](https://github.com/jackwener)) +- Add CI check to verify that benchmark queries return the expected results [\#4015](https://github.com/apache/arrow-datafusion/pull/4015) ([andygrove](https://github.com/andygrove)) +- Minor: Add TODO and tracking ticket reference [\#4012](https://github.com/apache/arrow-datafusion/pull/4012) ([alamb](https://github.com/alamb)) +- Add right anti join support and support it in HashBuildProbeOrder [\#4011](https://github.com/apache/arrow-datafusion/pull/4011) ([Dandandan](https://github.com/Dandandan)) +- MINOR: Generate expected benchmark query results [\#4010](https://github.com/apache/arrow-datafusion/pull/4010) ([andygrove](https://github.com/andygrove)) +- Minor: remove unnecessary clippy allow [\#4008](https://github.com/apache/arrow-datafusion/pull/4008) ([alamb](https://github.com/alamb)) +- Minor: Do what clippy says and clean up some code [\#4007](https://github.com/apache/arrow-datafusion/pull/4007) ([alamb](https://github.com/alamb)) +- Improve Error Handling and Readability for downcasting `Date32Array` [\#4004](https://github.com/apache/arrow-datafusion/pull/4004) ([retikulum](https://github.com/retikulum)) +- Don't add projection for semi joins in HashBuildProbeOrder [\#4000](https://github.com/apache/arrow-datafusion/pull/4000) ([Dandandan](https://github.com/Dandandan)) +- Minor: use `DataType::is_nested` [\#3995](https://github.com/apache/arrow-datafusion/pull/3995) ([alamb](https://github.com/alamb)) +- \[minor\] bump prettier version [\#3992](https://github.com/apache/arrow-datafusion/pull/3992) ([Jimexist](https://github.com/Jimexist)) +- Add parquet predicate
pushdown metrics [\#3989](https://github.com/apache/arrow-datafusion/pull/3989) ([alamb](https://github.com/alamb)) +- Pin datafusion-proto build dependencies [\#3987](https://github.com/apache/arrow-datafusion/pull/3987) ([tustvold](https://github.com/tustvold)) +- Add TableProvider.statistics method [\#3986](https://github.com/apache/arrow-datafusion/pull/3986) ([andygrove](https://github.com/andygrove)) +- Add Pull Request guidelines to contributor guide [\#3985](https://github.com/apache/arrow-datafusion/pull/3985) ([alamb](https://github.com/alamb)) +- Update protos [\#3979](https://github.com/apache/arrow-datafusion/pull/3979) ([tustvold](https://github.com/tustvold)) +- Revert async changes but keep deltalake working [\#3978](https://github.com/apache/arrow-datafusion/pull/3978) ([avantgardnerio](https://github.com/avantgardnerio)) +- Correctness integration test for parquet filter pushdown [\#3976](https://github.com/apache/arrow-datafusion/pull/3976) ([alamb](https://github.com/alamb)) +- MINOR: Stop pretty printing batches in benchmark when there are no results [\#3974](https://github.com/apache/arrow-datafusion/pull/3974) ([andygrove](https://github.com/andygrove)) +- MINOR: Re-export Cast struct [\#3971](https://github.com/apache/arrow-datafusion/pull/3971) ([andygrove](https://github.com/andygrove)) +- fix: check recursion limit in `Expr::to_bytes` [\#3970](https://github.com/apache/arrow-datafusion/pull/3970) ([crepererum](https://github.com/crepererum)) +- \[Part1\] Partition and Sort Enforcement, PhysicalExpr enhancement [\#3969](https://github.com/apache/arrow-datafusion/pull/3969) ([mingmwang](https://github.com/mingmwang)) +- Support pushdown multi-columns in PageIndex pruning. 
[\#3967](https://github.com/apache/arrow-datafusion/pull/3967) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Fix benchmarks README formatting [\#3966](https://github.com/apache/arrow-datafusion/pull/3966) ([Jefffrey](https://github.com/Jefffrey)) +- Bug fix on DFField to Field conversion: preserve metadata [\#3965](https://github.com/apache/arrow-datafusion/pull/3965) ([metesynnada](https://github.com/metesynnada)) +- Informative Error Message for LAG and LEAD functions [\#3963](https://github.com/apache/arrow-datafusion/pull/3963) ([mustafasrepo](https://github.com/mustafasrepo)) +- Minor: Add some docstrings to `FileScanConfig` and `RuntimeEnv` [\#3962](https://github.com/apache/arrow-datafusion/pull/3962) ([alamb](https://github.com/alamb)) +- Move common code used for testing code into datafusion/test_utils [\#3961](https://github.com/apache/arrow-datafusion/pull/3961) ([alamb](https://github.com/alamb)) +- Update minimum chrono dependency to 0.4.22 [\#3959](https://github.com/apache/arrow-datafusion/pull/3959) ([alamb](https://github.com/alamb)) +- Implement right semi join and support in HashBuildProbeorder [\#3958](https://github.com/apache/arrow-datafusion/pull/3958) ([Dandandan](https://github.com/Dandandan)) +- Print the configurations of ConfigOptions in an ordered way so that we can directly compare the equality of two ConfigOptions by their debug strings [\#3953](https://github.com/apache/arrow-datafusion/pull/3953) ([yahoNanJing](https://github.com/yahoNanJing)) +- Vendor Generated Protobuf Code \(\#3947\) [\#3950](https://github.com/apache/arrow-datafusion/pull/3950) ([tustvold](https://github.com/tustvold)) +- Implement serialization for ScalarValue::FixedSizeBinary [\#3943](https://github.com/apache/arrow-datafusion/pull/3943) ([retikulum](https://github.com/retikulum)) +- Consolidate physical join code into `datafusion/core/src/physical_plan/joins` [\#3942](https://github.com/apache/arrow-datafusion/pull/3942) ([alamb](https://github.com/alamb)) 
+- Add optimizer test for simplifying predicates on timestamps [\#3939](https://github.com/apache/arrow-datafusion/pull/3939) ([alamb](https://github.com/alamb)) +- Add test for querying predicate on dictionary [\#3937](https://github.com/apache/arrow-datafusion/pull/3937) ([alamb](https://github.com/alamb)) +- fix: return error for unsupported SQL [\#3933](https://github.com/apache/arrow-datafusion/pull/3933) ([Kikkon](https://github.com/Kikkon)) +- doc: fix doc about `CREATE TABLE IF NOT EXISTS` [\#3932](https://github.com/apache/arrow-datafusion/pull/3932) ([jackwener](https://github.com/jackwener)) +- Refactor Expr::Cast to use a struct. [\#3931](https://github.com/apache/arrow-datafusion/pull/3931) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- minor: fix some typo. [\#3930](https://github.com/apache/arrow-datafusion/pull/3930) ([jackwener](https://github.com/jackwener)) +- chore: update cranelift-related dependencies [\#3926](https://github.com/apache/arrow-datafusion/pull/3926) ([xudong963](https://github.com/xudong963)) +- Change cast error from Internal to NotImplemented [\#3924](https://github.com/apache/arrow-datafusion/pull/3924) ([alamb](https://github.com/alamb)) +- Support inlining view / dataframes logical plan [\#3923](https://github.com/apache/arrow-datafusion/pull/3923) ([Dandandan](https://github.com/Dandandan)) +- Add test for Simplify redundant predicates [\#3915](https://github.com/apache/arrow-datafusion/pull/3915) ([src255](https://github.com/src255)) +- Implement ScalarValue for FixedSizeBinary [\#3911](https://github.com/apache/arrow-datafusion/pull/3911) ([maxburke](https://github.com/maxburke)) +- Add serde for plans with tables from `TableProviderFactory`s [\#3907](https://github.com/apache/arrow-datafusion/pull/3907) ([avantgardnerio](https://github.com/avantgardnerio)) +- Support filter/limit pushdown for views/dataframes 
[\#3905](https://github.com/apache/arrow-datafusion/pull/3905) ([Dandandan](https://github.com/Dandandan)) +- Factorize common AND factors out of OR predicates to support filterPu… [\#3903](https://github.com/apache/arrow-datafusion/pull/3903) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add `Substring(str [from int] [for int])` support in `datafusion-proto` [\#3902](https://github.com/apache/arrow-datafusion/pull/3902) ([r4ntix](https://github.com/r4ntix)) +- Revert "Factorize common AND factors out of OR predicates to supportfilter Pu… \(\#3859\)" [\#3897](https://github.com/apache/arrow-datafusion/pull/3897) ([alamb](https://github.com/alamb)) +- MINOR: Add notes on Apache Reporter [\#3893](https://github.com/apache/arrow-datafusion/pull/3893) ([andygrove](https://github.com/andygrove)) +- Allow configuring collection of statistics during TPC-H benchmarks [\#3889](https://github.com/apache/arrow-datafusion/pull/3889) ([isidentical](https://github.com/isidentical)) +- Improve formatting of binary expressions [\#3884](https://github.com/apache/arrow-datafusion/pull/3884) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Multiple optimizer passes [\#3880](https://github.com/apache/arrow-datafusion/pull/3880) ([andygrove](https://github.com/andygrove)) +- \[MINOR\] Update docs with newly added configuration values [\#3877](https://github.com/apache/arrow-datafusion/pull/3877) ([alamb](https://github.com/alamb)) +- \[MINOR\] Add a hint about how to resolve the `Cargo.lock` CI check [\#3876](https://github.com/apache/arrow-datafusion/pull/3876) ([alamb](https://github.com/alamb)) +- Add `LogicalPlan::ViewTable` support in `datafusion-proto` [\#3875](https://github.com/apache/arrow-datafusion/pull/3875) ([r4ntix](https://github.com/r4ntix)) +- Optimize the `concat_ws` function [\#3869](https://github.com/apache/arrow-datafusion/pull/3869) ([HaoYang670](https://github.com/HaoYang670)) +- Implement 
foundational filter selectivity analysis [\#3868](https://github.com/apache/arrow-datafusion/pull/3868) ([isidentical](https://github.com/isidentical)) +- Update `TableProviderFactory` trait to support real-world use-cases [\#3867](https://github.com/apache/arrow-datafusion/pull/3867) ([avantgardnerio](https://github.com/avantgardnerio)) +- put subquery's equal clause into join on clauses instead of filter cl… [\#3862](https://github.com/apache/arrow-datafusion/pull/3862) ([AssHero](https://github.com/AssHero)) +- Factorize common AND factors out of OR predicates to support filterPu… [\#3859](https://github.com/apache/arrow-datafusion/pull/3859) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Enable mimalloc by default in benchmark [\#3853](https://github.com/apache/arrow-datafusion/pull/3853) ([Dandandan](https://github.com/Dandandan)) +- Refactor `Expr::Between` to use a struct [\#3850](https://github.com/apache/arrow-datafusion/pull/3850) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([b41sh](https://github.com/b41sh)) +- Handle cardinality estimation for disjoint inner and outer joins [\#3848](https://github.com/apache/arrow-datafusion/pull/3848) ([isidentical](https://github.com/isidentical)) +- Add setting for statistics collection [\#3846](https://github.com/apache/arrow-datafusion/pull/3846) ([Dandandan](https://github.com/Dandandan)) +- Update to arrow 25.0.0 [\#3844](https://github.com/apache/arrow-datafusion/pull/3844) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Tweak list of optimization rules [\#3841](https://github.com/apache/arrow-datafusion/pull/3841) ([Dandandan](https://github.com/Dandandan)) +- Refactor Expr::GetIndexedField to use a struct [\#3838](https://github.com/apache/arrow-datafusion/pull/3838) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- Infer the count of maximum distinct values from min/max 
[\#3837](https://github.com/apache/arrow-datafusion/pull/3837) ([isidentical](https://github.com/isidentical)) +- Refactor `Expr::Like`, `Expr::ILike`, `Expr::SimilarTo` to use a struct [\#3836](https://github.com/apache/arrow-datafusion/pull/3836) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([b41sh](https://github.com/b41sh)) +- Refactor Expr::BinaryExpr to use a struct [\#3835](https://github.com/apache/arrow-datafusion/pull/3835) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([zhoudongyan](https://github.com/zhoudongyan)) +- update postgres version to 15 in integration test [\#3831](https://github.com/apache/arrow-datafusion/pull/3831) ([Jimexist](https://github.com/Jimexist)) +- Fix the panic when lpad/rpad parameter is negative [\#3829](https://github.com/apache/arrow-datafusion/pull/3829) ([ZuoTiJia](https://github.com/ZuoTiJia)) +- MINOR: Document SHOW ALL in the users guide [\#3826](https://github.com/apache/arrow-datafusion/pull/3826) ([alamb](https://github.com/alamb)) +- MINOR: Add datafusion-cli documentation on showing configuration [\#3825](https://github.com/apache/arrow-datafusion/pull/3825) ([alamb](https://github.com/alamb)) +- Add/Remove Division Rules [\#3824](https://github.com/apache/arrow-datafusion/pull/3824) ([retikulum](https://github.com/retikulum)) +- Minor: Sort the output of SHOW ALL by config name [\#3823](https://github.com/apache/arrow-datafusion/pull/3823) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Add `precision != 0` check when making decimal type [\#3818](https://github.com/apache/arrow-datafusion/pull/3818) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Infer schema when running benchmarks against parquet [\#3817](https://github.com/apache/arrow-datafusion/pull/3817) ([andygrove](https://github.com/andygrove)) +- Finish removing deprecated `datafusion::logical_plan` 
module [\#3816](https://github.com/apache/arrow-datafusion/pull/3816) ([andygrove](https://github.com/andygrove)) +- Clarify initial example with respect to capitalization [\#3815](https://github.com/apache/arrow-datafusion/pull/3815) ([alamb](https://github.com/alamb)) +- Improve expression simplification by running it twice [\#3811](https://github.com/apache/arrow-datafusion/pull/3811) ([alamb](https://github.com/alamb)) +- Make expression manipulation consistent and easier to use: `combine/split filter` `conjunction`, etc [\#3810](https://github.com/apache/arrow-datafusion/pull/3810) ([alamb](https://github.com/alamb)) +- Consolidate expression manipulation functions into `datafusion_optimizer` [\#3809](https://github.com/apache/arrow-datafusion/pull/3809) ([alamb](https://github.com/alamb)) +- Optimize `regexp_replace` when the input is a sparse array [\#3804](https://github.com/apache/arrow-datafusion/pull/3804) ([isidentical](https://github.com/isidentical)) +- Stop ignoring errors when writing DataFrame to csv, parquet, json [\#3801](https://github.com/apache/arrow-datafusion/pull/3801) ([andygrove](https://github.com/andygrove)) +- Update datafusion-cli Cargo.lock to fix CI check on master [\#3799](https://github.com/apache/arrow-datafusion/pull/3799) ([alamb](https://github.com/alamb)) +- MINOR: Benchmark regression tests [\#3790](https://github.com/apache/arrow-datafusion/pull/3790) ([andygrove](https://github.com/andygrove)) +- MINOR: Optimizer example and docs, deprecate `Expr::name` [\#3788](https://github.com/apache/arrow-datafusion/pull/3788) ([andygrove](https://github.com/andygrove)) +- Join cardinality computation for cost-based nested join optimizations [\#3787](https://github.com/apache/arrow-datafusion/pull/3787) ([isidentical](https://github.com/isidentical)) +- Optimizer now simplifies multiplication, division, module arg is a literal Decimal zero or one [\#3782](https://github.com/apache/arrow-datafusion/pull/3782) 
([drrtuy](https://github.com/drrtuy)) +- Implement parquet page-level skipping with column index, using min/ma… [\#3780](https://github.com/apache/arrow-datafusion/pull/3780) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Bump actions/labeler from 4.0.1 to 4.0.2 [\#3779](https://github.com/apache/arrow-datafusion/pull/3779) ([dependabot[bot]](https://github.com/apps/dependabot)) +- MINOR: correct `ListingOptions.try_new` docs to include the enabled stat collection [\#3775](https://github.com/apache/arrow-datafusion/pull/3775) ([isidentical](https://github.com/isidentical)) +- Teach a negative NULL expression to return NULL instead of an error [\#3771](https://github.com/apache/arrow-datafusion/pull/3771) ([drrtuy](https://github.com/drrtuy)) +- Add benchmarks for testing row filtering [\#3769](https://github.com/apache/arrow-datafusion/pull/3769) ([thinkharderdev](https://github.com/thinkharderdev)) +- move type coercion of agg and agg_udaf to logical phase [\#3768](https://github.com/apache/arrow-datafusion/pull/3768) ([liukun4515](https://github.com/liukun4515)) +- User Guide: Add `EXPLAIN` to SQL reference [\#3767](https://github.com/apache/arrow-datafusion/pull/3767) ([unvalley](https://github.com/unvalley)) +- Allow specialized implementations to produce hints for the array adapter [\#3765](https://github.com/apache/arrow-datafusion/pull/3765) ([isidentical](https://github.com/isidentical)) +- Fix optimizer regression with simplifying expressions in subquery filters [\#3764](https://github.com/apache/arrow-datafusion/pull/3764) ([andygrove](https://github.com/andygrove)) +- Run all `datafusion-examples` in CI tests [\#3761](https://github.com/apache/arrow-datafusion/pull/3761) ([alamb](https://github.com/alamb)) +- MINOR: Remove deprecated module `datafusion::logical_plan::plan` [\#3759](https://github.com/apache/arrow-datafusion/pull/3759) ([andygrove](https://github.com/andygrove)) +- Refactor `Expr::Case` to use a struct 
[\#3757](https://github.com/apache/arrow-datafusion/pull/3757) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Do not run labeler CI check if it would fail due to permissions [\#3756](https://github.com/apache/arrow-datafusion/pull/3756) ([alamb](https://github.com/alamb)) +- MINOR: Improvements to `scalar_subquery_to_join` error handling [\#3754](https://github.com/apache/arrow-datafusion/pull/3754) ([andygrove](https://github.com/andygrove)) +- Always track the final size of the in-mem sorted arrays [\#3753](https://github.com/apache/arrow-datafusion/pull/3753) ([isidentical](https://github.com/isidentical)) +- Fix DataFrame::select_columns to handle column names with a period [\#3751](https://github.com/apache/arrow-datafusion/pull/3751) ([zhoudongyan](https://github.com/zhoudongyan)) +- Fix `ListingTableUrl` to decode percent [\#3750](https://github.com/apache/arrow-datafusion/pull/3750) ([unvalley](https://github.com/unvalley)) +- remove `type coercion` for physical ScalarFunction [\#3749](https://github.com/apache/arrow-datafusion/pull/3749) ([liukun4515](https://github.com/liukun4515)) +- CI: Add a new run to check whether `datafusion-cli` lock file is up-to-date [\#3745](https://github.com/apache/arrow-datafusion/pull/3745) ([isidentical](https://github.com/isidentical)) +- Add datafusion example of expression apis [\#3741](https://github.com/apache/arrow-datafusion/pull/3741) ([alamb](https://github.com/alamb)) +- fix subquery where exists distinct [\#3732](https://github.com/apache/arrow-datafusion/pull/3732) ([b41sh](https://github.com/b41sh)) +- Remove some uneeded code in `CommonSubexprEliminate` [\#3730](https://github.com/apache/arrow-datafusion/pull/3730) ([alamb](https://github.com/alamb)) +- Consolidate and better tests for expression re-rewriting / aliasing [\#3727](https://github.com/apache/arrow-datafusion/pull/3727) ([alamb](https://github.com/alamb)) +- Fix output schema generated 
by CommonSubExprEliminate [\#3726](https://github.com/apache/arrow-datafusion/pull/3726) ([alex-natzka](https://github.com/alex-natzka)) +- Add type coercion rule for `concat` and `concat_ws` [\#3721](https://github.com/apache/arrow-datafusion/pull/3721) ([HaoYang670](https://github.com/HaoYang670)) +- Expose and document a simpler public API for simplify expressions [\#3719](https://github.com/apache/arrow-datafusion/pull/3719) ([ygf11](https://github.com/ygf11)) +- Remove dead code in `UnwrapCastExprRewriter` that may mask errors [\#3703](https://github.com/apache/arrow-datafusion/pull/3703) ([alamb](https://github.com/alamb)) +- Fix `DataFrame::with_column` to handle creating column names with a period [\#3700](https://github.com/apache/arrow-datafusion/pull/3700) ([alamb](https://github.com/alamb)) +- Add simplification rules for the `CONCAT` function [\#3684](https://github.com/apache/arrow-datafusion/pull/3684) ([HaoYang670](https://github.com/HaoYang670)) +- Compressed CSV/JSON support [\#3642](https://github.com/apache/arrow-datafusion/pull/3642) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Licht-T](https://github.com/Licht-T)) +- Simplify serialization by removing redundant `PrimitiveScalarValue` [\#3612](https://github.com/apache/arrow-datafusion/pull/3612) ([alamb](https://github.com/alamb)) +- Pushdown single column predicates from ON join clauses [\#3578](https://github.com/apache/arrow-datafusion/pull/3578) ([AssHero](https://github.com/AssHero)) +- Simplify the serialization of `ScalarValue::List` [\#3547](https://github.com/apache/arrow-datafusion/pull/3547) ([alamb](https://github.com/alamb)) +- Generate hash aggregation output in smaller record batches [\#3461](https://github.com/apache/arrow-datafusion/pull/3461) ([milenkovicm](https://github.com/milenkovicm)) +- Improve doc on lowercase treatment of columns on SQL [\#3385](https://github.com/apache/arrow-datafusion/pull/3385) ([nanicpc](https://github.com/nanicpc)) diff --git 
a/dev/changelog/15.0.0.md b/dev/changelog/15.0.0.md new file mode 100644 index 0000000000000..474a82b1c08e9 --- /dev/null +++ b/dev/changelog/15.0.0.md @@ -0,0 +1,330 @@ + + +## [15.0.0](https://github.com/apache/arrow-datafusion/tree/15.0.0) (2022-12-01) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/14.0.0-rc1...15.0.0) + +**Breaking changes:** + +- Expose remaining parquet config options into ConfigOptions \(try 2\) [\#4427](https://github.com/apache/arrow-datafusion/pull/4427) ([alamb](https://github.com/alamb)) +- Config Cleanup: Remove TaskProperties and KV structure, keep key=value serialization [\#4382](https://github.com/apache/arrow-datafusion/pull/4382) ([alamb](https://github.com/alamb)) +- add `{TDigest,ScalarValue,Accumulator}::size` [\#4342](https://github.com/apache/arrow-datafusion/pull/4342) ([crepererum](https://github.com/crepererum)) +- API-break: Support `SubqueryAlias` and remove `Alias in Projection` [\#4333](https://github.com/apache/arrow-datafusion/pull/4333) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- split `try_new_with_schema_alias` from original code [\#4284](https://github.com/apache/arrow-datafusion/pull/4284) ([jackwener](https://github.com/jackwener)) +- Collapse statistics in normal explain plan [\#4157](https://github.com/apache/arrow-datafusion/pull/4157) ([alamb](https://github.com/alamb)) +- Linearize binary expressions to reduce proto tree complexity [\#4115](https://github.com/apache/arrow-datafusion/pull/4115) ([isidentical](https://github.com/isidentical)) +- support `SET Timezone` [\#4107](https://github.com/apache/arrow-datafusion/pull/4107) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) + +**Implemented enhancements:** + +- Refactor Built-in, Aggregate window functions to increase code reuse. 
[\#4440](https://github.com/apache/arrow-datafusion/issues/4440) +- Helper to get "root" error [\#4435](https://github.com/apache/arrow-datafusion/issues/4435) +- Do NOT convert intermediate/source errors to strings. [\#4434](https://github.com/apache/arrow-datafusion/issues/4434) +- Estimate the `total_byte_size` of the filter expression's result when selectivity is available [\#4374](https://github.com/apache/arrow-datafusion/issues/4374) +- refactor the code of the `HashJoin` [\#4356](https://github.com/apache/arrow-datafusion/issues/4356) +- `CoalesceBatchesExec` reports no ordering [\#4331](https://github.com/apache/arrow-datafusion/issues/4331) +- Introduce tournament tree to achieve better k-way sort-merging [\#4300](https://github.com/apache/arrow-datafusion/issues/4300) +- Add a checker to confirm physical optimizer rules will keep the physical plan schema immutable [\#4299](https://github.com/apache/arrow-datafusion/issues/4299) +- Remove the macro rule `unary_scalar_expr` from `expr_fn.rs` [\#4298](https://github.com/apache/arrow-datafusion/issues/4298) +- Remove Alias-in-Projection, replace it with `SubqueryAlias` [\#4291](https://github.com/apache/arrow-datafusion/issues/4291) +- reimplement `reduce_outer_join` [\#4270](https://github.com/apache/arrow-datafusion/issues/4270) +- Reimplement `filter_push_down` [\#4266](https://github.com/apache/arrow-datafusion/issues/4266) +- Reimplement `eliminate_limit` [\#4264](https://github.com/apache/arrow-datafusion/issues/4264) +- Reimplement `limit_push_down` [\#4263](https://github.com/apache/arrow-datafusion/issues/4263) +- Make a data driven SQL testing tool \(so we can reuse duckdb test suite, example\) [\#4248](https://github.com/apache/arrow-datafusion/issues/4248) +- upgrade chrono to 0.4.23 [\#4224](https://github.com/apache/arrow-datafusion/issues/4224) +- support scan non-string columns partitioned parquet files [\#4218](https://github.com/apache/arrow-datafusion/issues/4218) +- Allow optimizer rules 
to skip optimizing plans [\#4209](https://github.com/apache/arrow-datafusion/issues/4209) +- Supporting specifying schema when create tables [\#4183](https://github.com/apache/arrow-datafusion/issues/4183) +- Improve ergonomics of creating `ListingOptions` [\#4178](https://github.com/apache/arrow-datafusion/issues/4178) +- Add ability to specify external sort information for ParquetExec [\#4169](https://github.com/apache/arrow-datafusion/issues/4169) +- Add another method to collect referenced columns from an expression [\#4152](https://github.com/apache/arrow-datafusion/issues/4152) +- Improve `EXPLAIN ANALYZE` output for parquet exec [\#4144](https://github.com/apache/arrow-datafusion/issues/4144) +- `TableProviderFactory::create` should have `Optional` parameter [\#4142](https://github.com/apache/arrow-datafusion/issues/4142) +- Support more expressions in equality join [\#4140](https://github.com/apache/arrow-datafusion/issues/4140) +- JoinSelection Rule to choose physical join implementation: HashJoin\(Partitioned or CollectLeft\) or SortMergeJoin base on Stats [\#4139](https://github.com/apache/arrow-datafusion/issues/4139) +- Allow TPCH tooling to create a combined result for easier processing by outside tools [\#4127](https://github.com/apache/arrow-datafusion/issues/4127) +- Allow additional options when creating an external table [\#4125](https://github.com/apache/arrow-datafusion/issues/4125) +- reuse code utils::optimize_children instead of redundant implementation [\#4120](https://github.com/apache/arrow-datafusion/issues/4120) +- Add test field to PR template [\#4113](https://github.com/apache/arrow-datafusion/issues/4113) +- Allow for automatic registration of `ListingTables` [\#4111](https://github.com/apache/arrow-datafusion/issues/4111) +- Add CI check that configs.md is up-to-date [\#4108](https://github.com/apache/arrow-datafusion/issues/4108) +- Support `SET` timezone to non-UTC time zone 
[\#4106](https://github.com/apache/arrow-datafusion/issues/4106) +- Parquet predicates contains `and true` expressions [\#4091](https://github.com/apache/arrow-datafusion/issues/4091) +- Replace RwLock\<HashMap\> and Mutex\<HashMap\> by using DashMap [\#4077](https://github.com/apache/arrow-datafusion/issues/4077) +- add support for `.xz` compressed files [\#4074](https://github.com/apache/arrow-datafusion/issues/4074) +- add a feature gate to make support for compressed files optional [\#4073](https://github.com/apache/arrow-datafusion/issues/4073) +- Support serializing more deeply nested AND / OR expressions [\#4066](https://github.com/apache/arrow-datafusion/issues/4066) +- Use f64::total_cmp instead of OrderedFloat [\#4051](https://github.com/apache/arrow-datafusion/issues/4051) +- Add documentation to make it clear that decimal support is still experimental [\#4036](https://github.com/apache/arrow-datafusion/issues/4036) +- Simplify Pushed Down Predicates [\#4020](https://github.com/apache/arrow-datafusion/issues/4020) +- Improve HashJoinExec metrics [\#4009](https://github.com/apache/arrow-datafusion/issues/4009) +- Move physical plan serde from Ballista to DataFusion [\#3949](https://github.com/apache/arrow-datafusion/issues/3949) +- Support `SubqueryAlias` better in planner [\#3927](https://github.com/apache/arrow-datafusion/issues/3927) +- A framework for expression boundary analysis \(and statistics\) [\#3898](https://github.com/apache/arrow-datafusion/issues/3898) +- Replace `Filter: Boolean(false)` with `EmptyRelation` [\#3864](https://github.com/apache/arrow-datafusion/issues/3864) +- Implement statistics estimation for `FilterExec` [\#3845](https://github.com/apache/arrow-datafusion/issues/3845) +- Support parquet page filtering for more types: String, Binary\(Decimal\), Int96 [\#3833](https://github.com/apache/arrow-datafusion/issues/3833) +- Allow configuring parquet filter pushdown dynamically [\#3821](https://github.com/apache/arrow-datafusion/issues/3821) +- Unable
to register tables in non-cloud S3 servers [\#3640](https://github.com/apache/arrow-datafusion/issues/3640) +- support more data type in prune for cast/try_cast [\#3442](https://github.com/apache/arrow-datafusion/issues/3442) +- Disable spill to disk globally [\#3264](https://github.com/apache/arrow-datafusion/issues/3264) +- Consider to categorize Operator [\#3216](https://github.com/apache/arrow-datafusion/issues/3216) +- Replace Projection.alias with SubqueryAlias [\#2212](https://github.com/apache/arrow-datafusion/issues/2212) +- \[Optimizer\] Eliminate the distinct [\#2045](https://github.com/apache/arrow-datafusion/issues/2045) +- beautify datafusion's site: https://arrow.apache.org/datafusion/ [\#1819](https://github.com/apache/arrow-datafusion/issues/1819) +- split datafusion-logical-plan sub-module [\#1755](https://github.com/apache/arrow-datafusion/issues/1755) +- convert `outer join` to `inner join` to improve performance [\#1585](https://github.com/apache/arrow-datafusion/issues/1585) +- Add sqllogictest for datafusion [\#1453](https://github.com/apache/arrow-datafusion/issues/1453) +- Add additional simplification rules [\#1406](https://github.com/apache/arrow-datafusion/issues/1406) +- support more subqueries [\#1209](https://github.com/apache/arrow-datafusion/issues/1209) +- Add baseline metrics for remaining execution plan nodes [\#1019](https://github.com/apache/arrow-datafusion/issues/1019) +- Make `ExecutionPlan` implementations immutable [\#987](https://github.com/apache/arrow-datafusion/issues/987) +- Architecture overview may be insufficient in README [\#980](https://github.com/apache/arrow-datafusion/issues/980) +- Add a separate configuration setting for parallelism of scanning parquet files [\#924](https://github.com/apache/arrow-datafusion/issues/924) +- Support hash repartition elimination [\#41](https://github.com/apache/arrow-datafusion/issues/41) + +**Fixed bugs:** + +- `pyarrow` CI failed
[\#4448](https://github.com/apache/arrow-datafusion/issues/4448) +- `UnwrapCastInComparison` exist bug [\#4430](https://github.com/apache/arrow-datafusion/issues/4430) +- The CLI panics when passing an invalid `explain` query [\#4378](https://github.com/apache/arrow-datafusion/issues/4378) +- HashJoin should return Err when the right side input stream produce Err [\#4362](https://github.com/apache/arrow-datafusion/issues/4362) +- Optimizer check errors if resulting schema has different metadata [\#4346](https://github.com/apache/arrow-datafusion/issues/4346) +- Panic with function `to_hex` [\#4339](https://github.com/apache/arrow-datafusion/issues/4339) +- `LimitPushDown` pushdown into limit, result is wrong [\#4308](https://github.com/apache/arrow-datafusion/issues/4308) +- DESCRIBE statement issue with qualified table references [\#4303](https://github.com/apache/arrow-datafusion/issues/4303) +- Panic with window function LAST_VALUE [\#4297](https://github.com/apache/arrow-datafusion/issues/4297) +- CI failed in `Compare to postgres` [\#4294](https://github.com/apache/arrow-datafusion/issues/4294) +- Field alias can't work in where clause [\#4288](https://github.com/apache/arrow-datafusion/issues/4288) +- Some valid filters are not pushed down to parquet scan [\#4282](https://github.com/apache/arrow-datafusion/issues/4282) +- The type renaming `pub type NullColumnarValue = ColumnarValue` makes no sense [\#4271](https://github.com/apache/arrow-datafusion/issues/4271) +- Current `limit_push_down` can't support cross_join [\#4256](https://github.com/apache/arrow-datafusion/issues/4256) +- Cargo test fail [\#4253](https://github.com/apache/arrow-datafusion/issues/4253) +- RightSemi/RightAnti HashJoin has bug, the left_indices is never populated, causing failure to apply join filters. 
[\#4247](https://github.com/apache/arrow-datafusion/issues/4247) +- Clippy failures [\#4245](https://github.com/apache/arrow-datafusion/issues/4245) +- Cannot query s3 data from datafusion-cli [\#4239](https://github.com/apache/arrow-datafusion/issues/4239) +- Bug parsing interval with negative values [\#4237](https://github.com/apache/arrow-datafusion/issues/4237) +- `cargo test` reports errors on the master branch. [\#4236](https://github.com/apache/arrow-datafusion/issues/4236) +- Doc of the expression function`log2` is incorrect [\#4231](https://github.com/apache/arrow-datafusion/issues/4231) +- HashJoin with mode PartitionMode:CollectLeft has bug and can produce wrong result [\#4230](https://github.com/apache/arrow-datafusion/issues/4230) +- Add ambiguous check when generate projection plan [\#4210](https://github.com/apache/arrow-datafusion/issues/4210) +- What happened for NDJSON support on CLI? [\#4198](https://github.com/apache/arrow-datafusion/issues/4198) +- Add ambiguous check when generate join plan [\#4197](https://github.com/apache/arrow-datafusion/issues/4197) +- Clippy failing on master : error: use of deprecated associated function `chrono::NaiveDate::from_ymd`: use `from_ymd_opt()` instead [\#4187](https://github.com/apache/arrow-datafusion/issues/4187) +- Reimplement the `eliminate_cross_join` [\#4176](https://github.com/apache/arrow-datafusion/issues/4176) +- Incorrect handling of column names [\#4166](https://github.com/apache/arrow-datafusion/issues/4166) +- Update release scripts to support datafusion-benchmarks [\#4134](https://github.com/apache/arrow-datafusion/issues/4134) +- Bug in interpreting correctly parsed SQL with aliases [\#4123](https://github.com/apache/arrow-datafusion/issues/4123) +- The percentile argument for ApproxPercentileCont must be Float64, not Decimal128\(2, 1\) [\#4103](https://github.com/apache/arrow-datafusion/issues/4103) +- Panic when using array_agg 
[\#4080](https://github.com/apache/arrow-datafusion/issues/4080) +- Wrong result for FIRST_VALUE AND LAST_VALUE window functions [\#4076](https://github.com/apache/arrow-datafusion/issues/4076) +- Round error when casting float to decimal [\#4071](https://github.com/apache/arrow-datafusion/issues/4071) +- Predicate still has cast when comparing Timestamp\(Nano, None\) to a timestamp literal, so can't be pushed down or used for pruning [\#3938](https://github.com/apache/arrow-datafusion/issues/3938) +- Revisit required_child_distribution\(\), output_partitioning\(\), output_ordering\(\) implementations in ExecutionPlan's implementations [\#3653](https://github.com/apache/arrow-datafusion/issues/3653) +- Can't push down projection after do type coercion [\#3583](https://github.com/apache/arrow-datafusion/issues/3583) +- In some circumstances cast expression is not working [\#3499](https://github.com/apache/arrow-datafusion/issues/3499) +- output_partitioning\(\) and output_ordering\(\) implementations are wrong in some physical plan implementations with alias [\#3400](https://github.com/apache/arrow-datafusion/issues/3400) +- Interval Literal doesn't work for timeunit less than millisecond [\#3204](https://github.com/apache/arrow-datafusion/issues/3204) +- `INTERVAL` literal with duplicated interval types should raise error [\#3183](https://github.com/apache/arrow-datafusion/issues/3183) +- Error occurs when only using partition columns in query [\#1999](https://github.com/apache/arrow-datafusion/issues/1999) +- regex_match does not compile using the `g` flag [\#1429](https://github.com/apache/arrow-datafusion/issues/1429) +- `between` with NULL literals does not work: can't be evaluated because there isn't a common type to coerce the types to [\#1193](https://github.com/apache/arrow-datafusion/issues/1193) +- \[Datafusion\] Error with CAST: Unsupported SQL type Time [\#193](https://github.com/apache/arrow-datafusion/issues/193) + +**Closed issues:** + +- SQL level 
coverage for when memory limit is exceeded [\#4404](https://github.com/apache/arrow-datafusion/issues/4404) +- Throw error \(not `panic`\) if a listing table specifies an missing partition column [\#4350](https://github.com/apache/arrow-datafusion/issues/4350) +- Page index pruning fail on complex_expr [\#4317](https://github.com/apache/arrow-datafusion/issues/4317) +- optimize `limit-full join` in the limit push down rule [\#4275](https://github.com/apache/arrow-datafusion/issues/4275) +- `infer_schema` function is not working with s3 Urls or http endpoints [\#4269](https://github.com/apache/arrow-datafusion/issues/4269) +- Add support binary boolean operators with nulls [\#4241](https://github.com/apache/arrow-datafusion/issues/4241) +- Add additional testing to parquet predicate pushdown integration tests [\#4087](https://github.com/apache/arrow-datafusion/issues/4087) +- Add metrics for parquet page level skipping [\#4086](https://github.com/apache/arrow-datafusion/issues/4086) +- Add parquet page index pushdown metrics [\#4058](https://github.com/apache/arrow-datafusion/issues/4058) +- Throw a runtime error if the memory allocated to GroupByHash exceeds a limit [\#3940](https://github.com/apache/arrow-datafusion/issues/3940) +- support unsigned numeric data type in UnwrapCastInBinaryComparison rule [\#3702](https://github.com/apache/arrow-datafusion/issues/3702) +- Support type cast in union [\#2125](https://github.com/apache/arrow-datafusion/issues/2125) +- \[EPIC\] Memory Limited Sort \(Externalized / Spill\) [\#1568](https://github.com/apache/arrow-datafusion/issues/1568) +- Maintain partition information in Union [\#189](https://github.com/apache/arrow-datafusion/issues/189) +- Add coercion support for `NULL` literals [\#185](https://github.com/apache/arrow-datafusion/issues/185) + +**Merged pull requests:** + +- Make `datafusion-sql` depend on `arrow-schema` instead of `arrow` [\#4456](https://github.com/apache/arrow-datafusion/pull/4456) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mbrobbel](https://github.com/mbrobbel)) +- replace the comparator for `decimal array op scalar` using arrow kernel [\#4453](https://github.com/apache/arrow-datafusion/pull/4453) ([liukun4515](https://github.com/liukun4515)) +- Fix pyarrow test [\#4450](https://github.com/apache/arrow-datafusion/pull/4450) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Replace `&Option<T>` with `Option<&T>` [\#4446](https://github.com/apache/arrow-datafusion/pull/4446) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([askoa](https://github.com/askoa)) +- Improve error handling for array downcasting [\#4445](https://github.com/apache/arrow-datafusion/pull/4445) ([retikulum](https://github.com/retikulum)) +- Refactor Builtin Window Function Implementation [\#4441](https://github.com/apache/arrow-datafusion/pull/4441) ([mustafasrepo](https://github.com/mustafasrepo)) +- feat: `DataFusionError::find_root` [\#4437](https://github.com/apache/arrow-datafusion/pull/4437) ([crepererum](https://github.com/crepererum)) +- fix: do NOT convert errors to strings but keep the type [\#4436](https://github.com/apache/arrow-datafusion/pull/4436) ([crepererum](https://github.com/crepererum)) +- The CLI panics when passing an invalid explain query [\#4429](https://github.com/apache/arrow-datafusion/pull/4429) ([comphead](https://github.com/comphead)) +- \[minor\] use arrow kernel concat_batches instead combine_batches [\#4423](https://github.com/apache/arrow-datafusion/pull/4423) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- fix panic on to_hex function for negative numbers [\#4422](https://github.com/apache/arrow-datafusion/pull/4422) ([retikulum](https://github.com/retikulum)) +- Optimize filter executor in pull-based executor [\#4421](https://github.com/apache/arrow-datafusion/pull/4421) ([xudong963](https://github.com/xudong963)) +- optimize limit push for join case
[\#4411](https://github.com/apache/arrow-datafusion/pull/4411) ([liukun4515](https://github.com/liukun4515)) +- Add integration test for erroring when memory limits are hit [\#4406](https://github.com/apache/arrow-datafusion/pull/4406) ([alamb](https://github.com/alamb)) +- feat: `ResourceExhausted` for memory limit in `AggregateStream` [\#4405](https://github.com/apache/arrow-datafusion/pull/4405) ([crepererum](https://github.com/crepererum)) +- Update to arrow 28 [\#4400](https://github.com/apache/arrow-datafusion/pull/4400) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Update rstest requirement from 0.15.0 to 0.16.0 [\#4399](https://github.com/apache/arrow-datafusion/pull/4399) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add sqllogictests \(v0\) [\#4395](https://github.com/apache/arrow-datafusion/pull/4395) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- improve hashjoin execution metrics [\#4394](https://github.com/apache/arrow-datafusion/pull/4394) ([AssHero](https://github.com/AssHero)) +- Add `with_new_inputs` for LogicalPlan [\#4393](https://github.com/apache/arrow-datafusion/pull/4393) ([jackwener](https://github.com/jackwener)) +- Clean the code in `limit.rs`. 
[\#4391](https://github.com/apache/arrow-datafusion/pull/4391) ([HaoYang670](https://github.com/HaoYang670)) +- Move physical plan serde from Ballista to DataFusion [\#4390](https://github.com/apache/arrow-datafusion/pull/4390) ([Kikkon](https://github.com/Kikkon)) +- Fix page index pruning fail on complex_expr [\#4387](https://github.com/apache/arrow-datafusion/pull/4387) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add check for nested types in equivalent names and types [\#4380](https://github.com/apache/arrow-datafusion/pull/4380) ([alamb](https://github.com/alamb)) +- refine the code of build schema for ambiguous check, factor this out into a function [\#4379](https://github.com/apache/arrow-datafusion/pull/4379) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) +- Refactor the Hash Join [\#4377](https://github.com/apache/arrow-datafusion/pull/4377) ([liukun4515](https://github.com/liukun4515)) +- Minor: Fix typos in the documentation [\#4376](https://github.com/apache/arrow-datafusion/pull/4376) ([martin-g](https://github.com/martin-g)) +- Include byte size estimates in the filter statistics [\#4375](https://github.com/apache/arrow-datafusion/pull/4375) ([isidentical](https://github.com/isidentical)) +- HashJoin should return Err when the right side input stream produce Err, add more join UTs to cover different join types [\#4373](https://github.com/apache/arrow-datafusion/pull/4373) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mingmwang](https://github.com/mingmwang)) +- feat: `ResourceExhausted` for memory limit in `GroupedHashAggregateStream` [\#4371](https://github.com/apache/arrow-datafusion/pull/4371) ([crepererum](https://github.com/crepererum)) +- Use limit\(\) function instead of show_limit\(\) in the first example [\#4369](https://github.com/apache/arrow-datafusion/pull/4369) ([martin-g](https://github.com/martin-g)) +- Update env_logger requirement from 0.9 to 0.10 
[\#4367](https://github.com/apache/arrow-datafusion/pull/4367) ([dependabot[bot]](https://github.com/apps/dependabot)) +- reimplement `push_down_filter` to remove global-state [\#4365](https://github.com/apache/arrow-datafusion/pull/4365) ([jackwener](https://github.com/jackwener)) +- Support to use Schedular in tpch benchmark [\#4361](https://github.com/apache/arrow-datafusion/pull/4361) ([xudong963](https://github.com/xudong963)) +- Adding more dataframe example to read csv files [\#4360](https://github.com/apache/arrow-datafusion/pull/4360) ([DataPsycho](https://github.com/DataPsycho)) +- minor: correct name and typo [\#4359](https://github.com/apache/arrow-datafusion/pull/4359) ([jackwener](https://github.com/jackwener)) +- Do not log error if page index can not be evaluated [\#4358](https://github.com/apache/arrow-datafusion/pull/4358) ([alamb](https://github.com/alamb)) +- Clean the `expr_fn` - use `scalar_expr` to create unary scalar expr functions, remove macro `unary_scalar_functions` [\#4357](https://github.com/apache/arrow-datafusion/pull/4357) ([HaoYang670](https://github.com/HaoYang670)) +- Throw error \(not `panic`\) if a listing table specifies an missing partition column [\#4354](https://github.com/apache/arrow-datafusion/pull/4354) ([doki23](https://github.com/doki23)) +- Improve error handling and add some more types for proper downcasting [\#4352](https://github.com/apache/arrow-datafusion/pull/4352) ([retikulum](https://github.com/retikulum)) +- Add check to avoid underflow in memory manager [\#4351](https://github.com/apache/arrow-datafusion/pull/4351) ([askoa](https://github.com/askoa)) +- Improve error messages when memory is exhausted while sorting [\#4348](https://github.com/apache/arrow-datafusion/pull/4348) ([alamb](https://github.com/alamb)) +- Do not error in optimizer if resulting schema has different metadata [\#4347](https://github.com/apache/arrow-datafusion/pull/4347) ([alamb](https://github.com/alamb)) +- minor: improve optimizer 
logging and do not repeat rule name [\#4345](https://github.com/apache/arrow-datafusion/pull/4345) ([alamb](https://github.com/alamb)) +- minor: fix typos in test names [\#4344](https://github.com/apache/arrow-datafusion/pull/4344) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Minor: Add docstrings to `EliminateOuterJoins` optimizer pass [\#4343](https://github.com/apache/arrow-datafusion/pull/4343) ([alamb](https://github.com/alamb)) +- Minor: refactor: isolate common memory accounting utils [\#4341](https://github.com/apache/arrow-datafusion/pull/4341) ([crepererum](https://github.com/crepererum)) +- minor: make `plan_from_tables` return one plan instead of `Vec<plan>` [\#4336](https://github.com/apache/arrow-datafusion/pull/4336) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- enhancement: when fetch == 0, pushdown limit 0 instead skip+fetch. [\#4334](https://github.com/apache/arrow-datafusion/pull/4334) ([jackwener](https://github.com/jackwener)) +- Teach optimizer that `CoalesceBatchesExec` does not destroy output order [\#4332](https://github.com/apache/arrow-datafusion/pull/4332) ([alamb](https://github.com/alamb)) +- Add ability to disable DiskManager [\#4330](https://github.com/apache/arrow-datafusion/pull/4330) ([tustvold](https://github.com/tustvold)) +- Update cli.md [\#4329](https://github.com/apache/arrow-datafusion/pull/4329) ([psvri](https://github.com/psvri)) +- fix bug: right semi join can't support the filter [\#4327](https://github.com/apache/arrow-datafusion/pull/4327) ([liukun4515](https://github.com/liukun4515)) +- reimplment `eliminate_limit` to remove `global-state`.
[\#4324](https://github.com/apache/arrow-datafusion/pull/4324) ([jackwener](https://github.com/jackwener)) +- Refine Err propagation and avoid unwrap in transform closures [\#4318](https://github.com/apache/arrow-datafusion/pull/4318) ([mingmwang](https://github.com/mingmwang)) +- Add a checker to confirm physical optimizer rules will keep the physical plan schema immutable [\#4316](https://github.com/apache/arrow-datafusion/pull/4316) ([mingmwang](https://github.com/mingmwang)) +- Refactor downcasting functions with downcastvalue macro and improve error handling of `ListArray` downcasting [\#4313](https://github.com/apache/arrow-datafusion/pull/4313) ([retikulum](https://github.com/retikulum)) +- minor: add another test case to cover join ambiguous check [\#4305](https://github.com/apache/arrow-datafusion/pull/4305) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- Fix DESCRIBE statement qualified table issue [\#4304](https://github.com/apache/arrow-datafusion/pull/4304) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) +- Use tournament loser tree for k-way sort-merging, increase merge speed by 50% [\#4301](https://github.com/apache/arrow-datafusion/pull/4301) ([richox](https://github.com/richox)) +- Pin Python `setuptools` in the CI to fix integration tests [\#4296](https://github.com/apache/arrow-datafusion/pull/4296) ([isidentical](https://github.com/isidentical)) +- Support `SubqueryAlias` in optimizer, physcial planner. 
[\#4293](https://github.com/apache/arrow-datafusion/pull/4293) ([jackwener](https://github.com/jackwener)) +- minor: avoid a clone into string when checking ambiguous [\#4292](https://github.com/apache/arrow-datafusion/pull/4292) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- replace the comparison op for decimal array op using the arrow-rs kernel [\#4290](https://github.com/apache/arrow-datafusion/pull/4290) ([liukun4515](https://github.com/liukun4515)) +- MINOR: replace `{..}` with `(_)`, typo, remove outdated TODO [\#4286](https://github.com/apache/arrow-datafusion/pull/4286) ([jackwener](https://github.com/jackwener)) +- Reduce Expr copies in `ParquetExec` [\#4283](https://github.com/apache/arrow-datafusion/pull/4283) ([alamb](https://github.com/alamb)) +- Fix issue in filter pushdown with overloaded projection index [\#4281](https://github.com/apache/arrow-datafusion/pull/4281) ([thinkharderdev](https://github.com/thinkharderdev)) +- Skip useless pruning predicates in `ParquetExec` [\#4280](https://github.com/apache/arrow-datafusion/pull/4280) ([alamb](https://github.com/alamb)) +- Push down more predicates into `ParquetExec` [\#4279](https://github.com/apache/arrow-datafusion/pull/4279) ([alamb](https://github.com/alamb)) +- Fix EXPLAIN plan for ParquetExec to show pruning_predicate [\#4278](https://github.com/apache/arrow-datafusion/pull/4278) ([alamb](https://github.com/alamb)) +- reimplement `limit_push_down` to remove global-state, enhance optimize and simplify code. 
[\#4276](https://github.com/apache/arrow-datafusion/pull/4276) ([jackwener](https://github.com/jackwener)) +- Bump actions/labeler from 4.0.2 to 4.1.0 [\#4274](https://github.com/apache/arrow-datafusion/pull/4274) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Remove the type alias `NullColumnarValue` [\#4273](https://github.com/apache/arrow-datafusion/pull/4273) ([HaoYang670](https://github.com/HaoYang670)) +- reimplement `eliminate_outer_join` [\#4272](https://github.com/apache/arrow-datafusion/pull/4272) ([jackwener](https://github.com/jackwener)) +- Fix bugs in parsing `with header row` and `partitioned by` [\#4268](https://github.com/apache/arrow-datafusion/pull/4268) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- improve error messages while downcasting `UInt32Array`, `UInt64Array` and `BooleanArray` [\#4261](https://github.com/apache/arrow-datafusion/pull/4261) ([retikulum](https://github.com/retikulum)) +- add ambiguous check for projection [\#4260](https://github.com/apache/arrow-datafusion/pull/4260) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) +- Add ambiguous check for join [\#4258](https://github.com/apache/arrow-datafusion/pull/4258) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- support cross_join in `limit_push_down` [\#4257](https://github.com/apache/arrow-datafusion/pull/4257) ([jackwener](https://github.com/jackwener)) +- Support parquet page filtering on min_max for `decimal128` and `string` columns [\#4255](https://github.com/apache/arrow-datafusion/pull/4255) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- fix conflict and UT, cleanup redundant legacy code [\#4252](https://github.com/apache/arrow-datafusion/pull/4252) ([jackwener](https://github.com/jackwener)) +- Minor: remove unecessary clone\(\) in planner 
[\#4249](https://github.com/apache/arrow-datafusion/pull/4249) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fix nightly clippy failures [\#4246](https://github.com/apache/arrow-datafusion/pull/4246) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Improve Error Handling and Readibility for downcasting `Float32Array`, `Float64Array`, `StringArray` [\#4244](https://github.com/apache/arrow-datafusion/pull/4244) ([retikulum](https://github.com/retikulum)) +- Use defaults for ListingOptions builder [\#4243](https://github.com/apache/arrow-datafusion/pull/4243) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Support binary boolean operators with nulls [\#4242](https://github.com/apache/arrow-datafusion/pull/4242) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Fixing doc of the expression [\#4240](https://github.com/apache/arrow-datafusion/pull/4240) ([Creampanda](https://github.com/Creampanda)) +- Fix negative interval parsing bug [\#4238](https://github.com/apache/arrow-datafusion/pull/4238) ([Jefffrey](https://github.com/Jefffrey)) +- remove duplicate or redundant code [\#4235](https://github.com/apache/arrow-datafusion/pull/4235) ([jackwener](https://github.com/jackwener)) +- add a checker to confirm optimizer can keep plan schema immutable. 
[\#4233](https://github.com/apache/arrow-datafusion/pull/4233) ([jackwener](https://github.com/jackwener)) +- Fix the percentile argument for ApproxPercentileCont must be Float64, not Decimal128\(2, 1\) [\#4228](https://github.com/apache/arrow-datafusion/pull/4228) ([comphead](https://github.com/comphead)) +- refactor how we create listing tables [\#4227](https://github.com/apache/arrow-datafusion/pull/4227) ([timvw](https://github.com/timvw)) +- Update sqlparser requirement from 0.26 to 0.27 [\#4226](https://github.com/apache/arrow-datafusion/pull/4226) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- upgrade required chrono version to 0.4.23 [\#4225](https://github.com/apache/arrow-datafusion/pull/4225) ([waitingkuo](https://github.com/waitingkuo)) +- Support types other than String for partition columns on ListingTables [\#4221](https://github.com/apache/arrow-datafusion/pull/4221) ([doki23](https://github.com/doki23)) +- \[CBO\] JoinSelection Rule, select HashJoin Partition Mode based on the Join Type and available statistics, option for SortMergeJoin [\#4219](https://github.com/apache/arrow-datafusion/pull/4219) ([mingmwang](https://github.com/mingmwang)) +- Remove alias in Union [\#4212](https://github.com/apache/arrow-datafusion/pull/4212) ([jackwener](https://github.com/jackwener)) +- Add try_optimize method [\#4208](https://github.com/apache/arrow-datafusion/pull/4208) ([andygrove](https://github.com/andygrove)) +- Provide a builder for ListingOptions with fixups [\#4207](https://github.com/apache/arrow-datafusion/pull/4207) ([alamb](https://github.com/alamb)) +- Avoid error with empty iterators used for `ScalarValue::iter_to_array` [\#4206](https://github.com/apache/arrow-datafusion/pull/4206) ([GrandChaman](https://github.com/GrandChaman)) +- Improve error message for regexp_match 'g' flag [\#4203](https://github.com/apache/arrow-datafusion/pull/4203) ([Jefffrey](https://github.com/Jefffrey)) +- Return 
`ResourceExhausted` errors when memory limit is exceed in `GroupedHashAggregateStreamV2` \(Row Hash\) [\#4202](https://github.com/apache/arrow-datafusion/pull/4202) ([crepererum](https://github.com/crepererum)) +- Add additional expr boolean simplification rules [\#4200](https://github.com/apache/arrow-datafusion/pull/4200) ([Jefffrey](https://github.com/Jefffrey)) +- Update to arrow and parquet 27.0.0 [\#4199](https://github.com/apache/arrow-datafusion/pull/4199) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Support `create table` with explicit column definitions [\#4194](https://github.com/apache/arrow-datafusion/pull/4194) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) +- Support all equality predicates in equality join [\#4193](https://github.com/apache/arrow-datafusion/pull/4193) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- add `propagate_empty_relation` optimizer rule [\#4192](https://github.com/apache/arrow-datafusion/pull/4192) ([jackwener](https://github.com/jackwener)) +- fix clippy [\#4190](https://github.com/apache/arrow-datafusion/pull/4190) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Fix clippy by avoiding deprecated functions in chrono [\#4189](https://github.com/apache/arrow-datafusion/pull/4189) ([alamb](https://github.com/alamb)) +- Disallow duplicate interval types during parsing [\#4188](https://github.com/apache/arrow-datafusion/pull/4188) ([Jefffrey](https://github.com/Jefffrey)) +- Parse nanoseconds for intervals [\#4186](https://github.com/apache/arrow-datafusion/pull/4186) ([Jefffrey](https://github.com/Jefffrey)) +- Add rule to reimplement `Eliminate cross join` and remove it in planner [\#4185](https://github.com/apache/arrow-datafusion/pull/4185) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- \[FOLLOWUP\] Enforcement Rule: resolve review comments, refactor adjust_input_keys_ordering\(\) [\#4184](https://github.com/apache/arrow-datafusion/pull/4184) ([mingmwang](https://github.com/mingmwang)) +- Simplify boolean parquet pushdown predicate [\#4182](https://github.com/apache/arrow-datafusion/pull/4182) ([Jefffrey](https://github.com/Jefffrey)) +- Minor: consolidate parquet `custom_reader` integration test into parquet_exec [\#4175](https://github.com/apache/arrow-datafusion/pull/4175) ([alamb](https://github.com/alamb)) +- minor: remove redundant println and cleanup [\#4173](https://github.com/apache/arrow-datafusion/pull/4173) ([jackwener](https://github.com/jackwener)) +- Add ability to specify external sort information for ListingTables [\#4170](https://github.com/apache/arrow-datafusion/pull/4170) ([alamb](https://github.com/alamb)) +- Improve Error Handling and Readibility for downcasting `Decimal128Array` [\#4168](https://github.com/apache/arrow-datafusion/pull/4168) ([retikulum](https://github.com/retikulum)) +- Minor: Remove completed comment on parquet row group pruning [\#4167](https://github.com/apache/arrow-datafusion/pull/4167) ([alamb](https://github.com/alamb)) +- Update hashbrown requirement from 0.12 to 0.13 [\#4164](https://github.com/apache/arrow-datafusion/pull/4164) ([dependabot[bot]](https://github.com/apps/dependabot)) +- MINOR: enable `dyn_cmp_dict` feature on arrow for physical expr crate [\#4163](https://github.com/apache/arrow-datafusion/pull/4163) ([isidentical](https://github.com/isidentical)) +- Derive filter statistic estimates from the predicate expression [\#4162](https://github.com/apache/arrow-datafusion/pull/4162) ([isidentical](https://github.com/isidentical)) +- Minor: pass `ParquetFileMetrics` to `build_row_filter` in parquet [\#4161](https://github.com/apache/arrow-datafusion/pull/4161) 
([alamb](https://github.com/alamb)) +- Minor: Extract parquet row group pruning code into its own module [\#4160](https://github.com/apache/arrow-datafusion/pull/4160) ([alamb](https://github.com/alamb)) +- Full support for time32 and time64 literal values \(`ScalarValue`\) [\#4156](https://github.com/apache/arrow-datafusion/pull/4156) ([andre-cc-natzka](https://github.com/andre-cc-natzka)) +- Window frame GROUPS mode support [\#4155](https://github.com/apache/arrow-datafusion/pull/4155) ([zembunia](https://github.com/zembunia)) +- Improve error messages while downcasting Int64Array [\#4154](https://github.com/apache/arrow-datafusion/pull/4154) ([retikulum](https://github.com/retikulum)) +- Add another method to collect referenced columns from an expression [\#4153](https://github.com/apache/arrow-datafusion/pull/4153) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- Remove BoxedAsyncFileReader [\#4150](https://github.com/apache/arrow-datafusion/pull/4150) ([tustvold](https://github.com/tustvold)) +- Support unsigned integers in `unwrap_cast_in_comparison` Optimizer rule [\#4149](https://github.com/apache/arrow-datafusion/pull/4149) ([alamb](https://github.com/alamb)) +- Add support for `DataType::Timestamp` casts in `unwrap_cast_in_comparison` optimizer pass [\#4148](https://github.com/apache/arrow-datafusion/pull/4148) ([alamb](https://github.com/alamb)) +- Add additional testing for `unwrap_cast_in_comparison` [\#4147](https://github.com/apache/arrow-datafusion/pull/4147) ([alamb](https://github.com/alamb)) +- improve error messages while downcasting Int32Array [\#4146](https://github.com/apache/arrow-datafusion/pull/4146) ([retikulum](https://github.com/retikulum)) +- Minor: Update docstring on unwrap_cast_in_comparison [\#4145](https://github.com/apache/arrow-datafusion/pull/4145) ([alamb](https://github.com/alamb)) +- add schema parameter to table provider factory create method 
[\#4143](https://github.com/apache/arrow-datafusion/pull/4143) ([milenkovicm](https://github.com/milenkovicm)) +- fix: shouldn't pass alias through into subquery. [\#4141](https://github.com/apache/arrow-datafusion/pull/4141) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Preserve the `Cast` expression in `columnize_expr` [\#4137](https://github.com/apache/arrow-datafusion/pull/4137) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Set versions to dependencies with path in benchmarks Cargo.toml file [\#4136](https://github.com/apache/arrow-datafusion/pull/4136) ([ArkashaJavelin](https://github.com/ArkashaJavelin)) +- Fix links [\#4135](https://github.com/apache/arrow-datafusion/pull/4135) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Use f64::total_cmp instead of OrderedFloat [\#4133](https://github.com/apache/arrow-datafusion/pull/4133) ([comphead](https://github.com/comphead)) +- Add parquet integration tests for explicitly smaller page sizes, page pruning [\#4131](https://github.com/apache/arrow-datafusion/pull/4131) ([alamb](https://github.com/alamb)) +- Consolidate `ParquetExec` tests in `parquet_exec` integration test [\#4130](https://github.com/apache/arrow-datafusion/pull/4130) ([alamb](https://github.com/alamb)) +- Minor: Use upstream `BooleanArray::true_count` [\#4129](https://github.com/apache/arrow-datafusion/pull/4129) ([alamb](https://github.com/alamb)) +- Combined TPCH runs & uniformed summaries for benchmarks [\#4128](https://github.com/apache/arrow-datafusion/pull/4128) ([isidentical](https://github.com/isidentical)) +- Enable TableProviderFactories to receive additional options when creating an external table [\#4126](https://github.com/apache/arrow-datafusion/pull/4126) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([timvw](https://github.com/timvw)) +- Add CI check that configs.md is 
up-to-date [\#4124](https://github.com/apache/arrow-datafusion/pull/4124) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- \[Part3\] Partition and Sort Enforcement, Enforcement rule implementation [\#4122](https://github.com/apache/arrow-datafusion/pull/4122) ([mingmwang](https://github.com/mingmwang)) +- reuse code `utils::optimize_children` but affect inline. [\#4121](https://github.com/apache/arrow-datafusion/pull/4121) ([jackwener](https://github.com/jackwener)) +- reuse code `utils::optimize_children` instead of redundant implementation [\#4119](https://github.com/apache/arrow-datafusion/pull/4119) ([jackwener](https://github.com/jackwener)) +- Allow listing tables to be created via TableFactories [\#4112](https://github.com/apache/arrow-datafusion/pull/4112) ([avantgardnerio](https://github.com/avantgardnerio)) +- Update SQL reference to state that decimal support is currently experimental [\#4109](https://github.com/apache/arrow-datafusion/pull/4109) ([andygrove](https://github.com/andygrove)) +- Add metrics for parquet page level skipping [\#4105](https://github.com/apache/arrow-datafusion/pull/4105) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add parser option for parsing SQL numeric literals as decimal [\#4102](https://github.com/apache/arrow-datafusion/pull/4102) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Replace RwLock\<HashMap\> and Mutex\<HashMap\> by using DashMap [\#4079](https://github.com/apache/arrow-datafusion/pull/4079) ([yahoNanJing](https://github.com/yahoNanJing)) +- Custom window frame support extended to built-in window functions [\#4078](https://github.com/apache/arrow-datafusion/pull/4078) ([mustafasrepo](https://github.com/mustafasrepo)) +- Enable tests for page index filtering in parquet filter pushdown test [\#4062](https://github.com/apache/arrow-datafusion/pull/4062) ([alamb](https://github.com/alamb)) +- \[Part2\] Partition and Sort Enforcement, ExecutionPlan enhancement 
[\#4043](https://github.com/apache/arrow-datafusion/pull/4043) ([mingmwang](https://github.com/mingmwang)) +- add support for xz file compression and `compression` feature [\#3993](https://github.com/apache/arrow-datafusion/pull/3993) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) +- Expression boundary analysis framework [\#3912](https://github.com/apache/arrow-datafusion/pull/3912) ([isidentical](https://github.com/isidentical)) diff --git a/dev/changelog/16.0.0.md b/dev/changelog/16.0.0.md new file mode 100644 index 0000000000000..fecf52f10995d --- /dev/null +++ b/dev/changelog/16.0.0.md @@ -0,0 +1,411 @@ + + +## [16.0.0](https://github.com/apache/arrow-datafusion/tree/16.0.0) (2023-01-12) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/16.0.0-rc1...16.0.0) + +**Breaking changes:** + +- Remove unused ExecutionPlan::relies_input_order \(has been replaced with `required_input_ordering`\) [\#4856](https://github.com/apache/arrow-datafusion/pull/4856) ([alamb](https://github.com/alamb)) +- Add DataFrame::into_view instead of implementing TableProvider \(\#2659\) [\#4778](https://github.com/apache/arrow-datafusion/pull/4778) ([tustvold](https://github.com/tustvold)) + +**Implemented enhancements:** + +- Support custom window frame with AVG aggregate function [\#4845](https://github.com/apache/arrow-datafusion/issues/4845) +- add sqllogicaltest for tpch and remove some duplicated test. [\#4801](https://github.com/apache/arrow-datafusion/issues/4801) +- Catalog Snapshot Isolation [\#4697](https://github.com/apache/arrow-datafusion/issues/4697) +- Support `select .. 
FROM 'parquet.file'` in datafusion-cli [\#4580](https://github.com/apache/arrow-datafusion/issues/4580) + +**Fixed bugs:** + +- Regression: `write_csv` result has incorrect formatting [\#4876](https://github.com/apache/arrow-datafusion/issues/4876) +- Incorrect results for join condition against current master branch [\#4844](https://github.com/apache/arrow-datafusion/issues/4844) +- Match Postgres for stddev and variance on less than 3 values [\#4843](https://github.com/apache/arrow-datafusion/issues/4843) +- `JOIN ... USING (columns)` works incorrectly with multiple columns \(joined-over columns are missing in the output\) [\#4674](https://github.com/apache/arrow-datafusion/issues/4674) +- ROW_NUMBER window function inconsistent across partitions in multi-threaded runtime [\#4673](https://github.com/apache/arrow-datafusion/issues/4673) +- `SELECT ... FROM (tbl1 UNION tbl2)` wrongly works like `SELECT DISTINCT ... FROM (tbl1 UNION tbl2)` [\#4667](https://github.com/apache/arrow-datafusion/issues/4667) +- DataFrame TableProvider Circular Reference [\#2659](https://github.com/apache/arrow-datafusion/issues/2659) + +**Documentation updates:** + +- Add Synnada to known uses [\#4857](https://github.com/apache/arrow-datafusion/pull/4857) ([ozankabak](https://github.com/ozankabak)) + +**Closed issues:** + +- Remove tests from `sql_integration` that were ported to `sqllogictest` [\#4498](https://github.com/apache/arrow-datafusion/issues/4498) +- How to register a http url to the `object_store` [\#4491](https://github.com/apache/arrow-datafusion/issues/4491) +- optimizer: support `unsigned <-> decimal` for unwrap_cast_in_comparion rule [\#4287](https://github.com/apache/arrow-datafusion/issues/4287) +- Add SQL support for NATURAL JOIN [\#117](https://github.com/apache/arrow-datafusion/issues/117) +- \[Datafusion\] Datafusion queries involving a column name that begins with a number produces unexpected results [\#108](https://github.com/apache/arrow-datafusion/issues/108) + 
+**Merged pull requests:** + +- docs: improve `Column::normalize_with_schemas` docs [\#4871](https://github.com/apache/arrow-datafusion/pull/4871) ([crepererum](https://github.com/crepererum)) +- Skip EliminateCrossJoin rule when meet non-empty join filter [\#4869](https://github.com/apache/arrow-datafusion/pull/4869) ([ygf11](https://github.com/ygf11)) +- Support for SQL Natural Join [\#4863](https://github.com/apache/arrow-datafusion/pull/4863) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- Minor: Move test data into `datafusion/core/tests/data` [\#4855](https://github.com/apache/arrow-datafusion/pull/4855) ([alamb](https://github.com/alamb)) +- Covariance single row input & null skipping [\#4852](https://github.com/apache/arrow-datafusion/pull/4852) ([korowa](https://github.com/korowa)) +- Document ability to select directly from files in datafusion-cli [\#4851](https://github.com/apache/arrow-datafusion/pull/4851) ([alamb](https://github.com/alamb)) +- Fix push_down_projection through a distinct [\#4849](https://github.com/apache/arrow-datafusion/pull/4849) ([Jefffrey](https://github.com/Jefffrey)) +- Support using var/var_pop/stddev/stddev_pop in window expressions with custom frames [\#4848](https://github.com/apache/arrow-datafusion/pull/4848) ([jonmmease](https://github.com/jonmmease)) +- Update variance/stddev to work with single values [\#4847](https://github.com/apache/arrow-datafusion/pull/4847) ([jonmmease](https://github.com/jonmmease)) +- Implement retract_batch for AvgAccumulator [\#4846](https://github.com/apache/arrow-datafusion/pull/4846) ([jonmmease](https://github.com/jonmmease)) +- Support wildcard select on multiple column using joins [\#4840](https://github.com/apache/arrow-datafusion/pull/4840) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- Orthogonalize distribution and sort enforcement rules into `EnforceDistribution` 
and `EnforceSorting` [\#4839](https://github.com/apache/arrow-datafusion/pull/4839) ([mustafasrepo](https://github.com/mustafasrepo)) +- support `select .. FROM 'parquet.file'` in datafusion-cli [\#4838](https://github.com/apache/arrow-datafusion/pull/4838) ([unconsolable](https://github.com/unconsolable)) +- Remove tests from sql_integration that were ported to sqllogictest [\#4836](https://github.com/apache/arrow-datafusion/pull/4836) ([matthewwillian](https://github.com/matthewwillian)) +- add tpch sqllogicaltest and remove some duplicated test [\#4802](https://github.com/apache/arrow-datafusion/pull/4802) ([jackwener](https://github.com/jackwener)) + +## [16.0.0-rc1](https://github.com/apache/arrow-datafusion/tree/16.0.0-rc1) (2023-01-07) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/15.0.0...16.0.0-rc1) + +**Breaking changes:** + +- Enable PhysicalOptimizerRule lazily \(\#4806\) [\#4807](https://github.com/apache/arrow-datafusion/pull/4807) ([tustvold](https://github.com/tustvold)) +- Move ConfigOptions to core [\#4803](https://github.com/apache/arrow-datafusion/pull/4803) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- remove Operator::{Like,NotLike,ILike,NotILike} [\#4792](https://github.com/apache/arrow-datafusion/pull/4792) ([unconsolable](https://github.com/unconsolable)) +- Move subquery alias assignment onto rules [\#4767](https://github.com/apache/arrow-datafusion/pull/4767) ([tustvold](https://github.com/tustvold)) +- Make SessionState members private [\#4764](https://github.com/apache/arrow-datafusion/pull/4764) ([tustvold](https://github.com/tustvold)) +- Deprecate SessionContext physical plan methods \(\#4617\) [\#4751](https://github.com/apache/arrow-datafusion/pull/4751) ([tustvold](https://github.com/tustvold)) +- Decouple physical optimizer from SessionConfig \(\#3887\) [\#4749](https://github.com/apache/arrow-datafusion/pull/4749) 
([tustvold](https://github.com/tustvold)) +- Don't share ConfigOptions \(\#3886\) [\#4712](https://github.com/apache/arrow-datafusion/pull/4712) ([tustvold](https://github.com/tustvold)) +- Push SessionState into FileFormat \(\#4349\) [\#4699](https://github.com/apache/arrow-datafusion/pull/4699) ([tustvold](https://github.com/tustvold)) +- Make SessionContext members private [\#4698](https://github.com/apache/arrow-datafusion/pull/4698) ([tustvold](https://github.com/tustvold)) +- Make OptimizerConfig a trait \(\#4631\) \(\#4638\) [\#4645](https://github.com/apache/arrow-datafusion/pull/4645) ([tustvold](https://github.com/tustvold)) +- DataFrame owned SessionState [\#4633](https://github.com/apache/arrow-datafusion/pull/4633) ([tustvold](https://github.com/tustvold)) +- Make LogicalPlanBuilder consuming \(\#4622\) [\#4632](https://github.com/apache/arrow-datafusion/pull/4632) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Make DataFrame API consuming \(\#4621\) [\#4624](https://github.com/apache/arrow-datafusion/pull/4624) ([tustvold](https://github.com/tustvold)) +- Make execute_stream functions sync [\#4608](https://github.com/apache/arrow-datafusion/pull/4608) ([tustvold](https://github.com/tustvold)) +- Remove ObjectStore from FileStream \(\#4533\) [\#4601](https://github.com/apache/arrow-datafusion/pull/4601) ([tustvold](https://github.com/tustvold)) +- Remove `AggregateState` wrapper [\#4582](https://github.com/apache/arrow-datafusion/pull/4582) ([alamb](https://github.com/alamb)) +- Fix querying and defining table / view names with period [\#4530](https://github.com/apache/arrow-datafusion/pull/4530) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- refactor code about `subquery_alias` and `expr-alias`. 
[\#4451](https://github.com/apache/arrow-datafusion/pull/4451) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) + +**Implemented enhancements:** + +- Move the ExtractEquijoinPredicate behind the SubqueryFilterToJoin [\#4759](https://github.com/apache/arrow-datafusion/issues/4759) +- Remove the config `datafusion.execution.coalesce_target_batch_size` [\#4756](https://github.com/apache/arrow-datafusion/issues/4756) +- SimplifyExpressions will fail when rebuild equijoin with alias [\#4754](https://github.com/apache/arrow-datafusion/issues/4754) +- Provide a constructor for the ConfigOptions with HashMap\ [\#4752](https://github.com/apache/arrow-datafusion/issues/4752) +- Non-deprecated support for planning SQL without DDL [\#4720](https://github.com/apache/arrow-datafusion/issues/4720) +- Add regression tests for planning TPC-DS queries [\#4718](https://github.com/apache/arrow-datafusion/issues/4718) +- Move the extracting join keys logic to optimizer [\#4710](https://github.com/apache/arrow-datafusion/issues/4710) +- Support compression in `IPCWriter` [\#4708](https://github.com/apache/arrow-datafusion/issues/4708) +- Support prepared statement parameter type inference [\#4700](https://github.com/apache/arrow-datafusion/issues/4700) +- PruningPredicate Use Physical not Logical Predicate [\#4695](https://github.com/apache/arrow-datafusion/issues/4695) +- Support for executing infinite files [\#4692](https://github.com/apache/arrow-datafusion/issues/4692) +- Add a sort rule to remove unnecessary SortExecs from physical plan [\#4686](https://github.com/apache/arrow-datafusion/issues/4686) +- Install `protoc` automatically when building `datafusion/proto` crate [\#4684](https://github.com/apache/arrow-datafusion/issues/4684) +- Make DfSchema wrap SchemaRef [\#4680](https://github.com/apache/arrow-datafusion/issues/4680) +- Reorder the physical plan optimizer rules 
[\#4678](https://github.com/apache/arrow-datafusion/issues/4678) +- Inconsistent behavior with PostgreSQL to decide Window Expressions ordering [\#4641](https://github.com/apache/arrow-datafusion/issues/4641) +- Returns error too late when parsing invalid file compression type. [\#4636](https://github.com/apache/arrow-datafusion/issues/4636) +- Make OptimizerConfig a Trait [\#4631](https://github.com/apache/arrow-datafusion/issues/4631) +- Move Optimize onto DataFrame [\#4626](https://github.com/apache/arrow-datafusion/issues/4626) +- Make LogicalPlanBuilder Consuming [\#4622](https://github.com/apache/arrow-datafusion/issues/4622) +- Make DataFrame Consuming [\#4621](https://github.com/apache/arrow-datafusion/issues/4621) +- rules don't need to recursion inside themselves [\#4613](https://github.com/apache/arrow-datafusion/issues/4613) +- \[window function\] support min max with self define sliding window. [\#4603](https://github.com/apache/arrow-datafusion/issues/4603) +- Add `try_optimize` for all_rules [\#4598](https://github.com/apache/arrow-datafusion/issues/4598) +- Refine the physical plan serialization and deserialization [\#4597](https://github.com/apache/arrow-datafusion/issues/4597) +- Normalize datafusion configuration names [\#4595](https://github.com/apache/arrow-datafusion/issues/4595) +- Add need_data_exchange in the ExecutionPlan to indicate whether a physical operator needs data exchange [\#4585](https://github.com/apache/arrow-datafusion/issues/4585) +- Bump Datafusion sql-parser dependency to 0.28 [\#4573](https://github.com/apache/arrow-datafusion/issues/4573) +- tpch test exist duplicated [\#4563](https://github.com/apache/arrow-datafusion/issues/4563) +- user-defined aggregate function as window function [\#4552](https://github.com/apache/arrow-datafusion/issues/4552) +- Convert a Prepare Logical Plan into a Logical Plan with all parameters replaced with values [\#4550](https://github.com/apache/arrow-datafusion/issues/4550) +- FileStream 
requires fake ObjectStore when ParquetFileReaderFactory is used [\#4533](https://github.com/apache/arrow-datafusion/issues/4533) +- Avoid reading the entire file in ChunkedStore [\#4524](https://github.com/apache/arrow-datafusion/issues/4524) +- Enrich filter statistics predictions with estimated column boundaries [\#4518](https://github.com/apache/arrow-datafusion/issues/4518) +- Show window frame info in physical plan [\#4509](https://github.com/apache/arrow-datafusion/issues/4509) +- Add sqllogictest auto labeler [\#4507](https://github.com/apache/arrow-datafusion/issues/4507) +- Optimize `is_distinct_from` / `is_not_distinct_from` [\#4482](https://github.com/apache/arrow-datafusion/issues/4482) +- Add window func related logic plan to proto ability. [\#4480](https://github.com/apache/arrow-datafusion/issues/4480) +- Make window function related struct public. [\#4479](https://github.com/apache/arrow-datafusion/issues/4479) +- Improve partition file explain plan display to show groupings [\#4466](https://github.com/apache/arrow-datafusion/issues/4466) +- Add support for non-column key for equijoin when eliminating cross join to inner join [\#4442](https://github.com/apache/arrow-datafusion/issues/4442) +- Remove the schema checking from `CrossJoinExec::try_new` [\#4431](https://github.com/apache/arrow-datafusion/issues/4431) +- Initial support for prepared statement [\#4426](https://github.com/apache/arrow-datafusion/issues/4426) +- Add support for NTILE built-in Window Function [\#4403](https://github.com/apache/arrow-datafusion/issues/4403) +- Add Support for MIN, MAX Aggregate Functions when run with custom window frames [\#4402](https://github.com/apache/arrow-datafusion/issues/4402) +- Support `INSERT INTO` statement [\#4397](https://github.com/apache/arrow-datafusion/issues/4397) +- Enhancement: split the SQL `planner` into smaller modules [\#4392](https://github.com/apache/arrow-datafusion/issues/4392) +- Proposal: Improve the join keys of logical plan 
[\#4389](https://github.com/apache/arrow-datafusion/issues/4389) +- Add `MergeSubqueryAlias` rule [\#4383](https://github.com/apache/arrow-datafusion/issues/4383) +- Optimizer rule support `subqueryAlias` [\#4381](https://github.com/apache/arrow-datafusion/issues/4381) +- Rewrite simple regex expressions [\#4370](https://github.com/apache/arrow-datafusion/issues/4370) +- Revisit get_statistics_with_limit\(\) method in datasource mod [\#4323](https://github.com/apache/arrow-datafusion/issues/4323) +- Support for type coercion for a \(Timestamp, Utf8\) pair [\#4311](https://github.com/apache/arrow-datafusion/issues/4311) +- replace the operation about decimal to the arrow-rs kernel [\#4289](https://github.com/apache/arrow-datafusion/issues/4289) +- change `date_part` return types to f64 [\#3997](https://github.com/apache/arrow-datafusion/issues/3997) +- Better api for setting `ConfigOptions` from SessionContext [\#3908](https://github.com/apache/arrow-datafusion/issues/3908) +- Make `ConfigOptions` easier to work with [\#3886](https://github.com/apache/arrow-datafusion/issues/3886) +- An asynchronous version of `CatalogList`/`CatalogProvider`/`SchemaProvider` [\#3777](https://github.com/apache/arrow-datafusion/issues/3777) +- Allow configs to be set with string values [\#3500](https://github.com/apache/arrow-datafusion/issues/3500) +- support scientific notation for SQL literals [\#3448](https://github.com/apache/arrow-datafusion/issues/3448) +- Adopt physical plan serde from arrow-ballista [\#3257](https://github.com/apache/arrow-datafusion/issues/3257) +- Improve codebase readability and error messages by consistently handling downcasting [\#3152](https://github.com/apache/arrow-datafusion/issues/3152) +- Re-enable where_clauses_object_safety [\#3081](https://github.com/apache/arrow-datafusion/issues/3081) +- optimize/simplify the literal data type and remove unnecessary cast, try_cast [\#3031](https://github.com/apache/arrow-datafusion/issues/3031) +- Move 
`datafusion-substrait` crate into `arrow-datafusion` repo [\#2646](https://github.com/apache/arrow-datafusion/issues/2646) +- \[enhancement\] rules don't need to recursion inside themselves [\#2620](https://github.com/apache/arrow-datafusion/issues/2620) +- Add support for `GROUPING SETS` syntax in SQL planner [\#2469](https://github.com/apache/arrow-datafusion/issues/2469) +- Optimize EXISTS subquery expressions by rewriting as semi-join [\#2351](https://github.com/apache/arrow-datafusion/issues/2351) +- Add Delta Lake TableProvider [\#525](https://github.com/apache/arrow-datafusion/issues/525) +- Support window functions with window frame [\#361](https://github.com/apache/arrow-datafusion/issues/361) + +**Fixed bugs:** + +- PushdownFilter rule exist bug will cause filter change wrong [\#4822](https://github.com/apache/arrow-datafusion/issues/4822) +- Unlimited memory consumption in `RepartitionExec` [\#4816](https://github.com/apache/arrow-datafusion/issues/4816) +- Physical Optimizer Config Mutation Doesn't Take Effect [\#4806](https://github.com/apache/arrow-datafusion/issues/4806) +- cargo test failed `error: linking with `cc` failed: exit status: 1` [\#4790](https://github.com/apache/arrow-datafusion/issues/4790) +- Parquet files generated by DataFusion cannot be read by Apache Spark [\#4782](https://github.com/apache/arrow-datafusion/issues/4782) +- datafusion-physical-expr doesn't compile when blake3/traits-preview is enabled [\#4781](https://github.com/apache/arrow-datafusion/issues/4781) +- Multiple ways to express `like` / `ilike` / `not like` / `not ilike` [\#4765](https://github.com/apache/arrow-datafusion/issues/4765) +- SessionState::optimize and SessionState::create_physical_plan Don't Update Query Start Time [\#4747](https://github.com/apache/arrow-datafusion/issues/4747) +- Page Filtering Incorrectly Handles Pages with Different Row Counts [\#4744](https://github.com/apache/arrow-datafusion/issues/4744) +- cargo test failing on master due to 
tpcds_logical_q41 stackoverflow [\#4728](https://github.com/apache/arrow-datafusion/issues/4728) +- PruningPredicate Different Evaluation Context from Query [\#4693](https://github.com/apache/arrow-datafusion/issues/4693) +- Skipping optimizer rule due to create_name not supporting wildcard [\#4681](https://github.com/apache/arrow-datafusion/issues/4681) +- Create physical plan bug: got Arrow schema with 1 and DataFusion schema with 0 [\#4677](https://github.com/apache/arrow-datafusion/issues/4677) +- Timestamp \<-\> Date32 compare doesn't work [\#4672](https://github.com/apache/arrow-datafusion/issues/4672) +- Wrongly use the function `clamp` [\#4654](https://github.com/apache/arrow-datafusion/issues/4654) +- Fix the clippy errors [\#4653](https://github.com/apache/arrow-datafusion/issues/4653) +- Filter Null Keys Update Not Taking Effect [\#4638](https://github.com/apache/arrow-datafusion/issues/4638) +- Should not generate duplicate sort keys from Window expr's partition by keys [\#4635](https://github.com/apache/arrow-datafusion/issues/4635) +- `common_sub_expression_eliminate` exists bug [\#4575](https://github.com/apache/arrow-datafusion/issues/4575) +- Confusing "Bare" in doesn't exist messages [\#4571](https://github.com/apache/arrow-datafusion/issues/4571) +- `having` shouldn't include alias in projection [\#4556](https://github.com/apache/arrow-datafusion/issues/4556) +- wrong comment about having [\#4554](https://github.com/apache/arrow-datafusion/issues/4554) +- `drop view t1, t2, ...` and `drop table t1, t2, ...` silently ignores arguments past the first [\#4531](https://github.com/apache/arrow-datafusion/issues/4531) +- Extract from timestamp doesn't support nanosecond [\#4528](https://github.com/apache/arrow-datafusion/issues/4528) +- `prepare_select_exprs` don't need `outer_query_schema` [\#4526](https://github.com/apache/arrow-datafusion/issues/4526) +- Table names with periods are not handled correctly 
[\#4513](https://github.com/apache/arrow-datafusion/issues/4513) +- `Push_down_projection` push redundant column. [\#4486](https://github.com/apache/arrow-datafusion/issues/4486) +- Planner don't generate `SubqueryAlias` [\#4483](https://github.com/apache/arrow-datafusion/issues/4483) +- Planner generate replicated `Projection` | `SubqueryAlias` [\#4481](https://github.com/apache/arrow-datafusion/issues/4481) +- `apply_table_alias` will ignore alias_name when columns is empty. [\#4454](https://github.com/apache/arrow-datafusion/issues/4454) +- Fix output_ordering of WindowAggExec [\#4438](https://github.com/apache/arrow-datafusion/issues/4438) +- Incorrect error for plus/minus operations over timestamps and dates [\#4420](https://github.com/apache/arrow-datafusion/issues/4420) +- Optimization rule `filter_push_down` causes `FieldNotFound` error [\#4401](https://github.com/apache/arrow-datafusion/issues/4401) +- Should not convert a normal non-inner join to Cross Join when there are non-equal Join conditions [\#4363](https://github.com/apache/arrow-datafusion/issues/4363) +- MemoryConsumer::try_grow Underflow [\#4328](https://github.com/apache/arrow-datafusion/issues/4328) +- Potential MemoryManager Deadlock [\#4325](https://github.com/apache/arrow-datafusion/issues/4325) +- `create external table` should fail to parse if syntax is incorrect [\#4262](https://github.com/apache/arrow-datafusion/issues/4262) +- Nullif func states support for Boolean type, but fails if this is attempted [\#4205](https://github.com/apache/arrow-datafusion/issues/4205) +- `ProjectionPushDown` rule don't consider the alias in projection. 
[\#4174](https://github.com/apache/arrow-datafusion/issues/4174) +- Stack overflow planning complex query [\#4065](https://github.com/apache/arrow-datafusion/issues/4065) +- Can not use `extract <part>` on the value of `now()` [\#3980](https://github.com/apache/arrow-datafusion/issues/3980) +- Bug with intervals and logical and/or [\#3944](https://github.com/apache/arrow-datafusion/issues/3944) +- CoalesceBatches doesn't provide correct elapsed_compute info in metrics [\#3894](https://github.com/apache/arrow-datafusion/issues/3894) +- Panicked at to_timestamp_micros function when the timestamp is too large. [\#3832](https://github.com/apache/arrow-datafusion/issues/3832) +- Optimizer casts decimals to different values on different platforms [\#3791](https://github.com/apache/arrow-datafusion/issues/3791) +- CSV inference reads in the whole file to memory, regardless of row limit [\#3658](https://github.com/apache/arrow-datafusion/issues/3658) +- after type coercion `CommonSubexprEliminate` will produce invalid projection [\#3635](https://github.com/apache/arrow-datafusion/issues/3635) +- panic at `attempt to multiply with overflow` when doing math on Decimal128 columns [\#3437](https://github.com/apache/arrow-datafusion/issues/3437) +- Precedence bug with date comparison to date plus interval [\#3408](https://github.com/apache/arrow-datafusion/issues/3408) +- Median aggregation using DataFrame panics: "AggregateState is not a scalar aggregate" [\#3105](https://github.com/apache/arrow-datafusion/issues/3105) +- `date_part` doesn't work for `now()` [\#3096](https://github.com/apache/arrow-datafusion/issues/3096) +- hash_join panics when join keys have different data types [\#2877](https://github.com/apache/arrow-datafusion/issues/2877) +- Memory manager triggers unnecessary spills [\#2829](https://github.com/apache/arrow-datafusion/issues/2829) +- Address performance/execution plan of TPCH query 9 [\#77](https://github.com/apache/arrow-datafusion/issues/77) + 
+**Documentation updates:** + +- Add a new open source project that is use DataFusion as query engine [\#4768](https://github.com/apache/arrow-datafusion/pull/4768) ([francis-du](https://github.com/francis-du)) + +**Closed issues:** + +- move the tests in planner [\#4798](https://github.com/apache/arrow-datafusion/issues/4798) +- Make it easier to update sqltestlogic test expected output \("test script completion mode"\) [\#4570](https://github.com/apache/arrow-datafusion/issues/4570) +- Make ConfigOption names into an Enum [\#4517](https://github.com/apache/arrow-datafusion/issues/4517) +- Implement null / empty string handling for sqllogictest [\#4500](https://github.com/apache/arrow-datafusion/issues/4500) +- Write a blog about parquet predicate pushdown [\#3464](https://github.com/apache/arrow-datafusion/issues/3464) +- Ensure column names are equivalent with or without optimization [\#1123](https://github.com/apache/arrow-datafusion/issues/1123) + +**Merged pull requests:** + +- Bump tokio from 1.23.0 to 1.23.1 in /datafusion-cli [\#4835](https://github.com/apache/arrow-datafusion/pull/4835) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Fix a few links in `roadmap.md` [\#4833](https://github.com/apache/arrow-datafusion/pull/4833) ([romanz](https://github.com/romanz)) +- DataFusion 16.0.0 release prep: Update version + add changelog [\#4831](https://github.com/apache/arrow-datafusion/pull/4831) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- feat: use arrow row format for hash-group-by [\#4830](https://github.com/apache/arrow-datafusion/pull/4830) ([crepererum](https://github.com/crepererum)) +- refactor: split relation of planner into one part. 
[\#4829](https://github.com/apache/arrow-datafusion/pull/4829) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- bugfix: remove cnf_rewrite in push_down_filter [\#4825](https://github.com/apache/arrow-datafusion/pull/4825) ([jackwener](https://github.com/jackwener)) +- minor: add some comments to row group pruning tests [\#4823](https://github.com/apache/arrow-datafusion/pull/4823) ([alamb](https://github.com/alamb)) +- Handle trailing tbl column in TPCH benchmarks [\#4821](https://github.com/apache/arrow-datafusion/pull/4821) ([tustvold](https://github.com/tustvold)) +- fix: account for memory in `RepartitionExec` [\#4820](https://github.com/apache/arrow-datafusion/pull/4820) ([crepererum](https://github.com/crepererum)) +- Fix clippy [\#4817](https://github.com/apache/arrow-datafusion/pull/4817) ([tustvold](https://github.com/tustvold)) +- Add test cases: row group filter with missing statistics for decimal data type [\#4810](https://github.com/apache/arrow-datafusion/pull/4810) ([liukun4515](https://github.com/liukun4515)) +- Move default catalog and schema onto ConfigOptions \(\#3887\) [\#4805](https://github.com/apache/arrow-datafusion/pull/4805) ([tustvold](https://github.com/tustvold)) +- remove duplicated test [\#4800](https://github.com/apache/arrow-datafusion/pull/4800) ([jackwener](https://github.com/jackwener)) +- Update sqlparser requirement from 0.29 to 0.30 [\#4799](https://github.com/apache/arrow-datafusion/pull/4799) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- rewrite the function `ensure_any_column_reference_is_unambiguous` [\#4797](https://github.com/apache/arrow-datafusion/pull/4797) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Uncomment nanoseconds tests after sql parser upgrade [\#4789](https://github.com/apache/arrow-datafusion/pull/4789) 
([comphead](https://github.com/comphead)) +- fix: ListingSchemaProvider directory paths \(related: \#4204\) [\#4788](https://github.com/apache/arrow-datafusion/pull/4788) ([cfraz89](https://github.com/cfraz89)) +- Minimize stack space required to plan deeply nested binary expressions [\#4787](https://github.com/apache/arrow-datafusion/pull/4787) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Minor: Refactor some sql planning code into functions [\#4785](https://github.com/apache/arrow-datafusion/pull/4785) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Make datafusion-physical-expr compatible with blake3/traits-preview feature. [\#4784](https://github.com/apache/arrow-datafusion/pull/4784) ([BoredPerson](https://github.com/BoredPerson)) +- refactor: split expression of planner into one part. [\#4783](https://github.com/apache/arrow-datafusion/pull/4783) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Fix Stack overflow in sql planning in debug builds [\#4779](https://github.com/apache/arrow-datafusion/pull/4779) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Pipeline-friendly Bounded Memory Window Executor [\#4777](https://github.com/apache/arrow-datafusion/pull/4777) ([mustafasrepo](https://github.com/mustafasrepo)) +- Implement OptimizerConfig for SessionState [\#4775](https://github.com/apache/arrow-datafusion/pull/4775) ([tustvold](https://github.com/tustvold)) +- refactor: extract `parse_value` [\#4774](https://github.com/apache/arrow-datafusion/pull/4774) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Structify ConfigOptions \(\#4517\) [\#4771](https://github.com/apache/arrow-datafusion/pull/4771) ([tustvold](https://github.com/tustvold)) +- Update sqlparser to `29.0.0` 
[\#4770](https://github.com/apache/arrow-datafusion/pull/4770) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Refactor extract_join_keys and move the ExtractEquijoinPredicate rule [\#4760](https://github.com/apache/arrow-datafusion/pull/4760) ([ygf11](https://github.com/ygf11)) +- Remove the config datafusion.execution.coalesce_target_batch_size and use datafusion.execution.batch_size instead [\#4757](https://github.com/apache/arrow-datafusion/pull/4757) ([yahoNanJing](https://github.com/yahoNanJing)) +- Add alias check for equijoin in from_plan [\#4755](https://github.com/apache/arrow-datafusion/pull/4755) ([ygf11](https://github.com/ygf11)) +- Take the top level `schema` into account when creating `UnionExec` [\#4753](https://github.com/apache/arrow-datafusion/pull/4753) ([HaoYang670](https://github.com/HaoYang670)) +- Set query_execution_start_time on snapshot from SessionContext \(\#4747\) [\#4750](https://github.com/apache/arrow-datafusion/pull/4750) ([tustvold](https://github.com/tustvold)) +- minor: Improve docstrings [\#4748](https://github.com/apache/arrow-datafusion/pull/4748) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Append generated column to the schema instead of prepending for WindowAggExec [\#4746](https://github.com/apache/arrow-datafusion/pull/4746) ([mustafasrepo](https://github.com/mustafasrepo)) +- Minor: comments about coercion in physical planner [\#4745](https://github.com/apache/arrow-datafusion/pull/4745) ([alamb](https://github.com/alamb)) +- Simplify parquet filter predicate test, fix Page Filtering Incorrectly Handles Pages with Different Row Counts [\#4743](https://github.com/apache/arrow-datafusion/pull/4743) ([tustvold](https://github.com/tustvold)) +- support byte array for decimal in parquet page and row group filters [\#4742](https://github.com/apache/arrow-datafusion/pull/4742) 
([liukun4515](https://github.com/liukun4515)) +- revert some code for \#4726 / remove unnecessary coercion in physical plans [\#4741](https://github.com/apache/arrow-datafusion/pull/4741) ([liukun4515](https://github.com/liukun4515)) +- Cleanup InformationSchema plumbing [\#4740](https://github.com/apache/arrow-datafusion/pull/4740) ([tustvold](https://github.com/tustvold)) +- Minor: use a common method to check the validate of equijoin predicate [\#4739](https://github.com/apache/arrow-datafusion/pull/4739) ([ygf11](https://github.com/ygf11)) +- minor: Support more data type for `null_counts` in the `PruningStatistics` [\#4738](https://github.com/apache/arrow-datafusion/pull/4738) ([liukun4515](https://github.com/liukun4515)) +- Extended datatypes & signatures support for `NULLIF` function [\#4737](https://github.com/apache/arrow-datafusion/pull/4737) ([korowa](https://github.com/korowa)) +- minor: improve debug logging for pruning predicates [\#4736](https://github.com/apache/arrow-datafusion/pull/4736) ([alamb](https://github.com/alamb)) +- refactor: parallelize `parquet_exec` test case `single_file` [\#4735](https://github.com/apache/arrow-datafusion/pull/4735) ([waynexia](https://github.com/waynexia)) +- fix: add one more projection to recover output schema [\#4733](https://github.com/apache/arrow-datafusion/pull/4733) ([waynexia](https://github.com/waynexia)) +- remove `SubqueryFilterToJoin` [\#4731](https://github.com/apache/arrow-datafusion/pull/4731) ([jackwener](https://github.com/jackwener)) +- Create writer with `arrow::ipc::IPCWriteOptions` [\#4730](https://github.com/apache/arrow-datafusion/pull/4730) ([askoa](https://github.com/askoa)) +- Implement cast between Date and Timestamp [\#4726](https://github.com/apache/arrow-datafusion/pull/4726) ([comphead](https://github.com/comphead)) +- Dynamic information_schema configuration and port more tests [\#4722](https://github.com/apache/arrow-datafusion/pull/4722) ([alamb](https://github.com/alamb)) +- Add 
TPC-DS query planning regression tests [\#4719](https://github.com/apache/arrow-datafusion/pull/4719) ([andygrove](https://github.com/andygrove)) +- Minor: refactor streaming CSV inference code [\#4717](https://github.com/apache/arrow-datafusion/pull/4717) ([alamb](https://github.com/alamb)) +- Reorder the physical plan optimizer rules, extract `GlobalSortSelection`, make `Repartition` optional [\#4714](https://github.com/apache/arrow-datafusion/pull/4714) ([yahoNanJing](https://github.com/yahoNanJing)) +- Eagerly construct PagePruningPredicate [\#4713](https://github.com/apache/arrow-datafusion/pull/4713) ([tustvold](https://github.com/tustvold)) +- Move the extract_join_keys to optimizer [\#4711](https://github.com/apache/arrow-datafusion/pull/4711) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- Avoid to bypass `try_new/new()` to build plan directly and cleanup filter [\#4702](https://github.com/apache/arrow-datafusion/pull/4702) ([jackwener](https://github.com/jackwener)) +- MINOR: Remove where_clause_object_safety clippy ignore \(\#3081\) [\#4696](https://github.com/apache/arrow-datafusion/pull/4696) ([tustvold](https://github.com/tustvold)) +- Support for executing infinite files and boundedness-aware join reordering rule [\#4694](https://github.com/apache/arrow-datafusion/pull/4694) ([metesynnada](https://github.com/metesynnada)) +- Unnecessary SortExec removal rule from Physical Plan [\#4691](https://github.com/apache/arrow-datafusion/pull/4691) ([mustafasrepo](https://github.com/mustafasrepo)) +- minor: rename the github actions [\#4689](https://github.com/apache/arrow-datafusion/pull/4689) ([jackwener](https://github.com/jackwener)) +- FOLLOWUP: remove more recursion in optimizer rules. 
[\#4687](https://github.com/apache/arrow-datafusion/pull/4687) ([jackwener](https://github.com/jackwener)) +- Add line that prevents display_name from being called on Wildcard [\#4682](https://github.com/apache/arrow-datafusion/pull/4682) ([andre-cc-natzka](https://github.com/andre-cc-natzka)) +- Deprecate SessionContext::create_logical_plan \(\#4617\) [\#4679](https://github.com/apache/arrow-datafusion/pull/4679) ([tustvold](https://github.com/tustvold)) +- Support `NTILE` window function [\#4676](https://github.com/apache/arrow-datafusion/pull/4676) ([berkaycpp](https://github.com/berkaycpp)) +- Support min max aggregates in window functions with sliding windows [\#4675](https://github.com/apache/arrow-datafusion/pull/4675) ([berkaycpp](https://github.com/berkaycpp)) +- Refactor Expr::AggregateFunction and Expr::WindowFunction to use struct [\#4671](https://github.com/apache/arrow-datafusion/pull/4671) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- Support type coercion for equijoin [\#4666](https://github.com/apache/arrow-datafusion/pull/4666) ([ygf11](https://github.com/ygf11)) +- Add `--complete` auto completion mode to `sqllogictests` [\#4665](https://github.com/apache/arrow-datafusion/pull/4665) ([alamb](https://github.com/alamb)) +- Fix CoalesceBatches elasped_compute metric [\#4664](https://github.com/apache/arrow-datafusion/pull/4664) ([Jefffrey](https://github.com/Jefffrey)) +- Refactor Expr::Sort to use struct [\#4663](https://github.com/apache/arrow-datafusion/pull/4663) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- More descriptive error for plus/minus between timestamps/dates [\#4662](https://github.com/apache/arrow-datafusion/pull/4662) ([Jefffrey](https://github.com/Jefffrey)) +- Stream CSV file during schema inference [\#4661](https://github.com/apache/arrow-datafusion/pull/4661) ([Jefffrey](https://github.com/Jefffrey)) +- 
Refine the logical and physical plan serialization and deserialization [\#4659](https://github.com/apache/arrow-datafusion/pull/4659) ([yahoNanJing](https://github.com/yahoNanJing)) +- Use thiserror in sqllogictest erorr [\#4657](https://github.com/apache/arrow-datafusion/pull/4657) ([xudong963](https://github.com/xudong963)) +- fix `cargo clippy` warning [\#4652](https://github.com/apache/arrow-datafusion/pull/4652) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Improve group by hash performance: avoid group-key/-state clones for hash-groupby [\#4651](https://github.com/apache/arrow-datafusion/pull/4651) ([crepererum](https://github.com/crepererum)) +- remove recursion in optimizer rules [\#4650](https://github.com/apache/arrow-datafusion/pull/4650) ([jackwener](https://github.com/jackwener)) +- replace the arithmetic op for decimal array op decimal array using arrow kernel [\#4648](https://github.com/apache/arrow-datafusion/pull/4648) ([liukun4515](https://github.com/liukun4515)) +- simplify regex expressions [\#4646](https://github.com/apache/arrow-datafusion/pull/4646) ([crepererum](https://github.com/crepererum)) +- Avoid generate duplicate sort Keys from Window Expressions, fix bug when decide Window Expressions ordering [\#4643](https://github.com/apache/arrow-datafusion/pull/4643) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mingmwang](https://github.com/mingmwang)) +- Refactor Expr::TryCast to use a struct [\#4642](https://github.com/apache/arrow-datafusion/pull/4642) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- add `ILIKE` support [\#4639](https://github.com/apache/arrow-datafusion/pull/4639) ([crepererum](https://github.com/crepererum)) +- Detect invalid \(unsupported\) compression types when parsing [\#4637](https://github.com/apache/arrow-datafusion/pull/4637) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- unwrap_cast_in_comparison.rs: support unint \<-\> decimal [\#4634](https://github.com/apache/arrow-datafusion/pull/4634) ([liukun4515](https://github.com/liukun4515)) +- MINOR: Fix incorrect config definitions [\#4623](https://github.com/apache/arrow-datafusion/pull/4623) ([andygrove](https://github.com/andygrove)) +- FOLLOWUP: remove `optimize()` [\#4619](https://github.com/apache/arrow-datafusion/pull/4619) ([jackwener](https://github.com/jackwener)) +- Optimizer: avoid every rule must recursive children in optimizer [\#4618](https://github.com/apache/arrow-datafusion/pull/4618) ([jackwener](https://github.com/jackwener)) +- fix: run logical optimizer rules for `TableScan` expressions [\#4614](https://github.com/apache/arrow-datafusion/pull/4614) ([crepererum](https://github.com/crepererum)) +- refactor: relax the signature of register\_\* in SessionContext [\#4612](https://github.com/apache/arrow-datafusion/pull/4612) ([waynexia](https://github.com/waynexia)) +- Remove the function `consume_token` from the parser [\#4609](https://github.com/apache/arrow-datafusion/pull/4609) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Make SchemaProvider::table async [\#4607](https://github.com/apache/arrow-datafusion/pull/4607) ([tustvold](https://github.com/tustvold)) +- Lazy system tables [\#4606](https://github.com/apache/arrow-datafusion/pull/4606) ([tustvold](https://github.com/tustvold)) +- Refactor: Change equijoin keys from column to expression in logical join [\#4602](https://github.com/apache/arrow-datafusion/pull/4602) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- refactor: extract `assert_optimized_plan_eq` from UT. 
[\#4600](https://github.com/apache/arrow-datafusion/pull/4600) ([jackwener](https://github.com/jackwener)) +- add `try_optimize()` for all rules. [\#4599](https://github.com/apache/arrow-datafusion/pull/4599) ([jackwener](https://github.com/jackwener)) +- Normalize datafusion configuration names [\#4596](https://github.com/apache/arrow-datafusion/pull/4596) ([yahoNanJing](https://github.com/yahoNanJing)) +- Fix the bugs in parsing `COMPRESSION TYPE` [\#4590](https://github.com/apache/arrow-datafusion/pull/4590) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Minor: Remove datafusion-core dev dependency from datafusion-sql [\#4589](https://github.com/apache/arrow-datafusion/pull/4589) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Improve error handling for array downcasting [\#4588](https://github.com/apache/arrow-datafusion/pull/4588) ([retikulum](https://github.com/retikulum)) +- Update to arrow v29 [\#4587](https://github.com/apache/arrow-datafusion/pull/4587) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Add need_data_exchange in the ExecutionPlan to indicate whether a physical operator needs data exchange [\#4586](https://github.com/apache/arrow-datafusion/pull/4586) ([yahoNanJing](https://github.com/yahoNanJing)) +- Move subset of select tests to sqllogic [\#4583](https://github.com/apache/arrow-datafusion/pull/4583) ([ajayaa](https://github.com/ajayaa)) +- bugfix: just allow having use expr in `groupby` or `aggr` [\#4579](https://github.com/apache/arrow-datafusion/pull/4579) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Output sqllogictests with arrow display rather than CSV writer [\#4578](https://github.com/apache/arrow-datafusion/pull/4578) ([alamb](https://github.com/alamb)) +- Minor: Add test case for reduce 
cross join [\#4577](https://github.com/apache/arrow-datafusion/pull/4577) ([ygf11](https://github.com/ygf11)) +- refactor: remove redundant `outer_query_schema` [\#4576](https://github.com/apache/arrow-datafusion/pull/4576) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Preserve the TryCast expression in columnize_expr [\#4574](https://github.com/apache/arrow-datafusion/pull/4574) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([byteink](https://github.com/byteink)) +- Remove Confusing "Bare" in does not exist messages [\#4572](https://github.com/apache/arrow-datafusion/pull/4572) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Minor: Add tests for date interval predicate handling [\#4569](https://github.com/apache/arrow-datafusion/pull/4569) ([alamb](https://github.com/alamb)) +- Update sqlparser requirement from 0.27 to 0.28 [\#4568](https://github.com/apache/arrow-datafusion/pull/4568) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Avoid materializing local varaibles when creating `sortMergeJoinExec` [\#4566](https://github.com/apache/arrow-datafusion/pull/4566) ([HaoYang670](https://github.com/HaoYang670)) +- Minor: Fix logical conflict [\#4565](https://github.com/apache/arrow-datafusion/pull/4565) ([alamb](https://github.com/alamb)) +- feat: support nested loop join with the initial version [\#4562](https://github.com/apache/arrow-datafusion/pull/4562) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- feat: prepare logical plan to logical plan without params/placeholders [\#4561](https://github.com/apache/arrow-datafusion/pull/4561) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([NGA-TRAN](https://github.com/NGA-TRAN)) +- Write faster kernel for is_distinct 
[\#4560](https://github.com/apache/arrow-datafusion/pull/4560) ([comphead](https://github.com/comphead)) +- refactor code about `query -> plan` for subqueries [\#4559](https://github.com/apache/arrow-datafusion/pull/4559) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- fix: remove wrong comment about `having` [\#4555](https://github.com/apache/arrow-datafusion/pull/4555) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- feat: user-defined aggregate function\(UDAF\) as window function [\#4553](https://github.com/apache/arrow-datafusion/pull/4553) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([MichaelScofield](https://github.com/MichaelScofield)) +- Fix date_part/extract functions to support now\(\) [\#4548](https://github.com/apache/arrow-datafusion/pull/4548) ([comphead](https://github.com/comphead)) +- bump sqllogictest to 0.9.0 [\#4547](https://github.com/apache/arrow-datafusion/pull/4547) ([xxchan](https://github.com/xxchan)) +- minor: Remove more clones from the planner [\#4546](https://github.com/apache/arrow-datafusion/pull/4546) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Add tests for coercion of timestamps to strings [\#4545](https://github.com/apache/arrow-datafusion/pull/4545) ([alamb](https://github.com/alamb)) +- MINOR: move sqllogictest to dev-dependencies [\#4544](https://github.com/apache/arrow-datafusion/pull/4544) ([alamb](https://github.com/alamb)) +- MINOR: add some comments about intended use of ChunkedStore [\#4541](https://github.com/apache/arrow-datafusion/pull/4541) ([alamb](https://github.com/alamb)) +- fix: remove TODOs linked to arrow\#3147 [\#4540](https://github.com/apache/arrow-datafusion/pull/4540) ([crepererum](https://github.com/crepererum)) +- refactor: remove redundant `build_join_schema()` 
[\#4538](https://github.com/apache/arrow-datafusion/pull/4538) ([jackwener](https://github.com/jackwener)) +- Move some create/drop tests to `ddl.slt` [\#4535](https://github.com/apache/arrow-datafusion/pull/4535) ([alamb](https://github.com/alamb)) +- Minor: Avoid cloning as many `Ident` during SQL planning [\#4534](https://github.com/apache/arrow-datafusion/pull/4534) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- shouldn't add `outer_query_schema` in `sql_select_to_rex` [\#4527](https://github.com/apache/arrow-datafusion/pull/4527) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Avoid reading the entire file in ChunkedStore [\#4525](https://github.com/apache/arrow-datafusion/pull/4525) ([metesynnada](https://github.com/metesynnada)) +- Simplify MemoryManager [\#4522](https://github.com/apache/arrow-datafusion/pull/4522) ([tustvold](https://github.com/tustvold)) +- Fix limited statistic collection accross files with no stats [\#4521](https://github.com/apache/arrow-datafusion/pull/4521) ([isidentical](https://github.com/isidentical)) +- refactor: make Ctes a struct to also store data types provided by prepare stmt [\#4520](https://github.com/apache/arrow-datafusion/pull/4520) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([NGA-TRAN](https://github.com/NGA-TRAN)) +- Enrich filter statistics with known column boundaries [\#4519](https://github.com/apache/arrow-datafusion/pull/4519) ([isidentical](https://github.com/isidentical)) +- Remove Option from window frame [\#4516](https://github.com/apache/arrow-datafusion/pull/4516) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mustafasrepo](https://github.com/mustafasrepo)) +- Make nightly clippy happy [\#4515](https://github.com/apache/arrow-datafusion/pull/4515) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- 
Remove interior mutability of `MemTable` [\#4514](https://github.com/apache/arrow-datafusion/pull/4514) ([xudong963](https://github.com/xudong963)) +- Make window function related struct public for ballista. [\#4511](https://github.com/apache/arrow-datafusion/pull/4511) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- minor: rename `push_down_limit` [\#4510](https://github.com/apache/arrow-datafusion/pull/4510) ([jackwener](https://github.com/jackwener)) +- Add get_window_frame in window_expr, show frame info in window_agg_exec [\#4508](https://github.com/apache/arrow-datafusion/pull/4508) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add sqllogictest auto labeler [\#4506](https://github.com/apache/arrow-datafusion/pull/4506) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Add some more aggregate sqllogictests and remove rust tests [\#4505](https://github.com/apache/arrow-datafusion/pull/4505) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Remove sqllogictests CI run [\#4504](https://github.com/apache/arrow-datafusion/pull/4504) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Refactor code for `insert` in sqllogictest [\#4503](https://github.com/apache/arrow-datafusion/pull/4503) ([xudong963](https://github.com/xudong963)) +- Add empty string normalization to sqllogictests [\#4501](https://github.com/apache/arrow-datafusion/pull/4501) ([alamb](https://github.com/alamb)) +- sqllogictest: A logging and command line filter [\#4497](https://github.com/apache/arrow-datafusion/pull/4497) ([alamb](https://github.com/alamb)) +- Support `insert into` statement in sqllogictest [\#4496](https://github.com/apache/arrow-datafusion/pull/4496) ([xudong963](https://github.com/xudong963)) +- Improve error handling for array downcasting [\#4493](https://github.com/apache/arrow-datafusion/pull/4493) ([retikulum](https://github.com/retikulum)) +- Unify most of `SessionConfig` settings into `ConfigOptions` 
[\#4492](https://github.com/apache/arrow-datafusion/pull/4492) ([alamb](https://github.com/alamb)) +- feat: support prepare statement [\#4490](https://github.com/apache/arrow-datafusion/pull/4490) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([NGA-TRAN](https://github.com/NGA-TRAN)) +- Minor: Update docstrings and comments to aggregate code [\#4489](https://github.com/apache/arrow-datafusion/pull/4489) ([alamb](https://github.com/alamb)) +- Fix panic in median "AggregateState is not a scalar aggregate" [\#4488](https://github.com/apache/arrow-datafusion/pull/4488) ([alamb](https://github.com/alamb)) +- fix `push_down_projection` push redundant columns. [\#4487](https://github.com/apache/arrow-datafusion/pull/4487) ([jackwener](https://github.com/jackwener)) +- Add window func related logic plan to proto ability. [\#4485](https://github.com/apache/arrow-datafusion/pull/4485) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- fix `Planner` don't generate `SubqueryAlias` and generate duplicated `SubqueryAlias` [\#4484](https://github.com/apache/arrow-datafusion/pull/4484) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Improve parquet partition_file output display [\#4467](https://github.com/apache/arrow-datafusion/pull/4467) ([alamb](https://github.com/alamb)) +- minor: remove redundant `unwrap()` [\#4463](https://github.com/apache/arrow-datafusion/pull/4463) ([jackwener](https://github.com/jackwener)) +- Fix `Cte` in `from` clause with duplicated cte name [\#4461](https://github.com/apache/arrow-datafusion/pull/4461) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Replace `&Option<T>` with `Option<&T>` part 2 [\#4458](https://github.com/apache/arrow-datafusion/pull/4458) ([askoa](https://github.com/askoa)) +- Fix output_partitioning\(\), output_ordering\(\), equivalence_properties\(\) in WindowAggExec, shift the Column indexes
[\#4455](https://github.com/apache/arrow-datafusion/pull/4455) ([mingmwang](https://github.com/mingmwang)) +- fix `push_down_filter` for pushing filters on grouping columns rather than aggregate columns [\#4447](https://github.com/apache/arrow-datafusion/pull/4447) ([jackwener](https://github.com/jackwener)) +- Add support for non-column key for equijoin when eliminating cross join to inner join [\#4443](https://github.com/apache/arrow-datafusion/pull/4443) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- Remove the schema checking when creating `CrossJoinExec` [\#4432](https://github.com/apache/arrow-datafusion/pull/4432) ([HaoYang670](https://github.com/HaoYang670)) +- `date_part` support fractions of second [\#4385](https://github.com/apache/arrow-datafusion/pull/4385) ([comphead](https://github.com/comphead)) +- Minor: use upstream RowSelection code from arrow `intersect_row_selection` [\#4340](https://github.com/apache/arrow-datafusion/pull/4340) ([alamb](https://github.com/alamb)) +- Support type coercion for timestamp and utf8 [\#4312](https://github.com/apache/arrow-datafusion/pull/4312) ([andre-cc-natzka](https://github.com/andre-cc-natzka)) diff --git a/dev/changelog/16.1.0.md b/dev/changelog/16.1.0.md new file mode 100644 index 0000000000000..994800062b1c7 --- /dev/null +++ b/dev/changelog/16.1.0.md @@ -0,0 +1,27 @@ + + +## [16.1.0](https://github.com/apache/arrow-datafusion/tree/16.1.0) (2023-01-19) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/16.1.0-rc1...16.0.0) + +**Merged pull requests:** + +- Fix column indices in EnforceDistribution optimizer in Partial AggregateMode \(\#4878\) [\#4959](https://github.com/apache/arrow-datafusion/pull/4959) +- Make it able to specify a session id for SessionState \(\#4933\) [\#4951](https://github.com/apache/arrow-datafusion/pull/4951) diff --git a/dev/changelog/17.0.0.md b/dev/changelog/17.0.0.md new file mode 100644 index 
0000000000000..7a35b52e3cdd3 --- /dev/null +++ b/dev/changelog/17.0.0.md @@ -0,0 +1,190 @@ + + +## [17.0.0](https://github.com/apache/arrow-datafusion/tree/17.0.0) (2023-01-27) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/17.0.0-rc1...17.0.0) + +**Breaking changes:** + +- Implemented a ReadOptions trait for cleaner code. [\#5025](https://github.com/apache/arrow-datafusion/pull/5025) ([saikrishna1-bidgely](https://github.com/saikrishna1-bidgely)) + +**Implemented enhancements:** + +- Add null-equals-null JOIN support in Substrait producer/consumer [\#5084](https://github.com/apache/arrow-datafusion/issues/5084) +- Cleaner code for Read Options in reader methods. [\#5024](https://github.com/apache/arrow-datafusion/issues/5024) +- Substrait donation follow-on work [\#4897](https://github.com/apache/arrow-datafusion/issues/4897) +- Add `len` method to `DataFrame` [\#1926](https://github.com/apache/arrow-datafusion/issues/1926) + +**Fixed bugs:** + +- Clippy failures in master branch and in PRs \(due to new nightly Rust\) [\#5080](https://github.com/apache/arrow-datafusion/issues/5080) + +**Merged pull requests:** + +- Add null-equals-null join support [\#5085](https://github.com/apache/arrow-datafusion/pull/5085) ([nseekhao](https://github.com/nseekhao)) +- Optimize returned plan in roundtrip_fill_na function [\#5083](https://github.com/apache/arrow-datafusion/pull/5083) ([nseekhao](https://github.com/nseekhao)) +- fix clippy failures [\#5081](https://github.com/apache/arrow-datafusion/pull/5081) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Add NULL literal support for decimal and integers [\#5077](https://github.com/apache/arrow-datafusion/pull/5077) ([nseekhao](https://github.com/nseekhao)) +- DataFrame count method [\#5071](https://github.com/apache/arrow-datafusion/pull/5071) ([Jefffrey](https://github.com/Jefffrey)) +- \[sqllogictests\] Port orderby.rs to sqllogictests
[\#5062](https://github.com/apache/arrow-datafusion/pull/5062) ([alamb](https://github.com/alamb)) + +## [17.0.0-rc1](https://github.com/apache/arrow-datafusion/tree/17.0.0-rc1) (2023-01-26) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/16.1.0...17.0.0-rc1) + +**Breaking changes:** + +- Change ExecutionPlan::maintains_input_order to return vector \(to support multi children executors better\) [\#5035](https://github.com/apache/arrow-datafusion/pull/5035) ([mustafasrepo](https://github.com/mustafasrepo)) +- Allow overriding error type in DataFusion Result [\#5000](https://github.com/apache/arrow-datafusion/pull/5000) ([tustvold](https://github.com/tustvold)) +- Add dictionary_expresions feature \(\#4386\) [\#4999](https://github.com/apache/arrow-datafusion/pull/4999) ([tustvold](https://github.com/tustvold)) + +**Implemented enhancements:** + +- Retain the ordering of fields in the table schema when creating the projection for an update plan [\#5052](https://github.com/apache/arrow-datafusion/issues/5052) +- \[sqllogictest\] Remove `integration-tests` directory [\#5011](https://github.com/apache/arrow-datafusion/issues/5011) +- \[sqllogictest\] Consolidate normalization code for the postgres and non-postgres paths [\#5010](https://github.com/apache/arrow-datafusion/issues/5010) +- \[sqllogictest\] Don't orchestrate the postgres containers with rust / docker [\#5009](https://github.com/apache/arrow-datafusion/issues/5009) +- check external table exist before creating a table [\#4997](https://github.com/apache/arrow-datafusion/issues/4997) +- Implement `std::error::Error` for DataFusionError [\#4991](https://github.com/apache/arrow-datafusion/issues/4991) +- Return Vec\<bool\> instead of bool in ExecutionPlan::maintains_input_order [\#4980](https://github.com/apache/arrow-datafusion/issues/4980) +- Add support for linear range search [\#4979](https://github.com/apache/arrow-datafusion/issues/4979) +- Add support for bounded execution when window
query involves UNBOUNDED PRECEDING [\#4978](https://github.com/apache/arrow-datafusion/issues/4978) +- Infer prepared statement parameter types for insert queries with values clauses [\#4976](https://github.com/apache/arrow-datafusion/issues/4976) +- The filter of outer table happens multiple time after optimizing in-subquery to join [\#4914](https://github.com/apache/arrow-datafusion/issues/4914) +- Support Describe FILE in datafusion-cli [\#4913](https://github.com/apache/arrow-datafusion/issues/4913) +- Release DataFusion 16 [\#4776](https://github.com/apache/arrow-datafusion/issues/4776) +- Support writing lists in the arrow csv writer [\#4502](https://github.com/apache/arrow-datafusion/issues/4502) +- Replace python based integration test with sqllogictest [\#4462](https://github.com/apache/arrow-datafusion/issues/4462) +- Support CREATE TABLE table_name\(...schema_fields\) [\#4396](https://github.com/apache/arrow-datafusion/issues/4396) +- Make Binary Dictionary Operations Optional [\#4386](https://github.com/apache/arrow-datafusion/issues/4386) +- Improve / Cleanup DataFusion CI [\#3045](https://github.com/apache/arrow-datafusion/issues/3045) +- More frequent DataFusion releases to crates.io \(discussion\) [\#2327](https://github.com/apache/arrow-datafusion/issues/2327) + +**Fixed bugs:** + +- UPDATE statment for non existent column doesn't error out [\#5068](https://github.com/apache/arrow-datafusion/issues/5068) +- Limit doesn't drop on first batch when limit size == fetch size. [\#5064](https://github.com/apache/arrow-datafusion/issues/5064) +- Performance regressions since DataFusion 15.x [\#5060](https://github.com/apache/arrow-datafusion/issues/5060) +- Quoted schema and table names result in double-quoted names in logical plan. 
[\#5058](https://github.com/apache/arrow-datafusion/issues/5058) +- Homebrew release script has the amount of arguments being incorrect [\#5043](https://github.com/apache/arrow-datafusion/issues/5043) +- CI Failing with Out of Disk [\#5040](https://github.com/apache/arrow-datafusion/issues/5040) +- Doc links to LogicalPlan in the core package need updating. [\#5036](https://github.com/apache/arrow-datafusion/issues/5036) +- explain analyze can not see csvexec execution time metrics [\#5014](https://github.com/apache/arrow-datafusion/issues/5014) +- AVG\(nulls\) returns 0 rather than NULL [\#5007](https://github.com/apache/arrow-datafusion/issues/5007) +- Invalid Placeholders return internal error \(rather than Plan error\) [\#5005](https://github.com/apache/arrow-datafusion/issues/5005) +- select \* from csv error [\#4996](https://github.com/apache/arrow-datafusion/issues/4996) +- Incorrect nested error wrapped to `ArrowError:External` variant for joins [\#4981](https://github.com/apache/arrow-datafusion/issues/4981) + +**Documentation updates:** + +- MINOR: Add Substrait to feature list in README [\#4955](https://github.com/apache/arrow-datafusion/pull/4955) ([andygrove](https://github.com/andygrove)) +- Minor: comma engineering in Readme [\#4954](https://github.com/apache/arrow-datafusion/pull/4954) ([alamb](https://github.com/alamb)) +- Update main DataFusion README [\#4903](https://github.com/apache/arrow-datafusion/pull/4903) ([alamb](https://github.com/alamb)) +- Docs: Add known user - Kamu [\#4899](https://github.com/apache/arrow-datafusion/pull/4899) ([sergiimk](https://github.com/sergiimk)) + +**Closed issues:** + +- Support sub directories in sqllogictest runner [\#4709](https://github.com/apache/arrow-datafusion/issues/4709) +- Bug displaying fractional seconds in `IntervalMonthDayNano` [\#4220](https://github.com/apache/arrow-datafusion/issues/4220) + +**Merged pull requests:** + +- Add `release-crates.sh` script 
[\#5070](https://github.com/apache/arrow-datafusion/pull/5070) ([iajoiner](https://github.com/iajoiner)) +- Validate assignment target column existence for UPDATE statements [\#5069](https://github.com/apache/arrow-datafusion/pull/5069) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) +- Fix limit when size of batch to poll == skip/fetch value [\#5066](https://github.com/apache/arrow-datafusion/pull/5066) ([Dandandan](https://github.com/Dandandan)) +- Fix CREATE SCHEMA schema name double quoting issue. [\#5059](https://github.com/apache/arrow-datafusion/pull/5059) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([neumark](https://github.com/neumark)) +- Minor: Move some aggregate error tests to sqllogictests [\#5055](https://github.com/apache/arrow-datafusion/pull/5055) ([alamb](https://github.com/alamb)) +- Add decimal support to substrait serde [\#5054](https://github.com/apache/arrow-datafusion/pull/5054) ([andygrove](https://github.com/andygrove)) +- Retain schema order in projection [\#5053](https://github.com/apache/arrow-datafusion/pull/5053) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Improve join type support in substrait [\#5051](https://github.com/apache/arrow-datafusion/pull/5051) ([andygrove](https://github.com/andygrove)) +- \[Substrait\] ReadRel. 
Get column names from TableScan source [\#5050](https://github.com/apache/arrow-datafusion/pull/5050) ([andygrove](https://github.com/andygrove)) +- Ensure insert projections are of correct type [\#5049](https://github.com/apache/arrow-datafusion/pull/5049) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Remove unnecessary pyo3 dependency from datafusion crate [\#5048](https://github.com/apache/arrow-datafusion/pull/5048) ([tustvold](https://github.com/tustvold)) +- Cleanup CI \(\#5040\) [\#5047](https://github.com/apache/arrow-datafusion/pull/5047) ([tustvold](https://github.com/tustvold)) +- Fix homebrew publish script [\#5044](https://github.com/apache/arrow-datafusion/pull/5044) ([iajoiner](https://github.com/iajoiner)) +- Update docs links to logical plans module. [\#5037](https://github.com/apache/arrow-datafusion/pull/5037) ([vincev](https://github.com/vincev)) +- \[sqllogictest\] Read subdirectories in `test_files` [\#5033](https://github.com/apache/arrow-datafusion/pull/5033) ([melgenek](https://github.com/melgenek)) +- minor: Fix docs for create_default_catalog_and_schema [\#5032](https://github.com/apache/arrow-datafusion/pull/5032) ([alamb](https://github.com/alamb)) +- Remove python based posgres comparsion `integration-test` [\#5031](https://github.com/apache/arrow-datafusion/pull/5031) ([alamb](https://github.com/alamb)) +- \[sqllogictest\] Create empty tables [\#5026](https://github.com/apache/arrow-datafusion/pull/5026) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([melgenek](https://github.com/melgenek)) +- Simplify the `PushDownLimit`. 
[\#5021](https://github.com/apache/arrow-datafusion/pull/5021) ([HaoYang670](https://github.com/HaoYang670)) +- \[BugFix\] fix explain csv/json/avro exec can not see metrics bug [\#5018](https://github.com/apache/arrow-datafusion/pull/5018) ([xiaoyong-z](https://github.com/xiaoyong-z)) +- Check placeholder \_\_timeTo and return Datafusion::Plan error [\#5017](https://github.com/apache/arrow-datafusion/pull/5017) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthias-Q](https://github.com/matthias-Q)) +- \[sqllogictets\] Remove postgres container orchestration [\#5015](https://github.com/apache/arrow-datafusion/pull/5015) ([alamb](https://github.com/alamb)) +- Sqllogictest: use the same normalization for all tests [\#5013](https://github.com/apache/arrow-datafusion/pull/5013) ([melgenek](https://github.com/melgenek)) +- Minor: Remove invalid comments [\#5012](https://github.com/apache/arrow-datafusion/pull/5012) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- AVG\(null\) is NULL \(not zero\) [\#5008](https://github.com/apache/arrow-datafusion/pull/5008) ([alamb](https://github.com/alamb)) +- Minor: improve internal error message [\#5006](https://github.com/apache/arrow-datafusion/pull/5006) ([alamb](https://github.com/alamb)) +- Support for bounded execution when window frame involves UNBOUNDED PRECEDING [\#5003](https://github.com/apache/arrow-datafusion/pull/5003) ([mustafasrepo](https://github.com/mustafasrepo)) +- Bump sqllogictest to v0.11.1 [\#5002](https://github.com/apache/arrow-datafusion/pull/5002) ([xudong963](https://github.com/xudong963)) +- Minor: Document how to create `ListingTables` [\#5001](https://github.com/apache/arrow-datafusion/pull/5001) ([alamb](https://github.com/alamb)) +- \[Enhancement\] early check table exist before create [\#4998](https://github.com/apache/arrow-datafusion/pull/4998) ([xiaoyong-z](https://github.com/xiaoyong-z)) +- \[Feature\] support 
describe file [\#4995](https://github.com/apache/arrow-datafusion/pull/4995) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xiaoyong-z](https://github.com/xiaoyong-z)) +- Implement `std::error::Error::source()` for `DataFusionError`, make `DataFusionError::find_root` more generic [\#4992](https://github.com/apache/arrow-datafusion/pull/4992) ([alamb](https://github.com/alamb)) +- Add support for linear range calculation in WINDOW functions [\#4989](https://github.com/apache/arrow-datafusion/pull/4989) ([mustafasrepo](https://github.com/mustafasrepo)) +- re-export substrait crate [\#4988](https://github.com/apache/arrow-datafusion/pull/4988) ([jdye64](https://github.com/jdye64)) +- minor: Update data type support documentation [\#4984](https://github.com/apache/arrow-datafusion/pull/4984) ([alamb](https://github.com/alamb)) +- fix\(4981\): incorrect error wrapping in `OnceFut` [\#4983](https://github.com/apache/arrow-datafusion/pull/4983) ([DDtKey](https://github.com/DDtKey)) +- Infer values for inserts [\#4977](https://github.com/apache/arrow-datafusion/pull/4977) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Simplify GroupByHash implementation \(to prepare for more work\) [\#4972](https://github.com/apache/arrow-datafusion/pull/4972) ([alamb](https://github.com/alamb)) +- Add DataFusionError::Substrait variant to DataFusionError enum [\#4971](https://github.com/apache/arrow-datafusion/pull/4971) ([jdye64](https://github.com/jdye64)) +- refactor: display input partitions for `RepartitionExec` [\#4969](https://github.com/apache/arrow-datafusion/pull/4969) ([crepererum](https://github.com/crepererum)) +- Upgrade to Substrait 0.4.0 [\#4966](https://github.com/apache/arrow-datafusion/pull/4966) ([mbrobbel](https://github.com/mbrobbel)) +- Expose `sql_to_statement` and `statement_to_plan` on `SessionState` [\#4958](https://github.com/apache/arrow-datafusion/pull/4958) 
([avantgardnerio](https://github.com/avantgardnerio)) +- Minor: Make messages consistent for LogicalPlan::Dml [\#4953](https://github.com/apache/arrow-datafusion/pull/4953) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Do not resort inputs to `UnionExec` if they are already sorted [\#4946](https://github.com/apache/arrow-datafusion/pull/4946) ([alamb](https://github.com/alamb)) +- Minor: Reduce even more redundancy creating window_agg in sort_enforcement tests [\#4945](https://github.com/apache/arrow-datafusion/pull/4945) ([alamb](https://github.com/alamb)) +- Only add outer filter once when transforming exists/in subquery to join [\#4944](https://github.com/apache/arrow-datafusion/pull/4944) ([ygf11](https://github.com/ygf11)) +- fix: `FieldNotFound` error message without valid fields [\#4942](https://github.com/apache/arrow-datafusion/pull/4942) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([DDtKey](https://github.com/DDtKey)) +- Propagate planning error back to user [\#4940](https://github.com/apache/arrow-datafusion/pull/4940) ([fsdvh](https://github.com/fsdvh)) +- Make it able to specify a session id for SessionState [\#4933](https://github.com/apache/arrow-datafusion/pull/4933) ([yahoNanJing](https://github.com/yahoNanJing)) +- SUPPORT SEMI/ANTI JOIN SQL syntax in DataFusion [\#4932](https://github.com/apache/arrow-datafusion/pull/4932) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mingmwang](https://github.com/mingmwang)) +- Support gs:// as GCS schema [\#4930](https://github.com/apache/arrow-datafusion/pull/4930) ([jychen7](https://github.com/jychen7)) +- Upgrade object_store from 0.5.0 to 0.5.3 [\#4929](https://github.com/apache/arrow-datafusion/pull/4929) ([jychen7](https://github.com/jychen7)) +- Reduce redundancy in sort_enforcement tests [\#4928](https://github.com/apache/arrow-datafusion/pull/4928) ([alamb](https://github.com/alamb)) +- Update to arrow 31 
[\#4927](https://github.com/apache/arrow-datafusion/pull/4927) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Unify Row hash and hash implementation [\#4924](https://github.com/apache/arrow-datafusion/pull/4924) ([mustafasrepo](https://github.com/mustafasrepo)) +- Support join-filter pushdown for semi/anti join [\#4923](https://github.com/apache/arrow-datafusion/pull/4923) ([ygf11](https://github.com/ygf11)) +- Minor add ticket link to broken test [\#4919](https://github.com/apache/arrow-datafusion/pull/4919) ([alamb](https://github.com/alamb)) +- Improve documentation for ExprVisitor, port simple uses to new walking function [\#4916](https://github.com/apache/arrow-datafusion/pull/4916) ([alamb](https://github.com/alamb)) +- Add substrait label to PRs [\#4915](https://github.com/apache/arrow-datafusion/pull/4915) ([alamb](https://github.com/alamb)) +- Executing ProjectionExec with no column should not return an Err [\#4912](https://github.com/apache/arrow-datafusion/pull/4912) ([viirya](https://github.com/viirya)) +- Refactor: `Add LogicalPlan::observe_expressions` to walk expressions [\#4906](https://github.com/apache/arrow-datafusion/pull/4906) ([alamb](https://github.com/alamb)) +- Minor: Port information schema tests to sqllogictest [\#4905](https://github.com/apache/arrow-datafusion/pull/4905) ([alamb](https://github.com/alamb)) +- Add insert/update/delete to LogicalPlan and add SQL planner support [\#4902](https://github.com/apache/arrow-datafusion/pull/4902) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- fix: Visit subqueries in `Expr::Alias` [\#4900](https://github.com/apache/arrow-datafusion/pull/4900) ([askoa](https://github.com/askoa)) +- \[Substrait\] Change API to return LogicalPlan instead of DataFrame [\#4896](https://github.com/apache/arrow-datafusion/pull/4896) ([andygrove](https://github.com/andygrove)) +- Upgrade 
to substrait 0.3 [\#4895](https://github.com/apache/arrow-datafusion/pull/4895) ([andygrove](https://github.com/andygrove)) +- Add datafusion-substrait crate to workspace [\#4893](https://github.com/apache/arrow-datafusion/pull/4893) ([andygrove](https://github.com/andygrove)) +- refactor and add simple function to deserialize and serialize proto b… [\#4892](https://github.com/apache/arrow-datafusion/pull/4892) ([jdye64](https://github.com/jdye64)) +- Update `optimize_children` to return `Result<Option<LogicalPlan>>` [\#4888](https://github.com/apache/arrow-datafusion/pull/4888) ([HaoYang670](https://github.com/HaoYang670)) +- Do not repartition inputs whose sort order is required [\#4885](https://github.com/apache/arrow-datafusion/pull/4885) ([alamb](https://github.com/alamb)) +- Minor: Add docstrings to UnionExec [\#4884](https://github.com/apache/arrow-datafusion/pull/4884) ([alamb](https://github.com/alamb)) +- Update datafusion-substrait crate to build against repo version of DataFusion [\#4879](https://github.com/apache/arrow-datafusion/pull/4879) ([andygrove](https://github.com/andygrove)) +- Fix column indices in EnforceDistribution optimizer in Partial AggregateMode [\#4878](https://github.com/apache/arrow-datafusion/pull/4878) ([jonmmease](https://github.com/jonmmease)) +- refactor: improve repartition buffering [\#4867](https://github.com/apache/arrow-datafusion/pull/4867) ([crepererum](https://github.com/crepererum)) +- Rewrite coerce_plan_expr_for_schema to fix union type coercion [\#4862](https://github.com/apache/arrow-datafusion/pull/4862) ([ygf11](https://github.com/ygf11)) +- \(\#4462\) Postgres compatibility tests using sqllogictest [\#4834](https://github.com/apache/arrow-datafusion/pull/4834) ([melgenek](https://github.com/melgenek)) +- Support non-tuple expression for in-subquery to join [\#4826](https://github.com/apache/arrow-datafusion/pull/4826) ([ygf11](https://github.com/ygf11)) +- Update to arrow `30.0.1`
[\#4818](https://github.com/apache/arrow-datafusion/pull/4818) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Refine the statistics estimation for the limit and aggregate operator [\#4716](https://github.com/apache/arrow-datafusion/pull/4716) ([yahoNanJing](https://github.com/yahoNanJing)) +- Infer prepared statement parameter types [\#4701](https://github.com/apache/arrow-datafusion/pull/4701) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Add datafusion-substrait crate [\#4543](https://github.com/apache/arrow-datafusion/pull/4543) ([andygrove](https://github.com/andygrove)) +- Refactor loser tree code in SortPreservingMerge per PR comments [\#4407](https://github.com/apache/arrow-datafusion/pull/4407) ([alamb](https://github.com/alamb)) diff --git a/dev/changelog/18.0.0.md b/dev/changelog/18.0.0.md new file mode 100644 index 0000000000000..f51bb947e781b --- /dev/null +++ b/dev/changelog/18.0.0.md @@ -0,0 +1,151 @@ + + +## [18.0.0](https://github.com/apache/arrow-datafusion/tree/18.0.0) (2023-02-10) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/17.0.0...18.0.0) + +**Breaking changes:** + +- Use DataFusionError instead of ArrowError in SendableRecordBatchStream [\#5101](https://github.com/apache/arrow-datafusion/pull/5101) ([comphead](https://github.com/comphead)) +- Update to arrow 32 and Switch to RawDecoder for JSON [\#5056](https://github.com/apache/arrow-datafusion/pull/5056) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) + +**Implemented enhancements:** + +- DiskManager to create a spill folder if doesn't exist [\#5186](https://github.com/apache/arrow-datafusion/issues/5186) +- cast expression may cause duplicate column name error [\#5174](https://github.com/apache/arrow-datafusion/issues/5174) +- Add type coercion from Dictionary to string for 
regular expressions [\#5154](https://github.com/apache/arrow-datafusion/issues/5154) +- Unnecessary `Filter` on Parquet datasources [\#5149](https://github.com/apache/arrow-datafusion/issues/5149) +- \[sqllogictest\] Support `pg_typeof` for Postgres compatibility tests [\#5147](https://github.com/apache/arrow-datafusion/issues/5147) +- Supporting Grafana global variables [\#5144](https://github.com/apache/arrow-datafusion/issues/5144) +- add example for standalone DataFusion server which supports Arrow Flight SQL JDBC driver [\#5139](https://github.com/apache/arrow-datafusion/issues/5139) +- Support for InList in datafusion-substrait [\#5134](https://github.com/apache/arrow-datafusion/issues/5134) +- Pipeline file opening in `FileStream` [\#5129](https://github.com/apache/arrow-datafusion/issues/5129) +- Make `parse_physical_expr` public [\#5107](https://github.com/apache/arrow-datafusion/issues/5107) +- Use DataFusionError in SendableRecordBatchStream [\#5039](https://github.com/apache/arrow-datafusion/issues/5039) +- Interval coercion:`date_bin('1 hour',...)` does not work but `date_bin(interval '1 hour', ...` does [\#4853](https://github.com/apache/arrow-datafusion/issues/4853) +- `Explain ` should not fail if meeting errors when optimizing the query [\#4766](https://github.com/apache/arrow-datafusion/issues/4766) +- Add option to determine whether to convert identifiers [\#4551](https://github.com/apache/arrow-datafusion/issues/4551) +- Replace `&Option<T>` with `Option<&T>`. [\#4424](https://github.com/apache/arrow-datafusion/issues/4424) +- Error type in `RecordBatchStream` [\#4172](https://github.com/apache/arrow-datafusion/issues/4172) +- Support non-equi join \(e.g.
`ON` clause\) in Dataframe API [\#1254](https://github.com/apache/arrow-datafusion/issues/1254) +- Allow ParquetExec to parallelize work based on row groups [\#137](https://github.com/apache/arrow-datafusion/issues/137) + +**Fixed bugs:** + +- Confusing schema errors when using window partition [\#5229](https://github.com/apache/arrow-datafusion/issues/5229) +- Propagating empty_relation generate an illegal plan [\#5218](https://github.com/apache/arrow-datafusion/issues/5218) +- The test `in_list_types_struct_literal` fails when setting `skip_failed_rules` as `false` [\#5217](https://github.com/apache/arrow-datafusion/issues/5217) +- Placeholder values are not replaced in ScalarSubqueries [\#5215](https://github.com/apache/arrow-datafusion/issues/5215) +- Querying against delta lake table does not seem to work [\#5202](https://github.com/apache/arrow-datafusion/issues/5202) +- Arithmetic operation doesn't work with DictionaryArray [\#5193](https://github.com/apache/arrow-datafusion/issues/5193) +- simplify_expr\(\) invoke nullable\(\) exist bug [\#5191](https://github.com/apache/arrow-datafusion/issues/5191) +- CI is currently broken on git diff: Not a git repository [\#5180](https://github.com/apache/arrow-datafusion/issues/5180) +- `write_csv/json/parquet` isn't cancel safe [\#5178](https://github.com/apache/arrow-datafusion/issues/5178) +- no hyperlink to blaze-rs \[doc: README-"Use Cases"\] [\#5175](https://github.com/apache/arrow-datafusion/issues/5175) +- Arithmetic scalar operation doesn't work with DictionaryArray [\#5150](https://github.com/apache/arrow-datafusion/issues/5150) +- Sort operator disappear in physical_plan [\#5100](https://github.com/apache/arrow-datafusion/issues/5100) +- Window function error: InvalidArgumentError\("number of columns\(27\) must match number of fields\(35\) in schema" [\#5090](https://github.com/apache/arrow-datafusion/issues/5090) +- `INSERT` statements without target column list are not working 
[\#5078](https://github.com/apache/arrow-datafusion/issues/5078) +- fix file stream time scanning metrics bug [\#5019](https://github.com/apache/arrow-datafusion/issues/5019) +- Date before `1678` causes panic [\#4875](https://github.com/apache/arrow-datafusion/issues/4875) +- Can not ORDER BY an aliased group column [\#4854](https://github.com/apache/arrow-datafusion/issues/4854) +- The `filters` expressions in `TableScan` may contain fields not included in `schema`. [\#4793](https://github.com/apache/arrow-datafusion/issues/4793) +- Comparing a `Timestamp` to a `Date32` fails [\#4644](https://github.com/apache/arrow-datafusion/issues/4644) +- String --\> TableReference parsing does not properly handle `"` and `.` [\#4532](https://github.com/apache/arrow-datafusion/issues/4532) +- can't compare NULL type with NULL type [\#4335](https://github.com/apache/arrow-datafusion/issues/4335) +- Add ambiguous check when generate selection plan [\#4196](https://github.com/apache/arrow-datafusion/issues/4196) +- Internal error in CAST from Timestamp\[us\] [\#3922](https://github.com/apache/arrow-datafusion/issues/3922) +- Run median expr on parquet file column got error [\#3805](https://github.com/apache/arrow-datafusion/issues/3805) +- aliasing a field renders it missing in the order by clause [\#669](https://github.com/apache/arrow-datafusion/issues/669) +- Querying datetime data in DataFusion with an embedded timezone always fails [\#153](https://github.com/apache/arrow-datafusion/issues/153) + +**Documentation updates:** + +- Update README.md fix \[welcoming community\] links [\#5232](https://github.com/apache/arrow-datafusion/pull/5232) ([jiangzhx](https://github.com/jiangzhx)) +- Update README.md update blaze-rs link to https://github.com/blaze-init/blaze [\#5190](https://github.com/apache/arrow-datafusion/pull/5190) ([jiangzhx](https://github.com/jiangzhx)) +- Typo of greptimedb [\#5103](https://github.com/apache/arrow-datafusion/pull/5103) 
([fengjiachun](https://github.com/fengjiachun)) +- chore: change `DataBend` to `Databend` [\#5096](https://github.com/apache/arrow-datafusion/pull/5096) ([xudong963](https://github.com/xudong963)) + +**Closed issues:** + +- Change coerced type for comparison between timestamp with date to timestamp [\#4761](https://github.com/apache/arrow-datafusion/issues/4761) + +**Merged pull requests:** + +- fix: correct expected error in test [\#5224](https://github.com/apache/arrow-datafusion/pull/5224) ([jackwener](https://github.com/jackwener)) +- bugfix: fix propagating empty_relation generates an illegal plan [\#5219](https://github.com/apache/arrow-datafusion/pull/5219) ([yukkit](https://github.com/yukkit)) +- Replace placeholders in ScalarSubqueries [\#5216](https://github.com/apache/arrow-datafusion/pull/5216) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Dataframe join_on method [\#5210](https://github.com/apache/arrow-datafusion/pull/5210) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- bugfix: fix eval `nullalbe()` in `simplify_exprs` [\#5208](https://github.com/apache/arrow-datafusion/pull/5208) ([jackwener](https://github.com/jackwener)) +- minor: remove unnecessary clone [\#5207](https://github.com/apache/arrow-datafusion/pull/5207) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- minor: extract `merge_schema()` function. 
[\#5203](https://github.com/apache/arrow-datafusion/pull/5203) ([jackwener](https://github.com/jackwener)) +- minor: remove unnecessary `continue` [\#5200](https://github.com/apache/arrow-datafusion/pull/5200) ([xiaoyong-z](https://github.com/xiaoyong-z)) +- fix\(MemTable\): make it cancel-safe and fix parallelism [\#5197](https://github.com/apache/arrow-datafusion/pull/5197) ([DDtKey](https://github.com/DDtKey)) +- fix: make `write_csv/json/parquet` cancel-safe [\#5196](https://github.com/apache/arrow-datafusion/pull/5196) ([DDtKey](https://github.com/DDtKey)) +- Support arithmetic operation on DictionaryArray [\#5194](https://github.com/apache/arrow-datafusion/pull/5194) ([viirya](https://github.com/viirya)) +- sqllogicaltest: add cleanup and use rowsort. [\#5189](https://github.com/apache/arrow-datafusion/pull/5189) ([jackwener](https://github.com/jackwener)) +- bugfix: fix `TableScan` may contain fields not included in `schema` [\#5188](https://github.com/apache/arrow-datafusion/pull/5188) ([jackwener](https://github.com/jackwener)) +- Create disk manager spill folder if doesn't exist [\#5185](https://github.com/apache/arrow-datafusion/pull/5185) ([comphead](https://github.com/comphead)) +- Parse identifiers properly for TableReferences [\#5183](https://github.com/apache/arrow-datafusion/pull/5183) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- Fix decimal scalar dyn kernels [\#5179](https://github.com/apache/arrow-datafusion/pull/5179) ([viirya](https://github.com/viirya)) +- Patch git Safe Paths in CI [\#5177](https://github.com/apache/arrow-datafusion/pull/5177) ([tustvold](https://github.com/tustvold)) +- Add initial support for serializing physical plans with Substrait [\#5176](https://github.com/apache/arrow-datafusion/pull/5176) ([andygrove](https://github.com/andygrove)) +- Bump tokio from 1.24.1 to 1.24.2 in /datafusion-cli [\#5172](https://github.com/apache/arrow-datafusion/pull/5172) 
([dependabot[bot]](https://github.com/apps/dependabot)) +- Make EnforceSorting global sort aware, fix sort mis-optimizations involving unions, support parallel sort + merge transformations [\#5171](https://github.com/apache/arrow-datafusion/pull/5171) ([mustafasrepo](https://github.com/mustafasrepo)) +- Update substrait README.md [\#5168](https://github.com/apache/arrow-datafusion/pull/5168) ([jiangzhx](https://github.com/jiangzhx)) +- Switch to use sum kernel from arrow-rs for Decimal128 [\#5167](https://github.com/apache/arrow-datafusion/pull/5167) ([sunchao](https://github.com/sunchao)) +- FileStream: Open next file in parallel while decoding [\#5161](https://github.com/apache/arrow-datafusion/pull/5161) ([thinkharderdev](https://github.com/thinkharderdev)) +- Fix FairSpillPool try_grow for non-spillable consumers [\#5160](https://github.com/apache/arrow-datafusion/pull/5160) ([tustvold](https://github.com/tustvold)) +- fix: treat unsupported SQL plans as "not implemented" [\#5159](https://github.com/apache/arrow-datafusion/pull/5159) ([crepererum](https://github.com/crepererum)) +- Compare NULL types [\#5158](https://github.com/apache/arrow-datafusion/pull/5158) ([melgenek](https://github.com/melgenek)) +- chore: add object_name_to_table_reference in SqlToRel [\#5155](https://github.com/apache/arrow-datafusion/pull/5155) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) +- Ambiguity check for where selection [\#5153](https://github.com/apache/arrow-datafusion/pull/5153) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- feat: Type coercion for Dictionary\(\_, \_\) to Utf8 for regex conditions [\#5152](https://github.com/apache/arrow-datafusion/pull/5152) ([stuartcarnie](https://github.com/stuartcarnie)) +- Support arithmetic scalar operation with DictionaryArray [\#5151](https://github.com/apache/arrow-datafusion/pull/5151) 
([viirya](https://github.com/viirya)) +- \[sqllogictest\] Support `pg_typeof` [\#5148](https://github.com/apache/arrow-datafusion/pull/5148) ([melgenek](https://github.com/melgenek)) +- Date to Timestamp cast [\#5140](https://github.com/apache/arrow-datafusion/pull/5140) ([comphead](https://github.com/comphead)) +- add example for Flight SQL server that supports JDBC driver [\#5138](https://github.com/apache/arrow-datafusion/pull/5138) ([kmitchener](https://github.com/kmitchener)) +- Add in-list test [\#5135](https://github.com/apache/arrow-datafusion/pull/5135) ([nseekhao](https://github.com/nseekhao)) +- Bug fix: Empty Record Batch handling [\#5131](https://github.com/apache/arrow-datafusion/pull/5131) ([mustafasrepo](https://github.com/mustafasrepo)) +- Add option to control whether to normalize ident [\#5124](https://github.com/apache/arrow-datafusion/pull/5124) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) +- Make `parse_physical_expr` public [\#5118](https://github.com/apache/arrow-datafusion/pull/5118) ([comphead](https://github.com/comphead)) +- Support coercing `utf8` to `interval` and `timestamp` \(including arguments to `date_bin`\) [\#5117](https://github.com/apache/arrow-datafusion/pull/5117) ([alamb](https://github.com/alamb)) +- Fix release issues [\#5116](https://github.com/apache/arrow-datafusion/pull/5116) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- minor: port date_bin tests to sqllogictests [\#5115](https://github.com/apache/arrow-datafusion/pull/5115) ([alamb](https://github.com/alamb)) +- Minor: reduce code duplication using `rewrite_expr` [\#5114](https://github.com/apache/arrow-datafusion/pull/5114) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Replace &Option\<T\> with Option\<&T\> [\#5113](https://github.com/apache/arrow-datafusion/pull/5113)
([gaoxinge](https://github.com/gaoxinge)) +- Improve `get_meet_of_orderings` to check for common prefixes [\#5111](https://github.com/apache/arrow-datafusion/pull/5111) ([ozankabak](https://github.com/ozankabak)) +- \[sqllogictest\] Apply rowsort when there is no explicit order by [\#5110](https://github.com/apache/arrow-datafusion/pull/5110) ([melgenek](https://github.com/melgenek)) +- Add unnest_column to DataFrame [\#5106](https://github.com/apache/arrow-datafusion/pull/5106) ([vincev](https://github.com/vincev)) +- Minor: reduce indent level in page filter pruning code [\#5105](https://github.com/apache/arrow-datafusion/pull/5105) ([alamb](https://github.com/alamb)) +- Replace &Option\<T\> with Option\<&T\> [\#5102](https://github.com/apache/arrow-datafusion/pull/5102) ([gaoxinge](https://github.com/gaoxinge)) +- Minor: remove unused methods in datafusion/optimizer/src/utils.rs [\#5098](https://github.com/apache/arrow-datafusion/pull/5098) ([ygf11](https://github.com/ygf11)) +- ci: don't trigger rust ci for doc changes [\#5097](https://github.com/apache/arrow-datafusion/pull/5097) ([xudong963](https://github.com/xudong963)) +- sqllogicaltest: fix unstable slt case.
[\#5095](https://github.com/apache/arrow-datafusion/pull/5095) ([jackwener](https://github.com/jackwener)) +- chore: update cranelift-module [\#5094](https://github.com/apache/arrow-datafusion/pull/5094) ([jackwener](https://github.com/jackwener)) +- refactor: Add `rewrite_expr` convenience method for rewriting `Expr`s [\#5092](https://github.com/apache/arrow-datafusion/pull/5092) ([alamb](https://github.com/alamb)) +- Minor: extract sort col rewrite into its own module, add unit tests [\#5088](https://github.com/apache/arrow-datafusion/pull/5088) ([alamb](https://github.com/alamb)) +- \[sqllogictest\] Move `decimal.rs` tests [\#5086](https://github.com/apache/arrow-datafusion/pull/5086) ([melgenek](https://github.com/melgenek)) +- Insert target columns empty fix [\#5079](https://github.com/apache/arrow-datafusion/pull/5079) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) +- sqllogicaltest: move union.rs [\#5075](https://github.com/apache/arrow-datafusion/pull/5075) ([jackwener](https://github.com/jackwener)) +- Support ORDER BY an aliased column [\#5067](https://github.com/apache/arrow-datafusion/pull/5067) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Parquet parallel scan [\#5057](https://github.com/apache/arrow-datafusion/pull/5057) ([korowa](https://github.com/korowa)) +- \[BugFix\] fix file stream time scanning metrics bug [\#5020](https://github.com/apache/arrow-datafusion/pull/5020) ([xiaoyong-z](https://github.com/xiaoyong-z)) +- Show optimization errors in explain [\#4819](https://github.com/apache/arrow-datafusion/pull/4819) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) diff --git a/dev/changelog/19.0.0.md b/dev/changelog/19.0.0.md new file mode 100644 index 0000000000000..6e4abcf8ffba3 --- /dev/null +++ b/dev/changelog/19.0.0.md @@ -0,0 +1,205 @@ + + +## 
[19.0.0](https://github.com/apache/arrow-datafusion/tree/19.0.0) (2023-02-24) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/18.0.0...19.0.0) + +**Breaking changes:** + +- Use DataFusionError instead of ArrowError in SendableRecordBatchStream [\#5101](https://github.com/apache/arrow-datafusion/pull/5101) ([comphead](https://github.com/comphead)) +- Update to arrow 32 and Switch to RawDecoder for JSON [\#5056](https://github.com/apache/arrow-datafusion/pull/5056) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Allow `SessionContext::read_csv`, etc to read multiple files [\#4908](https://github.com/apache/arrow-datafusion/pull/4908) ([saikrishna1-bidgely](https://github.com/saikrishna1-bidgely)) + +**Implemented enhancements:** + +- Ignore Arrow in dependabot [\#5340](https://github.com/apache/arrow-datafusion/issues/5340) +- Provide access to internal fields of SessionContext [\#5317](https://github.com/apache/arrow-datafusion/issues/5317) +- Investigate performance drop for DISTINCT queries [\#5313](https://github.com/apache/arrow-datafusion/issues/5313) +- \[DOC\] Update math expression documentation [\#5312](https://github.com/apache/arrow-datafusion/issues/5312) +- Replace merge_batches with concat_batches [\#5297](https://github.com/apache/arrow-datafusion/issues/5297) +- Support for some of the window frame range queries [\#5275](https://github.com/apache/arrow-datafusion/issues/5275) +- Make `log` function to be in sync with PostgresSql [\#5259](https://github.com/apache/arrow-datafusion/issues/5259) +- \[SQLLogicTest\] Make schema validation ignore nullable and metadata attributes [\#5231](https://github.com/apache/arrow-datafusion/issues/5231) +- Add support for linear groups search [\#5213](https://github.com/apache/arrow-datafusion/issues/5213) +- Add SQL function overload `LOG(base, x)` for logarithm of x to base 
[\#5206](https://github.com/apache/arrow-datafusion/issues/5206) +- `all_schema()` will get schema of child of child of .... [\#5192](https://github.com/apache/arrow-datafusion/issues/5192) +- Enable parquet parallel scans by default [\#5125](https://github.com/apache/arrow-datafusion/issues/5125) +- Don't repartition ProjectionExec when it does not compute anything [\#4968](https://github.com/apache/arrow-datafusion/issues/4968) +- Support non-tuple expression for Exists Subquery to Join [\#4934](https://github.com/apache/arrow-datafusion/issues/4934) +- Read multiple files/folders using `read_csv` [\#4909](https://github.com/apache/arrow-datafusion/issues/4909) + +**Fixed bugs:** + +- Make inline_table_scan optimize whole plan during first optimization stage. [\#5364](https://github.com/apache/arrow-datafusion/issues/5364) +- tpcds_logical_q8 ambiguous name. [\#5334](https://github.com/apache/arrow-datafusion/issues/5334) +- Protobuf serialisation is missing for GetIndexedFieldExpr [\#5323](https://github.com/apache/arrow-datafusion/issues/5323) +- Indexing a nested list with 0 or an index larger than list size is not handled correctly [\#5310](https://github.com/apache/arrow-datafusion/issues/5310) +- Protobuf serialization drops `preserve_partitioning` from `SortExec` [\#5305](https://github.com/apache/arrow-datafusion/issues/5305) +- data file without suffix can't be read correctly [\#5301](https://github.com/apache/arrow-datafusion/issues/5301) +- Idk [\#5298](https://github.com/apache/arrow-datafusion/issues/5298) +- Error with query that has DISTINCT with ORDER BY and aliased select list [\#5293](https://github.com/apache/arrow-datafusion/issues/5293) +- Optimizer prunes UnnestExec on aggregate count [\#5281](https://github.com/apache/arrow-datafusion/issues/5281) +- Strange Behaviour on RepartitionExec with CoalescePartitionsExec. 
[\#5278](https://github.com/apache/arrow-datafusion/issues/5278) +- Error "For SELECT DISTINCT, ORDER BY expressions id must appear in select list" may be over eager [\#5255](https://github.com/apache/arrow-datafusion/issues/5255) +- SQL allows SORT BY keyword [\#5247](https://github.com/apache/arrow-datafusion/issues/5247) +- test `sort_on_window_null_string` failed after disable `skip_fail`. [\#5233](https://github.com/apache/arrow-datafusion/issues/5233) +- Dataframe API adds ?table? qualifier [\#5187](https://github.com/apache/arrow-datafusion/issues/5187) +- Re-ordering Projections in scan are not working anymore \(since DF15\) [\#5146](https://github.com/apache/arrow-datafusion/issues/5146) +- parquet page level skipping \(page index pruning\) doesn't work with evolved schemas [\#5104](https://github.com/apache/arrow-datafusion/issues/5104) +- Incorrect results on queries with `distinct` and orderby [\#5065](https://github.com/apache/arrow-datafusion/issues/5065) +- NestedLoopJoin will panic when right child contains RepartitionExec [\#5022](https://github.com/apache/arrow-datafusion/issues/5022) +- JSON projection only work when the index is in ascending order [\#4832](https://github.com/apache/arrow-datafusion/issues/4832) +- Stack overflows when planning tpcds 22 in debug mode [\#4786](https://github.com/apache/arrow-datafusion/issues/4786) +- Failed to create Left anti join physical plan due to SchemaError::FieldNotFound [\#4366](https://github.com/apache/arrow-datafusion/issues/4366) +- Filters/limit are not pushdown druing optimalization for table with alias [\#2270](https://github.com/apache/arrow-datafusion/issues/2270) + +**Documentation updates:** + +- Update README.md fix \[welcoming community\] links [\#5232](https://github.com/apache/arrow-datafusion/pull/5232) ([jiangzhx](https://github.com/jiangzhx)) +- Update README.md update blaze-rs link to https://github.com/blaze-init/blaze [\#5190](https://github.com/apache/arrow-datafusion/pull/5190) 
([jiangzhx](https://github.com/jiangzhx)) +- Typo of greptimedb [\#5103](https://github.com/apache/arrow-datafusion/pull/5103) ([fengjiachun](https://github.com/fengjiachun)) +- chore: change `DataBend` to `Databend` [\#5096](https://github.com/apache/arrow-datafusion/pull/5096) ([xudong963](https://github.com/xudong963)) + +**Closed issues:** + +- Implement column number / column type verification for sqllogictest [\#4499](https://github.com/apache/arrow-datafusion/issues/4499) + +**Merged pull requests:** + +- generate new projection plan in inline_table_scan instead of discarding [\#5371](https://github.com/apache/arrow-datafusion/pull/5371) ([jackwener](https://github.com/jackwener)) +- minor: fix rule name and comment. [\#5370](https://github.com/apache/arrow-datafusion/pull/5370) ([jackwener](https://github.com/jackwener)) +- minor: port limit tests to sqllogictests [\#5355](https://github.com/apache/arrow-datafusion/pull/5355) ([jackwener](https://github.com/jackwener)) +- feat: add rule to merge projection. [\#5349](https://github.com/apache/arrow-datafusion/pull/5349) ([jackwener](https://github.com/jackwener)) +- Ignore Arrow in dependabot [\#5341](https://github.com/apache/arrow-datafusion/pull/5341) ([iajoiner](https://github.com/iajoiner)) +- minor: remove useless `.get()` [\#5336](https://github.com/apache/arrow-datafusion/pull/5336) ([jackwener](https://github.com/jackwener)) +- bugfix: fix tpcds_logical_q8 ambiguous name. 
[\#5335](https://github.com/apache/arrow-datafusion/pull/5335) ([jackwener](https://github.com/jackwener)) +- minor: disable tpcds_logical_q10/q35 [\#5333](https://github.com/apache/arrow-datafusion/pull/5333) ([jackwener](https://github.com/jackwener)) +- minor: port intersection sql tests to sqllogictests [\#5331](https://github.com/apache/arrow-datafusion/pull/5331) ([alamb](https://github.com/alamb)) +- minor: port more window tests to sqllogictests [\#5330](https://github.com/apache/arrow-datafusion/pull/5330) ([alamb](https://github.com/alamb)) +- MINOR: nicer error messages for cli, use display format rather than debug [\#5329](https://github.com/apache/arrow-datafusion/pull/5329) ([kmitchener](https://github.com/kmitchener)) +- Add missing protobuf serialisation functionality GetIndexedFieldExpr. [\#5324](https://github.com/apache/arrow-datafusion/pull/5324) ([ahmedriza](https://github.com/ahmedriza)) +- chore: small typo in the example README [\#5319](https://github.com/apache/arrow-datafusion/pull/5319) ([gianarb](https://github.com/gianarb)) +- feat: add accessor to SessionContext fields for ContextProvider impl [\#5318](https://github.com/apache/arrow-datafusion/pull/5318) ([sunng87](https://github.com/sunng87)) +- \[DOC\] Update math expression documentation [\#5316](https://github.com/apache/arrow-datafusion/pull/5316) ([comphead](https://github.com/comphead)) +- Fix nested list indexing when the index is 0 or larger than the list size [\#5311](https://github.com/apache/arrow-datafusion/pull/5311) ([ahmedriza](https://github.com/ahmedriza)) +- Fix SortExec bench case and Add SortExec input cases to bench for SortPreservingMergeExec [\#5308](https://github.com/apache/arrow-datafusion/pull/5308) ([jaylmiller](https://github.com/jaylmiller)) +- Allow DISTINCT with ORDER BY and an aliased select list [\#5307](https://github.com/apache/arrow-datafusion/pull/5307) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] 
([alamb](https://github.com/alamb)) +- Serialize preserve_partitioning in SortExec [\#5306](https://github.com/apache/arrow-datafusion/pull/5306) ([thinkharderdev](https://github.com/thinkharderdev)) +- fix: correct plan builder when test `scalar_subquery_project_expr` [\#5304](https://github.com/apache/arrow-datafusion/pull/5304) ([jackwener](https://github.com/jackwener)) +- Make SQL query consistent with API syntax expression in code examples [\#5303](https://github.com/apache/arrow-datafusion/pull/5303) ([ongchi](https://github.com/ongchi)) +- enable tpcds-64 test [\#5302](https://github.com/apache/arrow-datafusion/pull/5302) ([jackwener](https://github.com/jackwener)) +- Feature/merge batches removal [\#5300](https://github.com/apache/arrow-datafusion/pull/5300) ([berkaysynnada](https://github.com/berkaysynnada)) +- fix: add yield point to `RepartitionExec` [\#5299](https://github.com/apache/arrow-datafusion/pull/5299) ([crepererum](https://github.com/crepererum)) +- `datafusion.optimizer.repartition_file_scans` enabled by default [\#5295](https://github.com/apache/arrow-datafusion/pull/5295) ([korowa](https://github.com/korowa)) +- minor: derive Ord/PartialOrd/Eq/PartialEq traits for `ObjectStoreUrl` [\#5288](https://github.com/apache/arrow-datafusion/pull/5288) ([crepererum](https://github.com/crepererum)) +- Fix the potential bug of check_all_column_from_schema [\#5287](https://github.com/apache/arrow-datafusion/pull/5287) ([ygf11](https://github.com/ygf11)) +- Linear search support for Window Group queries [\#5286](https://github.com/apache/arrow-datafusion/pull/5286) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mustafasrepo](https://github.com/mustafasrepo)) +- Prevent optimizer from pruning UnnestExec. 
[\#5282](https://github.com/apache/arrow-datafusion/pull/5282) ([vincev](https://github.com/vincev)) +- Minor: Add fetch to SortExec display [\#5279](https://github.com/apache/arrow-datafusion/pull/5279) ([thinkharderdev](https://github.com/thinkharderdev)) +- Set `catalog_list` from outside for `SessionState`. [\#5277](https://github.com/apache/arrow-datafusion/pull/5277) ([MichaelScofield](https://github.com/MichaelScofield)) +- Support page skipping / page_index pushdown for evolved schemas [\#5268](https://github.com/apache/arrow-datafusion/pull/5268) ([alamb](https://github.com/alamb)) +- Use upstream newline_delimited_stream [\#5267](https://github.com/apache/arrow-datafusion/pull/5267) ([tustvold](https://github.com/tustvold)) +- Support non-tuple expression for exists-subquery to join [\#5264](https://github.com/apache/arrow-datafusion/pull/5264) ([ygf11](https://github.com/ygf11)) +- minor: Fix cargo fmt [\#5263](https://github.com/apache/arrow-datafusion/pull/5263) ([alamb](https://github.com/alamb)) +- minor: replace `unwrap()` with `?` [\#5262](https://github.com/apache/arrow-datafusion/pull/5262) ([jackwener](https://github.com/jackwener)) +- Preserve `TableScan.projection` order in `push_down_projection` optimizer rule [\#5261](https://github.com/apache/arrow-datafusion/pull/5261) ([korowa](https://github.com/korowa)) +- Minor: refactor ParquetExec roundtrip tests [\#5260](https://github.com/apache/arrow-datafusion/pull/5260) ([alamb](https://github.com/alamb)) +- \[fix\]\[plan\] relax the check for distinct, order by for dataframe [\#5258](https://github.com/apache/arrow-datafusion/pull/5258) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xiaoyong-z](https://github.com/xiaoyong-z)) +- enhance the checking of type errors in the test `window_frame_creation` [\#5257](https://github.com/apache/arrow-datafusion/pull/5257) ([HaoYang670](https://github.com/HaoYang670)) +- SQL planning benchmarks for very wide tables 
[\#5256](https://github.com/apache/arrow-datafusion/pull/5256) ([alamb](https://github.com/alamb)) +- Minor: Add negative test for SORT BY [\#5254](https://github.com/apache/arrow-datafusion/pull/5254) ([alamb](https://github.com/alamb)) +- \[sqllogictest\] Define output types and check them in tests [\#5253](https://github.com/apache/arrow-datafusion/pull/5253) ([melgenek](https://github.com/melgenek)) +- Minor: port some explain test to sqllogictest, add filename normalization [\#5252](https://github.com/apache/arrow-datafusion/pull/5252) ([alamb](https://github.com/alamb)) +- Disallow SORT BY in SQL [\#5249](https://github.com/apache/arrow-datafusion/pull/5249) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- \[SQLLogicTest\] Make schema validation ignore nullable and metadata attributes [\#5246](https://github.com/apache/arrow-datafusion/pull/5246) ([comphead](https://github.com/comphead)) +- Add SQL function overload LOG\(base, x\) for logarithm of x to base [\#5245](https://github.com/apache/arrow-datafusion/pull/5245) ([comphead](https://github.com/comphead)) +- Update sqllogictest requirement from 0.11.1 to 0.12.0 \#5237 [\#5244](https://github.com/apache/arrow-datafusion/pull/5244) ([alamb](https://github.com/alamb)) +- Test case for NDJsonExec with randomly ordered projection [\#5243](https://github.com/apache/arrow-datafusion/pull/5243) ([korowa](https://github.com/korowa)) +- Update to arrow `33.0.0` [\#5241](https://github.com/apache/arrow-datafusion/pull/5241) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- DataFusion 18.0.0 Release [\#5240](https://github.com/apache/arrow-datafusion/pull/5240) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- fix clippy in nightly [\#5238](https://github.com/apache/arrow-datafusion/pull/5238) ([jackwener](https://github.com/jackwener)) +- 
refactor: correct the implementation of `all_schemas()` [\#5236](https://github.com/apache/arrow-datafusion/pull/5236) ([jackwener](https://github.com/jackwener)) +- bugfix: fix error when `get_coerced_window_frame` meet `utf8` [\#5234](https://github.com/apache/arrow-datafusion/pull/5234) ([jackwener](https://github.com/jackwener)) +- Feature/sort enforcement refactor [\#5228](https://github.com/apache/arrow-datafusion/pull/5228) ([mustafasrepo](https://github.com/mustafasrepo)) +- Minor: Fix doc links and typos [\#5225](https://github.com/apache/arrow-datafusion/pull/5225) ([Jefffrey](https://github.com/Jefffrey)) +- fix: correct expected error in test [\#5224](https://github.com/apache/arrow-datafusion/pull/5224) ([jackwener](https://github.com/jackwener)) +- bugfix: fix propagating empty_relation generates an illegal plan [\#5219](https://github.com/apache/arrow-datafusion/pull/5219) ([yukkit](https://github.com/yukkit)) +- Replace placeholders in ScalarSubqueries [\#5216](https://github.com/apache/arrow-datafusion/pull/5216) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Dataframe join_on method [\#5210](https://github.com/apache/arrow-datafusion/pull/5210) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- bugfix: fix eval `nullalbe()` in `simplify_exprs` [\#5208](https://github.com/apache/arrow-datafusion/pull/5208) ([jackwener](https://github.com/jackwener)) +- minor: remove unnecessary clone [\#5207](https://github.com/apache/arrow-datafusion/pull/5207) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- minor: extract `merge_schema()` function. 
[\#5203](https://github.com/apache/arrow-datafusion/pull/5203) ([jackwener](https://github.com/jackwener)) +- minor: remove unnecessary `continue` [\#5200](https://github.com/apache/arrow-datafusion/pull/5200) ([xiaoyong-z](https://github.com/xiaoyong-z)) +- Minor: Begin porting some window tests to sqllogictests [\#5199](https://github.com/apache/arrow-datafusion/pull/5199) ([alamb](https://github.com/alamb)) +- fix\(MemTable\): make it cancel-safe and fix parallelism [\#5197](https://github.com/apache/arrow-datafusion/pull/5197) ([DDtKey](https://github.com/DDtKey)) +- fix: make `write_csv/json/parquet` cancel-safe [\#5196](https://github.com/apache/arrow-datafusion/pull/5196) ([DDtKey](https://github.com/DDtKey)) +- Support arithmetic operation on DictionaryArray [\#5194](https://github.com/apache/arrow-datafusion/pull/5194) ([viirya](https://github.com/viirya)) +- sqllogicaltest: add cleanup and use rowsort. [\#5189](https://github.com/apache/arrow-datafusion/pull/5189) ([jackwener](https://github.com/jackwener)) +- bugfix: fix `TableScan` may contain fields not included in `schema` [\#5188](https://github.com/apache/arrow-datafusion/pull/5188) ([jackwener](https://github.com/jackwener)) +- Create disk manager spill folder if doesn't exist [\#5185](https://github.com/apache/arrow-datafusion/pull/5185) ([comphead](https://github.com/comphead)) +- Parse identifiers properly for TableReferences [\#5183](https://github.com/apache/arrow-datafusion/pull/5183) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) +- Fix decimal scalar dyn kernels [\#5179](https://github.com/apache/arrow-datafusion/pull/5179) ([viirya](https://github.com/viirya)) +- Patch git Safe Paths in CI [\#5177](https://github.com/apache/arrow-datafusion/pull/5177) ([tustvold](https://github.com/tustvold)) +- Add initial support for serializing physical plans with Substrait [\#5176](https://github.com/apache/arrow-datafusion/pull/5176) 
([andygrove](https://github.com/andygrove)) +- Bump tokio from 1.24.1 to 1.24.2 in /datafusion-cli [\#5172](https://github.com/apache/arrow-datafusion/pull/5172) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Make EnforceSorting global sort aware, fix sort mis-optimizations involving unions, support parallel sort + merge transformations [\#5171](https://github.com/apache/arrow-datafusion/pull/5171) ([mustafasrepo](https://github.com/mustafasrepo)) +- Update substrait README.md [\#5168](https://github.com/apache/arrow-datafusion/pull/5168) ([jiangzhx](https://github.com/jiangzhx)) +- Switch to use sum kernel from arrow-rs for Decimal128 [\#5167](https://github.com/apache/arrow-datafusion/pull/5167) ([sunchao](https://github.com/sunchao)) +- FileStream: Open next file in parallel while decoding [\#5161](https://github.com/apache/arrow-datafusion/pull/5161) ([thinkharderdev](https://github.com/thinkharderdev)) +- Fix FairSpillPool try_grow for non-spillable consumers [\#5160](https://github.com/apache/arrow-datafusion/pull/5160) ([tustvold](https://github.com/tustvold)) +- fix: treat unsupported SQL plans as "not implemented" [\#5159](https://github.com/apache/arrow-datafusion/pull/5159) ([crepererum](https://github.com/crepererum)) +- Compare NULL types [\#5158](https://github.com/apache/arrow-datafusion/pull/5158) ([melgenek](https://github.com/melgenek)) +- Always wrapping OnceAsync for the inner table side in NestedLoopJoinExec [\#5156](https://github.com/apache/arrow-datafusion/pull/5156) ([ygf11](https://github.com/ygf11)) +- chore: add object_name_to_table_reference in SqlToRel [\#5155](https://github.com/apache/arrow-datafusion/pull/5155) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) +- Ambiguity check for where selection [\#5153](https://github.com/apache/arrow-datafusion/pull/5153) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) 
+- feat: Type coercion for Dictionary\(\_, \_\) to Utf8 for regex conditions [\#5152](https://github.com/apache/arrow-datafusion/pull/5152) ([stuartcarnie](https://github.com/stuartcarnie)) +- Support arithmetic scalar operation with DictionaryArray [\#5151](https://github.com/apache/arrow-datafusion/pull/5151) ([viirya](https://github.com/viirya)) +- \[sqllogictest\] Support `pg_typeof` [\#5148](https://github.com/apache/arrow-datafusion/pull/5148) ([melgenek](https://github.com/melgenek)) +- Date to Timestamp cast [\#5140](https://github.com/apache/arrow-datafusion/pull/5140) ([comphead](https://github.com/comphead)) +- add example for Flight SQL server that supports JDBC driver [\#5138](https://github.com/apache/arrow-datafusion/pull/5138) ([kmitchener](https://github.com/kmitchener)) +- Add in-list test [\#5135](https://github.com/apache/arrow-datafusion/pull/5135) ([nseekhao](https://github.com/nseekhao)) +- \[BugFix\] abort plan if order by column not in select list [\#5132](https://github.com/apache/arrow-datafusion/pull/5132) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xiaoyong-z](https://github.com/xiaoyong-z)) +- Bug fix: Empty Record Batch handling [\#5131](https://github.com/apache/arrow-datafusion/pull/5131) ([mustafasrepo](https://github.com/mustafasrepo)) +- Add option to control whether to normalize ident [\#5124](https://github.com/apache/arrow-datafusion/pull/5124) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) +- Make `parse_physical_expr` public [\#5118](https://github.com/apache/arrow-datafusion/pull/5118) ([comphead](https://github.com/comphead)) +- Support coercing `utf8` to `interval` and `timestamp` \(including arguments to `date_bin`\) [\#5117](https://github.com/apache/arrow-datafusion/pull/5117) ([alamb](https://github.com/alamb)) +- Fix release issues [\#5116](https://github.com/apache/arrow-datafusion/pull/5116) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- minor: port date_bin tests to sqllogictests [\#5115](https://github.com/apache/arrow-datafusion/pull/5115) ([alamb](https://github.com/alamb)) +- Minor: reduce code duplication using `rewrite_expr` [\#5114](https://github.com/apache/arrow-datafusion/pull/5114) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Replace &Option\<T\> with Option\<&T\> [\#5113](https://github.com/apache/arrow-datafusion/pull/5113) ([gaoxinge](https://github.com/gaoxinge)) +- Improve `get_meet_of_orderings` to check for common prefixes [\#5111](https://github.com/apache/arrow-datafusion/pull/5111) ([ozankabak](https://github.com/ozankabak)) +- \[sqllogictest\] Apply rowsort when there is no explicit order by [\#5110](https://github.com/apache/arrow-datafusion/pull/5110) ([melgenek](https://github.com/melgenek)) +- Add unnest_column to DataFrame [\#5106](https://github.com/apache/arrow-datafusion/pull/5106) ([vincev](https://github.com/vincev)) +- Minor: reduce indent level in page filter pruning code [\#5105](https://github.com/apache/arrow-datafusion/pull/5105) ([alamb](https://github.com/alamb)) +- Replace &Option\<T\> with Option\<&T\> [\#5102](https://github.com/apache/arrow-datafusion/pull/5102) ([gaoxinge](https://github.com/gaoxinge)) +- Minor: remove unused methods in datafusion/optimizer/src/utils.rs [\#5098](https://github.com/apache/arrow-datafusion/pull/5098) ([ygf11](https://github.com/ygf11)) +- ci: don't trigger rust ci for doc changes [\#5097](https://github.com/apache/arrow-datafusion/pull/5097) ([xudong963](https://github.com/xudong963)) +- sqllogicaltest: fix unstable slt case. 
[\#5095](https://github.com/apache/arrow-datafusion/pull/5095) ([jackwener](https://github.com/jackwener)) +- chore: update cranelift-module [\#5094](https://github.com/apache/arrow-datafusion/pull/5094) ([jackwener](https://github.com/jackwener)) +- refactor: Add `rewrite_expr` convenience method for rewriting `Expr`s [\#5092](https://github.com/apache/arrow-datafusion/pull/5092) ([alamb](https://github.com/alamb)) +- Minor: extract sort col rewrite into its own module, add unit tests [\#5088](https://github.com/apache/arrow-datafusion/pull/5088) ([alamb](https://github.com/alamb)) +- \[sqllogictest\] Move `decimal.rs` tests [\#5086](https://github.com/apache/arrow-datafusion/pull/5086) ([melgenek](https://github.com/melgenek)) +- Insert target columns empty fix [\#5079](https://github.com/apache/arrow-datafusion/pull/5079) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) +- sqllogicaltest: move union.rs [\#5075](https://github.com/apache/arrow-datafusion/pull/5075) ([jackwener](https://github.com/jackwener)) +- \[Enhancement\] Don't repartition ProjectionExec when it does not compute anything [\#5074](https://github.com/apache/arrow-datafusion/pull/5074) ([xiaoyong-z](https://github.com/xiaoyong-z)) +- Support ORDER BY an aliased column [\#5067](https://github.com/apache/arrow-datafusion/pull/5067) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Parquet parallel scan [\#5057](https://github.com/apache/arrow-datafusion/pull/5057) ([korowa](https://github.com/korowa)) +- \[BugFix\] fix file stream time scanning metrics bug [\#5020](https://github.com/apache/arrow-datafusion/pull/5020) ([xiaoyong-z](https://github.com/xiaoyong-z)) +- Show optimization errors in explain [\#4819](https://github.com/apache/arrow-datafusion/pull/4819) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jefffrey](https://github.com/Jefffrey)) diff --git 
a/dev/changelog/20.0.0.md b/dev/changelog/20.0.0.md new file mode 100644 index 0000000000000..8f95d573801ce --- /dev/null +++ b/dev/changelog/20.0.0.md @@ -0,0 +1,159 @@ + + +## [20.0.0](https://github.com/apache/arrow-datafusion/tree/20.0.0) (2023-03-10) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/19.0.0...20.0.0) + +**Breaking changes:** + +- Minor: Move TableProviderFactories up out of `RuntimeEnv` and into `SessionState` [#5477](https://github.com/apache/arrow-datafusion/pull/5477) (alamb) +- chore: Remove references from SessionState from physical_plan [#5455](https://github.com/apache/arrow-datafusion/pull/5455) (alamb) +- Implement `Debug` for `ExecutionProps` and `VarProvider` [#5489](https://github.com/apache/arrow-datafusion/pull/5489) (alamb) + +**Implemented enhancements:** + +- Add UserDefinedLogicalNodeCore [#5521](https://github.com/apache/arrow-datafusion/pull/5521) (mslapek) +- feat: add `arrow_cast` function to support supports arbitrary arrow types [#5166](https://github.com/apache/arrow-datafusion/pull/5166) (alamb) +- feat: interval add timestamp [#5491](https://github.com/apache/arrow-datafusion/pull/5491) (Weijun-H) +- feat: `ParquetExec` predicate preservation [#5495](https://github.com/apache/arrow-datafusion/pull/5495) (crepererum) +- feat: add optimization rules for bitwise operations [#5423](https://github.com/apache/arrow-datafusion/pull/5423) (izveigor) +- feat: Support bitwise operations for unsigned integer types [#5476](https://github.com/apache/arrow-datafusion/pull/5476) (izveigor) +- feat: eliminate the duplicated sort keys in Order By clause [#5462](https://github.com/apache/arrow-datafusion/pull/5462) (jackwener) +- feat: add name() method to UserDefinedLogicalNode [#5450](https://github.com/apache/arrow-datafusion/pull/5450) (waynexia) +- feat: express unsigned literal in substrait [#5448](https://github.com/apache/arrow-datafusion/pull/5448) (waynexia) +- feat: `extensions_options` macro 
[#5442](https://github.com/apache/arrow-datafusion/pull/5442) (crepererum) +- [feat]:fast check has column [#5328](https://github.com/apache/arrow-datafusion/pull/5328) (suxiaogang223) +- feat: eliminate unnecessary projection. [#5366](https://github.com/apache/arrow-datafusion/pull/5366) (jackwener) + +**Fixed bugs:** + +- revert accidently deleted size code in count_distinct [#5533](https://github.com/apache/arrow-datafusion/pull/5533) (comphead) +- fix: return schema of ExtensionPlan instead of its children's [#5514](https://github.com/apache/arrow-datafusion/pull/5514) (waynexia) +- fix: logical merge conflict -- hash_join tests with passing boolean by value [#5531](https://github.com/apache/arrow-datafusion/pull/5531) (korowa) +- fix: build union schema with child has same column name but qualifier… [#5452](https://github.com/apache/arrow-datafusion/pull/5452) (yukkit) +- Fix is_distinct from for float NaN values [#5446](https://github.com/apache/arrow-datafusion/pull/5446) (comphead) +- Bug/union wrong casting [#5342](https://github.com/apache/arrow-datafusion/pull/5342) (berkaysynnada) +- fix nested loop join with literal join filter [#5431](https://github.com/apache/arrow-datafusion/pull/5431) (ygf11) +- Fix filter pushdown for extension plans [#5425](https://github.com/apache/arrow-datafusion/pull/5425) (thinkharderdev) +- Bug fix: Window frame range value outside the type range [#5384](https://github.com/apache/arrow-datafusion/pull/5384) (mustafasrepo) +- fix: misc phys. 
expression display bugs [#5387](https://github.com/apache/arrow-datafusion/pull/5387) (crepererum) + +**Documentation updates:** + +- Minor: Improve docs for UserDefinedLogicalNode `dyn_eq` and `dyn_hash` [#5515](https://github.com/apache/arrow-datafusion/pull/5515) (alamb) +- chore: add known project ZincObserve [#5376](https://github.com/apache/arrow-datafusion/pull/5376) (hengfeiyang) +- docs: clarify spark [#5391](https://github.com/apache/arrow-datafusion/pull/5391) (hyoklee) + +**Merged pull requests:** + +- Manual changelog for 20.0.0 [#5551](https://github.com/apache/arrow-datafusion/pull/5551) (andygrove) +- Prepare for 20.0.0 release [Part 1] [#5539](https://github.com/apache/arrow-datafusion/pull/5539) (andygrove) +- chore: deduplicate workspace fields in Cargo.toml [#5519](https://github.com/apache/arrow-datafusion/pull/5519) (waynexia) +- Add necessary features to optimizer [#5540](https://github.com/apache/arrow-datafusion/pull/5540) (viirya) +- Minor: add the concise way for matching numerics [#5537](https://github.com/apache/arrow-datafusion/pull/5537) (izveigor) +- Add UserDefinedLogicalNodeCore [#5521](https://github.com/apache/arrow-datafusion/pull/5521) (mslapek) +- revert accidently deleted size code in count_distinct [#5533](https://github.com/apache/arrow-datafusion/pull/5533) (comphead) +- fix: return schema of ExtensionPlan instead of its children's [#5514](https://github.com/apache/arrow-datafusion/pull/5514) (waynexia) +- Minor: Move `ObjectStoreRegistry` to datafusion_execution crate [#5478](https://github.com/apache/arrow-datafusion/pull/5478) (alamb) +- Minor: Add db-benchmark URL to db-benchmark readme [#5503](https://github.com/apache/arrow-datafusion/pull/5503) (alamb) +- minor: fix clippy problem in new version. 
[#5532](https://github.com/apache/arrow-datafusion/pull/5532) (jackwener) +- fix: logical merge conflict -- hash_join tests with passing boolean by value [#5531](https://github.com/apache/arrow-datafusion/pull/5531) (korowa) +- Memory limited hash join [#5490](https://github.com/apache/arrow-datafusion/pull/5490) (korowa) +- minor: improve error style [#5510](https://github.com/apache/arrow-datafusion/pull/5510) (alamb) +- feat: add `arrow_cast` function to support supports arbitrary arrow types [#5166](https://github.com/apache/arrow-datafusion/pull/5166) (alamb) +- build(deps): update sqlparser requirement from 0.30 to 0.32 w/ API update [#5457](https://github.com/apache/arrow-datafusion/pull/5457) (alamb) +- Allow setting config extensions for TaskContext [#5497](https://github.com/apache/arrow-datafusion/pull/5497) (mpurins-coralogix) +- Minor: Improve docs for UserDefinedLogicalNode `dyn_eq` and `dyn_hash` [#5515](https://github.com/apache/arrow-datafusion/pull/5515) (alamb) +- feat: interval add timestamp [#5491](https://github.com/apache/arrow-datafusion/pull/5491) (Weijun-H) +- Pass booleans by value instead of by reference [#5487](https://github.com/apache/arrow-datafusion/pull/5487) (maxburke) +- Minor: Move TableProviderFactories up out of `RuntimeEnv` and into `SessionState` [#5477](https://github.com/apache/arrow-datafusion/pull/5477) (alamb) +- feat: `ParquetExec` predicate preservation [#5495](https://github.com/apache/arrow-datafusion/pull/5495) (crepererum) +- feat: add optimization rules for bitwise operations [#5423](https://github.com/apache/arrow-datafusion/pull/5423) (izveigor) +- chore: Remove references from SessionState from physical_plan [#5455](https://github.com/apache/arrow-datafusion/pull/5455) (alamb) +- Implement `Debug` for `ExecutionProps` and `VarProvider` [#5489](https://github.com/apache/arrow-datafusion/pull/5489) (alamb) +- feat: Support bitwise operations for unsigned integer types 
[#5476](https://github.com/apache/arrow-datafusion/pull/5476) (izveigor) +- Apply workaround for #5444 to `DataFrame::describe` [#5468](https://github.com/apache/arrow-datafusion/pull/5468) (jiangzhx) +- feat: eliminate the duplicated sort keys in Order By clause [#5462](https://github.com/apache/arrow-datafusion/pull/5462) (jackwener) +- Propagate timezone to created arrays [#5481](https://github.com/apache/arrow-datafusion/pull/5481) (maxburke) +- refactor: make GeometricMean not to have update and merge [#5469](https://github.com/apache/arrow-datafusion/pull/5469) (Weijun-H) +- feat: add name() method to UserDefinedLogicalNode [#5450](https://github.com/apache/arrow-datafusion/pull/5450) (waynexia) +- Comment out description text in issue templates [#5482](https://github.com/apache/arrow-datafusion/pull/5482) (Jefffrey) +- feat: express unsigned literal in substrait [#5448](https://github.com/apache/arrow-datafusion/pull/5448) (waynexia) +- fix: build union schema with child has same column name but qualifier… [#5452](https://github.com/apache/arrow-datafusion/pull/5452) (yukkit) +- refactor: make sum_distinct not to have update and merge [#5474](https://github.com/apache/arrow-datafusion/pull/5474) (Weijun-H) +- `compute_decimal_op_dyn_scalar` should not cast lhs array to decimal array [#5465](https://github.com/apache/arrow-datafusion/pull/5465) (viirya) +- feat: `extensions_options` macro [#5442](https://github.com/apache/arrow-datafusion/pull/5442) (crepererum) +- Enable hash joins on FixedSizeBinary columns [#5461](https://github.com/apache/arrow-datafusion/pull/5461) (maxburke) +- Fix is_distinct from for float NaN values [#5446](https://github.com/apache/arrow-datafusion/pull/5446) (comphead) +- Implement/fix Eq and Hash for Expr and LogicalPlan [#5421](https://github.com/apache/arrow-datafusion/pull/5421) (mslapek) +- [feat]:fast check has column [#5328](https://github.com/apache/arrow-datafusion/pull/5328) (suxiaogang223) +- Parquet sorting benchmark 
[#5433](https://github.com/apache/arrow-datafusion/pull/5433) (jaylmiller) +- refactor count_distinct to not to have update and merge [#5408](https://github.com/apache/arrow-datafusion/pull/5408) (Weijun-H) +- build(deps): update zstd requirement from 0.11 to 0.12 [#5458](https://github.com/apache/arrow-datafusion/pull/5458) (alamb) +- Upgrade bytes to 1.4 [#5460](https://github.com/apache/arrow-datafusion/pull/5460) (viirya) +- add std,median result to describe method [#5445](https://github.com/apache/arrow-datafusion/pull/5445) (jiangzhx) +- minor: Port more window tests to sqlogictests [#5434](https://github.com/apache/arrow-datafusion/pull/5434) (alamb) +- Use compute_op_dyn_scalar for datetime [#5315](https://github.com/apache/arrow-datafusion/pull/5315) (viirya) +- add a unit test that cover cast bug. [#5443](https://github.com/apache/arrow-datafusion/pull/5443) (jackwener) +- create new `datafusion-execution` crate, start splitting code out [#5432](https://github.com/apache/arrow-datafusion/pull/5432) (alamb) +- minor: fix clippy in nightly. [#5440](https://github.com/apache/arrow-datafusion/pull/5440) (jackwener) +- Support for Sliding Windows Joins with Symmetric Hash Join (SHJ) [#5322](https://github.com/apache/arrow-datafusion/pull/5322) (metesynnada) +- refactor: ParquetExec logical expr. => phys. expr. [#5419](https://github.com/apache/arrow-datafusion/pull/5419) (crepererum) +- Update README.md fix [DataFusion] links [#5438](https://github.com/apache/arrow-datafusion/pull/5438) (jiangzhx) +- add mean result for describe method [#5435](https://github.com/apache/arrow-datafusion/pull/5435) (jiangzhx) +- add expr_fn::median [#5437](https://github.com/apache/arrow-datafusion/pull/5437) (jiangzhx) +- Bug/union wrong casting [#5342](https://github.com/apache/arrow-datafusion/pull/5342) (berkaysynnada) +- reimplement `push_down_projection` and `prune_column`.
[#4465](https://github.com/apache/arrow-datafusion/pull/4465) (jackwener) +- Add `expr_fn::stddev` [#5409](https://github.com/apache/arrow-datafusion/pull/5409) (jiangzhx) +- fix nested loop join with literal join filter [#5431](https://github.com/apache/arrow-datafusion/pull/5431) (ygf11) +- add a describe method on DataFrame like Polars [#5226](https://github.com/apache/arrow-datafusion/pull/5226) (jiangzhx) +- Memory reservation & metrics for cross join [#5339](https://github.com/apache/arrow-datafusion/pull/5339) (korowa) +- Optimize count_distinct.size [#5377](https://github.com/apache/arrow-datafusion/pull/5377) (comphead) +- Fix filter pushdown for extension plans [#5425](https://github.com/apache/arrow-datafusion/pull/5425) (thinkharderdev) +- Also push down all filters in TableProvider [#5420](https://github.com/apache/arrow-datafusion/pull/5420) (avantgardnerio) +- Update arrow 34 [#5375](https://github.com/apache/arrow-datafusion/pull/5375) (tustvold) +- Parquet limit pushdown (#5404) [#5416](https://github.com/apache/arrow-datafusion/pull/5416) (tustvold) +- Move file format config.rs to live with the rest of the datasource code [#5406](https://github.com/apache/arrow-datafusion/pull/5406) (alamb) +- Support Zstd compressed files [#5397](https://github.com/apache/arrow-datafusion/pull/5397) (dennybritz) +- Add example of catalog API usage (#5291) [#5326](https://github.com/apache/arrow-datafusion/pull/5326) (jaylmiller) +- Add support for protobuf serialisation of Arrow Map type [#5359](https://github.com/apache/arrow-datafusion/pull/5359) (ahmedriza) +- minor: port window tests to slt (part 2) [#5399](https://github.com/apache/arrow-datafusion/pull/5399) (alamb) +- fix(docs): fix typos [#5403](https://github.com/apache/arrow-datafusion/pull/5403) (WenyXu) +- Try to push down full filter before break-up [#5367](https://github.com/apache/arrow-datafusion/pull/5367) (avantgardnerio) +- enhance: remove more projection. 
[#5402](https://github.com/apache/arrow-datafusion/pull/5402) (jackwener) +- refactor `push_down_filter` to fix dead-loop and use optimizer_recurse. [#5337](https://github.com/apache/arrow-datafusion/pull/5337) (jackwener) +- feat: eliminate unnecessary projection. [#5366](https://github.com/apache/arrow-datafusion/pull/5366) (jackwener) +- minor: add forgotten large_utf8 [#5393](https://github.com/apache/arrow-datafusion/pull/5393) (jackwener) +- Minor: add tests for subquery to join [#5363](https://github.com/apache/arrow-datafusion/pull/5363) (ygf11) +- bugfix: fix master `bors` problem. [#5395](https://github.com/apache/arrow-datafusion/pull/5395) (jackwener) +- Rule ReplaceDistinctWithAggregate [#5354](https://github.com/apache/arrow-datafusion/pull/5354) (mingmwang) +- chore: add known project ZincObserve [#5376](https://github.com/apache/arrow-datafusion/pull/5376) (hengfeiyang) +- refactor: parquet pruning simplifications [#5386](https://github.com/apache/arrow-datafusion/pull/5386) (crepererum) +- Minor: intersect expressions optimization [#5388](https://github.com/apache/arrow-datafusion/pull/5388) (izveigor) +- docs: clarify spark [#5391](https://github.com/apache/arrow-datafusion/pull/5391) (hyoklee) +- UDF zero params #5378 [#5380](https://github.com/apache/arrow-datafusion/pull/5380) (jaylmiller) +- Minor: added some tests for coercion type [#5389](https://github.com/apache/arrow-datafusion/pull/5389) (izveigor) +- minor: make table resolution an independent function ... [#5373](https://github.com/apache/arrow-datafusion/pull/5373) (MichaelScofield) +- minor: port predicates tests to sqllogictests [#5374](https://github.com/apache/arrow-datafusion/pull/5374) (jackwener) +- Bug fix: Window frame range value outside the type range [#5384](https://github.com/apache/arrow-datafusion/pull/5384) (mustafasrepo) +- Fixed small typos in files of the optimizer [#5356](https://github.com/apache/arrow-datafusion/pull/5356) (izveigor) +- fix: misc phys. 
expression display bugs [#5387](https://github.com/apache/arrow-datafusion/pull/5387) (crepererum) +- Prepare for 19.0.0 release [#5381](https://github.com/apache/arrow-datafusion/pull/5381) (andygrove) +- minor: disable tpcds-q41 due to not support decorrelate disjunction subquery [#5369](https://github.com/apache/arrow-datafusion/pull/5369) (jackwener) diff --git a/dev/changelog/21.0.0.md b/dev/changelog/21.0.0.md new file mode 100644 index 0000000000000..98d93b9597a33 --- /dev/null +++ b/dev/changelog/21.0.0.md @@ -0,0 +1,135 @@ + + +## [21.0.0](https://github.com/apache/arrow-datafusion/tree/21.0.0) (2023-03-24) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/20.0.0...21.0.0) + +**Breaking changes:** + +- Support arbitrary user defined partition column in `ListingTable` (rather than assuming they are always Dictionary encoded) [#5545](https://github.com/apache/arrow-datafusion/pull/5545) (crepererum) +- Use TableReference for TableScan [#5615](https://github.com/apache/arrow-datafusion/pull/5615) (alamb) +- Update the type of `param_values` to `&[ScalarValue]` in function `replace_params_with_values` [#5640](https://github.com/apache/arrow-datafusion/pull/5640) (HaoYang670) + +**Implemented enhancements:** + +- feat: extract (epoch from col) [#5555](https://github.com/apache/arrow-datafusion/pull/5555) (Weijun-H) +- INSERT INTO support for MemTable [#5520](https://github.com/apache/arrow-datafusion/pull/5520) (metesynnada) +- Memory limited nested-loop join [#5564](https://github.com/apache/arrow-datafusion/pull/5564) (korowa) +- Timestamp subtraction and interval operations for `ScalarValue` [#5603](https://github.com/apache/arrow-datafusion/pull/5603) (berkaysynnada) +- Substrait: Add cast expression with bool, integers and decimal128 support [#5137](https://github.com/apache/arrow-datafusion/pull/5137) (nseekhao) +- Support `date_bin` with 2 arguments [#5643](https://github.com/apache/arrow-datafusion/pull/5643) (Weijun-H) +- improve: 
support combining multiple grouping expressions [#5559](https://github.com/apache/arrow-datafusion/pull/5559) (yukkit) +- Substrait: Add support for WindowFunction [#5653](https://github.com/apache/arrow-datafusion/pull/5653) (nseekhao) +- feat: `date_bin` supports MonthDayNano, microsecond and nanosecond units [#5698](https://github.com/apache/arrow-datafusion/pull/5698) (stuartcarnie) +- Handle serialization of TryCast [#5692](https://github.com/apache/arrow-datafusion/pull/5692) (thinkharderdev) + +**Fixed bugs:** + +- fix: failed to execute sql with subquery [#5542](https://github.com/apache/arrow-datafusion/pull/5542) (MichaelScofield) +- fix: cast literal to timestamp [#5517](https://github.com/apache/arrow-datafusion/pull/5517) (Weijun-H) +- fix dataframe only boolean/binary column got error on describe [#5585](https://github.com/apache/arrow-datafusion/pull/5585) (jiangzhx) +- Median returns null on empty input instead of error [#5624](https://github.com/apache/arrow-datafusion/pull/5624) (toppyy) +- add CountWildcardRule to fix error on Count(Expr:Wildcard) with DataFrame API [#5627](https://github.com/apache/arrow-datafusion/pull/5627) (jiangzhx) +- fix: correct CountWildcardRule and move analyzer into a new directory. 
[#5671](https://github.com/apache/arrow-datafusion/pull/5671) (jackwener) + +**Documentation updates:** + +- Minor: improve docstrings for `ObjectStoreRegistry` and `ObjectStoreProvider` [#5577](https://github.com/apache/arrow-datafusion/pull/5577) (alamb) +- Clarify differences of DataFusion with other systems in README.md [#5578](https://github.com/apache/arrow-datafusion/pull/5578) (alamb) +- Minor: Document docs build process [#5687](https://github.com/apache/arrow-datafusion/pull/5687) (alamb) + +**Merged pull requests:** + +- Refactor DecorrelateWhereExists and add back Distinct if needs [#5345](https://github.com/apache/arrow-datafusion/pull/5345) (ygf11) +- Simplify simplify test cases, support `^`, `&`, `|`, `<<` and `>>` operators for building exprs [#5511](https://github.com/apache/arrow-datafusion/pull/5511) (alamb) +- minor: improve sqllogictest docs [#5553](https://github.com/apache/arrow-datafusion/pull/5553) (alamb) +- Remove unused dependencies found by cargo-machete [#5552](https://github.com/apache/arrow-datafusion/pull/5552) (Jefffrey) +- make AggregateStatistics return the same result whether optimizer disabled or enabled [#5485](https://github.com/apache/arrow-datafusion/pull/5485) (jiangzhx) +- Avoid circular(ish) dependency parquet-test-utils on datafusion, try 2 [#5536](https://github.com/apache/arrow-datafusion/pull/5536) (alamb) +- Enforce ambiguity check whilst normalizing columns [#5509](https://github.com/apache/arrow-datafusion/pull/5509) (Jefffrey) +- Generated changelog for 20.0.0 [#5563](https://github.com/apache/arrow-datafusion/pull/5563) (andygrove) +- fix: failed to execute sql with subquery [#5542](https://github.com/apache/arrow-datafusion/pull/5542) (MichaelScofield) +- Revert describe count() workaround [#5556](https://github.com/apache/arrow-datafusion/pull/5556) (Jefffrey) +- fix: cast literal to timestamp [#5517](https://github.com/apache/arrow-datafusion/pull/5517) (Weijun-H) +- feat: extract (epoch from col) 
[#5555](https://github.com/apache/arrow-datafusion/pull/5555) (Weijun-H) +- Minor: improve docstrings for `ObjectStoreRegistry` and `ObjectStoreProvider` [#5577](https://github.com/apache/arrow-datafusion/pull/5577) (alamb) +- Minor: Move RuntimeEnv to `datafusion_execution` [#5580](https://github.com/apache/arrow-datafusion/pull/5580) (alamb) +- INSERT INTO support for MemTable [#5520](https://github.com/apache/arrow-datafusion/pull/5520) (metesynnada) +- Minor: restore explicit match to help avoid subtle bugs in the future when new `Expr` variants are added [#5579](https://github.com/apache/arrow-datafusion/pull/5579) (alamb) +- refactor: add more error info when array is empty [#5560](https://github.com/apache/arrow-datafusion/pull/5560) (Weijun-H) +- Memory limited nested-loop join [#5564](https://github.com/apache/arrow-datafusion/pull/5564) (korowa) +- Support catalog.schema.table.column in SQL SELECT and WHERE [#5343](https://github.com/apache/arrow-datafusion/pull/5343) (Jefffrey) +- Minor: clean up aggregates.slt tests [#5599](https://github.com/apache/arrow-datafusion/pull/5599) (alamb) +- Minor: Port more aggregate tests to sqllogictests [#5574](https://github.com/apache/arrow-datafusion/pull/5574) (alamb) +- Add a utility function to get all of the PartitionedFile for an ExecutionPlan [#5572](https://github.com/apache/arrow-datafusion/pull/5572) (yahoNanJing) +- minor: port some join tests to sqllogictests [#5567](https://github.com/apache/arrow-datafusion/pull/5567) (ygf11) +- Support arbitrary user defined partition column in `ListingTable` (rather than assuming they are always Dictionary encoded) [#5545](https://github.com/apache/arrow-datafusion/pull/5545) (crepererum) +- feat: add the similar optimization function for bitwise negative [#5516](https://github.com/apache/arrow-datafusion/pull/5516) (izveigor) +- Clarify differences of DataFusion with other systems in README.md [#5578](https://github.com/apache/arrow-datafusion/pull/5578) (alamb) +- 
Minor: Add more documentation about table_partition_columns [#5576](https://github.com/apache/arrow-datafusion/pull/5576) (alamb) +- Add Analyzer phase to DataFusion , add basic validation logic to Subquery Plans and Expressions [#5570](https://github.com/apache/arrow-datafusion/pull/5570) (mingmwang) +- Use TableReference for TableScan [#5615](https://github.com/apache/arrow-datafusion/pull/5615) (alamb) +- Preserve casts in rewrite_sort_cols_by_aggs [#5611](https://github.com/apache/arrow-datafusion/pull/5611) (mpurins-coralogix) +- Miscellaneous ArrayData Cleanup [#5612](https://github.com/apache/arrow-datafusion/pull/5612) (tustvold) +- Update substrait requirement from 0.4 to 0.5 [#5620](https://github.com/apache/arrow-datafusion/pull/5620) (dependabot[bot]) +- Do not break pipeline for window queries with GROUPS [#5587](https://github.com/apache/arrow-datafusion/pull/5587) (mustafasrepo) +- fix dataframe only boolean/binary column got error on describe [#5585](https://github.com/apache/arrow-datafusion/pull/5585) (jiangzhx) +- Minor: Add Documentation and Examples to `TableReference` [#5616](https://github.com/apache/arrow-datafusion/pull/5616) (alamb) +- [FOLLOWUP] eliminate the duplicated sort keys in Order By clause [#5607](https://github.com/apache/arrow-datafusion/pull/5607) (mingmwang) +- Update default behaviour of compression algorithms (support multistreams) [#5629](https://github.com/apache/arrow-datafusion/pull/5629) (metesynnada) +- Timestamp subtraction and interval operations for `ScalarValue` [#5603](https://github.com/apache/arrow-datafusion/pull/5603) (berkaysynnada) +- Use modulus dyn kernels for arithmetic expressions [#5634](https://github.com/apache/arrow-datafusion/pull/5634) (viirya) +- Minor: reduce cloning in `infer_placeholder_types` [#5638](https://github.com/apache/arrow-datafusion/pull/5638) (alamb) +- Move `SessionConfig` to `datafusion_execution` [#5581](https://github.com/apache/arrow-datafusion/pull/5581) (alamb) +- Update the 
type of `param_values` to `&[ScalarValue]` in function `replace_params_with_values` [#5640](https://github.com/apache/arrow-datafusion/pull/5640) (HaoYang670) +- WITH ORDER support on CREATE EXTERNAL TABLE [#5618](https://github.com/apache/arrow-datafusion/pull/5618) (metesynnada) +- Median returns null on empty input instead of error [#5624](https://github.com/apache/arrow-datafusion/pull/5624) (toppyy) +- feat: Memory limited merge join [#5632](https://github.com/apache/arrow-datafusion/pull/5632) (korowa) +- Update rstest requirement from 0.16.0 to 0.17.0 [#5648](https://github.com/apache/arrow-datafusion/pull/5648) (dependabot[bot]) +- add CountWildcardRule to fix error on Count(Expr:Wildcard) with DataFrame API [#5627](https://github.com/apache/arrow-datafusion/pull/5627) (jiangzhx) +- Add OuterReferenceColumn to Expr to represent correlated expression [#5593](https://github.com/apache/arrow-datafusion/pull/5593) (mingmwang) +- Minor: Simplify `Result` [#5659](https://github.com/apache/arrow-datafusion/pull/5659) (comphead) +- minor: remove redundant `DataFusionError` and fix `clippy` [#5669](https://github.com/apache/arrow-datafusion/pull/5669) (jackwener) +- Substrait: Add cast expression with bool, integers and decimal128 support [#5137](https://github.com/apache/arrow-datafusion/pull/5137) (nseekhao) +- Support `date_bin` with 2 arguments [#5643](https://github.com/apache/arrow-datafusion/pull/5643) (Weijun-H) +- Add LogicalPlanSignature and use in the optimizer loop [#5623](https://github.com/apache/arrow-datafusion/pull/5623) (mslapek) +- fix: correct CountWildcardRule and move analyzer into a new directory. 
[#5671](https://github.com/apache/arrow-datafusion/pull/5671) (jackwener) +- refactoring: added tests and fixed comments in "math_expressions" [#5656](https://github.com/apache/arrow-datafusion/pull/5656) (izveigor) +- improve: support combining multiple grouping expressions [#5559](https://github.com/apache/arrow-datafusion/pull/5559) (yukkit) +- community: polish issue template [#5668](https://github.com/apache/arrow-datafusion/pull/5668) (jackwener) +- minor: correct issue template [#5679](https://github.com/apache/arrow-datafusion/pull/5679) (jackwener) +- Change ObjectStoreRegistry from struct to trait to provide polymorphism [#5543](https://github.com/apache/arrow-datafusion/pull/5543) (yahoNanJing) +- Minor: Add `Extensions::new()` [#5676](https://github.com/apache/arrow-datafusion/pull/5676) (alamb) +- minor: add with_plan for Subquery [#5680](https://github.com/apache/arrow-datafusion/pull/5680) (jackwener) +- minor: reduce replication in `date_bin` implementation [#5673](https://github.com/apache/arrow-datafusion/pull/5673) (alamb) +- Fixes #5500 - Add a GitHub Actions workflow that builds the docs [#5670](https://github.com/apache/arrow-datafusion/pull/5670) (martin-g) +- Minor: port some content to the docs [#5684](https://github.com/apache/arrow-datafusion/pull/5684) (alamb) +- Docs: Add logo back to sidebar [#5688](https://github.com/apache/arrow-datafusion/pull/5688) (alamb) +- Substrait: Add support for WindowFunction [#5653](https://github.com/apache/arrow-datafusion/pull/5653) (nseekhao) +- Add -o option to all e2e benches [#5658](https://github.com/apache/arrow-datafusion/pull/5658) (jaylmiller) +- create table default to null [#5606](https://github.com/apache/arrow-datafusion/pull/5606) (Weijun-H) +- Minor: Document docs build process [#5687](https://github.com/apache/arrow-datafusion/pull/5687) (alamb) +- Minor: change doc formatting to force a republish [#5702](https://github.com/apache/arrow-datafusion/pull/5702) (alamb) +- Move `TaskContext` 
to datafusion-execution [#5677](https://github.com/apache/arrow-datafusion/pull/5677) (alamb) +- feat: `date_bin` supports MonthDayNano, microsecond and nanosecond units [#5698](https://github.com/apache/arrow-datafusion/pull/5698) (stuartcarnie) +- Return plan error when adding utf8 and timestamp [#5696](https://github.com/apache/arrow-datafusion/pull/5696) (Weijun-H) +- Handle serialization of TryCast [#5692](https://github.com/apache/arrow-datafusion/pull/5692) (thinkharderdev) +- analyzer: move InlineTableScan into Analyzer. [#5683](https://github.com/apache/arrow-datafusion/pull/5683) (jackwener) +- minor: Add doc comments to clarify what Analyzer is for [#5705](https://github.com/apache/arrow-datafusion/pull/5705) (alamb) diff --git a/dev/changelog/21.1.0.md b/dev/changelog/21.1.0.md new file mode 100644 index 0000000000000..6cd79c93b3f45 --- /dev/null +++ b/dev/changelog/21.1.0.md @@ -0,0 +1,26 @@ + + +## [21.1.0](https://github.com/apache/arrow-datafusion/tree/21.1.0) (2023-03-24) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/21.0.0...21.1.0) + +**Fixed bugs:** + +- fix: Use consistent arrow version (do not use both `arrow 34.0.0` and `arrow-array 35.0.0`) [#5765](https://github.com/apache/arrow-datafusion/pull/5765) diff --git a/dev/changelog/22.0.0.md b/dev/changelog/22.0.0.md new file mode 100644 index 0000000000000..7e02fb5d5ab33 --- /dev/null +++ b/dev/changelog/22.0.0.md @@ -0,0 +1,141 @@ + + +## [22.0.0](https://github.com/apache/arrow-datafusion/tree/22.0.0) (2023-04-07) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/21.1.0...22.0.0) + +**Breaking changes:** + +- Introduce a common trait TreeNode for ExecutionPlan, PhysicalExpr, LogicalExpr, LogicalPlan [#5630](https://github.com/apache/arrow-datafusion/pull/5630) (yahoNanJing) +- Minor: Reduce clones in AnalyzerRule [#5728](https://github.com/apache/arrow-datafusion/pull/5728) (alamb) +- Change required input ordering physical plan API to allow any 
NULLS FIRST / LAST and ASC / DESC [#5772](https://github.com/apache/arrow-datafusion/pull/5772) (mustafasrepo) +- Remove batch_idx from SortKeyCursor [#5855](https://github.com/apache/arrow-datafusion/pull/5855) (tustvold) +- Top down `EnforceSorting`, Extended testbench for `EnforceSorting` rule to prepare for refactors, additional functionality such as pushdowns over unions [#5661](https://github.com/apache/arrow-datafusion/pull/5661) (mustafasrepo) +- Move `TransactionStart`/`TransactionEnd`/`SetVariable` into `LogicalPlan::Statement` [#5842](https://github.com/apache/arrow-datafusion/pull/5842) (alamb) + +**Implemented enhancements:** + +- feat: Simplify LOG and POWER functions [#5816](https://github.com/apache/arrow-datafusion/pull/5816) (izveigor) +- feat: Add expression rewrite rules for LIKE and ILIKE [#5819](https://github.com/apache/arrow-datafusion/pull/5819) (Weijun-H) +- feat: BuiltinScalarFunction::Cbrt [#5839](https://github.com/apache/arrow-datafusion/pull/5839) (izveigor) +- feat: Quote column names if required in error messages [#5778](https://github.com/apache/arrow-datafusion/pull/5778) (alamb) + +**Fixed bugs:** + +- Fix parquet pruning when column names have periods [#5710](https://github.com/apache/arrow-datafusion/pull/5710) (alamb) +- fix: parse table name into TableReference on converting substrait read [#5716](https://github.com/apache/arrow-datafusion/pull/5716) (waynexia) +- fix: Enhance case expression type coercion [#5820](https://github.com/apache/arrow-datafusion/pull/5820) (Jefffrey) +- fix: type_coercion support BinaryExpr ( interval , timestamp ). [#5845](https://github.com/apache/arrow-datafusion/pull/5845) (jackwener) +- fix: coerce type for InSubquery and fix timestamp minus timestamp. 
[#5853](https://github.com/apache/arrow-datafusion/pull/5853) (jackwener) + +**Documentation updates:** + +- chore: update sql function documentation [#5780](https://github.com/apache/arrow-datafusion/pull/5780) (sanderson) +- Minor: fix docs build [#5795](https://github.com/apache/arrow-datafusion/pull/5795) (alamb) +- Move content from README.md to docs site [#5824](https://github.com/apache/arrow-datafusion/pull/5824) (alamb) +- Update docs/source/contributor-guide/index.md [#5872](https://github.com/apache/arrow-datafusion/pull/5872) (2010YOUY01) + +**Merged pull requests:** + +- Fix parquet pruning when column names have periods [#5710](https://github.com/apache/arrow-datafusion/pull/5710) (alamb) +- Executing LocalLimitExec with no column should not return an Err [#5709](https://github.com/apache/arrow-datafusion/pull/5709) (kazuyukitanimura) +- Minor: Comments to .asf.yaml [#5703](https://github.com/apache/arrow-datafusion/pull/5703) (alamb) +- Exclude some .github files from rat license check [#5720](https://github.com/apache/arrow-datafusion/pull/5720) (andygrove) +- Minor: Trigger docs CI build on changes to asf.yaml [#5726](https://github.com/apache/arrow-datafusion/pull/5726) (alamb) +- Use consistent arrow version (do not use both arrow 34 and arrow-array 35) [#5724](https://github.com/apache/arrow-datafusion/pull/5724) (tustvold) +- LIMIT edge cases [#5723](https://github.com/apache/arrow-datafusion/pull/5723) (comphead) +- Put the file "type_coercion" in the same named folder and rename the file "mod.rs" [#5736](https://github.com/apache/arrow-datafusion/pull/5736) (HaoYang670) +- fix: parse table name into TableReference on converting substrait read [#5716](https://github.com/apache/arrow-datafusion/pull/5716) (waynexia) +- Modify tests for TPCH explain plans to avoid regressions [#5741](https://github.com/apache/arrow-datafusion/pull/5741) (jiangyinzuo) +- Minor: port select tests to sqllogictests
[#5740](https://github.com/apache/arrow-datafusion/pull/5740) (alamb) +- Introduce a common trait TreeNode for ExecutionPlan, PhysicalExpr, LogicalExpr, LogicalPlan [#5630](https://github.com/apache/arrow-datafusion/pull/5630) (yahoNanJing) +- Minor: Reduce clones in AnalyzerRule [#5728](https://github.com/apache/arrow-datafusion/pull/5728) (alamb) +- Upgrade to substrait 0.5.1 and set the version field of produced plans [#5707](https://github.com/apache/arrow-datafusion/pull/5707) (mbrobbel) +- excluding doctests for mac/win64 platform ,Make them consistent with amd64 [#5730](https://github.com/apache/arrow-datafusion/pull/5730) (jiangzhx) +- fix test of benchmarks warning [#5737](https://github.com/apache/arrow-datafusion/pull/5737) (r4ntix) +- Move protoc generation to binary crate (#5718) [#5742](https://github.com/apache/arrow-datafusion/pull/5742) (l0kr) +- Add compare.py to compare the output of multiple benchmarks [#5655](https://github.com/apache/arrow-datafusion/pull/5655) (alamb) +- Move and rename `expr_rewriter.rs` [#5743](https://github.com/apache/arrow-datafusion/pull/5743) (HaoYang670) +- Minor: port some decimal tests to sqllogictests [#5739](https://github.com/apache/arrow-datafusion/pull/5739) (alamb) +- Update to arrow 36 [#5685](https://github.com/apache/arrow-datafusion/pull/5685) (tustvold) +- Minor: Avoid an unnecessary construction in `map_children` some extra plan construction [#5761](https://github.com/apache/arrow-datafusion/pull/5761) (alamb) +- minor: fix typos in planner.rs error msg [#5776](https://github.com/apache/arrow-datafusion/pull/5776) (jiangzhx) +- minor: add timestampstz utf8 conversion test [#5777](https://github.com/apache/arrow-datafusion/pull/5777) (comphead) +- Update prost-build requirement from =0.11.7 to =0.11.8 [#5773](https://github.com/apache/arrow-datafusion/pull/5773) (dependabot[bot]) +- infer right side nullability for LEFT join [#5748](https://github.com/apache/arrow-datafusion/pull/5748) (comphead) +- MINOR:
simplify sqllogic test schema check [#5769](https://github.com/apache/arrow-datafusion/pull/5769) (comphead) +- Change required input ordering physical plan API to allow any NULLS FIRST / LAST and ASC / DESC [#5772](https://github.com/apache/arrow-datafusion/pull/5772) (mustafasrepo) +- Support timestamp and interval arithmetic [#5764](https://github.com/apache/arrow-datafusion/pull/5764) (berkaysynnada) +- chore: update sql function documentation [#5780](https://github.com/apache/arrow-datafusion/pull/5780) (sanderson) +- Minor: fix docs build [#5795](https://github.com/apache/arrow-datafusion/pull/5795) (alamb) +- Minor: use workspace arrow-array rather than hard coded 34 [#5794](https://github.com/apache/arrow-datafusion/pull/5794) (alamb) +- Return an error for invalid placeholder `$0` instead of panicking [#5787](https://github.com/apache/arrow-datafusion/pull/5787) (kawadakk) +- Bump substrait version to 0.6.0 [#5798](https://github.com/apache/arrow-datafusion/pull/5798) (jdye64) +- Support `INTERVAL` SQL Type [#5792](https://github.com/apache/arrow-datafusion/pull/5792) (alamb) +- Minor: fix flaking test [#5805](https://github.com/apache/arrow-datafusion/pull/5805) (alamb) +- Incorrect row comparison for tpch queries in benchmarks [#5784](https://github.com/apache/arrow-datafusion/pull/5784) (viirya) +- Update ctor requirement from 0.1.22 to 0.2.0 [#5752](https://github.com/apache/arrow-datafusion/pull/5752) (dependabot[bot]) +- Minor: Port some timestamp tests to sqllogictests [#5804](https://github.com/apache/arrow-datafusion/pull/5804) (alamb) +- Minor: remove typed_min_max_batch_decimal128 [#5809](https://github.com/apache/arrow-datafusion/pull/5809) (izveigor) +- Minor: Run rust workflow on changes to .github [#5758](https://github.com/apache/arrow-datafusion/pull/5758) (alamb) +- Minor: clean up timestamp arithmetic tests [#5803](https://github.com/apache/arrow-datafusion/pull/5803) (alamb) +- improve Filter pushdown to Join 
[#5770](https://github.com/apache/arrow-datafusion/pull/5770) (mingmwang) +- Support `round()` function with two parameters [#5807](https://github.com/apache/arrow-datafusion/pull/5807) (viirya) +- Fix datatype of case expression [#5734](https://github.com/apache/arrow-datafusion/pull/5734) (mslapek) +- Minor: Add ticket reference as comment [#5822](https://github.com/apache/arrow-datafusion/pull/5822) (alamb) +- Forward port version and Changelog for `21.1.0` [#5767](https://github.com/apache/arrow-datafusion/pull/5767) (alamb) +- Implement LogicalPlan support for transactions [#5827](https://github.com/apache/arrow-datafusion/pull/5827) (avantgardnerio) +- Minor: port more timestamp tests to sqllogictests [#5832](https://github.com/apache/arrow-datafusion/pull/5832) (alamb) +- feat: Simplify LOG and POWER functions [#5816](https://github.com/apache/arrow-datafusion/pull/5816) (izveigor) +- fix: Enhance case expression type coercion [#5820](https://github.com/apache/arrow-datafusion/pull/5820) (Jefffrey) +- feat: Add expression rewrite rules for LIKE and ILIKE [#5819](https://github.com/apache/arrow-datafusion/pull/5819) (Weijun-H) +- fix: type_coercion support BinaryExpr ( interval , timestamp ). 
[#5845](https://github.com/apache/arrow-datafusion/pull/5845) (jackwener) +- Add primary key information to CreateMemoryTable LogicalPlan node [#5835](https://github.com/apache/arrow-datafusion/pull/5835) (avantgardnerio) +- Expose substrait protoc feature [#5852](https://github.com/apache/arrow-datafusion/pull/5852) (andygrove) +- minor(sqlparser): encapsulate PlanerContext, reduce some clones [#5814](https://github.com/apache/arrow-datafusion/pull/5814) (alamb) +- Remove batch_idx from SortKeyCursor [#5855](https://github.com/apache/arrow-datafusion/pull/5855) (tustvold) +- Improving optimizer performance by eliminating unnecessary sort and distribution passes, add more SymmetricHashJoin improvements [#5754](https://github.com/apache/arrow-datafusion/pull/5754) (metesynnada) +- Poll next open file future while scanning current file [#5800](https://github.com/apache/arrow-datafusion/pull/5800) (nenorbot) +- Top down `EnforceSorting`, Extended testbench for `EnforceSorting` rule to prepare for refactors, additional functionality such as pushdowns over unions [#5661](https://github.com/apache/arrow-datafusion/pull/5661) (mustafasrepo) +- Move `TransactionStart`/`TransactionEnd`/`SetVariable` into `LogicalPlan::Statement` [#5842](https://github.com/apache/arrow-datafusion/pull/5842) (alamb) +- Move content from README.md to docs site [#5824](https://github.com/apache/arrow-datafusion/pull/5824) (alamb) +- Fix `interval` to use consistent units and arrow parser [#5806](https://github.com/apache/arrow-datafusion/pull/5806) (alamb) +- Enhance Asynchronous Performance of SHJ Implementation [#5864](https://github.com/apache/arrow-datafusion/pull/5864) (metesynnada) +- Prove timestamptz <=> timestamp now works [#5869](https://github.com/apache/arrow-datafusion/pull/5869) (comphead) +- Update docs/source/contributor-guide/index.md [#5872](https://github.com/apache/arrow-datafusion/pull/5872) (2010YOUY01) +- fix: coerce type for InSubquery and fix timestamp minus timestamp. 
[#5853](https://github.com/apache/arrow-datafusion/pull/5853) (jackwener) +- chore: update sqllogictest version 0.13.2. [#5875](https://github.com/apache/arrow-datafusion/pull/5875) (jackwener) +- Minor: Add crates.io / API links to website [#5871](https://github.com/apache/arrow-datafusion/pull/5871) (alamb) +- minor: made `information_schema` pub [#5862](https://github.com/apache/arrow-datafusion/pull/5862) (MichaelScofield) +- Update substrait requirement from 0.6.0 to 0.7.1 [#5876](https://github.com/apache/arrow-datafusion/pull/5876) (dependabot[bot]) +- refactor: move type_coercion to analyzer [#5831](https://github.com/apache/arrow-datafusion/pull/5831) (jackwener) +- feat: BuiltinScalarFunction::Cbrt [#5839](https://github.com/apache/arrow-datafusion/pull/5839) (izveigor) +- [Minor]: Update `architecture.md` to include April tech talks [#5865](https://github.com/apache/arrow-datafusion/pull/5865) (comphead) +- [sqllogictest] Run tests on Windows [#5870](https://github.com/apache/arrow-datafusion/pull/5870) (melgenek) +- Support create object store source tables without depending on environment variables [#5732](https://github.com/apache/arrow-datafusion/pull/5732) (r4ntix) +- feat: Quote column names if required in error messages [#5778](https://github.com/apache/arrow-datafusion/pull/5778) (alamb) +- [MINOR]: Refactor to increase readability [#5874](https://github.com/apache/arrow-datafusion/pull/5874) (mustafasrepo) +- More realistic sort benchmarks [#5881](https://github.com/apache/arrow-datafusion/pull/5881) (tustvold) +- Removal of arithmetic operations for temporal values to binary.rs [#5846](https://github.com/apache/arrow-datafusion/pull/5846) (berkaysynnada) +- Moving PipelineFixer above all rules to use ExecutionPlan APIs [#5880](https://github.com/apache/arrow-datafusion/pull/5880) (metesynnada) +- Add assert on hash children partition count [#5768](https://github.com/apache/arrow-datafusion/pull/5768) (duongcongtoai) +- Use ScalarValue for 
single input on math expression [#5891](https://github.com/apache/arrow-datafusion/pull/5891) (viirya) +- Generify SortPreservingMerge (#5882) (#5879) [#5886](https://github.com/apache/arrow-datafusion/pull/5886) (tustvold) +- Fix: allow arbitrary exprs in VALUES clause [#5813](https://github.com/apache/arrow-datafusion/pull/5813) (alamb) diff --git a/dev/changelog/5.0.0.md b/dev/changelog/5.0.0.md new file mode 100644 index 0000000000000..fe8535929b34f --- /dev/null +++ b/dev/changelog/5.0.0.md @@ -0,0 +1,310 @@ + + +## [5.0.0](https://github.com/apache/arrow-datafusion/tree/5.0.0) (2021-08-10) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/4.0.0...5.0.0) + +**Breaking changes:** + +- Box ScalarValue:Lists, reduce size by half size [\#788](https://github.com/apache/arrow-datafusion/pull/788) ([alamb](https://github.com/alamb)) +- JOIN conditions are order dependent [\#778](https://github.com/apache/arrow-datafusion/pull/778) ([seddonm1](https://github.com/seddonm1)) +- Show the result of all optimizer passes in EXPLAIN VERBOSE [\#759](https://github.com/apache/arrow-datafusion/pull/759) ([alamb](https://github.com/alamb)) +- \#723 Datafusion add option in ExecutionConfig to enable/disable parquet pruning [\#749](https://github.com/apache/arrow-datafusion/pull/749) ([lvheyang](https://github.com/lvheyang)) +- Update API for extension planning to include logical plan [\#643](https://github.com/apache/arrow-datafusion/pull/643) ([alamb](https://github.com/alamb)) +- Rename MergeExec to CoalescePartitionsExec [\#635](https://github.com/apache/arrow-datafusion/pull/635) ([andygrove](https://github.com/andygrove)) +- fix 593, reduce cloning by taking ownership in logical planner's `from` fn [\#610](https://github.com/apache/arrow-datafusion/pull/610) ([Jimexist](https://github.com/Jimexist)) +- fix join column handling logic for `On` and `Using` constraints [\#605](https://github.com/apache/arrow-datafusion/pull/605) 
([houqp](https://github.com/houqp)) +- Rewrite pruning logic in terms of PruningStatistics using Array trait \(option 2\) [\#426](https://github.com/apache/arrow-datafusion/pull/426) ([alamb](https://github.com/alamb)) +- Support reading from NdJson formatted data sources [\#404](https://github.com/apache/arrow-datafusion/pull/404) ([heymind](https://github.com/heymind)) +- Add metrics to RepartitionExec [\#398](https://github.com/apache/arrow-datafusion/pull/398) ([andygrove](https://github.com/andygrove)) +- Use 4.x arrow-rs from crates.io rather than git sha [\#395](https://github.com/apache/arrow-datafusion/pull/395) ([alamb](https://github.com/alamb)) +- Return Vec\<bool\> from PredicateBuilder rather than an `Fn` [\#370](https://github.com/apache/arrow-datafusion/pull/370) ([alamb](https://github.com/alamb)) +- Refactor: move RowGroupPredicateBuilder into its own module, rename to PruningPredicateBuilder [\#365](https://github.com/apache/arrow-datafusion/pull/365) ([alamb](https://github.com/alamb)) +- \[Datafusion\] NOW\(\) function support [\#288](https://github.com/apache/arrow-datafusion/pull/288) ([msathis](https://github.com/msathis)) +- Implement select distinct [\#262](https://github.com/apache/arrow-datafusion/pull/262) ([Dandandan](https://github.com/Dandandan)) +- Refactor datafusion/src/physical_plan/common.rs build_file_list to take less param and reuse code [\#253](https://github.com/apache/arrow-datafusion/pull/253) ([Jimexist](https://github.com/Jimexist)) +- Support qualified columns in queries [\#55](https://github.com/apache/arrow-datafusion/pull/55) ([houqp](https://github.com/houqp)) +- Read CSV format text from stdin or memory [\#54](https://github.com/apache/arrow-datafusion/pull/54) ([heymind](https://github.com/heymind)) +- Use atomics for SQLMetric implementation, remove unused name field [\#25](https://github.com/apache/arrow-datafusion/pull/25) ([returnString](https://github.com/returnString)) + +**Implemented enhancements:** + +- Allow
extension nodes to correctly plan physical expressions with relations [\#642](https://github.com/apache/arrow-datafusion/issues/642) +- Filters aren't passed down to table scans in a union [\#557](https://github.com/apache/arrow-datafusion/issues/557) +- Support pruning for `boolean` columns [\#490](https://github.com/apache/arrow-datafusion/issues/490) +- Implement SQLMetrics for RepartitionExec [\#397](https://github.com/apache/arrow-datafusion/issues/397) +- DataFusion benchmarks should show executed plan with metrics after query completes [\#396](https://github.com/apache/arrow-datafusion/issues/396) +- Use published versions of arrow rather than github shas [\#393](https://github.com/apache/arrow-datafusion/issues/393) +- Add Compare to GroupByScalar [\#364](https://github.com/apache/arrow-datafusion/issues/364) +- Reusable "row group pruning" logic [\#363](https://github.com/apache/arrow-datafusion/issues/363) +- Add an Order Preserving merge operator [\#362](https://github.com/apache/arrow-datafusion/issues/362) +- Implement Postgres compatible `now()` function [\#251](https://github.com/apache/arrow-datafusion/issues/251) +- COUNT DISTINCT does not support dictionary types [\#249](https://github.com/apache/arrow-datafusion/issues/249) +- Use standard make_null_array for CASE [\#222](https://github.com/apache/arrow-datafusion/issues/222) +- Implement date_trunc\(\) function [\#203](https://github.com/apache/arrow-datafusion/issues/203) +- COUNT DISTINCT does not support for `Float64` [\#199](https://github.com/apache/arrow-datafusion/issues/199) +- Update SQLMetric to use atomics rather than a Mutex [\#30](https://github.com/apache/arrow-datafusion/issues/30) +- Implement PartialOrd for ScalarValue [\#838](https://github.com/apache/arrow-datafusion/pull/838) ([viirya](https://github.com/viirya)) +- Support date datatypes in max/min [\#820](https://github.com/apache/arrow-datafusion/pull/820) ([viirya](https://github.com/viirya)) +- Implement vectorized 
hashing for DictionaryArray types [\#812](https://github.com/apache/arrow-datafusion/pull/812) ([alamb](https://github.com/alamb)) +- Convert unsupported conditions in left right join to filters [\#796](https://github.com/apache/arrow-datafusion/pull/796) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) +- Implement streaming versions of Dataframe.collect methods [\#789](https://github.com/apache/arrow-datafusion/pull/789) ([andygrove](https://github.com/andygrove)) +- impl from str for column and scalar [\#762](https://github.com/apache/arrow-datafusion/pull/762) ([Jimexist](https://github.com/Jimexist)) +- impl fmt::Display for PlanType [\#752](https://github.com/apache/arrow-datafusion/pull/752) ([Jimexist](https://github.com/Jimexist)) +- Remove unnecessary projection in logical plan optimization phase [\#747](https://github.com/apache/arrow-datafusion/pull/747) ([waynexia](https://github.com/waynexia)) +- Support table columns alias [\#735](https://github.com/apache/arrow-datafusion/pull/735) ([Dandandan](https://github.com/Dandandan)) +- Derive PartialEq for datasource enums [\#734](https://github.com/apache/arrow-datafusion/pull/734) ([alamb](https://github.com/alamb)) +- Allow filetype to be lowercase, Implement FromStr for FileType [\#728](https://github.com/apache/arrow-datafusion/pull/728) ([Jimexist](https://github.com/Jimexist)) +- Update to use arrow 5.0 [\#721](https://github.com/apache/arrow-datafusion/pull/721) ([alamb](https://github.com/alamb)) +- \#554: Lead/lag window function with offset and default value arguments [\#687](https://github.com/apache/arrow-datafusion/pull/687) ([jgoday](https://github.com/jgoday)) +- dedup using join column in wildcard expansion [\#678](https://github.com/apache/arrow-datafusion/pull/678) ([houqp](https://github.com/houqp)) +- Implement metrics for HashJoinExec [\#664](https://github.com/apache/arrow-datafusion/pull/664) 
([andygrove](https://github.com/andygrove)) +- Show physical plan with metrics in benchmark [\#662](https://github.com/apache/arrow-datafusion/pull/662) ([andygrove](https://github.com/andygrove)) +- Allow non-equijoin filters in join condition [\#660](https://github.com/apache/arrow-datafusion/pull/660) ([Dandandan](https://github.com/Dandandan)) +- Add End-to-end test for parquet pruning + metrics for ParquetExec [\#657](https://github.com/apache/arrow-datafusion/pull/657) ([alamb](https://github.com/alamb)) +- Add support for leading field in interval [\#647](https://github.com/apache/arrow-datafusion/pull/647) ([Dandandan](https://github.com/Dandandan)) +- Remove hard-coded PartitionMode from Ballista serde [\#637](https://github.com/apache/arrow-datafusion/pull/637) ([andygrove](https://github.com/andygrove)) +- Ballista: Implement scalable distributed joins [\#634](https://github.com/apache/arrow-datafusion/pull/634) ([andygrove](https://github.com/andygrove)) +- implement rank and dense_rank function and refactor built-in window function evaluation [\#631](https://github.com/apache/arrow-datafusion/pull/631) ([Jimexist](https://github.com/Jimexist)) +- Improve "field not found" error messages [\#625](https://github.com/apache/arrow-datafusion/pull/625) ([andygrove](https://github.com/andygrove)) +- Support modulus op [\#577](https://github.com/apache/arrow-datafusion/pull/577) ([gangliao](https://github.com/gangliao)) +- implement `std::default::Default` for execution config [\#570](https://github.com/apache/arrow-datafusion/pull/570) ([Jimexist](https://github.com/Jimexist)) +- `to_timestamp_millis()`, `to_timestamp_micros()`, `to_timestamp_seconds()` [\#567](https://github.com/apache/arrow-datafusion/pull/567) ([velvia](https://github.com/velvia)) +- Filter push down for Union [\#559](https://github.com/apache/arrow-datafusion/pull/559) ([Dandandan](https://github.com/Dandandan)) +- Implement window functions with `partition_by` clause 
[\#558](https://github.com/apache/arrow-datafusion/pull/558) ([Jimexist](https://github.com/Jimexist)) +- support table alias in join clause [\#547](https://github.com/apache/arrow-datafusion/pull/547) ([houqp](https://github.com/houqp)) +- Not equal predicate in physical_planning pruning [\#544](https://github.com/apache/arrow-datafusion/pull/544) ([jgoday](https://github.com/jgoday)) +- add error handling and boundary checking for window frames [\#530](https://github.com/apache/arrow-datafusion/pull/530) ([Jimexist](https://github.com/Jimexist)) +- Implement window functions with `order_by` clause [\#520](https://github.com/apache/arrow-datafusion/pull/520) ([Jimexist](https://github.com/Jimexist)) +- support group by column positions [\#519](https://github.com/apache/arrow-datafusion/pull/519) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jychen7](https://github.com/jychen7)) +- Implement constant folding for CAST [\#513](https://github.com/apache/arrow-datafusion/pull/513) ([msathis](https://github.com/msathis)) +- Add window frame constructs - alternative [\#506](https://github.com/apache/arrow-datafusion/pull/506) ([Jimexist](https://github.com/Jimexist)) +- Add `partition by` constructs in window functions and modify logical planning [\#501](https://github.com/apache/arrow-datafusion/pull/501) ([Jimexist](https://github.com/Jimexist)) +- Add support for boolean columns in pruning logic [\#500](https://github.com/apache/arrow-datafusion/pull/500) ([alamb](https://github.com/alamb)) +- \#215 resolve aliases for group by exprs [\#485](https://github.com/apache/arrow-datafusion/pull/485) ([jychen7](https://github.com/jychen7)) +- Support anti join [\#482](https://github.com/apache/arrow-datafusion/pull/482) ([Dandandan](https://github.com/Dandandan)) +- Support semi join [\#470](https://github.com/apache/arrow-datafusion/pull/470) ([Dandandan](https://github.com/Dandandan)) +- add `order by` construct in window function and logical plans 
[\#463](https://github.com/apache/arrow-datafusion/pull/463) ([Jimexist](https://github.com/Jimexist)) +- Remove redundant filters \(e.g. c\> 5 AND c\>5 --\> c\>5\) [\#436](https://github.com/apache/arrow-datafusion/pull/436) ([jgoday](https://github.com/jgoday)) +- fix: display the content of debug explain [\#434](https://github.com/apache/arrow-datafusion/pull/434) ([NGA-TRAN](https://github.com/NGA-TRAN)) +- implement lead and lag built-in window function [\#429](https://github.com/apache/arrow-datafusion/pull/429) ([Jimexist](https://github.com/Jimexist)) +- add support for ndjson for datafusion-cli [\#427](https://github.com/apache/arrow-datafusion/pull/427) ([Jimexist](https://github.com/Jimexist)) +- add `first_value`, `last_value`, and `nth_value` built-in window functions [\#403](https://github.com/apache/arrow-datafusion/pull/403) ([Jimexist](https://github.com/Jimexist)) +- export both `now` and `random` functions [\#389](https://github.com/apache/arrow-datafusion/pull/389) ([Jimexist](https://github.com/Jimexist)) +- Function to create `ArrayRef` from an iterator of ScalarValues [\#381](https://github.com/apache/arrow-datafusion/pull/381) ([alamb](https://github.com/alamb)) +- Sort preserving merge \(\#362\) [\#379](https://github.com/apache/arrow-datafusion/pull/379) ([tustvold](https://github.com/tustvold)) +- Add support for multiple partitions with SortExec \(\#362\) [\#378](https://github.com/apache/arrow-datafusion/pull/378) ([tustvold](https://github.com/tustvold)) +- add window expression stream, delegated window aggregation to aggregate functions, and implement `row_number` [\#375](https://github.com/apache/arrow-datafusion/pull/375) ([Jimexist](https://github.com/Jimexist)) +- Add PartialOrd and Ord to GroupByScalar \(\#364\) [\#368](https://github.com/apache/arrow-datafusion/pull/368) ([tustvold](https://github.com/tustvold)) +- Implement readable explain plans for physical plans [\#337](https://github.com/apache/arrow-datafusion/pull/337)
([alamb](https://github.com/alamb)) +- Add window expression part 1 - logical and physical planning, structure, to/from proto, and explain, for empty over clause only [\#334](https://github.com/apache/arrow-datafusion/pull/334) ([Jimexist](https://github.com/Jimexist)) +- Use NullArray to Pass row count to ScalarFunctions that take 0 arguments [\#328](https://github.com/apache/arrow-datafusion/pull/328) ([Jimexist](https://github.com/Jimexist)) +- add --quiet/-q flag and allow timing info to be turned on/off [\#323](https://github.com/apache/arrow-datafusion/pull/323) ([Jimexist](https://github.com/Jimexist)) +- Implement hash partitioned aggregation [\#320](https://github.com/apache/arrow-datafusion/pull/320) ([Dandandan](https://github.com/Dandandan)) +- Support COUNT\(DISTINCT timestamps\) [\#319](https://github.com/apache/arrow-datafusion/pull/319) ([charlibot](https://github.com/charlibot)) +- add random SQL function [\#303](https://github.com/apache/arrow-datafusion/pull/303) ([Jimexist](https://github.com/Jimexist)) +- allow datafusion cli to take -- comments [\#296](https://github.com/apache/arrow-datafusion/pull/296) ([Jimexist](https://github.com/Jimexist)) +- Add json print format mode to datafusion cli [\#295](https://github.com/apache/arrow-datafusion/pull/295) ([Jimexist](https://github.com/Jimexist)) +- Add print format param with support for tsv print format to datafusion cli [\#292](https://github.com/apache/arrow-datafusion/pull/292) ([Jimexist](https://github.com/Jimexist)) +- Add print format param and support for csv print format to datafusion cli [\#289](https://github.com/apache/arrow-datafusion/pull/289) ([Jimexist](https://github.com/Jimexist)) +- allow datafusion-cli to take a file param [\#285](https://github.com/apache/arrow-datafusion/pull/285) ([Jimexist](https://github.com/Jimexist)) +- add param validation for datafusion-cli [\#284](https://github.com/apache/arrow-datafusion/pull/284) ([Jimexist](https://github.com/Jimexist)) +- 
\[breaking change\] fix 265, log should be log10, and add ln [\#271](https://github.com/apache/arrow-datafusion/pull/271) ([Jimexist](https://github.com/Jimexist)) +- Implement count distinct for dictionary arrays [\#256](https://github.com/apache/arrow-datafusion/pull/256) ([alamb](https://github.com/alamb)) +- Count distinct floats [\#252](https://github.com/apache/arrow-datafusion/pull/252) ([pjmore](https://github.com/pjmore)) +- Add rule to eliminate `LIMIT 0` and replace it with an `EmptyRelation` [\#213](https://github.com/apache/arrow-datafusion/pull/213) ([Dandandan](https://github.com/Dandandan)) +- Allow table providers to indicate their type for catalog metadata [\#205](https://github.com/apache/arrow-datafusion/pull/205) ([returnString](https://github.com/returnString)) +- Use arrow eq kernels in CaseWhen expression evaluation [\#52](https://github.com/apache/arrow-datafusion/pull/52) ([Dandandan](https://github.com/Dandandan)) +- Re-export Arrow and Parquet crates from DataFusion [\#39](https://github.com/apache/arrow-datafusion/pull/39) ([returnString](https://github.com/returnString)) +- \[DataFusion\] Optimize hash join inner workings, null handling fix [\#24](https://github.com/apache/arrow-datafusion/pull/24) ([Dandandan](https://github.com/Dandandan)) +- \[ARROW-12441\] \[DataFusion\] Cross join implementation [\#11](https://github.com/apache/arrow-datafusion/pull/11) ([Dandandan](https://github.com/Dandandan)) + +**Fixed bugs:** + +- Projection pushdown removes unqualified column names even when they are used [\#617](https://github.com/apache/arrow-datafusion/issues/617) +- Panic while running join datatypes/schema.rs:165:10 [\#601](https://github.com/apache/arrow-datafusion/issues/601) +- Indentation is incorrect for joins in formatted physical plans [\#345](https://github.com/apache/arrow-datafusion/issues/345) +- Error while running `COUNT DISTINCT (timestamp)`: 'Unexpected DataType for list 
[\#314](https://github.com/apache/arrow-datafusion/issues/314) +- When joining two tables, get Error: Plan\("Schema contains duplicate unqualified field name \'xxx\'"\) [\#311](https://github.com/apache/arrow-datafusion/issues/311) +- Incorrect answers with SELECT DISTINCT queries [\#250](https://github.com/apache/arrow-datafusion/issues/250) +- Intermittent failure in CI join_with_hash_collision [\#227](https://github.com/apache/arrow-datafusion/issues/227) +- `Concat` from Dataframe API no longer accepts multiple expressions [\#226](https://github.com/apache/arrow-datafusion/issues/226) +- Fix right, full join handling when having multiple non-matching rows at the left side [\#845](https://github.com/apache/arrow-datafusion/pull/845) ([Dandandan](https://github.com/Dandandan)) +- Qualified field resolution too strict [\#810](https://github.com/apache/arrow-datafusion/pull/810) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([seddonm1](https://github.com/seddonm1)) +- Better join order resolution logic [\#797](https://github.com/apache/arrow-datafusion/pull/797) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([seddonm1](https://github.com/seddonm1)) +- Produce correct answers for Group BY NULL \(Option 1\) [\#793](https://github.com/apache/arrow-datafusion/pull/793) ([alamb](https://github.com/alamb)) +- Use consistent version of string_to_timestamp_nanos in DataFusion [\#767](https://github.com/apache/arrow-datafusion/pull/767) ([alamb](https://github.com/alamb)) +- \#723 limit pruning rule to simple expression [\#764](https://github.com/apache/arrow-datafusion/pull/764) ([lvheyang](https://github.com/lvheyang)) +- \#699 fix return type conflict when calling builtin math functions [\#716](https://github.com/apache/arrow-datafusion/pull/716) ([lvheyang](https://github.com/lvheyang)) +- Fix Date32 and Date64 parquet row group pruning [\#690](https://github.com/apache/arrow-datafusion/pull/690) ([alamb](https://github.com/alamb)) +-
Remove qualifiers on pushed down predicates / Fix parquet pruning [\#689](https://github.com/apache/arrow-datafusion/pull/689) ([alamb](https://github.com/alamb)) +- use `Weak` ptr to break catalog list \<\> info schema cyclic reference [\#681](https://github.com/apache/arrow-datafusion/pull/681) ([crepererum](https://github.com/crepererum)) +- honor table name for csv/parquet scan in ballista plan serde [\#629](https://github.com/apache/arrow-datafusion/pull/629) ([houqp](https://github.com/houqp)) +- fix 621, where unnamed window functions shall be differentiated by partition and order by clause [\#622](https://github.com/apache/arrow-datafusion/pull/622) ([Jimexist](https://github.com/Jimexist)) +- RFC: Do not prune out unnecessary columns with unqualified references [\#619](https://github.com/apache/arrow-datafusion/pull/619) ([alamb](https://github.com/alamb)) +- \[fix\] select \* on empty table [\#613](https://github.com/apache/arrow-datafusion/pull/613) ([rdettai](https://github.com/rdettai)) +- fix 592, support alias in window functions [\#607](https://github.com/apache/arrow-datafusion/pull/607) ([Jimexist](https://github.com/Jimexist)) +- RepartitionExec should not error if output has hung up [\#576](https://github.com/apache/arrow-datafusion/pull/576) ([alamb](https://github.com/alamb)) +- Fix pruning on not equal predicate [\#561](https://github.com/apache/arrow-datafusion/pull/561) ([alamb](https://github.com/alamb)) +- hash float arrays using primitive unsigned integer type [\#556](https://github.com/apache/arrow-datafusion/pull/556) ([houqp](https://github.com/houqp)) +- Return errors properly from RepartitionExec [\#521](https://github.com/apache/arrow-datafusion/pull/521) ([alamb](https://github.com/alamb)) +- refactor sort exec stream and combine batches [\#515](https://github.com/apache/arrow-datafusion/pull/515) ([Jimexist](https://github.com/Jimexist)) +- Fix display of execution time in datafusion-cli
[\#514](https://github.com/apache/arrow-datafusion/pull/514) ([Dandandan](https://github.com/Dandandan)) +- Wrong aggregation arguments error. [\#505](https://github.com/apache/arrow-datafusion/pull/505) ([jgoday](https://github.com/jgoday)) +- fix window aggregation with alias and add integration test case [\#454](https://github.com/apache/arrow-datafusion/pull/454) ([Jimexist](https://github.com/Jimexist)) +- fix: don't duplicate existing filters [\#409](https://github.com/apache/arrow-datafusion/pull/409) ([e-dard](https://github.com/e-dard)) +- Fixed incorrect logical type in GroupByScalar. [\#391](https://github.com/apache/arrow-datafusion/pull/391) ([jorgecarleitao](https://github.com/jorgecarleitao)) +- Fix indented display for multi-child nodes [\#358](https://github.com/apache/arrow-datafusion/pull/358) ([alamb](https://github.com/alamb)) +- Fix SQL planner to support multibyte column names [\#357](https://github.com/apache/arrow-datafusion/pull/357) ([agatan](https://github.com/agatan)) +- Fix wrong projection 'optimization' [\#268](https://github.com/apache/arrow-datafusion/pull/268) ([Dandandan](https://github.com/Dandandan)) +- Fix Left join implementation is incorrect for 0 or multiple batches on the right side [\#238](https://github.com/apache/arrow-datafusion/pull/238) ([Dandandan](https://github.com/Dandandan)) +- Count distinct boolean [\#230](https://github.com/apache/arrow-datafusion/pull/230) ([pjmore](https://github.com/pjmore)) +- Fix Filter / where clause without column names is removed in optimization pass [\#225](https://github.com/apache/arrow-datafusion/pull/225) ([Dandandan](https://github.com/Dandandan)) + +**Documentation updates:** + +- No way to get to the examples from docs.rs [\#186](https://github.com/apache/arrow-datafusion/issues/186) +- Update docs to use vendored version of arrow [\#772](https://github.com/apache/arrow-datafusion/pull/772) ([alamb](https://github.com/alamb)) +- Fix typo in DEVELOPERS.md 
[\#692](https://github.com/apache/arrow-datafusion/pull/692) ([lvheyang](https://github.com/lvheyang)) +- update stale documentations related to window functions [\#598](https://github.com/apache/arrow-datafusion/pull/598) ([Jimexist](https://github.com/Jimexist)) +- update readme to reflect work on window functions [\#471](https://github.com/apache/arrow-datafusion/pull/471) ([Jimexist](https://github.com/Jimexist)) +- Add examples section to datafusion crate doc [\#457](https://github.com/apache/arrow-datafusion/pull/457) ([mluts](https://github.com/mluts)) +- add invariants spec [\#443](https://github.com/apache/arrow-datafusion/pull/443) ([houqp](https://github.com/houqp)) +- add output field name rfc [\#422](https://github.com/apache/arrow-datafusion/pull/422) ([houqp](https://github.com/houqp)) +- Update more docs and also the developer.md doc [\#414](https://github.com/apache/arrow-datafusion/pull/414) ([Jimexist](https://github.com/Jimexist)) +- use prettier to format md files [\#367](https://github.com/apache/arrow-datafusion/pull/367) ([Jimexist](https://github.com/Jimexist)) +- Add new logo svg with white background [\#313](https://github.com/apache/arrow-datafusion/pull/313) ([parthsarthy](https://github.com/parthsarthy)) +- Add projects \(Squirtle and Tensorbase\) to list in readme [\#312](https://github.com/apache/arrow-datafusion/pull/312) ([parthsarthy](https://github.com/parthsarthy)) +- docs - fix the ballista link [\#274](https://github.com/apache/arrow-datafusion/pull/274) ([haoxins](https://github.com/haoxins)) +- misc\(README\): Replace Cube.js with Cube Store [\#248](https://github.com/apache/arrow-datafusion/pull/248) ([ovr](https://github.com/ovr)) +- Initial docs for SQL syntax [\#242](https://github.com/apache/arrow-datafusion/pull/242) ([Dandandan](https://github.com/Dandandan)) +- Deduplicate README.md [\#79](https://github.com/apache/arrow-datafusion/pull/79) ([msathis](https://github.com/msathis)) + +**Performance improvements:** + +- 
Speed up inlist for strings and primitives [\#813](https://github.com/apache/arrow-datafusion/pull/813) ([Dandandan](https://github.com/Dandandan)) +- perf: improve performance of `SortPreservingMergeExec` operator [\#722](https://github.com/apache/arrow-datafusion/pull/722) ([e-dard](https://github.com/e-dard)) +- Optimize min/max queries with table statistics [\#719](https://github.com/apache/arrow-datafusion/pull/719) ([b41sh](https://github.com/b41sh)) +- perf: Improve materialisation performance of SortPreservingMergeExec [\#691](https://github.com/apache/arrow-datafusion/pull/691) ([e-dard](https://github.com/e-dard)) +- Optimize count\(\*\) with table statistics [\#620](https://github.com/apache/arrow-datafusion/pull/620) ([Dandandan](https://github.com/Dandandan)) +- optimize window function's `find_ranges_in_range` [\#595](https://github.com/apache/arrow-datafusion/pull/595) ([Jimexist](https://github.com/Jimexist)) +- Collapse sort into window expr and do sort within logical phase [\#571](https://github.com/apache/arrow-datafusion/pull/571) ([Jimexist](https://github.com/Jimexist)) +- Use repartition in window functions to speed up [\#569](https://github.com/apache/arrow-datafusion/pull/569) ([Jimexist](https://github.com/Jimexist)) +- Constant fold / optimize `to_timestamp` function during planning [\#387](https://github.com/apache/arrow-datafusion/pull/387) ([msathis](https://github.com/msathis)) +- Speed up `create_batch_from_map` [\#339](https://github.com/apache/arrow-datafusion/pull/339) ([Dandandan](https://github.com/Dandandan)) +- Simplify math expression code \(use unary kernel\) [\#309](https://github.com/apache/arrow-datafusion/pull/309) ([Dandandan](https://github.com/Dandandan)) + +**Closed issues:** + +- Confirm git tagging strategy for releases [\#770](https://github.com/apache/arrow-datafusion/issues/770) +- arrow::util::pretty::pretty_format_batches missing [\#769](https://github.com/apache/arrow-datafusion/issues/769) +- move the 
`assert_batches_eq!` macros to a non part of datafusion [\#745](https://github.com/apache/arrow-datafusion/issues/745) +- fix an issue where aliases are not respected in generating downstream schemas in window expr [\#592](https://github.com/apache/arrow-datafusion/issues/592) +- make the planner to print more succinct and useful information in window function explain clause [\#526](https://github.com/apache/arrow-datafusion/issues/526) +- move window frame module to be in `logical_plan` [\#517](https://github.com/apache/arrow-datafusion/issues/517) +- use a more rust idiomatic way of handling nth_value [\#448](https://github.com/apache/arrow-datafusion/issues/448) +- create a test with more than one partition for window functions [\#435](https://github.com/apache/arrow-datafusion/issues/435) +- COUNT DISTINCT does not support for `Boolean` [\#202](https://github.com/apache/arrow-datafusion/issues/202) +- Read CSV format text from stdin or memory [\#198](https://github.com/apache/arrow-datafusion/issues/198) +- Fix null handling hash join [\#195](https://github.com/apache/arrow-datafusion/issues/195) +- Allow TableProviders to indicate their type for the information schema [\#191](https://github.com/apache/arrow-datafusion/issues/191) +- Make DataFrame extensible [\#190](https://github.com/apache/arrow-datafusion/issues/190) +- TPC-H Query 19 [\#170](https://github.com/apache/arrow-datafusion/issues/170) +- TPC-H Query 7 [\#161](https://github.com/apache/arrow-datafusion/issues/161) +- Upgrade hashbrown to 0.10 [\#151](https://github.com/apache/arrow-datafusion/issues/151) +- Implement vectorized hashing for hash aggregate [\#149](https://github.com/apache/arrow-datafusion/issues/149) +- More efficient LEFT join implementation [\#143](https://github.com/apache/arrow-datafusion/issues/143) +- Implement vectorized hashing [\#142](https://github.com/apache/arrow-datafusion/issues/142) +- RFC Roadmap for 2021 \(DataFusion\) 
[\#140](https://github.com/apache/arrow-datafusion/issues/140) +- Implement hash partitioning [\#131](https://github.com/apache/arrow-datafusion/issues/131) +- Grouping by column position [\#110](https://github.com/apache/arrow-datafusion/issues/110) +- \[Datafusion\] GROUP BY with a high cardinality doesn't seem to finish [\#107](https://github.com/apache/arrow-datafusion/issues/107) +- \[Rust\] Add support for JSON data sources [\#103](https://github.com/apache/arrow-datafusion/issues/103) +- \[Rust\] Implement metrics framework [\#95](https://github.com/apache/arrow-datafusion/issues/95) +- Publicly export Arrow crate from datafusion [\#36](https://github.com/apache/arrow-datafusion/issues/36) +- Implement hash-partitioned hash aggregate [\#27](https://github.com/apache/arrow-datafusion/issues/27) +- Consider using GitHub pages for DataFusion/Ballista documentation [\#18](https://github.com/apache/arrow-datafusion/issues/18) +- Update "repository" in Cargo.toml [\#16](https://github.com/apache/arrow-datafusion/issues/16) + +**Merged pull requests:** + +- Use `RawTable` API in hash join [\#827](https://github.com/apache/arrow-datafusion/pull/827) ([Dandandan](https://github.com/Dandandan)) +- Add test for window functions on dictionary [\#823](https://github.com/apache/arrow-datafusion/pull/823) ([alamb](https://github.com/alamb)) +- Update dependencies: prost to 0.8 and tonic to 0.5 [\#818](https://github.com/apache/arrow-datafusion/pull/818) ([alamb](https://github.com/alamb)) +- Move `hash_array` into hash_utils.rs [\#807](https://github.com/apache/arrow-datafusion/pull/807) ([alamb](https://github.com/alamb)) +- Remove GroupByScalar and use ScalarValue in preparation for supporting null values in GroupBy [\#786](https://github.com/apache/arrow-datafusion/pull/786) ([alamb](https://github.com/alamb)) +- fix 226, make `concat`, `concat_ws`, and `random` work with `Python` crate [\#761](https://github.com/apache/arrow-datafusion/pull/761)
([Jimexist](https://github.com/Jimexist)) +- Test for parquet pruning disabling [\#754](https://github.com/apache/arrow-datafusion/pull/754) ([alamb](https://github.com/alamb)) +- Add explain verbose with limit push down [\#751](https://github.com/apache/arrow-datafusion/pull/751) ([Jimexist](https://github.com/Jimexist)) +- Move assert_batches_eq! macros to test_utils.rs [\#746](https://github.com/apache/arrow-datafusion/pull/746) ([alamb](https://github.com/alamb)) +- Show optimized physical and logical plans in EXPLAIN [\#744](https://github.com/apache/arrow-datafusion/pull/744) ([alamb](https://github.com/alamb)) +- update `python` crate to support latest pyo3 syntax and gil semantics [\#741](https://github.com/apache/arrow-datafusion/pull/741) ([Jimexist](https://github.com/Jimexist)) +- update `python` crate dependencies [\#740](https://github.com/apache/arrow-datafusion/pull/740) ([Jimexist](https://github.com/Jimexist)) +- provide more details on required .parquet file extension error message [\#729](https://github.com/apache/arrow-datafusion/pull/729) ([Jimexist](https://github.com/Jimexist)) +- split up windows functions into a dedicated module with separate files [\#724](https://github.com/apache/arrow-datafusion/pull/724) ([Jimexist](https://github.com/Jimexist)) +- Use pytest in integration test [\#715](https://github.com/apache/arrow-datafusion/pull/715) ([Jimexist](https://github.com/Jimexist)) +- replace once iter chain with array::IntoIter [\#704](https://github.com/apache/arrow-datafusion/pull/704) ([houqp](https://github.com/houqp)) +- avoid iterator materialization in column index lookup [\#703](https://github.com/apache/arrow-datafusion/pull/703) ([houqp](https://github.com/houqp)) +- Fix build with 1.52.1 [\#696](https://github.com/apache/arrow-datafusion/pull/696) ([alamb](https://github.com/alamb)) +- Fix test output due to logical merge conflict [\#694](https://github.com/apache/arrow-datafusion/pull/694) ([alamb](https://github.com/alamb))
+- add more integration tests [\#668](https://github.com/apache/arrow-datafusion/pull/668) ([Jimexist](https://github.com/Jimexist)) +- Bump arrow and parquet versions to 4.4 [\#654](https://github.com/apache/arrow-datafusion/pull/654) ([toddtreece](https://github.com/toddtreece)) +- Add query 15 to TPC-H queries [\#645](https://github.com/apache/arrow-datafusion/pull/645) ([Dandandan](https://github.com/Dandandan)) +- Improve error message and comments [\#641](https://github.com/apache/arrow-datafusion/pull/641) ([alamb](https://github.com/alamb)) +- add integration tests for rank, dense_rank, fix last_value evaluation with rank [\#638](https://github.com/apache/arrow-datafusion/pull/638) ([Jimexist](https://github.com/Jimexist)) +- round trip TPCH queries in tests [\#630](https://github.com/apache/arrow-datafusion/pull/630) ([houqp](https://github.com/houqp)) +- use Into\<Expr\> as argument type wherever applicable [\#615](https://github.com/apache/arrow-datafusion/pull/615) ([houqp](https://github.com/houqp)) +- reuse alias map in aggregate logical planning and refactor position resolution [\#606](https://github.com/apache/arrow-datafusion/pull/606) ([Jimexist](https://github.com/Jimexist)) +- fix clippy warnings [\#581](https://github.com/apache/arrow-datafusion/pull/581) ([Jimexist](https://github.com/Jimexist)) +- Add benchmarks to window function queries [\#564](https://github.com/apache/arrow-datafusion/pull/564) ([Jimexist](https://github.com/Jimexist)) +- reuse code for now function expr creation [\#548](https://github.com/apache/arrow-datafusion/pull/548) ([houqp](https://github.com/houqp)) +- turn on clippy rule for needless borrow [\#545](https://github.com/apache/arrow-datafusion/pull/545) ([Jimexist](https://github.com/Jimexist)) +- Refactor hash aggregate's planner building code [\#539](https://github.com/apache/arrow-datafusion/pull/539) ([Jimexist](https://github.com/Jimexist)) +- Cleanup Repartition Exec code
[\#538](https://github.com/apache/arrow-datafusion/pull/538) ([alamb](https://github.com/alamb)) +- reuse datafusion physical planner in ballista building from protobuf [\#532](https://github.com/apache/arrow-datafusion/pull/532) ([Jimexist](https://github.com/Jimexist)) +- remove redundant `into_iter()` calls [\#527](https://github.com/apache/arrow-datafusion/pull/527) ([Jimexist](https://github.com/Jimexist)) +- Fix 517 - move `window_frames` module to `logical_plan` [\#518](https://github.com/apache/arrow-datafusion/pull/518) ([Jimexist](https://github.com/Jimexist)) +- Refactor window aggregation, simplify batch processing logic [\#516](https://github.com/apache/arrow-datafusion/pull/516) ([Jimexist](https://github.com/Jimexist)) +- Add datafusion::test_util, resolve test data paths without env vars [\#498](https://github.com/apache/arrow-datafusion/pull/498) ([mluts](https://github.com/mluts)) +- Avoid warnings in tests when compiling without default features [\#489](https://github.com/apache/arrow-datafusion/pull/489) ([alamb](https://github.com/alamb)) +- update cargo.toml in python crate and fix unit test due to hash joins [\#483](https://github.com/apache/arrow-datafusion/pull/483) ([Jimexist](https://github.com/Jimexist)) +- use prettier check in CI [\#453](https://github.com/apache/arrow-datafusion/pull/453) ([Jimexist](https://github.com/Jimexist)) +- Optimize `nth_value`, remove `first_value`, `last_value` structs and use idiomatic rust style [\#452](https://github.com/apache/arrow-datafusion/pull/452) ([Jimexist](https://github.com/Jimexist)) +- Fixed typo / logical merge conflict [\#433](https://github.com/apache/arrow-datafusion/pull/433) ([jorgecarleitao](https://github.com/jorgecarleitao)) +- include test data and add aggregation tests in integration test [\#425](https://github.com/apache/arrow-datafusion/pull/425) ([Jimexist](https://github.com/Jimexist)) +- Add some padding around the logo 
[\#411](https://github.com/apache/arrow-datafusion/pull/411) ([parthsarthy](https://github.com/parthsarthy)) +- Benchmark subcommand to distinguish between DataFusion and Ballista [\#402](https://github.com/apache/arrow-datafusion/pull/402) ([jgoday](https://github.com/jgoday)) +- refactor datafusion/`scalar_value` to use more macro and avoid dup code [\#392](https://github.com/apache/arrow-datafusion/pull/392) ([Jimexist](https://github.com/Jimexist)) +- Update TPC-H benchmark to show physical plan when debug mode is enabled [\#386](https://github.com/apache/arrow-datafusion/pull/386) ([andygrove](https://github.com/andygrove)) +- Update arrow dependencies again [\#341](https://github.com/apache/arrow-datafusion/pull/341) ([alamb](https://github.com/alamb)) +- Update arrow-rs deps [\#317](https://github.com/apache/arrow-datafusion/pull/317) ([alamb](https://github.com/alamb)) +- Update PR template by commenting out instructions [\#315](https://github.com/apache/arrow-datafusion/pull/315) ([alamb](https://github.com/alamb)) +- fix clippy warning [\#286](https://github.com/apache/arrow-datafusion/pull/286) ([Jimexist](https://github.com/Jimexist)) +- add integration test to compare datafusion-cli against psql [\#281](https://github.com/apache/arrow-datafusion/pull/281) ([Jimexist](https://github.com/Jimexist)) +- Update arrow deps [\#269](https://github.com/apache/arrow-datafusion/pull/269) ([alamb](https://github.com/alamb)) +- Use multi-stage build dockerfile in datafusion-cli and reduce image size from 2.16GB to 89.9MB [\#266](https://github.com/apache/arrow-datafusion/pull/266) ([Jimexist](https://github.com/Jimexist)) +- Enable redundant_field_names clippy lint [\#261](https://github.com/apache/arrow-datafusion/pull/261) ([Dandandan](https://github.com/Dandandan)) +- fix clippy lint [\#259](https://github.com/apache/arrow-datafusion/pull/259) ([alamb](https://github.com/alamb)) +- Move datafusion-cli to new crate 
[\#231](https://github.com/apache/arrow-datafusion/pull/231) ([Dandandan](https://github.com/Dandandan)) +- Make test join_with_hash_collision deterministic [\#229](https://github.com/apache/arrow-datafusion/pull/229) ([Dandandan](https://github.com/Dandandan)) +- Update arrow-rs deps \(to fix build due to flatbuffers update\) [\#224](https://github.com/apache/arrow-datafusion/pull/224) ([alamb](https://github.com/alamb)) +- Use standard make_null_array for CASE [\#223](https://github.com/apache/arrow-datafusion/pull/223) ([alamb](https://github.com/alamb)) +- update arrow-rs deps to latest master [\#216](https://github.com/apache/arrow-datafusion/pull/216) ([alamb](https://github.com/alamb)) +- MINOR: Remove empty rust dir [\#61](https://github.com/apache/arrow-datafusion/pull/61) ([andygrove](https://github.com/andygrove)) diff --git a/dev/changelog/6.0.0.md b/dev/changelog/6.0.0.md new file mode 100644 index 0000000000000..68ce4802aab2d --- /dev/null +++ b/dev/changelog/6.0.0.md @@ -0,0 +1,204 @@ + + +## [6.0.0](https://github.com/apache/arrow-datafusion/tree/6.0.0) (2021-11-13) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/5.0.0...6.0.0) + +**Breaking changes:** + +- Removed deprecated with_concurrency [\#1200](https://github.com/apache/arrow-datafusion/pull/1200) ([rdettai](https://github.com/rdettai)) +- File partitioning for ListingTable [\#1141](https://github.com/apache/arrow-datafusion/pull/1141) ([rdettai](https://github.com/rdettai)) +- Add function volatility to Signature [\#1071](https://github.com/apache/arrow-datafusion/pull/1071) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([pjmore](https://github.com/pjmore)) +- fix: allow duplicate field names in table join, fix output with duplicated names [\#1023](https://github.com/apache/arrow-datafusion/pull/1023) ([houqp](https://github.com/houqp)) +- Make TableProvider.scan\(\) and PhysicalPlanner::create_physical_plan\(\) async 
[\#1013](https://github.com/apache/arrow-datafusion/pull/1013) ([rdettai](https://github.com/rdettai)) +- Reorganize table providers by table format [\#1010](https://github.com/apache/arrow-datafusion/pull/1010) ([rdettai](https://github.com/rdettai)) +- Make Metrics::labels\(\) public [\#999](https://github.com/apache/arrow-datafusion/pull/999) ([alamb](https://github.com/alamb)) +- Rename NthValue::{first_value,last_value,nth_value} to satisfy clippy in Rust 1.55 [\#986](https://github.com/apache/arrow-datafusion/pull/986) ([alamb](https://github.com/alamb)) +- Move CBOs and Statistics to physical plan [\#965](https://github.com/apache/arrow-datafusion/pull/965) ([rdettai](https://github.com/rdettai)) +- Update to sqlparser v 0.10.0 [\#934](https://github.com/apache/arrow-datafusion/pull/934) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- FilePartition and PartitionedFile for scanning flexibility [\#932](https://github.com/apache/arrow-datafusion/pull/932) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([yjshen](https://github.com/yjshen)) +- Improve SQLMetric APIs, port existing metrics [\#908](https://github.com/apache/arrow-datafusion/pull/908) ([alamb](https://github.com/alamb)) +- Add support for EXPLAIN ANALYZE [\#858](https://github.com/apache/arrow-datafusion/pull/858) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Rename concurrency to target_partitions [\#706](https://github.com/apache/arrow-datafusion/pull/706) ([andygrove](https://github.com/andygrove)) + +**Implemented enhancements:** + +- Add booleans support to the `CASE` statement [\#1156](https://github.com/apache/arrow-datafusion/issues/1156) +- Implement General Purpose Constant Folding with the Expression Evaluator [\#1070](https://github.com/apache/arrow-datafusion/issues/1070) +- Mark volatility categories of functions 
[\#1069](https://github.com/apache/arrow-datafusion/issues/1069) +- Add "show" support to DataFrame API [\#937](https://github.com/apache/arrow-datafusion/issues/937) +- Add support for TRIM BOTH/LEADING/TRAILING [\#935](https://github.com/apache/arrow-datafusion/issues/935) +- Add "baseline" metrics to all built in operators [\#866](https://github.com/apache/arrow-datafusion/issues/866) +- Add SQL support for referencing fields in structs [\#119](https://github.com/apache/arrow-datafusion/issues/119) +- add filename completer for create table statement [\#1278](https://github.com/apache/arrow-datafusion/pull/1278) ([Jimexist](https://github.com/Jimexist)) +- Add drop table support [\#1266](https://github.com/apache/arrow-datafusion/pull/1266) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([viirya](https://github.com/viirya)) +- Dataframe supports except and update readme [\#1261](https://github.com/apache/arrow-datafusion/pull/1261) ([xudong963](https://github.com/xudong963)) +- Implement EXCEPT & EXCEPT DISTINCT [\#1259](https://github.com/apache/arrow-datafusion/pull/1259) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Add DataFrame support for `INTERSECT` and update readme [\#1258](https://github.com/apache/arrow-datafusion/pull/1258) ([xudong963](https://github.com/xudong963)) +- use arrow 6.1.0 [\#1255](https://github.com/apache/arrow-datafusion/pull/1255) ([Jimexist](https://github.com/Jimexist)) +- fix 1250, add editor support for datafusion cli with validation [\#1251](https://github.com/apache/arrow-datafusion/pull/1251) ([Jimexist](https://github.com/Jimexist)) +- Add support for `create table as` via MemTable [\#1243](https://github.com/apache/arrow-datafusion/pull/1243) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) +- Add cli show columns command to describe tables 
[\#1231](https://github.com/apache/arrow-datafusion/pull/1231) ([Jimexist](https://github.com/Jimexist)) +- datafusion-cli to add list table command [\#1229](https://github.com/apache/arrow-datafusion/pull/1229) ([Jimexist](https://github.com/Jimexist)) +- datafusion cli to handle EoF and interrupt signal [\#1225](https://github.com/apache/arrow-datafusion/pull/1225) ([Jimexist](https://github.com/Jimexist)) +- add \q as quit command and add \? for help [\#1224](https://github.com/apache/arrow-datafusion/pull/1224) ([Jimexist](https://github.com/Jimexist)) +- Add algebraic simplifications to constant_folding [\#1208](https://github.com/apache/arrow-datafusion/pull/1208) ([matthewmturner](https://github.com/matthewmturner)) +- Improve GetIndexedFieldExpr adding utf8 key based access for struct v… [\#1204](https://github.com/apache/arrow-datafusion/pull/1204) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Igosuki](https://github.com/Igosuki)) +- Fix `between` in select query [\#1202](https://github.com/apache/arrow-datafusion/pull/1202) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([capkurmagati](https://github.com/capkurmagati)) +- Move code to fold Stable functions like `now()` from `Simplifier` to `ConstEvaluator` [\#1176](https://github.com/apache/arrow-datafusion/pull/1176) ([alamb](https://github.com/alamb)) +- DataFrame supports window function [\#1167](https://github.com/apache/arrow-datafusion/pull/1167) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- add values list expression [\#1165](https://github.com/apache/arrow-datafusion/pull/1165) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) +- Add booleans support to the CASE statement [\#1161](https://github.com/apache/arrow-datafusion/pull/1161) ([xudong963](https://github.com/xudong963)) +- Improve error messages when operations are not supported 
[\#1158](https://github.com/apache/arrow-datafusion/pull/1158) ([alamb](https://github.com/alamb)) +- Generic constant expression evaluation [\#1153](https://github.com/apache/arrow-datafusion/pull/1153) ([alamb](https://github.com/alamb)) +- python `lit` function to support bool and byte vec [\#1152](https://github.com/apache/arrow-datafusion/pull/1152) ([Jimexist](https://github.com/Jimexist)) +- \[nit\] simplify datafusion optimizer module codes [\#1146](https://github.com/apache/arrow-datafusion/pull/1146) ([panarch](https://github.com/panarch)) +- Add ScalarValue support for arbitrary list elements [\#1142](https://github.com/apache/arrow-datafusion/pull/1142) ([jonmmease](https://github.com/jonmmease)) +- Multiple files per partitions for CSV Avro Json [\#1138](https://github.com/apache/arrow-datafusion/pull/1138) ([rdettai](https://github.com/rdettai)) +- Implement INTERSECT & INTERSECT DISTINCT [\#1135](https://github.com/apache/arrow-datafusion/pull/1135) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Simplify file struct abstractions [\#1120](https://github.com/apache/arrow-datafusion/pull/1120) ([rdettai](https://github.com/rdettai)) +- Implement `is [not] distinct from` [\#1117](https://github.com/apache/arrow-datafusion/pull/1117) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Dandandan](https://github.com/Dandandan)) +- Clean up spawned task on drop for `RepartitionExec`, `SortPreservingMergeExec`, `WindowAggExec` [\#1112](https://github.com/apache/arrow-datafusion/pull/1112) ([crepererum](https://github.com/crepererum)) +- add hyperloglog implementation \(`add` and `count`\) [\#1095](https://github.com/apache/arrow-datafusion/pull/1095) ([Jimexist](https://github.com/Jimexist)) +- Add ScalarValue::Struct variant [\#1091](https://github.com/apache/arrow-datafusion/pull/1091) ([jonmmease](https://github.com/jonmmease)) +- add digest\(utf8, method\) function and refactor 
all current hash digest functions [\#1090](https://github.com/apache/arrow-datafusion/pull/1090) ([Jimexist](https://github.com/Jimexist)) +- \[crypto\] add `blake3` algorithm to `digest` function [\#1086](https://github.com/apache/arrow-datafusion/pull/1086) ([Jimexist](https://github.com/Jimexist)) +- \[crypto\] add blake2b and blake2s functions [\#1081](https://github.com/apache/arrow-datafusion/pull/1081) ([Jimexist](https://github.com/Jimexist)) +- \[nit\] make schema qualifier error message in field lookup more readable [\#1079](https://github.com/apache/arrow-datafusion/pull/1079) ([Jimexist](https://github.com/Jimexist)) +- \[window function\] add `percent_rank` window function [\#1077](https://github.com/apache/arrow-datafusion/pull/1077) ([Jimexist](https://github.com/Jimexist)) +- \[window function\] add `cume_dist` implementation [\#1076](https://github.com/apache/arrow-datafusion/pull/1076) ([Jimexist](https://github.com/Jimexist)) +- Add a LogicalPlanBuilder::schema\(\) function [\#1075](https://github.com/apache/arrow-datafusion/pull/1075) ([alamb](https://github.com/alamb)) +- Add support for UNION \[DISTINCT\] sql [\#1068](https://github.com/apache/arrow-datafusion/pull/1068) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- fix: fix joins on Float32/Float64 columns bug [\#1054](https://github.com/apache/arrow-datafusion/pull/1054) ([francis-du](https://github.com/francis-du)) +- Update sqlparser-rs to 0.11 [\#1052](https://github.com/apache/arrow-datafusion/pull/1052) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Support querying CSV files without providing the schema [\#1050](https://github.com/apache/arrow-datafusion/pull/1050) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- remove hard coded partition count in ballista logicalplan deserialization 
[\#1044](https://github.com/apache/arrow-datafusion/pull/1044) ([xudong963](https://github.com/xudong963)) +- feat: add lit_timestamp_nanosecond [\#1030](https://github.com/apache/arrow-datafusion/pull/1030) ([NGA-TRAN](https://github.com/NGA-TRAN)) +- Ignore metadata on schema merge [\#1024](https://github.com/apache/arrow-datafusion/pull/1024) ([Smurphy000](https://github.com/Smurphy000)) +- add ExecutionConfig.with_optimizer_rules [\#1022](https://github.com/apache/arrow-datafusion/pull/1022) ([seddonm1](https://github.com/seddonm1)) +- Add baseline execution stats to `WindowAggExec` and `UnionExec`, and fixup `CoalescePartitionsExec` [\#1018](https://github.com/apache/arrow-datafusion/pull/1018) ([alamb](https://github.com/alamb)) +- Derive PartialOrd for Expr [\#1015](https://github.com/apache/arrow-datafusion/pull/1015) ([alamb](https://github.com/alamb)) +- Indexed field access for List [\#1006](https://github.com/apache/arrow-datafusion/pull/1006) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Igosuki](https://github.com/Igosuki)) +- Add metrics for Limit and Projection, and CoalesceBatches [\#1004](https://github.com/apache/arrow-datafusion/pull/1004) ([alamb](https://github.com/alamb)) +- Update DataFusion to arrow 6.0 [\#984](https://github.com/apache/arrow-datafusion/pull/984) ([alamb](https://github.com/alamb)) +- Implement Display for Expr, improve operator display [\#971](https://github.com/apache/arrow-datafusion/pull/971) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) +- Add metrics for FilterExec [\#960](https://github.com/apache/arrow-datafusion/pull/960) ([alamb](https://github.com/alamb)) +- Change compound column field name rules [\#952](https://github.com/apache/arrow-datafusion/pull/952) ([waynexia](https://github.com/waynexia)) +- ObjectStore API to read from remote storage systems [\#950](https://github.com/apache/arrow-datafusion/pull/950) 
([yjshen](https://github.com/yjshen)) +- Add baseline metrics to `SortPreservingMergeExec` [\#948](https://github.com/apache/arrow-datafusion/pull/948) ([alamb](https://github.com/alamb)) +- Add support for TRIM LEADING/TRAILING/BOTH syntax [\#947](https://github.com/apache/arrow-datafusion/pull/947) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([adsharma](https://github.com/adsharma)) +- fixes \#933 replace placeholder fmt_as for ExecutionPlan impls [\#939](https://github.com/apache/arrow-datafusion/pull/939) ([tiphaineruy](https://github.com/tiphaineruy)) +- Add metrics for SortExec + HashAggregateExec [\#938](https://github.com/apache/arrow-datafusion/pull/938) ([alamb](https://github.com/alamb)) +- Add some additional asserts in `utils::from_plan` [\#930](https://github.com/apache/arrow-datafusion/pull/930) ([alamb](https://github.com/alamb)) +- Avro Table Provider [\#910](https://github.com/apache/arrow-datafusion/pull/910) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Igosuki](https://github.com/Igosuki)) +- Add BaselineMetrics, Timestamp metrics, add for `CoalescePartitionsExec`, rename output_time -\> elapsed_compute [\#909](https://github.com/apache/arrow-datafusion/pull/909) ([alamb](https://github.com/alamb)) +- add cross join support to ballista [\#891](https://github.com/apache/arrow-datafusion/pull/891) ([houqp](https://github.com/houqp)) +- Add Ballista support to DataFusion CLI [\#889](https://github.com/apache/arrow-datafusion/pull/889) ([andygrove](https://github.com/andygrove)) +- support like on DictionaryArray [\#876](https://github.com/apache/arrow-datafusion/pull/876) ([b41sh](https://github.com/b41sh)) +- Register table based on known schema without file IO [\#872](https://github.com/apache/arrow-datafusion/pull/872) ([Dandandan](https://github.com/Dandandan)) +- Add support for PostgreSQL regex match [\#870](https://github.com/apache/arrow-datafusion/pull/870)
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([b41sh](https://github.com/b41sh)) +- Include planning time in datafusion-cli printing [\#860](https://github.com/apache/arrow-datafusion/pull/860) ([Dandandan](https://github.com/Dandandan)) +- Implement basic common subexpression eliminate optimization [\#792](https://github.com/apache/arrow-datafusion/pull/792) ([waynexia](https://github.com/waynexia)) +- Impl `ops::Not` for `expr` [\#763](https://github.com/apache/arrow-datafusion/pull/763) ([Jimexist](https://github.com/Jimexist)) + +**Fixed bugs:** + +- Can not use `between` in the select list: [\#1196](https://github.com/apache/arrow-datafusion/issues/1196) +- ORDER BY does not work with literals: Sort operation is not applicable to scalar value 'foo' [\#1195](https://github.com/apache/arrow-datafusion/issues/1195) +- window functions with NULL literals in `partition by` and `order by` do not work: Internal\("Sort operation is not applicable to scalar value NULL"\) [\#1194](https://github.com/apache/arrow-datafusion/issues/1194) +- Operation name not included in internal errors -- Internal\("Data type Boolean not supported for binary operation on dyn arrays"\) [\#1157](https://github.com/apache/arrow-datafusion/issues/1157) +- Physical plan explain UNION query says "ExecutionPlan\(PlaceHolder\)" [\#933](https://github.com/apache/arrow-datafusion/issues/933) +- Can not use LIKE on DictionaryArray encoded strings [\#815](https://github.com/apache/arrow-datafusion/issues/815) +- physical_plan::repartition::tests::repartition_with_dropping_output_stream failing locally [\#614](https://github.com/apache/arrow-datafusion/issues/614) +- Fix some `BuiltinScalarFunction` panics with zero arguments [\#1249](https://github.com/apache/arrow-datafusion/pull/1249) ([capkurmagati](https://github.com/capkurmagati)) +- fix: not do boolean folding on NULL and/or expr [\#1245](https://github.com/apache/arrow-datafusion/pull/1245) 
([NGA-TRAN](https://github.com/NGA-TRAN)) +- ignore case of `with header row` in sql when creating external table [\#1237](https://github.com/apache/arrow-datafusion/pull/1237) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([lichuan6](https://github.com/lichuan6)) +- fix: Min/Max aggregation data type should not be dictionary [\#1235](https://github.com/apache/arrow-datafusion/pull/1235) ([NGA-TRAN](https://github.com/NGA-TRAN)) +- Fix build with `--no-default-features` [\#1219](https://github.com/apache/arrow-datafusion/pull/1219) ([alamb](https://github.com/alamb)) +- Prevent "future cannot be sent between threads safely" compilation error [\#1155](https://github.com/apache/arrow-datafusion/pull/1155) ([jonmmease](https://github.com/jonmmease)) +- Clean up spawned task on drop for `AnalyzeExec`, `CoalescePartitionsExec`, `HashAggregateExec` [\#1121](https://github.com/apache/arrow-datafusion/pull/1121) ([crepererum](https://github.com/crepererum)) +- Clean up spawned task on `SortStream` drop [\#1105](https://github.com/apache/arrow-datafusion/pull/1105) ([crepererum](https://github.com/crepererum)) +- fix UNION ALL bug: thread 'main' panicked at 'index out of bounds: the len is 1 but the index is 1', ./src/datatypes/schema.rs:165:10 [\#1088](https://github.com/apache/arrow-datafusion/pull/1088) ([xudong963](https://github.com/xudong963)) +- python: fix generated table name in dataframe creation [\#1078](https://github.com/apache/arrow-datafusion/pull/1078) ([houqp](https://github.com/houqp)) +- fix subquery alias [\#1067](https://github.com/apache/arrow-datafusion/pull/1067) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- fix pattern handling in regexp_match function [\#1065](https://github.com/apache/arrow-datafusion/pull/1065) ([houqp](https://github.com/houqp)) +- fix: joins on Timestamp columns [\#1055](https://github.com/apache/arrow-datafusion/pull/1055) 
([francis-du](https://github.com/francis-du)) +- Fix metric name typo [\#943](https://github.com/apache/arrow-datafusion/pull/943) ([alamb](https://github.com/alamb)) +- EXPLAIN ANALYZE should run all Optimizer passes [\#929](https://github.com/apache/arrow-datafusion/pull/929) ([alamb](https://github.com/alamb)) + +**Documentation updates:** + +- update docs to fix DataFusion User Guide link [\#1238](https://github.com/apache/arrow-datafusion/pull/1238) ([jiangzhx](https://github.com/jiangzhx)) +- \[docs\] datafusion cli run via homebrew [\#1198](https://github.com/apache/arrow-datafusion/pull/1198) ([Jimexist](https://github.com/Jimexist)) +- add support for unary and binary values in values list, update docs [\#1172](https://github.com/apache/arrow-datafusion/pull/1172) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) +- Add additional docstring comments to `from_plan` [\#1168](https://github.com/apache/arrow-datafusion/pull/1168) ([alamb](https://github.com/alamb)) +- \[nit\] fix document issue for `approx_distinct` [\#1110](https://github.com/apache/arrow-datafusion/pull/1110) ([Jimexist](https://github.com/Jimexist)) +- implement `approx_distinct` function using HyperLogLog [\#1087](https://github.com/apache/arrow-datafusion/pull/1087) ([Jimexist](https://github.com/Jimexist)) +- Remove unused `use` statements from examples [\#1032](https://github.com/apache/arrow-datafusion/pull/1032) ([alamb](https://github.com/alamb)) +- consolidate datafusion docs with sphinx [\#993](https://github.com/apache/arrow-datafusion/pull/993) ([houqp](https://github.com/houqp)) +- Updated user-guide library docs with optimized config [\#976](https://github.com/apache/arrow-datafusion/pull/976) ([matthewmturner](https://github.com/matthewmturner)) +- Improve User Guide [\#954](https://github.com/apache/arrow-datafusion/pull/954) ([andygrove](https://github.com/andygrove)) +- \[MINOR\] Fix typos in doc comments 
[\#945](https://github.com/apache/arrow-datafusion/pull/945) ([alamb](https://github.com/alamb)) +- \[DataFusion\] - Add show and show_limit function for DataFrame [\#923](https://github.com/apache/arrow-datafusion/pull/923) ([francis-du](https://github.com/francis-du)) +- Typo fix in DataFusion crate documentation [\#914](https://github.com/apache/arrow-datafusion/pull/914) ([antoinewdg](https://github.com/antoinewdg)) + +**Performance improvements:** + +- Improve avro reader performance by avoiding some cloning on avro_rs::Value [\#1206](https://github.com/apache/arrow-datafusion/pull/1206) ([Igosuki](https://github.com/Igosuki)) +- optimize build profile for datafusion python binding, cli and ballista [\#1137](https://github.com/apache/arrow-datafusion/pull/1137) ([houqp](https://github.com/houqp)) +- Avoid stack overflow by reducing stack usage of `BinaryExpr::evaluate` in debug builds [\#1047](https://github.com/apache/arrow-datafusion/pull/1047) ([alamb](https://github.com/alamb)) +- Add ScalarValue::eq_array optimized comparison function [\#844](https://github.com/apache/arrow-datafusion/pull/844) ([alamb](https://github.com/alamb)) +- Rework GroupByHash to for faster performance and support grouping by nulls [\#808](https://github.com/apache/arrow-datafusion/pull/808) ([alamb](https://github.com/alamb)) + +**Closed issues:** + +- InList expr with NULL literals do not work [\#1190](https://github.com/apache/arrow-datafusion/issues/1190) +- update the homepage README to include values, `approx_distinct`, etc. [\#1171](https://github.com/apache/arrow-datafusion/issues/1171) +- \[Python\]: Inconsistencies with Python package name [\#1011](https://github.com/apache/arrow-datafusion/issues/1011) +- Wanting to contribute to project where to start? 
[\#983](https://github.com/apache/arrow-datafusion/issues/983) +- delete redundant code [\#973](https://github.com/apache/arrow-datafusion/issues/973) +- How to build DataFusion python wheel [\#853](https://github.com/apache/arrow-datafusion/issues/853) +- Add support for partition pruning [\#204](https://github.com/apache/arrow-datafusion/issues/204) +- \[Datafusion\] Support joins on TimestampMillisecond columns [\#187](https://github.com/apache/arrow-datafusion/issues/187) +- TPC-H Query 21 [\#173](https://github.com/apache/arrow-datafusion/issues/173) +- TPC-H Query 13 [\#164](https://github.com/apache/arrow-datafusion/issues/164) +- TPC-H Query 8 [\#162](https://github.com/apache/arrow-datafusion/issues/162) +- implement split_part\(string, delimiter, position\) [\#157](https://github.com/apache/arrow-datafusion/issues/157) +- Join Statement: Schema contains duplicate unqualified field name [\#155](https://github.com/apache/arrow-datafusion/issues/155) +- ParquetTable should avoid scanning all files twice [\#136](https://github.com/apache/arrow-datafusion/issues/136) +- Add support for reading partitioned Parquet files [\#133](https://github.com/apache/arrow-datafusion/issues/133) +- Add support for Parquet schema merging [\#132](https://github.com/apache/arrow-datafusion/issues/132) +- Catalog abstraction [\#126](https://github.com/apache/arrow-datafusion/issues/126) +- Optimizer rules should work with qualified column names [\#125](https://github.com/apache/arrow-datafusion/issues/125) +- Add optional qualifier to Expr::Column [\#121](https://github.com/apache/arrow-datafusion/issues/121) +- Implement modulus expression [\#99](https://github.com/apache/arrow-datafusion/issues/99) +- \[Rust\] Add constant folding to expressions during logically planning [\#98](https://github.com/apache/arrow-datafusion/issues/98) +- \[Rust\] Implement pretty print for physical query plan [\#93](https://github.com/apache/arrow-datafusion/issues/93) +- Can not group by boolean 
columns \(add boolean to valid keys of groupBy\) [\#91](https://github.com/apache/arrow-datafusion/issues/91) +- improve performance of building literal arrays [\#90](https://github.com/apache/arrow-datafusion/issues/90) +- \[rust\]\[datafusion\] optimize count\(\*\) queries on parquet sources [\#89](https://github.com/apache/arrow-datafusion/issues/89) +- Produce a design for a metrics framework [\#21](https://github.com/apache/arrow-datafusion/issues/21) + +**Merged pull requests:** + +- Add timezone string to stabilize test [\#1265](https://github.com/apache/arrow-datafusion/pull/1265) ([viirya](https://github.com/viirya)) +- numerical_coercion pattern match optimize [\#1256](https://github.com/apache/arrow-datafusion/pull/1256) ([Jimexist](https://github.com/Jimexist)) +- fix and update window function sql tests [\#1059](https://github.com/apache/arrow-datafusion/pull/1059) ([Jimexist](https://github.com/Jimexist)) +- reduce ScalarValue from trait boilerplate with macro [\#989](https://github.com/apache/arrow-datafusion/pull/989) ([houqp](https://github.com/houqp)) diff --git a/dev/changelog/7.0.0.md b/dev/changelog/7.0.0.md new file mode 100644 index 0000000000000..e63c2a4455c9a --- /dev/null +++ b/dev/changelog/7.0.0.md @@ -0,0 +1,310 @@ + + +## [7.0.0](https://github.com/apache/arrow-datafusion/tree/7.0.0) (2022-02-14) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/6.0.0...7.0.0) + +**Breaking changes:** + +- Consolidate various configurations options, remove unrelated `batch_size` [\#1565](https://github.com/apache/arrow-datafusion/issues/1565) +- Extract logical plans in LogicalPlan as independent struct [\#1228](https://github.com/apache/arrow-datafusion/issues/1228) +- Update `ExecutionPlan` to know about sortedness and repartitioning optimizer pass respect the invariants [\#1776](https://github.com/apache/arrow-datafusion/pull/1776) ([alamb](https://github.com/alamb)) +- Update to `arrow 8.0.0`
[\#1673](https://github.com/apache/arrow-datafusion/pull/1673) ([alamb](https://github.com/alamb)) +- Remove non idiomatic `DataFusionError::into_arrow_external_error` in favor of From conversion [\#1645](https://github.com/apache/arrow-datafusion/pull/1645) ([alamb](https://github.com/alamb)) +- Remove `Accumulator::update` and `Accumulator::merge` [\#1582](https://github.com/apache/arrow-datafusion/pull/1582) ([Jimexist](https://github.com/Jimexist)) +- implement `Hash` for various types and replace `PartialOrd` [\#1580](https://github.com/apache/arrow-datafusion/pull/1580) ([Jimexist](https://github.com/Jimexist)) +- Replace `DatafusionError` with `GenericError` in `ObjectStore` interface [\#1541](https://github.com/apache/arrow-datafusion/pull/1541) ([matthewmturner](https://github.com/matthewmturner)) +- Make `FLOAT` SQL type map to `Float32` rather than `Float64` [\#1423](https://github.com/apache/arrow-datafusion/pull/1423) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- Map `REAL` SQL type to `Float32` rather than `Float64` to be consistent with pg [\#1390](https://github.com/apache/arrow-datafusion/pull/1390) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([hntd187](https://github.com/hntd187)) + +**Implemented enhancements:** + +- Create new `datafusion_expr` crate [\#1753](https://github.com/apache/arrow-datafusion/issues/1753) +- Create new `datafusion_common` crate [\#1752](https://github.com/apache/arrow-datafusion/issues/1752) +- API to get Expr's type and nullability without a `DFSchema` [\#1725](https://github.com/apache/arrow-datafusion/issues/1725) +- Cleaner API to create `Expr::ScalarFunction` programmatically [\#1718](https://github.com/apache/arrow-datafusion/issues/1718) +- Introduce a `Vec<u8>` based row-wise representation for DataFusion [\#1708](https://github.com/apache/arrow-datafusion/issues/1708) +- Simplify creating new `ListingTable`
[\#1705](https://github.com/apache/arrow-datafusion/issues/1705) +- Implement TableProvider for DataFrameImpl to allow registration of logical plans [\#1698](https://github.com/apache/arrow-datafusion/issues/1698) +- Public Expr simplification API [\#1694](https://github.com/apache/arrow-datafusion/issues/1694) +- Query Optimizer: Add OUTER --\> INNER join conversion [\#1670](https://github.com/apache/arrow-datafusion/issues/1670) +- Support reading from CSV, Avro and Json files that have mergeable/compatible, but not identical schemas [\#1669](https://github.com/apache/arrow-datafusion/issues/1669) +- Remove `DataFusionError::into_arrow_external_error` in favor of `From` conversion [\#1644](https://github.com/apache/arrow-datafusion/issues/1644) +- Include join type in display implementation for logical plan [\#1620](https://github.com/apache/arrow-datafusion/issues/1620) +- Switch datafusion to using `eq_dyn_scalar`, etc kernels [\#1610](https://github.com/apache/arrow-datafusion/issues/1610) +- Proposal: Remove `Accumulator::update` and `Accumulator::merge` [\#1549](https://github.com/apache/arrow-datafusion/issues/1549) +- Replace DataFusionError/Result with impl Error for ObjectStore and Reader [\#1540](https://github.com/apache/arrow-datafusion/issues/1540) +- Add `approx_quantile` support [\#1538](https://github.com/apache/arrow-datafusion/issues/1538) +- support sorting decimal data type [\#1522](https://github.com/apache/arrow-datafusion/issues/1522) +- Keep all datafusion's packages up to date with Dependabot [\#1472](https://github.com/apache/arrow-datafusion/issues/1472) +- ExecutionContext support init ExecutionContextState with `new(state: Arc<Mutex<ExecutionContextState>>)` method [\#1439](https://github.com/apache/arrow-datafusion/issues/1439) +- support the decimal scalar value [\#1393](https://github.com/apache/arrow-datafusion/issues/1393) +- Documentation for using scalar functions with the DataFrame API [\#1364](https://github.com/apache/arrow-datafusion/issues/1364)
+- Support `boolean == boolean` and `boolean != boolean` operators [\#1159](https://github.com/apache/arrow-datafusion/issues/1159) +- Support DataType::Decimal\(15, 2\) in TPC-H benchmark [\#174](https://github.com/apache/arrow-datafusion/issues/174) +- Make `MemoryStream` public [\#150](https://github.com/apache/arrow-datafusion/issues/150) +- Add support for Parquet schema merging [\#132](https://github.com/apache/arrow-datafusion/issues/132) +- Add SQL support for IN expression [\#118](https://github.com/apache/arrow-datafusion/issues/118) +- Add logging to datafusion-cli [\#1789](https://github.com/apache/arrow-datafusion/pull/1789) ([alamb](https://github.com/alamb)) +- Add `approx_median()` aggregate function [\#1729](https://github.com/apache/arrow-datafusion/pull/1729) ([realno](https://github.com/realno)) +- Add join type for logical plan display [\#1674](https://github.com/apache/arrow-datafusion/pull/1674) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Fix null comparison for Parquet pruning predicate [\#1595](https://github.com/apache/arrow-datafusion/pull/1595) ([viirya](https://github.com/viirya)) +- Add `corr` aggregate function [\#1561](https://github.com/apache/arrow-datafusion/pull/1561) ([realno](https://github.com/realno)) +- Add `covar`, `covar_pop` and `covar_samp` aggregate functions [\#1551](https://github.com/apache/arrow-datafusion/pull/1551) ([realno](https://github.com/realno)) +- Add `approx_quantile()` aggregation function [\#1539](https://github.com/apache/arrow-datafusion/pull/1539) ([domodwyer](https://github.com/domodwyer)) +- Initial MemoryManager and DiskManager APIs for query execution + External Sort implementation [\#1526](https://github.com/apache/arrow-datafusion/pull/1526) ([yjshen](https://github.com/yjshen)) +- Add `stddev` and `variance` [\#1525](https://github.com/apache/arrow-datafusion/pull/1525) ([realno](https://github.com/realno)) +- Add `rem` 
operation for Expr [\#1467](https://github.com/apache/arrow-datafusion/pull/1467) ([liukun4515](https://github.com/liukun4515)) +- support decimal data type in create table [\#1431](https://github.com/apache/arrow-datafusion/pull/1431) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- Ordering by index in select expression [\#1419](https://github.com/apache/arrow-datafusion/pull/1419) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([hntd187](https://github.com/hntd187)) +- Add support for `ORDER BY` on unprojected columns [\#1415](https://github.com/apache/arrow-datafusion/pull/1415) ([viirya](https://github.com/viirya)) +- Support decimal for `min` and `max` aggregate [\#1407](https://github.com/apache/arrow-datafusion/pull/1407) ([liukun4515](https://github.com/liukun4515)) +- Consolidate `ConstantFolding` and `SimplifyExpression` [\#1375](https://github.com/apache/arrow-datafusion/pull/1375) ([alamb](https://github.com/alamb)) +- Datafusion cli quiet mode command to contain option bool [\#1345](https://github.com/apache/arrow-datafusion/pull/1345) ([Jimexist](https://github.com/Jimexist)) +- Implement `array_agg` aggregate function [\#1300](https://github.com/apache/arrow-datafusion/pull/1300) ([viirya](https://github.com/viirya)) +- Add a command to switch output format in cli [\#1284](https://github.com/apache/arrow-datafusion/pull/1284) ([capkurmagati](https://github.com/capkurmagati)) +- Support `=`, `<`, `<=`, `>`, `>=`, `!=`, `is distinct from`, `is not distinct from` for `BooleanArray` [\#1163](https://github.com/apache/arrow-datafusion/pull/1163) ([alamb](https://github.com/alamb)) + +**Fixed bugs:** + +- Unsupported data type in hasher: Timestamp\(Second, None\) [\#1768](https://github.com/apache/arrow-datafusion/issues/1768) +- SQL column identifiers should be converted to lowercase when unquoted [\#1746](https://github.com/apache/arrow-datafusion/issues/1746) +- Data type 
Dictionary\(Int32, Utf8\) not supported for binary operation 'eq' on dyn arrays [\#1605](https://github.com/apache/arrow-datafusion/issues/1605) +- datafusion doesn't process predicate pushdown correctly when there is outer join [\#1586](https://github.com/apache/arrow-datafusion/issues/1586) +- casting `Int64` to `Float64` unsuccessfully caused tpch8 to fail [\#1576](https://github.com/apache/arrow-datafusion/issues/1576) +- CTE/WITH .. UNION ALL confuses name resolution in WHERE [\#1509](https://github.com/apache/arrow-datafusion/issues/1509) +- ORDER BY min\(x\) results in error `Plan("No field named 'foo.x'. Valid fields are 'MIN(foo.x)'.")` [\#1479](https://github.com/apache/arrow-datafusion/issues/1479) +- Sort discards field metadata on the output schema [\#1476](https://github.com/apache/arrow-datafusion/issues/1476) +- Datafusion should not strip out timezone information from existing types [\#1454](https://github.com/apache/arrow-datafusion/issues/1454) +- Error on some queries: "column types must match schema types, expected XXX but found YYY" [\#1447](https://github.com/apache/arrow-datafusion/issues/1447) +- Query failing to return any results when filter is an equality check on strings \(bad statistics in parquet\) [\#1433](https://github.com/apache/arrow-datafusion/issues/1433) +- Field names containing period such as `f.c1` cannot be named in SQL query [\#1432](https://github.com/apache/arrow-datafusion/issues/1432) +- `Select *` returns an unexpected result [\#1412](https://github.com/apache/arrow-datafusion/issues/1412) +- Turn off unused default features of chrono and ahash [\#1398](https://github.com/apache/arrow-datafusion/issues/1398) +- real data type is float32 in PG database, but in the datafusion it is as float64 [\#1380](https://github.com/apache/arrow-datafusion/issues/1380) +- TPC-H q10 performance regression \(expression for filter with added alias is not pushed down\) [\#1367](https://github.com/apache/arrow-datafusion/issues/1367) +- 
ProjectionExec Loses Field Metadata [\#1361](https://github.com/apache/arrow-datafusion/issues/1361) +- Support Filter on unprojected columns [\#1351](https://github.com/apache/arrow-datafusion/issues/1351) +- NULLS ORDER is inconsistent with postgres [\#1343](https://github.com/apache/arrow-datafusion/issues/1343) +- Fix bug while merging `RecordBatch`, add `SortPreservingMerge` fuzz tester [\#1678](https://github.com/apache/arrow-datafusion/pull/1678) ([alamb](https://github.com/alamb)) +- fix a cte block with same name for many times [\#1639](https://github.com/apache/arrow-datafusion/pull/1639) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- fix: casting Int64 to Float64 unsuccessfully caused tpch8 to fail [\#1601](https://github.com/apache/arrow-datafusion/pull/1601) ([xudong963](https://github.com/xudong963)) +- Fix single_distinct_to_groupby for arbitrary expressions [\#1519](https://github.com/apache/arrow-datafusion/pull/1519) ([james727](https://github.com/james727)) +- Fix SortExec discards field metadata on the output schema [\#1477](https://github.com/apache/arrow-datafusion/pull/1477) ([alamb](https://github.com/alamb)) +- fix calculate in many_to_many_hash_partition test. 
[\#1463](https://github.com/apache/arrow-datafusion/pull/1463) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add Timezone to Scalar::Time\* types, and better timezone awareness to Datafusion's time types [\#1455](https://github.com/apache/arrow-datafusion/pull/1455) ([maxburke](https://github.com/maxburke)) +- Support identifiers with `.` in them [\#1449](https://github.com/apache/arrow-datafusion/pull/1449) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fixes for working with functions in dataframes, additional documentation [\#1430](https://github.com/apache/arrow-datafusion/pull/1430) ([tobyhede](https://github.com/tobyhede)) +- \[Minor\] Fix `send_time` metric for hash-repartition [\#1421](https://github.com/apache/arrow-datafusion/pull/1421) ([Dandandan](https://github.com/Dandandan)) +- fix: Select \* returns an unexpected result [\#1413](https://github.com/apache/arrow-datafusion/pull/1413) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Make cli handle multiple whitespaces [\#1388](https://github.com/apache/arrow-datafusion/pull/1388) ([capkurmagati](https://github.com/capkurmagati)) +- Metadata is kept in projections for non-derived columns [\#1378](https://github.com/apache/arrow-datafusion/pull/1378) ([hntd187](https://github.com/hntd187)) +- Fix Predicate Pushdown: split_members should be able to split aliased predicate [\#1368](https://github.com/apache/arrow-datafusion/pull/1368) ([viirya](https://github.com/viirya)) +- Change the arg names and make parameters more meaningful [\#1357](https://github.com/apache/arrow-datafusion/pull/1357) ([liukun4515](https://github.com/liukun4515)) +- collect table stats by default for listing table [\#1347](https://github.com/apache/arrow-datafusion/pull/1347) ([houqp](https://github.com/houqp)) +- fix: make nulls-order consistent with postgres 
[\#1344](https://github.com/apache/arrow-datafusion/pull/1344) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Avoid changing expression names during constant folding [\#1319](https://github.com/apache/arrow-datafusion/pull/1319) ([viirya](https://github.com/viirya)) +- improve error message for invalid create table statement [\#1294](https://github.com/apache/arrow-datafusion/pull/1294) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([houqp](https://github.com/houqp)) +- Forbid creating the table with the same name [\#1288](https://github.com/apache/arrow-datafusion/pull/1288) ([liukun4515](https://github.com/liukun4515)) + +**Documentation updates:** + +- Clarify docs about `Accumulator::update` and `Accumulator::update_batch` [\#1542](https://github.com/apache/arrow-datafusion/pull/1542) ([alamb](https://github.com/alamb)) +- Fix duplicated `cargo run --example parquet_sql` [\#1482](https://github.com/apache/arrow-datafusion/pull/1482) ([sergey-melnychuk](https://github.com/sergey-melnychuk)) +- add documentation to Datafusion cli's new commands [\#1348](https://github.com/apache/arrow-datafusion/pull/1348) ([liukun4515](https://github.com/liukun4515)) +- fix some clippy warnings from nightly channel [\#1277](https://github.com/apache/arrow-datafusion/pull/1277) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) + +**Performance improvements:** + +- Parquet pruning predicate for `IS NULL` [\#1591](https://github.com/apache/arrow-datafusion/issues/1591) +- Fix predicate pushdown for outer joins [\#1618](https://github.com/apache/arrow-datafusion/pull/1618) ([james727](https://github.com/james727)) +- fix: sql planner creates cross join instead of inner join from select predicates [\#1566](https://github.com/apache/arrow-datafusion/pull/1566) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] 
([xudong963](https://github.com/xudong963)) +- Split fetch_metadata into fetch_statistics and fetch_schema [\#1365](https://github.com/apache/arrow-datafusion/pull/1365) ([Dandandan](https://github.com/Dandandan)) +- Optimize the performance queries with a single distinct aggregate [\#1315](https://github.com/apache/arrow-datafusion/pull/1315) ([ic4y](https://github.com/ic4y)) +- Left join could use bitmap for left join instead of Vec\<bool\> [\#1291](https://github.com/apache/arrow-datafusion/pull/1291) ([boazberman](https://github.com/boazberman)) + +**Closed issues:** + +- Add `release compile` to CI [\#1728](https://github.com/apache/arrow-datafusion/issues/1728) +- DiskManager and TempFiles getting created several times per query [\#1690](https://github.com/apache/arrow-datafusion/issues/1690) +- Add a test for the `pyarrow` feature in CI [\#1635](https://github.com/apache/arrow-datafusion/issues/1635) +- SQL tests for when sorting exceeded available memory and had to spill to disk [\#1573](https://github.com/apache/arrow-datafusion/issues/1573) +- Consolidate the N-way merging code and `SortPreservingMergeStream` \(which has quite good tests of what is often quite tricky code, and it will be performance critical\) [\#1572](https://github.com/apache/arrow-datafusion/issues/1572) +- Consolidate the `SortExec` code \(so there is only a single sort operator that does in memory sorting if it has enough memory budget but then spills to disk if needed\).
[\#1571](https://github.com/apache/arrow-datafusion/issues/1571) +- Track memory usage in Non Limited Operators [\#1569](https://github.com/apache/arrow-datafusion/issues/1569) +- \[Question\] Why does ballista store tables in the client instead of in the SchedulerServer [\#1473](https://github.com/apache/arrow-datafusion/issues/1473) +- Consolidate Projection for Schema and RecordBatch [\#1425](https://github.com/apache/arrow-datafusion/issues/1425) +- Support Sort on unprojected columns [\#1372](https://github.com/apache/arrow-datafusion/issues/1372) +- Unused code in hash_aggregate [\#1362](https://github.com/apache/arrow-datafusion/issues/1362) +- Why use the expr types before coercion to get the result type? [\#1358](https://github.com/apache/arrow-datafusion/issues/1358) +- A problem about the projection_push_down optimizer gathers valid columns [\#1312](https://github.com/apache/arrow-datafusion/issues/1312) +- apply constant folding to `LogicalPlan::Values` [\#1170](https://github.com/apache/arrow-datafusion/issues/1170) +- reduce usage of `IntoIterator` in logical plan builder window fn [\#372](https://github.com/apache/arrow-datafusion/issues/372) +- Why does DataFusion throw a Tokio 0.2 runtime error? 
[\#176](https://github.com/apache/arrow-datafusion/issues/176) +- TPC-H Query 14 [\#165](https://github.com/apache/arrow-datafusion/issues/165) +- Length kernel returns bytes not character length [\#156](https://github.com/apache/arrow-datafusion/issues/156) +- Split the logical operators out into separate source files [\#115](https://github.com/apache/arrow-datafusion/issues/115) + +**Merged pull requests:** + +- Fixup some doc warnings [\#1811](https://github.com/apache/arrow-datafusion/pull/1811) ([alamb](https://github.com/alamb)) +- Ensure most of links in docs are correct [\#1808](https://github.com/apache/arrow-datafusion/pull/1808) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Update CHANGELOG.md, update release scripts [\#1807](https://github.com/apache/arrow-datafusion/pull/1807) ([alamb](https://github.com/alamb)) +- Update versions for split crates [\#1803](https://github.com/apache/arrow-datafusion/pull/1803) ([matthewmturner](https://github.com/matthewmturner)) +- Improve the error message and UX of tpch benchmark program [\#1800](https://github.com/apache/arrow-datafusion/pull/1800) ([alamb](https://github.com/alamb)) +- rename references of expr in logical plan module after datafusion-expr split [\#1797](https://github.com/apache/arrow-datafusion/pull/1797) ([Jimexist](https://github.com/Jimexist)) +- Update to sqlparser 0.14 [\#1796](https://github.com/apache/arrow-datafusion/pull/1796) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- \[split/13\] move rest of expr to expr_fn in datafusion-expr module [\#1794](https://github.com/apache/arrow-datafusion/pull/1794) ([Jimexist](https://github.com/Jimexist)) +- Update datafusion versions [\#1793](https://github.com/apache/arrow-datafusion/pull/1793) ([matthewmturner](https://github.com/matthewmturner)) +- Less verbose plans in debug logging 
[\#1787](https://github.com/apache/arrow-datafusion/pull/1787) ([alamb](https://github.com/alamb)) +- \[split/11\] split expr type and null info to be expr-schemable [\#1784](https://github.com/apache/arrow-datafusion/pull/1784) ([Jimexist](https://github.com/Jimexist)) +- Introduce `Row` format backed by raw bytes [\#1782](https://github.com/apache/arrow-datafusion/pull/1782) ([yjshen](https://github.com/yjshen)) +- rewrite predicates before pushing to union inputs [\#1781](https://github.com/apache/arrow-datafusion/pull/1781) ([korowa](https://github.com/korowa)) +- Update datafusion to use arrow 9.0.0 [\#1775](https://github.com/apache/arrow-datafusion/pull/1775) ([alamb](https://github.com/alamb)) +- \[split/10\] split up expr for rewriting, visiting, and simplification traits [\#1774](https://github.com/apache/arrow-datafusion/pull/1774) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) +- \#1768 Support TimeUnit::Second in hasher [\#1769](https://github.com/apache/arrow-datafusion/pull/1769) ([jychen7](https://github.com/jychen7)) +- TPC-H benchmark can optionally write JSON output file with benchmark summary [\#1766](https://github.com/apache/arrow-datafusion/pull/1766) ([andygrove](https://github.com/andygrove)) +- \[split/8\] move `Accumulator` and `ColumnarValue` to datafusion-expr [\#1765](https://github.com/apache/arrow-datafusion/pull/1765) ([Jimexist](https://github.com/Jimexist)) +- \[split/7\] move built-in scalar function to datafusion-expr [\#1764](https://github.com/apache/arrow-datafusion/pull/1764) ([Jimexist](https://github.com/Jimexist)) +- \[split/6\] move signature, type signature, volatility to datafusion-expr [\#1763](https://github.com/apache/arrow-datafusion/pull/1763) ([Jimexist](https://github.com/Jimexist)) +- \[split/9+12\] move udf, udaf, `Expr` to datafusion-expr module [\#1762](https://github.com/apache/arrow-datafusion/pull/1762) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) +- \[split/5\] move window frame and operator to datafusion-expr module [\#1761](https://github.com/apache/arrow-datafusion/pull/1761) ([Jimexist](https://github.com/Jimexist)) +- \[split/4\] move scalar value to datafusion-common [\#1760](https://github.com/apache/arrow-datafusion/pull/1760) ([Jimexist](https://github.com/Jimexist)) +- \[split/3\] split datafusion expr module and move aggregate and window function expr [\#1759](https://github.com/apache/arrow-datafusion/pull/1759) ([Jimexist](https://github.com/Jimexist)) +- \[split/2\] move column and dfschema to datafusion-common module [\#1758](https://github.com/apache/arrow-datafusion/pull/1758) ([Jimexist](https://github.com/Jimexist)) +- Use ordered-float 2.10 [\#1756](https://github.com/apache/arrow-datafusion/pull/1756) ([andygrove](https://github.com/andygrove)) +- \[split/1\] split datafusion-common module [\#1751](https://github.com/apache/arrow-datafusion/pull/1751) ([Jimexist](https://github.com/Jimexist)) +- use clap 3 style args parsing for datafusion cli [\#1749](https://github.com/apache/arrow-datafusion/pull/1749) ([Jimexist](https://github.com/Jimexist)) +- fix: Case insensitive unquoted identifiers in SQL [\#1747](https://github.com/apache/arrow-datafusion/pull/1747) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mkmik](https://github.com/mkmik)) +- Move more tests out of context.rs [\#1743](https://github.com/apache/arrow-datafusion/pull/1743) ([alamb](https://github.com/alamb)) +- Move optimize test out of context.rs [\#1742](https://github.com/apache/arrow-datafusion/pull/1742) ([alamb](https://github.com/alamb)) +- Fix typos in crate documentation [\#1739](https://github.com/apache/arrow-datafusion/pull/1739) ([r4ntix](https://github.com/r4ntix)) +- add `cargo check --release` to ci [\#1737](https://github.com/apache/arrow-datafusion/pull/1737) 
([xudong963](https://github.com/xudong963)) +- Update parking_lot requirement from 0.11 to 0.12 [\#1735](https://github.com/apache/arrow-datafusion/pull/1735) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Create built-in scalar functions programmatically [\#1734](https://github.com/apache/arrow-datafusion/pull/1734) ([HaoYang670](https://github.com/HaoYang670)) +- Prevent repartitioning of certain operator's direct children \(\#1731\) [\#1732](https://github.com/apache/arrow-datafusion/pull/1732) ([tustvold](https://github.com/tustvold)) +- API to get Expr's type and nullability without a `DFSchema` [\#1726](https://github.com/apache/arrow-datafusion/pull/1726) ([alamb](https://github.com/alamb)) +- minor: fix `cargo run --release` error [\#1723](https://github.com/apache/arrow-datafusion/pull/1723) ([xudong963](https://github.com/xudong963)) +- substitute `parking_lot::Mutex` for `std::sync::Mutex` [\#1720](https://github.com/apache/arrow-datafusion/pull/1720) ([xudong963](https://github.com/xudong963)) +- Convert boolean case expressions to boolean logic [\#1719](https://github.com/apache/arrow-datafusion/pull/1719) ([tustvold](https://github.com/tustvold)) +- Add Expression Simplification API [\#1717](https://github.com/apache/arrow-datafusion/pull/1717) ([alamb](https://github.com/alamb)) +- Create ListingTableConfig which includes file format and schema inference [\#1715](https://github.com/apache/arrow-datafusion/pull/1715) ([matthewmturner](https://github.com/matthewmturner)) +- make `select_to_plan` clearer [\#1714](https://github.com/apache/arrow-datafusion/pull/1714) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Add upper bound for public function `signature` [\#1713](https://github.com/apache/arrow-datafusion/pull/1713) ([HaoYang670](https://github.com/HaoYang670)) +- Add tests and CI for optional pyarrow module [\#1711](https://github.com/apache/arrow-datafusion/pull/1711) 
([wjones127](https://github.com/wjones127)) +- Create SchemaAdapter trait to map table schema to file schemas [\#1709](https://github.com/apache/arrow-datafusion/pull/1709) ([thinkharderdev](https://github.com/thinkharderdev)) +- refine test in repartition.rs & coalesce_batches.rs [\#1707](https://github.com/apache/arrow-datafusion/pull/1707) ([xudong963](https://github.com/xudong963)) +- Fuzz test for spillable sort [\#1706](https://github.com/apache/arrow-datafusion/pull/1706) ([yjshen](https://github.com/yjshen)) +- Support `create_physical_expr` and `ExecutionContextState` or `DefaultPhysicalPlanner` for faster speed [\#1700](https://github.com/apache/arrow-datafusion/pull/1700) ([alamb](https://github.com/alamb)) +- Implement TableProvider for DataFrameImpl [\#1699](https://github.com/apache/arrow-datafusion/pull/1699) ([cpcloud](https://github.com/cpcloud)) +- Move timestamp related tests out of context.rs and into sql integration test [\#1696](https://github.com/apache/arrow-datafusion/pull/1696) ([alamb](https://github.com/alamb)) +- Lazy TempDir creation in DiskManager [\#1695](https://github.com/apache/arrow-datafusion/pull/1695) ([alamb](https://github.com/alamb)) +- Add `MemTrackingMetrics` to ease memory tracking for non-limited memory consumers [\#1691](https://github.com/apache/arrow-datafusion/pull/1691) ([yjshen](https://github.com/yjshen)) +- \(minor\) Reduce memory manager and disk manager logs from `info!` to `debug!` [\#1689](https://github.com/apache/arrow-datafusion/pull/1689) ([alamb](https://github.com/alamb)) +- Make `SortPreservingMergeStream` stable on input stream order [\#1687](https://github.com/apache/arrow-datafusion/pull/1687) ([alamb](https://github.com/alamb)) +- Incorporate dyn scalar kernels [\#1685](https://github.com/apache/arrow-datafusion/pull/1685) ([matthewmturner](https://github.com/matthewmturner)) +- Move `information_schema` tests out of execution/context.rs to `sql_integration` tests 
[\#1684](https://github.com/apache/arrow-datafusion/pull/1684) ([alamb](https://github.com/alamb)) +- Add a new metric type: `Gauge` + `CurrentMemoryUsage` to metrics [\#1682](https://github.com/apache/arrow-datafusion/pull/1682) ([yjshen](https://github.com/yjshen)) +- refactor array_agg to not to have `update` and `merge` [\#1681](https://github.com/apache/arrow-datafusion/pull/1681) ([Jimexist](https://github.com/Jimexist)) +- Use NamedTempFile rather than `String` in DiskManager [\#1680](https://github.com/apache/arrow-datafusion/pull/1680) ([alamb](https://github.com/alamb)) +- upgrade clap to version 3 [\#1672](https://github.com/apache/arrow-datafusion/pull/1672) ([Jimexist](https://github.com/Jimexist)) +- Improve configuration and resource use of `MemoryManager` and `DiskManager` [\#1668](https://github.com/apache/arrow-datafusion/pull/1668) ([alamb](https://github.com/alamb)) +- feat: Support quarter granularity in date_trunc function [\#1667](https://github.com/apache/arrow-datafusion/pull/1667) ([ovr](https://github.com/ovr)) +- Fix can not load parquet table from spark in datafusion-cli.
[\#1665](https://github.com/apache/arrow-datafusion/pull/1665) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Make `MemoryManager` and `MemoryStream` public [\#1664](https://github.com/apache/arrow-datafusion/pull/1664) ([yjshen](https://github.com/yjshen)) +- \[Cleanup\] Move `AggregatedMetricsSet` to `metrics` for further reuse [\#1663](https://github.com/apache/arrow-datafusion/pull/1663) ([yjshen](https://github.com/yjshen)) +- fix: substr - correct behaviour with negative start pos [\#1660](https://github.com/apache/arrow-datafusion/pull/1660) ([ovr](https://github.com/ovr)) +- support bitwise and as an example [\#1653](https://github.com/apache/arrow-datafusion/pull/1653) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- refine match pattern related code [\#1650](https://github.com/apache/arrow-datafusion/pull/1650) ([xudong963](https://github.com/xudong963)) +- update md-5, sha2, blake2 [\#1647](https://github.com/apache/arrow-datafusion/pull/1647) ([xudong963](https://github.com/xudong963)) +- Add `DataFusionError` -\> `ArrowError` conversion [\#1643](https://github.com/apache/arrow-datafusion/pull/1643) ([alamb](https://github.com/alamb)) +- Add `spill_count` and `spilled_bytes` to `BaselineMetrics`, test sort with spill [\#1641](https://github.com/apache/arrow-datafusion/pull/1641) ([yjshen](https://github.com/yjshen)) +- support hash decimal array and group by [\#1640](https://github.com/apache/arrow-datafusion/pull/1640) ([liukun4515](https://github.com/liukun4515)) +- Consolidate Schema and RecordBatch projection [\#1638](https://github.com/apache/arrow-datafusion/pull/1638) ([alamb](https://github.com/alamb)) +- Update hashbrown requirement from 0.11 to 0.12 [\#1631](https://github.com/apache/arrow-datafusion/pull/1631) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update pyo3 requirement from 0.14 to 0.15 [\#1627](https://github.com/apache/arrow-datafusion/pull/1627)
([dependabot[bot]](https://github.com/apps/dependabot)) +- Optimize `SortPreservingMergeStream` to avoid `SortKeyCursor` sharing [\#1624](https://github.com/apache/arrow-datafusion/pull/1624) ([yjshen](https://github.com/yjshen)) +- Handle merging of evolved schemas in ParquetExec [\#1622](https://github.com/apache/arrow-datafusion/pull/1622) ([thinkharderdev](https://github.com/thinkharderdev)) +- feat: Support Substring\(str \[from int\] \[for int\]\) [\#1621](https://github.com/apache/arrow-datafusion/pull/1621) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ovr](https://github.com/ovr)) +- feat: Support complex interval via IntervalMonthDayNano [\#1615](https://github.com/apache/arrow-datafusion/pull/1615) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ovr](https://github.com/ovr)) +- consolidate binary_expr coercion rule code into `binary_rule.rs` module [\#1607](https://github.com/apache/arrow-datafusion/pull/1607) ([alamb](https://github.com/alamb)) +- Fix comparison of dictionary arrays [\#1606](https://github.com/apache/arrow-datafusion/pull/1606) ([alamb](https://github.com/alamb)) +- add test for decimal to decimal [\#1603](https://github.com/apache/arrow-datafusion/pull/1603) ([liukun4515](https://github.com/liukun4515)) +- update nightly version [\#1597](https://github.com/apache/arrow-datafusion/pull/1597) ([Jimexist](https://github.com/Jimexist)) +- Consolidate sort and external_sort [\#1596](https://github.com/apache/arrow-datafusion/pull/1596) ([yjshen](https://github.com/yjshen)) +- support from_slice for binary, string, and boolean array types [\#1589](https://github.com/apache/arrow-datafusion/pull/1589) ([Jimexist](https://github.com/Jimexist)) +- add from_slice trait to ease arrow2 migration [\#1588](https://github.com/apache/arrow-datafusion/pull/1588) ([Jimexist](https://github.com/Jimexist)) +- Implement ARRAY_AGG\(DISTINCT ...\) [\#1579](https://github.com/apache/arrow-datafusion/pull/1579) 
([james727](https://github.com/james727)) +- Rename sql integration tests from `mod` to `sql_integration` [\#1575](https://github.com/apache/arrow-datafusion/pull/1575) ([alamb](https://github.com/alamb)) +- minor: improve the benchmark readme [\#1567](https://github.com/apache/arrow-datafusion/pull/1567) ([xudong963](https://github.com/xudong963)) +- Consolidate `batch_size` configuration in `ExecutionConfig`, `RuntimeConfig` and `PhysicalPlanConfig` [\#1562](https://github.com/apache/arrow-datafusion/pull/1562) ([yjshen](https://github.com/yjshen)) +- Update to rust 1.58 [\#1557](https://github.com/apache/arrow-datafusion/pull/1557) ([xudong963](https://github.com/xudong963)) +- support mathematics operation for decimal data type [\#1554](https://github.com/apache/arrow-datafusion/pull/1554) ([liukun4515](https://github.com/liukun4515)) +- Address clippy warnings [\#1553](https://github.com/apache/arrow-datafusion/pull/1553) ([sergey-melnychuk](https://github.com/sergey-melnychuk)) +- enhance arithmetic operation for array with scalar [\#1552](https://github.com/apache/arrow-datafusion/pull/1552) ([liukun4515](https://github.com/liukun4515)) +- Remove unused `update` and `merge` implementations from Aggregates and supporting `ScalarValue` arithmetic [\#1550](https://github.com/apache/arrow-datafusion/pull/1550) ([alamb](https://github.com/alamb)) +- Add batch operations to stddev [\#1547](https://github.com/apache/arrow-datafusion/pull/1547) ([realno](https://github.com/realno)) +- Mark ARRAY_AGG\(DISTINCT ...\) not implemented [\#1534](https://github.com/apache/arrow-datafusion/pull/1534) ([james727](https://github.com/james727)) +- Update to arrow-7.0.0 [\#1523](https://github.com/apache/arrow-datafusion/pull/1523) ([alamb](https://github.com/alamb)) +- Fix ORDER BY on aggregate [\#1506](https://github.com/apache/arrow-datafusion/pull/1506) ([viirya](https://github.com/viirya)) +- Add example on how to query multiple parquet files 
[\#1497](https://github.com/apache/arrow-datafusion/pull/1497) ([nitisht](https://github.com/nitisht)) +- Refactor testing modules [\#1491](https://github.com/apache/arrow-datafusion/pull/1491) ([hntd187](https://github.com/hntd187)) +- add rfcs for datafusion [\#1490](https://github.com/apache/arrow-datafusion/pull/1490) ([xudong963](https://github.com/xudong963)) +- support comparison for decimal data type and refactor the binary coercion rule [\#1483](https://github.com/apache/arrow-datafusion/pull/1483) ([liukun4515](https://github.com/liukun4515)) +- Minor: Rename `predicate_builder` --\> `pruning_predicate` for consistency [\#1481](https://github.com/apache/arrow-datafusion/pull/1481) ([alamb](https://github.com/alamb)) +- Tests for support try_cast/cast decimal to numeric [\#1465](https://github.com/apache/arrow-datafusion/pull/1465) ([liukun4515](https://github.com/liukun4515)) +- Avoid send empty batches for Hash partitioning. [\#1459](https://github.com/apache/arrow-datafusion/pull/1459) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Planner code cleanup [\#1450](https://github.com/apache/arrow-datafusion/pull/1450) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fix bug in projection: "column types must match schema types, expected XXX but found YYY" [\#1448](https://github.com/apache/arrow-datafusion/pull/1448) ([alamb](https://github.com/alamb)) +- Update arrow-rs to 6.4.0 and replace boolean comparison in datafusion with arrow compute kernel [\#1446](https://github.com/apache/arrow-datafusion/pull/1446) ([xudong963](https://github.com/xudong963)) +- support cast/try_cast for decimal: signed numeric to decimal [\#1442](https://github.com/apache/arrow-datafusion/pull/1442) ([liukun4515](https://github.com/liukun4515)) +- Consolidate decimal error checking and improve error messages [\#1438](https://github.com/apache/arrow-datafusion/pull/1438) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- use 0.13 sql parser [\#1435](https://github.com/apache/arrow-datafusion/pull/1435) ([Jimexist](https://github.com/Jimexist)) +- Minor Code cleanups [\#1428](https://github.com/apache/arrow-datafusion/pull/1428) ([alamb](https://github.com/alamb)) +- Clarify communication on bi-weekly sync [\#1427](https://github.com/apache/arrow-datafusion/pull/1427) ([alamb](https://github.com/alamb)) +- support sum/avg agg for decimal, change sum\(float32\) --\> float64 [\#1408](https://github.com/apache/arrow-datafusion/pull/1408) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- Fix bugs with nullability during rewrites: Combine `simplify` and `Simplifier` [\#1401](https://github.com/apache/arrow-datafusion/pull/1401) ([alamb](https://github.com/alamb)) +- Minimize features [\#1399](https://github.com/apache/arrow-datafusion/pull/1399) ([carols10cents](https://github.com/carols10cents)) +- Update rust version to 1.57 [\#1395](https://github.com/apache/arrow-datafusion/pull/1395) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- support decimal scalar value [\#1394](https://github.com/apache/arrow-datafusion/pull/1394) ([liukun4515](https://github.com/liukun4515)) +- Add coercion rules for AggregateFunctions [\#1387](https://github.com/apache/arrow-datafusion/pull/1387) ([liukun4515](https://github.com/liukun4515)) +- upgrade the arrow-rs version [\#1385](https://github.com/apache/arrow-datafusion/pull/1385) ([liukun4515](https://github.com/liukun4515)) +- add array agg name [\#1382](https://github.com/apache/arrow-datafusion/pull/1382) ([liukun4515](https://github.com/liukun4515)) +- Make tests for `simplify` and `Simplifier` consistent [\#1376](https://github.com/apache/arrow-datafusion/pull/1376) ([alamb](https://github.com/alamb)) +- Refactor: Consolidate 
expression simplification code in `simplify_expression.rs` [\#1374](https://github.com/apache/arrow-datafusion/pull/1374) ([alamb](https://github.com/alamb)) +- remove unused code in hash_aggregate [\#1370](https://github.com/apache/arrow-datafusion/pull/1370) ([ic4y](https://github.com/ic4y)) +- Use `BufReader` for LocalFileReader to revert performance regression in parquet reading [\#1366](https://github.com/apache/arrow-datafusion/pull/1366) ([Dandandan](https://github.com/Dandandan)) +- Add unit test for constant folding on values [\#1355](https://github.com/apache/arrow-datafusion/pull/1355) ([viirya](https://github.com/viirya)) +- Extract logical plan: rename the plan name \(follow up\) [\#1354](https://github.com/apache/arrow-datafusion/pull/1354) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- Moved aggr_test_schema to test_utils [\#1338](https://github.com/apache/arrow-datafusion/pull/1338) ([rdettai](https://github.com/rdettai)) +- upgrade arrow-rs to 6.2.0 [\#1334](https://github.com/apache/arrow-datafusion/pull/1334) ([liukun4515](https://github.com/liukun4515)) +- Update release instructions [\#1331](https://github.com/apache/arrow-datafusion/pull/1331) ([alamb](https://github.com/alamb)) +- \#1268: allow datafusion-cli to toggle quiet flag within CLI [\#1330](https://github.com/apache/arrow-datafusion/pull/1330) ([jgoday](https://github.com/jgoday)) +- Extract Aggregate, Sort, and Join to struct from AggregatePlan [\#1326](https://github.com/apache/arrow-datafusion/pull/1326) ([matthewmturner](https://github.com/matthewmturner)) +- Extract `EmptyRelation`, `Limit`, `Values` from `LogicalPlan` [\#1325](https://github.com/apache/arrow-datafusion/pull/1325) ([liukun4515](https://github.com/liukun4515)) +- Extract CrossJoin, Repartition, Union in LogicalPlan [\#1322](https://github.com/apache/arrow-datafusion/pull/1322) ([liukun4515](https://github.com/liukun4515)) +- Fifth batch of updating 
sql tests to use assert_batches_eq [\#1318](https://github.com/apache/arrow-datafusion/pull/1318) ([matthewmturner](https://github.com/matthewmturner)) +- Extract Explain, Analyze, Extension in LogicalPlan as independent struct [\#1317](https://github.com/apache/arrow-datafusion/pull/1317) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Extract CreateMemoryTable, DropTable, CreateExternalTable in LogicalPlan as independent struct [\#1311](https://github.com/apache/arrow-datafusion/pull/1311) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- Extract Projection, Filter, Window in LogicalPlan as independent struct [\#1309](https://github.com/apache/arrow-datafusion/pull/1309) ([ic4y](https://github.com/ic4y)) +- Add PSQL comparison tests for except, intersect [\#1292](https://github.com/apache/arrow-datafusion/pull/1292) ([mrob95](https://github.com/mrob95)) +- Extract logical plans in LogicalPlan as independent struct: TableScan [\#1290](https://github.com/apache/arrow-datafusion/pull/1290) ([xudong963](https://github.com/xudong963)) +- Add statement helper command to cli [\#1285](https://github.com/apache/arrow-datafusion/pull/1285) ([matthewmturner](https://github.com/matthewmturner)) +- Python bindings for window functions [\#819](https://github.com/apache/arrow-datafusion/pull/819) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jgoday](https://github.com/jgoday)) diff --git a/dev/changelog/7.1.0.md b/dev/changelog/7.1.0.md new file mode 100644 index 0000000000000..751023e32c56c --- /dev/null +++ b/dev/changelog/7.1.0.md @@ -0,0 +1,26 @@ + + +## [7.1.0](https://github.com/apache/arrow-datafusion/tree/7.1.0) (2022-04-10) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/7.0.0...7.1.0) + +**Fixed bugs:** + +- By default, use only 1000 rows to infer the schema 
[\#2159](https://github.com/apache/arrow-datafusion/pull/2159) diff --git a/dev/changelog/8.0.0.md b/dev/changelog/8.0.0.md new file mode 100644 index 0000000000000..dcfb85f3ea21b --- /dev/null +++ b/dev/changelog/8.0.0.md @@ -0,0 +1,319 @@ + + +## [8.0.0](https://github.com/apache/arrow-datafusion/tree/8.0.0) (2022-05-12) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/7.1.0-rc1...8.0.0) + +**Breaking changes:** + +- Add SQL planner support for `ROLLUP` and `CUBE` grouping set expressions [\#2446](https://github.com/apache/arrow-datafusion/pull/2446) ([andygrove](https://github.com/andygrove)) +- Make `ExecutionPlan::execute` Sync [\#2434](https://github.com/apache/arrow-datafusion/pull/2434) ([tustvold](https://github.com/tustvold)) +- Introduce new `DataFusionError::SchemaError` type [\#2371](https://github.com/apache/arrow-datafusion/pull/2371) ([andygrove](https://github.com/andygrove)) +- Add `Expr::InSubquery` and `Expr::ScalarSubquery` [\#2342](https://github.com/apache/arrow-datafusion/pull/2342) ([andygrove](https://github.com/andygrove)) +- Add `Expr::Exists` to represent EXISTS subquery expression [\#2339](https://github.com/apache/arrow-datafusion/pull/2339) ([andygrove](https://github.com/andygrove)) +- Move `LogicalPlan` enum to `datafusion-expr` crate [\#2294](https://github.com/apache/arrow-datafusion/pull/2294) ([andygrove](https://github.com/andygrove)) +- Remove dependency from `LogicalPlan::TableScan` to `ExecutionPlan` [\#2284](https://github.com/apache/arrow-datafusion/pull/2284) ([andygrove](https://github.com/andygrove)) +- Move logical expression type-coercion code from `physical-expr` crate to `expr` crate [\#2257](https://github.com/apache/arrow-datafusion/pull/2257) ([andygrove](https://github.com/andygrove)) +- feat: 2061 create external table ddl table partition cols [\#2099](https://github.com/apache/arrow-datafusion/pull/2099) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] 
([jychen7](https://github.com/jychen7)) +- Reorganize the project folders [\#2081](https://github.com/apache/arrow-datafusion/pull/2081) ([yahoNanJing](https://github.com/yahoNanJing)) +- Support more ScalarFunction in Ballista [\#2008](https://github.com/apache/arrow-datafusion/pull/2008) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Merge dataframe and dataframe imp [\#1998](https://github.com/apache/arrow-datafusion/pull/1998) ([vchag](https://github.com/vchag)) +- Rename `ExecutionContext` to `SessionContext`, `ExecutionContextState` to `SessionState`, add `TaskContext` to support multi-tenancy configurations - Part 1 [\#1987](https://github.com/apache/arrow-datafusion/pull/1987) ([mingmwang](https://github.com/mingmwang)) +- Add Coalesce function [\#1969](https://github.com/apache/arrow-datafusion/pull/1969) ([msathis](https://github.com/msathis)) +- Add Create Schema functionality in SQL [\#1959](https://github.com/apache/arrow-datafusion/pull/1959) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) +- omit some clone when converting sql to logical plan [\#1945](https://github.com/apache/arrow-datafusion/pull/1945) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) +- \[split/16\] move physical plan expressions folder to datafusion-physical-expr crate [\#1889](https://github.com/apache/arrow-datafusion/pull/1889) ([Jimexist](https://github.com/Jimexist)) +- remove sync constraint of SendableRecordBatchStream [\#1884](https://github.com/apache/arrow-datafusion/pull/1884) ([doki23](https://github.com/doki23)) +- \[split/15\] move built in window expr and partition evaluator [\#1865](https://github.com/apache/arrow-datafusion/pull/1865) ([Jimexist](https://github.com/Jimexist)) + +**Implemented enhancements:** + +- Include `Expr` to `datafusion::prelude` [\#2347](https://github.com/apache/arrow-datafusion/issues/2347) +- Implement `Serialization` 
API for DataFusion [\#2340](https://github.com/apache/arrow-datafusion/issues/2340) +- Implement `power` function [\#1493](https://github.com/apache/arrow-datafusion/issues/1493) +- allow `lit` python function to support `boolean` and other types [\#1136](https://github.com/apache/arrow-datafusion/issues/1136) +- Automate dependency updates [\#37](https://github.com/apache/arrow-datafusion/issues/37) +- Add `CREATE VIEW` [\#2279](https://github.com/apache/arrow-datafusion/pull/2279) ([matthewmturner](https://github.com/matthewmturner)) +- \[Ballista\] Support Union in ballista. [\#2098](https://github.com/apache/arrow-datafusion/pull/2098) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Change the DataFusion explain plans to make it clearer in the predicate/filter [\#2063](https://github.com/apache/arrow-datafusion/pull/2063) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add `write_json`, `read_json`, `register_json`, and `JsonFormat` to `CREATE EXTERNAL TABLE` functionality [\#2023](https://github.com/apache/arrow-datafusion/pull/2023) ([matthewmturner](https://github.com/matthewmturner)) +- Qualified wildcard [\#2012](https://github.com/apache/arrow-datafusion/pull/2012) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) +- support bitwise or/'|' operation [\#1876](https://github.com/apache/arrow-datafusion/pull/1876) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- Introduce JIT code generation [\#1849](https://github.com/apache/arrow-datafusion/pull/1849) ([yjshen](https://github.com/yjshen)) + +**Fixed bugs:** + +- CASE expr with NULL literals panics `'WHEN expression did not return a BooleanArray'` [\#1189](https://github.com/apache/arrow-datafusion/issues/1189) +- Function calls with NULL literals do not work [\#1188](https://github.com/apache/arrow-datafusion/issues/1188) +- Add SQL planner support for calling `round` function with two arguments 
[\#2503](https://github.com/apache/arrow-datafusion/pull/2503) ([andygrove](https://github.com/andygrove)) +- nested query fix [\#2402](https://github.com/apache/arrow-datafusion/pull/2402) ([comphead](https://github.com/comphead)) +- fix issue\#2058 file_format/json.rs attempt to subtract with overflow [\#2066](https://github.com/apache/arrow-datafusion/pull/2066) ([silence-coding](https://github.com/silence-coding)) +- fix bug the optimizer rule filter push down [\#2039](https://github.com/apache/arrow-datafusion/pull/2039) ([jackwener](https://github.com/jackwener)) +- fix: replace `ExecutionContex` and `ExecutionConfig` with `SessionContext` and `SessionConfig` [\#2030](https://github.com/apache/arrow-datafusion/pull/2030) ([xudong963](https://github.com/xudong963)) +- Fixed parquet path partitioning when only selecting partitioned columns [\#2000](https://github.com/apache/arrow-datafusion/pull/2000) ([pjmore](https://github.com/pjmore)) +- Fix ambiguous reference error in filter plan [\#1925](https://github.com/apache/arrow-datafusion/pull/1925) ([jonmmease](https://github.com/jonmmease)) +- platform aware partition parsing [\#1867](https://github.com/apache/arrow-datafusion/pull/1867) ([korowa](https://github.com/korowa)) +- Fix incorrect aggregation in case that GROUP BY contains duplicate column names [\#1855](https://github.com/apache/arrow-datafusion/pull/1855) ([alex-natzka](https://github.com/alex-natzka)) + +**Documentation updates:** + +- MINOR: Make crate READMEs consistent [\#2437](https://github.com/apache/arrow-datafusion/pull/2437) ([andygrove](https://github.com/andygrove)) +- minor: Improve documentation for DFSchema join and merge functions [\#2367](https://github.com/apache/arrow-datafusion/pull/2367) ([andygrove](https://github.com/andygrove)) +- Change the code location and add annotation [\#2037](https://github.com/apache/arrow-datafusion/pull/2037) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] 
([jackwener](https://github.com/jackwener)) +- Fix typos \(Datafusion -\> DataFusion\) [\#1993](https://github.com/apache/arrow-datafusion/pull/1993) ([andygrove](https://github.com/andygrove)) +- Add examples to use MemTable and TableProvider \(\#1864\) [\#1946](https://github.com/apache/arrow-datafusion/pull/1946) ([PierreZ](https://github.com/PierreZ)) +- Add doc for building `datafusion-cli` when connect the ballista [\#1866](https://github.com/apache/arrow-datafusion/pull/1866) ([liukun4515](https://github.com/liukun4515)) +- Add benchmarks section to DEVELOPERS.md [\#1838](https://github.com/apache/arrow-datafusion/pull/1838) ([tustvold](https://github.com/tustvold)) + +**Performance improvements:** + +- Avoid an Arc::clone per row in benchmark [\#1975](https://github.com/apache/arrow-datafusion/pull/1975) ([jhorstmann](https://github.com/jhorstmann)) +- Update datafusion-cli allocator [\#1878](https://github.com/apache/arrow-datafusion/pull/1878) ([matthewmturner](https://github.com/matthewmturner)) + +**Closed issues:** + +- Make expected result string in unit tests more readable [\#2412](https://github.com/apache/arrow-datafusion/issues/2412) +- remove duplicated `fn aggregate()` in aggregate expression tests [\#2399](https://github.com/apache/arrow-datafusion/issues/2399) +- split `distinct_expression.rs` into `count_distinct.rs` and `array_agg_distinct.rs` [\#2385](https://github.com/apache/arrow-datafusion/issues/2385) +- move sql tests in `context.rs` to corresponding test files in `datafusion/core/tests/sql` [\#2328](https://github.com/apache/arrow-datafusion/issues/2328) +- Date32/Date64 as join keys for merge join [\#2314](https://github.com/apache/arrow-datafusion/issues/2314) +- Error precision and scale for decimal coercion in logic comparison [\#2232](https://github.com/apache/arrow-datafusion/issues/2232) +- Support Multiple row layout [\#2188](https://github.com/apache/arrow-datafusion/issues/2188) +- TPC-H Query 18 
[\#169](https://github.com/apache/arrow-datafusion/issues/169) +- TPC-H Query 16 [\#167](https://github.com/apache/arrow-datafusion/issues/167) +- Implement Sort-Merge Join [\#141](https://github.com/apache/arrow-datafusion/issues/141) +- Split logical expressions out into separate source files [\#114](https://github.com/apache/arrow-datafusion/issues/114) + +**Merged pull requests:** + +- Minor: remove code that is now included in arrow-rs [\#2511](https://github.com/apache/arrow-datafusion/pull/2511) ([alamb](https://github.com/alamb)) +- MINOR: Enable multi-statement benchmark queries [\#2507](https://github.com/apache/arrow-datafusion/pull/2507) ([andygrove](https://github.com/andygrove)) +- MINOR: Add ignored tests for all remaining benchmark queries [\#2506](https://github.com/apache/arrow-datafusion/pull/2506) ([andygrove](https://github.com/andygrove)) +- Update to `sqlparser` `0.17.0` [\#2500](https://github.com/apache/arrow-datafusion/pull/2500) ([alamb](https://github.com/alamb)) +- Add metrics for ParquetExec [\#2499](https://github.com/apache/arrow-datafusion/pull/2499) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Limit cpu cores used when generating changelog [\#2494](https://github.com/apache/arrow-datafusion/pull/2494) ([andygrove](https://github.com/andygrove)) +- Optimize MergeJoin by storing joined indices instead of creating small record batches for each match [\#2492](https://github.com/apache/arrow-datafusion/pull/2492) ([richox](https://github.com/richox)) +- Add SQL planner support for `grouping()` aggregate expressions [\#2486](https://github.com/apache/arrow-datafusion/pull/2486) ([andygrove](https://github.com/andygrove)) +- MINOR: Parameterize changelog script [\#2484](https://github.com/apache/arrow-datafusion/pull/2484) ([jychen7](https://github.com/jychen7)) +- Numeric, String, Boolean comparisons with literal `NULL` [\#2481](https://github.com/apache/arrow-datafusion/pull/2481) ([WinkerDu](https://github.com/WinkerDu)) +- Adds unit 
test cases of mathematical expressions working with `null` literal [\#2478](https://github.com/apache/arrow-datafusion/pull/2478) ([WinkerDu](https://github.com/WinkerDu)) +- Minor: Move test code from `context.rs` into `sql_integration` [\#2473](https://github.com/apache/arrow-datafusion/pull/2473) ([alamb](https://github.com/alamb)) +- Minor: Use ExprVisitor to find columns referenced by expr [\#2471](https://github.com/apache/arrow-datafusion/pull/2471) ([alamb](https://github.com/alamb)) +- minor: remove expr dependency from the row crate, update crate-deps.dot/svg [\#2470](https://github.com/apache/arrow-datafusion/pull/2470) ([yjshen](https://github.com/yjshen)) +- Fix `read_from_registered_table_with_glob_path` fails if path contains // \#2465 [\#2468](https://github.com/apache/arrow-datafusion/pull/2468) ([timvw](https://github.com/timvw)) +- Add support for list_dir\(\) on local fs [\#2467](https://github.com/apache/arrow-datafusion/pull/2467) ([wjones127](https://github.com/wjones127)) +- MINOR: Partial fix for SQL aggregate queries with aliases [\#2464](https://github.com/apache/arrow-datafusion/pull/2464) ([andygrove](https://github.com/andygrove)) +- minor: move struct definition out of `aggregate/mod.rs`, etc [\#2458](https://github.com/apache/arrow-datafusion/pull/2458) ([WinkerDu](https://github.com/WinkerDu)) +- Fix bugs in SQL planner with GROUP BY scalar function and alias [\#2457](https://github.com/apache/arrow-datafusion/pull/2457) ([andygrove](https://github.com/andygrove)) +- feat: Support CompoundIdentifier as GetIndexedField access [\#2454](https://github.com/apache/arrow-datafusion/pull/2454) ([ovr](https://github.com/ovr)) +- Table provider error propagation [\#2438](https://github.com/apache/arrow-datafusion/pull/2438) ([jdye64](https://github.com/jdye64)) +- MINOR: Improve error messages for GROUP BY / HAVING queries [\#2435](https://github.com/apache/arrow-datafusion/pull/2435) ([andygrove](https://github.com/andygrove)) +- minor: 
remove redundant code [\#2432](https://github.com/apache/arrow-datafusion/pull/2432) ([jackwener](https://github.com/jackwener)) +- minor: update versions and paths in changelog scripts [\#2429](https://github.com/apache/arrow-datafusion/pull/2429) ([andygrove](https://github.com/andygrove)) +- Fix Ballista executing during plan [\#2428](https://github.com/apache/arrow-datafusion/pull/2428) ([tustvold](https://github.com/tustvold)) +- minor: format table result vec & remove some unnecessary semicolons [\#2425](https://github.com/apache/arrow-datafusion/pull/2425) ([WinkerDu](https://github.com/WinkerDu)) +- Basic support for `IN` and `NOT IN` Subqueries by rewriting them to `SEMI` / `ANTI` Join [\#2421](https://github.com/apache/arrow-datafusion/pull/2421) ([korowa](https://github.com/korowa)) +- Allow subqueries without aliases [\#2418](https://github.com/apache/arrow-datafusion/pull/2418) ([andygrove](https://github.com/andygrove)) +- Fix bug in subquery join filters referencing outer query [\#2416](https://github.com/apache/arrow-datafusion/pull/2416) ([andygrove](https://github.com/andygrove)) +- MINOR: remove duplicated function `format_state_name()` [\#2414](https://github.com/apache/arrow-datafusion/pull/2414) ([WinkerDu](https://github.com/WinkerDu)) +- Make expected result string in unit tests more readable [\#2413](https://github.com/apache/arrow-datafusion/pull/2413) ([WinkerDu](https://github.com/WinkerDu)) +- `sum(distinct)` support [\#2405](https://github.com/apache/arrow-datafusion/pull/2405) ([WinkerDu](https://github.com/WinkerDu)) +- Update ordered-float requirement from 2.10 to 3.0 [\#2403](https://github.com/apache/arrow-datafusion/pull/2403) ([dependabot[bot]](https://github.com/apps/dependabot)) +- remove duplicated `fn aggregate()` in aggregate expression tests [\#2400](https://github.com/apache/arrow-datafusion/pull/2400) ([WinkerDu](https://github.com/WinkerDu)) +- Support type-coercion from Decimal to Float64 
[\#2396](https://github.com/apache/arrow-datafusion/pull/2396) ([comphead](https://github.com/comphead)) +- minor: SchemaError code cleanup and improvements [\#2391](https://github.com/apache/arrow-datafusion/pull/2391) ([andygrove](https://github.com/andygrove)) +- Support struct_expr generate struct in sql [\#2389](https://github.com/apache/arrow-datafusion/pull/2389) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Re-organize and rename aggregates physical plan [\#2388](https://github.com/apache/arrow-datafusion/pull/2388) ([yjshen](https://github.com/yjshen)) +- refactor `distinct_expressions.rs` and split into `count_distinct.rs` and `array_agg_distinct.rs` [\#2386](https://github.com/apache/arrow-datafusion/pull/2386) ([WinkerDu](https://github.com/WinkerDu)) +- Allow CTEs to be referenced from subquery expressions [\#2384](https://github.com/apache/arrow-datafusion/pull/2384) ([andygrove](https://github.com/andygrove)) +- Upgrade to arrow 13 [\#2382](https://github.com/apache/arrow-datafusion/pull/2382) ([alamb](https://github.com/alamb)) +- Grouped Aggregate in row format [\#2375](https://github.com/apache/arrow-datafusion/pull/2375) ([yjshen](https://github.com/yjshen)) +- Fix bugs with CTE aliasing and normalize all identifiers in the SQL planner [\#2373](https://github.com/apache/arrow-datafusion/pull/2373) ([andygrove](https://github.com/andygrove)) +- Stop optimizing queries twice [\#2369](https://github.com/apache/arrow-datafusion/pull/2369) ([andygrove](https://github.com/andygrove)) +- feat: Support casting to arrays to primitive type [\#2366](https://github.com/apache/arrow-datafusion/pull/2366) ([ovr](https://github.com/ovr)) +- Add proper support for `null` literal by introducing `ScalarValue::Null` [\#2364](https://github.com/apache/arrow-datafusion/pull/2364) ([WinkerDu](https://github.com/WinkerDu)) +- minor: fix duplicate column bug in subquery support [\#2362](https://github.com/apache/arrow-datafusion/pull/2362) 
([andygrove](https://github.com/andygrove)) +- Normalize subquery aliases [\#2359](https://github.com/apache/arrow-datafusion/pull/2359) ([andygrove](https://github.com/andygrove)) +- Implement physical planner support for DATE +/- INTERVAL [\#2357](https://github.com/apache/arrow-datafusion/pull/2357) ([andygrove](https://github.com/andygrove)) +- Add SQL query planner support for Scalar Subqueries [\#2354](https://github.com/apache/arrow-datafusion/pull/2354) ([andygrove](https://github.com/andygrove)) +- Add SQL query planner support for IN subqueries [\#2352](https://github.com/apache/arrow-datafusion/pull/2352) ([andygrove](https://github.com/andygrove)) +- Add `Expr` to prelude [\#2348](https://github.com/apache/arrow-datafusion/pull/2348) ([alamb](https://github.com/alamb)) +- Add SQL planner support for EXISTS subqueries [\#2344](https://github.com/apache/arrow-datafusion/pull/2344) ([andygrove](https://github.com/andygrove)) +- Add public Serialization/Deserialization API for `Expr` to/from bytes [\#2341](https://github.com/apache/arrow-datafusion/pull/2341) ([alamb](https://github.com/alamb)) +- Support for date32 and date64 in sort merge join [\#2336](https://github.com/apache/arrow-datafusion/pull/2336) ([hntd187](https://github.com/hntd187)) +- \[physical-expr\] move aggregate exprs and window exprs to their own modules [\#2335](https://github.com/apache/arrow-datafusion/pull/2335) ([yjshen](https://github.com/yjshen)) +- fix: union schema [\#2334](https://github.com/apache/arrow-datafusion/pull/2334) ([gandronchik](https://github.com/gandronchik)) +- Improve sql integration test organization [\#2333](https://github.com/apache/arrow-datafusion/pull/2333) ([alamb](https://github.com/alamb)) +- Support scalar values for func Array [\#2332](https://github.com/apache/arrow-datafusion/pull/2332) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- move sql tests from `context.rs` to corresponding test files in `tests/sql` 
[\#2329](https://github.com/apache/arrow-datafusion/pull/2329) ([WinkerDu](https://github.com/WinkerDu)) +- deprecate `index_of` and make `index_of_column_by_name` public [\#2320](https://github.com/apache/arrow-datafusion/pull/2320) ([jdye64](https://github.com/jdye64)) +- Fix HashJoin evaluating during plan [\#2317](https://github.com/apache/arrow-datafusion/pull/2317) ([tustvold](https://github.com/tustvold)) +- minor: remove two source files that only had re-exports [\#2313](https://github.com/apache/arrow-datafusion/pull/2313) ([andygrove](https://github.com/andygrove)) +- Don't sort batches during plan [\#2312](https://github.com/apache/arrow-datafusion/pull/2312) ([tustvold](https://github.com/tustvold)) +- Move case/when expressions to datafusion-expr crate [\#2311](https://github.com/apache/arrow-datafusion/pull/2311) ([andygrove](https://github.com/andygrove)) +- Fix CrossJoinExec evaluating during plan [\#2310](https://github.com/apache/arrow-datafusion/pull/2310) ([tustvold](https://github.com/tustvold)) +- Make SortPreservingMerge Usable Outside Tokio \(\#2201\) [\#2305](https://github.com/apache/arrow-datafusion/pull/2305) ([tustvold](https://github.com/tustvold)) +- chore: update cranelift to 0.83.0 [\#2304](https://github.com/apache/arrow-datafusion/pull/2304) ([yjshen](https://github.com/yjshen)) +- Always increment timer on record [\#2298](https://github.com/apache/arrow-datafusion/pull/2298) ([tustvold](https://github.com/tustvold)) +- Remove unnecessary env var for parquet_sql example [\#2297](https://github.com/apache/arrow-datafusion/pull/2297) ([sergey-melnychuk](https://github.com/sergey-melnychuk)) +- Simplify sort streams [\#2296](https://github.com/apache/arrow-datafusion/pull/2296) ([tustvold](https://github.com/tustvold)) +- MINOR: beautify code with neat idents [\#2295](https://github.com/apache/arrow-datafusion/pull/2295) ([WinkerDu](https://github.com/WinkerDu)) +- Move FileType enum from sql module to logical_plan module 
[\#2290](https://github.com/apache/arrow-datafusion/pull/2290) ([andygrove](https://github.com/andygrove)) +- Remove Parquet Empty Projection Workaround [\#2289](https://github.com/apache/arrow-datafusion/pull/2289) ([tustvold](https://github.com/tustvold)) +- Add BatchPartitioner \(\#2285\) [\#2287](https://github.com/apache/arrow-datafusion/pull/2287) ([tustvold](https://github.com/tustvold)) +- Make row its crate to make it accessible from physical-expr [\#2283](https://github.com/apache/arrow-datafusion/pull/2283) ([yjshen](https://github.com/yjshen)) +- Enable filter pushdown when using In_list on parquet [\#2282](https://github.com/apache/arrow-datafusion/pull/2282) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Update uuid requirement from 0.8 to 1.0 [\#2280](https://github.com/apache/arrow-datafusion/pull/2280) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add bytes scanned metric to ParquetExec [\#2273](https://github.com/apache/arrow-datafusion/pull/2273) ([thinkharderdev](https://github.com/thinkharderdev)) +- Fix outer join output with all-null indices on empty batch [\#2272](https://github.com/apache/arrow-datafusion/pull/2272) ([yjshen](https://github.com/yjshen)) +- Re-export DataFusion crates [\#2264](https://github.com/apache/arrow-datafusion/pull/2264) ([andygrove](https://github.com/andygrove)) +- rewrite approx_median to approx_percentile_cont while planning phase [\#2262](https://github.com/apache/arrow-datafusion/pull/2262) ([korowa](https://github.com/korowa)) +- Introduce RowLayout to represent rows for different purposes [\#2261](https://github.com/apache/arrow-datafusion/pull/2261) ([yjshen](https://github.com/yjshen)) +- fix string coercion missing in Eq/NotEq operator [\#2258](https://github.com/apache/arrow-datafusion/pull/2258) ([WinkerDu](https://github.com/WinkerDu)) +- Update to Arrow 12.0.0, update tonic and prost [\#2253](https://github.com/apache/arrow-datafusion/pull/2253) ([alamb](https://github.com/alamb)) +- 
minor: move field_util from `physical-expr` crate to `expr` crate [\#2250](https://github.com/apache/arrow-datafusion/pull/2250) ([andygrove](https://github.com/andygrove)) +- Move identifer case tests to `sql_integ`, add negative cases, Debug for `DataFrame` [\#2243](https://github.com/apache/arrow-datafusion/pull/2243) ([alamb](https://github.com/alamb)) +- Implement sort-merge join [\#2242](https://github.com/apache/arrow-datafusion/pull/2242) ([richox](https://github.com/richox)) +- fix: find the right wider decimal datatype for comparison operation [\#2241](https://github.com/apache/arrow-datafusion/pull/2241) ([liukun4515](https://github.com/liukun4515)) +- Fix join without constraints [\#2240](https://github.com/apache/arrow-datafusion/pull/2240) ([Dandandan](https://github.com/Dandandan)) +- Add type coercion rule for date + interval [\#2235](https://github.com/apache/arrow-datafusion/pull/2235) ([andygrove](https://github.com/andygrove)) +- support array with scalar arithmetic operation for decimal data type [\#2233](https://github.com/apache/arrow-datafusion/pull/2233) ([liukun4515](https://github.com/liukun4515)) +- chore: add `debug!` log in some execution operators [\#2231](https://github.com/apache/arrow-datafusion/pull/2231) ([NGA-TRAN](https://github.com/NGA-TRAN)) +- Introduce new optional scheduler, using Morsel-driven Parallelism + rayon \(\#2199\) [\#2226](https://github.com/apache/arrow-datafusion/pull/2226) ([tustvold](https://github.com/tustvold)) +- minor: add editor config file [\#2224](https://github.com/apache/arrow-datafusion/pull/2224) ([jackwener](https://github.com/jackwener)) +- minor: Refactor to avoid repeated code in replace_qualifier [\#2222](https://github.com/apache/arrow-datafusion/pull/2222) ([andygrove](https://github.com/andygrove)) +- update cli readme [\#2220](https://github.com/apache/arrow-datafusion/pull/2220) ([liukun4515](https://github.com/liukun4515)) +- Use `filter` \(filter_record_batch\) instead of `take` to 
avoid using indices [\#2218](https://github.com/apache/arrow-datafusion/pull/2218) ([Dandandan](https://github.com/Dandandan)) +- Add single line description of ExecutionPlan \(\#2216\) [\#2217](https://github.com/apache/arrow-datafusion/pull/2217) ([tustvold](https://github.com/tustvold)) +- Remove tokio::spawn from HashAggregateExec \(\#2201\) [\#2215](https://github.com/apache/arrow-datafusion/pull/2215) ([tustvold](https://github.com/tustvold)) +- Remove tokio::spawn from WindowAggExec \(\#2201\) [\#2203](https://github.com/apache/arrow-datafusion/pull/2203) ([tustvold](https://github.com/tustvold)) +- Make ParquetExec usable outside of a tokio runtime \(\#2201\) [\#2202](https://github.com/apache/arrow-datafusion/pull/2202) ([tustvold](https://github.com/tustvold)) +- add sql level test for decimal data type [\#2200](https://github.com/apache/arrow-datafusion/pull/2200) ([liukun4515](https://github.com/liukun4515)) +- `case when` supports `NULL` constant [\#2197](https://github.com/apache/arrow-datafusion/pull/2197) ([WinkerDu](https://github.com/WinkerDu)) +- feat: Support simple Arrays with Literals [\#2194](https://github.com/apache/arrow-datafusion/pull/2194) ([ovr](https://github.com/ovr)) +- \[Ballista\] Enable ApproxPercentileWithWeight in Ballista and fill UT [\#2192](https://github.com/apache/arrow-datafusion/pull/2192) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- refactor: simplify `prepare_select_exprs` [\#2190](https://github.com/apache/arrow-datafusion/pull/2190) ([jackwener](https://github.com/jackwener)) +- Multiple row-layout support, part-1: Restructure code for clearness [\#2189](https://github.com/apache/arrow-datafusion/pull/2189) ([yjshen](https://github.com/yjshen)) +- make nightly clippy happy [\#2186](https://github.com/apache/arrow-datafusion/pull/2186) ([xudong963](https://github.com/xudong963)) +- \[Ballista\]Make PhysicalAggregateExprNode has repeated PhysicalExprNode [\#2184](https://github.com/apache/arrow-datafusion/pull/2184) 
([Ted-Jiang](https://github.com/Ted-Jiang)) +- MINOR: handle `NULL` in advance to avoid value copy in `string_concat` [\#2183](https://github.com/apache/arrow-datafusion/pull/2183) ([WinkerDu](https://github.com/WinkerDu)) +- fix: Sort with a lot of repetition values [\#2182](https://github.com/apache/arrow-datafusion/pull/2182) ([yjshen](https://github.com/yjshen)) +- cli: update lockfile [\#2178](https://github.com/apache/arrow-datafusion/pull/2178) ([happysalada](https://github.com/happysalada)) +- Add LogicalPlan::SubqueryAlias [\#2172](https://github.com/apache/arrow-datafusion/pull/2172) ([andygrove](https://github.com/andygrove)) +- minor: Avoid per cell evaluation in Coalesce, use zip in CaseWhen [\#2171](https://github.com/apache/arrow-datafusion/pull/2171) ([yjshen](https://github.com/yjshen)) +- Handle merged schemas in parquet pruning [\#2170](https://github.com/apache/arrow-datafusion/pull/2170) ([thinkharderdev](https://github.com/thinkharderdev)) +- Implement fast path of with_new_children\(\) in ExecutionPlan [\#2168](https://github.com/apache/arrow-datafusion/pull/2168) ([mingmwang](https://github.com/mingmwang)) +- enable explain for ballista [\#2163](https://github.com/apache/arrow-datafusion/pull/2163) ([doki23](https://github.com/doki23)) +- Add delimiter for create external table [\#2162](https://github.com/apache/arrow-datafusion/pull/2162) ([matthewmturner](https://github.com/matthewmturner)) +- \[MINOR\] enable `EXTRACT week` and add test \(after sqlparser update to 0.16\) [\#2157](https://github.com/apache/arrow-datafusion/pull/2157) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Optimize the evaluation of `IN` for large lists using InSet [\#2156](https://github.com/apache/arrow-datafusion/pull/2156) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Update sqlparser requirement from 0.15 to 0.16 [\#2152](https://github.com/apache/arrow-datafusion/pull/2152) ([dependabot[bot]](https://github.com/apps/dependabot)) +- fix `not(null)` with 
constant `null` [\#2144](https://github.com/apache/arrow-datafusion/pull/2144) ([WinkerDu](https://github.com/WinkerDu)) +- Add IF NOT EXISTS to `CREATE TABLE` and `CREATE EXTERNAL TABLE` [\#2143](https://github.com/apache/arrow-datafusion/pull/2143) ([matthewmturner](https://github.com/matthewmturner)) +- implement 'StringConcat' operator to support sql like "select 'aa' || 'b' " [\#2142](https://github.com/apache/arrow-datafusion/pull/2142) ([WinkerDu](https://github.com/WinkerDu)) +- \#2109 By default, use only 1000 rows to infer the schema [\#2139](https://github.com/apache/arrow-datafusion/pull/2139) ([jychen7](https://github.com/jychen7)) +- \[CLI\] Add show tables in ballista for datafusion-cli [\#2137](https://github.com/apache/arrow-datafusion/pull/2137) ([gaojun2048](https://github.com/gaojun2048)) +- fix: incorrect memory usage track for sort [\#2135](https://github.com/apache/arrow-datafusion/pull/2135) ([yjshen](https://github.com/yjshen)) +- Update quarterly roadmap for Q2 [\#2133](https://github.com/apache/arrow-datafusion/pull/2133) ([matthewmturner](https://github.com/matthewmturner)) +- Reduce SortExec memory usage by void constructing single huge batch [\#2132](https://github.com/apache/arrow-datafusion/pull/2132) ([yjshen](https://github.com/yjshen)) +- MINOR: fix concat_ws corner bug [\#2128](https://github.com/apache/arrow-datafusion/pull/2128) ([WinkerDu](https://github.com/WinkerDu)) +- Minor add clarifying comment in parquet [\#2127](https://github.com/apache/arrow-datafusion/pull/2127) ([alamb](https://github.com/alamb)) +- Minor: make disk_manager public [\#2126](https://github.com/apache/arrow-datafusion/pull/2126) ([yjshen](https://github.com/yjshen)) +- JIT-compille DataFusion expression with column name [\#2124](https://github.com/apache/arrow-datafusion/pull/2124) ([Dandandan](https://github.com/Dandandan)) +- minor: replace array_equals in case evaluation with eq_dyn from arrow-rs 
[\#2121](https://github.com/apache/arrow-datafusion/pull/2121) ([alamb](https://github.com/alamb)) +- Serialize timezone in timestamp scalar values [\#2120](https://github.com/apache/arrow-datafusion/pull/2120) ([thinkharderdev](https://github.com/thinkharderdev)) +- minor: fix some clippy warnings from nightly rust [\#2119](https://github.com/apache/arrow-datafusion/pull/2119) ([alamb](https://github.com/alamb)) +- Fix case evaluation with NULLs [\#2118](https://github.com/apache/arrow-datafusion/pull/2118) ([alamb](https://github.com/alamb)) +- issue\#1967 ignore channel close [\#2113](https://github.com/apache/arrow-datafusion/pull/2113) ([silence-coding](https://github.com/silence-coding)) +- cli: add cargo.lock [\#2112](https://github.com/apache/arrow-datafusion/pull/2112) ([happysalada](https://github.com/happysalada)) +- doc: update release schedule [\#2110](https://github.com/apache/arrow-datafusion/pull/2110) ([jychen7](https://github.com/jychen7)) +- fix df union all bug [\#2108](https://github.com/apache/arrow-datafusion/pull/2108) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([WinkerDu](https://github.com/WinkerDu)) +- Reduce repetition in Decimal binary kernels, upgrade to arrow 11.1 [\#2107](https://github.com/apache/arrow-datafusion/pull/2107) ([alamb](https://github.com/alamb)) +- update zlib version to 1.2.12 [\#2106](https://github.com/apache/arrow-datafusion/pull/2106) ([waitingkuo](https://github.com/waitingkuo)) +- Create jit-expression from datafusion expression [\#2103](https://github.com/apache/arrow-datafusion/pull/2103) ([Dandandan](https://github.com/Dandandan)) +- Add CREATE DATABASE command to SQL [\#2094](https://github.com/apache/arrow-datafusion/pull/2094) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) +- Refactor SessionContext, BallistaContext to support multi-tenancy configurations - Part 3 
[\#2091](https://github.com/apache/arrow-datafusion/pull/2091) ([mingmwang](https://github.com/mingmwang)) +- minor: remove duplicate test [\#2089](https://github.com/apache/arrow-datafusion/pull/2089) ([jackwener](https://github.com/jackwener)) +- minor: remove repeated test [\#2085](https://github.com/apache/arrow-datafusion/pull/2085) ([jackwener](https://github.com/jackwener)) +- Fix lost filters and projections in ParquetExec, CSVExec etc [\#2077](https://github.com/apache/arrow-datafusion/pull/2077) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Remove dependency of common for the storage crate [\#2076](https://github.com/apache/arrow-datafusion/pull/2076) ([yahoNanJing](https://github.com/yahoNanJing)) +- [MINOR] fix doc in `EXTRACT\(field FROM source\) [\#2074](https://github.com/apache/arrow-datafusion/pull/2074) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- \[Bug\]\[Datafusion\] fix TaskContext session_config bug [\#2070](https://github.com/apache/arrow-datafusion/pull/2070) ([gaojun2048](https://github.com/gaojun2048)) +- Short-circuit evaluation for `CaseWhen` [\#2068](https://github.com/apache/arrow-datafusion/pull/2068) ([yjshen](https://github.com/yjshen)) +- split datafusion-object-store module [\#2065](https://github.com/apache/arrow-datafusion/pull/2065) ([yahoNanJing](https://github.com/yahoNanJing)) +- Allow `CatalogProvider::register_catalog` to return an error [\#2052](https://github.com/apache/arrow-datafusion/pull/2052) ([alamb](https://github.com/alamb)) +- Add test in register_catalog and change to use named symbolic constants [\#2050](https://github.com/apache/arrow-datafusion/pull/2050) ([alamb](https://github.com/alamb)) +- Update to arrow/parquet 11.0 [\#2048](https://github.com/apache/arrow-datafusion/pull/2048) ([alamb](https://github.com/alamb)) +- minor: format comments \(`//` to `// `\) [\#2047](https://github.com/apache/arrow-datafusion/pull/2047) ([jackwener](https://github.com/jackwener)) +- use cargo-tomlfmt to check 
Cargo.toml formatting in CI [\#2033](https://github.com/apache/arrow-datafusion/pull/2033) ([WinkerDu](https://github.com/WinkerDu)) +- feat: \#2004 approx percentile with weight [\#2031](https://github.com/apache/arrow-datafusion/pull/2031) ([jychen7](https://github.com/jychen7)) +- Refactor SessionContext, SessionState and SessionConfig to support multi-tenancy configurations - Part 2 [\#2029](https://github.com/apache/arrow-datafusion/pull/2029) ([mingmwang](https://github.com/mingmwang)) +- Simplify prerequisites for running examples [\#2028](https://github.com/apache/arrow-datafusion/pull/2028) ([doki23](https://github.com/doki23)) +- Replace usage of `println!` with logger macros [\#2020](https://github.com/apache/arrow-datafusion/pull/2020) ([silence-coding](https://github.com/silence-coding)) +- Automatically test examples in user guide [\#2018](https://github.com/apache/arrow-datafusion/pull/2018) ([vchag](https://github.com/vchag)) +- return VecDeque for DFParser::parse_sql [\#2017](https://github.com/apache/arrow-datafusion/pull/2017) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) +- Eliminate the scalar value filter [\#2002](https://github.com/apache/arrow-datafusion/pull/2002) ([jackwener](https://github.com/jackwener)) +- Fixing a typo in documentation [\#1997](https://github.com/apache/arrow-datafusion/pull/1997) ([psvri](https://github.com/psvri)) +- Correct documentation of ExprVisitor [\#1996](https://github.com/apache/arrow-datafusion/pull/1996) ([alamb](https://github.com/alamb)) +- Make it possible to only scan part of a parquet file in a partition [\#1990](https://github.com/apache/arrow-datafusion/pull/1990) ([yjshen](https://github.com/yjshen)) +- Update Dockerfile to fix integration tests [\#1982](https://github.com/apache/arrow-datafusion/pull/1982) ([andygrove](https://github.com/andygrove)) +- Remove some more unecessary cloning in sql_expr_to_logical_expr 
[\#1981](https://github.com/apache/arrow-datafusion/pull/1981) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Add ticket reference to clippy allow [\#1978](https://github.com/apache/arrow-datafusion/pull/1978) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Implement EXTRACT expression with week, month, day, hour [\#1974](https://github.com/apache/arrow-datafusion/pull/1974) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Address typo in ExprVisitable trait documentation [\#1970](https://github.com/apache/arrow-datafusion/pull/1970) ([jdye64](https://github.com/jdye64)) +- Update sqlparser requirement from 0.14 to 0.15 [\#1966](https://github.com/apache/arrow-datafusion/pull/1966) ([dependabot[bot]](https://github.com/apps/dependabot)) +- PruningPredicate should take owned Expr [\#1960](https://github.com/apache/arrow-datafusion/pull/1960) ([thinkharderdev](https://github.com/thinkharderdev)) +- Update to arrow 10.0.0, pyo3 0.16 [\#1957](https://github.com/apache/arrow-datafusion/pull/1957) ([alamb](https://github.com/alamb)) +- update jit-related dependencies [\#1953](https://github.com/apache/arrow-datafusion/pull/1953) ([xudong963](https://github.com/xudong963)) +- minor code refinement: `if_exists` name change, wildcard field for logical plan, etc. 
[\#1951](https://github.com/apache/arrow-datafusion/pull/1951) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Allow different types of query variables \(`@@var`\) rather than just string [\#1943](https://github.com/apache/arrow-datafusion/pull/1943) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([maxburke](https://github.com/maxburke)) +- Pruning serialization [\#1941](https://github.com/apache/arrow-datafusion/pull/1941) ([thinkharderdev](https://github.com/thinkharderdev)) +- Add write_parquet to `DataFrame` [\#1940](https://github.com/apache/arrow-datafusion/pull/1940) ([matthewmturner](https://github.com/matthewmturner)) +- Fix select from EmptyExec always return 0 row after optimizer passes [\#1938](https://github.com/apache/arrow-datafusion/pull/1938) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add debug log when waiting for spilling on other consumers [\#1933](https://github.com/apache/arrow-datafusion/pull/1933) ([viirya](https://github.com/viirya)) +- Add db benchmark script [\#1928](https://github.com/apache/arrow-datafusion/pull/1928) ([matthewmturner](https://github.com/matthewmturner)) +- Add write_csv to DataFrame [\#1922](https://github.com/apache/arrow-datafusion/pull/1922) ([matthewmturner](https://github.com/matthewmturner)) +- \[MINOR\] Update copyright year in Docs [\#1918](https://github.com/apache/arrow-datafusion/pull/1918) ([alamb](https://github.com/alamb)) +- add metadata to DFSchema, close \#1806. 
[\#1914](https://github.com/apache/arrow-datafusion/pull/1914) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) +- Clippy fix on nightly [\#1907](https://github.com/apache/arrow-datafusion/pull/1907) ([yjshen](https://github.com/yjshen)) +- Updated Rust version to 1.59 in all the files [\#1903](https://github.com/apache/arrow-datafusion/pull/1903) ([NaincyKumariKnoldus](https://github.com/NaincyKumariKnoldus)) +- support extract second and minute in expr. [\#1901](https://github.com/apache/arrow-datafusion/pull/1901) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Update crate descriptions [\#1899](https://github.com/apache/arrow-datafusion/pull/1899) ([alamb](https://github.com/alamb)) +- Remove uneeded Mutex in Ballista Client [\#1898](https://github.com/apache/arrow-datafusion/pull/1898) ([alamb](https://github.com/alamb)) +- \[split/17\] move the rest of physical expr to datafusion-physical-expr crate [\#1892](https://github.com/apache/arrow-datafusion/pull/1892) ([Jimexist](https://github.com/Jimexist)) +- Avoid unnecessary branching in row read/write if schema is null-free [\#1891](https://github.com/apache/arrow-datafusion/pull/1891) ([yjshen](https://github.com/yjshen)) +- Make parquet support optional for datafusion-common crate [\#1886](https://github.com/apache/arrow-datafusion/pull/1886) ([jonmmease](https://github.com/jonmmease)) +- Fix clippy lints [\#1885](https://github.com/apache/arrow-datafusion/pull/1885) ([HaoYang670](https://github.com/HaoYang670)) +- Add support for `~/.datafusionrc` and cli option for overriding it to datafusion-cli [\#1875](https://github.com/apache/arrow-datafusion/pull/1875) ([matthewmturner](https://github.com/matthewmturner)) +- \[Minor\] Clean up DecimalArray API Usage [\#1869](https://github.com/apache/arrow-datafusion/pull/1869) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Changes after went through 
"Datafusion as a library section" [\#1868](https://github.com/apache/arrow-datafusion/pull/1868) ([nonontb](https://github.com/nonontb)) +- Enhance MemorySchemaProvider to support `register_listing_table` [\#1863](https://github.com/apache/arrow-datafusion/pull/1863) ([matthewmturner](https://github.com/matthewmturner)) +- Increase default partition column type from Dict\(UInt8\) to Dict\(UInt16\) [\#1860](https://github.com/apache/arrow-datafusion/pull/1860) ([Igosuki](https://github.com/Igosuki)) +- Update to arrow 9.1.0 [\#1851](https://github.com/apache/arrow-datafusion/pull/1851) ([alamb](https://github.com/alamb)) +- move some tests out of context and into sql [\#1846](https://github.com/apache/arrow-datafusion/pull/1846) ([alamb](https://github.com/alamb)) +- \[split/14\] create `datafusion-physical-expr` module [\#1843](https://github.com/apache/arrow-datafusion/pull/1843) ([Jimexist](https://github.com/Jimexist)) +- Return `Error` when parquet reader fails rather than no data with `println!` [\#1837](https://github.com/apache/arrow-datafusion/pull/1837) ([alamb](https://github.com/alamb)) +- determine build side in hash join by `total_byte_size` instead of `num_rows` [\#1831](https://github.com/apache/arrow-datafusion/pull/1831) ([xudong963](https://github.com/xudong963)) +- Make ballista support an optional feature to datafusion-cli [\#1816](https://github.com/apache/arrow-datafusion/pull/1816) ([alamb](https://github.com/alamb)) +- Update documentation example for change in API [\#1812](https://github.com/apache/arrow-datafusion/pull/1812) ([alamb](https://github.com/alamb)) +- rename references of expr in physical plan module after datafusion-expr split [\#1798](https://github.com/apache/arrow-datafusion/pull/1798) ([Jimexist](https://github.com/Jimexist)) +- DataFusion + Conbench Integration [\#1791](https://github.com/apache/arrow-datafusion/pull/1791) ([dianaclarke](https://github.com/dianaclarke)) +- The returned path value of get_by_uri should be 
self-described with entire path [\#1779](https://github.com/apache/arrow-datafusion/pull/1779) ([yahoNanJing](https://github.com/yahoNanJing)) +- Use`eq_dyn`, `neq_dyn`, `lt_dyn`, `lt_eq_dyn`, `gt_dyn`, `gt_eq_dyn` kernels from arrow [\#1475](https://github.com/apache/arrow-datafusion/pull/1475) ([alamb](https://github.com/alamb)) diff --git a/dev/changelog/9.0.0.md b/dev/changelog/9.0.0.md new file mode 100644 index 0000000000000..f47127aa5f4c6 --- /dev/null +++ b/dev/changelog/9.0.0.md @@ -0,0 +1,178 @@ + + +## [9.0.0](https://github.com/apache/arrow-datafusion/tree/9.0.0) (2022-06-10) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/8.0.0-rc2...9.0.0) + +**Breaking changes:** + +- MINOR: Move `simplify_expression` rule to `datafusion-optimizer` crate [\#2686](https://github.com/apache/arrow-datafusion/pull/2686) ([andygrove](https://github.com/andygrove)) +- Move physical expression planning to `datafusion-physical-expr` crate [\#2682](https://github.com/apache/arrow-datafusion/pull/2682) ([andygrove](https://github.com/andygrove)) +- Create new `datafusion-optimizer` crate for logical optimizer rules [\#2675](https://github.com/apache/arrow-datafusion/pull/2675) ([andygrove](https://github.com/andygrove)) +- Remove `ExecutionProps` dependency from `OptimizerRule` [\#2666](https://github.com/apache/arrow-datafusion/pull/2666) ([andygrove](https://github.com/andygrove)) +- Remove ObjectStoreSchemaProvider \(\#2656\) [\#2665](https://github.com/apache/arrow-datafusion/pull/2665) ([tustvold](https://github.com/tustvold)) +- Move `LogicalPlanBuilder` to `datafusion-expr` crate [\#2576](https://github.com/apache/arrow-datafusion/pull/2576) ([andygrove](https://github.com/andygrove)) +- `LogicalPlanBuilder` now uses `TableSource` instead of `TableProvider` [\#2569](https://github.com/apache/arrow-datafusion/pull/2569) ([andygrove](https://github.com/andygrove)) +- Remove `scan_empty` method from `LogicalPlanBuilder` 
[\#2568](https://github.com/apache/arrow-datafusion/pull/2568) ([andygrove](https://github.com/andygrove)) +- MINOR: Move expression utils from sql module to expr crate [\#2553](https://github.com/apache/arrow-datafusion/pull/2553) ([andygrove](https://github.com/andygrove)) +- Remove `scan_json` methods from `LogicalPlanBuilder` [\#2541](https://github.com/apache/arrow-datafusion/pull/2541) ([andygrove](https://github.com/andygrove)) +- Remove `scan_avro` methods from `LogicalPlanBuilder` [\#2540](https://github.com/apache/arrow-datafusion/pull/2540) ([andygrove](https://github.com/andygrove)) +- Remove `scan_parquet` methods from `LogicalPlanBuilder` [\#2539](https://github.com/apache/arrow-datafusion/pull/2539) ([andygrove](https://github.com/andygrove)) +- MINOR: Move `ExprVisitable` and `exprlist_to_columns` to datafusion-expr crate [\#2538](https://github.com/apache/arrow-datafusion/pull/2538) ([andygrove](https://github.com/andygrove)) +- Remove `scan_csv` methods from `LogicalPlanBuilder` [\#2537](https://github.com/apache/arrow-datafusion/pull/2537) ([andygrove](https://github.com/andygrove)) +- Fix Redundant ScalarValue Boxed Collection [\#2523](https://github.com/apache/arrow-datafusion/pull/2523) ([comphead](https://github.com/comphead)) +- Support for OFFSET in LogicalPlan [\#2521](https://github.com/apache/arrow-datafusion/pull/2521) ([jdye64](https://github.com/jdye64)) + +**Implemented enhancements:** + +- \[EPIC\] JIT support for `DataFusion` [\#2703](https://github.com/apache/arrow-datafusion/issues/2703) +- Show column names instead of column indices in query plans [\#2689](https://github.com/apache/arrow-datafusion/issues/2689) +- Proposal: remove automated ballista CI checks from DataFusion [\#2679](https://github.com/apache/arrow-datafusion/issues/2679) +- Pass SessionState to TableProvider [\#2658](https://github.com/apache/arrow-datafusion/issues/2658) +- Is ObjectStoreSchemaProvider Still Needed? 
[\#2656](https://github.com/apache/arrow-datafusion/issues/2656) +- Add logical plan support to `datafusion-proto` [\#2630](https://github.com/apache/arrow-datafusion/issues/2630) +- Like, NotLike expressions work with literal `NULL` [\#2626](https://github.com/apache/arrow-datafusion/issues/2626) +- Move `JOIN ON` predicates push down logic from planner to optimizer [\#2619](https://github.com/apache/arrow-datafusion/issues/2619) +- Remove `ExecutionProps` from `OptimizerRule` trait [\#2614](https://github.com/apache/arrow-datafusion/issues/2614) +- Add, Minus, Multiply, divide, Modulo operator work with literal `NULL` [\#2609](https://github.com/apache/arrow-datafusion/issues/2609) +- Support `DESCRIBE <table>
` to show table schemas [\#2606](https://github.com/apache/arrow-datafusion/issues/2606) +- Support `CREATE OR REPLACE TABLE` [\#2605](https://github.com/apache/arrow-datafusion/issues/2605) +- filter_push_down tests should not rely on TableProvider and ExecutionPlan [\#2600](https://github.com/apache/arrow-datafusion/issues/2600) +- Move logical optimizer rules out of the core datafusion crate [\#2599](https://github.com/apache/arrow-datafusion/issues/2599) +- Push Limit through outer Join [\#2579](https://github.com/apache/arrow-datafusion/issues/2579) +- `datafusion_proto` crate should have exhaustive match statements for handling `Expr` [\#2565](https://github.com/apache/arrow-datafusion/issues/2565) +- String representation of Expr variant [\#2563](https://github.com/apache/arrow-datafusion/issues/2563) +- File URI Scheme Interpretation [\#2562](https://github.com/apache/arrow-datafusion/issues/2562) +- Implement physical plan for OFFSET [\#2551](https://github.com/apache/arrow-datafusion/issues/2551) +- Update limit pushdown rule to support offsets [\#2550](https://github.com/apache/arrow-datafusion/issues/2550) +- Move `LogicalPlanBuilder` to `datafusion-expr` crate [\#2536](https://github.com/apache/arrow-datafusion/issues/2536) +- Logical optimizer rule "simplify expressions" should not depend on the core datafusion crate [\#2535](https://github.com/apache/arrow-datafusion/issues/2535) +- Support optional filter in Join [\#2509](https://github.com/apache/arrow-datafusion/issues/2509) +- Improve SQL planner & logical plan support for JOIN conditions [\#2496](https://github.com/apache/arrow-datafusion/issues/2496) +- Numeric, String, Boolean comparisons with literal `NULL` [\#2482](https://github.com/apache/arrow-datafusion/issues/2482) +- Redundant ScalarValue Boxed Collection [\#2449](https://github.com/apache/arrow-datafusion/issues/2449) +- ObjectStore Directory Semantics [\#2445](https://github.com/apache/arrow-datafusion/issues/2445) +- Add support for 
`OFFSET` in SQL query planner + logical plan [\#2377](https://github.com/apache/arrow-datafusion/issues/2377) +- SQL planner should use `TableSource` not `TableProvider` [\#2346](https://github.com/apache/arrow-datafusion/issues/2346) +- Move SQL query planning to new crate [\#2345](https://github.com/apache/arrow-datafusion/issues/2345) +- Update LogicalPlan rustdoc code to not use LogicalPlanBuilder [\#2308](https://github.com/apache/arrow-datafusion/issues/2308) +- \[Optimizer\] Refactor `convert join` [\#2256](https://github.com/apache/arrow-datafusion/issues/2256) +- \[Optimizer\] Infer is not null predicate from `where clause` [\#2254](https://github.com/apache/arrow-datafusion/issues/2254) +- Support ArrayIndex for ScalarValue\(List\) [\#2207](https://github.com/apache/arrow-datafusion/issues/2207) +- \[Ballista\] Fill functional gaps between datafusion and ballista [\#2062](https://github.com/apache/arrow-datafusion/issues/2062) +- \[Ballista\] support datafusion built_in UDAF work in ballista cluster [\#1985](https://github.com/apache/arrow-datafusion/issues/1985) +- Export C API [\#1113](https://github.com/apache/arrow-datafusion/issues/1113) + +**Fixed bugs:** + +- Fix Typos in Docs [\#2695](https://github.com/apache/arrow-datafusion/issues/2695) +- Unable to build a docker image [\#2691](https://github.com/apache/arrow-datafusion/issues/2691) +- Optimization pass `AggregateStatistics` changes type of output from `Int64` to `UInt64` [\#2673](https://github.com/apache/arrow-datafusion/issues/2673) +- ViewTable Circular Reference [\#2657](https://github.com/apache/arrow-datafusion/issues/2657) +- `ScalarValue::to_array_of_size` panics computing statistics for nested parquet file [\#2653](https://github.com/apache/arrow-datafusion/issues/2653) +- The result type of count/count_distinct [\#2635](https://github.com/apache/arrow-datafusion/issues/2635) +- limit_push_down is not working properly with OFFSET 
[\#2624](https://github.com/apache/arrow-datafusion/issues/2624) +- Avro Tests Fail To Compile [\#2570](https://github.com/apache/arrow-datafusion/issues/2570) +- Unused Window functions experssion is wrongly removed from LogicalPlan during optimalization [\#2542](https://github.com/apache/arrow-datafusion/issues/2542) +- Bug: ObjectStoreRegistry get_by_uri does not return correct path when "scheme" is provided [\#2525](https://github.com/apache/arrow-datafusion/issues/2525) +- There are duplicate and inconsistent copies of `datafusion.proto` [\#2514](https://github.com/apache/arrow-datafusion/issues/2514) +- Projection pushdown produces incorrect results when column names are reused [\#2462](https://github.com/apache/arrow-datafusion/issues/2462) +- Incorrect Parquet Projection For Nested Types [\#2453](https://github.com/apache/arrow-datafusion/issues/2453) +- LogicalPlanBuilder::scan_csv creates scans with invalid table names [\#2278](https://github.com/apache/arrow-datafusion/issues/2278) +- Inner join incorrectly pushdown predicate with OR operation [\#2271](https://github.com/apache/arrow-datafusion/issues/2271) +- Ignored alias for columns with aggregate function and incorrect results when collecting statistics is enabled [\#2176](https://github.com/apache/arrow-datafusion/issues/2176) +- Join on path partitioned columns fails with error [\#2145](https://github.com/apache/arrow-datafusion/issues/2145) + +**Documentation updates:** + +- Fix Ballista link [\#2654](https://github.com/apache/arrow-datafusion/pull/2654) ([dsaxton](https://github.com/dsaxton)) +- MINOR: Add Blaze as a project using DataFusion [\#2618](https://github.com/apache/arrow-datafusion/pull/2618) ([yjshen](https://github.com/yjshen)) +- \[MINOR\] remove datafusion-cli's ballista feature from docs [\#2612](https://github.com/apache/arrow-datafusion/pull/2612) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- chore\(doc\) remove ballista from datafusion-cli readme 
[\#2604](https://github.com/apache/arrow-datafusion/pull/2604) ([ming535](https://github.com/ming535)) + +**Closed issues:** + +- \[Question\] Converting TableSource to custom TableProvider [\#2644](https://github.com/apache/arrow-datafusion/issues/2644) +- \[Question\] Why DataFusion is shipped with arrow version 9.1.0 on crates.io ? [\#2474](https://github.com/apache/arrow-datafusion/issues/2474) + +**Merged pull requests:** + +- Test optional features in CI [\#2708](https://github.com/apache/arrow-datafusion/pull/2708) ([tustvold](https://github.com/tustvold)) +- support indexed fields proto [\#2707](https://github.com/apache/arrow-datafusion/pull/2707) ([nl5887](https://github.com/nl5887)) +- Update sqlparser-rs to 0.18.0 [\#2705](https://github.com/apache/arrow-datafusion/pull/2705) ([alamb](https://github.com/alamb)) +- \[MINOR\]: Add documentation to `datafusion-row` modules [\#2704](https://github.com/apache/arrow-datafusion/pull/2704) ([alamb](https://github.com/alamb)) +- Make sure that the data types are supported in hashjoin before genera… [\#2702](https://github.com/apache/arrow-datafusion/pull/2702) ([AssHero](https://github.com/AssHero)) +- Move remaining code out of legacy `core/logical_plan` module [\#2701](https://github.com/apache/arrow-datafusion/pull/2701) ([andygrove](https://github.com/andygrove)) +- Move some tests from core to expr [\#2700](https://github.com/apache/arrow-datafusion/pull/2700) ([andygrove](https://github.com/andygrove)) +- MINOR: Improve Docs Readability [\#2696](https://github.com/apache/arrow-datafusion/pull/2696) ([ryanrussell](https://github.com/ryanrussell)) +- Combine limit and offset to `fetch` and `skip` and implement physical plan support [\#2694](https://github.com/apache/arrow-datafusion/pull/2694) ([ming535](https://github.com/ming535)) +- MINOR: Add datafusion-sql example [\#2693](https://github.com/apache/arrow-datafusion/pull/2693) ([andygrove](https://github.com/andygrove)) +- Remove Ballista related lines 
from Dockerfile [\#2692](https://github.com/apache/arrow-datafusion/pull/2692) ([mocknen](https://github.com/mocknen)) +- Show column names instead of indices in query plans [\#2690](https://github.com/apache/arrow-datafusion/pull/2690) ([andygrove](https://github.com/andygrove)) +- MINOR: Remove uses of TryClone for Parquet [\#2681](https://github.com/apache/arrow-datafusion/pull/2681) ([tustvold](https://github.com/tustvold)) +- Fix `AggregateStatistics` optimization so it doesn't change output type [\#2674](https://github.com/apache/arrow-datafusion/pull/2674) ([alamb](https://github.com/alamb)) +- If statistics of column Max/Min value does not exists in parquet file, sent Min/Max to None [\#2671](https://github.com/apache/arrow-datafusion/pull/2671) ([AssHero](https://github.com/AssHero)) +- MINOR: Move more expression code to `datafusion-expr` crate [\#2669](https://github.com/apache/arrow-datafusion/pull/2669) ([andygrove](https://github.com/andygrove)) +- MINOR: Rewrite imports in optimizer moduler [\#2667](https://github.com/apache/arrow-datafusion/pull/2667) ([andygrove](https://github.com/andygrove)) +- Update snmalloc-rs requirement from 0.2 to 0.3 [\#2663](https://github.com/apache/arrow-datafusion/pull/2663) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add module doc for RuntimeEnv, SessionContext, TaskContext, etc... 
[\#2655](https://github.com/apache/arrow-datafusion/pull/2655) ([tustvold](https://github.com/tustvold)) +- Prune unused dependencies from datafusion-proto [\#2651](https://github.com/apache/arrow-datafusion/pull/2651) ([tustvold](https://github.com/tustvold)) +- MINOR: Implement serde for join filter [\#2649](https://github.com/apache/arrow-datafusion/pull/2649) ([andygrove](https://github.com/andygrove)) +- pushdown support for predicates in `ON` clause of joins [\#2647](https://github.com/apache/arrow-datafusion/pull/2647) ([korowa](https://github.com/korowa)) +- Move `SortKeyCursor` and `RowIndex` into modules, add `sort_key_cursor` test [\#2645](https://github.com/apache/arrow-datafusion/pull/2645) ([alamb](https://github.com/alamb)) +- Implement DESCRIBE \<table\> [\#2642](https://github.com/apache/arrow-datafusion/pull/2642) ([LiuYuHui](https://github.com/LiuYuHui)) +- Implement `LogicalPlan` serde in `datafusion-proto` [\#2639](https://github.com/apache/arrow-datafusion/pull/2639) ([andygrove](https://github.com/andygrove)) +- Fix limit + offset pushdown [\#2638](https://github.com/apache/arrow-datafusion/pull/2638) ([ming535](https://github.com/ming535)) +- change result type of count/count_distinct from uint64 to int64 [\#2636](https://github.com/apache/arrow-datafusion/pull/2636) ([liukun4515](https://github.com/liukun4515)) +- if none columns in window expr are needed, remove the window exprs [\#2634](https://github.com/apache/arrow-datafusion/pull/2634) ([AssHero](https://github.com/AssHero)) +- Like, NotLike expressions work with literal `NULL` [\#2627](https://github.com/apache/arrow-datafusion/pull/2627) ([WinkerDu](https://github.com/WinkerDu)) +- MINOR: Refactor `datafusion-proto` dependencies and imports [\#2623](https://github.com/apache/arrow-datafusion/pull/2623) ([andygrove](https://github.com/andygrove)) +- MINOR: add optimizer struct [\#2616](https://github.com/apache/arrow-datafusion/pull/2616) ([jackwener](https://github.com/jackwener)) +- Remove 
FilterPushDown dependency on physical plan [\#2615](https://github.com/apache/arrow-datafusion/pull/2615) ([andygrove](https://github.com/andygrove)) +- Support CREATE OR REPLACE TABLE [\#2613](https://github.com/apache/arrow-datafusion/pull/2613) ([AssHero](https://github.com/AssHero)) +- Support binary mathematical operators work with `NULL` literals [\#2610](https://github.com/apache/arrow-datafusion/pull/2610) ([WinkerDu](https://github.com/WinkerDu)) +- chore: try fix CI coverage [\#2608](https://github.com/apache/arrow-datafusion/pull/2608) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- MINOR: Rename benchmark crate [\#2607](https://github.com/apache/arrow-datafusion/pull/2607) ([andygrove](https://github.com/andygrove)) +- chore\(dep\): bump cranelift to 0.84.0 [\#2598](https://github.com/apache/arrow-datafusion/pull/2598) ([waynexia](https://github.com/waynexia)) +- fix some typos [\#2597](https://github.com/apache/arrow-datafusion/pull/2597) ([ming535](https://github.com/ming535)) +- Support limit pushdown through left right outer join [\#2596](https://github.com/apache/arrow-datafusion/pull/2596) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Unignore rustdoc code examples in `datafusion-expr` crate [\#2590](https://github.com/apache/arrow-datafusion/pull/2590) ([andygrove](https://github.com/andygrove)) +- Evaluate JIT'd expression over arrays [\#2587](https://github.com/apache/arrow-datafusion/pull/2587) ([waynexia](https://github.com/waynexia)) +- \[minor\]Fix ci clippy for unused import [\#2586](https://github.com/apache/arrow-datafusion/pull/2586) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- \[Doc\]add doc for enable SIMD need `cargo nightly` [\#2577](https://github.com/apache/arrow-datafusion/pull/2577) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add DataFrame `union_distinct` and fix documentation for `distinct` [\#2574](https://github.com/apache/arrow-datafusion/pull/2574) ([andygrove](https://github.com/andygrove)) +- Fix avro tests 
\(\#2570\) [\#2571](https://github.com/apache/arrow-datafusion/pull/2571) ([tustvold](https://github.com/tustvold)) +- Make datafusion-proto match exhaustive [\#2567](https://github.com/apache/arrow-datafusion/pull/2567) ([andygrove](https://github.com/andygrove)) +- Support limit push down for offset_plan [\#2566](https://github.com/apache/arrow-datafusion/pull/2566) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Introduce Expr.variant_name\(\) function [\#2564](https://github.com/apache/arrow-datafusion/pull/2564) ([jdye64](https://github.com/jdye64)) +- Fix some 404 links in the contribution guide [\#2561](https://github.com/apache/arrow-datafusion/pull/2561) ([hi-rustin](https://github.com/hi-rustin)) +- Update datafusion-cli readme cli version [\#2559](https://github.com/apache/arrow-datafusion/pull/2559) ([hi-rustin](https://github.com/hi-rustin)) +- MINOR: Move `expr_rewriter.rs` to `datafusion-expr` crate [\#2552](https://github.com/apache/arrow-datafusion/pull/2552) ([andygrove](https://github.com/andygrove)) +- Fix `JOIN`s with complex predicates in ON \(split ON expressions only by AND operator\) [\#2534](https://github.com/apache/arrow-datafusion/pull/2534) ([korowa](https://github.com/korowa)) +- Reduce duplication in file scan tests [\#2533](https://github.com/apache/arrow-datafusion/pull/2533) ([tustvold](https://github.com/tustvold)) +- Fix size_of_scalar test [\#2531](https://github.com/apache/arrow-datafusion/pull/2531) ([alamb](https://github.com/alamb)) +- Update to arrow-rs 14.0.0 [\#2528](https://github.com/apache/arrow-datafusion/pull/2528) ([alamb](https://github.com/alamb)) +- ObjectStoreRegistry get_by_uri now returns correct path when "scheme" is provided [\#2526](https://github.com/apache/arrow-datafusion/pull/2526) ([timvw](https://github.com/timvw)) +- MINOR: Add ORDER BY clause to test [\#2524](https://github.com/apache/arrow-datafusion/pull/2524) ([andygrove](https://github.com/andygrove)) +- Remove unused `binary_array_op_scalar!` 
in binary.rs [\#2512](https://github.com/apache/arrow-datafusion/pull/2512) ([alamb](https://github.com/alamb)) +- fix `NULL column` evaluation, tests for same [\#2510](https://github.com/apache/arrow-datafusion/pull/2510) ([alamb](https://github.com/alamb)) +- Fix projection pushdown produces incorrect results when column names are reused [\#2463](https://github.com/apache/arrow-datafusion/pull/2463) ([jonmmease](https://github.com/jonmmease)) +- Benchmark for sort preserving merge [\#2431](https://github.com/apache/arrow-datafusion/pull/2431) ([alamb](https://github.com/alamb)) +- Support GetIndexedFieldExpr for ScalarValue [\#2196](https://github.com/apache/arrow-datafusion/pull/2196) ([ovr](https://github.com/ovr))