diff --git a/Cargo.toml b/Cargo.toml index e1bd0d7aa72b..3431c4673e0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,7 +55,7 @@ license = "Apache-2.0" readme = "README.md" repository = "https://github.com/apache/datafusion" rust-version = "1.76" -version = "40.0.0" +version = "41.0.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -88,25 +88,25 @@ bytes = "1.4" chrono = { version = "0.4.34", default-features = false } ctor = "0.2.0" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "40.0.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "40.0.0" } -datafusion-common = { path = "datafusion/common", version = "40.0.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "40.0.0" } -datafusion-execution = { path = "datafusion/execution", version = "40.0.0" } -datafusion-expr = { path = "datafusion/expr", version = "40.0.0" } -datafusion-functions = { path = "datafusion/functions", version = "40.0.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "40.0.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "40.0.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "40.0.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "40.0.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "40.0.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "40.0.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "40.0.0" } -datafusion-proto = { path = "datafusion/proto", version = "40.0.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "40.0.0" } -datafusion-sql = { path = "datafusion/sql", version = "40.0.0" } -datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "40.0.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "40.0.0" } +datafusion = { path = "datafusion/core", version = "41.0.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "41.0.0" } +datafusion-common = { path = "datafusion/common", version = "41.0.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "41.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "41.0.0" } +datafusion-expr = { path = "datafusion/expr", version = "41.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "41.0.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "41.0.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "41.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "41.0.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "41.0.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "41.0.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "41.0.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "41.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "41.0.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "41.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "41.0.0" } +datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "41.0.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "41.0.0" } doc-comment = "0.3" env_logger = "0.11" futures = "0.3" diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 9d20c242bbef..2eb93da7c020 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -874,9 +874,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.7" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc" +checksum = "504bdec147f2cc13c8b57ed9401fd8a147cc66b67ad5cb241394244f2c947549" dependencies = [ "jobserver", "libc", @@ -1128,7 +1128,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "40.0.0" +version = "41.0.0" dependencies = [ "ahash", "apache-avro", @@ -1184,7 +1184,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "40.0.0" +version = "41.0.0" dependencies = [ "arrow-schema", "async-trait", @@ -1196,7 +1196,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "40.0.0" +version = "41.0.0" dependencies = [ "arrow", "assert_cmd", @@ -1223,7 +1223,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "40.0.0" +version = "41.0.0" dependencies = [ "ahash", "apache-avro", @@ -1244,14 +1244,14 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "40.0.0" +version = "41.0.0" dependencies = [ "tokio", ] [[package]] name = "datafusion-execution" -version = "40.0.0" +version = "41.0.0" dependencies = [ "arrow", "chrono", @@ -1270,7 +1270,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "40.0.0" +version = "41.0.0" dependencies = [ "ahash", "arrow", @@ -1287,7 +1287,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "40.0.0" +version = "41.0.0" dependencies = [ "arrow", "arrow-buffer", @@ -1312,7 +1312,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "40.0.0" +version = "41.0.0" dependencies = [ "ahash", "arrow", @@ -1328,7 +1328,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "40.0.0" +version = "41.0.0" dependencies = [ "arrow", "arrow-array", @@ -1348,7 +1348,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "40.0.0" +version = "41.0.0" dependencies = [ "arrow", "async-trait", @@ -1366,7 +1366,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "40.0.0" +version = "41.0.0" dependencies = [ "ahash", "arrow", @@ -1394,7 +1394,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "40.0.0" +version = "41.0.0" dependencies = [ "ahash", "arrow", @@ -1406,7 +1406,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "40.0.0" +version = "41.0.0" dependencies = [ "datafusion-common", "datafusion-execution", @@ -1416,7 +1416,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "40.0.0" +version = "41.0.0" dependencies = [ "ahash", "arrow", @@ -1448,7 +1448,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "40.0.0" +version = "41.0.0" dependencies = [ "arrow", "arrow-array", @@ -2049,9 +2049,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ab92f4f49ee4fb4f997c784b7a2e0fa70050211e0b6a287f898c3c9785ca956" +checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9" dependencies = [ "bytes", "futures-channel", @@ -2524,9 +2524,9 @@ dependencies = [ [[package]] name = "object" -version = "0.36.2" +version = "0.36.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e" +checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9" dependencies = [ "memchr", ] @@ -3220,9 +3220,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" +checksum = "fc0a2ce646f8655401bb81e7927b812614bd5d91dbc968696be50603510fcaf0" [[package]] name = "rustls-webpki" @@ -3341,18 +3341,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.204" +version = "1.0.205" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.205" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1" dependencies = [ "proc-macro2", "quote", @@ -3590,15 +3590,15 @@ checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" [[package]] name = "tempfile" -version = "3.11.0" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fcd239983515c23a32fb82099f97d0b11b8c72f654ed659363a95c3dad7a53" +checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" dependencies = [ "cfg-if", "fastrand 2.1.0", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index ff6673dc9727..cbd9ffd0feba 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-cli" description = "Command Line Client for DataFusion query engine." -version = "40.0.0" +version = "41.0.0" authors = ["Apache DataFusion "] edition = "2021" keywords = ["arrow", "datafusion", "query", "sql"] @@ -35,7 +35,7 @@ async-trait = "0.1.41" aws-config = "0.55" aws-credential-types = "0.55" clap = { version = "3", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "40.0.0", features = [ +datafusion = { path = "../datafusion/core", version = "41.0.0", features = [ "avro", "crypto_expressions", "datetime_expressions", diff --git a/datafusion/core/example.parquet b/datafusion/core/example.parquet index 94de10394b33..17f7473cd221 100644 Binary files a/datafusion/core/example.parquet and b/datafusion/core/example.parquet differ diff --git a/dev/changelog/41.0.0.md b/dev/changelog/41.0.0.md new file mode 100644 index 000000000000..3e289112c7bb --- /dev/null +++ b/dev/changelog/41.0.0.md @@ -0,0 +1,363 @@ + + +# Apache DataFusion 41.0.0 Changelog + +This release consists of 245 commits from 69 contributors. See credits at the end of this changelog for more information. + +**Breaking changes:** + +- make unparser `Dialect` trait `Send` + `Sync` [#11504](https://github.com/apache/datafusion/pull/11504) (y-f-u) +- Implement physical plan serialization for csv COPY plans , add `as_any`, `Debug` to `FileFormatFactory` [#11588](https://github.com/apache/datafusion/pull/11588) (Lordworms) +- Consistent API to set parameters of aggregate and window functions (`AggregateExt` --> `ExprFunctionExt`) [#11550](https://github.com/apache/datafusion/pull/11550) (timsaucer) +- Rename `ColumnOptions` to `ParquetColumnOptions` [#11512](https://github.com/apache/datafusion/pull/11512) (alamb) +- Rename `input_type` --> `input_types` on AggregateFunctionExpr / AccumulatorArgs / StateFieldsArgs [#11666](https://github.com/apache/datafusion/pull/11666) (lewiszlw) +- Rename RepartitionExec metric `repart_time` to `repartition_time` [#11703](https://github.com/apache/datafusion/pull/11703) (alamb) +- Remove `AggregateFunctionDefinition` [#11803](https://github.com/apache/datafusion/pull/11803) (lewiszlw) +- Skipping partial aggregation when it is not helping for high cardinality aggregates [#11627](https://github.com/apache/datafusion/pull/11627) (korowa) +- Optionally create name of aggregate expression from expressions [#11776](https://github.com/apache/datafusion/pull/11776) (lewiszlw) + +**Performance related:** + +- feat: Optimize CASE expression for "column or null" use case [#11534](https://github.com/apache/datafusion/pull/11534) (andygrove) +- feat: Optimize CASE expression for usage where then and else values are literals [#11553](https://github.com/apache/datafusion/pull/11553) (andygrove) +- perf: Optimize IsNotNullExpr [#11586](https://github.com/apache/datafusion/pull/11586) (andygrove) + +**Implemented enhancements:** + +- feat: Add `fail_on_overflow` option to `BinaryExpr` [#11400](https://github.com/apache/datafusion/pull/11400) (andygrove) +- feat: add UDF to_local_time() [#11347](https://github.com/apache/datafusion/pull/11347) (appletreeisyellow) +- feat: switch to using proper Substrait types for IntervalYearMonth and IntervalDayTime [#11471](https://github.com/apache/datafusion/pull/11471) (Blizzara) +- feat: support UDWFs in Substrait [#11489](https://github.com/apache/datafusion/pull/11489) (Blizzara) +- feat: support `unnest` in GROUP BY clause [#11469](https://github.com/apache/datafusion/pull/11469) (JasonLi-cn) +- feat: support `COUNT()` [#11229](https://github.com/apache/datafusion/pull/11229) (tshauck) +- feat: consume and produce Substrait type extensions [#11510](https://github.com/apache/datafusion/pull/11510) (Blizzara) +- feat: Error when a SHOW command is passed in with an accompanying non-existant variable [#11540](https://github.com/apache/datafusion/pull/11540) (itsjunetime) +- feat: support Map literals in Substrait consumer and producer [#11547](https://github.com/apache/datafusion/pull/11547) (Blizzara) +- feat: add bounds for unary math scalar functions [#11584](https://github.com/apache/datafusion/pull/11584) (tshauck) +- feat: Add support for cardinality function on maps [#11801](https://github.com/apache/datafusion/pull/11801) (Weijun-H) +- feat: support `Utf8View` type in `starts_with` function [#11787](https://github.com/apache/datafusion/pull/11787) (tshauck) +- feat: Expose public method for optimizing physical plans [#11879](https://github.com/apache/datafusion/pull/11879) (andygrove) + +**Fixed bugs:** + +- fix: Fix eq properties regression from #10434 [#11363](https://github.com/apache/datafusion/pull/11363) (suremarc) +- fix: make sure JOIN ON expression is boolean type [#11423](https://github.com/apache/datafusion/pull/11423) (jonahgao) +- fix: `regexp_replace` fails when pattern or replacement is a scalar `NULL` [#11459](https://github.com/apache/datafusion/pull/11459) (Weijun-H) +- fix: unparser generates wrong sql for derived table with columns [#11505](https://github.com/apache/datafusion/pull/11505) (y-f-u) +- fix: make `UnKnownColumn`s not equal to others physical exprs [#11536](https://github.com/apache/datafusion/pull/11536) (jonahgao) +- fix: fixes trig function order by [#11559](https://github.com/apache/datafusion/pull/11559) (tshauck) +- fix: CASE with NULL [#11542](https://github.com/apache/datafusion/pull/11542) (Weijun-H) +- fix: panic and incorrect results in `LogFunc::output_ordering()` [#11571](https://github.com/apache/datafusion/pull/11571) (jonahgao) +- fix: expose the fluent API fn for approx_distinct instead of the module [#11644](https://github.com/apache/datafusion/pull/11644) (Michael-J-Ward) +- fix: dont try to coerce list for regex match [#11646](https://github.com/apache/datafusion/pull/11646) (tshauck) +- fix: regr_count now returns Uint64 [#11731](https://github.com/apache/datafusion/pull/11731) (Michael-J-Ward) +- fix: set `null_equals_null` to false when `convert_cross_join_to_inner_join` [#11738](https://github.com/apache/datafusion/pull/11738) (jonahgao) +- fix: Add additional required expression for natural join [#11713](https://github.com/apache/datafusion/pull/11713) (Lordworms) +- fix: hash join tests with forced collisions [#11806](https://github.com/apache/datafusion/pull/11806) (korowa) +- fix: `collect_columns` quadratic complexity [#11843](https://github.com/apache/datafusion/pull/11843) (crepererum) + +**Documentation updates:** + +- Minor: Add link to blog to main DataFusion website [#11356](https://github.com/apache/datafusion/pull/11356) (alamb) +- Add `to_local_time()` in function reference docs [#11401](https://github.com/apache/datafusion/pull/11401) (appletreeisyellow) +- Minor: Consolidate specification doc sections [#11427](https://github.com/apache/datafusion/pull/11427) (alamb) +- Combine the Roadmap / Quarterly Roadmap sections [#11426](https://github.com/apache/datafusion/pull/11426) (alamb) +- Minor: Add an example for backtrace pretty print [#11450](https://github.com/apache/datafusion/pull/11450) (goldmedal) +- Docs: Document creating new extension APIs [#11425](https://github.com/apache/datafusion/pull/11425) (alamb) +- Minor: Clarify which parquet options are used for reading/writing [#11511](https://github.com/apache/datafusion/pull/11511) (alamb) +- Support `newlines_in_values` CSV option [#11533](https://github.com/apache/datafusion/pull/11533) (connec) +- chore: Minor cleanup `simplify_demo()` example [#11576](https://github.com/apache/datafusion/pull/11576) (kavirajk) +- Move Datafusion Query Optimizer to library user guide [#11563](https://github.com/apache/datafusion/pull/11563) (devesh-2002) +- Fix typo in doc of Partitioning [#11612](https://github.com/apache/datafusion/pull/11612) (waruto210) +- Doc: A tiny typo in scalar function's doc [#11620](https://github.com/apache/datafusion/pull/11620) (2010YOUY01) +- Change default Parquet writer settings to match arrow-rs (except for compression & statistics) [#11558](https://github.com/apache/datafusion/pull/11558) (wiedld) +- Rename `functions-array` to `functions-nested` [#11602](https://github.com/apache/datafusion/pull/11602) (goldmedal) +- Add parser option enable_options_value_normalization [#11330](https://github.com/apache/datafusion/pull/11330) (xinlifoobar) +- Add reference to #comet channel in Arrow Rust Discord server [#11637](https://github.com/apache/datafusion/pull/11637) (ajmarcus) +- Extract catalog API to separate crate, change `TableProvider::scan` to take a trait rather than `SessionState` [#11516](https://github.com/apache/datafusion/pull/11516) (findepi) +- doc: why nullable of list item is set to true [#11626](https://github.com/apache/datafusion/pull/11626) (jcsherin) +- Docs: adding explicit mention of test_utils to docs [#11670](https://github.com/apache/datafusion/pull/11670) (edmondop) +- Ensure statistic defaults in parquet writers are in sync [#11656](https://github.com/apache/datafusion/pull/11656) (wiedld) +- Merge `string-view2` branch: reading from parquet up to 2x faster for some ClickBench queries (not on by default) [#11667](https://github.com/apache/datafusion/pull/11667) (alamb) +- Doc: Add Sail to known users list [#11791](https://github.com/apache/datafusion/pull/11791) (shehabgamin) +- Move min and max to user defined aggregate function, remove `AggregateFunction` / `AggregateFunctionDefinition::BuiltIn` [#11013](https://github.com/apache/datafusion/pull/11013) (edmondop) +- Change name of MAX/MIN udaf to lowercase max/min [#11795](https://github.com/apache/datafusion/pull/11795) (edmondop) +- doc: Add support for `map` and `make_map` functions [#11799](https://github.com/apache/datafusion/pull/11799) (Weijun-H) +- Improve readme page in crates.io [#11809](https://github.com/apache/datafusion/pull/11809) (lewiszlw) +- refactor: remove unneed mut for session context [#11864](https://github.com/apache/datafusion/pull/11864) (sunng87) + +**Other:** + +- Prepare 40.0.0 Release [#11343](https://github.com/apache/datafusion/pull/11343) (andygrove) +- Support `NULL` literals in where clause [#11266](https://github.com/apache/datafusion/pull/11266) (xinlifoobar) +- Implement TPCH substrait integration test, support tpch_6, tpch_10, t… [#11349](https://github.com/apache/datafusion/pull/11349) (Lordworms) +- Fix bug when pushing projection under joins [#11333](https://github.com/apache/datafusion/pull/11333) (jonahgao) +- Minor: some cosmetics in `filter.rs`, fix clippy due to logical conflict [#11368](https://github.com/apache/datafusion/pull/11368) (comphead) +- Update prost-derive requirement from 0.12 to 0.13 [#11355](https://github.com/apache/datafusion/pull/11355) (dependabot[bot]) +- Minor: update dashmap `6.0.1` [#11335](https://github.com/apache/datafusion/pull/11335) (alamb) +- Improve and test dataframe API examples in docs [#11290](https://github.com/apache/datafusion/pull/11290) (alamb) +- Remove redundant `unalias_nested` calls for creating Filter's [#11340](https://github.com/apache/datafusion/pull/11340) (alamb) +- Enable `clone_on_ref_ptr` clippy lint on optimizer [#11346](https://github.com/apache/datafusion/pull/11346) (lewiszlw) +- Update termtree requirement from 0.4.1 to 0.5.0 [#11383](https://github.com/apache/datafusion/pull/11383) (dependabot[bot]) +- Introduce `resources_err!` error macro [#11374](https://github.com/apache/datafusion/pull/11374) (comphead) +- Enable `clone_on_ref_ptr` clippy lint on common [#11384](https://github.com/apache/datafusion/pull/11384) (lewiszlw) +- Track parquet writer encoding memory usage on MemoryPool [#11345](https://github.com/apache/datafusion/pull/11345) (wiedld) +- Minor: remove clones and unnecessary Arcs in `from_substrait_rex` [#11337](https://github.com/apache/datafusion/pull/11337) (alamb) +- Minor: Change no-statement error message to be clearer [#11394](https://github.com/apache/datafusion/pull/11394) (itsjunetime) +- Change `array_agg` to return `null` on no input rather than empty list [#11299](https://github.com/apache/datafusion/pull/11299) (jayzhan211) +- Minor: return "not supported" for `COUNT DISTINCT` with multiple arguments [#11391](https://github.com/apache/datafusion/pull/11391) (jonahgao) +- Enable `clone_on_ref_ptr` clippy lint on sql [#11380](https://github.com/apache/datafusion/pull/11380) (lewiszlw) +- Move configuration information out of example usage page [#11300](https://github.com/apache/datafusion/pull/11300) (alamb) +- chore: reuse a single function to create the Substrait TPCH consumer test contexts [#11396](https://github.com/apache/datafusion/pull/11396) (Blizzara) +- refactor: change error type for "no statement" [#11411](https://github.com/apache/datafusion/pull/11411) (crepererum) +- Implement prettier SQL unparsing (more human readable) [#11186](https://github.com/apache/datafusion/pull/11186) (MohamedAbdeen21) +- Move `overlay` planning to`ExprPlanner` [#11398](https://github.com/apache/datafusion/pull/11398) (dharanad) +- Coerce types for all union children plans when eliminating nesting [#11386](https://github.com/apache/datafusion/pull/11386) (gruuya) +- Add customizable equality and hash functions to UDFs [#11392](https://github.com/apache/datafusion/pull/11392) (joroKr21) +- Implement ScalarFunction `MAKE_MAP` and `MAP` [#11361](https://github.com/apache/datafusion/pull/11361) (goldmedal) +- Improve `CommonSubexprEliminate` rule with surely and conditionally evaluated stats [#11357](https://github.com/apache/datafusion/pull/11357) (peter-toth) +- fix(11397): surface proper errors in ParquetSink [#11399](https://github.com/apache/datafusion/pull/11399) (wiedld) +- Minor: Add note about SQLLancer fuzz testing to docs [#11430](https://github.com/apache/datafusion/pull/11430) (alamb) +- Trivial: use arrow csv writer's timestamp_tz_format [#11407](https://github.com/apache/datafusion/pull/11407) (tmi) +- Improved unparser documentation [#11395](https://github.com/apache/datafusion/pull/11395) (alamb) +- Avoid calling shutdown after failed write of AsyncWrite [#11415](https://github.com/apache/datafusion/pull/11415) (joroKr21) +- Short term way to make `AggregateStatistics` still work when min/max is converted to udaf [#11261](https://github.com/apache/datafusion/pull/11261) (Rachelint) +- Implement TPCH substrait integration test, support tpch_13, tpch_14,16 [#11405](https://github.com/apache/datafusion/pull/11405) (Lordworms) +- Minor: fix giuthub action labeler rules [#11428](https://github.com/apache/datafusion/pull/11428) (alamb) +- Minor: change internal error to not supported error for nested field … [#11446](https://github.com/apache/datafusion/pull/11446) (alamb) +- Minor: change Datafusion --> DataFusion in docs [#11439](https://github.com/apache/datafusion/pull/11439) (alamb) +- Support serialization/deserialization for custom physical exprs in proto [#11387](https://github.com/apache/datafusion/pull/11387) (lewiszlw) +- remove termtree dependency [#11416](https://github.com/apache/datafusion/pull/11416) (Kev1n8) +- Add SessionStateBuilder and extract out the registration of defaults [#11403](https://github.com/apache/datafusion/pull/11403) (Omega359) +- integrate consumer tests, implement tpch query 18 to 22 [#11462](https://github.com/apache/datafusion/pull/11462) (Lordworms) +- Docs: Explain the usage of logical expressions for `create_aggregate_expr` [#11458](https://github.com/apache/datafusion/pull/11458) (jayzhan211) +- Return scalar result when all inputs are constants in `map` and `make_map` [#11461](https://github.com/apache/datafusion/pull/11461) (Rachelint) +- Enable `clone_on_ref_ptr` clippy lint on functions\* [#11468](https://github.com/apache/datafusion/pull/11468) (lewiszlw) +- minor: non-overlapping `repart_time` and `send_time` metrics [#11440](https://github.com/apache/datafusion/pull/11440) (korowa) +- Minor: rename `row_groups.rs` to `row_group_filter.rs` [#11481](https://github.com/apache/datafusion/pull/11481) (alamb) +- Support alternate formats for unparsing `datetime` to `timestamp` and `interval` [#11466](https://github.com/apache/datafusion/pull/11466) (y-f-u) +- chore: Add criterion benchmark for CaseExpr [#11482](https://github.com/apache/datafusion/pull/11482) (andygrove) +- Initial support for `StringView`, merge changes from `string-view` development branch [#11402](https://github.com/apache/datafusion/pull/11402) (alamb) +- Replace to_lowercase with to_string in sql example [#11486](https://github.com/apache/datafusion/pull/11486) (lewiszlw) +- Minor: Make execute_input_stream Accessible for Any Sinking Operators [#11449](https://github.com/apache/datafusion/pull/11449) (berkaysynnada) +- Enable `clone_on_ref_ptr` clippy lints on proto [#11465](https://github.com/apache/datafusion/pull/11465) (lewiszlw) +- upgrade sqlparser 0.47 -> 0.48 [#11453](https://github.com/apache/datafusion/pull/11453) (MohamedAbdeen21) +- Add extension hooks for encoding and decoding UDAFs and UDWFs [#11417](https://github.com/apache/datafusion/pull/11417) (joroKr21) +- Remove element's nullability of array_agg function [#11447](https://github.com/apache/datafusion/pull/11447) (jayzhan211) +- Get expr planners when creating new planner [#11485](https://github.com/apache/datafusion/pull/11485) (jayzhan211) +- Support alternate format for Utf8 unparsing (CHAR) [#11494](https://github.com/apache/datafusion/pull/11494) (sgrebnov) +- implement retract_batch for xor accumulator [#11500](https://github.com/apache/datafusion/pull/11500) (drewhayward) +- Refactor: more clearly delineate between `TableParquetOptions` and `ParquetWriterOptions` [#11444](https://github.com/apache/datafusion/pull/11444) (wiedld) +- chore: fix typos of common and core packages [#11520](https://github.com/apache/datafusion/pull/11520) (JasonLi-cn) +- Move spill related functions to spill.rs [#11509](https://github.com/apache/datafusion/pull/11509) (findepi) +- Add tests that show the different defaults for `ArrowWriter` and `TableParquetOptions` [#11524](https://github.com/apache/datafusion/pull/11524) (wiedld) +- Create `datafusion-physical-optimizer` crate [#11507](https://github.com/apache/datafusion/pull/11507) (lewiszlw) +- Minor: Assert `test_enabled_backtrace` requirements to run [#11525](https://github.com/apache/datafusion/pull/11525) (comphead) +- Move handlign of NULL literals in where clause to type coercion pass [#11491](https://github.com/apache/datafusion/pull/11491) (xinlifoobar) +- Update parquet page pruning code to use the `StatisticsExtractor` [#11483](https://github.com/apache/datafusion/pull/11483) (alamb) +- Enable SortMergeJoin LeftAnti filtered fuzz tests [#11535](https://github.com/apache/datafusion/pull/11535) (comphead) +- chore: fix typos of expr, functions, optimizer, physical-expr-common,… [#11538](https://github.com/apache/datafusion/pull/11538) (JasonLi-cn) +- Minor: Remove clone in `PushDownFilter` [#11532](https://github.com/apache/datafusion/pull/11532) (jayzhan211) +- Minor: avoid a clone in type coercion [#11530](https://github.com/apache/datafusion/pull/11530) (alamb) +- Move array `ArrayAgg` to a `UserDefinedAggregate` [#11448](https://github.com/apache/datafusion/pull/11448) (jayzhan211) +- Move `MAKE_MAP` to ExprPlanner [#11452](https://github.com/apache/datafusion/pull/11452) (goldmedal) +- chore: fix typos of sql, sqllogictest and substrait packages [#11548](https://github.com/apache/datafusion/pull/11548) (JasonLi-cn) +- Prevent bigger files from being checked in [#11508](https://github.com/apache/datafusion/pull/11508) (findepi) +- Add dialect param to use double precision for float64 in Postgres [#11495](https://github.com/apache/datafusion/pull/11495) (Sevenannn) +- Minor: move `SessionStateDefaults` into its own module [#11566](https://github.com/apache/datafusion/pull/11566) (alamb) +- refactor: rewrite mega type to an enum containing both cases [#11539](https://github.com/apache/datafusion/pull/11539) (LorrensP-2158466) +- Move `sql_compound_identifier_to_expr ` to `ExprPlanner` [#11487](https://github.com/apache/datafusion/pull/11487) (dharanad) +- Support SortMergeJoin spilling [#11218](https://github.com/apache/datafusion/pull/11218) (comphead) +- Fix unparser invalid sql for query with order [#11527](https://github.com/apache/datafusion/pull/11527) (y-f-u) +- Provide DataFrame API for `map` and move `map` to `functions-array` [#11560](https://github.com/apache/datafusion/pull/11560) (goldmedal) +- Move OutputRequirements to datafusion-physical-optimizer crate [#11579](https://github.com/apache/datafusion/pull/11579) (xinlifoobar) +- Minor: move `Column` related tests and rename `column.rs` [#11573](https://github.com/apache/datafusion/pull/11573) (jonahgao) +- Fix SortMergeJoin antijoin flaky condition [#11604](https://github.com/apache/datafusion/pull/11604) (comphead) +- Improve Union Equivalence Propagation [#11506](https://github.com/apache/datafusion/pull/11506) (mustafasrepo) +- Migrate `OrderSensitiveArrayAgg` to be a user defined aggregate [#11564](https://github.com/apache/datafusion/pull/11564) (jayzhan211) +- Minor:Disable flaky SMJ antijoin filtered test until the fix [#11608](https://github.com/apache/datafusion/pull/11608) (comphead) +- support Decimal256 type in datafusion-proto [#11606](https://github.com/apache/datafusion/pull/11606) (leoyvens) +- Chore/fifo tests cleanup [#11616](https://github.com/apache/datafusion/pull/11616) (ozankabak) +- Fix Internal Error for an INNER JOIN query [#11578](https://github.com/apache/datafusion/pull/11578) (xinlifoobar) +- test: get file size by func metadata [#11575](https://github.com/apache/datafusion/pull/11575) (zhuliquan) +- Improve unparser MySQL compatibility [#11589](https://github.com/apache/datafusion/pull/11589) (sgrebnov) +- Push scalar functions into cross join [#11528](https://github.com/apache/datafusion/pull/11528) (lewiszlw) +- Remove ArrayAgg Builtin in favor of UDF [#11611](https://github.com/apache/datafusion/pull/11611) (jayzhan211) +- refactor: simplify `DFSchema::field_with_unqualified_name` [#11619](https://github.com/apache/datafusion/pull/11619) (jonahgao) +- Minor: Use upstream `concat_batches` from arrow-rs [#11615](https://github.com/apache/datafusion/pull/11615) (alamb) +- Fix : `signum` function bug when `0.0` input [#11580](https://github.com/apache/datafusion/pull/11580) (getChan) +- Enforce uniqueness of `named_struct` field names [#11614](https://github.com/apache/datafusion/pull/11614) (dharanad) +- Minor: unecessary row_count calculation in `CrossJoinExec` and `NestedLoopsJoinExec` [#11632](https://github.com/apache/datafusion/pull/11632) (alamb) +- ExprBuilder for Physical Aggregate Expr [#11617](https://github.com/apache/datafusion/pull/11617) (jayzhan211) +- Minor: avoid copying order by exprs in planner [#11634](https://github.com/apache/datafusion/pull/11634) (alamb) +- Unify CI and pre-commit hook settings for clippy [#11640](https://github.com/apache/datafusion/pull/11640) (findepi) +- Parsing SQL strings to Exprs with the qualified schema [#11562](https://github.com/apache/datafusion/pull/11562) (Lordworms) +- Add some zero column tests covering LIMIT, GROUP BY, WHERE, JOIN, and WINDOW [#11624](https://github.com/apache/datafusion/pull/11624) (Kev1n8) +- Refactor/simplify window frame utils [#11648](https://github.com/apache/datafusion/pull/11648) (ozankabak) +- Minor: use `ready!` macro to simplify `FilterExec` [#11649](https://github.com/apache/datafusion/pull/11649) (alamb) +- Temporarily pin toolchain version to avoid clippy errors [#11655](https://github.com/apache/datafusion/pull/11655) (findepi) +- Fix clippy errors for Rust 1.80 [#11654](https://github.com/apache/datafusion/pull/11654) (findepi) +- Add `CsvExecBuilder` for creating `CsvExec` [#11633](https://github.com/apache/datafusion/pull/11633) (connec) +- chore(deps): update sqlparser requirement from 0.48 to 0.49 [#11630](https://github.com/apache/datafusion/pull/11630) (dependabot[bot]) +- Add support for USING to SQL unparser [#11636](https://github.com/apache/datafusion/pull/11636) (wackywendell) +- Run CI with latest (Rust 1.80), add ticket references to commented out tests [#11661](https://github.com/apache/datafusion/pull/11661) (alamb) +- Use `AccumulatorArgs::is_reversed` in `NthValueAgg` [#11669](https://github.com/apache/datafusion/pull/11669) (jcsherin) +- Implement physical plan serialization for json Copy plans [#11645](https://github.com/apache/datafusion/pull/11645) (Lordworms) +- Minor: improve documentation on `SessionState` [#11642](https://github.com/apache/datafusion/pull/11642) (alamb) +- Add LimitPushdown optimization rule and CoalesceBatchesExec fetch [#11652](https://github.com/apache/datafusion/pull/11652) (alihandroid) +- Update to arrow/parquet `52.2.0` [#11691](https://github.com/apache/datafusion/pull/11691) (alamb) +- Minor: Rename `RepartitionMetrics::repartition_time` to `RepartitionMetrics::repart_time` to match metric [#11478](https://github.com/apache/datafusion/pull/11478) (alamb) +- Update cache key used in rust CI script [#11641](https://github.com/apache/datafusion/pull/11641) (findepi) +- Fix bug in `remove_join_expressions` [#11693](https://github.com/apache/datafusion/pull/11693) (jonahgao) +- Initial changes to support using udaf min/max for statistics and opti… [#11696](https://github.com/apache/datafusion/pull/11696) (edmondop) +- Handle nulls in approx_percentile_cont [#11721](https://github.com/apache/datafusion/pull/11721) (Dandandan) +- Reduce repetition in try_process_group_by_unnest and try_process_unnest [#11714](https://github.com/apache/datafusion/pull/11714) (JasonLi-cn) +- Minor: Add example for `ScalarUDF::call` [#11727](https://github.com/apache/datafusion/pull/11727) (alamb) +- Use `cargo release` in `bench.sh` [#11722](https://github.com/apache/datafusion/pull/11722) (alamb) +- expose some fields on session state [#11716](https://github.com/apache/datafusion/pull/11716) (waynexia) +- Make DefaultSchemaAdapterFactory public [#11709](https://github.com/apache/datafusion/pull/11709) (adriangb) +- Check hashes first during probing the aggr hash table [#11718](https://github.com/apache/datafusion/pull/11718) (Rachelint) +- Implement physical plan serialization for parquet Copy plans [#11735](https://github.com/apache/datafusion/pull/11735) (Lordworms) +- Support cross-timezone `timestamp` comparison via coercsion [#11711](https://github.com/apache/datafusion/pull/11711) (jeffreyssmith2nd) +- Minor: Improve documentation for AggregateUDFImpl::state_fields [#11740](https://github.com/apache/datafusion/pull/11740) (lewiszlw) +- Do not push down Sorts if it violates the sort requirements [#11678](https://github.com/apache/datafusion/pull/11678) (alamb) +- Use upstream `StatisticsConverter` from arrow-rs in DataFusion [#11479](https://github.com/apache/datafusion/pull/11479) (alamb) +- Fix `plan_to_sql`: Add wildcard projection to SELECT statement if no projection was set [#11744](https://github.com/apache/datafusion/pull/11744) (LatrecheYasser) +- Use upstream `DataType::from_str` in arrow-cast [#11254](https://github.com/apache/datafusion/pull/11254) (alamb) +- Fix documentation warnings, make CsvExecBuilder and Unparsed pub [#11729](https://github.com/apache/datafusion/pull/11729) (alamb) +- [Minor] Add test for only nulls (empty) as input in APPROX_PERCENTILE_CONT [#11760](https://github.com/apache/datafusion/pull/11760) (Dandandan) +- Add `TrackedMemoryPool` with better error messages on exhaustion [#11665](https://github.com/apache/datafusion/pull/11665) (wiedld) +- Derive `Debug` for logical plan nodes [#11757](https://github.com/apache/datafusion/pull/11757) (lewiszlw) +- Minor: add "clickbench extended" queries to slt tests [#11763](https://github.com/apache/datafusion/pull/11763) (alamb) +- Minor: Add comment explaining rationale for hash check [#11750](https://github.com/apache/datafusion/pull/11750) (alamb) +- Fix bug that `COUNT(DISTINCT)` on StringView panics [#11768](https://github.com/apache/datafusion/pull/11768) (XiangpengHao) +- [Minor] Refactor approx_percentile [#11769](https://github.com/apache/datafusion/pull/11769) (Dandandan) +- minor: always time batch_filter even when the result is an empty batch [#11775](https://github.com/apache/datafusion/pull/11775) (andygrove) +- Improve OOM message when a single reservation request fails to get more bytes. [#11771](https://github.com/apache/datafusion/pull/11771) (wiedld) +- [Minor] Short circuit `ApplyFunctionRewrites` if there are no function rewrites [#11765](https://github.com/apache/datafusion/pull/11765) (gruuya) +- Fix #11692: Improve doc comments within macros [#11694](https://github.com/apache/datafusion/pull/11694) (Rafferty97) +- Extract `CoalesceBatchesStream` to a struct [#11610](https://github.com/apache/datafusion/pull/11610) (alamb) +- refactor: move ExecutionPlan and related structs into dedicated mod [#11759](https://github.com/apache/datafusion/pull/11759) (waynexia) +- Minor: Add references to github issue in comments [#11784](https://github.com/apache/datafusion/pull/11784) (findepi) +- Add docs and rename param for `Signature::numeric` [#11778](https://github.com/apache/datafusion/pull/11778) (matthewmturner) +- Support planning `Map` literal [#11780](https://github.com/apache/datafusion/pull/11780) (goldmedal) +- Support `LogicalPlan` `Debug` differently than `Display` [#11774](https://github.com/apache/datafusion/pull/11774) (lewiszlw) +- Remove redundant Aggregate when `DISTINCT` & `GROUP BY` are in the same query [#11781](https://github.com/apache/datafusion/pull/11781) (mertak-synnada) +- Minor: add ticket reference and fmt [#11805](https://github.com/apache/datafusion/pull/11805) (alamb) +- Improve MSRV CI check to print out problems to log [#11789](https://github.com/apache/datafusion/pull/11789) (alamb) +- Improve log func tests stability [#11808](https://github.com/apache/datafusion/pull/11808) (lewiszlw) +- Add valid Distinct case for aggregation [#11814](https://github.com/apache/datafusion/pull/11814) (mertak-synnada) +- Don't implement `create_sliding_accumulator` repeatedly [#11813](https://github.com/apache/datafusion/pull/11813) (lewiszlw) +- chore(deps): update rstest requirement from 0.21.0 to 0.22.0 [#11811](https://github.com/apache/datafusion/pull/11811) (dependabot[bot]) +- Minor: Update exected output due to logical conflict [#11824](https://github.com/apache/datafusion/pull/11824) (alamb) +- Pass scalar to `eq` inside `nullif` [#11697](https://github.com/apache/datafusion/pull/11697) (simonvandel) +- refactor: move `aggregate_statistics` to `datafusion-physical-optimizer` [#11798](https://github.com/apache/datafusion/pull/11798) (Weijun-H) +- Minor: refactor probe check into function `should_skip_aggregation` [#11821](https://github.com/apache/datafusion/pull/11821) (alamb) +- Minor: consolidate `path_partition` test into `core_integration` [#11831](https://github.com/apache/datafusion/pull/11831) (alamb) +- Move optimizer integration tests to `core_integration` [#11830](https://github.com/apache/datafusion/pull/11830) (alamb) +- Bump deprecated version of SessionState::new_with_config_rt to 41.0.0 [#11839](https://github.com/apache/datafusion/pull/11839) (kezhuw) +- Fix partial aggregation skipping with Decimal aggregators [#11833](https://github.com/apache/datafusion/pull/11833) (alamb) +- Fix bug with zero-sized buffer for StringViewArray [#11841](https://github.com/apache/datafusion/pull/11841) (XiangpengHao) +- Reduce clone of `Statistics` in `ListingTable` and `PartitionedFile` [#11802](https://github.com/apache/datafusion/pull/11802) (Rachelint) +- Add `LogicalPlan::CreateIndex` [#11817](https://github.com/apache/datafusion/pull/11817) (lewiszlw) +- Update `object_store` to 0.10.2 [#11860](https://github.com/apache/datafusion/pull/11860) (danlgrca) +- Add `skipped_aggregation_rows` metric to aggregate operator [#11706](https://github.com/apache/datafusion/pull/11706) (alamb) +- Cast `Utf8View` to `Utf8` to support `||` from `StringViewArray` [#11796](https://github.com/apache/datafusion/pull/11796) (dharanad) +- Improve nested loop join code [#11863](https://github.com/apache/datafusion/pull/11863) (lewiszlw) +- [Minor]: Refactor to use Result.transpose() [#11882](https://github.com/apache/datafusion/pull/11882) (djanderson) +- support `ANY()` op [#11849](https://github.com/apache/datafusion/pull/11849) (samuelcolvin) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 48 Andrew Lamb + 20 张林伟 + 9 Jay Zhan + 9 Jonah Gao + 8 Andy Grove + 8 Lordworms + 8 Piotr Findeisen + 8 wiedld + 7 Oleks V + 6 Jax Liu + 5 Alex Huang + 5 Arttu + 5 JasonLi + 5 Trent Hauck + 5 Xin Li + 4 Dharan Aditya + 4 Edmondo Porcu + 4 dependabot[bot] + 4 kamille + 4 yfu + 3 Daniël Heres + 3 Eduard Karacharov + 3 Georgi Krastev + 2 Chris Connelly + 2 Chunchun Ye + 2 June + 2 Marco Neumann + 2 Marko Grujic + 2 Mehmet Ozan Kabak + 2 Michael J Ward + 2 Mohamed Abdeen + 2 Ruihang Xia + 2 Sergei Grebnov + 2 Xiangpeng Hao + 2 jcsherin + 2 kf zheng + 2 mertak-synnada + 1 Adrian Garcia Badaracco + 1 Alexander Rafferty + 1 Alihan Çelikcan + 1 Ariel Marcus + 1 Berkay Şahin + 1 Bruce Ritchie + 1 Devesh Rahatekar + 1 Douglas Anderson + 1 Drew Hayward + 1 Jeffrey Smith II + 1 Kaviraj Kanagaraj + 1 Kezhu Wang + 1 Leonardo Yvens + 1 Lorrens Pantelis + 1 Matthew Cramerus + 1 Matthew Turner + 1 Mustafa Akur + 1 Namgung Chan + 1 Ning Sun + 1 Peter Toth + 1 Qianqian + 1 Samuel Colvin + 1 Shehab Amin + 1 Simon Vandel Sillesen + 1 Tim Saucer + 1 Wendell Smith + 1 Yasser Latreche + 1 Yongting You + 1 danlgrca + 1 tmi + 1 waruto + 1 zhuliquan +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. diff --git a/dev/release/README.md b/dev/release/README.md index c6bc9be2b0db..1817b3002578 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -124,20 +124,31 @@ Here are the commands that could be used to prepare the `38.0.0` release: Checkout the main commit to be released -``` +```shell git fetch apache git checkout apache/main ``` -Update datafusion version in `datafusion/Cargo.toml` to `38.0.0`: +Manually update the datafusion version in the root `Cargo.toml` to `38.0.0`. +Run `cargo update` in the root directory and also in `datafusion-cli`: + +```shell +cargo update +cd datafustion-cli +cargo update +cd .. ``` -./dev/update_datafusion_versions.py 38.0.0 + +Run `cargo test` to re-generate some example files: + +```shell +cargo test ``` Lastly commit the version change: -``` +```shell git commit -a -m 'Update version' ``` @@ -193,7 +204,7 @@ For the release to become "official" it needs at least three PMC members to vote The `dev/release/verify-release-candidate.sh` is a script in this repository that can assist in the verification process. Run it like: -``` +```shell ./dev/release/verify-release-candidate.sh 38.0.0 0 ``` @@ -222,7 +233,7 @@ Congratulations! The release is now official! Tag the same release candidate commit with the final release tag -``` +```shell git co apache/38.0.0-rc0 git tag 38.0.0 git push apache 38.0.0 diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index badd07822ac2..e0c8391a259a 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -65,7 +65,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.statistics_enabled | page | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_statistics_size | 4096 | (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | -| datafusion.execution.parquet.created_by | datafusion version 40.0.0 | (writing) Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 41.0.0 | (writing) Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | 64 | (writing) Sets column index truncate length | | datafusion.execution.parquet.data_page_row_count_limit | 20000 | (writing) Sets best effort maximum number of rows in data page | | datafusion.execution.parquet.encoding | NULL | (writing) Sets default encoding for any column. Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. These values are not case sensitive. If NULL, uses default parquet writer setting |