diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index 37f8f8a525407..9ba1fe39d324b 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -48,4 +48,4 @@ jobs: - name: Run audit check # Note: you can ignore specific RUSTSEC issues using the `--ignore` flag ,for example: # run: cargo audit --ignore RUSTSEC-2026-0001 - run: cargo audit + run: cargo audit --ignore RUSTSEC-2024-0436 --ignore RUSTSEC-2024-0014 diff --git a/Cargo.lock b/Cargo.lock index 49f67e9dd2901..ae81068616c7a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -126,9 +126,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.21" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" dependencies = [ "anstyle", "anstyle-parse", @@ -141,15 +141,15 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anstyle-parse" -version = "0.2.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" dependencies = [ "utf8parse", ] @@ -766,9 +766,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.16.1" +version = "1.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94bffc006df10ac2a68c83692d734a465f8ee6c5b384d8545a636f81d858f4bf" +checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc" 
dependencies = [ "aws-lc-sys", "zeroize", @@ -776,9 +776,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.38.0" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4321e568ed89bb5a7d291a7f37997c2c0df89809d7b6d12062c81ddb54aa782e" +checksum = "1fa7e52a4c5c547c741610a2c6f123f3881e409b714cd27e6798ef020c514f0a" dependencies = [ "cc", "cmake", @@ -813,9 +813,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.96.0" +version = "1.97.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f64a6eded248c6b453966e915d32aeddb48ea63ad17932682774eb026fbef5b1" +checksum = "9aadc669e184501caaa6beafb28c6267fc1baef0810fb58f9b205485ca3f2567" dependencies = [ "aws-credential-types", "aws-runtime", @@ -837,9 +837,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.98.0" +version = "1.99.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db96d720d3c622fcbe08bae1c4b04a72ce6257d8b0584cb5418da00ae20a344f" +checksum = "1342a7db8f358d3de0aed2007a0b54e875458e39848d54cc1d46700b2bfcb0a8" dependencies = [ "aws-credential-types", "aws-runtime", @@ -861,9 +861,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.100.0" +version = "1.101.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fafbdda43b93f57f699c5dfe8328db590b967b8a820a13ccdd6687355dfcc7ca" +checksum = "ab41ad64e4051ecabeea802d6a17845a91e83287e1dd249e6963ea1ba78c428a" dependencies = [ "aws-credential-types", "aws-runtime", @@ -1034,9 +1034,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.4.6" +version = "1.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b1117b3b2bbe166d11199b540ceed0d0f7676e36e7b962b5a437a9971eac75" +checksum = "9d73dbfbaa8e4bc57b9045137680b958d274823509a360abfd8e1d514d40c95c" dependencies = [ "base64-simd", "bytes", @@ -1311,9 +1311,9 @@ dependencies = [ 
[[package]] name = "bon" -version = "3.9.0" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d13a61f2963b88eef9c1be03df65d42f6996dfeac1054870d950fcf66686f83" +checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" dependencies = [ "bon-macros", "rustversion", @@ -1321,11 +1321,11 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.9.0" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d314cc62af2b6b0c65780555abb4d02a03dd3b799cd42419044f0c38d99738c0" +checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" dependencies = [ - "darling 0.23.0", + "darling", "ident_case", "prettyplease", "proc-macro2", @@ -1410,9 +1410,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.56" +version = "1.2.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" dependencies = [ "find-msvc-tools", "jobserver", @@ -1496,9 +1496,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.60" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" dependencies = [ "clap_builder", "clap_derive", @@ -1506,9 +1506,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.60" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ "anstream", "anstyle", @@ -1518,9 +1518,9 @@ dependencies = [ [[package]] 
name = "clap_derive" -version = "4.5.55" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" dependencies = [ "heck", "proc-macro2", @@ -1530,9 +1530,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "clipboard-win" @@ -1554,9 +1554,9 @@ dependencies = [ [[package]] name = "colorchoice" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "comfy-table" @@ -1612,13 +1612,12 @@ dependencies = [ [[package]] name = "console" -version = "0.16.2" +version = "0.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03e45a4a8926227e4197636ba97a9fc9b00477e9f4bd711395687c5f0734bec4" +checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" dependencies = [ "encode_unicode", "libc", - "once_cell", "unicode-width 0.2.2", "windows-sys 0.61.2", ] @@ -1872,38 +1871,14 @@ dependencies = [ "libloading 0.8.9", ] -[[package]] -name = "darling" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" -dependencies = [ - "darling_core 0.21.3", - "darling_macro 0.21.3", -] - [[package]] name = "darling" version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" dependencies = [ - "darling_core 0.23.0", - "darling_macro 0.23.0", -] - -[[package]] -name = "darling_core" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn 2.0.117", + "darling_core", + "darling_macro", ] [[package]] @@ -1919,24 +1894,13 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "darling_macro" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" -dependencies = [ - "darling_core 0.21.3", - "quote", - "syn 2.0.117", -] - [[package]] name = "darling_macro" version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ - "darling_core 0.23.0", + "darling_core", "quote", "syn 2.0.117", ] @@ -1957,7 +1921,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "arrow-schema", @@ -1968,34 +1932,34 @@ dependencies = [ "criterion", "ctor", "dashmap", - "datafusion-catalog 52.3.0", + "datafusion-catalog 52.4.0", "datafusion-catalog-listing", - "datafusion-common 52.3.0", - "datafusion-common-runtime 52.3.0", - "datafusion-datasource 52.3.0", + "datafusion-common 52.4.0", + "datafusion-common-runtime 52.4.0", + "datafusion-datasource 52.4.0", "datafusion-datasource-arrow", "datafusion-datasource-avro", "datafusion-datasource-csv", "datafusion-datasource-json", "datafusion-datasource-parquet", - "datafusion-doc 52.3.0", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-expr-common 52.3.0", - "datafusion-functions 52.3.0", + "datafusion-doc 52.4.0", + "datafusion-execution 
52.4.0", + "datafusion-expr 52.4.0", + "datafusion-expr-common 52.4.0", + "datafusion-functions 52.4.0", "datafusion-functions-aggregate", "datafusion-functions-nested", "datafusion-functions-table", "datafusion-functions-window", - "datafusion-functions-window-common 52.3.0", - "datafusion-macros 52.3.0", + "datafusion-functions-window-common 52.4.0", + "datafusion-macros 52.4.0", "datafusion-optimizer", - "datafusion-physical-expr 52.3.0", - "datafusion-physical-expr-adapter 52.3.0", - "datafusion-physical-expr-common 52.3.0", + "datafusion-physical-expr 52.4.0", + "datafusion-physical-expr-adapter 52.4.0", + "datafusion-physical-expr-common 52.4.0", "datafusion-physical-optimizer", - "datafusion-physical-plan 52.3.0", - "datafusion-session 52.3.0", + "datafusion-physical-plan 52.4.0", + "datafusion-session 52.4.0", "datafusion-sql", "doc-comment", "env_logger", @@ -2029,12 +1993,12 @@ dependencies = [ [[package]] name = "datafusion-benchmarks" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "clap", "datafusion", - "datafusion-common 52.3.0", + "datafusion-common 52.4.0", "datafusion-proto", "env_logger", "futures", @@ -2079,19 +2043,19 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "async-trait", "dashmap", - "datafusion-common 52.3.0", - "datafusion-common-runtime 52.3.0", - "datafusion-datasource 52.3.0", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-physical-expr 52.3.0", - "datafusion-physical-plan 52.3.0", - "datafusion-session 52.3.0", + "datafusion-common 52.4.0", + "datafusion-common-runtime 52.4.0", + "datafusion-datasource 52.4.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-physical-expr 52.4.0", + "datafusion-physical-plan 52.4.0", + "datafusion-session 52.4.0", "futures", "itertools 0.14.0", "log", @@ -2102,20 +2066,20 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version 
= "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "async-trait", - "datafusion-catalog 52.3.0", - "datafusion-common 52.3.0", - "datafusion-datasource 52.3.0", + "datafusion-catalog 52.4.0", + "datafusion-common 52.4.0", + "datafusion-datasource 52.4.0", "datafusion-datasource-parquet", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-physical-expr 52.3.0", - "datafusion-physical-expr-adapter 52.3.0", - "datafusion-physical-expr-common 52.3.0", - "datafusion-physical-plan 52.3.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-physical-expr 52.4.0", + "datafusion-physical-expr-adapter 52.4.0", + "datafusion-physical-expr-common 52.4.0", + "datafusion-physical-plan 52.4.0", "futures", "itertools 0.14.0", "log", @@ -2126,7 +2090,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "async-trait", @@ -2136,7 +2100,7 @@ dependencies = [ "clap", "ctor", "datafusion", - "datafusion-common 52.3.0", + "datafusion-common 52.4.0", "dirs", "env_logger", "futures", @@ -2178,7 +2142,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.3.0" +version = "52.4.0" dependencies = [ "ahash", "apache-avro", @@ -2216,7 +2180,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.3.0" +version = "52.4.0" dependencies = [ "futures", "log", @@ -2254,7 +2218,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "async-compression", @@ -2263,15 +2227,15 @@ dependencies = [ "bzip2", "chrono", "criterion", - "datafusion-common 52.3.0", - "datafusion-common-runtime 52.3.0", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-physical-expr 52.3.0", - "datafusion-physical-expr-adapter 52.3.0", - "datafusion-physical-expr-common 52.3.0", - "datafusion-physical-plan 52.3.0", - "datafusion-session 52.3.0", + 
"datafusion-common 52.4.0", + "datafusion-common-runtime 52.4.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-physical-expr 52.4.0", + "datafusion-physical-expr-adapter 52.4.0", + "datafusion-physical-expr-common 52.4.0", + "datafusion-physical-plan 52.4.0", + "datafusion-session 52.4.0", "flate2", "futures", "glob", @@ -2290,21 +2254,21 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "arrow-ipc", "async-trait", "bytes", "chrono", - "datafusion-common 52.3.0", - "datafusion-common-runtime 52.3.0", - "datafusion-datasource 52.3.0", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-physical-expr-common 52.3.0", - "datafusion-physical-plan 52.3.0", - "datafusion-session 52.3.0", + "datafusion-common 52.4.0", + "datafusion-common-runtime 52.4.0", + "datafusion-datasource 52.4.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-physical-expr-common 52.4.0", + "datafusion-physical-plan 52.4.0", + "datafusion-session 52.4.0", "futures", "itertools 0.14.0", "object_store", @@ -2313,17 +2277,17 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "52.3.0" +version = "52.4.0" dependencies = [ "apache-avro", "arrow", "async-trait", "bytes", - "datafusion-common 52.3.0", - "datafusion-datasource 52.3.0", - "datafusion-physical-expr-common 52.3.0", - "datafusion-physical-plan 52.3.0", - "datafusion-session 52.3.0", + "datafusion-common 52.4.0", + "datafusion-datasource 52.4.0", + "datafusion-physical-expr-common 52.4.0", + "datafusion-physical-plan 52.4.0", + "datafusion-session 52.4.0", "futures", "num-traits", "object_store", @@ -2332,19 +2296,19 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-common 52.3.0", - "datafusion-common-runtime 52.3.0", - 
"datafusion-datasource 52.3.0", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-physical-expr-common 52.3.0", - "datafusion-physical-plan 52.3.0", - "datafusion-session 52.3.0", + "datafusion-common 52.4.0", + "datafusion-common-runtime 52.4.0", + "datafusion-datasource 52.4.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-physical-expr-common 52.4.0", + "datafusion-physical-plan 52.4.0", + "datafusion-session 52.4.0", "futures", "object_store", "regex", @@ -2353,19 +2317,19 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-common 52.3.0", - "datafusion-common-runtime 52.3.0", - "datafusion-datasource 52.3.0", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-physical-expr-common 52.3.0", - "datafusion-physical-plan 52.3.0", - "datafusion-session 52.3.0", + "datafusion-common 52.4.0", + "datafusion-common-runtime 52.4.0", + "datafusion-datasource 52.4.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-physical-expr-common 52.4.0", + "datafusion-physical-plan 52.4.0", + "datafusion-session 52.4.0", "futures", "object_store", "tokio", @@ -2373,24 +2337,24 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "async-trait", "bytes", "chrono", - "datafusion-common 52.3.0", - "datafusion-common-runtime 52.3.0", - "datafusion-datasource 52.3.0", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-functions-aggregate-common 52.3.0", - "datafusion-physical-expr 52.3.0", - "datafusion-physical-expr-adapter 52.3.0", - "datafusion-physical-expr-common 52.3.0", - "datafusion-physical-plan 52.3.0", - "datafusion-pruning 52.3.0", - "datafusion-session 52.3.0", + "datafusion-common 52.4.0", + "datafusion-common-runtime 52.4.0", + 
"datafusion-datasource 52.4.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-functions-aggregate-common 52.4.0", + "datafusion-physical-expr 52.4.0", + "datafusion-physical-expr-adapter 52.4.0", + "datafusion-physical-expr-common 52.4.0", + "datafusion-physical-plan 52.4.0", + "datafusion-pruning 52.4.0", + "datafusion-session 52.4.0", "futures", "itertools 0.14.0", "log", @@ -2408,11 +2372,11 @@ checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" [[package]] name = "datafusion-doc" -version = "52.3.0" +version = "52.4.0" [[package]] name = "datafusion-examples" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "arrow-flight", @@ -2422,9 +2386,9 @@ dependencies = [ "bytes", "dashmap", "datafusion", - "datafusion-common 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-physical-expr-adapter 52.3.0", + "datafusion-common 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-physical-expr-adapter 52.4.0", "datafusion-proto", "datafusion-sql", "env_logger", @@ -2471,14 +2435,14 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "async-trait", "chrono", "dashmap", - "datafusion-common 52.3.0", - "datafusion-expr 52.3.0", + "datafusion-common 52.4.0", + "datafusion-expr 52.4.0", "futures", "insta", "log", @@ -2514,18 +2478,18 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "async-trait", "chrono", "ctor", - "datafusion-common 52.3.0", - "datafusion-doc 52.3.0", - "datafusion-expr-common 52.3.0", - "datafusion-functions-aggregate-common 52.3.0", - "datafusion-functions-window-common 52.3.0", - "datafusion-physical-expr-common 52.3.0", + "datafusion-common 52.4.0", + "datafusion-doc 52.4.0", + "datafusion-expr-common 52.4.0", + "datafusion-functions-aggregate-common 52.4.0", + "datafusion-functions-window-common 52.4.0", + "datafusion-physical-expr-common 
52.4.0", "env_logger", "indexmap 2.13.0", "insta", @@ -2551,10 +2515,10 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", - "datafusion-common 52.3.0", + "datafusion-common 52.4.0", "indexmap 2.13.0", "itertools 0.14.0", "paste", @@ -2562,7 +2526,7 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "52.3.0" +version = "52.4.0" dependencies = [ "abi_stable", "arrow", @@ -2570,22 +2534,22 @@ dependencies = [ "async-ffi", "async-trait", "datafusion", - "datafusion-catalog 52.3.0", - "datafusion-common 52.3.0", - "datafusion-datasource 52.3.0", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-functions 52.3.0", + "datafusion-catalog 52.4.0", + "datafusion-common 52.4.0", + "datafusion-datasource 52.4.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-functions 52.4.0", "datafusion-functions-aggregate", - "datafusion-functions-aggregate-common 52.3.0", + "datafusion-functions-aggregate-common 52.4.0", "datafusion-functions-table", "datafusion-functions-window", - "datafusion-physical-expr 52.3.0", - "datafusion-physical-expr-common 52.3.0", - "datafusion-physical-plan 52.3.0", + "datafusion-physical-expr 52.4.0", + "datafusion-physical-expr-common 52.4.0", + "datafusion-physical-plan 52.4.0", "datafusion-proto", "datafusion-proto-common", - "datafusion-session 52.3.0", + "datafusion-session 52.4.0", "doc-comment", "futures", "log", @@ -2622,7 +2586,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "arrow-buffer", @@ -2633,12 +2597,12 @@ dependencies = [ "chrono-tz", "criterion", "ctor", - "datafusion-common 52.3.0", - "datafusion-doc 52.3.0", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-expr-common 52.3.0", - "datafusion-macros 52.3.0", + "datafusion-common 52.4.0", + "datafusion-doc 52.4.0", + 
"datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-expr-common 52.4.0", + "datafusion-macros 52.4.0", "env_logger", "hex", "itertools 0.14.0", @@ -2655,19 +2619,19 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.3.0" +version = "52.4.0" dependencies = [ "ahash", "arrow", "criterion", - "datafusion-common 52.3.0", - "datafusion-doc 52.3.0", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-functions-aggregate-common 52.3.0", - "datafusion-macros 52.3.0", - "datafusion-physical-expr 52.3.0", - "datafusion-physical-expr-common 52.3.0", + "datafusion-common 52.4.0", + "datafusion-doc 52.4.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-functions-aggregate-common 52.4.0", + "datafusion-macros 52.4.0", + "datafusion-physical-expr 52.4.0", + "datafusion-physical-expr-common 52.4.0", "half", "log", "paste", @@ -2689,34 +2653,34 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "52.3.0" +version = "52.4.0" dependencies = [ "ahash", "arrow", "criterion", - "datafusion-common 52.3.0", - "datafusion-expr-common 52.3.0", - "datafusion-physical-expr-common 52.3.0", + "datafusion-common 52.4.0", + "datafusion-expr-common 52.4.0", + "datafusion-physical-expr-common 52.4.0", "rand 0.9.2", ] [[package]] name = "datafusion-functions-nested" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "arrow-ord", "criterion", - "datafusion-common 52.3.0", - "datafusion-doc 52.3.0", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-expr-common 52.3.0", - "datafusion-functions 52.3.0", + "datafusion-common 52.4.0", + "datafusion-doc 52.4.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-expr-common 52.4.0", + "datafusion-functions 52.4.0", "datafusion-functions-aggregate", - "datafusion-functions-aggregate-common 52.3.0", - "datafusion-macros 52.3.0", - 
"datafusion-physical-expr-common 52.3.0", + "datafusion-functions-aggregate-common 52.4.0", + "datafusion-macros 52.4.0", + "datafusion-physical-expr-common 52.4.0", "itertools 0.14.0", "log", "paste", @@ -2725,30 +2689,30 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "async-trait", - "datafusion-catalog 52.3.0", - "datafusion-common 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-physical-plan 52.3.0", + "datafusion-catalog 52.4.0", + "datafusion-common 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-physical-plan 52.4.0", "parking_lot", "paste", ] [[package]] name = "datafusion-functions-window" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", - "datafusion-common 52.3.0", - "datafusion-doc 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-functions-window-common 52.3.0", - "datafusion-macros 52.3.0", - "datafusion-physical-expr 52.3.0", - "datafusion-physical-expr-common 52.3.0", + "datafusion-common 52.4.0", + "datafusion-doc 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-functions-window-common 52.4.0", + "datafusion-macros 52.4.0", + "datafusion-physical-expr 52.4.0", + "datafusion-physical-expr-common 52.4.0", "log", "paste", ] @@ -2765,10 +2729,10 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.3.0" +version = "52.4.0" dependencies = [ - "datafusion-common 52.3.0", - "datafusion-physical-expr-common 52.3.0", + "datafusion-common 52.4.0", + "datafusion-physical-expr-common 52.4.0", ] [[package]] @@ -2784,29 +2748,29 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.3.0" +version = "52.4.0" dependencies = [ - "datafusion-doc 52.3.0", + "datafusion-doc 52.4.0", "quote", "syn 2.0.117", ] [[package]] name = "datafusion-optimizer" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "async-trait", "chrono", "criterion", "ctor", - "datafusion-common 52.3.0", - 
"datafusion-expr 52.3.0", - "datafusion-expr-common 52.3.0", + "datafusion-common 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-expr-common 52.4.0", "datafusion-functions-aggregate", "datafusion-functions-window", - "datafusion-functions-window-common 52.3.0", - "datafusion-physical-expr 52.3.0", + "datafusion-functions-window-common 52.4.0", + "datafusion-physical-expr 52.4.0", "datafusion-sql", "env_logger", "indexmap 2.13.0", @@ -2842,17 +2806,17 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.3.0" +version = "52.4.0" dependencies = [ "ahash", "arrow", "criterion", - "datafusion-common 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-expr-common 52.3.0", - "datafusion-functions 52.3.0", - "datafusion-functions-aggregate-common 52.3.0", - "datafusion-physical-expr-common 52.3.0", + "datafusion-common 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-expr-common 52.4.0", + "datafusion-functions 52.4.0", + "datafusion-functions-aggregate-common 52.4.0", + "datafusion-physical-expr-common 52.4.0", "half", "hashbrown 0.16.1", "indexmap 2.13.0", @@ -2884,14 +2848,14 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", - "datafusion-common 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-functions 52.3.0", - "datafusion-physical-expr 52.3.0", - "datafusion-physical-expr-common 52.3.0", + "datafusion-common 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-functions 52.4.0", + "datafusion-physical-expr 52.4.0", + "datafusion-physical-expr-common 52.4.0", "itertools 0.14.0", ] @@ -2911,13 +2875,13 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.3.0" +version = "52.4.0" dependencies = [ "ahash", "arrow", "chrono", - "datafusion-common 52.3.0", - "datafusion-expr-common 52.3.0", + "datafusion-common 52.4.0", + "datafusion-expr-common 52.4.0", "hashbrown 0.16.1", "indexmap 2.13.0", "itertools 0.14.0", @@ 
-2926,18 +2890,18 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", - "datafusion-common 52.3.0", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-expr-common 52.3.0", - "datafusion-functions 52.3.0", - "datafusion-physical-expr 52.3.0", - "datafusion-physical-expr-common 52.3.0", - "datafusion-physical-plan 52.3.0", - "datafusion-pruning 52.3.0", + "datafusion-common 52.4.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-expr-common 52.4.0", + "datafusion-functions 52.4.0", + "datafusion-physical-expr 52.4.0", + "datafusion-physical-expr-common 52.4.0", + "datafusion-physical-plan 52.4.0", + "datafusion-pruning 52.4.0", "insta", "itertools 0.14.0", "recursive", @@ -2977,7 +2941,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.3.0" +version = "52.4.0" dependencies = [ "ahash", "arrow", @@ -2985,17 +2949,17 @@ dependencies = [ "arrow-schema", "async-trait", "criterion", - "datafusion-common 52.3.0", - "datafusion-common-runtime 52.3.0", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-functions 52.3.0", + "datafusion-common 52.4.0", + "datafusion-common-runtime 52.4.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-functions 52.4.0", "datafusion-functions-aggregate", - "datafusion-functions-aggregate-common 52.3.0", + "datafusion-functions-aggregate-common 52.4.0", "datafusion-functions-window", - "datafusion-functions-window-common 52.3.0", - "datafusion-physical-expr 52.3.0", - "datafusion-physical-expr-common 52.3.0", + "datafusion-functions-window-common 52.4.0", + "datafusion-physical-expr 52.4.0", + "datafusion-physical-expr-common 52.4.0", "futures", "half", "hashbrown 0.16.1", @@ -3013,30 +2977,30 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", 
"async-trait", "chrono", "datafusion", - "datafusion-catalog 52.3.0", + "datafusion-catalog 52.4.0", "datafusion-catalog-listing", - "datafusion-common 52.3.0", - "datafusion-datasource 52.3.0", + "datafusion-common 52.4.0", + "datafusion-datasource 52.4.0", "datafusion-datasource-arrow", "datafusion-datasource-avro", "datafusion-datasource-csv", "datafusion-datasource-json", "datafusion-datasource-parquet", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-functions 52.3.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-functions 52.4.0", "datafusion-functions-aggregate", "datafusion-functions-table", - "datafusion-functions-window-common 52.3.0", - "datafusion-physical-expr 52.3.0", - "datafusion-physical-expr-common 52.3.0", - "datafusion-physical-plan 52.3.0", + "datafusion-functions-window-common 52.4.0", + "datafusion-physical-expr 52.4.0", + "datafusion-physical-expr-common 52.4.0", + "datafusion-physical-plan 52.4.0", "datafusion-proto-common", "doc-comment", "object_store", @@ -3051,10 +3015,10 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", - "datafusion-common 52.3.0", + "datafusion-common 52.4.0", "doc-comment", "pbjson", "prost", @@ -3080,17 +3044,17 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", - "datafusion-common 52.3.0", - "datafusion-datasource 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-expr-common 52.3.0", + "datafusion-common 52.4.0", + "datafusion-datasource 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-expr-common 52.4.0", "datafusion-functions-nested", - "datafusion-physical-expr 52.3.0", - "datafusion-physical-expr-common 52.3.0", - "datafusion-physical-plan 52.3.0", + "datafusion-physical-expr 52.4.0", + "datafusion-physical-expr-common 52.4.0", + "datafusion-physical-plan 52.4.0", "insta", "itertools 0.14.0", 
"log", @@ -3112,30 +3076,30 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.3.0" +version = "52.4.0" dependencies = [ "async-trait", - "datafusion-common 52.3.0", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-physical-plan 52.3.0", + "datafusion-common 52.4.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-physical-plan 52.4.0", "parking_lot", ] [[package]] name = "datafusion-spark" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "bigdecimal", "chrono", "crc32fast", "criterion", - "datafusion-catalog 52.3.0", - "datafusion-common 52.3.0", - "datafusion-execution 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-functions 52.3.0", + "datafusion-catalog 52.4.0", + "datafusion-common 52.4.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-functions 52.4.0", "datafusion-functions-nested", "log", "percent-encoding", @@ -3146,15 +3110,15 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "bigdecimal", "chrono", "ctor", - "datafusion-common 52.3.0", - "datafusion-expr 52.3.0", - "datafusion-functions 52.3.0", + "datafusion-common 52.4.0", + "datafusion-expr 52.4.0", + "datafusion-functions 52.4.0", "datafusion-functions-aggregate", "datafusion-functions-nested", "datafusion-functions-window", @@ -3172,7 +3136,7 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "52.3.0" +version = "52.4.0" dependencies = [ "arrow", "async-trait", @@ -3203,7 +3167,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "52.3.0" +version = "52.4.0" dependencies = [ "async-recursion", "async-trait", @@ -3224,16 +3188,16 @@ dependencies = [ [[package]] name = "datafusion-wasmtest" -version = "52.3.0" +version = "52.4.0" dependencies = [ "chrono", "console_error_panic_hook", "datafusion", - "datafusion-common 52.3.0", - "datafusion-execution 52.3.0", 
- "datafusion-expr 52.3.0", + "datafusion-common 52.4.0", + "datafusion-execution 52.4.0", + "datafusion-expr 52.4.0", "datafusion-optimizer", - "datafusion-physical-plan 52.3.0", + "datafusion-physical-plan 52.4.0", "datafusion-sql", "getrandom 0.3.4", "object_store", @@ -3424,9 +3388,9 @@ dependencies = [ [[package]] name = "env_filter" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a1c3cc8e57274ec99de65301228b537f1e4eedc1b8e0f9411c6caac8ae7308f" +checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef" dependencies = [ "log", "regex", @@ -3434,9 +3398,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.9" +version = "0.11.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2daee4ea451f429a58296525ddf28b45a3b64f1acf6587e2067437bb11e218d" +checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a" dependencies = [ "anstream", "anstyle", @@ -3708,9 +3672,12 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst-rs" -version = "0.5.6" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "561f2458a3407836ab8f1acc9113b8cda91b9d6378ba8dad13b2fe1a1d3af5ce" +checksum = "cdf65e16e100438be0030d113042e07a62bed67203998640ca6fae0404eed71e" +dependencies = [ + "rustc-hash", +] [[package]] name = "funty" @@ -4386,7 +4353,7 @@ version = "0.18.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25470f23803092da7d239834776d653104d551bc4d7eacaf31e6837854b8e9eb" dependencies = [ - "console 0.16.2", + "console 0.16.3", "portable-atomic", "unicode-width 0.2.2", "unit-prefix", @@ -4443,9 +4410,9 @@ checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.10" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +checksum = "d8e7418f59cc01c88316161279a7f665217ae316b388e58a0d10e29f54f1e5eb" dependencies = [ "memchr", "serde", @@ -4477,9 +4444,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "jiff" @@ -4717,9 +4684,9 @@ dependencies = [ [[package]] name = "libredox" -version = "0.1.14" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" +checksum = "7ddbf48fd451246b1f8c2610bd3b4ac0cc6e149d89832867093ab69a17194f08" dependencies = [ "bitflags", "libc", @@ -4729,9 +4696,9 @@ dependencies = [ [[package]] name = "libtest-mimic" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33" +checksum = "14e6ba06f0ade6e504aff834d7c34298e5155c6baca353cc6a4aaff2f9fd7f33" dependencies = [ "anstream", "anstyle", @@ -4774,9 +4741,9 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" +checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" dependencies = [ "twox-hash", ] @@ -4867,9 +4834,9 @@ dependencies = [ [[package]] name = "moka" -version = "0.12.14" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85f8024e1c8e71c778968af91d43700ce1d11b219d127d79fb2934153b82b42b" +checksum = 
"957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046" dependencies = [ "async-lock", "crossbeam-channel", @@ -5062,9 +5029,9 @@ dependencies = [ [[package]] name = "num_enum" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +checksum = "5d0bca838442ec211fa11de3a8b0e0e8f3a4522575b5c4c06ed722e005036f26" dependencies = [ "num_enum_derive", "rustversion", @@ -5072,9 +5039,9 @@ dependencies = [ [[package]] name = "num_enum_derive" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +checksum = "680998035259dcfcafe653688bf2aa6d3e2dc05e98be6ab46afb089dc84f1df8" dependencies = [ "proc-macro2", "quote", @@ -5157,9 +5124,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" dependencies = [ "parking_lot_core", ] @@ -5556,9 +5523,9 @@ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" +checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" dependencies = [ "portable-atomic", ] @@ -6264,9 +6231,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.9" +version = "0.103.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" +checksum = 
"df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" dependencies = [ "aws-lc-rs", "ring", @@ -6319,9 +6286,9 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.28" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" dependencies = [ "windows-sys 0.61.2", ] @@ -6486,7 +6453,6 @@ version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ - "indexmap 2.13.0", "itoa", "memchr", "serde", @@ -6531,9 +6497,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.17.0" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "381b283ce7bc6b476d903296fb59d0d36633652b633b27f64db4fb46dcbfc3b9" +checksum = "dd5414fad8e6907dbdd5bc441a50ae8d6e26151a03b1de04d89a5576de61d01f" dependencies = [ "base64 0.22.1", "chrono", @@ -6550,11 +6516,11 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.17.0" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6d4e30573c8cb306ed6ab1dca8423eec9a463ea0e155f45399455e0368b27e0" +checksum = "d3db8978e608f1fe7357e211969fd9abdcae80bac1ba7a3369bb7eb6b404eb65" dependencies = [ - "darling 0.21.3", + "darling", "proc-macro2", "quote", "syn 2.0.117", @@ -6855,12 +6821,11 @@ dependencies = [ [[package]] name = "substrait" -version = "0.62.3" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb31133a233ae4e8fad7c9228fd87767927d9f4d60c3b0f3184be0a445aac1c" +checksum = "62fc4b483a129b9772ccb9c3f7945a472112fdd9140da87f8a4e7f1d44e045d0" dependencies = [ "heck", - "indexmap 2.13.0", "pbjson", "pbjson-build", "pbjson-types", @@ -6962,9 +6927,9 @@ checksum = 
"c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" [[package]] name = "tempfile" -version = "3.26.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", "getrandom 0.4.2", @@ -6985,7 +6950,7 @@ version = "0.1.0" dependencies = [ "arrow", "chrono-tz", - "datafusion-common 52.3.0", + "datafusion-common 52.4.0", "env_logger", "rand 0.9.2", ] @@ -7131,9 +7096,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ "tinyvec_macros", ] @@ -7234,18 +7199,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "1.0.0+spec-1.1.0" +version = "1.1.0+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e" +checksum = "97251a7c317e03ad83774a8752a7e81fb6067740609f75ea2b585b569a59198f" dependencies = [ "serde_core", ] [[package]] name = "toml_edit" -version = "0.25.4+spec-1.1.0" +version = "0.25.8+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7193cbd0ce53dc966037f54351dbbcf0d5a642c7f0038c382ef9e677ce8c13f2" +checksum = "16bff38f1d86c47f9ff0647e6838d7bb362522bdf44006c7068c2b1e606f1f3c" dependencies = [ "indexmap 2.13.0", "toml_datetime", @@ -7255,9 +7220,9 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.9+spec-1.1.0" +version = "1.1.0+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" +checksum = 
"2334f11ee363607eb04df9b8fc8a13ca1715a72ba8662a26ac285c98aabb4011" dependencies = [ "winnow", ] @@ -7396,9 +7361,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.22" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" dependencies = [ "nu-ansi-term", "sharded-slab", @@ -7529,9 +7494,9 @@ checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "da36089a805484bcccfffe0739803392c8298778a2d2f09febf76fac5ad9025b" [[package]] name = "unicode-width" @@ -7571,9 +7536,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" -version = "3.2.0" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc97a28575b85cfedf2a7e7d3cc64b3e11bd8ac766666318003abbacc7a21fc" +checksum = "dea7109cdcd5864d4eeb1b58a1648dc9bf520360d7af16ec26d0a9354bafcfc0" dependencies = [ "base64 0.22.1", "log", @@ -7581,14 +7546,14 @@ dependencies = [ "rustls", "rustls-pki-types", "ureq-proto", - "utf-8", + "utf8-zero", ] [[package]] name = "ureq-proto" -version = "0.5.3" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d81f9efa9df032be5934a46a068815a10a042b494b6a58cb0a1a97bb5467ed6f" +checksum = "e994ba84b0bd1b1b0cf92878b7ef898a5c1760108fe7b6010327e274917a808c" dependencies = [ "base64 0.22.1", "http 1.4.0", @@ -7616,10 +7581,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" [[package]] -name = 
"utf-8" -version = "0.7.6" +name = "utf8-zero" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +checksum = "b8c0a043c9540bae7c578c88f91dda8bd82e59ae27c21baca69c8b191aaf5a6e" [[package]] name = "utf8_iter" @@ -8920,9 +8885,9 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" -version = "0.7.15" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" +checksum = "a90e88e4667264a994d34e6d1ab2d26d398dcdca8b7f52bec8668957517fc7d8" dependencies = [ "memchr", ] @@ -9090,18 +9055,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.42" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" +checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.42" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" +checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 1b284f094084d..4e18783b215c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,7 +79,7 @@ repository = "https://github.com/apache/datafusion" # Define Minimum Supported Rust Version (MSRV) rust-version = "1.88.0" # Define DataFusion version -version = "52.3.0" +version = "52.4.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -112,43 +112,43 @@ chrono = { version = "0.4.42", default-features = false } criterion = "0.8" 
ctor = "0.6.3" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "52.3.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "52.3.0" } -datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "52.3.0" } -datafusion-common = { path = "datafusion/common", version = "52.3.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "52.3.0" } -datafusion-datasource = { path = "datafusion/datasource", version = "52.3.0", default-features = false } -datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "52.3.0", default-features = false } -datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "52.3.0", default-features = false } -datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "52.3.0", default-features = false } -datafusion-datasource-json = { path = "datafusion/datasource-json", version = "52.3.0", default-features = false } -datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "52.3.0", default-features = false } -datafusion-doc = { path = "datafusion/doc", version = "52.3.0" } -datafusion-execution = { path = "datafusion/execution", version = "52.3.0", default-features = false } -datafusion-expr = { path = "datafusion/expr", version = "52.3.0", default-features = false } -datafusion-expr-common = { path = "datafusion/expr-common", version = "52.3.0" } -datafusion-ffi = { path = "datafusion/ffi", version = "52.3.0" } -datafusion-functions = { path = "datafusion/functions", version = "52.3.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "52.3.0" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "52.3.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "52.3.0", default-features = false } 
-datafusion-functions-table = { path = "datafusion/functions-table", version = "52.3.0" } -datafusion-functions-window = { path = "datafusion/functions-window", version = "52.3.0" } -datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "52.3.0" } -datafusion-macros = { path = "datafusion/macros", version = "52.3.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "52.3.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "52.3.0", default-features = false } -datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "52.3.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "52.3.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "52.3.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "52.3.0" } -datafusion-proto = { path = "datafusion/proto", version = "52.3.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "52.3.0" } -datafusion-pruning = { path = "datafusion/pruning", version = "52.3.0" } -datafusion-session = { path = "datafusion/session", version = "52.3.0" } -datafusion-spark = { path = "datafusion/spark", version = "52.3.0" } -datafusion-sql = { path = "datafusion/sql", version = "52.3.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "52.3.0" } +datafusion = { path = "datafusion/core", version = "52.4.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "52.4.0" } +datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "52.4.0" } +datafusion-common = { path = "datafusion/common", version = "52.4.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "52.4.0" } +datafusion-datasource = { path 
= "datafusion/datasource", version = "52.4.0", default-features = false } +datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "52.4.0", default-features = false } +datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "52.4.0", default-features = false } +datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "52.4.0", default-features = false } +datafusion-datasource-json = { path = "datafusion/datasource-json", version = "52.4.0", default-features = false } +datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "52.4.0", default-features = false } +datafusion-doc = { path = "datafusion/doc", version = "52.4.0" } +datafusion-execution = { path = "datafusion/execution", version = "52.4.0", default-features = false } +datafusion-expr = { path = "datafusion/expr", version = "52.4.0", default-features = false } +datafusion-expr-common = { path = "datafusion/expr-common", version = "52.4.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "52.4.0" } +datafusion-functions = { path = "datafusion/functions", version = "52.4.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "52.4.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "52.4.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "52.4.0", default-features = false } +datafusion-functions-table = { path = "datafusion/functions-table", version = "52.4.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "52.4.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "52.4.0" } +datafusion-macros = { path = "datafusion/macros", version = "52.4.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "52.4.0", default-features = false } +datafusion-physical-expr = { path = 
"datafusion/physical-expr", version = "52.4.0", default-features = false } +datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "52.4.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "52.4.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "52.4.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "52.4.0" } +datafusion-proto = { path = "datafusion/proto", version = "52.4.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "52.4.0" } +datafusion-pruning = { path = "datafusion/pruning", version = "52.4.0" } +datafusion-session = { path = "datafusion/session", version = "52.4.0" } +datafusion-spark = { path = "datafusion/spark", version = "52.4.0" } +datafusion-sql = { path = "datafusion/sql", version = "52.4.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "52.4.0" } doc-comment = "0.3" env_logger = "0.11" diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs index 5c2f1adcd0cf3..379bd7edf7e69 100644 --- a/datafusion/physical-expr/src/expressions/in_list.rs +++ b/datafusion/physical-expr/src/expressions/in_list.rs @@ -98,11 +98,18 @@ impl StaticFilter for ArrayStaticFilter { )); } + // Unwrap dictionary-encoded needles when the value type matches + // in_array, evaluating against the dictionary values and mapping + // back via keys. downcast_dictionary_array! 
{ v => { - let values_contains = self.contains(v.values().as_ref(), negated)?; - let result = take(&values_contains, v.keys(), None)?; - return Ok(downcast_array(result.as_ref())) + // Only unwrap when the haystack (in_array) type matches + // the dictionary value type + if v.values().data_type() == self.in_array.data_type() { + let values_contains = self.contains(v.values().as_ref(), negated)?; + let result = take(&values_contains, v.keys(), None)?; + return Ok(downcast_array(result.as_ref())); + } } _ => {} } @@ -3507,4 +3514,536 @@ mod tests { Ok(()) } + /// Helper: creates an InListExpr with `static_filter = None` + /// to force the column-reference evaluation path. + fn make_in_list_with_columns( + expr: Arc, + list: Vec>, + negated: bool, + ) -> Arc { + Arc::new(InListExpr::new(expr, list, negated, None)) + } + + #[test] + fn test_in_list_with_columns_int32_scalars() -> Result<()> { + // Column-reference path with scalar literals (bypassing static filter) + let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]); + let col_a = col("a", &schema)?; + let batch = RecordBatch::try_new( + Arc::new(schema), + vec![Arc::new(Int32Array::from(vec![ + Some(1), + Some(2), + Some(3), + None, + ]))], + )?; + + let list = vec![ + lit(ScalarValue::Int32(Some(1))), + lit(ScalarValue::Int32(Some(3))), + ]; + let expr = make_in_list_with_columns(col_a, list, false); + + let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?; + let result = as_boolean_array(&result); + assert_eq!( + result, + &BooleanArray::from(vec![Some(true), Some(false), Some(true), None,]) + ); + Ok(()) + } + + #[test] + fn test_in_list_with_columns_int32_column_refs() -> Result<()> { + // IN list with column references + let schema = Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, true), + ]); + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![ + 
Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3), None])), + Arc::new(Int32Array::from(vec![ + Some(1), + Some(99), + Some(99), + Some(99), + ])), + Arc::new(Int32Array::from(vec![Some(99), Some(99), Some(3), None])), + ], + )?; + + let col_a = col("a", &schema)?; + let list = vec![col("b", &schema)?, col("c", &schema)?]; + let expr = make_in_list_with_columns(col_a, list, false); + + let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?; + let result = as_boolean_array(&result); + // row 0: 1 IN (1, 99) → true + // row 1: 2 IN (99, 99) → false + // row 2: 3 IN (99, 3) → true + // row 3: NULL IN (99, NULL) → NULL + assert_eq!( + result, + &BooleanArray::from(vec![Some(true), Some(false), Some(true), None,]) + ); + Ok(()) + } + + #[test] + fn test_in_list_with_columns_utf8_column_refs() -> Result<()> { + // IN list with Utf8 column references + let schema = Schema::new(vec![ + Field::new("a", DataType::Utf8, false), + Field::new("b", DataType::Utf8, false), + ]); + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![ + Arc::new(StringArray::from(vec!["x", "y", "z"])), + Arc::new(StringArray::from(vec!["x", "x", "z"])), + ], + )?; + + let col_a = col("a", &schema)?; + let list = vec![col("b", &schema)?]; + let expr = make_in_list_with_columns(col_a, list, false); + + let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?; + let result = as_boolean_array(&result); + // row 0: "x" IN ("x") → true + // row 1: "y" IN ("x") → false + // row 2: "z" IN ("z") → true + assert_eq!(result, &BooleanArray::from(vec![true, false, true])); + Ok(()) + } + + #[test] + fn test_in_list_with_columns_negated() -> Result<()> { + // NOT IN with column references + let schema = Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + ]); + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(Int32Array::from(vec![1, 99, 
3])), + ], + )?; + + let col_a = col("a", &schema)?; + let list = vec![col("b", &schema)?]; + let expr = make_in_list_with_columns(col_a, list, true); + + let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?; + let result = as_boolean_array(&result); + // row 0: 1 NOT IN (1) → false + // row 1: 2 NOT IN (99) → true + // row 2: 3 NOT IN (3) → false + assert_eq!(result, &BooleanArray::from(vec![false, true, false])); + Ok(()) + } + + #[test] + fn test_in_list_with_columns_null_in_list() -> Result<()> { + // IN list with NULL scalar (column-reference path) + let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); + let col_a = col("a", &schema)?; + let batch = RecordBatch::try_new( + Arc::new(schema), + vec![Arc::new(Int32Array::from(vec![1, 2]))], + )?; + + let list = vec![ + lit(ScalarValue::Int32(None)), + lit(ScalarValue::Int32(Some(1))), + ]; + let expr = make_in_list_with_columns(col_a, list, false); + + let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?; + let result = as_boolean_array(&result); + // row 0: 1 IN (NULL, 1) → true (true OR null = true) + // row 1: 2 IN (NULL, 1) → NULL (false OR null = null) + assert_eq!(result, &BooleanArray::from(vec![Some(true), None])); + Ok(()) + } + + #[test] + fn test_in_list_with_columns_float_nan() -> Result<()> { + // Verify NaN == NaN is true in the column-reference path + // (consistent with Arrow's totalOrder semantics) + let schema = Schema::new(vec![ + Field::new("a", DataType::Float64, false), + Field::new("b", DataType::Float64, false), + ]); + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![ + Arc::new(Float64Array::from(vec![f64::NAN, 1.0, f64::NAN])), + Arc::new(Float64Array::from(vec![f64::NAN, 2.0, 0.0])), + ], + )?; + + let col_a = col("a", &schema)?; + let list = vec![col("b", &schema)?]; + let expr = make_in_list_with_columns(col_a, list, false); + + let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?; + let result = 
as_boolean_array(&result); + // row 0: NaN IN (NaN) → true + // row 1: 1.0 IN (2.0) → false + // row 2: NaN IN (0.0) → false + assert_eq!(result, &BooleanArray::from(vec![true, false, false])); + Ok(()) + } + + /// Tests that short-circuit evaluation produces correct results. + /// When all rows match after the first list item, remaining items + /// should be skipped without affecting correctness. + #[test] + fn test_in_list_with_columns_short_circuit() -> Result<()> { + // a IN (b, c) where b already matches every row of a + // The short-circuit should skip evaluating c + let schema = Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + Field::new("c", DataType::Int32, false), + ]); + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(Int32Array::from(vec![1, 2, 3])), // b == a for all rows + Arc::new(Int32Array::from(vec![99, 99, 99])), + ], + )?; + + let col_a = col("a", &schema)?; + let list = vec![col("b", &schema)?, col("c", &schema)?]; + let expr = make_in_list_with_columns(col_a, list, false); + + let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?; + let result = as_boolean_array(&result); + assert_eq!(result, &BooleanArray::from(vec![true, true, true])); + Ok(()) + } + + /// Short-circuit must NOT skip when nulls are present (three-valued logic). + /// Even if all non-null values are true, null rows keep the result as null. 
+ #[test] + fn test_in_list_with_columns_short_circuit_with_nulls() -> Result<()> { + // a IN (b, c) where a has nulls + // Even if b matches all non-null rows, result should preserve nulls + let schema = Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, false), + Field::new("c", DataType::Int32, false), + ]); + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])), + Arc::new(Int32Array::from(vec![1, 2, 3])), // matches non-null rows + Arc::new(Int32Array::from(vec![99, 99, 99])), + ], + )?; + + let col_a = col("a", &schema)?; + let list = vec![col("b", &schema)?, col("c", &schema)?]; + let expr = make_in_list_with_columns(col_a, list, false); + + let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?; + let result = as_boolean_array(&result); + // row 0: 1 IN (1, 99) → true + // row 1: NULL IN (2, 99) → NULL + // row 2: 3 IN (3, 99) → true + assert_eq!( + result, + &BooleanArray::from(vec![Some(true), None, Some(true)]) + ); + Ok(()) + } + + /// Tests the make_comparator + collect_bool fallback path using + /// struct column references (nested types don't support arrow_eq). 
+ #[test] + fn test_in_list_with_columns_struct() -> Result<()> { + let struct_fields = Fields::from(vec![ + Field::new("x", DataType::Int32, false), + Field::new("y", DataType::Utf8, false), + ]); + let struct_dt = DataType::Struct(struct_fields.clone()); + + let schema = Schema::new(vec![ + Field::new("a", struct_dt.clone(), true), + Field::new("b", struct_dt.clone(), false), + Field::new("c", struct_dt.clone(), false), + ]); + + // a: [{1,"a"}, {2,"b"}, NULL, {4,"d"}] + // b: [{1,"a"}, {9,"z"}, {3,"c"}, {4,"d"}] + // c: [{9,"z"}, {2,"b"}, {9,"z"}, {9,"z"}] + let a = Arc::new(StructArray::new( + struct_fields.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4])), + Arc::new(StringArray::from(vec!["a", "b", "c", "d"])), + ], + Some(vec![true, true, false, true].into()), + )); + let b = Arc::new(StructArray::new( + struct_fields.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 9, 3, 4])), + Arc::new(StringArray::from(vec!["a", "z", "c", "d"])), + ], + None, + )); + let c = Arc::new(StructArray::new( + struct_fields.clone(), + vec![ + Arc::new(Int32Array::from(vec![9, 2, 9, 9])), + Arc::new(StringArray::from(vec!["z", "b", "z", "z"])), + ], + None, + )); + + let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![a, b, c])?; + + let col_a = col("a", &schema)?; + let list = vec![col("b", &schema)?, col("c", &schema)?]; + let expr = make_in_list_with_columns(col_a, list, false); + + let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?; + let result = as_boolean_array(&result); + // row 0: {1,"a"} IN ({1,"a"}, {9,"z"}) → true (matches b) + // row 1: {2,"b"} IN ({9,"z"}, {2,"b"}) → true (matches c) + // row 2: NULL IN ({3,"c"}, {9,"z"}) → NULL + // row 3: {4,"d"} IN ({4,"d"}, {9,"z"}) → true (matches b) + assert_eq!( + result, + &BooleanArray::from(vec![Some(true), Some(true), None, Some(true)]) + ); + + // Also test NOT IN + let col_a = col("a", &schema)?; + let list = vec![col("b", &schema)?, col("c", &schema)?]; + let expr = 
make_in_list_with_columns(col_a, list, true); + + let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?; + let result = as_boolean_array(&result); + // row 0: {1,"a"} NOT IN ({1,"a"}, {9,"z"}) → false + // row 1: {2,"b"} NOT IN ({9,"z"}, {2,"b"}) → false + // row 2: NULL NOT IN ({3,"c"}, {9,"z"}) → NULL + // row 3: {4,"d"} NOT IN ({4,"d"}, {9,"z"}) → false + assert_eq!( + result, + &BooleanArray::from(vec![Some(false), Some(false), None, Some(false)]) + ); + Ok(()) + } + + // ----------------------------------------------------------------------- + // Tests for try_new_from_array: evaluates `needle IN in_array`. + // + // This exercises the code path used by HashJoin dynamic filter pushdown, + // where in_array is built directly from the join's build-side arrays. + // Unlike try_new (used by SQL IN expressions), which always produces a + // non-Dictionary in_array because evaluate_list() flattens Dictionary + // scalars, try_new_from_array passes the array directly and can produce + // a Dictionary in_array. + // ----------------------------------------------------------------------- + + fn wrap_in_dict(array: ArrayRef) -> ArrayRef { + let keys = Int32Array::from((0..array.len() as i32).collect::<Vec<i32>>()); + Arc::new(DictionaryArray::new(keys, array)) + } + + /// Evaluates `needle IN in_array` via try_new_from_array, the same + /// path used by HashJoin dynamic filter pushdown (not the SQL literal + /// IN path which goes through try_new). + fn eval_in_list_from_array( + needle: ArrayRef, + in_array: ArrayRef, + ) -> Result<BooleanArray> { + let schema = + Schema::new(vec![Field::new("a", needle.data_type().clone(), false)]); + let col_a = col("a", &schema)?; + let expr = Arc::new(InListExpr::try_new_from_array(col_a, in_array, false)?)
+ as Arc; + let batch = RecordBatch::try_new(Arc::new(schema), vec![needle])?; + let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?; + Ok(as_boolean_array(&result).clone()) + } + + #[test] + fn test_in_list_from_array_type_combinations() -> Result<()> { + use arrow::compute::cast; + + // All cases: needle[0] and needle[2] match, needle[1] does not. + let expected = BooleanArray::from(vec![Some(true), Some(false), Some(true)]); + + // Base arrays cast to each target type + let base_in = Arc::new(Int64Array::from(vec![1i64, 2, 3])) as ArrayRef; + let base_needle = Arc::new(Int64Array::from(vec![1i64, 4, 2])) as ArrayRef; + + // Test all specializations in instantiate_static_filter + let primitive_types = vec![ + DataType::Int8, + DataType::Int16, + DataType::Int32, + DataType::Int64, + DataType::UInt8, + DataType::UInt16, + DataType::UInt32, + DataType::UInt64, + DataType::Float32, + DataType::Float64, + ]; + + for dt in &primitive_types { + let in_array = cast(&base_in, dt)?; + let needle = cast(&base_needle, dt)?; + + // T in_array, T needle + assert_eq!( + expected, + eval_in_list_from_array(Arc::clone(&needle), Arc::clone(&in_array))?, + "same-type failed for {dt:?}" + ); + + // T in_array, Dict(Int32, T) needle + assert_eq!( + expected, + eval_in_list_from_array(wrap_in_dict(needle), in_array)?, + "dict-needle failed for {dt:?}" + ); + } + + // Utf8 (falls through to ArrayStaticFilter) + let utf8_in = Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef; + let utf8_needle = Arc::new(StringArray::from(vec!["a", "d", "b"])) as ArrayRef; + + // Utf8 in_array, Utf8 needle + assert_eq!( + expected, + eval_in_list_from_array(Arc::clone(&utf8_needle), Arc::clone(&utf8_in),)? + ); + + // Utf8 in_array, Dict(Utf8) needle + assert_eq!( + expected, + eval_in_list_from_array( + wrap_in_dict(Arc::clone(&utf8_needle)), + Arc::clone(&utf8_in), + )? 
+ ); + + // Dict(Utf8) in_array, Dict(Utf8) needle: the #20937 bug + assert_eq!( + expected, + eval_in_list_from_array( + wrap_in_dict(Arc::clone(&utf8_needle)), + wrap_in_dict(Arc::clone(&utf8_in)), + )? + ); + + // Struct in_array, Struct needle: multi-column join + let struct_fields = Fields::from(vec![ + Field::new("c0", DataType::Utf8, true), + Field::new("c1", DataType::Int64, true), + ]); + let make_struct = |c0: ArrayRef, c1: ArrayRef| -> ArrayRef { + let pairs: Vec<(FieldRef, ArrayRef)> = + struct_fields.iter().cloned().zip([c0, c1]).collect(); + Arc::new(StructArray::from(pairs)) + }; + assert_eq!( + expected, + eval_in_list_from_array( + make_struct( + Arc::clone(&utf8_needle), + Arc::new(Int64Array::from(vec![1, 4, 2])), + ), + make_struct( + Arc::clone(&utf8_in), + Arc::new(Int64Array::from(vec![1, 2, 3])), + ), + )? + ); + + // Struct with Dict fields: multi-column Dict join + let dict_struct_fields = Fields::from(vec![ + Field::new( + "c0", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + true, + ), + Field::new("c1", DataType::Int64, true), + ]); + let make_dict_struct = |c0: ArrayRef, c1: ArrayRef| -> ArrayRef { + let pairs: Vec<(FieldRef, ArrayRef)> = + dict_struct_fields.iter().cloned().zip([c0, c1]).collect(); + Arc::new(StructArray::from(pairs)) + }; + assert_eq!( + expected, + eval_in_list_from_array( + make_dict_struct( + wrap_in_dict(Arc::clone(&utf8_needle)), + Arc::new(Int64Array::from(vec![1, 4, 2])), + ), + make_dict_struct( + wrap_in_dict(Arc::clone(&utf8_in)), + Arc::new(Int64Array::from(vec![1, 2, 3])), + ), + )? 
+ ); + + Ok(()) + } + + #[test] + fn test_in_list_from_array_type_mismatch_errors() -> Result<()> { + // Utf8 needle, Dict(Utf8) in_array + let err = eval_in_list_from_array( + Arc::new(StringArray::from(vec!["a", "d", "b"])), + wrap_in_dict(Arc::new(StringArray::from(vec!["a", "b", "c"]))), + ) + .unwrap_err() + .to_string(); + assert!( + err.contains("Can't compare arrays of different types"), + "{err}" + ); + + // Dict(Utf8) needle, Int64 in_array: specialized Int64StaticFilter + // rejects the Utf8 dictionary values at construction time + let err = eval_in_list_from_array( + wrap_in_dict(Arc::new(StringArray::from(vec!["a", "d", "b"]))), + Arc::new(Int64Array::from(vec![1, 2, 3])), + ) + .unwrap_err() + .to_string(); + assert!(err.contains("Failed to downcast"), "{err}"); + + // Dict(Int64) needle, Dict(Utf8) in_array: both Dict but different + // value types, make_comparator rejects the comparison + let err = eval_in_list_from_array( + wrap_in_dict(Arc::new(Int64Array::from(vec![1, 4, 2]))), + wrap_in_dict(Arc::new(StringArray::from(vec!["a", "b", "c"]))), + ) + .unwrap_err() + .to_string(); + assert!( + err.contains("Can't compare arrays of different types"), + "{err}" + ); + Ok(()) + } } diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index 7cc59b44a301f..a6fc275723702 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -1233,6 +1233,18 @@ impl GroupedHashAggregateStream { // on the grouping columns. self.group_ordering = GroupOrdering::Full(GroupOrderingFull::new()); + // Recreate group_values to use streaming mode (GroupValuesColumn + // with scalarized_intern) which preserves input row order, as required + // by GroupOrderingFull. This is only needed for multi-column group by, + // since single-column uses GroupValuesPrimitive which is always safe. 
+ let group_schema = self + .spill_state + .merging_group_by + .group_schema(&self.spill_state.spill_schema)?; + if group_schema.fields().len() > 1 { + self.group_values = new_group_values(group_schema, &self.group_ordering)?; + } + // Use `OutOfMemoryMode::ReportError` from this point on // to ensure we don't spill the spilled data to disk again. self.oom_mode = OutOfMemoryMode::ReportError; diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 743e9e327c0e2..88ba3d2702031 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -55,12 +55,12 @@ use datafusion_common::{ use datafusion_execution::TaskContext; use datafusion_expr::Operator; use datafusion_physical_expr::equivalence::ProjectionMapping; -use datafusion_physical_expr::expressions::{BinaryExpr, Column, lit}; +use datafusion_physical_expr::expressions::{BinaryExpr, Column, Literal, lit}; use datafusion_physical_expr::intervals::utils::check_support; use datafusion_physical_expr::utils::collect_columns; use datafusion_physical_expr::{ - AcrossPartitions, AnalysisContext, ConstExpr, ExprBoundaries, PhysicalExpr, analyze, - conjunction, split_conjunction, + AcrossPartitions, AnalysisContext, ConstExpr, EquivalenceProperties, ExprBoundaries, + PhysicalExpr, analyze, conjunction, split_conjunction, }; use datafusion_physical_expr_common::physical_expr::fmt_sql; @@ -243,6 +243,20 @@ impl FilterExec { }) } + /// Returns the `AcrossPartitions` value for `expr` if it is constant: + /// either already known constant in `input_eqs`, or a `Literal` + /// (which is inherently constant across all partitions). 
+ fn expr_constant_or_literal( + expr: &Arc<dyn PhysicalExpr>, + input_eqs: &EquivalenceProperties, + ) -> Option<AcrossPartitions> { + input_eqs.is_expr_constant(expr).or_else(|| { + expr.as_any() + .downcast_ref::<Literal>() + .map(|l| AcrossPartitions::Uniform(Some(l.value().clone()))) + }) + } + fn extend_constants( input: &Arc<dyn ExecutionPlan>, predicate: &Arc<dyn PhysicalExpr>, @@ -255,18 +269,24 @@ impl FilterExec { if let Some(binary) = conjunction.as_any().downcast_ref::<BinaryExpr>() && binary.op() == &Operator::Eq { - // Filter evaluates to single value for all partitions - if input_eqs.is_expr_constant(binary.left()).is_some() { - let across = input_eqs - .is_expr_constant(binary.right()) - .unwrap_or_default(); + // Check if either side is constant — either already known + // constant from the input equivalence properties, or a literal + // value (which is inherently constant across all partitions). + let left_const = Self::expr_constant_or_literal(binary.left(), input_eqs); + let right_const = + Self::expr_constant_or_literal(binary.right(), input_eqs); + + if let Some(left_across) = left_const { + // LEFT is constant, so RIGHT must also be constant. + // Use RIGHT's known across value if available, otherwise + // propagate LEFT's (e.g. Uniform from a literal). + let across = right_const.unwrap_or(left_across); res_constants .push(ConstExpr::new(Arc::clone(binary.right()), across)); - } else if input_eqs.is_expr_constant(binary.right()).is_some() { - let across = input_eqs - .is_expr_constant(binary.left()) - .unwrap_or_default(); - res_constants.push(ConstExpr::new(Arc::clone(binary.left()), across)); + } else if let Some(right_across) = right_const { + // RIGHT is constant, so LEFT must also be constant.
+ res_constants + .push(ConstExpr::new(Arc::clone(binary.left()), right_across)); + } } } @@ -866,6 +886,19 @@ fn collect_columns_from_predicate_inner( let predicates = split_conjunction(predicate); predicates.into_iter().for_each(|p| { if let Some(binary) = p.as_any().downcast_ref::<BinaryExpr>() { + // Only extract pairs where at least one side is a Column reference. + // Pairs like `complex_expr = literal` should not create equivalence + // classes — the literal could appear in many unrelated expressions + // (e.g. sort keys), and normalize_expr's deep traversal would + // replace those occurrences with the complex expression, corrupting + // sort orderings. Constant propagation for such pairs is handled + // separately by `extend_constants`. + let has_direct_column_operand = + binary.left().as_any().downcast_ref::<Column>().is_some() + || binary.right().as_any().downcast_ref::<Column>().is_some(); + if !has_direct_column_operand { + return; + } match binary.op() { Operator::Eq => { eq_predicate_columns.push((binary.left(), binary.right())) @@ -1792,6 +1825,47 @@ mod tests { from output schema (c@0) to input schema (c@2)" ); + Ok(()) + } + /// Regression test for https://github.com/apache/datafusion/issues/20194 + /// + /// `collect_columns_from_predicate_inner` should only extract equality + /// pairs where at least one side is a Column. Pairs like + /// `complex_expr = literal` must not create equivalence classes because + /// `normalize_expr`'s deep traversal would replace the literal inside + /// unrelated expressions (e.g. sort keys) with the complex expression. + #[test] + fn test_collect_columns_skips_non_column_pairs() -> Result<()> { + let schema = test::aggr_test_schema(); + + // Simulate: nvl(c2, 0) = 0 → (c2 IS DISTINCT FROM 0) = 0 + // Neither side is a Column, so this should NOT be extracted.
+ let complex_expr: Arc = binary( + col("c2", &schema)?, + Operator::IsDistinctFrom, + lit(0u32), + &schema, + )?; + let predicate: Arc = + binary(complex_expr, Operator::Eq, lit(0u32), &schema)?; + + let (equal_pairs, _) = collect_columns_from_predicate_inner(&predicate); + assert_eq!( + 0, + equal_pairs.len(), + "Should not extract equality pairs where neither side is a Column" + ); + + // But col = literal should still be extracted + let predicate: Arc = + binary(col("c2", &schema)?, Operator::Eq, lit(0u32), &schema)?; + let (equal_pairs, _) = collect_columns_from_predicate_inner(&predicate); + assert_eq!( + 1, + equal_pairs.len(), + "Should extract equality pairs where one side is a Column" + ); + Ok(()) } } diff --git a/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs b/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs index 3a57dc6b41a57..dca55c720ef62 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs @@ -128,6 +128,8 @@ pub(super) struct StreamedBatch { pub join_arrays: Vec, /// Chunks of indices from buffered side (may be nulls) joined to streamed pub output_indices: Vec, + /// Total number of output rows across all chunks in `output_indices` + pub num_output_rows: usize, /// Index of currently scanned batch from buffered data pub buffered_batch_idx: Option, /// Indices that found a match for the given join filter @@ -144,6 +146,7 @@ impl StreamedBatch { idx: 0, join_arrays, output_indices: vec![], + num_output_rows: 0, buffered_batch_idx: None, join_filter_matched_idxs: HashSet::new(), } @@ -155,6 +158,7 @@ impl StreamedBatch { idx: 0, join_arrays: vec![], output_indices: vec![], + num_output_rows: 0, buffered_batch_idx: None, join_filter_matched_idxs: HashSet::new(), } @@ -162,10 +166,7 @@ impl StreamedBatch { /// Number of unfrozen output pairs in this streamed batch fn num_output_rows(&self) -> usize { - self.output_indices - .iter() - 
.map(|chunk| chunk.streamed_indices.len()) - .sum() + self.num_output_rows } /// Appends new pair consisting of current streamed index and `buffered_idx` @@ -175,7 +176,6 @@ impl StreamedBatch { buffered_batch_idx: Option, buffered_idx: Option, batch_size: usize, - num_unfrozen_pairs: usize, ) { // If no current chunk exists or current chunk is not for current buffered batch, // create a new chunk @@ -183,12 +183,13 @@ impl StreamedBatch { { // Compute capacity only when creating a new chunk (infrequent operation). // The capacity is the remaining space to reach batch_size. - // This should always be >= 1 since we only call this when num_unfrozen_pairs < batch_size. + // This should always be >= 1 since we only call this when num_output_rows < batch_size. debug_assert!( - batch_size > num_unfrozen_pairs, - "batch_size ({batch_size}) must be > num_unfrozen_pairs ({num_unfrozen_pairs})" + batch_size > self.num_output_rows, + "batch_size ({batch_size}) must be > num_output_rows ({})", + self.num_output_rows ); - let capacity = batch_size - num_unfrozen_pairs; + let capacity = batch_size - self.num_output_rows; self.output_indices.push(StreamedJoinedChunk { buffered_batch_idx, streamed_indices: UInt64Builder::with_capacity(capacity), @@ -205,6 +206,7 @@ impl StreamedBatch { } else { current_chunk.buffered_indices.append_null(); } + self.num_output_rows += 1; } } @@ -1134,13 +1136,10 @@ impl SortMergeJoinStream { let scanning_idx = self.buffered_data.scanning_idx(); if join_streamed { // Join streamed row and buffered row - // Pass batch_size and num_unfrozen_pairs to compute capacity only when - // creating a new chunk (when buffered_batch_idx changes), not on every iteration. 
self.streamed_batch.append_output_pair( Some(self.buffered_data.scanning_batch_idx), Some(scanning_idx), self.batch_size, - self.num_unfrozen_pairs(), ); } else { // Join nulls and buffered row for FULL join @@ -1166,13 +1165,10 @@ impl SortMergeJoinStream { // For Mark join we store a dummy id to indicate the row has a match let scanning_idx = mark_row_as_match.then_some(0); - // Pass batch_size=1 and num_unfrozen_pairs=0 to get capacity of 1, - // since we only append a single null-joined pair here (not in a loop). self.streamed_batch.append_output_pair( scanning_batch_idx, scanning_idx, - 1, - 0, + self.batch_size, ); self.buffered_data.scanning_finish(); self.streamed_joined = true; @@ -1469,6 +1465,7 @@ impl SortMergeJoinStream { } self.streamed_batch.output_indices.clear(); + self.streamed_batch.num_output_rows = 0; Ok(()) } diff --git a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt index 5e643273baed4..d306f94ae310c 100644 --- a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt +++ b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt @@ -663,3 +663,102 @@ DROP TABLE o2_parquet_20696; # Cleanup settings statement ok set datafusion.execution.parquet.pushdown_filters = false; + +########## +# Regression test: filter pushdown with Struct columns in schema +# +# When a schema has Struct columns, Arrow field indices diverge from Parquet +# leaf indices (Struct children become separate leaves). A filter on a +# primitive column *after* a Struct must use the correct Parquet leaf index. 
+# +# Schema: +# Arrow: col_a=0 struct_col=1 col_b=2 +# Parquet: col_a=0 struct_col.x=1 struct_col.y=2 col_b=3 +########## + +statement ok +set datafusion.execution.parquet.pushdown_filters = true; + +statement ok +COPY ( + SELECT + column1 as col_a, + column2 as struct_col, + column3 as col_b + FROM VALUES + (1, {x: 10, y: 100}, 'aaa'), + (2, {x: 20, y: 200}, 'target'), + (3, {x: 30, y: 300}, 'zzz') +) TO 'test_files/scratch/parquet_filter_pushdown/struct_filter.parquet' +STORED AS PARQUET; + +statement ok +CREATE EXTERNAL TABLE t_struct_filter +STORED AS PARQUET +LOCATION 'test_files/scratch/parquet_filter_pushdown/struct_filter.parquet'; + +# Filter on col_b (the primitive column after the struct). +# Before the fix, this returned 0 rows because the filter read struct_col.y +# (Parquet leaf 2) instead of col_b (Parquet leaf 3). +query IT +SELECT col_a, col_b FROM t_struct_filter WHERE col_b = 'target'; +---- +2 target + +# Clean up +statement ok +set datafusion.execution.parquet.pushdown_filters = false; + +statement ok +DROP TABLE t_struct_filter; + +########## +# Regression test for https://github.com/apache/datafusion/issues/20937 +# +# Dynamic filter pushdown fails when joining VALUES against +# Dictionary-encoded Parquet columns. The InListExpr's ArrayStaticFilter +# unwraps the needle Dictionary but not the stored in_array, causing a +# make_comparator(Utf8, Dictionary) type mismatch. 
+########## + +statement ok +set datafusion.execution.parquet.pushdown_filters = true; + +statement ok +set datafusion.execution.parquet.reorder_filters = true; + +statement ok +COPY ( + SELECT + arrow_cast(chr(65 + (row_num % 26)), 'Dictionary(Int32, Utf8)') as tag1, + row_num * 1.0 as value + FROM (SELECT unnest(range(0, 10000)) as row_num) +) TO 'test_files/scratch/parquet_filter_pushdown/dict_filter_bug.parquet'; + +statement ok +CREATE EXTERNAL TABLE dict_filter_bug +STORED AS PARQUET +LOCATION 'test_files/scratch/parquet_filter_pushdown/dict_filter_bug.parquet'; + +query TR +SELECT t.tag1, t.value +FROM dict_filter_bug t +JOIN (VALUES ('A'), ('B')) AS v(c1) +ON t.tag1 = v.c1 +ORDER BY t.tag1, t.value +LIMIT 4; +---- +A 0 +A 26 +A 52 +A 78 + +# Cleanup +statement ok +set datafusion.execution.parquet.pushdown_filters = false; + +statement ok +set datafusion.execution.parquet.reorder_filters = false; + +statement ok +DROP TABLE dict_filter_bug; diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 142c20ca5c314..df2a1dace495f 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -6081,3 +6081,49 @@ WHERE acctbal > ( ); ---- 1 + +# Regression test for https://github.com/apache/datafusion/issues/20194 +# Window function with CASE WHEN in ORDER BY combined with NVL filter +# should not trigger SanityCheckPlan error from equivalence normalization +# replacing literals in sort expressions with complex filter expressions. 
+statement ok +CREATE TABLE issue_20194_t1 ( + value_1_1 decimal(25) NULL, + value_1_2 int NULL, + value_1_3 bigint NULL +); + +statement ok +CREATE TABLE issue_20194_t2 ( + value_2_1 bigint NULL, + value_2_2 varchar(140) NULL, + value_2_3 varchar(140) NULL +); + +statement ok +INSERT INTO issue_20194_t1 (value_1_1, value_1_2, value_1_3) VALUES (6774502793, 10040029, 1120); + +statement ok +INSERT INTO issue_20194_t2 (value_2_1, value_2_2, value_2_3) VALUES (1120, '0', '0'); + +query RII +SELECT + t1.value_1_1, t1.value_1_2, + ROW_NUMBER() OVER ( + PARTITION BY t1.value_1_1, t1.value_1_2 + ORDER BY + CASE WHEN t2.value_2_2 = '0' THEN 1 ELSE 0 END ASC, + CASE WHEN t2.value_2_3 = '0' THEN 1 ELSE 0 END ASC + ) AS ord +FROM issue_20194_t1 t1 +INNER JOIN issue_20194_t2 t2 + ON t1.value_1_3 = t2.value_2_1 + AND nvl(t2.value_2_3, '0') = '0'; +---- +6774502793 10040029 1 + +statement ok +DROP TABLE issue_20194_t1; + +statement ok +DROP TABLE issue_20194_t2; diff --git a/dev/changelog/52.4.0.md b/dev/changelog/52.4.0.md new file mode 100644 index 0000000000000..04fba07cde9e6 --- /dev/null +++ b/dev/changelog/52.4.0.md @@ -0,0 +1,57 @@ + + +# Apache DataFusion 52.4.0 Changelog + +This release consists of 11 commits from 10 contributors. See credits at the end of this changelog for more information. + +See the [upgrade guide](https://datafusion.apache.org/library-user-guide/upgrading.html) for information on how to upgrade from previous versions. 
+ +**Other:** + +- [branch-52] fix: maintain inner list nullability for (#19948) [#20878](https://github.com/apache/datafusion/pull/20878) (Jefffrey) +- [branch-52] fix: Ensure columns are casted to the correct names with Unions (#20146) [#20879](https://github.com/apache/datafusion/pull/20879) (nuno-faria) +- [branch-52] fix: interval analysis error when have two filterexec that inner filter proves zero selectivity (#20743) [#20880](https://github.com/apache/datafusion/pull/20880) (haohuaijin) +- [branch-52] fix: Return `probe_side.len()` for RightMark/Anti count(\*) queries (#20710) [#20881](https://github.com/apache/datafusion/pull/20881) (jonathanc-n) +- [branch-52] fix: disable dynamic filter pushdown for non min/max aggregates (#20279) [#20877](https://github.com/apache/datafusion/pull/20877) (notashes) +- [branch-52] Fix duplicate group keys after hash aggregation spill (#20724) (#20858) [#20917](https://github.com/apache/datafusion/pull/20917) (gboucher90) +- [branch-52] perf: Cache num_output_rows in sort merge join to avoid O(n) recount (#20478) [#20936](https://github.com/apache/datafusion/pull/20936) (andygrove) +- [branch-52] fix: SanityCheckPlan error with window functions and NVL filter (#20231) [#20931](https://github.com/apache/datafusion/pull/20931) (EeshanBembi) +- [branch-52] chore: Ignore RUSTSEC-2024-0014 (#20862) [#21020](https://github.com/apache/datafusion/pull/21020) (comphead) +- [branch-52] fix: InList Dictionary filter pushdown type mismatch (#20962) [#20997](https://github.com/apache/datafusion/pull/20997) (alamb) +- [branch-52] Update to use lz4_flex 0.12.1 and quinn-proto 0.11.14 [#21009](https://github.com/apache/datafusion/pull/21009) (alamb) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. 
+ +``` + 2 Andrew Lamb + 1 Andy Grove + 1 EeshanBembi + 1 Guillaume Boucher + 1 Huaijin + 1 Jeffrey Vo + 1 Jonathan Chen + 1 Nuno Faria + 1 Oleks V + 1 notashes +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 8423480d53305..ebd9ef728ae7b 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -99,7 +99,7 @@ The following configuration settings are available: | datafusion.execution.parquet.dictionary_page_size_limit | 1048576 | (writing) Sets best effort maximum dictionary page size, in bytes | | datafusion.execution.parquet.statistics_enabled | page | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | -| datafusion.execution.parquet.created_by | datafusion version 52.3.0 | (writing) Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 52.4.0 | (writing) Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | 64 | (writing) Sets column index truncate length | | datafusion.execution.parquet.statistics_truncate_length | 64 | (writing) Sets statistics truncate length. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.data_page_row_count_limit | 20000 | (writing) Sets best effort maximum number of rows in data page |