diff --git a/Cargo.lock b/Cargo.lock index dac28d60f6b..cf1437085cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -199,9 +199,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "55.1.0" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1bb018b6960c87fd9d025009820406f74e83281185a8bdcb44880d2aa5c9a87" +checksum = "f3f15b4c6b148206ff3a2b35002e08929c2462467b62b9c02036d9c34f9ef994" dependencies = [ "arrow-arith", "arrow-array", @@ -220,9 +220,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "55.1.0" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44de76b51473aa888ecd6ad93ceb262fb8d40d1f1154a4df2f069b3590aa7575" +checksum = "30feb679425110209ae35c3fbf82404a39a4c0436bb3ec36164d8bffed2a4ce4" dependencies = [ "arrow-array", "arrow-buffer", @@ -234,9 +234,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "55.1.0" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29ed77e22744475a9a53d00026cf8e166fe73cf42d89c4c4ae63607ee1cfcc3f" +checksum = "70732f04d285d49054a48b72c54f791bb3424abae92d27aafdf776c98af161c8" dependencies = [ "ahash", "arrow-buffer", @@ -251,9 +251,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "55.1.0" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0391c96eb58bf7389171d1e103112d3fc3e5625ca6b372d606f2688f1ea4cce" +checksum = "169b1d5d6cb390dd92ce582b06b23815c7953e9dfaaea75556e89d890d19993d" dependencies = [ "bytes", "half", @@ -262,9 +262,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "55.1.0" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f39e1d774ece9292697fcbe06b5584401b26bd34be1bec25c33edae65c2420ff" +checksum = "e4f12eccc3e1c05a766cafb31f6a60a46c2f8efec9b74c6e0648766d30686af8" dependencies = [ "arrow-array", "arrow-buffer", @@ -283,9 +283,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "55.1.0" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9055c972a07bf12c2a827debfd34f88d3b93da1941d36e1d9fee85eebe38a12a" +checksum = "012c9fef3f4a11573b2c74aec53712ff9fdae4a95f4ce452d1bbf088ee00f06b" dependencies = [ "arrow-array", "arrow-cast", @@ -293,15 +293,14 @@ dependencies = [ "chrono", "csv", "csv-core", - "lazy_static", "regex", ] [[package]] name = "arrow-data" -version = "55.1.0" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf75ac27a08c7f48b88e5c923f267e980f27070147ab74615ad85b5c5f90473d" +checksum = "8de1ce212d803199684b658fc4ba55fb2d7e87b213de5af415308d2fee3619c2" dependencies = [ "arrow-buffer", "arrow-schema", @@ -311,9 +310,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "55.1.0" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a222f0d93772bd058d1268f4c28ea421a603d66f7979479048c429292fac7b2e" +checksum = "d9ea5967e8b2af39aff5d9de2197df16e305f47f404781d3230b2dc672da5d92" dependencies = [ "arrow-array", "arrow-buffer", @@ -326,9 +325,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "55.1.0" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9085342bbca0f75e8cb70513c0807cc7351f1fbf5cb98192a67d5e3044acb033" +checksum = "5709d974c4ea5be96d900c01576c7c0b99705f4a3eec343648cb1ca863988a9c" dependencies = [ "arrow-array", "arrow-buffer", @@ -348,9 +347,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "55.1.0" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab2f1065a5cad7b9efa9e22ce5747ce826aa3855766755d4904535123ef431e7" +checksum = "6506e3a059e3be23023f587f79c82ef0bcf6d293587e3272d20f2d30b969b5a7" dependencies = [ "arrow-array", "arrow-buffer", @@ -361,9 +360,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "55.1.0" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3703a0e3e92d23c3f756df73d2dc9476873f873a76ae63ef9d3de17fda83b2d8" +checksum = "52bf7393166beaf79b4bed9bfdf19e97472af32ce5b6b48169d321518a08cae2" dependencies = [ "arrow-array", "arrow-buffer", @@ -374,9 +373,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "55.1.0" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73a47aa0c771b5381de2b7f16998d351a6f4eb839f1e13d48353e17e873d969b" +checksum = "af7686986a3bf2254c9fb130c623cdcb2f8e1f15763e7c71c310f0834da3d292" dependencies = [ "bitflags 2.9.1", "serde", @@ -385,9 +384,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "55.1.0" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24b7b85575702b23b85272b01bc1c25a01c9b9852305e5d0078c79ba25d995d4" +checksum = "dd2b45757d6a2373faa3352d02ff5b54b098f5e21dccebc45a21806bc34501e5" dependencies = [ "ahash", "arrow-array", @@ -399,9 +398,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "55.1.0" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9260fddf1cdf2799ace2b4c2fc0356a9789fa7551e0953e35435536fecefebbd" +checksum = "0377d532850babb4d927a06294314b316e23311503ed580ec6ce6a0158f49d40" dependencies = [ "arrow-array", "arrow-buffer", @@ -1967,9 +1966,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc6cb8c2c81eada072059983657d6c9caf3fddefc43b4a65551d243253254a96" +checksum = "69dfeda1633bf8ec75b068d9f6c27cdc392ffcf5ff83128d5dbab65b73c1fd02" dependencies = [ "arrow", "arrow-ipc", @@ -2017,9 +2016,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7be8d1b627843af62e447396db08fe1372d882c0eb8d0ea655fd1fbc33120ee" +checksum = "2848fd1e85e2953116dab9cc2eb109214b0888d7bbd2230e30c07f1794f642c0" dependencies = [ "arrow", "async-trait", @@ -2043,9 +2042,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38ab16c5ae43f65ee525fc493ceffbc41f40dee38b01f643dfcfc12959e92038" +checksum = "051a1634628c2d1296d4e326823e7536640d87a118966cdaff069b68821ad53b" dependencies = [ "arrow", "async-trait", @@ -2066,14 +2065,15 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3d56b2ac9f476b93ca82e4ef5fb00769c8a3f248d12b4965af7e27635fa7e12" +checksum = "765e4ad4ef7a4500e389a3f1e738791b71ff4c29fd00912c2f541d62b25da096" dependencies = [ "ahash", "arrow", "arrow-ipc", "base64 0.22.1", + "chrono", "half", "hashbrown 0.14.5", "indexmap", @@ -2089,9 +2089,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16015071202d6133bc84d72756176467e3e46029f3ce9ad2cb788f9b1ff139b2" +checksum = "40a2ae8393051ce25d232a6065c4558ab5a535c9637d5373bacfd464ac88ea12" dependencies = [ "futures", "log", @@ -2100,9 +2100,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b77523c95c89d2a7eb99df14ed31390e04ab29b43ff793e562bdc1716b07e17b" +checksum = "90cd841a77f378bc1a5c4a1c37345e1885a9203b008203f9f4b3a769729bf330" dependencies = [ "arrow", "async-trait", @@ -2130,9 +2130,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40d25c5e2c0ebe8434beeea997b8e88d55b3ccc0d19344293f2373f65bc524fc" +checksum = "77f4a2c64939c6f0dd15b246723a699fa30d59d0133eb36a86e8ff8c6e2a8dc6" dependencies = [ "arrow", "async-trait", @@ -2155,9 +2155,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dc6959e1155741ab35369e1dc7673ba30fc45ed568fad34c01b7cb1daeb4d4c" +checksum = "11387aaf931b2993ad9273c63ddca33f05aef7d02df9b70fb757429b4b71cdae" dependencies = [ "arrow", "async-trait", @@ -2180,9 +2180,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7a6afdfe358d70f4237f60eaef26ae5a1ce7cb2c469d02d5fc6c7fd5d84e58b" +checksum = "028f430c5185120bf806347848b8d8acd9823f4038875b3820eeefa35f2bb4a2" dependencies = [ "arrow", "async-trait", @@ -2198,6 +2198,7 @@ dependencies = [ "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", + "datafusion-pruning", "datafusion-session", "futures", "itertools 0.14.0", @@ -2211,15 +2212,15 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bcd8a3e3e3d02ea642541be23d44376b5d5c37c2938cce39b3873cdf7186eea" +checksum = "8ff336d1d755399753a9e4fbab001180e346fc8bfa063a97f1214b82274c00f8" [[package]] name = "datafusion-execution" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670da1d45d045eee4c2319b8c7ea57b26cf48ab77b630aaa50b779e406da476a" +checksum = "042ea192757d1b2d7dcf71643e7ff33f6542c7704f00228d8b85b40003fd8e0f" dependencies = [ "arrow", "dashmap", @@ -2236,11 +2237,12 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3a577f64bdb7e2cc4043cd97f8901d8c504711fde2dbcb0887645b00d7c660b" +checksum = "025222545d6d7fab71e2ae2b356526a1df67a2872222cbae7535e557a42abd2e" dependencies = [ "arrow", + "async-trait", "chrono", "datafusion-common", "datafusion-doc", @@ -2256,9 +2258,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b7916806ace3e9f41884f230f7f38ebf0e955dfbd88266da1826f29a0b9a6a" +checksum = "9d5c267104849d5fa6d81cf5ba88f35ecd58727729c5eb84066c25227b644ae2" dependencies = [ "arrow", "datafusion-common", @@ -2269,9 +2271,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fb31c9dc73d3e0c365063f91139dc273308f8a8e124adda9898db8085d68357" +checksum = "c620d105aa208fcee45c588765483314eb415f5571cfd6c1bae3a59c5b4d15bb" dependencies = [ "arrow", "arrow-buffer", @@ -2298,9 +2300,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebb72c6940697eaaba9bd1f746a697a07819de952b817e3fb841fb75331ad5d4" +checksum = "35f61d5198a35ed368bf3aacac74f0d0fa33de7a7cb0c57e9f68ab1346d2f952" dependencies = [ "ahash", "arrow", @@ -2319,9 +2321,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7fdc54656659e5ecd49bf341061f4156ab230052611f4f3609612a0da259696" +checksum = "13efdb17362be39b5024f6da0d977ffe49c0212929ec36eec550e07e2bc7812f" dependencies = [ "ahash", "arrow", @@ -2332,9 +2334,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fad94598e3374938ca43bca6b675febe557e7a14eb627d617db427d70d65118b" +checksum = "9187678af567d7c9e004b72a0b6dc5b0a00ebf4901cb3511ed2db4effe092e66" dependencies = [ "arrow", "arrow-ord", @@ -2344,6 +2346,7 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", "itertools 0.14.0", @@ -2353,9 +2356,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de2fc6c2946da5cab8364fb28b5cac3115f0f3a87960b235ed031c3f7e2e639b" +checksum = "ecf156589cc21ef59fe39c7a9a841b4a97394549643bbfa88cc44e8588cf8fe5" dependencies = [ "arrow", "async-trait", @@ -2369,9 +2372,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e5746548a8544870a119f556543adcd88fe0ba6b93723fe78ad0439e0fbb8b4" +checksum = "edcb25e3e369f1366ec9a261456e45b5aad6ea1c0c8b4ce546587207c501ed9e" dependencies = [ "arrow", "datafusion-common", @@ -2387,9 +2390,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcbe9404382cda257c434f22e13577bee7047031dfdb6216dd5e841b9465e6fe" +checksum = "8996a8e11174d0bd7c62dc2f316485affc6ae5ffd5b8a68b508137ace2310294" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2397,9 +2400,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8dce50e3b637dab0d25d04d2fe79dfdca2b257eabd76790bffd22c7f90d700c8" +checksum = "95ee8d1be549eb7316f437035f2cec7ec42aba8374096d807c4de006a3b5d78a" dependencies = [ "datafusion-expr", "quote", @@ -2408,14 +2411,15 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03cfaacf06445dc3bbc1e901242d2a44f2cae99a744f49f3fefddcee46240058" +checksum = "c9fa98671458254928af854e5f6c915e66b860a8bde505baea0ff2892deab74d" dependencies = [ "arrow", "chrono", "datafusion-common", "datafusion-expr", + "datafusion-expr-common", "datafusion-physical-expr", "indexmap", "itertools 0.14.0", @@ -2426,9 +2430,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1908034a89d7b2630898e06863583ae4c00a0dd310c1589ca284195ee3f7f8a6" +checksum = "3515d51531cca5f7b5a6f3ea22742b71bb36fc378b465df124ff9a2fa349b002" dependencies = [ "ahash", "arrow", @@ -2448,9 +2452,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b7a12dd59ea07614b67dbb01d85254fbd93df45bcffa63495e11d3bdf847df" +checksum = "24485475d9c618a1d33b2a3dad003d946dc7a7bbf0354d125301abc0a5a79e3e" dependencies = [ "ahash", "arrow", @@ -2462,9 +2466,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4371cc4ad33978cc2a8be93bd54a232d3f2857b50401a14631c0705f3f910aae" +checksum = "b9da411a0a64702f941a12af2b979434d14ec5d36c6f49296966b2c7639cbb3a" dependencies = [ "arrow", "datafusion-common", @@ -2474,15 +2478,16 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", + "datafusion-pruning", "itertools 0.14.0", "log", ] [[package]] name = "datafusion-physical-plan" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc47bc33025757a5c11f2cd094c5b6b5ed87f46fa33c023e6fdfa25fcbfade23" +checksum = "a6d168282bb7b54880bb3159f89b51c047db4287f5014d60c3ef4c6e1468212b" dependencies = [ "ahash", "arrow", @@ -2508,11 +2513,29 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-pruning" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "391a457b9d23744c53eeb89edd1027424cba100581488d89800ed841182df905" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-datasource", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "itertools 0.14.0", + "log", +] + [[package]] name = "datafusion-session" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7485da32283985d6b45bd7d13a65169dcbe8c869e25d01b2cfbc425254b4b49" +checksum = "053201c2bb729c7938f85879034df2b5a52cfaba16f1b3b66ab8505c81b2aad3" dependencies = [ "arrow", "async-trait", @@ -2534,9 +2557,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a466b15632befddfeac68c125f0260f569ff315c6831538cbb40db754134e0df" +checksum = "9082779be8ce4882189b229c0cff4393bd0808282a7194130c9f32159f185e25" dependencies = [ "arrow", "bigdecimal", @@ -2550,9 +2573,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f3973b1a4f6e9ee7fd99a22d58e1c06e6723a28dc911a60df575974c8339aa" +checksum = "4189e480ae42f06ade81a6d12853c5f3f84fbe237d73fb8795c712b6f1923afb" dependencies = [ "async-recursion", "async-trait", @@ -2562,7 +2585,7 @@ dependencies = [ "object_store", "pbjson-types", "prost 0.13.5", - "substrait 0.56.0", + "substrait 0.58.0", "tokio", "url", ] @@ -3921,9 +3944,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.9.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" +checksum = "206a8042aec68fa4a62e8d3f7aa4ceb508177d9324faf261e1959e495b7a1921" dependencies = [ "equivalent", "hashbrown 0.15.3", @@ -4865,9 +4888,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.172" +version = "0.2.175" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" [[package]] name = "libflate" @@ -5821,9 +5844,9 @@ dependencies = [ [[package]] name = "parquet" -version = "55.1.0" +version = "55.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be7b2d778f6b841d37083ebdf32e33a524acde1266b5884a8ca29bf00dfa1231" +checksum = "b17da4150748086bd43352bc77372efa9b6e3dbd06a04831d2a98c041c225cfa" dependencies = [ "ahash", "arrow-array", @@ -5846,6 +5869,7 @@ dependencies = [ "num-bigint", "object_store", "paste", + "ring", "seq-macro", "simdutf8", "snap", @@ -6493,15 +6517,6 @@ dependencies = [ "cmake", ] -[[package]] -name = "psm" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" -dependencies = [ - "cc", -] - [[package]] name = "quick-error" version = "1.2.3" @@ -6751,26 +6766,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "recursive" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" -dependencies = [ - "recursive-proc-macro-impl", - "stacker", -] - -[[package]] -name = "recursive-proc-macro-impl" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" -dependencies = [ - "quote", - "syn 2.0.106", -] - [[package]] name = "redox_syscall" version = "0.5.12" @@ -7679,7 +7674,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" dependencies = [ "log", - "recursive", "sqlparser_derive", ] @@ -7700,19 +7694,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" -[[package]] -name = "stacker" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "windows-sys 0.59.0", -] - [[package]] name = "static_assertions" version = "1.1.0" @@ -7795,9 +7776,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.56.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13de2e20128f2a018dab1cfa30be83ae069219a65968c6f89df66ad124de2397" +checksum = "de6d24c270c6c672a86c183c3a8439ba46c1936f93cf7296aa692de3b0ff0228" dependencies = [ "heck", "pbjson", diff --git a/Cargo.toml b/Cargo.toml index cdcb10bd1e2..f0031fc30c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,17 +60,17 @@ lance-test-macros = { version = "=0.35.0", path = "./rust/lance-test-macros" } lance-testing = { version = "=0.35.0", path = "./rust/lance-testing" } approx = "0.5.1" # Note that this one does not include pyarrow -arrow = { version = "55.1", optional = false, features = ["prettyprint"] } -arrow-arith = "55.1" -arrow-array = "55.1" -arrow-buffer = "55.1" -arrow-cast = "55.1" -arrow-data = "55.1" -arrow-ipc = { version = "55.1", features = ["zstd"] } -arrow-ord = "55.1" -arrow-row = "55.1" -arrow-schema = "55.1" -arrow-select = "55.1" +arrow = { version = "55.2", optional = false, features = ["prettyprint"] } +arrow-arith = "55.2" +arrow-array = "55.2" +arrow-buffer = "55.2" +arrow-cast = "55.2" +arrow-data = "55.2" +arrow-ipc = { version = "55.2", features = ["zstd"] } +arrow-ord = "55.2" +arrow-row = "55.2" +arrow-schema = "55.2" +arrow-select = "55.2" async-recursion = "1.0" async-trait = "0.1" aws-config = "1.2.0" @@ -96,7 +96,7 @@ criterion = { version = "0.5", features = [ "html_reports", ] } crossbeam-queue = "0.3" -datafusion = { version = "48.0", default-features = false, features = [ +datafusion = { version = "49.0.2", default-features = false, features = [ "nested_expressions", "regex_expressions", "unicode_expressions", @@ -105,16 +105,16 @@ datafusion = { version = "48.0", default-features = false, features = [ "datetime_expressions", "string_expressions", ] } -datafusion-common = "48.0" -datafusion-functions = { version = "48.0", features = ["regex_expressions"] } -datafusion-sql = "48.0" -datafusion-expr = "48.0" -datafusion-ffi = "48.0" -datafusion-execution = "48.0" -datafusion-optimizer = "48.0" -datafusion-physical-expr = { version = "48.0" } -datafusion-physical-plan = { version = "48.0" } -datafusion-substrait = { version = "48.0" } +datafusion-common = "49.0.2" +datafusion-functions = { version = "49.0.2", features = ["regex_expressions"] } +datafusion-sql = "49.0.2" +datafusion-expr = "49.0.2" +datafusion-ffi = "49.0.2" +datafusion-execution = "49.0.2" +datafusion-optimizer = "49.0.2" +datafusion-physical-expr = { version = "49.0.2" } +datafusion-physical-plan = { version = "49.0.2" } +datafusion-substrait = { version = "49.0.2" } deepsize = "0.2.0" dirs = "6.0.0" either = "1.0" diff --git a/python/Cargo.lock b/python/Cargo.lock index 2d37caa7402..13cf6802e3e 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -485,7 +485,7 @@ version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" dependencies = [ - "bzip2", + "bzip2 0.5.2", "flate2", "futures-core", "memchr", @@ -1338,6 +1338,15 @@ dependencies = [ "bzip2-sys", ] +[[package]] +name = "bzip2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bea8dcd42434048e4f7a304411d9273a411f647446c1234a65ce0554923f4cff" +dependencies = [ + "libbz2-rs-sys", +] + [[package]] name = "bzip2-sys" version = "0.1.13+1.0.8" @@ -1762,16 +1771,16 @@ dependencies = [ [[package]] name = "datafusion" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc6cb8c2c81eada072059983657d6c9caf3fddefc43b4a65551d243253254a96" +checksum = "69dfeda1633bf8ec75b068d9f6c27cdc392ffcf5ff83128d5dbab65b73c1fd02" dependencies = [ "arrow", "arrow-ipc", "arrow-schema", "async-trait", "bytes", - "bzip2", + "bzip2 0.6.0", "chrono", "datafusion-catalog", "datafusion-catalog-listing", @@ -1798,6 +1807,7 @@ dependencies = [ "datafusion-sql", "flate2", "futures", + "hex", "itertools 0.14.0", "log", "object_store", @@ -1816,9 +1826,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7be8d1b627843af62e447396db08fe1372d882c0eb8d0ea655fd1fbc33120ee" +checksum = "2848fd1e85e2953116dab9cc2eb109214b0888d7bbd2230e30c07f1794f642c0" dependencies = [ "arrow", "async-trait", @@ -1842,9 +1852,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38ab16c5ae43f65ee525fc493ceffbc41f40dee38b01f643dfcfc12959e92038" +checksum = "051a1634628c2d1296d4e326823e7536640d87a118966cdaff069b68821ad53b" dependencies = [ "arrow", "async-trait", @@ -1865,16 +1875,18 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3d56b2ac9f476b93ca82e4ef5fb00769c8a3f248d12b4965af7e27635fa7e12" +checksum = "765e4ad4ef7a4500e389a3f1e738791b71ff4c29fd00912c2f541d62b25da096" dependencies = [ "ahash", "arrow", "arrow-ipc", "base64 0.22.1", + "chrono", "half", "hashbrown 0.14.5", + "hex", "indexmap", "libc", "log", @@ -1889,9 +1901,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16015071202d6133bc84d72756176467e3e46029f3ce9ad2cb788f9b1ff139b2" +checksum = "40a2ae8393051ce25d232a6065c4558ab5a535c9637d5373bacfd464ac88ea12" dependencies = [ "futures", "log", @@ -1900,15 +1912,15 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b77523c95c89d2a7eb99df14ed31390e04ab29b43ff793e562bdc1716b07e17b" +checksum = "90cd841a77f378bc1a5c4a1c37345e1885a9203b008203f9f4b3a769729bf330" dependencies = [ "arrow", "async-compression", "async-trait", "bytes", - "bzip2", + "bzip2 0.6.0", "chrono", "datafusion-common", "datafusion-common-runtime", @@ -1936,9 +1948,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40d25c5e2c0ebe8434beeea997b8e88d55b3ccc0d19344293f2373f65bc524fc" +checksum = "77f4a2c64939c6f0dd15b246723a699fa30d59d0133eb36a86e8ff8c6e2a8dc6" dependencies = [ "arrow", "async-trait", @@ -1961,9 +1973,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dc6959e1155741ab35369e1dc7673ba30fc45ed568fad34c01b7cb1daeb4d4c" +checksum = "11387aaf931b2993ad9273c63ddca33f05aef7d02df9b70fb757429b4b71cdae" dependencies = [ "arrow", "async-trait", @@ -1986,9 +1998,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7a6afdfe358d70f4237f60eaef26ae5a1ce7cb2c469d02d5fc6c7fd5d84e58b" +checksum = "028f430c5185120bf806347848b8d8acd9823f4038875b3820eeefa35f2bb4a2" dependencies = [ "arrow", "async-trait", @@ -2004,8 +2016,10 @@ dependencies = [ "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", + "datafusion-pruning", "datafusion-session", "futures", + "hex", "itertools 0.14.0", "log", "object_store", @@ -2017,15 +2031,15 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bcd8a3e3e3d02ea642541be23d44376b5d5c37c2938cce39b3873cdf7186eea" +checksum = "8ff336d1d755399753a9e4fbab001180e346fc8bfa063a97f1214b82274c00f8" [[package]] name = "datafusion-execution" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670da1d45d045eee4c2319b8c7ea57b26cf48ab77b630aaa50b779e406da476a" +checksum = "042ea192757d1b2d7dcf71643e7ff33f6542c7704f00228d8b85b40003fd8e0f" dependencies = [ "arrow", "dashmap", @@ -2042,11 +2056,12 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3a577f64bdb7e2cc4043cd97f8901d8c504711fde2dbcb0887645b00d7c660b" +checksum = "025222545d6d7fab71e2ae2b356526a1df67a2872222cbae7535e557a42abd2e" dependencies = [ "arrow", + "async-trait", "chrono", "datafusion-common", "datafusion-doc", @@ -2063,9 +2078,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b7916806ace3e9f41884f230f7f38ebf0e955dfbd88266da1826f29a0b9a6a" +checksum = "9d5c267104849d5fa6d81cf5ba88f35ecd58727729c5eb84066c25227b644ae2" dependencies = [ "arrow", "datafusion-common", @@ -2076,9 +2091,9 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "980cca31de37f5dadf7ea18e4ffc2b6833611f45bed5ef9de0831d2abb50f1ef" +checksum = "ec21805d9df2d834e4c6ddfbf8a1bed2bd460b89b01686fe0dcd1cee06d0b60f" dependencies = [ "abi_stable", "arrow", @@ -2098,9 +2113,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fb31c9dc73d3e0c365063f91139dc273308f8a8e124adda9898db8085d68357" +checksum = "c620d105aa208fcee45c588765483314eb415f5571cfd6c1bae3a59c5b4d15bb" dependencies = [ "arrow", "arrow-buffer", @@ -2127,9 +2142,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebb72c6940697eaaba9bd1f746a697a07819de952b817e3fb841fb75331ad5d4" +checksum = "35f61d5198a35ed368bf3aacac74f0d0fa33de7a7cb0c57e9f68ab1346d2f952" dependencies = [ "ahash", "arrow", @@ -2148,9 +2163,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7fdc54656659e5ecd49bf341061f4156ab230052611f4f3609612a0da259696" +checksum = "13efdb17362be39b5024f6da0d977ffe49c0212929ec36eec550e07e2bc7812f" dependencies = [ "ahash", "arrow", @@ -2161,9 +2176,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fad94598e3374938ca43bca6b675febe557e7a14eb627d617db427d70d65118b" +checksum = "9187678af567d7c9e004b72a0b6dc5b0a00ebf4901cb3511ed2db4effe092e66" dependencies = [ "arrow", "arrow-ord", @@ -2173,6 +2188,7 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", "itertools 0.14.0", @@ -2182,9 +2198,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de2fc6c2946da5cab8364fb28b5cac3115f0f3a87960b235ed031c3f7e2e639b" +checksum = "ecf156589cc21ef59fe39c7a9a841b4a97394549643bbfa88cc44e8588cf8fe5" dependencies = [ "arrow", "async-trait", @@ -2198,9 +2214,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e5746548a8544870a119f556543adcd88fe0ba6b93723fe78ad0439e0fbb8b4" +checksum = "edcb25e3e369f1366ec9a261456e45b5aad6ea1c0c8b4ce546587207c501ed9e" dependencies = [ "arrow", "datafusion-common", @@ -2216,9 +2232,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcbe9404382cda257c434f22e13577bee7047031dfdb6216dd5e841b9465e6fe" +checksum = "8996a8e11174d0bd7c62dc2f316485affc6ae5ffd5b8a68b508137ace2310294" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2226,9 +2242,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8dce50e3b637dab0d25d04d2fe79dfdca2b257eabd76790bffd22c7f90d700c8" +checksum = "95ee8d1be549eb7316f437035f2cec7ec42aba8374096d807c4de006a3b5d78a" dependencies = [ "datafusion-expr", "quote", @@ -2237,14 +2253,15 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03cfaacf06445dc3bbc1e901242d2a44f2cae99a744f49f3fefddcee46240058" +checksum = "c9fa98671458254928af854e5f6c915e66b860a8bde505baea0ff2892deab74d" dependencies = [ "arrow", "chrono", "datafusion-common", "datafusion-expr", + "datafusion-expr-common", "datafusion-physical-expr", "indexmap", "itertools 0.14.0", @@ -2256,9 +2273,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1908034a89d7b2630898e06863583ae4c00a0dd310c1589ca284195ee3f7f8a6" +checksum = "3515d51531cca5f7b5a6f3ea22742b71bb36fc378b465df124ff9a2fa349b002" dependencies = [ "ahash", "arrow", @@ -2278,9 +2295,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b7a12dd59ea07614b67dbb01d85254fbd93df45bcffa63495e11d3bdf847df" +checksum = "24485475d9c618a1d33b2a3dad003d946dc7a7bbf0354d125301abc0a5a79e3e" dependencies = [ "ahash", "arrow", @@ -2292,9 +2309,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4371cc4ad33978cc2a8be93bd54a232d3f2857b50401a14631c0705f3f910aae" +checksum = "b9da411a0a64702f941a12af2b979434d14ec5d36c6f49296966b2c7639cbb3a" dependencies = [ "arrow", "datafusion-common", @@ -2304,6 +2321,7 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", + "datafusion-pruning", "itertools 0.14.0", "log", "recursive", @@ -2311,9 +2329,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc47bc33025757a5c11f2cd094c5b6b5ed87f46fa33c023e6fdfa25fcbfade23" +checksum = "a6d168282bb7b54880bb3159f89b51c047db4287f5014d60c3ef4c6e1468212b" dependencies = [ "ahash", "arrow", @@ -2341,9 +2359,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8f5d9acd7d96e3bf2a7bb04818373cab6e51de0356e3694b94905fee7b4e8b6" +checksum = "1b36a0c84f4500efd90487a004b533bd81de1f2bb3f143f71b7526f33b85d2e2" dependencies = [ "arrow", "chrono", @@ -2357,20 +2375,38 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ecb5ec152c4353b60f7a5635489834391f7a291d2b39a4820cd469e318b78e" +checksum = "2ec788be522806740ad6372c0a2f7e45fb37cb37f786d9b77933add49cdd058f" dependencies = [ "arrow", "datafusion-common", "prost 0.13.5", ] +[[package]] +name = "datafusion-pruning" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "391a457b9d23744c53eeb89edd1027424cba100581488d89800ed841182df905" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-datasource", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "itertools 0.14.0", + "log", +] + [[package]] name = "datafusion-session" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7485da32283985d6b45bd7d13a65169dcbe8c869e25d01b2cfbc425254b4b49" +checksum = "053201c2bb729c7938f85879034df2b5a52cfaba16f1b3b66ab8505c81b2aad3" dependencies = [ "arrow", "async-trait", @@ -2392,9 +2428,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a466b15632befddfeac68c125f0260f569ff315c6831538cbb40db754134e0df" +checksum = "9082779be8ce4882189b229c0cff4393bd0808282a7194130c9f32159f185e25" dependencies = [ "arrow", "bigdecimal", @@ -2409,9 +2445,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "48.0.0" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f3973b1a4f6e9ee7fd99a22d58e1c06e6723a28dc911a60df575974c8339aa" +checksum = "4189e480ae42f06ade81a6d12853c5f3f84fbe237d73fb8795c712b6f1923afb" dependencies = [ "async-recursion", "async-trait", @@ -4276,6 +4312,12 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "libbz2-rs-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" + [[package]] name = "libc" version = "0.2.174" @@ -5095,6 +5137,7 @@ dependencies = [ "num-bigint", "object_store", "paste", + "ring", "seq-macro", "simdutf8", "snap", @@ -6897,9 +6940,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.56.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13de2e20128f2a018dab1cfa30be83ae069219a65968c6f89df66ad124de2397" +checksum = "de6d24c270c6c672a86c183c3a8439ba46c1936f93cf7296aa692de3b0ff0228" dependencies = [ "heck 0.5.0", "pbjson", diff --git a/python/Cargo.toml b/python/Cargo.toml index 02a049727ad..b6fbc9ff082 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -12,15 +12,15 @@ name = "lance" crate-type = ["cdylib"] [dependencies] -arrow = { version = "55.1", features = ["pyarrow"] } -arrow-array = "55.1" -arrow-data = "55.1" -arrow-schema = "55.1" -arrow-select = "55.1" -object_store = "0.12.2" -datafusion = "48.0" -datafusion-ffi = "48.0" -datafusion-common = "48.0" +arrow = { version = "55.2", features = ["pyarrow"] } +arrow-array = "55.2" +arrow-data = "55.2" +arrow-schema = "55.2" +arrow-select = "55.2" +object_store = "0.12.3" +datafusion = "49.0.2" +datafusion-ffi = "49.0.2" +datafusion-common = "49.0.2" async-trait = "0.1" chrono = "0.4.41" env_logger = "0.11.7" diff --git a/python/pyproject.toml b/python/pyproject.toml index a90aac3f408..6be5705d641 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -61,7 +61,7 @@ tests = [ "tensorflow<=2.19.0", "tqdm", # Need to align with the datafusion version we use at lance rust - "datafusion==48.0.0", + "datafusion==49.0.0", ] dev = ["ruff==0.4.1", "pyright"] benchmarks = ["pytest-benchmark"] diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 376476e43af..eab23aeb9e0 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -254,7 +254,8 @@ def explain_plan( ProjectionExec: expr=[id@2 IS NOT NULL as __common_expr_1, ...] CoalesceBatchesExec: target_batch_size=... HashJoinExec: mode=CollectLeft, join_type=Right, ... - LanceRead: uri=test_dataset/data, projection=[id], ... + CooperativeExec + LanceRead: uri=test_dataset/data, projection=[id], ... RepartitionExec: ... StreamingTableExec: partition_sizes=1, ... @@ -335,10 +336,11 @@ def analyze_plan( MergeInsert: on=[id], ..., metrics=[..., bytes_written=..., ...], cumulative_cpu=... CoalescePartitionsExec, metrics=[output_rows=..., elapsed_compute=...], cumulative_cpu=... ProjectionExec: expr=[_rowid@1 as _rowid, ...], metrics=[...], cumulative_cpu=... - ProjectionExec: expr=[id@2 IS NOT NULL as __common_expr_1, ...], ... + ProjectionExec: expr=[id@2 IS NOT NULL as __common_expr_1, ...], metrics=[...], cumulative_cpu=... CoalesceBatchesExec: ..., metrics=[...], cumulative_cpu=... HashJoinExec: mode=CollectLeft, join_type=Right, ... - LanceRead: ..., metrics=[..., bytes_read=..., ...], cumulative_cpu=... + CooperativeExec, metrics=[], cumulative_cpu=... + LanceRead: ..., metrics=[..., bytes_read=..., ...], cumulative_cpu=... RepartitionExec: ... StreamingTableExec: ..., metrics=[], ... diff --git a/rust/lance-datafusion/src/planner.rs b/rust/lance-datafusion/src/planner.rs index f231948f1e4..ddbce5e5b85 100644 --- a/rust/lance-datafusion/src/planner.rs +++ b/rust/lance-datafusion/src/planner.rs @@ -447,7 +447,7 @@ impl Planner { support_varchar_with_length: false, enable_options_value_normalization: false, collect_spans: false, - map_varchar_to_utf8view: false, + map_string_types_to_utf8view: false, }, ); diff --git a/rust/lance-encoding/src/testing.rs b/rust/lance-encoding/src/testing.rs index bf2a9a7a023..f6ac48f4b3d 100644 --- a/rust/lance-encoding/src/testing.rs +++ b/rust/lance-encoding/src/testing.rs @@ -9,7 +9,8 @@ use crate::{ format::pb21::{compressive_encoding::Compression, BufferCompression, CompressiveEncoding}, }; -use arrow_array::{Array, StructArray, UInt64Array}; +use arrow_array::{make_array, Array, StructArray, UInt64Array}; +use arrow_data::transform::{Capacities, MutableArrayData}; use arrow_ord::ord::make_comparator; use arrow_schema::{DataType, Field, FieldRef, Schema, SortOptions}; use arrow_select::concat::concat; @@ -788,6 +789,19 @@ async fn check_round_trip_encoding_inner( let num_rows = data.iter().map(|arr| arr.len() as u64).sum::(); let concat_data = if test_cases.skip_validation { None + } else if let Some(DataType::Struct(_)) = data.first().map(|datum| datum.data_type()) { + // TODO(tsaucer) When arrow upgrades to 56, remove this if statement + // This is due to a check for concat_struct in arrow-rs. See https://github.com/lancedb/lance/pull/4598 + let capacities = Capacities::Array(num_rows as usize); + let array_data: Vec<_> = data.iter().map(|a| a.to_data()).collect::>(); + let array_data = array_data.iter().collect(); + let mut mutable = MutableArrayData::with_capacities(array_data, false, capacities); + + for (i, a) in data.iter().enumerate() { + mutable.extend(i, 0, a.len()) + } + + Some(make_array(mutable.freeze())) } else { Some(concat(&data.iter().map(|arr| arr.as_ref()).collect::>()).unwrap()) }; diff --git a/rust/lance-index/src/scalar/btree.rs b/rust/lance-index/src/scalar/btree.rs index b2760fe214d..4bf1f9555dc 100644 --- a/rust/lance-index/src/scalar/btree.rs +++ b/rust/lance-index/src/scalar/btree.rs @@ -33,7 +33,7 @@ use datafusion::physical_plan::{ union::UnionExec, ExecutionPlan, SendableRecordBatchStream, }; use datafusion_common::{DataFusionError, ScalarValue}; -use datafusion_physical_expr::{expressions::Column, LexOrdering, PhysicalSortExpr}; +use datafusion_physical_expr::{expressions::Column, PhysicalSortExpr}; use deepsize::DeepSizeOf; use futures::{ future::BoxFuture, @@ -1005,10 +1005,7 @@ impl BTreeIndex { // The UnionExec creates multiple partitions but the SortPreservingMergeExec merges // them back into a single partition. let all_data = Arc::new(UnionExec::new(vec![old_input, new_input])); - let ordered = Arc::new(SortPreservingMergeExec::new( - LexOrdering::new(vec![sort_expr]), - all_data, - )); + let ordered = Arc::new(SortPreservingMergeExec::new([sort_expr].into(), all_data)); let unchunked = execute_plan( ordered, @@ -1573,7 +1570,7 @@ mod tests { physical_plan::{sorts::sort::SortExec, stream::RecordBatchStreamAdapter, ExecutionPlan}, }; use datafusion_common::{DataFusionError, ScalarValue}; - use datafusion_physical_expr::{expressions::col, LexOrdering, PhysicalSortExpr}; + use datafusion_physical_expr::{expressions::col, PhysicalSortExpr}; use deepsize::DeepSizeOf; use futures::TryStreamExt; use lance_core::{cache::LanceCache, utils::mask::RowIdTreeMap}; @@ -1705,7 +1702,7 @@ mod tests { .into_df_exec(RowCount::from(10), BatchCount::from(100)); let schema = data.schema(); let sort_expr = PhysicalSortExpr::new_default(col("value", schema.as_ref()).unwrap()); - let plan = Arc::new(SortExec::new(LexOrdering::new(vec![sort_expr]), data)); + let plan = Arc::new(SortExec::new([sort_expr].into(), data)); let stream = plan.execute(0, Arc::new(TaskContext::default())).unwrap(); let stream = break_stream(stream, 64); let stream = stream.map_err(DataFusionError::from); @@ -1747,7 +1744,7 @@ mod tests { .into_df_exec(RowCount::from(1000), BatchCount::from(10)); let schema = data.schema(); let sort_expr = PhysicalSortExpr::new_default(col("value", schema.as_ref()).unwrap()); - let plan = Arc::new(SortExec::new(LexOrdering::new(vec![sort_expr]), data)); + let plan = Arc::new(SortExec::new([sort_expr].into(), data)); let stream = plan.execute(0, Arc::new(TaskContext::default())).unwrap(); let stream = break_stream(stream, 64); let stream = stream.map_err(DataFusionError::from); diff --git a/rust/lance-index/src/scalar/ngram.rs b/rust/lance-index/src/scalar/ngram.rs index ff559dd9292..e4334a80f10 100644 --- a/rust/lance-index/src/scalar/ngram.rs +++ b/rust/lance-index/src/scalar/ngram.rs @@ -1708,7 +1708,7 @@ mod tests { let data = Box::pin(RecordBatchStreamAdapter::new( schema, - data.map_err(|arrow_err| DataFusionError::ArrowError(arrow_err, None)), + data.map_err(|arrow_err| DataFusionError::ArrowError(Box::new(arrow_err), None)), )); let builder = NGramIndexBuilder::try_new(NGramIndexBuilderOptions { diff --git a/rust/lance/src/datafusion/logical_plan.rs b/rust/lance/src/datafusion/logical_plan.rs index 5af1713ac58..63cc6ef2505 100644 --- a/rust/lance/src/datafusion/logical_plan.rs +++ b/rust/lance/src/datafusion/logical_plan.rs @@ -83,6 +83,7 @@ mod tests { }; use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema, SchemaRef}; use datafusion::prelude::*; + use datafusion_physical_plan::coop::CooperativeExec; use tempfile::tempdir; fn create_batches() -> (SchemaRef, Vec) { @@ -158,6 +159,13 @@ mod tests { let df = ctx.sql("SELECT vector, utf8 FROM my_table").await.unwrap(); let physical_plan = df.clone().create_physical_plan().await.unwrap(); + // DataFusion will create a cooperative execution plan, so we need to get its inner plan + let physical_plan = physical_plan + .as_any() + .downcast_ref::() + .unwrap() + .children()[0]; + assert!(physical_plan .as_any() .downcast_ref::() diff --git a/rust/lance/src/dataset/scanner.rs b/rust/lance/src/dataset/scanner.rs index 7f9b3c2663e..24ce739f993 100644 --- a/rust/lance/src/dataset/scanner.rs +++ b/rust/lance/src/dataset/scanner.rs @@ -11,7 +11,7 @@ use arrow_array::{Array, Float32Array, Int64Array, RecordBatch}; use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema, SchemaRef, SortOptions}; use arrow_select::concat::concat_batches; use async_recursion::async_recursion; -use datafusion::common::{DFSchema, SchemaExt}; +use datafusion::common::{exec_datafusion_err, DFSchema, NullEquality, SchemaExt}; use datafusion::functions_aggregate; use datafusion::functions_aggregate::count::count_udaf; use datafusion::logical_expr::{col, lit, Expr}; @@ -1815,7 +1815,11 @@ impl Scanner { }) }) .collect::>>()?; - plan = Arc::new(SortExec::new(LexOrdering::new(col_exprs), plan)); + plan = Arc::new(SortExec::new( + LexOrdering::new(col_exprs) + .ok_or(exec_datafusion_err!("Unexpected empty sort expressions"))?, + plan, + )); } // Stage 4: limit / offset @@ -2433,7 +2437,7 @@ impl Scanner { }; Arc::new( - SortExec::new(LexOrdering::new(vec![sort_expr]), fts_node) + SortExec::new([sort_expr].into(), fts_node) .with_fetch(self.limit.map(|l| l as usize)), ) } @@ -2490,7 +2494,7 @@ impl Scanner { &datafusion_expr::JoinType::Inner, None, datafusion_physical_plan::joins::PartitionMode::CollectLeft, - false, + NullEquality::NullEqualsNothing, )?) as _); } else { must = Some(plan); @@ -2621,10 +2625,8 @@ impl Scanner { nulls_first: false, }, }; - match_plan = Arc::new( - SortExec::new(LexOrdering::new(vec![sort_expr]), match_plan) - .with_fetch(params.limit), - ); + match_plan = + Arc::new(SortExec::new([sort_expr].into(), match_plan).with_fetch(params.limit)); } Ok(match_plan) } @@ -3153,7 +3155,7 @@ impl Scanner { // Use DataFusion's [SortExec] for Top-K search let sort = SortExec::new( - LexOrdering::new(vec![ + [ PhysicalSortExpr { expr: expressions::col(DIST_COL, knn_plan.schema().as_ref())?, options: SortOptions { @@ -3168,7 +3170,8 @@ impl Scanner { nulls_first: false, }, }, - ]), + ] + .into(), knn_plan, ) .with_fetch(Some(q.k)); @@ -3230,11 +3233,8 @@ impl Scanner { }, }; Ok(Arc::new( - SortExec::new( - LexOrdering::new(vec![sort_expr, sort_expr_row_id]), - inner_fanout_search, - ) - .with_fetch(Some(q.k * q.refine_factor.unwrap_or(1) as usize)), + SortExec::new([sort_expr, sort_expr_row_id].into(), inner_fanout_search) + .with_fetch(Some(q.k * q.refine_factor.unwrap_or(1) as usize)), )) } @@ -3292,11 +3292,8 @@ impl Scanner { }, }; let ann_node = Arc::new( - SortExec::new( - LexOrdering::new(vec![sort_expr, sort_expr_row_id]), - ann_node, - ) - .with_fetch(Some(q.k * over_fetch_factor as usize)), + SortExec::new([sort_expr, sort_expr_row_id].into(), ann_node) + .with_fetch(Some(q.k * over_fetch_factor as usize)), ); ann_nodes.push(ann_node as Arc); } @@ -3318,11 +3315,8 @@ impl Scanner { }, }; let ann_node = Arc::new( - SortExec::new( - LexOrdering::new(vec![sort_expr, sort_expr_row_id]), - ann_node, - ) - .with_fetch(Some(q.k * q.refine_factor.unwrap_or(1) as usize)), + SortExec::new([sort_expr, sort_expr_row_id].into(), ann_node) + .with_fetch(Some(q.k * q.refine_factor.unwrap_or(1) as usize)), ); Ok(ann_node) diff --git a/rust/lance/src/dataset/sql.rs b/rust/lance/src/dataset/sql.rs index 4846cabc40b..b4a966c16c6 100644 --- a/rust/lance/src/dataset/sql.rs +++ b/rust/lance/src/dataset/sql.rs @@ -242,7 +242,7 @@ mod tests { ], StringArray [ "TableScan: foo projection=[x, y], full_filters=[foo.y >= Int32(100)]", - "ProjectionExec: expr=[x@0 as x, y@1 as y]\n LanceRead: uri=test_sql_dataset/data, projection=[x, y], num_fragments=10, range_before=None, range_after=None, row_id=true, row_addr=false, full_filter=y >= Int32(100), refine_filter=y >= Int32(100)\n", + "ProjectionExec: expr=[x@0 as x, y@1 as y]\n CooperativeExec\n LanceRead: uri=test_sql_dataset/data, projection=[x, y], num_fragments=10, range_before=None, range_after=None, row_id=true, row_addr=false, full_filter=y >= Int32(100), refine_filter=y >= Int32(100)\n", ]], row_count: 2 }"#; assert_string_matches(&plan, expected_pattern).unwrap(); } @@ -277,7 +277,7 @@ mod tests { "Plan with Metrics", ], StringArray [ - "ProjectionExec: expr=[x@0 as x, y@1 as y], metrics=[output_rows=50, elapsed_compute=...]\n LanceRead: uri=test_sql_dataset/data, projection=[x, y], num_fragments=..., range_before=None, range_after=None, row_id=true, row_addr=false, full_filter=y >= Int32(100), refine_filter=y >= Int32(100), metrics=[output_rows=..., elapsed_compute=..., bytes_read=..., fragments_scanned=..., iops=..., ranges_scanned=..., requests=..., rows_scanned=..., task_wait_time=...]\n", + "ProjectionExec: expr=[x@0 as x, y@1 as y], metrics=[output_rows=50, elapsed_compute=...]\n CooperativeExec, metrics=[]\n LanceRead: uri=test_sql_dataset/data, projection=[x, y], num_fragments=..., range_before=None, range_after=None, row_id=true, row_addr=false, full_filter=y >= Int32(100), refine_filter=y >= Int32(100), metrics=[output_rows=..., elapsed_compute=..., bytes_read=..., fragments_scanned=..., iops=..., ranges_scanned=..., requests=..., rows_scanned=..., task_wait_time=...]\n", ]], row_count: 1 }"#; assert_string_matches(&plan, expected_pattern).unwrap(); } diff --git a/rust/lance/src/dataset/utils.rs b/rust/lance/src/dataset/utils.rs index c528f6d8c2f..e26de9a8811 100644 --- a/rust/lance/src/dataset/utils.rs +++ b/rust/lance/src/dataset/utils.rs @@ -174,7 +174,7 @@ pub fn wrap_json_stream_for_writing( let converted_stream = stream.map(move |batch_result| { batch_result.and_then(|batch| { convert_json_columns(&batch) - .map_err(|e| datafusion::error::DataFusionError::ArrowError(e, None)) + .map_err(|e| datafusion::error::DataFusionError::ArrowError(Box::new(e), None)) }) }); @@ -232,7 +232,9 @@ pub fn wrap_json_stream_for_reading( batch_result.and_then(|batch| { convert_lance_json_to_arrow(&batch).map_err(|e| { datafusion::error::DataFusionError::ArrowError( - arrow_schema::ArrowError::InvalidArgumentError(e.to_string()), + Box::new(arrow_schema::ArrowError::InvalidArgumentError( + e.to_string(), + )), None, ) }) diff --git a/rust/lance/src/dataset/write/merge_insert.rs b/rust/lance/src/dataset/write/merge_insert.rs index 341210809b9..b065d9f258f 100644 --- a/rust/lance/src/dataset/write/merge_insert.rs +++ b/rust/lance/src/dataset/write/merge_insert.rs @@ -21,11 +21,29 @@ const MERGE_ACTION_COLUMN: &str = "__action"; use assign_action::merge_insert_action; +use super::retry::{execute_with_retry, RetryConfig, RetryExecutor}; +use super::{write_fragments_internal, CommitBuilder, WriteParams}; +use crate::dataset::rowids::get_row_id_index; +use crate::dataset::utils::CapturedRowIds; +use crate::{ + datafusion::dataframe::SessionContextExt, + dataset::{ + fragment::{FileFragment, FragReadConfig}, + transaction::{Operation, Transaction}, + write::{merge_insert::logical_plan::MergeInsertPlanner, open_writer}, + }, + index::DatasetIndexInternalExt, + io::exec::{ + project, scalar_index::MapIndexExec, utils::ReplayExec, AddRowAddrExec, Planner, TakeExec, + }, + Dataset, +}; use arrow_array::{ cast::AsArray, types::UInt64Type, BooleanArray, RecordBatch, RecordBatchIterator, StructArray, UInt64Array, }; use arrow_schema::{DataType, Field, Schema}; +use datafusion::common::NullEquality; use datafusion::{ execution::{ context::{SessionConfig, SessionContext}, @@ -45,50 +63,24 @@ use datafusion::{ prelude::DataFrame, scalar::ScalarValue, }; -use lance_arrow::{interleave_batches, RecordBatchExt, SchemaExt}; -use lance_datafusion::{ - chunker::chunk_stream, - dataframe::DataFrameExt, - exec::{analyze_plan, get_session_context, LanceExecutionOptions}, - utils::reader_to_stream, -}; -use std::{ - collections::{BTreeMap, HashSet}, - sync::{ - atomic::{AtomicU32, Ordering}, - Arc, Mutex, - }, - time::Duration, -}; - -use super::retry::{execute_with_retry, RetryConfig, RetryExecutor}; -use super::{write_fragments_internal, CommitBuilder, WriteParams}; -use crate::dataset::rowids::get_row_id_index; -use crate::dataset::utils::CapturedRowIds; -use crate::{ - datafusion::dataframe::SessionContextExt, - dataset::{ - fragment::{FileFragment, FragReadConfig}, - transaction::{Operation, Transaction}, - write::{merge_insert::logical_plan::MergeInsertPlanner, open_writer}, - }, - index::DatasetIndexInternalExt, - io::exec::{ - project, scalar_index::MapIndexExec, utils::ReplayExec, AddRowAddrExec, Planner, TakeExec, - }, - Dataset, -}; use datafusion_physical_expr::expressions::Column; use futures::{ stream::{self}, Stream, StreamExt, TryStreamExt, }; +use lance_arrow::{interleave_batches, RecordBatchExt, SchemaExt}; use lance_core::{ datatypes::{OnMissing, OnTypeMismatch, SchemaCompareOptions}, error::{box_error, InvalidInputSnafu}, utils::{futures::Capacity, mask::RowIdTreeMap, tokio::get_num_compute_intensive_cpus}, Error, Result, ROW_ADDR, ROW_ADDR_FIELD, ROW_ID, ROW_ID_FIELD, }; +use lance_datafusion::{ + chunker::chunk_stream, + dataframe::DataFrameExt, + exec::{analyze_plan, get_session_context, LanceExecutionOptions}, + utils::reader_to_stream, +}; use lance_datafusion::{ exec::{execute_plan, OneShotExec}, utils::StreamingWriteSource, @@ -101,6 +93,14 @@ use lance_table::format::{Fragment, Index, RowIdMeta}; use log::info; use roaring::RoaringTreemap; use snafu::{location, ResultExt}; +use std::{ + collections::{BTreeMap, HashSet}, + sync::{ + atomic::{AtomicU32, Ordering}, + Arc, Mutex, + }, + time::Duration, +}; use tokio::task::JoinSet; mod assign_action; @@ -631,7 +631,7 @@ impl MergeInsertJob { &JoinType::Full, None, PartitionMode::CollectLeft, - true, + NullEquality::NullEqualsNull, ) .unwrap(), ); @@ -3814,8 +3814,9 @@ mod tests { ProjectionExec: expr=[key@3 IS NOT NULL as __common_expr_1, _rowid@0 as _rowid, _rowaddr@1 as _rowaddr, value@2 as value, key@3 as key] CoalesceBatchesExec... HashJoinExec: mode=CollectLeft, join_type=Right, on=[(key@0, key@1)], projection=[_rowid@1, _rowaddr@2, value@3, key@4] - LanceRead: uri=..., projection=[key], num_fragments=1, range_before=None, range_after=None, \ - row_id=true, row_addr=true, full_filter=--, refine_filter=-- + CooperativeExec + LanceRead: uri=..., projection=[key], num_fragments=1, range_before=None, range_after=None, \ + row_id=true, row_addr=true, full_filter=--, refine_filter=-- RepartitionExec: partitioning=RoundRobinBatch(...), input_partitions=1 StreamingTableExec: partition_sizes=1, projection=[value, key]" ).await.unwrap(); @@ -3861,7 +3862,8 @@ mod tests { ProjectionExec: expr=[_rowid@0 as _rowid, _rowaddr@1 as _rowaddr, value@2 as value, key@3 as key, CASE WHEN key@3 IS NOT NULL AND _rowaddr@1 IS NOT NULL THEN 1 ELSE 0 END as __action] CoalesceBatchesExec... HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(key@0, key@1)], projection=[_rowid@1, _rowaddr@2, value@3, key@4] - LanceRead: uri=..., projection=[key], num_fragments=1, range_before=None, range_after=None, row_id=true, row_addr=true, full_filter=--, refine_filter=-- + CooperativeExec + LanceRead: uri=..., projection=[key], num_fragments=1, range_before=None, range_after=None, row_id=true, row_addr=true, full_filter=--, refine_filter=-- RepartitionExec... StreamingTableExec: partition_sizes=1, projection=[value, key]" ).await.unwrap(); @@ -3907,7 +3909,8 @@ mod tests { ProjectionExec: expr=[_rowid@0 as _rowid, _rowaddr@1 as _rowaddr, value@2 as value, key@3 as key, CASE WHEN key@3 IS NOT NULL AND _rowaddr@1 IS NOT NULL AND value@2 > 20 THEN 1 ELSE 0 END as __action] CoalesceBatchesExec... HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(key@0, key@1)], projection=[_rowid@1, _rowaddr@2, value@3, key@4] - LanceRead: uri=..., projection=[key], num_fragments=1, range_before=None, range_after=None, row_id=true, row_addr=true, full_filter=--, refine_filter=-- + CooperativeExec + LanceRead: uri=..., projection=[key], num_fragments=1, range_before=None, range_after=None, row_id=true, row_addr=true, full_filter=--, refine_filter=-- RepartitionExec... StreamingTableExec: partition_sizes=1, projection=[value, key]" ).await.unwrap(); diff --git a/rust/lance/src/io/exec/rowids.rs b/rust/lance/src/io/exec/rowids.rs index d158eba79c8..872c3e1bf79 100644 --- a/rust/lance/src/io/exec/rowids.rs +++ b/rust/lance/src/io/exec/rowids.rs @@ -378,7 +378,7 @@ impl AddRowOffsetExec { )); let new_eq_props = - EquivalenceProperties::new(schema).extend(input.properties().eq_properties.clone()); + EquivalenceProperties::new(schema).extend(input.properties().eq_properties.clone())?; let properties = input.properties().clone().with_eq_properties(new_eq_props); Ok(Self { diff --git a/rust/lance/src/io/exec/utils.rs b/rust/lance/src/io/exec/utils.rs index b86f036b3bf..68aaa07200e 100644 --- a/rust/lance/src/io/exec/utils.rs +++ b/rust/lance/src/io/exec/utils.rs @@ -431,6 +431,7 @@ mod tests { use arrow_array::{types::UInt32Type, RecordBatchReader}; use arrow_schema::SortOptions; + use datafusion::common::NullEquality; use datafusion::{ logical_expr::JoinType, physical_expr::expressions::Column, @@ -467,7 +468,7 @@ mod tests { None, JoinType::Inner, vec![SortOptions::default()], - true, + NullEquality::NullEqualsNull, ) .unwrap(), );