From 004bc861c8c1ab4cc8785f47d3c8dc2fde4541c8 Mon Sep 17 00:00:00 2001 From: metesynnada <100111937+metesynnada@users.noreply.github.com> Date: Thu, 30 Mar 2023 20:31:52 +0300 Subject: [PATCH 01/15] Initial impl --- datafusion-cli/Cargo.lock | 271 ++++++++------ datafusion/physical-expr/Cargo.toml | 1 + .../physical-expr/src/intervals/cp_solver.rs | 164 +++++++- .../src/intervals/interval_aritmetic.rs | 269 +++++++++++++- datafusion/physical-expr/src/intervals/mod.rs | 349 ++++++++++++++++++ .../physical-expr/src/intervals/test_utils.rs | 42 +++ 6 files changed, 978 insertions(+), 118 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 49e2478d842d..2458b9193a81 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -295,13 +295,13 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.67" +version = "0.1.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86ea188f25f0255d8f92797797c97ebf5631fa88178beb1a46fdf5622c9a00e4" +checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.8", + "syn 2.0.11", ] [[package]] @@ -582,9 +582,9 @@ checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" [[package]] name = "cpufeatures" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320" +checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" dependencies = [ "libc", ] @@ -637,9 +637,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.93" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9c00419335c41018365ddf7e4d5f1c12ee3659ddcf3e01974650ba1de73d038" +checksum = "f61f1b6389c3fe1c316bf8a4dccc90a38208354b330925bce1f74a6c4756eb93" dependencies = [ "cc", "cxxbridge-flags", @@ -649,9 +649,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.93" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb8307ad413a98fff033c8545ecf133e3257747b3bae935e7602aab8aa92d4ca" +checksum = "12cee708e8962df2aeb38f594aae5d827c022b6460ac71a7a3e2c3c2aae5a07b" dependencies = [ "cc", "codespan-reporting", @@ -659,24 +659,24 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.8", + "syn 2.0.11", ] [[package]] name = "cxxbridge-flags" -version = "1.0.93" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edc52e2eb08915cb12596d29d55f0b5384f00d697a646dbd269b6ecb0fbd9d31" +checksum = "7944172ae7e4068c533afbb984114a56c46e9ccddda550499caa222902c7f7bb" [[package]] name = "cxxbridge-macro" -version = "1.0.93" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "631569015d0d8d54e6c241733f944042623ab6df7bc3be7466874b05fcdb1c5f" +checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.8", + "syn 2.0.11", ] [[package]] @@ -830,6 +830,7 @@ dependencies = [ "indexmap", "itertools", "lazy_static", + "libc", "md-5", "paste", "petgraph", @@ -955,13 +956,13 @@ dependencies = [ [[package]] name = "errno" -version = "0.2.8" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0" dependencies = [ "errno-dragonfly", "libc", - "winapi", + "windows-sys", ] [[package]] @@ -995,13 +996,13 @@ dependencies = [ [[package]] name = "fd-lock" -version = "3.0.10" +version = "3.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ef1a30ae415c3a691a4f41afddc2dbcd6d70baf338368d85ebc1e8ed92cedb9" +checksum = "9799aefb4a2e4a01cc47610b1dd47c18ab13d991f27bbcaed9296f5a53d5cbad" dependencies = [ "cfg-if", "rustix", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] @@ -1047,9 +1048,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "531ac96c6ff5fd7c62263c5e3c67a603af4fcaee2e1a0ae5565ba3a11e69e549" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" dependencies = [ "futures-channel", "futures-core", @@ -1062,9 +1063,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "164713a5a0dcc3e7b4b1ed7d3b433cabc18025386f9339346e8daf15963cf7ac" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ "futures-core", "futures-sink", @@ -1072,15 +1073,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86d7a0c1aa76363dac491de0ee99faf6941128376f1cf96f07db7603b7de69dd" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" [[package]] name = "futures-executor" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1997dd9df74cdac935c76252744c1ed5794fac083242ea4fe77ef3ed60ba0f83" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" dependencies = [ "futures-core", "futures-task", @@ -1089,38 +1090,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d422fa3cbe3b40dca574ab087abb5bc98258ea57eea3fd6f1fa7162c778b91" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" [[package]] name = "futures-macro" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3eb14ed937631bd8b8b8977f2c198443447a8355b6e3ca599f38c975e5a963b6" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.11", ] [[package]] name = "futures-sink" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec93083a4aecafb2a80a885c9de1f0ccae9dbd32c2bb54b0c3a65690e0b8d2f2" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" [[package]] name = "futures-task" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd65540d33b37b16542a0438c12e6aeead10d4ac5d05bd3f805b8f35ab592879" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" [[package]] name = "futures-util" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ef6b17e481503ec85211fed8f39d1970f128935ca1f814cd32ac4a6842e84ab" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ "futures-channel", "futures-core", @@ -1136,9 +1137,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.6" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", @@ -1314,9 +1315,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.54" +version = "0.1.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c17cc76786e99f8d2f055c11159e7f0091c42474dcc3189fbab96072e873e6d" +checksum = "716f12fbcfac6ffab0a5e9ec51d0a0ff70503742bb2dc7b99396394c9dc323f0" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1348,9 +1349,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.2" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.3", @@ -1379,14 +1380,14 @@ checksum = "09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb" dependencies = [ "hermit-abi 0.3.1", "libc", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] name = "ipnet" -version = "2.7.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146" +checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f" [[package]] name = "itertools" @@ -1524,9 +1525,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.1.4" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" +checksum = "cd550e73688e6d578f0ac2119e32b797a327631a42f9433e59d02e139c8df60d" [[package]] name = "lock_api" @@ -1626,7 +1627,7 @@ dependencies = [ "libc", "log", "wasi", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] @@ -1804,9 +1805,9 @@ checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.2.16", "smallvec", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] @@ -1967,9 +1968,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.53" +version = "1.0.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba466839c78239c09faf015484e5cc04860f88242cff4d03eb038f04b4699b73" +checksum = "e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534" dependencies = [ "unicode-ident", ] @@ -2042,6 +2043,15 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags", +] + [[package]] name = "redox_users" version = "0.4.3" @@ -2049,15 +2059,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" dependencies = [ "getrandom", - "redox_syscall", + "redox_syscall 0.2.16", "thiserror", ] [[package]] name = "regex" -version = "1.7.2" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cce168fea28d3e05f158bda4576cf0c844d5045bc2cc3620fa0292ed5bb5814c" +checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" dependencies = [ "aho-corasick", "memchr", @@ -2072,9 +2082,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "reqwest" -version = "0.11.15" +version = "0.11.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ba30cc2c0cd02af1222ed216ba659cdb2f879dfe3181852fe7c50b1d0005949" +checksum = "27b71749df584b7f4cac2c426c127a7c785a5106cc98f7a8feb044115f0fa254" dependencies = [ "base64", "bytes", @@ -2137,16 +2147,16 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.11" +version = "0.37.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4165c9963ab29e422d6c26fbc1d37f15bace6b2810221f9d925023480fcf0e" +checksum = "0e78cc525325c06b4a7ff02db283472f3c042b7ff0c391f96c6d5ac6f4f91b75" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] @@ -2250,29 +2260,29 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" [[package]] name = "serde" -version = "1.0.158" +version = "1.0.159" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "771d4d9c4163ee138805e12c710dd365e4f44be8be0503cb1bb9eb989425d9c9" +checksum = "3c04e8343c3daeec41f58990b9d77068df31209f2af111e059e9fe9646693065" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.158" +version = "1.0.159" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e801c1712f48475582b7696ac71e0ca34ebb30e09338425384269d9717c62cad" +checksum = "4c614d17805b093df4b147b51339e7e44bf05ef59fba1e45d83500bcfb4d8585" dependencies = [ "proc-macro2", "quote", - "syn 2.0.8", + "syn 2.0.11", ] [[package]] name = "serde_json" -version = "1.0.94" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c533a59c9d8a93a09c6ab31f0fd5e5f4dd1b8fc9434804029839884765d04ea" +checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" dependencies = [ "itoa", "ryu", @@ -2444,9 +2454,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.8" +version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc02725fd69ab9f26eab07fad303e2497fad6fb9eba4f96c4d1687bdf704ad9" +checksum = "21e3787bb71465627110e7d87ed4faaa36c1f61042ee67badb9e2ef173accc40" dependencies = [ "proc-macro2", "quote", @@ -2455,15 +2465,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95" +checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" dependencies = [ "cfg-if", "fastrand", - "redox_syscall", + "redox_syscall 0.3.5", "rustix", - "windows-sys 0.42.0", + "windows-sys", ] [[package]] @@ -2498,7 +2508,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.8", + "syn 2.0.11", ] [[package]] @@ -2538,32 +2548,31 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.26.0" +version = "1.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03201d01c3c27a29c8a5cee5b55a93ddae1ccf6f08f65365c2c918f8c1b76f64" +checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001" dependencies = [ "autocfg", "bytes", "libc", - "memchr", "mio", "num_cpus", "parking_lot", "pin-project-lite", "socket2", "tokio-macros", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] name = "tokio-macros" -version = "1.8.2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" +checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.11", ] [[package]] @@ -2900,50 +2909,50 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows" -version = "0.46.0" +version = "0.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdacb41e6a96a052c6cb63a144f24900236121c6f63f4f8219fef5977ecb0c25" +checksum = "2649ff315bee4c98757f15dac226efe3d81927adbb6e882084bb1ee3e0c330a7" dependencies = [ - "windows-targets", + "windows-targets 0.47.0", ] [[package]] name = "windows-sys" -version = "0.42.0" +version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows-targets 0.42.2", ] [[package]] -name = "windows-sys" -version = "0.45.0" +name = "windows-targets" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ - "windows-targets", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", ] [[package]] name = "windows-targets" -version = "0.42.2" +version = "0.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +checksum = "2f8996d3f43b4b2d44327cd71b7b0efd1284ab60e6e9d0e8b630e18555d87d3e" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.47.0", + "windows_aarch64_msvc 0.47.0", + "windows_i686_gnu 0.47.0", + "windows_i686_msvc 0.47.0", + "windows_x86_64_gnu 0.47.0", + "windows_x86_64_gnullvm 0.47.0", + "windows_x86_64_msvc 0.47.0", ] [[package]] @@ -2952,42 +2961,84 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831d567d53d4f3cb1db332b68e6e2b6260228eb4d99a777d8b2e8ed794027c90" + [[package]] name = "windows_aarch64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +[[package]] +name = "windows_aarch64_msvc" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a42d54a417c60ce4f0e31661eed628f0fa5aca73448c093ec4d45fab4c51cdf" + [[package]] name = "windows_i686_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +[[package]] +name = "windows_i686_gnu" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1925beafdbb22201a53a483db861a5644123157c1c3cee83323a2ed565d71e3" + [[package]] name = "windows_i686_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +[[package]] +name = "windows_i686_msvc" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8ef8f2f1711b223947d9b69b596cf5a4e452c930fb58b6fc3fdae7d0ec6b31" + [[package]] name = "windows_x86_64_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +[[package]] +name = "windows_x86_64_gnu" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7acaa0c2cf0d2ef99b61c308a0c3dbae430a51b7345dedec470bd8f53f5a3642" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a0628f71be1d11e17ca4a0e9e15b3a5180f6fbf1c2d55e3ba3f850378052c1" + [[package]] name = "windows_x86_64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +[[package]] +name = "windows_x86_64_msvc" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d6e62c256dc6d40b8c8707df17df8d774e60e39db723675241e7c15e910bce7" + [[package]] name = "winreg" version = "0.10.1" diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index c99b07077b96..c973845734b8 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -57,6 +57,7 @@ hashbrown = { version = "0.13", features = ["raw"] } indexmap = "1.9.2" itertools = { version = "0.10", features = ["use_std"] } lazy_static = { version = "^1.4.0" } +libc = "0.2.140" md-5 = { version = "^0.10.0", optional = true } paste = "^1.0" petgraph = "0.6.2" diff --git a/datafusion/physical-expr/src/intervals/cp_solver.rs b/datafusion/physical-expr/src/intervals/cp_solver.rs index 66367001c642..01c87eb5622c 100644 --- a/datafusion/physical-expr/src/intervals/cp_solver.rs +++ b/datafusion/physical-expr/src/intervals/cp_solver.rs @@ -524,7 +524,7 @@ impl ExprIntervalGraph { #[cfg(test)] mod tests { use super::*; - use crate::intervals::test_utils::gen_conjunctive_numeric_expr; + use crate::intervals::test_utils::{gen_conjunctive_float64_expr, gen_conjunctive_numeric_expr}; use itertools::Itertools; use crate::expressions::{BinaryExpr, Column}; @@ -657,6 +657,130 @@ mod tests { Ok(()) } + fn experiment_float( + expr: Arc, + exprs_with_interval: (Arc, Arc), + left_interval: (Option, Option), + right_interval: (Option, Option), + left_waited: (Option, Option), + right_waited: (Option, Option), + result: PropagationResult, + ) -> Result<()> { + let col_stats = vec![ + ( + exprs_with_interval.0.clone(), + Interval { + lower: ScalarValue::Float64(left_interval.0), + upper: ScalarValue::Float64(left_interval.1), + }, + ), + ( + exprs_with_interval.1.clone(), + Interval { + lower: ScalarValue::Float64(right_interval.0), + upper: ScalarValue::Float64(right_interval.1), + }, + ), + ]; + let expected = vec![ + ( + exprs_with_interval.0.clone(), + Interval { + lower: ScalarValue::Float64(left_waited.0), + upper: ScalarValue::Float64(left_waited.1), + }, + ), + ( + exprs_with_interval.1.clone(), + Interval { + lower: ScalarValue::Float64(right_waited.0), + upper: ScalarValue::Float64(right_waited.1), + }, + ), + ]; + let mut graph = ExprIntervalGraph::try_new(expr)?; + let expr_indexes = graph + .gather_node_indices(&col_stats.iter().map(|(e, _)| e.clone()).collect_vec()); + + let mut col_stat_nodes = col_stats + .iter() + .zip(expr_indexes.iter()) + .map(|((_, interval), (_, index))| (*index, interval.clone())) + .collect_vec(); + let expected_nodes = expected + .iter() + .zip(expr_indexes.iter()) + .map(|((_, interval), (_, index))| (*index, interval.clone())) + .collect_vec(); + + let exp_result = graph.update_ranges(&mut col_stat_nodes[..])?; + assert_eq!(exp_result, result); + col_stat_nodes + .iter() + .zip(expected_nodes.iter()) + .for_each(|((_, res), (_, expected))| assert_eq!(res, expected)); + Ok(()) + } + + fn generate_float_case( + expr: Arc, + left_col: Arc, + right_col: Arc, + seed: u64, + expr_left: f64, + expr_right: f64, + ) -> Result<()> { + let mut r = StdRng::seed_from_u64(seed); + + let (left_interval, right_interval, left_waited, right_waited) = if ASC { + let left = (Some(r.gen_range(0.0..1000.0)), None); + let right = (Some(r.gen_range(0.0..1000.0)), None); + ( + left, + right, + ( + Some(f64::max(left.0.unwrap(), right.0.unwrap() + expr_left)), + None, + ), + ( + Some(f64::max( + right.0.unwrap(), + left.0.unwrap() + expr_right, + )), + None, + ), + ) + } else { + let left = (None, Some(r.gen_range(0.0..1000.0))); + let right = (None, Some(r.gen_range(0.0..1000.0))); + ( + left, + right, + ( + None, + Some(f64::min(left.1.unwrap(), right.1.unwrap() + expr_left)), + ), + ( + None, + Some(f64::min( + right.1.unwrap(), + left.1.unwrap() + expr_right, + )), + ), + ) + }; + experiment_float( + expr, + (left_col, right_col), + left_interval, + right_interval, + left_waited, + right_waited, + PropagationResult::Success, + )?; + Ok(()) + } + #[test] fn testing_not_possible() -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); @@ -720,6 +844,44 @@ mod tests { Ok(()) } + + #[test] + fn case_1_float() -> Result<()> { + let left_col = Arc::new(Column::new("left_watermark", 0)); + let right_col = Arc::new(Column::new("right_watermark", 0)); + // left_watermark + 1 > right_watermark + 11 AND left_watermark + 3 < right_watermark + 33 + let expr = gen_conjunctive_float64_expr( + left_col.clone(), + right_col.clone(), + Operator::Plus, + Operator::Plus, + Operator::Plus, + Operator::Plus, + 1.0, + 11.0, + 3.0, + 33.0, + ); + // l > r + 10 AND r > l - 30 + let l_gt_r = 10.0; + let r_gt_l = -30.0; + generate_float_case::( + expr.clone(), + left_col.clone(), + right_col.clone(), + 1, + l_gt_r, + r_gt_l, + )?; + // Descending tests + // r < l - 10 AND l < r + 30 + let r_lt_l = -l_gt_r; + let l_lt_r = -r_gt_l; + generate_float_case::(expr, left_col, right_col, 1, l_lt_r, r_lt_l)?; + + Ok(()) + } + #[rstest] #[test] fn case_2( diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index 7fc3641b25ef..534d5355cc24 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -16,18 +16,17 @@ // under the License. //! Interval arithmetic library - use std::borrow::Borrow; use std::fmt; use std::fmt::{Display, Formatter}; +use crate::aggregate::min_max::{max, min}; +use crate::intervals::alter_round_mode_for_float_operation; use arrow::compute::{cast_with_options, CastOptions}; use arrow::datatypes::DataType; use datafusion_common::{DataFusionError, Result, ScalarValue}; use datafusion_expr::Operator; -use crate::aggregate::min_max::{max, min}; - /// This type represents an interval, which is used to calculate reliable /// bounds for expressions. Currently, we only support addition and /// subtraction, but more capabilities will be added in the future. @@ -204,12 +203,30 @@ impl Interval { let lower = if self.lower.is_null() || rhs.lower.is_null() { ScalarValue::try_from(self.lower.get_datatype()) } else { - self.lower.add(&rhs.lower) + match self.get_datatype() { + DataType::Float64 | DataType::Float32 => { + alter_round_mode_for_float_operation::( + &self.lower, + &rhs.lower, + Box::new(|lhs, rhs| lhs.add(rhs)), + ) + } + _ => self.lower.add(&rhs.lower), + } }?; let upper = if self.upper.is_null() || rhs.upper.is_null() { ScalarValue::try_from(self.upper.get_datatype()) } else { - self.upper.add(&rhs.upper) + match self.get_datatype() { + DataType::Float64 | DataType::Float32 => { + alter_round_mode_for_float_operation::( + &self.upper, + &rhs.upper, + Box::new(|lhs, rhs| lhs.add(rhs)), + ) + } + _ => self.upper.add(&rhs.upper), + } }?; Ok(Interval { lower, upper }) } @@ -223,12 +240,30 @@ impl Interval { let lower = if self.lower.is_null() || rhs.upper.is_null() { ScalarValue::try_from(self.lower.get_datatype()) } else { - self.lower.sub(&rhs.upper) + match self.get_datatype() { + DataType::Float64 | DataType::Float32 => { + alter_round_mode_for_float_operation::( + &self.lower, + &rhs.upper, + Box::new(|lhs, rhs| lhs.sub(rhs)), + ) + } + _ => self.lower.sub(&rhs.upper), + } }?; let upper = if self.upper.is_null() || rhs.lower.is_null() { ScalarValue::try_from(self.upper.get_datatype()) } else { - self.upper.sub(&rhs.lower) + match self.get_datatype() { + DataType::Float64 | DataType::Float32 => { + alter_round_mode_for_float_operation::( + &self.upper, + &rhs.lower, + Box::new(|lhs, rhs| lhs.sub(rhs)), + ) + } + _ => self.upper.sub(&rhs.lower), + } }?; Ok(Interval { lower, upper }) } @@ -258,6 +293,8 @@ pub fn is_datatype_supported(data_type: &DataType) -> bool { | &DataType::UInt32 | &DataType::UInt16 | &DataType::UInt8 + | &DataType::Float64 + | &DataType::Float32 ) } @@ -530,4 +567,222 @@ mod tests { } Ok(()) } + + fn create_f32_interval(lower: f32, upper: f32) -> Interval { + Interval { + lower: ScalarValue::Float32(Some(lower)), + upper: ScalarValue::Float32(Some(upper)), + } + } + + fn create_f64_interval(lower: f64, upper: f64) -> Interval { + Interval { + lower: ScalarValue::Float64(Some(lower)), + upper: ScalarValue::Float64(Some(upper)), + } + } + + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + #[test] + fn test_add_intervals_lower_affected_f32() { + let lower = f32::from_bits(1073741887); + let upper = f32::from_bits(1098907651); + let interval1 = create_f32_interval(lower, upper); + let interval2 = create_f32_interval(upper, lower); + let result = interval1.add(&interval2).unwrap(); + match (result, create_f32_interval(lower + upper, lower + upper)) { + ( + Interval { + lower: ScalarValue::Float32(Some(result_lower)), + .. + }, + Interval { + lower: ScalarValue::Float32(Some(without_fe_lower)), + .. + }, + ) => { + assert!(result_lower < without_fe_lower); + } + _ => unreachable!(), + } + } + + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + #[test] + fn test_add_intervals_upper_affected_f32() { + let lower = f32::from_bits(1072693248); + let upper = f32::from_bits(715827883); + let interval1 = create_f32_interval(lower, upper); + let interval2 = create_f32_interval(upper, lower); + let result = interval1.add(&interval2).unwrap(); + match (result, create_f32_interval(lower + upper, lower + upper)) { + ( + Interval { + upper: ScalarValue::Float32(Some(result_upper)), + .. + }, + Interval { + upper: ScalarValue::Float32(Some(without_fe_upper)), + .. + }, + ) => { + assert!(result_upper > without_fe_upper); + } + _ => unreachable!(), + } + } + + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + #[test] + fn test_add_intervals_lower_affected_f64() { + let lower = 1.0; + let upper = 0.3; + let interval1 = create_f64_interval(lower, upper); + let interval2 = create_f64_interval(upper, lower); + let result = interval1.add(&interval2).unwrap(); + match (result, create_f64_interval(lower + upper, lower + upper)) { + ( + Interval { + lower: ScalarValue::Float64(Some(result_lower)), + .. + }, + Interval { + lower: ScalarValue::Float64(Some(without_fe_lower)), + .. + }, + ) => { + assert!(result_lower < without_fe_lower); + } + _ => unreachable!(), + } + } + + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + #[test] + fn test_add_intervals_upper_affected_f64() { + let lower = 1.4999999999999998; + let upper = 0.000000000000000022044604925031308; + let interval1 = create_f64_interval(lower, upper); + let interval2 = create_f64_interval(upper, lower); + let result = interval1.add(&interval2).unwrap(); + match (result, create_f64_interval(lower + upper, lower + upper)) { + ( + Interval { + upper: ScalarValue::Float64(Some(result_upper)), + .. + }, + Interval { + upper: ScalarValue::Float64(Some(without_fe_upper)), + .. + }, + ) => { + assert!(result_upper > without_fe_upper); + } + _ => unreachable!(), + } + } + + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + #[test] + fn test_next_impl_add_intervals_f64() { + let lower = 1.5; + let upper = 1.5; + let interval1 = create_f64_interval(lower, upper); + let interval2 = create_f64_interval(upper, lower); + let result = interval1.add(&interval2).unwrap(); + match (result, create_f64_interval(lower + upper, lower + upper)) { + ( + Interval { + lower: ScalarValue::Float64(Some(result_lower)), + upper: ScalarValue::Float64(Some(result_upper)), + }, + Interval { + lower: ScalarValue::Float64(Some(without_fe_lower)), + upper: ScalarValue::Float64(Some(without_fe_upper)), + }, + ) => { + assert!(result_lower < without_fe_lower); + assert!(result_upper > without_fe_upper); + } + _ => unreachable!(), + } + } + + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + #[test] + fn test_next_impl_sub_intervals_f64() { + let lower = 2.5; + let upper = 1.5; + let interval1 = create_f64_interval(lower, upper); + let interval2 = create_f64_interval(upper, lower); + let result = interval1.sub(&interval2).unwrap(); + match (result, create_f64_interval(lower - upper, upper - lower)) { + ( + Interval { + lower: ScalarValue::Float64(Some(result_lower)), + upper: ScalarValue::Float64(Some(result_upper)), + }, + Interval { + lower: ScalarValue::Float64(Some(without_fe_lower)), + upper: ScalarValue::Float64(Some(without_fe_upper)), + }, + ) => { + assert!(result_lower < without_fe_lower); + assert!(result_upper > without_fe_upper); + } + _ => unreachable!(), + } + } + + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + #[test] + fn test_next_impl_add_intervals_f32() { + let lower = 1.5; + let upper = 1.5; + let interval1 = create_f32_interval(lower, upper); + let interval2 = create_f32_interval(upper, lower); + let result = interval1.add(&interval2).unwrap(); + match (result, create_f32_interval(lower + upper, lower + upper)) { + ( + Interval { + lower: ScalarValue::Float32(Some(result_lower)), + upper: ScalarValue::Float32(Some(result_upper)), + }, + Interval { + lower: ScalarValue::Float32(Some(without_fe_lower)), + upper: ScalarValue::Float32(Some(without_fe_upper)), + }, + ) => { + assert!(result_lower < without_fe_lower); + assert!(result_upper > without_fe_upper); + } + _ => unreachable!(), + } + } + + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + #[test] + fn test_next_impl_sub_intervals_f32() { + let lower = 2.5; + let upper = 1.5; + let interval1 = create_f32_interval(lower, upper); + let interval2 = create_f32_interval(lower, upper); + let result = interval1.sub(&interval2).unwrap(); + match (result, create_f32_interval(lower - upper, upper - lower)) { + ( + Interval { + lower: ScalarValue::Float32(Some(result_lower)), + upper: ScalarValue::Float32(Some(result_upper)), + }, + Interval { + lower: ScalarValue::Float32(Some(without_fe_lower)), + upper: ScalarValue::Float32(Some(without_fe_upper)), + }, + ) => { + assert!(result_lower < without_fe_lower); + assert!(result_upper > without_fe_upper); + } + _ => unreachable!(), + } + } } diff --git a/datafusion/physical-expr/src/intervals/mod.rs b/datafusion/physical-expr/src/intervals/mod.rs index 45616534cb17..0705ceabc9c0 100644 --- a/datafusion/physical-expr/src/intervals/mod.rs +++ b/datafusion/physical-expr/src/intervals/mod.rs @@ -23,4 +23,353 @@ pub mod interval_aritmetic; pub mod test_utils; pub use cp_solver::ExprIntervalGraph; +use datafusion_common::Result; +use datafusion_common::ScalarValue; pub use interval_aritmetic::*; +use std::ops::{Add, BitAnd, Sub}; + +type ScalarValueOperation = dyn FnOnce(&ScalarValue, &ScalarValue) -> Result; + +// Define constants for ARM +#[cfg(all(target_arch = "aarch64"))] +const FE_UPWARD: i32 = 0x00400000; +#[cfg(all(target_arch = "aarch64"))] +const FE_DOWNWARD: i32 = 0x00800000; + +// Define constants for x86_64 +#[cfg(all(target_arch = "x86_64"))] +const FE_UPWARD: i32 = 0x0800; +#[cfg(all(target_arch = "x86_64"))] +const FE_DOWNWARD: i32 = 0x0400; + +// Define a default constant for other architectures +#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] +const FE_UPWARD: i32 = 0x00000000; +#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] +const FE_DOWNWARD: i32 = 0x00000000; + +extern crate libc; + +extern "C" { + fn fesetround(rount: i32); + fn fegetround() -> i32; +} + +/// A trait to represent floating-point types with bitwise operations. +/// +/// This trait provides methods to convert a floating-point value to its bitwise representation +/// and back, as well as methods to check for NaN, infinity, and negative infinity. +pub trait FloatBits { + /// The integer type used for bitwise operations. + type Item: Copy + + PartialEq + + BitAnd + + Add + + Sub; + + /// The smallest positive floating-point value representable by this type. + const TINY_BITS: Self::Item; + + /// The smallest (in magnitude) negative floating-point value representable by this type. + const NEG_TINY_BITS: Self::Item; + + /// A mask to clear the sign bit of the floating-point value's bitwise representation. + const CLEAR_SIGN_MASK: Self::Item; + + /// The integer value 1, used in bitwise operations. + const ONE: Self::Item; + + /// The integer value 0, used in bitwise operations. + const ZERO: Self::Item; + + /// Converts the floating-point value to its bitwise representation. + fn to_bits(self) -> Self::Item; + + /// Converts the bitwise representation to the corresponding floating-point value. + fn from_bits(bits: Self::Item) -> Self; + + /// Returns true if the floating-point value is NaN (not a number). + fn float_is_nan(self) -> bool; + + /// Returns the positive infinity value for this floating-point type. + fn infinity() -> Self; + + /// Returns the negative infinity value for this floating-point type. + fn neg_infinity() -> Self; +} + +impl FloatBits for f32 { + type Item = u32; + const TINY_BITS: u32 = 0x1; // Smallest positive f32. + const NEG_TINY_BITS: u32 = 0x8000_0001; // Smallest (in magnitude) negative f32. + const CLEAR_SIGN_MASK: u32 = 0x7fff_ffff; + const ONE: Self::Item = 1; + const ZERO: Self::Item = 0; + + fn to_bits(self) -> Self::Item { + self.to_bits() + } + + fn from_bits(bits: Self::Item) -> Self { + f32::from_bits(bits) + } + + fn float_is_nan(self) -> bool { + self.is_nan() + } + + fn infinity() -> Self { + f32::INFINITY + } + + fn neg_infinity() -> Self { + f32::NEG_INFINITY + } +} + +impl FloatBits for f64 { + type Item = u64; + const TINY_BITS: u64 = 0x1; + const NEG_TINY_BITS: u64 = 0x8000_0000_0000_0001; + // Smallest positive f64. + const CLEAR_SIGN_MASK: u64 = 0x7fff_ffff_ffff_ffff; + const ONE: Self::Item = 1; + const ZERO: Self::Item = 0; + + fn to_bits(self) -> Self::Item { + self.to_bits() + } + + fn from_bits(bits: Self::Item) -> Self { + f64::from_bits(bits) + } + + fn float_is_nan(self) -> bool { + self.is_nan() + } + + fn infinity() -> Self { + f64::INFINITY + } + + fn neg_infinity() -> Self { + f64::NEG_INFINITY + } +} +/// Returns the next representable floating-point value greater than the input value. +/// +/// This function takes a floating-point value that implements the FloatBits trait, +/// calculates the next representable value greater than the input, and returns it. +/// +/// If the input value is NaN or positive infinity, the function returns the input value. +/// +/// # Examples +/// +/// ``` +/// use datafusion_physical_expr::intervals::next_up; +/// use datafusion_physical_expr::intervals::FloatBits; +/// +/// let f: f32 = 1.0; +/// let next_f = next_up(f); +/// assert_eq!(next_f, 1.0000001); +/// ``` +pub fn next_up(float: F) -> F { + let bits = float.to_bits(); + if float.float_is_nan() || bits == F::infinity().to_bits() { + return float; + } + + let abs = bits & F::CLEAR_SIGN_MASK; + let next_bits = if abs == F::ZERO { + F::TINY_BITS + } else if bits == abs { + bits + F::ONE + } else { + bits - F::ONE + }; + F::from_bits(next_bits) +} + +/// Returns the next representable floating-point value smaller than the input value. +/// +/// This function takes a floating-point value that implements the FloatBits trait, +/// calculates the next representable value smaller than the input, and returns it. +/// +/// If the input value is NaN or negative infinity, the function returns the input value. +/// +/// # Examples +/// +/// ``` +/// use datafusion_physical_expr::intervals::next_down; +/// use datafusion_physical_expr::intervals::FloatBits; +/// +/// let f: f32 = 1.0; +/// let next_f = next_down(f); +/// assert_eq!(next_f, 0.99999994); +/// ``` +pub fn next_down(float: F) -> F { + let bits = float.to_bits(); + if float.float_is_nan() || bits == F::neg_infinity().to_bits() { + return float; + } + let abs = bits & F::CLEAR_SIGN_MASK; + let next_bits = if abs == F::ZERO { + F::NEG_TINY_BITS + } else if bits == abs { + bits - F::ONE + } else { + bits + F::ONE + }; + F::from_bits(next_bits) +} + +pub fn alter_round_mode_for_float_operation( + lhs: &ScalarValue, + rhs: &ScalarValue, + cls: Box, +) -> Result { + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + unsafe { + let current = fegetround(); + fesetround(if UPPER { FE_UPWARD } else { FE_DOWNWARD }); + let res = cls(lhs, rhs); + fesetround(current); + res + } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + match cls(lhs, rhs) { + Ok(ScalarValue::Float64(Some(val))) => Ok(ScalarValue::Float64(Some(if UPPER { + next_up(val) + } else { + next_down(val) + }))), + Ok(ScalarValue::Float32(Some(val))) => Ok(ScalarValue::Float32(Some(if UPPER { + next_up(val) + } else { + next_down(val) + }))), + val => val, + } +} + +#[cfg(test)] +mod tests { + use crate::intervals::{next_down, next_up}; + + #[test] + fn test_next_down() { + let x = 1.0f64; + // Clamp value into range [0, 1). + let clamped = x.clamp(0.0, next_down(1.0f64)); + assert!(clamped < 1.0); + assert_eq!(next_up(clamped), 1.0); + } + + #[test] + fn test_next_up_small_positive() { + let value: f64 = 1.0; + let result = next_up(value); + assert_eq!(result, 1.0000000000000002); + } + + #[test] + fn test_next_up_small_negative() { + let value: f64 = -1.0; + let result = next_up(value); + assert_eq!(result, -0.9999999999999999); + } + + #[test] + fn test_next_up_pos_infinity() { + let value: f64 = f64::INFINITY; + let result = next_up(value); + assert_eq!(result, f64::INFINITY); + } + + #[test] + fn test_next_up_nan() { + let value: f64 = f64::NAN; + let result = next_up(value); + assert!(result.is_nan()); + } + + #[test] + fn test_next_down_small_positive() { + let value: f64 = 1.0; + let result = next_down(value); + assert_eq!(result, 0.9999999999999999); + } + + #[test] + fn test_next_down_small_negative() { + let value: f64 = -1.0; + let result = next_down(value); + assert_eq!(result, -1.0000000000000002); + } + + #[test] + fn test_next_down_neg_infinity() { + let value: f64 = f64::NEG_INFINITY; + let result = next_down(value); + assert_eq!(result, f64::NEG_INFINITY); + } + + #[test] + fn test_next_down_nan() { + let value: f64 = f64::NAN; + let result = next_down(value); + assert!(result.is_nan()); + } + + #[test] + fn test_next_up_small_positive_f32() { + let value: f32 = 1.0; + let result = next_up(value); + assert_eq!(result, 1.0000001); + } + + #[test] + fn test_next_up_small_negative_f32() { + let value: f32 = -1.0; + let result = next_up(value); + assert_eq!(result, -0.99999994); + } + + #[test] + fn test_next_up_pos_infinity_f32() { + let value: f32 = f32::INFINITY; + let result = next_up(value); + assert_eq!(result, f32::INFINITY); + } + + #[test] + fn test_next_up_nan_f32() { + let value: f32 = f32::NAN; + let result = next_up(value); + assert!(result.is_nan()); + } + #[test] + fn test_next_down_small_positive_f32() { + let value: f32 = 1.0; + let result = next_down(value); + assert_eq!(result, 0.99999994); + } + #[test] + fn test_next_down_small_negative_f32() { + let value: f32 = -1.0; + let result = next_down(value); + assert_eq!(result, -1.0000001); + } + #[test] + fn test_next_down_neg_infinity_f32() { + let value: f32 = f32::NEG_INFINITY; + let result = next_down(value); + assert_eq!(result, f32::NEG_INFINITY); + } + #[test] + fn test_next_down_nan_f32() { + let value: f32 = f32::NAN; + let result = next_down(value); + assert!(result.is_nan()); + } +} diff --git a/datafusion/physical-expr/src/intervals/test_utils.rs b/datafusion/physical-expr/src/intervals/test_utils.rs index ba02f4ff7aac..b5bc2abcf1a9 100644 --- a/datafusion/physical-expr/src/intervals/test_utils.rs +++ b/datafusion/physical-expr/src/intervals/test_utils.rs @@ -65,3 +65,45 @@ pub fn gen_conjunctive_numeric_expr( let right_expr = Arc::new(BinaryExpr::new(right_and_1, Operator::Lt, right_and_2)); Arc::new(BinaryExpr::new(left_expr, Operator::And, right_expr)) } + +#[allow(clippy::too_many_arguments)] +/// This test function generates a conjunctive statement with two numeric +/// terms with the following form: +/// left_col (op_1) a > right_col (op_2) b AND left_col (op_3) c < right_col (op_4) d +pub fn gen_conjunctive_float64_expr( + left_col: Arc, + right_col: Arc, + op_1: Operator, + op_2: Operator, + op_3: Operator, + op_4: Operator, + a: f64, + b: f64, + c: f64, + d: f64, +) -> Arc { + let left_and_1 = Arc::new(BinaryExpr::new( + left_col.clone(), + op_1, + Arc::new(Literal::new(ScalarValue::Float64(Some(a)))), + )); + let left_and_2 = Arc::new(BinaryExpr::new( + right_col.clone(), + op_2, + Arc::new(Literal::new(ScalarValue::Float64(Some(b)))), + )); + + let right_and_1 = Arc::new(BinaryExpr::new( + left_col, + op_3, + Arc::new(Literal::new(ScalarValue::Float64(Some(c)))), + )); + let right_and_2 = Arc::new(BinaryExpr::new( + right_col, + op_4, + Arc::new(Literal::new(ScalarValue::Float64(Some(d)))), + )); + let left_expr = Arc::new(BinaryExpr::new(left_and_1, Operator::Gt, left_and_2)); + let right_expr = Arc::new(BinaryExpr::new(right_and_1, Operator::Lt, right_and_2)); + Arc::new(BinaryExpr::new(left_expr, Operator::And, right_expr)) +} From 1d08ce12af8fb388d876865227340067744525b4 Mon Sep 17 00:00:00 2001 From: metesynnada <100111937+metesynnada@users.noreply.github.com> Date: Tue, 11 Apr 2023 16:36:53 +0300 Subject: [PATCH 02/15] Testes float support --- .../joins/symmetric_hash_join.rs | 365 ++++---- .../physical-expr/src/intervals/cp_solver.rs | 811 ++++++++---------- .../src/intervals/interval_aritmetic.rs | 227 ++--- .../physical-expr/src/intervals/test_utils.rs | 77 +- 4 files changed, 657 insertions(+), 823 deletions(-) diff --git a/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs b/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs index 3af983d8f06a..0ecdd1251a80 100644 --- a/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs +++ b/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs @@ -1423,7 +1423,7 @@ impl SymmetricHashJoinStream { mod tests { use std::fs::File; - use arrow::array::ArrayRef; + use arrow::array::{ArrayRef, Float64Array}; use arrow::array::{Int32Array, TimestampNanosecondArray}; use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, Schema}; @@ -1433,7 +1433,7 @@ mod tests { use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{binary, col, Column}; - use datafusion_physical_expr::intervals::test_utils::gen_conjunctive_numeric_expr; + use datafusion_physical_expr::intervals::test_utils::gen_conjunctive_numerical_expr; use datafusion_physical_expr::PhysicalExpr; use crate::physical_plan::joins::{ @@ -1584,80 +1584,130 @@ mod tests { Ok(result) } - fn join_expr_tests_fixture( - expr_id: usize, - left_col: Arc, - right_col: Arc, - ) -> Arc { - match expr_id { - // left_col + 1 > right_col + 5 AND left_col + 3 < right_col + 10 - 0 => gen_conjunctive_numeric_expr( - left_col, - right_col, - Operator::Plus, - Operator::Plus, - Operator::Plus, - Operator::Plus, - 1, - 5, - 3, - 10, - ), - // left_col - 1 > right_col + 5 AND left_col + 3 < right_col + 10 - 1 => gen_conjunctive_numeric_expr( - left_col, - right_col, - Operator::Minus, - Operator::Plus, - Operator::Plus, - Operator::Plus, - 1, - 5, - 3, - 10, - ), - // left_col - 1 > right_col + 5 AND left_col - 3 < right_col + 10 - 2 => gen_conjunctive_numeric_expr( - left_col, - right_col, - Operator::Minus, - Operator::Plus, - Operator::Minus, - Operator::Plus, - 1, - 5, - 3, - 10, - ), - // left_col - 10 > right_col - 5 AND left_col - 3 < right_col + 10 - 3 => gen_conjunctive_numeric_expr( - left_col, - right_col, - Operator::Minus, - Operator::Minus, - Operator::Minus, - Operator::Plus, - 10, - 5, - 3, - 10, - ), - // left_col - 10 > right_col - 5 AND left_col - 30 < right_col - 3 - 4 => gen_conjunctive_numeric_expr( - left_col, - right_col, - Operator::Minus, - Operator::Minus, - Operator::Minus, - Operator::Minus, - 10, - 5, - 30, - 3, - ), - _ => unreachable!(), + macro_rules! join_expr_tests { + ($func_name:ident, $type:ty, $SCALAR:ident) => { + fn $func_name( + expr_id: usize, + left_col: Arc, + right_col: Arc, + ) -> Arc { + match expr_id { + // left_col + 1 > right_col + 5 AND left_col + 3 < right_col + 10 + 0 => gen_conjunctive_numerical_expr( + left_col, + right_col, + ( + Operator::Plus, + Operator::Plus, + Operator::Plus, + Operator::Plus, + ), + ScalarValue::$SCALAR(Some(1 as $type)), + ScalarValue::$SCALAR(Some(5 as $type)), + ScalarValue::$SCALAR(Some(3 as $type)), + ScalarValue::$SCALAR(Some(10 as $type)), + ), + // left_col - 1 > right_col + 5 AND left_col + 3 < right_col + 10 + 1 => gen_conjunctive_numerical_expr( + left_col, + right_col, + ( + Operator::Minus, + Operator::Plus, + Operator::Plus, + Operator::Plus, + ), + ScalarValue::$SCALAR(Some(1 as $type)), + ScalarValue::$SCALAR(Some(5 as $type)), + ScalarValue::$SCALAR(Some(3 as $type)), + ScalarValue::$SCALAR(Some(10 as $type)), + ), + // left_col - 1 > right_col + 5 AND left_col - 3 < right_col + 10 + 2 => gen_conjunctive_numerical_expr( + left_col, + right_col, + ( + Operator::Minus, + Operator::Plus, + Operator::Minus, + Operator::Plus, + ), + ScalarValue::$SCALAR(Some(1 as $type)), + ScalarValue::$SCALAR(Some(5 as $type)), + ScalarValue::$SCALAR(Some(3 as $type)), + ScalarValue::$SCALAR(Some(10 as $type)), + ), + // left_col - 10 > right_col - 5 AND left_col - 3 < right_col + 10 + 3 => gen_conjunctive_numerical_expr( + left_col, + right_col, + ( + Operator::Minus, + Operator::Minus, + Operator::Minus, + Operator::Plus, + ), + ScalarValue::$SCALAR(Some(10 as $type)), + ScalarValue::$SCALAR(Some(5 as $type)), + ScalarValue::$SCALAR(Some(3 as $type)), + ScalarValue::$SCALAR(Some(10 as $type)), + ), + // left_col - 10 > right_col - 5 AND left_col - 30 < right_col - 3 + 4 => gen_conjunctive_numerical_expr( + left_col, + right_col, + ( + Operator::Minus, + Operator::Minus, + Operator::Minus, + Operator::Minus, + ), + ScalarValue::$SCALAR(Some(10 as $type)), + ScalarValue::$SCALAR(Some(5 as $type)), + ScalarValue::$SCALAR(Some(30 as $type)), + ScalarValue::$SCALAR(Some(3 as $type)), + ), + _ => panic!("No case"), + } + } + }; + } + + join_expr_tests!(join_expr_tests_fixture_i32, i32, Int32); + join_expr_tests!(join_expr_tests_fixture_f64, f64, Float64); + + use rand::rngs::StdRng; + use rand::{Rng, SeedableRng}; + use std::iter::Iterator; + + struct AscendingRandomFloatIterator { + prev: f64, + max: f64, + rng: StdRng, + } + + impl AscendingRandomFloatIterator { + fn new(min: f64, max: f64) -> Self { + let mut rng = StdRng::seed_from_u64(42); + let initial = rng.gen_range(min..max); + AscendingRandomFloatIterator { + prev: initial, + max, + rng, + } + } + } + + impl Iterator for AscendingRandomFloatIterator { + type Item = f64; + + fn next(&mut self) -> Option { + let new_value = self.rng.gen_range(self.prev..self.max); + self.prev = new_value; + Some(new_value) } } + fn build_sides_record_batches( table_size: i32, key_cardinality: (i32, i32), @@ -1708,6 +1758,11 @@ mod tests { .collect::>(), )); + let float_asc = Arc::new(Float64Array::from_iter_values( + AscendingRandomFloatIterator::new(0., table_size as f64) + .take(table_size as usize), + )); + let left = RecordBatch::try_from_iter(vec![ ("la1", ordered.clone()), ("lb1", cardinality.clone()), @@ -1718,6 +1773,7 @@ mod tests { ("l_asc_null_first", ordered_asc_null_first.clone()), ("l_asc_null_last", ordered_asc_null_last.clone()), ("l_desc_null_first", ordered_desc_null_first.clone()), + ("l_float", float_asc.clone()), ])?; let right = RecordBatch::try_from_iter(vec![ ("ra1", ordered.clone()), @@ -1729,6 +1785,7 @@ mod tests { ("r_asc_null_first", ordered_asc_null_first), ("r_asc_null_last", ordered_asc_null_last), ("r_desc_null_first", ordered_desc_null_first), + ("r_float", float_asc), ])?; Ok((left, right)) } @@ -1913,7 +1970,7 @@ mod tests { Field::new("left", DataType::Int32, true), Field::new("right", DataType::Int32, true), ]); - let filter_expr = join_expr_tests_fixture( + let filter_expr = join_expr_tests_fixture_i32( case_expr, col("left", &intermediate_schema)?, col("right", &intermediate_schema)?, @@ -1934,65 +1991,6 @@ mod tests { Ok(()) } - #[tokio::test(flavor = "multi_thread")] - async fn single_test() -> Result<()> { - let case_expr = 1; - let cardinality = (11, 21); - let join_type = JoinType::Full; - let config = SessionConfig::new().with_repartition_joins(false); - let session_ctx = SessionContext::with_config(config); - let task_ctx = session_ctx.task_ctx(); - let (left_batch, right_batch) = - build_sides_record_batches(TABLE_SIZE, cardinality)?; - let left_schema = &left_batch.schema(); - let right_schema = &right_batch.schema(); - let left_sorted = vec![PhysicalSortExpr { - expr: col("la1_des", left_schema)?, - options: SortOptions { - descending: true, - nulls_first: true, - }, - }]; - let right_sorted = vec![PhysicalSortExpr { - expr: col("ra1_des", right_schema)?, - options: SortOptions { - descending: true, - nulls_first: true, - }, - }]; - let (left, right) = - create_memory_table(left_batch, right_batch, left_sorted, right_sorted, 13)?; - - let on = vec![( - Column::new_with_schema("lc1", left_schema)?, - Column::new_with_schema("rc1", right_schema)?, - )]; - - let intermediate_schema = Schema::new(vec![ - Field::new("left", DataType::Int32, true), - Field::new("right", DataType::Int32, true), - ]); - let filter_expr = join_expr_tests_fixture( - case_expr, - col("left", &intermediate_schema)?, - col("right", &intermediate_schema)?, - ); - let column_indices = vec![ - ColumnIndex { - index: 5, - side: JoinSide::Left, - }, - ColumnIndex { - index: 5, - side: JoinSide::Right, - }, - ]; - let filter = JoinFilter::new(filter_expr, column_indices, intermediate_schema); - - experiment(left, right, filter, join_type, on, task_ctx).await?; - Ok(()) - } - #[rstest] #[tokio::test(flavor = "multi_thread")] async fn join_all_one_descending_numeric_particular( @@ -2049,7 +2047,7 @@ mod tests { Field::new("left", DataType::Int32, true), Field::new("right", DataType::Int32, true), ]); - let filter_expr = join_expr_tests_fixture( + let filter_expr = join_expr_tests_fixture_i32( case_expr, col("left", &intermediate_schema)?, col("right", &intermediate_schema)?, @@ -2141,7 +2139,7 @@ mod tests { Field::new("left", DataType::Int32, true), Field::new("right", DataType::Int32, true), ]); - let filter_expr = join_expr_tests_fixture( + let filter_expr = join_expr_tests_fixture_i32( case_expr, col("left", &intermediate_schema)?, col("right", &intermediate_schema)?, @@ -2199,7 +2197,7 @@ mod tests { Field::new("left", DataType::Int32, true), Field::new("right", DataType::Int32, true), ]); - let filter_expr = join_expr_tests_fixture( + let filter_expr = join_expr_tests_fixture_i32( case_expr, col("left", &intermediate_schema)?, col("right", &intermediate_schema)?, @@ -2258,7 +2256,7 @@ mod tests { Field::new("left", DataType::Int32, true), Field::new("right", DataType::Int32, true), ]); - let filter_expr = join_expr_tests_fixture( + let filter_expr = join_expr_tests_fixture_i32( case_expr, col("left", &intermediate_schema)?, col("right", &intermediate_schema)?, @@ -2386,17 +2384,19 @@ mod tests { Field::new("0", DataType::Int32, true), Field::new("1", DataType::Int32, true), ]); - let filter_expr = gen_conjunctive_numeric_expr( + let filter_expr = gen_conjunctive_numerical_expr( col("0", &intermediate_schema)?, col("1", &intermediate_schema)?, - Operator::Plus, - Operator::Minus, - Operator::Plus, - Operator::Plus, - 0, - 3, - 0, - 3, + ( + Operator::Plus, + Operator::Minus, + Operator::Plus, + Operator::Plus, + ), + ScalarValue::Int32(Some(0)), + ScalarValue::Int32(Some(3)), + ScalarValue::Int32(Some(0)), + ScalarValue::Int32(Some(3)), ); let column_indices = vec![ ColumnIndex { @@ -2470,4 +2470,75 @@ mod tests { assert_eq!(left_side_joiner.visited_rows.is_empty(), should_be_empty); Ok(()) } + + #[rstest] + #[tokio::test(flavor = "multi_thread")] + async fn testing_ascending_float_pruning( + #[values( + JoinType::Inner, + JoinType::Left, + JoinType::Right, + JoinType::RightSemi, + JoinType::LeftSemi, + JoinType::LeftAnti, + JoinType::RightAnti, + JoinType::Full + )] + join_type: JoinType, + #[values( + (4, 5), + (11, 21), + (31, 71), + (99, 12), + )] + cardinality: (i32, i32), + #[values(0, 1, 2, 3, 4)] case_expr: usize, + ) -> Result<()> { + let config = SessionConfig::new().with_repartition_joins(false); + let session_ctx = SessionContext::with_config(config); + let task_ctx = session_ctx.task_ctx(); + let (left_batch, right_batch) = + build_sides_record_batches(TABLE_SIZE, cardinality)?; + let left_schema = &left_batch.schema(); + let right_schema = &right_batch.schema(); + let left_sorted = vec![PhysicalSortExpr { + expr: col("l_float", left_schema)?, + options: SortOptions::default(), + }]; + let right_sorted = vec![PhysicalSortExpr { + expr: col("r_float", right_schema)?, + options: SortOptions::default(), + }]; + let (left, right) = + create_memory_table(left_batch, right_batch, left_sorted, right_sorted, 13)?; + + let on = vec![( + Column::new_with_schema("lc1", left_schema)?, + Column::new_with_schema("rc1", right_schema)?, + )]; + + let intermediate_schema = Schema::new(vec![ + Field::new("left", DataType::Float64, true), + Field::new("right", DataType::Float64, true), + ]); + let filter_expr = join_expr_tests_fixture_f64( + case_expr, + col("left", &intermediate_schema)?, + col("right", &intermediate_schema)?, + ); + let column_indices = vec![ + ColumnIndex { + index: 9, + side: JoinSide::Left, + }, + ColumnIndex { + index: 9, + side: JoinSide::Right, + }, + ]; + let filter = JoinFilter::new(filter_expr, column_indices, intermediate_schema); + + experiment(left, right, filter, join_type, on, task_ctx).await?; + Ok(()) + } } diff --git a/datafusion/physical-expr/src/intervals/cp_solver.rs b/datafusion/physical-expr/src/intervals/cp_solver.rs index 01c87eb5622c..f1b4eb659878 100644 --- a/datafusion/physical-expr/src/intervals/cp_solver.rs +++ b/datafusion/physical-expr/src/intervals/cp_solver.rs @@ -524,10 +524,10 @@ impl ExprIntervalGraph { #[cfg(test)] mod tests { use super::*; - use crate::intervals::test_utils::{gen_conjunctive_float64_expr, gen_conjunctive_numeric_expr}; use itertools::Itertools; use crate::expressions::{BinaryExpr, Column}; + use crate::intervals::test_utils::gen_conjunctive_numerical_expr; use datafusion_common::ScalarValue; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; @@ -536,43 +536,19 @@ mod tests { fn experiment( expr: Arc, exprs_with_interval: (Arc, Arc), - left_interval: (Option, Option), - right_interval: (Option, Option), - left_waited: (Option, Option), - right_waited: (Option, Option), + left_interval: Interval, + right_interval: Interval, + left_waited: Interval, + right_waited: Interval, result: PropagationResult, ) -> Result<()> { let col_stats = vec![ - ( - exprs_with_interval.0.clone(), - Interval { - lower: ScalarValue::Int32(left_interval.0), - upper: ScalarValue::Int32(left_interval.1), - }, - ), - ( - exprs_with_interval.1.clone(), - Interval { - lower: ScalarValue::Int32(right_interval.0), - upper: ScalarValue::Int32(right_interval.1), - }, - ), + (exprs_with_interval.0.clone(), left_interval), + (exprs_with_interval.1.clone(), right_interval), ]; let expected = vec![ - ( - exprs_with_interval.0.clone(), - Interval { - lower: ScalarValue::Int32(left_waited.0), - upper: ScalarValue::Int32(left_waited.1), - }, - ), - ( - exprs_with_interval.1.clone(), - Interval { - lower: ScalarValue::Int32(right_waited.0), - upper: ScalarValue::Int32(right_waited.1), - }, - ), + (exprs_with_interval.0.clone(), left_waited), + (exprs_with_interval.1.clone(), right_waited), ]; let mut graph = ExprIntervalGraph::try_new(expr)?; let expr_indexes = graph @@ -591,195 +567,110 @@ mod tests { let exp_result = graph.update_ranges(&mut col_stat_nodes[..])?; assert_eq!(exp_result, result); - col_stat_nodes - .iter() - .zip(expected_nodes.iter()) - .for_each(|((_, res), (_, expected))| assert_eq!(res, expected)); - Ok(()) - } - - fn generate_case( - expr: Arc, - left_col: Arc, - right_col: Arc, - seed: u64, - expr_left: i32, - expr_right: i32, - ) -> Result<()> { - let mut r = StdRng::seed_from_u64(seed); - - let (left_interval, right_interval, left_waited, right_waited) = if ASC { - let left = (Some(r.gen_range(0..1000)), None); - let right = (Some(r.gen_range(0..1000)), None); - ( - left, - right, - ( - Some(std::cmp::max(left.0.unwrap(), right.0.unwrap() + expr_left)), - None, - ), - ( - Some(std::cmp::max( - right.0.unwrap(), - left.0.unwrap() + expr_right, - )), - None, - ), - ) - } else { - let left = (None, Some(r.gen_range(0..1000))); - let right = (None, Some(r.gen_range(0..1000))); - ( - left, - right, - ( - None, - Some(std::cmp::min(left.1.unwrap(), right.1.unwrap() + expr_left)), - ), - ( - None, - Some(std::cmp::min( - right.1.unwrap(), - left.1.unwrap() + expr_right, - )), - ), - ) - }; - experiment( - expr, - (left_col, right_col), - left_interval, - right_interval, - left_waited, - right_waited, - PropagationResult::Success, - )?; + col_stat_nodes.iter().zip(expected_nodes.iter()).for_each( + |((_, calculated_interval_node), (_, expected))| { + let ( + Interval { + lower: calc_lower, + upper: calc_upper, + }, + Interval { + lower: expected_lower, + upper: expected_upper, + }, + ) = (calculated_interval_node, expected); + assert!(calc_lower <= expected_lower); + assert!(calc_upper >= expected_upper); + }, + ); Ok(()) } - fn experiment_float( - expr: Arc, - exprs_with_interval: (Arc, Arc), - left_interval: (Option, Option), - right_interval: (Option, Option), - left_waited: (Option, Option), - right_waited: (Option, Option), - result: PropagationResult, - ) -> Result<()> { - let col_stats = vec![ - ( - exprs_with_interval.0.clone(), - Interval { - lower: ScalarValue::Float64(left_interval.0), - upper: ScalarValue::Float64(left_interval.1), - }, - ), - ( - exprs_with_interval.1.clone(), - Interval { - lower: ScalarValue::Float64(right_interval.0), - upper: ScalarValue::Float64(right_interval.1), - }, - ), - ]; - let expected = vec![ - ( - exprs_with_interval.0.clone(), - Interval { - lower: ScalarValue::Float64(left_waited.0), - upper: ScalarValue::Float64(left_waited.1), - }, - ), - ( - exprs_with_interval.1.clone(), - Interval { - lower: ScalarValue::Float64(right_waited.0), - upper: ScalarValue::Float64(right_waited.1), - }, - ), - ]; - let mut graph = ExprIntervalGraph::try_new(expr)?; - let expr_indexes = graph - .gather_node_indices(&col_stats.iter().map(|(e, _)| e.clone()).collect_vec()); - - let mut col_stat_nodes = col_stats - .iter() - .zip(expr_indexes.iter()) - .map(|((_, interval), (_, index))| (*index, interval.clone())) - .collect_vec(); - let expected_nodes = expected - .iter() - .zip(expr_indexes.iter()) - .map(|((_, interval), (_, index))| (*index, interval.clone())) - .collect_vec(); - - let exp_result = graph.update_ranges(&mut col_stat_nodes[..])?; - assert_eq!(exp_result, result); - col_stat_nodes - .iter() - .zip(expected_nodes.iter()) - .for_each(|((_, res), (_, expected))| assert_eq!(res, expected)); - Ok(()) - } + macro_rules! generate_cases { + ($func_name:ident, $type:ty, $SCALAR:ident) => { + fn $func_name( + expr: Arc, + left_col: Arc, + right_col: Arc, + seed: u64, + expr_left: $type, + expr_right: $type, + ) -> Result<()> { + let mut r = StdRng::seed_from_u64(seed); - fn generate_float_case( - expr: Arc, - left_col: Arc, - right_col: Arc, - seed: u64, - expr_left: f64, - expr_right: f64, - ) -> Result<()> { - let mut r = StdRng::seed_from_u64(seed); + let (left_interval, right_interval, left_waited, right_waited) = if ASC { + let left = (Some(r.gen_range((0 as $type)..(1000 as $type))), None); + let right = (Some(r.gen_range((0 as $type)..(1000 as $type))), None); + ( + left, + right, + ( + Some(<$type>::max( + left.0.unwrap(), + right.0.unwrap() + expr_left, + )), + None, + ), + ( + Some(<$type>::max( + right.0.unwrap(), + left.0.unwrap() + expr_right, + )), + None, + ), + ) + } else { + let left = (None, Some(r.gen_range((0 as $type)..(1000 as $type)))); + let right = (None, Some(r.gen_range((0 as $type)..(1000 as $type)))); + ( + left, + right, + ( + None, + Some(<$type>::min( + left.1.unwrap(), + right.1.unwrap() + expr_left, + )), + ), + ( + None, + Some(<$type>::min( + right.1.unwrap(), + left.1.unwrap() + expr_right, + )), + ), + ) + }; - let (left_interval, right_interval, left_waited, right_waited) = if ASC { - let left = (Some(r.gen_range(0.0..1000.0)), None); - let right = (Some(r.gen_range(0.0..1000.0)), None); - ( - left, - right, - ( - Some(f64::max(left.0.unwrap(), right.0.unwrap() + expr_left)), - None, - ), - ( - Some(f64::max( - right.0.unwrap(), - left.0.unwrap() + expr_right, - )), - None, - ), - ) - } else { - let left = (None, Some(r.gen_range(0.0..1000.0))); - let right = (None, Some(r.gen_range(0.0..1000.0))); - ( - left, - right, - ( - None, - Some(f64::min(left.1.unwrap(), right.1.unwrap() + expr_left)), - ), - ( - None, - Some(f64::min( - right.1.unwrap(), - left.1.unwrap() + expr_right, - )), - ), - ) + experiment( + expr, + (left_col, right_col), + Interval { + lower: ScalarValue::$SCALAR(left_interval.0), + upper: ScalarValue::$SCALAR(left_interval.1), + }, + Interval { + lower: ScalarValue::$SCALAR(right_interval.0), + upper: ScalarValue::$SCALAR(right_interval.1), + }, + Interval { + lower: ScalarValue::$SCALAR(left_waited.0), + upper: ScalarValue::$SCALAR(left_waited.1), + }, + Interval { + lower: ScalarValue::$SCALAR(right_waited.0), + upper: ScalarValue::$SCALAR(right_waited.1), + }, + PropagationResult::Success, + )?; + Ok(()) + } }; - experiment_float( - expr, - (left_col, right_col), - left_interval, - right_interval, - left_waited, - right_waited, - PropagationResult::Success, - )?; - Ok(()) } + generate_cases!(generate_case_i32, i32, Int32); + generate_cases!(generate_case_i64, i64, Int64); + generate_cases!(generate_case_f32, f32, Float32); + generate_cases!(generate_case_f64, f64, Float64); #[test] fn testing_not_possible() -> Result<()> { @@ -796,250 +687,292 @@ mod tests { experiment( expr, (left_col, right_col), - (Some(10), Some(20)), - (Some(100), None), - (Some(10), Some(20)), - (Some(100), None), + Interval { + lower: ScalarValue::Int32(Some(10)), + upper: ScalarValue::Int32(Some(20)), + }, + Interval { + lower: ScalarValue::Int32(Some(100)), + upper: ScalarValue::Int32(None), + }, + Interval { + lower: ScalarValue::Int32(Some(10)), + upper: ScalarValue::Int32(Some(20)), + }, + Interval { + lower: ScalarValue::Int32(Some(100)), + upper: ScalarValue::Int32(None), + }, PropagationResult::Infeasible, )?; Ok(()) } - #[rstest] - #[test] - fn case_1( - #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 123, 4123)] seed: u64, - ) -> Result<()> { - let left_col = Arc::new(Column::new("left_watermark", 0)); - let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark + 1 > right_watermark + 11 AND left_watermark + 3 < right_watermark + 33 - let expr = gen_conjunctive_numeric_expr( - left_col.clone(), - right_col.clone(), - Operator::Plus, - Operator::Plus, - Operator::Plus, - Operator::Plus, - 1, - 11, - 3, - 33, - ); - // l > r + 10 AND r > l - 30 - let l_gt_r = 10; - let r_gt_l = -30; - generate_case::( - expr.clone(), - left_col.clone(), - right_col.clone(), - seed, - l_gt_r, - r_gt_l, - )?; - // Descending tests - // r < l - 10 AND l < r + 30 - let r_lt_l = -l_gt_r; - let l_lt_r = -r_gt_l; - generate_case::(expr, left_col, right_col, seed, l_lt_r, r_lt_l)?; + macro_rules! integer_float_case_1 { + ($test_func:ident, $generate_case_func:ident, $type:ty, $SCALAR:ident) => { + #[rstest] + #[test] + fn $test_func( + #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 123, 4123)] seed: u64, + ) -> Result<()> { + let left_col = Arc::new(Column::new("left_watermark", 0)); + let right_col = Arc::new(Column::new("right_watermark", 0)); + // left_watermark - 10 > right_watermark - 5 AND left_watermark - 30 < right_watermark - 3 - Ok(()) + let expr = gen_conjunctive_numerical_expr( + left_col.clone(), + right_col.clone(), + ( + Operator::Minus, + Operator::Minus, + Operator::Minus, + Operator::Plus, + ), + ScalarValue::$SCALAR(Some(1 as $type)), + ScalarValue::$SCALAR(Some(11 as $type)), + ScalarValue::$SCALAR(Some(3 as $type)), + ScalarValue::$SCALAR(Some(33 as $type)), + ); + // l > r + 10 AND r > l - 30 + let l_gt_r = 10 as $type; + let r_gt_l = -30 as $type; + $generate_case_func::( + expr.clone(), + left_col.clone(), + right_col.clone(), + seed, + l_gt_r, + r_gt_l, + )?; + // Descending tests + // r < l - 10 AND l < r + 30 + let r_lt_l = -l_gt_r; + let l_lt_r = -r_gt_l; + $generate_case_func::( + expr, left_col, right_col, seed, l_lt_r, r_lt_l, + )?; + Ok(()) + } + }; } - #[test] - fn case_1_float() -> Result<()> { - let left_col = Arc::new(Column::new("left_watermark", 0)); - let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark + 1 > right_watermark + 11 AND left_watermark + 3 < right_watermark + 33 - let expr = gen_conjunctive_float64_expr( - left_col.clone(), - right_col.clone(), - Operator::Plus, - Operator::Plus, - Operator::Plus, - Operator::Plus, - 1.0, - 11.0, - 3.0, - 33.0, - ); - // l > r + 10 AND r > l - 30 - let l_gt_r = 10.0; - let r_gt_l = -30.0; - generate_float_case::( - expr.clone(), - left_col.clone(), - right_col.clone(), - 1, - l_gt_r, - r_gt_l, - )?; - // Descending tests - // r < l - 10 AND l < r + 30 - let r_lt_l = -l_gt_r; - let l_lt_r = -r_gt_l; - generate_float_case::(expr, left_col, right_col, 1, l_lt_r, r_lt_l)?; + integer_float_case_1!(case_1_i32, generate_case_i32, i32, Int32); + integer_float_case_1!(case_1_i64, generate_case_i64, i64, Int64); + integer_float_case_1!(case_1_f64, generate_case_f64, f64, Float64); + integer_float_case_1!(case_1_f32, generate_case_f32, f32, Float32); - Ok(()) + macro_rules! integer_float_case_2 { + ($test_func:ident, $generate_case_func:ident, $type:ty, $SCALAR:ident) => { + #[rstest] + #[test] + fn $test_func( + #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 123, 4123)] seed: u64, + ) -> Result<()> { + let left_col = Arc::new(Column::new("left_watermark", 0)); + let right_col = Arc::new(Column::new("right_watermark", 0)); + // left_watermark - 10 > right_watermark - 5 AND left_watermark - 30 < right_watermark - 3 + + let expr = gen_conjunctive_numerical_expr( + left_col.clone(), + right_col.clone(), + ( + Operator::Minus, + Operator::Minus, + Operator::Minus, + Operator::Plus, + ), + ScalarValue::$SCALAR(Some(1 as $type)), + ScalarValue::$SCALAR(Some(5 as $type)), + ScalarValue::$SCALAR(Some(3 as $type)), + ScalarValue::$SCALAR(Some(10 as $type)), + ); + // l > r + 6 AND r > l - 7 + let l_gt_r = 6 as $type; + let r_gt_l = -7 as $type; + $generate_case_func::( + expr.clone(), + left_col.clone(), + right_col.clone(), + seed, + l_gt_r, + r_gt_l, + )?; + // Descending tests + // r < l - 6 AND l < r + 7 + let r_lt_l = -l_gt_r; + let l_lt_r = -r_gt_l; + $generate_case_func::( + expr, left_col, right_col, seed, l_lt_r, r_lt_l, + )?; + Ok(()) + } + }; } - #[rstest] - #[test] - fn case_2( - #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 123, 4123)] seed: u64, - ) -> Result<()> { - let left_col = Arc::new(Column::new("left_watermark", 0)); - let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark - 1 > right_watermark + 5 AND left_watermark + 3 < right_watermark + 10 - let expr = gen_conjunctive_numeric_expr( - left_col.clone(), - right_col.clone(), - Operator::Minus, - Operator::Plus, - Operator::Plus, - Operator::Plus, - 1, - 5, - 3, - 10, - ); - // l > r + 6 AND r > l - 7 - let l_gt_r = 6; - let r_gt_l = -7; - generate_case::( - expr.clone(), - left_col.clone(), - right_col.clone(), - seed, - l_gt_r, - r_gt_l, - )?; - // Descending tests - // r < l - 6 AND l < r + 7 - let r_lt_l = -l_gt_r; - let l_lt_r = -r_gt_l; - generate_case::(expr, left_col, right_col, seed, l_lt_r, r_lt_l)?; + integer_float_case_2!(case_2_i32, generate_case_i32, i32, Int32); + integer_float_case_2!(case_2_i64, generate_case_i64, i64, Int64); + integer_float_case_2!(case_2_f64, generate_case_f64, f64, Float64); + integer_float_case_2!(case_2_f32, generate_case_f32, f32, Float32); - Ok(()) + macro_rules! integer_float_case_3 { + ($test_func:ident, $generate_case_func:ident, $type:ty, $SCALAR:ident) => { + #[rstest] + #[test] + fn $test_func( + #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 123, 4123)] seed: u64, + ) -> Result<()> { + let left_col = Arc::new(Column::new("left_watermark", 0)); + let right_col = Arc::new(Column::new("right_watermark", 0)); + // left_watermark - 10 > right_watermark - 5 AND left_watermark - 30 < right_watermark - 3 + + let expr = gen_conjunctive_numerical_expr( + left_col.clone(), + right_col.clone(), + ( + Operator::Minus, + Operator::Minus, + Operator::Minus, + Operator::Plus, + ), + ScalarValue::$SCALAR(Some(10 as $type)), + ScalarValue::$SCALAR(Some(5 as $type)), + ScalarValue::$SCALAR(Some(3 as $type)), + ScalarValue::$SCALAR(Some(10 as $type)), + ); + // l > r + 6 AND r > l - 13 + let l_gt_r = 6 as $type; + let r_gt_l = -13 as $type; + $generate_case_func::( + expr.clone(), + left_col.clone(), + right_col.clone(), + seed, + l_gt_r, + r_gt_l, + )?; + // Descending tests + // r < l - 6 AND l < r + 13 + let r_lt_l = -l_gt_r; + let l_lt_r = -r_gt_l; + $generate_case_func::( + expr, left_col, right_col, seed, l_lt_r, r_lt_l, + )?; + Ok(()) + } + }; } - #[rstest] - #[test] - fn case_3( - #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 123, 4123)] seed: u64, - ) -> Result<()> { - let left_col = Arc::new(Column::new("left_watermark", 0)); - let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark - 1 > right_watermark + 5 AND left_watermark - 3 < right_watermark + 10 - let expr = gen_conjunctive_numeric_expr( - left_col.clone(), - right_col.clone(), - Operator::Minus, - Operator::Plus, - Operator::Minus, - Operator::Plus, - 1, - 5, - 3, - 10, - ); - // l > r + 6 AND r > l - 13 - let l_gt_r = 6; - let r_gt_l = -13; - generate_case::( - expr.clone(), - left_col.clone(), - right_col.clone(), - seed, - l_gt_r, - r_gt_l, - )?; - // Descending tests - // r < l - 6 AND l < r + 13 - let r_lt_l = -l_gt_r; - let l_lt_r = -r_gt_l; - generate_case::(expr, left_col, right_col, seed, l_lt_r, r_lt_l)?; + integer_float_case_3!(case_3_i32, generate_case_i32, i32, Int32); + integer_float_case_3!(case_3_i64, generate_case_i64, i64, Int64); + integer_float_case_3!(case_3_f64, generate_case_f64, f64, Float64); + integer_float_case_3!(case_3_f32, generate_case_f32, f32, Float32); - Ok(()) - } - #[rstest] - #[test] - fn case_4( - #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 123, 4123)] seed: u64, - ) -> Result<()> { - let left_col = Arc::new(Column::new("left_watermark", 0)); - let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark - 10 > right_watermark - 5 AND left_watermark - 3 < right_watermark + 10 - let expr = gen_conjunctive_numeric_expr( - left_col.clone(), - right_col.clone(), - Operator::Minus, - Operator::Minus, - Operator::Minus, - Operator::Plus, - 10, - 5, - 3, - 10, - ); - // l > r + 5 AND r > l - 13 - let l_gt_r = 5; - let r_gt_l = -13; - generate_case::( - expr.clone(), - left_col.clone(), - right_col.clone(), - seed, - l_gt_r, - r_gt_l, - )?; - // Descending tests - // r < l - 5 AND l < r + 13 - let r_lt_l = -l_gt_r; - let l_lt_r = -r_gt_l; - generate_case::(expr, left_col, right_col, seed, l_lt_r, r_lt_l)?; - Ok(()) + macro_rules! integer_float_case_4 { + ($test_func:ident, $generate_case_func:ident, $type:ty, $SCALAR:ident) => { + #[rstest] + #[test] + fn $test_func( + #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 123, 4123)] seed: u64, + ) -> Result<()> { + let left_col = Arc::new(Column::new("left_watermark", 0)); + let right_col = Arc::new(Column::new("right_watermark", 0)); + // left_watermark - 10 > right_watermark - 5 AND left_watermark - 30 < right_watermark - 3 + + let expr = gen_conjunctive_numerical_expr( + left_col.clone(), + right_col.clone(), + ( + Operator::Minus, + Operator::Minus, + Operator::Minus, + Operator::Plus, + ), + ScalarValue::$SCALAR(Some(10 as $type)), + ScalarValue::$SCALAR(Some(5 as $type)), + ScalarValue::$SCALAR(Some(3 as $type)), + ScalarValue::$SCALAR(Some(10 as $type)), + ); + // l > r + 5 AND r > l - 13 + let l_gt_r = 5 as $type; + let r_gt_l = -13 as $type; + $generate_case_func::( + expr.clone(), + left_col.clone(), + right_col.clone(), + seed, + l_gt_r, + r_gt_l, + )?; + // Descending tests + // r < l - 5 AND l < r + 13 + let r_lt_l = -l_gt_r; + let l_lt_r = -r_gt_l; + $generate_case_func::( + expr, left_col, right_col, seed, l_lt_r, r_lt_l, + )?; + Ok(()) + } + }; } - #[rstest] - #[test] - fn case_5( - #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 123, 4123)] seed: u64, - ) -> Result<()> { - let left_col = Arc::new(Column::new("left_watermark", 0)); - let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark - 10 > right_watermark - 5 AND left_watermark - 30 < right_watermark - 3 + integer_float_case_4!(case_4_i32, generate_case_i32, i32, Int32); + integer_float_case_4!(case_4_i64, generate_case_i64, i64, Int64); + integer_float_case_4!(case_4_f64, generate_case_f64, f64, Float64); + integer_float_case_4!(case_4_f32, generate_case_f32, f32, Float32); - let expr = gen_conjunctive_numeric_expr( - left_col.clone(), - right_col.clone(), - Operator::Minus, - Operator::Minus, - Operator::Minus, - Operator::Minus, - 10, - 5, - 30, - 3, - ); - // l > r + 5 AND r > l - 27 - let l_gt_r = 5; - let r_gt_l = -27; - generate_case::( - expr.clone(), - left_col.clone(), - right_col.clone(), - seed, - l_gt_r, - r_gt_l, - )?; - // Descending tests - // r < l - 5 AND l < r + 27 - let r_lt_l = -l_gt_r; - let l_lt_r = -r_gt_l; - generate_case::(expr, left_col, right_col, seed, l_lt_r, r_lt_l)?; - Ok(()) + macro_rules! integer_float_case_5 { + ($test_func:ident, $generate_case_func:ident, $type:ty, $SCALAR:ident) => { + #[rstest] + #[test] + fn $test_func( + #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 123, 4123)] seed: u64, + ) -> Result<()> { + let left_col = Arc::new(Column::new("left_watermark", 0)); + let right_col = Arc::new(Column::new("right_watermark", 0)); + // left_watermark - 10 > right_watermark - 5 AND left_watermark - 30 < right_watermark - 3 + + let expr = gen_conjunctive_numerical_expr( + left_col.clone(), + right_col.clone(), + ( + Operator::Minus, + Operator::Minus, + Operator::Minus, + Operator::Minus, + ), + ScalarValue::$SCALAR(Some(10 as $type)), + ScalarValue::$SCALAR(Some(5 as $type)), + ScalarValue::$SCALAR(Some(30 as $type)), + ScalarValue::$SCALAR(Some(3 as $type)), + ); + // l > r + 5 AND r > l - 27 + let l_gt_r = 5 as $type; + let r_gt_l = -27 as $type; + $generate_case_func::( + expr.clone(), + left_col.clone(), + right_col.clone(), + seed, + l_gt_r, + r_gt_l, + )?; + // Descending tests + // r < l - 5 AND l < r + 27 + let r_lt_l = -l_gt_r; + let l_lt_r = -r_gt_l; + $generate_case_func::( + expr, left_col, right_col, seed, l_lt_r, r_lt_l, + )?; + Ok(()) + } + }; } + integer_float_case_5!(case_5_i32, generate_case_i32, i32, Int32); + integer_float_case_5!(case_5_i64, generate_case_i64, i64, Int64); + integer_float_case_5!(case_5_f64, generate_case_f64, f64, Float64); + integer_float_case_5!(case_5_f32, generate_case_f32, f32, Float32); + #[test] fn test_gather_node_indices_dont_remove() -> Result<()> { // Expression: a@0 + b@1 + 1 > a@0 - b@1, given a@0 + b@1. diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index 534d5355cc24..d2941e01b3fe 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -568,43 +568,63 @@ mod tests { Ok(()) } - fn create_f32_interval(lower: f32, upper: f32) -> Interval { - Interval { - lower: ScalarValue::Float32(Some(lower)), - upper: ScalarValue::Float32(Some(upper)), - } + macro_rules! create_interval { + ($test_func:ident, $type:ty, $SCALAR:ident) => { + fn $test_func(lower: $type, upper: $type) -> Interval { + Interval { + lower: ScalarValue::$SCALAR(Some(lower)), + upper: ScalarValue::$SCALAR(Some(upper)), + } + } + }; } - fn create_f64_interval(lower: f64, upper: f64) -> Interval { - Interval { - lower: ScalarValue::Float64(Some(lower)), - upper: ScalarValue::Float64(Some(upper)), - } + create_interval!(create_f32_interval, f32, Float32); + create_interval!(create_f64_interval, f64, Float64); + + macro_rules! capture_mode_change { + ($test_func:ident, $interval_create:ident, $type:ty, $SCALAR:ident) => { + fn $test_func(input: ($type, $type), waiting_change: (bool, bool)) { + assert!(waiting_change.0 || waiting_change.1); + let interval1 = $interval_create(input.0, input.1); + let interval2 = $interval_create(input.1, input.0); + let result = interval1.add(&interval2).unwrap(); + match ( + result, + $interval_create(input.0 + input.1, input.0 + input.1), + ) { + ( + Interval { + lower: ScalarValue::$SCALAR(Some(result_lower)), + upper: ScalarValue::$SCALAR(Some(result_upper)), + }, + Interval { + lower: ScalarValue::$SCALAR(Some(without_fe_lower)), + upper: ScalarValue::$SCALAR(Some(without_fe_upper)), + }, + ) => { + if waiting_change.0 { + assert!(result_lower < without_fe_lower); + } + if waiting_change.1 { + assert!(result_upper > without_fe_upper); + } + } + _ => unreachable!(), + } + } + }; } + capture_mode_change!(capture_mode_change_f32, create_f32_interval, f32, Float32); + capture_mode_change!(capture_mode_change_f64, create_f64_interval, f64, Float64); + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] #[test] fn test_add_intervals_lower_affected_f32() { let lower = f32::from_bits(1073741887); let upper = f32::from_bits(1098907651); - let interval1 = create_f32_interval(lower, upper); - let interval2 = create_f32_interval(upper, lower); - let result = interval1.add(&interval2).unwrap(); - match (result, create_f32_interval(lower + upper, lower + upper)) { - ( - Interval { - lower: ScalarValue::Float32(Some(result_lower)), - .. - }, - Interval { - lower: ScalarValue::Float32(Some(without_fe_lower)), - .. - }, - ) => { - assert!(result_lower < without_fe_lower); - } - _ => unreachable!(), - } + capture_mode_change_f32((lower, upper), (true, false)); } #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] @@ -612,24 +632,7 @@ mod tests { fn test_add_intervals_upper_affected_f32() { let lower = f32::from_bits(1072693248); let upper = f32::from_bits(715827883); - let interval1 = create_f32_interval(lower, upper); - let interval2 = create_f32_interval(upper, lower); - let result = interval1.add(&interval2).unwrap(); - match (result, create_f32_interval(lower + upper, lower + upper)) { - ( - Interval { - upper: ScalarValue::Float32(Some(result_upper)), - .. - }, - Interval { - upper: ScalarValue::Float32(Some(without_fe_upper)), - .. - }, - ) => { - assert!(result_upper > without_fe_upper); - } - _ => unreachable!(), - } + capture_mode_change_f32((lower, upper), (false, true)); } #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] @@ -637,49 +640,15 @@ mod tests { fn test_add_intervals_lower_affected_f64() { let lower = 1.0; let upper = 0.3; - let interval1 = create_f64_interval(lower, upper); - let interval2 = create_f64_interval(upper, lower); - let result = interval1.add(&interval2).unwrap(); - match (result, create_f64_interval(lower + upper, lower + upper)) { - ( - Interval { - lower: ScalarValue::Float64(Some(result_lower)), - .. - }, - Interval { - lower: ScalarValue::Float64(Some(without_fe_lower)), - .. - }, - ) => { - assert!(result_lower < without_fe_lower); - } - _ => unreachable!(), - } + capture_mode_change_f64((lower, upper), (true, false)); } #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] #[test] fn test_add_intervals_upper_affected_f64() { let lower = 1.4999999999999998; - let upper = 0.000000000000000022044604925031308; - let interval1 = create_f64_interval(lower, upper); - let interval2 = create_f64_interval(upper, lower); - let result = interval1.add(&interval2).unwrap(); - match (result, create_f64_interval(lower + upper, lower + upper)) { - ( - Interval { - upper: ScalarValue::Float64(Some(result_upper)), - .. - }, - Interval { - upper: ScalarValue::Float64(Some(without_fe_upper)), - .. - }, - ) => { - assert!(result_upper > without_fe_upper); - } - _ => unreachable!(), - } + let upper = 0.000_000_000_000_000_022_044_604_925_031_31; + capture_mode_change_f64((lower, upper), (false, true)); } #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] @@ -687,51 +656,7 @@ mod tests { fn test_next_impl_add_intervals_f64() { let lower = 1.5; let upper = 1.5; - let interval1 = create_f64_interval(lower, upper); - let interval2 = create_f64_interval(upper, lower); - let result = interval1.add(&interval2).unwrap(); - match (result, create_f64_interval(lower + upper, lower + upper)) { - ( - Interval { - lower: ScalarValue::Float64(Some(result_lower)), - upper: ScalarValue::Float64(Some(result_upper)), - }, - Interval { - lower: ScalarValue::Float64(Some(without_fe_lower)), - upper: ScalarValue::Float64(Some(without_fe_upper)), - }, - ) => { - assert!(result_lower < without_fe_lower); - assert!(result_upper > without_fe_upper); - } - _ => unreachable!(), - } - } - - #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] - #[test] - fn test_next_impl_sub_intervals_f64() { - let lower = 2.5; - let upper = 1.5; - let interval1 = create_f64_interval(lower, upper); - let interval2 = create_f64_interval(upper, lower); - let result = interval1.sub(&interval2).unwrap(); - match (result, create_f64_interval(lower - upper, upper - lower)) { - ( - Interval { - lower: ScalarValue::Float64(Some(result_lower)), - upper: ScalarValue::Float64(Some(result_upper)), - }, - Interval { - lower: ScalarValue::Float64(Some(without_fe_lower)), - upper: ScalarValue::Float64(Some(without_fe_upper)), - }, - ) => { - assert!(result_lower < without_fe_lower); - assert!(result_upper > without_fe_upper); - } - _ => unreachable!(), - } + capture_mode_change_f64((lower, upper), (true, true)); } #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] @@ -739,50 +664,6 @@ mod tests { fn test_next_impl_add_intervals_f32() { let lower = 1.5; let upper = 1.5; - let interval1 = create_f32_interval(lower, upper); - let interval2 = create_f32_interval(upper, lower); - let result = interval1.add(&interval2).unwrap(); - match (result, create_f32_interval(lower + upper, lower + upper)) { - ( - Interval { - lower: ScalarValue::Float32(Some(result_lower)), - upper: ScalarValue::Float32(Some(result_upper)), - }, - Interval { - lower: ScalarValue::Float32(Some(without_fe_lower)), - upper: ScalarValue::Float32(Some(without_fe_upper)), - }, - ) => { - assert!(result_lower < without_fe_lower); - assert!(result_upper > without_fe_upper); - } - _ => unreachable!(), - } - } - - #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] - #[test] - fn test_next_impl_sub_intervals_f32() { - let lower = 2.5; - let upper = 1.5; - let interval1 = create_f32_interval(lower, upper); - let interval2 = create_f32_interval(lower, upper); - let result = interval1.sub(&interval2).unwrap(); - match (result, create_f32_interval(lower - upper, upper - lower)) { - ( - Interval { - lower: ScalarValue::Float32(Some(result_lower)), - upper: ScalarValue::Float32(Some(result_upper)), - }, - Interval { - lower: ScalarValue::Float32(Some(without_fe_lower)), - upper: ScalarValue::Float32(Some(without_fe_upper)), - }, - ) => { - assert!(result_lower < without_fe_lower); - assert!(result_upper > without_fe_upper); - } - _ => unreachable!(), - } + capture_mode_change_f32((lower, upper), (true, true)); } } diff --git a/datafusion/physical-expr/src/intervals/test_utils.rs b/datafusion/physical-expr/src/intervals/test_utils.rs index b5bc2abcf1a9..0fe5624638f1 100644 --- a/datafusion/physical-expr/src/intervals/test_utils.rs +++ b/datafusion/physical-expr/src/intervals/test_utils.rs @@ -28,81 +28,30 @@ use datafusion_expr::Operator; /// This test function generates a conjunctive statement with two numeric /// terms with the following form: /// left_col (op_1) a > right_col (op_2) b AND left_col (op_3) c < right_col (op_4) d -pub fn gen_conjunctive_numeric_expr( +pub fn gen_conjunctive_numerical_expr( left_col: Arc, right_col: Arc, - op_1: Operator, - op_2: Operator, - op_3: Operator, - op_4: Operator, - a: i32, - b: i32, - c: i32, - d: i32, + op: (Operator, Operator, Operator, Operator), + a: ScalarValue, + b: ScalarValue, + c: ScalarValue, + d: ScalarValue, ) -> Arc { + let (op_1, op_2, op_3, op_4) = op; let left_and_1 = Arc::new(BinaryExpr::new( left_col.clone(), op_1, - Arc::new(Literal::new(ScalarValue::Int32(Some(a)))), + Arc::new(Literal::new(a)), )); let left_and_2 = Arc::new(BinaryExpr::new( right_col.clone(), op_2, - Arc::new(Literal::new(ScalarValue::Int32(Some(b)))), - )); - - let right_and_1 = Arc::new(BinaryExpr::new( - left_col, - op_3, - Arc::new(Literal::new(ScalarValue::Int32(Some(c)))), - )); - let right_and_2 = Arc::new(BinaryExpr::new( - right_col, - op_4, - Arc::new(Literal::new(ScalarValue::Int32(Some(d)))), - )); - let left_expr = Arc::new(BinaryExpr::new(left_and_1, Operator::Gt, left_and_2)); - let right_expr = Arc::new(BinaryExpr::new(right_and_1, Operator::Lt, right_and_2)); - Arc::new(BinaryExpr::new(left_expr, Operator::And, right_expr)) -} - -#[allow(clippy::too_many_arguments)] -/// This test function generates a conjunctive statement with two numeric -/// terms with the following form: -/// left_col (op_1) a > right_col (op_2) b AND left_col (op_3) c < right_col (op_4) d -pub fn gen_conjunctive_float64_expr( - left_col: Arc, - right_col: Arc, - op_1: Operator, - op_2: Operator, - op_3: Operator, - op_4: Operator, - a: f64, - b: f64, - c: f64, - d: f64, -) -> Arc { - let left_and_1 = Arc::new(BinaryExpr::new( - left_col.clone(), - op_1, - Arc::new(Literal::new(ScalarValue::Float64(Some(a)))), - )); - let left_and_2 = Arc::new(BinaryExpr::new( - right_col.clone(), - op_2, - Arc::new(Literal::new(ScalarValue::Float64(Some(b)))), - )); - - let right_and_1 = Arc::new(BinaryExpr::new( - left_col, - op_3, - Arc::new(Literal::new(ScalarValue::Float64(Some(c)))), - )); - let right_and_2 = Arc::new(BinaryExpr::new( - right_col, - op_4, - Arc::new(Literal::new(ScalarValue::Float64(Some(d)))), + Arc::new(Literal::new(b)), )); + let right_and_1 = + Arc::new(BinaryExpr::new(left_col, op_3, Arc::new(Literal::new(c)))); + let right_and_2 = + Arc::new(BinaryExpr::new(right_col, op_4, Arc::new(Literal::new(d)))); let left_expr = Arc::new(BinaryExpr::new(left_and_1, Operator::Gt, left_and_2)); let right_expr = Arc::new(BinaryExpr::new(right_and_1, Operator::Lt, right_and_2)); Arc::new(BinaryExpr::new(left_expr, Operator::And, right_expr)) From f0fe44ac81501479c935516b7975f378f4eb8524 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Metehan=20Y=C4=B1ld=C4=B1r=C4=B1m?= <44446768+metegenez@users.noreply.github.com> Date: Wed, 12 Apr 2023 10:52:54 +0300 Subject: [PATCH 03/15] Windows platform support --- .../src/intervals/interval_aritmetic.rs | 30 ++++++++++++---- datafusion/physical-expr/src/intervals/mod.rs | 34 ++++++++++++------- 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index d2941e01b3fe..7e64aca3755c 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -619,7 +619,10 @@ mod tests { capture_mode_change!(capture_mode_change_f32, create_f32_interval, f32, Float32); capture_mode_change!(capture_mode_change_f64, create_f64_interval, f64, Float64); - #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + #[cfg(all( + any(target_arch = "x86", target_arch = "aarch64"), + not(target_os = "windows") + ))] #[test] fn test_add_intervals_lower_affected_f32() { let lower = f32::from_bits(1073741887); @@ -627,7 +630,10 @@ mod tests { capture_mode_change_f32((lower, upper), (true, false)); } - #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + #[cfg(all( + any(target_arch = "x86", target_arch = "aarch64"), + not(target_os = "windows") + ))] #[test] fn test_add_intervals_upper_affected_f32() { let lower = f32::from_bits(1072693248); @@ -635,7 +641,10 @@ mod tests { capture_mode_change_f32((lower, upper), (false, true)); } - #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + #[cfg(all( + any(target_arch = "x86", target_arch = "aarch64"), + not(target_os = "windows") + ))] #[test] fn test_add_intervals_lower_affected_f64() { let lower = 1.0; @@ -643,7 +652,10 @@ mod tests { capture_mode_change_f64((lower, upper), (true, false)); } - #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + #[cfg(all( + any(target_arch = "x86", target_arch = "aarch64"), + not(target_os = "windows") + ))] #[test] fn test_add_intervals_upper_affected_f64() { let lower = 1.4999999999999998; @@ -651,7 +663,10 @@ mod tests { capture_mode_change_f64((lower, upper), (false, true)); } - #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + #[cfg(any( + not(any(target_arch = "x86", target_arch = "aarch64")), + target_os = "windows" + ))] #[test] fn test_next_impl_add_intervals_f64() { let lower = 1.5; @@ -659,7 +674,10 @@ mod tests { capture_mode_change_f64((lower, upper), (true, true)); } - #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + #[cfg(any( + not(any(target_arch = "x86", target_arch = "aarch64")), + target_os = "windows" + ))] #[test] fn test_next_impl_add_intervals_f32() { let lower = 1.5; diff --git a/datafusion/physical-expr/src/intervals/mod.rs b/datafusion/physical-expr/src/intervals/mod.rs index 0705ceabc9c0..ebe75617318b 100644 --- a/datafusion/physical-expr/src/intervals/mod.rs +++ b/datafusion/physical-expr/src/intervals/mod.rs @@ -37,19 +37,21 @@ const FE_UPWARD: i32 = 0x00400000; const FE_DOWNWARD: i32 = 0x00800000; // Define constants for x86_64 -#[cfg(all(target_arch = "x86_64"))] -const FE_UPWARD: i32 = 0x0800; -#[cfg(all(target_arch = "x86_64"))] -const FE_DOWNWARD: i32 = 0x0400; - -// Define a default constant for other architectures -#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] -const FE_UPWARD: i32 = 0x00000000; -#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] -const FE_DOWNWARD: i32 = 0x00000000; - +#[cfg(all(target_arch = "x86_64", not(target_os = "windows")))] +const FE_UPWARD: libc::c_int = 0x0800; +#[cfg(all(target_arch = "x86_64", not(target_os = "windows")))] +const FE_DOWNWARD: libc::c_int = 0x0400; + +#[cfg(all( + any(target_arch = "x86", target_arch = "aarch64"), + not(target_os = "windows") +))] extern crate libc; +#[cfg(all( + any(target_arch = "x86", target_arch = "aarch64"), + not(target_os = "windows") +))] extern "C" { fn fesetround(rount: i32); fn fegetround() -> i32; @@ -228,7 +230,10 @@ pub fn alter_round_mode_for_float_operation( rhs: &ScalarValue, cls: Box, ) -> Result { - #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + #[cfg(all( + any(target_arch = "x86", target_arch = "aarch64"), + not(target_os = "windows") + ))] unsafe { let current = fegetround(); fesetround(if UPPER { FE_UPWARD } else { FE_DOWNWARD }); @@ -236,7 +241,10 @@ pub fn alter_round_mode_for_float_operation( fesetround(current); res } - #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + #[cfg(any( + not(any(target_arch = "x86", target_arch = "aarch64")), + target_os = "windows" + ))] match cls(lhs, rhs) { Ok(ScalarValue::Float64(Some(val))) => Ok(ScalarValue::Float64(Some(if UPPER { next_up(val) From a69bac786691f25dcf4d142d397e345a255f00a8 Mon Sep 17 00:00:00 2001 From: metesynnada <100111937+metesynnada@users.noreply.github.com> Date: Wed, 12 Apr 2023 11:31:37 +0300 Subject: [PATCH 04/15] Correct the platform --- .../src/intervals/interval_aritmetic.rs | 12 ++++++------ datafusion/physical-expr/src/intervals/mod.rs | 16 ++++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index 7e64aca3755c..f9cf862b66d3 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -620,7 +620,7 @@ mod tests { capture_mode_change!(capture_mode_change_f64, create_f64_interval, f64, Float64); #[cfg(all( - any(target_arch = "x86", target_arch = "aarch64"), + any(target_arch = "x86_64", target_arch = "aarch64"), not(target_os = "windows") ))] #[test] @@ -631,7 +631,7 @@ mod tests { } #[cfg(all( - any(target_arch = "x86", target_arch = "aarch64"), + any(target_arch = "x86_64", target_arch = "aarch64"), not(target_os = "windows") ))] #[test] @@ -642,7 +642,7 @@ mod tests { } #[cfg(all( - any(target_arch = "x86", target_arch = "aarch64"), + any(target_arch = "x86_64", target_arch = "aarch64"), not(target_os = "windows") ))] #[test] @@ -653,7 +653,7 @@ mod tests { } #[cfg(all( - any(target_arch = "x86", target_arch = "aarch64"), + any(target_arch = "x86_64", target_arch = "aarch64"), not(target_os = "windows") ))] #[test] @@ -664,7 +664,7 @@ mod tests { } #[cfg(any( - not(any(target_arch = "x86", target_arch = "aarch64")), + not(any(target_arch = "x86_64", target_arch = "aarch64")), target_os = "windows" ))] #[test] @@ -675,7 +675,7 @@ mod tests { } #[cfg(any( - not(any(target_arch = "x86", target_arch = "aarch64")), + not(any(target_arch = "x86_64", target_arch = "aarch64")), target_os = "windows" ))] #[test] diff --git a/datafusion/physical-expr/src/intervals/mod.rs b/datafusion/physical-expr/src/intervals/mod.rs index ebe75617318b..cdeaefcacec5 100644 --- a/datafusion/physical-expr/src/intervals/mod.rs +++ b/datafusion/physical-expr/src/intervals/mod.rs @@ -31,25 +31,25 @@ use std::ops::{Add, BitAnd, Sub}; type ScalarValueOperation = dyn FnOnce(&ScalarValue, &ScalarValue) -> Result; // Define constants for ARM -#[cfg(all(target_arch = "aarch64"))] +#[cfg(all(target_arch = "aarch64", not(target_os = "windows")))] const FE_UPWARD: i32 = 0x00400000; -#[cfg(all(target_arch = "aarch64"))] +#[cfg(all(target_arch = "aarch64", not(target_os = "windows")))] const FE_DOWNWARD: i32 = 0x00800000; // Define constants for x86_64 #[cfg(all(target_arch = "x86_64", not(target_os = "windows")))] -const FE_UPWARD: libc::c_int = 0x0800; +const FE_UPWARD: i32 = 0x0800; #[cfg(all(target_arch = "x86_64", not(target_os = "windows")))] -const FE_DOWNWARD: libc::c_int = 0x0400; +const FE_DOWNWARD: i32 = 0x0400; #[cfg(all( - any(target_arch = "x86", target_arch = "aarch64"), + any(target_arch = "x86_64", target_arch = "aarch64"), not(target_os = "windows") ))] extern crate libc; #[cfg(all( - any(target_arch = "x86", target_arch = "aarch64"), + any(target_arch = "x86_64", target_arch = "aarch64"), not(target_os = "windows") ))] extern "C" { @@ -231,7 +231,7 @@ pub fn alter_round_mode_for_float_operation( cls: Box, ) -> Result { #[cfg(all( - any(target_arch = "x86", target_arch = "aarch64"), + any(target_arch = "x86_64", target_arch = "aarch64"), not(target_os = "windows") ))] unsafe { @@ -242,7 +242,7 @@ pub fn alter_round_mode_for_float_operation( res } #[cfg(any( - not(any(target_arch = "x86", target_arch = "aarch64")), + not(any(target_arch = "x86_64", target_arch = "aarch64")), target_os = "windows" ))] match cls(lhs, rhs) { From 1c51c517375e8f2ae8531a4dd39fdcee89314a5f Mon Sep 17 00:00:00 2001 From: metesynnada <100111937+metesynnada@users.noreply.github.com> Date: Wed, 12 Apr 2023 12:31:57 +0300 Subject: [PATCH 05/15] Update Cargo.lock --- datafusion-cli/Cargo.lock | 177 ++++++++++++++++++++------------------ 1 file changed, 93 insertions(+), 84 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 2458b9193a81..d274c94d0979 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -301,7 +301,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.11", + "syn 2.0.14", ] [[package]] @@ -455,9 +455,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa48fa079165080f11d7753fd0bc175b7d391f276b965fe4b55bfad67856e463" +checksum = "cf9cc2b23599e6d7479755f3594285efb3f74a1bdca7a7374948bc831e23a552" dependencies = [ "chrono", "chrono-tz-build", @@ -576,9 +576,9 @@ checksum = "13418e745008f7349ec7e449155f419a61b92b58a99cc3616942b926825ec76b" [[package]] name = "core-foundation-sys" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" @@ -659,7 +659,7 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.11", + "syn 2.0.14", ] [[package]] @@ -676,7 +676,7 @@ checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.11", + "syn 2.0.14", ] [[package]] @@ -956,13 +956,13 @@ dependencies = [ [[package]] name = "errno" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" dependencies = [ "errno-dragonfly", "libc", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -996,13 +996,13 @@ dependencies = [ [[package]] name = "fd-lock" -version = "3.0.11" +version = "3.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9799aefb4a2e4a01cc47610b1dd47c18ab13d991f27bbcaed9296f5a53d5cbad" +checksum = "39ae6b3d9530211fb3b12a95374b8b0823be812f53d09e18c5675c0146b09642" dependencies = [ "cfg-if", "rustix", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -1102,7 +1102,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.11", + "syn 2.0.14", ] [[package]] @@ -1147,9 +1147,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" dependencies = [ "cfg-if", "libc", @@ -1315,9 +1315,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.55" +version = "0.1.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "716f12fbcfac6ffab0a5e9ec51d0a0ff70503742bb2dc7b99396394c9dc323f0" +checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1374,13 +1374,13 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "io-lifetimes" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb" +checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" dependencies = [ "hermit-abi 0.3.1", "libc", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -1494,9 +1494,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.140" +version = "0.2.141" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" +checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" [[package]] name = "libm" @@ -1506,9 +1506,9 @@ checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" [[package]] name = "libmimalloc-sys" -version = "0.1.30" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8c7cbf8b89019683667e347572e6d55a7df7ea36b0c4ce69961b0cde67b174" +checksum = "43a558e3d911bc3c7bfc8c78bc580b404d6e51c1cefbf656e176a94b49b0df40" dependencies = [ "cc", "libc", @@ -1525,9 +1525,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd550e73688e6d578f0ac2119e32b797a327631a42f9433e59d02e139c8df60d" +checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" [[package]] name = "lock_api" @@ -1596,9 +1596,9 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "mimalloc" -version = "0.1.34" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dcb174b18635f7561a0c6c9fc2ce57218ac7523cf72c50af80e2d79ab8f3ba1" +checksum = "3d88dad3f985ec267a3fcb7a1726f5cb1a7e8cad8b646e70a84f967210df23da" dependencies = [ "libmimalloc-sys", ] @@ -1627,7 +1627,7 @@ dependencies = [ "libc", "log", "wasi", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -1740,9 +1740,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1ea8f683b4f89a64181393742c041520a1a87e9775e6b4c0dd5a3281af05fc6" +checksum = "ec9cd6ca25e796a49fa242876d1c4de36a24a6da5258e9f0bc062dbf5e81c53b" dependencies = [ "async-trait", "base64", @@ -1807,7 +1807,7 @@ dependencies = [ "libc", "redox_syscall 0.2.16", "smallvec", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -1968,18 +1968,18 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.54" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534" +checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" dependencies = [ "unicode-ident", ] [[package]] name = "quick-xml" -version = "0.27.1" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffc053f057dd768a56f62cd7e434c42c831d296968997e9ac1f76ea7c2d14c41" +checksum = "e5c1a97b1bc42b1d550bfb48d4262153fe400a12bab1511821736f7eac76d7e2" dependencies = [ "memchr", "serde", @@ -2147,16 +2147,16 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.5" +version = "0.37.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e78cc525325c06b4a7ff02db283472f3c042b7ff0c391f96c6d5ac6f4f91b75" +checksum = "85597d61f83914ddeba6a47b3b8ffe7365107221c2e557ed94426489fefb5f77" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -2260,22 +2260,22 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" [[package]] name = "serde" -version = "1.0.159" +version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c04e8343c3daeec41f58990b9d77068df31209f2af111e059e9fe9646693065" +checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.159" +version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c614d17805b093df4b147b51339e7e44bf05ef59fba1e45d83500bcfb4d8585" +checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" dependencies = [ "proc-macro2", "quote", - "syn 2.0.11", + "syn 2.0.14", ] [[package]] @@ -2454,9 +2454,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.11" +version = "2.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e3787bb71465627110e7d87ed4faaa36c1f61042ee67badb9e2ef173accc40" +checksum = "fcf316d5356ed6847742d036f8a39c3b8435cac10bd528a4bd461928a6ab34d5" dependencies = [ "proc-macro2", "quote", @@ -2473,7 +2473,7 @@ dependencies = [ "fastrand", "redox_syscall 0.3.5", "rustix", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -2508,7 +2508,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.11", + "syn 2.0.14", ] [[package]] @@ -2561,7 +2561,7 @@ dependencies = [ "pin-project-lite", "socket2", "tokio-macros", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -2572,7 +2572,7 @@ checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" dependencies = [ "proc-macro2", "quote", - "syn 2.0.11", + "syn 2.0.14", ] [[package]] @@ -2729,9 +2729,9 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79" +checksum = "5b55a3fef2a1e3b3a00ce878640918820d3c51081576ac657d23af9fc7928fdb" dependencies = [ "getrandom", ] @@ -2909,11 +2909,11 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows" -version = "0.47.0" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2649ff315bee4c98757f15dac226efe3d81927adbb6e882084bb1ee3e0c330a7" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" dependencies = [ - "windows-targets 0.47.0", + "windows-targets 0.48.0", ] [[package]] @@ -2925,6 +2925,15 @@ dependencies = [ "windows-targets 0.42.2", ] +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.0", +] + [[package]] name = "windows-targets" version = "0.42.2" @@ -2942,17 +2951,17 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.47.0" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f8996d3f43b4b2d44327cd71b7b0efd1284ab60e6e9d0e8b630e18555d87d3e" +checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" dependencies = [ - "windows_aarch64_gnullvm 0.47.0", - "windows_aarch64_msvc 0.47.0", - "windows_i686_gnu 0.47.0", - "windows_i686_msvc 0.47.0", - "windows_x86_64_gnu 0.47.0", - "windows_x86_64_gnullvm 0.47.0", - "windows_x86_64_msvc 0.47.0", + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", ] [[package]] @@ -2963,9 +2972,9 @@ checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.47.0" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "831d567d53d4f3cb1db332b68e6e2b6260228eb4d99a777d8b2e8ed794027c90" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" [[package]] name = "windows_aarch64_msvc" @@ -2975,9 +2984,9 @@ checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" [[package]] name = "windows_aarch64_msvc" -version = "0.47.0" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a42d54a417c60ce4f0e31661eed628f0fa5aca73448c093ec4d45fab4c51cdf" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" [[package]] name = "windows_i686_gnu" @@ -2987,9 +2996,9 @@ checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" [[package]] name = "windows_i686_gnu" -version = "0.47.0" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1925beafdbb22201a53a483db861a5644123157c1c3cee83323a2ed565d71e3" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" [[package]] name = "windows_i686_msvc" @@ -2999,9 +3008,9 @@ checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" [[package]] name = "windows_i686_msvc" -version = "0.47.0" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a8ef8f2f1711b223947d9b69b596cf5a4e452c930fb58b6fc3fdae7d0ec6b31" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" [[package]] name = "windows_x86_64_gnu" @@ -3011,9 +3020,9 @@ checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" [[package]] name = "windows_x86_64_gnu" -version = "0.47.0" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7acaa0c2cf0d2ef99b61c308a0c3dbae430a51b7345dedec470bd8f53f5a3642" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" [[package]] name = "windows_x86_64_gnullvm" @@ -3023,9 +3032,9 @@ checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" [[package]] name = "windows_x86_64_gnullvm" -version = "0.47.0" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a0628f71be1d11e17ca4a0e9e15b3a5180f6fbf1c2d55e3ba3f850378052c1" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" [[package]] name = "windows_x86_64_msvc" @@ -3035,9 +3044,9 @@ checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" [[package]] name = "windows_x86_64_msvc" -version = "0.47.0" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d6e62c256dc6d40b8c8707df17df8d774e60e39db723675241e7c15e910bce7" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "winreg" @@ -3072,7 +3081,7 @@ version = "0.12.3+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" dependencies = [ - "zstd-safe 6.0.4+zstd.1.5.4", + "zstd-safe 6.0.5+zstd.1.5.4", ] [[package]] @@ -3087,9 +3096,9 @@ dependencies = [ [[package]] name = "zstd-safe" -version = "6.0.4+zstd.1.5.4" +version = "6.0.5+zstd.1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7afb4b54b8910cf5447638cb54bf4e8a65cbedd783af98b98c62ffe91f185543" +checksum = "d56d9e60b4b1758206c238a10165fbcae3ca37b01744e394c463463f6529d23b" dependencies = [ "libc", "zstd-sys", @@ -3097,9 +3106,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.7+zstd.1.5.4" +version = "2.0.8+zstd.1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94509c3ba2fe55294d752b79842c530ccfab760192521df74a081a78d2b3c7f5" +checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" dependencies = [ "cc", "libc", From 61aea854951b061ff91f61544304feee7bdb6c3b Mon Sep 17 00:00:00 2001 From: metesynnada <100111937+metesynnada@users.noreply.github.com> Date: Wed, 12 Apr 2023 12:41:28 +0300 Subject: [PATCH 06/15] Resolving merge problems --- .../src/physical_optimizer/pipeline_checker.rs | 3 ++- .../core/src/physical_optimizer/pipeline_fixer.rs | 2 +- .../src/physical_plan/joins/symmetric_hash_join.rs | 14 +++++++++----- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/pipeline_checker.rs b/datafusion/core/src/physical_optimizer/pipeline_checker.rs index 1dbc59a63aa1..d10aa5a67dbb 100644 --- a/datafusion/core/src/physical_optimizer/pipeline_checker.rs +++ b/datafusion/core/src/physical_optimizer/pipeline_checker.rs @@ -27,7 +27,8 @@ use crate::physical_plan::{with_new_children_if_necessary, ExecutionPlan}; use datafusion_common::config::OptimizerOptions; use datafusion_common::tree_node::{Transformed, TreeNode, VisitRecursion}; use datafusion_common::DataFusionError; -use datafusion_physical_expr::intervals::{check_support, is_datatype_supported}; +use datafusion_physical_expr::intervals::cp_solver::check_support; +use datafusion_physical_expr::intervals::is_datatype_supported; use std::sync::Arc; /// The PipelineChecker rule rejects non-runnable query plans that use diff --git a/datafusion/core/src/physical_optimizer/pipeline_fixer.rs b/datafusion/core/src/physical_optimizer/pipeline_fixer.rs index 77ffb9fb41d6..ef46d48b15e7 100644 --- a/datafusion/core/src/physical_optimizer/pipeline_fixer.rs +++ b/datafusion/core/src/physical_optimizer/pipeline_fixer.rs @@ -245,7 +245,7 @@ fn apply_subrules_and_check_finiteness_requirements( mod util_tests { use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{BinaryExpr, Column, NegativeExpr}; - use datafusion_physical_expr::intervals::check_support; + use datafusion_physical_expr::intervals::cp_solver::check_support; use datafusion_physical_expr::PhysicalExpr; use std::sync::Arc; diff --git a/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs b/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs index 8be855016653..81532c88dc9a 100644 --- a/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs +++ b/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs @@ -2070,7 +2070,6 @@ mod tests { experiment(left, right, Some(filter), join_type, on, task_ctx).await?; Ok(()) } - #[rstest] #[tokio::test(flavor = "multi_thread")] @@ -2112,7 +2111,7 @@ mod tests { Field::new("left", DataType::Int32, true), Field::new("right", DataType::Int32, true), ]); - let filter_expr = join_expr_tests_fixture( + let filter_expr = join_expr_tests_fixture_i32( case_expr, col("left", &intermediate_schema)?, col("right", &intermediate_schema)?, @@ -2822,8 +2821,13 @@ mod tests { expr: col("r_float", right_schema)?, options: SortOptions::default(), }]; - let (left, right) = - create_memory_table(left_batch, right_batch, left_sorted, right_sorted, 13)?; + let (left, right) = create_memory_table( + left_batch, + right_batch, + Some(left_sorted), + Some(right_sorted), + 13, + )?; let on = vec![( Column::new_with_schema("lc1", left_schema)?, @@ -2851,7 +2855,7 @@ mod tests { ]; let filter = JoinFilter::new(filter_expr, column_indices, intermediate_schema); - experiment(left, right, filter, join_type, on, task_ctx).await?; + experiment(left, right, Some(filter), join_type, on, task_ctx).await?; Ok(()) } } From fbd54d37546562dfd9995bfe2945f4c9bc50610d Mon Sep 17 00:00:00 2001 From: metesynnada <100111937+metesynnada@users.noreply.github.com> Date: Thu, 13 Apr 2023 11:41:55 +0300 Subject: [PATCH 07/15] Code refactor --- .../joins/symmetric_hash_join.rs | 19 ++- .../physical-expr/src/intervals/cp_solver.rs | 57 ++++--- .../src/intervals/interval_aritmetic.rs | 139 ++++++------------ datafusion/physical-expr/src/intervals/mod.rs | 58 +++++--- 4 files changed, 125 insertions(+), 148 deletions(-) diff --git a/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs b/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs index 81532c88dc9a..85aeadb5cb0f 100644 --- a/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs +++ b/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs @@ -1657,6 +1657,7 @@ mod tests { Ok(result) } + // It creates join filters for different type of fields for testing. macro_rules! join_expr_tests { ($func_name:ident, $type:ty, $SCALAR:ident) => { fn $func_name( @@ -1798,12 +1799,18 @@ mod tests { let cardinality = Arc::new(Int32Array::from_iter( initial_range.clone().map(|x| x % 4).collect::>(), )); - let cardinality_key = Arc::new(Int32Array::from_iter( + let cardinality_key_left = Arc::new(Int32Array::from_iter( initial_range .clone() .map(|x| x % key_cardinality.0) .collect::>(), )); + let cardinality_key_right = Arc::new(Int32Array::from_iter( + initial_range + .clone() + .map(|x| x % key_cardinality.1) + .collect::>(), + )); let ordered_asc_null_first = Arc::new(Int32Array::from_iter({ std::iter::repeat(None) .take(index as usize) @@ -1839,7 +1846,7 @@ mod tests { let left = RecordBatch::try_from_iter(vec![ ("la1", ordered.clone()), ("lb1", cardinality.clone()), - ("lc1", cardinality_key.clone()), + ("lc1", cardinality_key_left), ("lt1", time.clone()), ("la2", ordered.clone()), ("la1_des", ordered_des.clone()), @@ -1851,7 +1858,7 @@ mod tests { let right = RecordBatch::try_from_iter(vec![ ("ra1", ordered.clone()), ("rb1", cardinality), - ("rc1", cardinality_key), + ("rc1", cardinality_key_right), ("rt1", time), ("ra2", ordered), ("ra1_des", ordered_des), @@ -2799,8 +2806,6 @@ mod tests { join_type: JoinType, #[values( (4, 5), - (11, 21), - (31, 71), (99, 12), )] cardinality: (i32, i32), @@ -2845,11 +2850,11 @@ mod tests { ); let column_indices = vec![ ColumnIndex { - index: 9, + index: 9, // l_float side: JoinSide::Left, }, ColumnIndex { - index: 9, + index: 9, // r_float side: JoinSide::Right, }, ]; diff --git a/datafusion/physical-expr/src/intervals/cp_solver.rs b/datafusion/physical-expr/src/intervals/cp_solver.rs index f3deaf7ef52c..2d93721fff53 100644 --- a/datafusion/physical-expr/src/intervals/cp_solver.rs +++ b/datafusion/physical-expr/src/intervals/cp_solver.rs @@ -556,8 +556,8 @@ mod tests { exprs_with_interval: (Arc, Arc), left_interval: Interval, right_interval: Interval, - left_waited: Interval, - right_waited: Interval, + left_expected: Interval, + right_expected: Interval, result: PropagationResult, ) -> Result<()> { let col_stats = vec![ @@ -565,8 +565,8 @@ mod tests { (exprs_with_interval.1.clone(), right_interval), ]; let expected = vec![ - (exprs_with_interval.0.clone(), left_waited), - (exprs_with_interval.1.clone(), right_waited), + (exprs_with_interval.0.clone(), left_expected), + (exprs_with_interval.1.clone(), right_expected), ]; let mut graph = ExprIntervalGraph::try_new(expr)?; let expr_indexes = graph @@ -587,18 +587,8 @@ mod tests { assert_eq!(exp_result, result); col_stat_nodes.iter().zip(expected_nodes.iter()).for_each( |((_, calculated_interval_node), (_, expected))| { - let ( - Interval { - lower: calc_lower, - upper: calc_upper, - }, - Interval { - lower: expected_lower, - upper: expected_upper, - }, - ) = (calculated_interval_node, expected); - assert!(calc_lower <= expected_lower); - assert!(calc_upper >= expected_upper); + assert!(calculated_interval_node.lower <= expected.lower); + assert!(calculated_interval_node.upper >= expected.upper); }, ); Ok(()) @@ -731,19 +721,20 @@ mod tests { #[rstest] #[test] fn $test_func( - #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 123, 4123)] seed: u64, + #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 125, 211, 215, 4123)] + seed: u64, ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark - 10 > right_watermark - 5 AND left_watermark - 30 < right_watermark - 3 + // left_watermark + 1 > right_watermark + 11 AND left_watermark + 3 < right_watermark + 33 let expr = gen_conjunctive_numerical_expr( left_col.clone(), right_col.clone(), ( - Operator::Minus, - Operator::Minus, - Operator::Minus, + Operator::Plus, + Operator::Plus, + Operator::Plus, Operator::Plus, ), ScalarValue::$SCALAR(Some(1 as $type)), @@ -784,19 +775,20 @@ mod tests { #[rstest] #[test] fn $test_func( - #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 123, 4123)] seed: u64, + #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 125, 211, 215, 4123)] + seed: u64, ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark - 10 > right_watermark - 5 AND left_watermark - 30 < right_watermark - 3 + // left_watermark - 1 > right_watermark + 5 AND left_watermark + 3 < right_watermark + 10 let expr = gen_conjunctive_numerical_expr( left_col.clone(), right_col.clone(), ( Operator::Minus, - Operator::Minus, - Operator::Minus, + Operator::Plus, + Operator::Plus, Operator::Plus, ), ScalarValue::$SCALAR(Some(1 as $type)), @@ -837,22 +829,23 @@ mod tests { #[rstest] #[test] fn $test_func( - #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 123, 4123)] seed: u64, + #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 125, 211, 215, 4123)] + seed: u64, ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark - 10 > right_watermark - 5 AND left_watermark - 30 < right_watermark - 3 + // left_watermark - 1 > right_watermark + 5 AND left_watermark - 3 < right_watermark + 10 let expr = gen_conjunctive_numerical_expr( left_col.clone(), right_col.clone(), ( Operator::Minus, - Operator::Minus, + Operator::Plus, Operator::Minus, Operator::Plus, ), - ScalarValue::$SCALAR(Some(10 as $type)), + ScalarValue::$SCALAR(Some(1 as $type)), ScalarValue::$SCALAR(Some(5 as $type)), ScalarValue::$SCALAR(Some(3 as $type)), ScalarValue::$SCALAR(Some(10 as $type)), @@ -890,7 +883,8 @@ mod tests { #[rstest] #[test] fn $test_func( - #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 123, 4123)] seed: u64, + #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 125, 211, 215, 4123)] + seed: u64, ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); @@ -943,7 +937,8 @@ mod tests { #[rstest] #[test] fn $test_func( - #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 123, 4123)] seed: u64, + #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 125, 211, 215, 4123)] + seed: u64, ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index f9cf862b66d3..f5c7dfd5e228 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -199,35 +199,24 @@ impl Interval { /// Note that this represents all possible values the sum can take if /// one can choose single values arbitrarily from each of the operands. pub fn add>(&self, other: T) -> Result { - let rhs = other.borrow(); - let lower = if self.lower.is_null() || rhs.lower.is_null() { - ScalarValue::try_from(self.lower.get_datatype()) - } else { - match self.get_datatype() { - DataType::Float64 | DataType::Float32 => { - alter_round_mode_for_float_operation::( - &self.lower, - &rhs.lower, - Box::new(|lhs, rhs| lhs.add(rhs)), - ) - } - _ => self.lower.add(&rhs.lower), - } - }?; - let upper = if self.upper.is_null() || rhs.upper.is_null() { - ScalarValue::try_from(self.upper.get_datatype()) - } else { - match self.get_datatype() { - DataType::Float64 | DataType::Float32 => { - alter_round_mode_for_float_operation::( - &self.upper, - &rhs.upper, - Box::new(|lhs, rhs| lhs.add(rhs)), - ) - } - _ => self.upper.add(&rhs.upper), + fn handle_scalar_add( + lhs: &ScalarValue, + rhs: &ScalarValue, + ) -> Result { + if lhs.is_null() || rhs.is_null() { + ScalarValue::try_from(lhs.get_datatype()) + } else if matches!(lhs.get_datatype(), DataType::Float64 | DataType::Float32) + { + alter_round_mode_for_float_operation::(lhs, rhs, |lhs, rhs| { + lhs.add(rhs) + }) + } else { + lhs.add(rhs) } - }?; + } + let rhs = other.borrow(); + let lower = handle_scalar_add::(&self.lower, &rhs.lower)?; + let upper = handle_scalar_add::(&self.upper, &rhs.upper)?; Ok(Interval { lower, upper }) } @@ -236,35 +225,25 @@ impl Interval { /// Note that this represents all possible values the difference can take /// if one can choose single values arbitrarily from each of the operands. pub fn sub>(&self, other: T) -> Result { - let rhs = other.borrow(); - let lower = if self.lower.is_null() || rhs.upper.is_null() { - ScalarValue::try_from(self.lower.get_datatype()) - } else { - match self.get_datatype() { - DataType::Float64 | DataType::Float32 => { - alter_round_mode_for_float_operation::( - &self.lower, - &rhs.upper, - Box::new(|lhs, rhs| lhs.sub(rhs)), - ) - } - _ => self.lower.sub(&rhs.upper), + fn handle_scalar_sub( + lhs: &ScalarValue, + rhs: &ScalarValue, + ) -> Result { + if lhs.is_null() || rhs.is_null() { + ScalarValue::try_from(lhs.get_datatype()) + } else if matches!(lhs.get_datatype(), DataType::Float64 | DataType::Float32) + { + alter_round_mode_for_float_operation::(lhs, rhs, |lhs, rhs| { + lhs.sub(rhs) + }) + } else { + lhs.sub(rhs) } - }?; - let upper = if self.upper.is_null() || rhs.lower.is_null() { - ScalarValue::try_from(self.upper.get_datatype()) - } else { - match self.get_datatype() { - DataType::Float64 | DataType::Float32 => { - alter_round_mode_for_float_operation::( - &self.upper, - &rhs.lower, - Box::new(|lhs, rhs| lhs.sub(rhs)), - ) - } - _ => self.upper.sub(&rhs.lower), - } - }?; + } + let rhs = other.borrow(); + + let lower = handle_scalar_sub::(&self.lower, &rhs.upper)?; + let upper = handle_scalar_sub::(&self.upper, &rhs.lower)?; Ok(Interval { lower, upper }) } } @@ -625,41 +604,24 @@ mod tests { ))] #[test] fn test_add_intervals_lower_affected_f32() { - let lower = f32::from_bits(1073741887); - let upper = f32::from_bits(1098907651); + // Lower is affected + let lower = f32::from_bits(1073741887); //1000000000000000000000000111111 + let upper = f32::from_bits(1098907651); //1000001100000000000000000000011 capture_mode_change_f32((lower, upper), (true, false)); - } - #[cfg(all( - any(target_arch = "x86_64", target_arch = "aarch64"), - not(target_os = "windows") - ))] - #[test] - fn test_add_intervals_upper_affected_f32() { - let lower = f32::from_bits(1072693248); - let upper = f32::from_bits(715827883); + // Upper is affected + let lower = f32::from_bits(1072693248); //111111111100000000000000000000 + let upper = f32::from_bits(715827883); //101010101010101010101010101011 capture_mode_change_f32((lower, upper), (false, true)); - } - #[cfg(all( - any(target_arch = "x86_64", target_arch = "aarch64"), - not(target_os = "windows") - ))] - #[test] - fn test_add_intervals_lower_affected_f64() { - let lower = 1.0; - let upper = 0.3; + // Lower is affected + let lower = 1.0; // 0x3FF0000000000000 + let upper = 0.3; // 0x3FD3333333333333 capture_mode_change_f64((lower, upper), (true, false)); - } - #[cfg(all( - any(target_arch = "x86_64", target_arch = "aarch64"), - not(target_os = "windows") - ))] - #[test] - fn test_add_intervals_upper_affected_f64() { - let lower = 1.4999999999999998; - let upper = 0.000_000_000_000_000_022_044_604_925_031_31; + // Upper is affected + let lower = 1.4999999999999998; // 0x3FF7FFFFFFFFFFFF + let upper = 0.000_000_000_000_000_022_044_604_925_031_31; // 0x3C796A6B413BB21F capture_mode_change_f64((lower, upper), (false, true)); } @@ -672,14 +634,7 @@ mod tests { let lower = 1.5; let upper = 1.5; capture_mode_change_f64((lower, upper), (true, true)); - } - #[cfg(any( - not(any(target_arch = "x86_64", target_arch = "aarch64")), - target_os = "windows" - ))] - #[test] - fn test_next_impl_add_intervals_f32() { let lower = 1.5; let upper = 1.5; capture_mode_change_f32((lower, upper), (true, true)); diff --git a/datafusion/physical-expr/src/intervals/mod.rs b/datafusion/physical-expr/src/intervals/mod.rs index cdeaefcacec5..15f68fbb3996 100644 --- a/datafusion/physical-expr/src/intervals/mod.rs +++ b/datafusion/physical-expr/src/intervals/mod.rs @@ -28,8 +28,6 @@ use datafusion_common::ScalarValue; pub use interval_aritmetic::*; use std::ops::{Add, BitAnd, Sub}; -type ScalarValueOperation = dyn FnOnce(&ScalarValue, &ScalarValue) -> Result; - // Define constants for ARM #[cfg(all(target_arch = "aarch64", not(target_os = "windows")))] const FE_UPWARD: i32 = 0x00400000; @@ -225,11 +223,47 @@ pub fn next_down(float: F) -> F { F::from_bits(next_bits) } -pub fn alter_round_mode_for_float_operation( +#[cfg(any( + not(any(target_arch = "x86_64", target_arch = "aarch64")), + target_os = "windows" +))] +pub fn alter_round_mode_for_other_arch( + lhs: &ScalarValue, + rhs: &ScalarValue, + cls: F, +) -> Result +where + F: FnOnce(&ScalarValue, &ScalarValue) -> Result, +{ + let mut res = cls(lhs, rhs)?; + match &mut res { + ScalarValue::Float64(Some(val)) => { + if UPPER { + *val = next_up(*val) + } else { + *val = next_down(*val) + } + } + ScalarValue::Float32(Some(val)) => { + if UPPER { + *val = next_up(*val) + } else { + *val = next_down(*val) + } + } + _ => {} + }; + Ok(res) +} + +pub fn alter_round_mode_for_float_operation( lhs: &ScalarValue, rhs: &ScalarValue, - cls: Box, -) -> Result { + cls: F, +) -> Result +where + F: FnOnce(&ScalarValue, &ScalarValue) -> Result, +{ #[cfg(all( any(target_arch = "x86_64", target_arch = "aarch64"), not(target_os = "windows") @@ -245,19 +279,7 @@ pub fn alter_round_mode_for_float_operation( not(any(target_arch = "x86_64", target_arch = "aarch64")), target_os = "windows" ))] - match cls(lhs, rhs) { - Ok(ScalarValue::Float64(Some(val))) => Ok(ScalarValue::Float64(Some(if UPPER { - next_up(val) - } else { - next_down(val) - }))), - Ok(ScalarValue::Float32(Some(val))) => Ok(ScalarValue::Float32(Some(if UPPER { - next_up(val) - } else { - next_down(val) - }))), - val => val, - } + alter_round_mode_for_other_arch::(lhs, rhs, cls) } #[cfg(test)] From ac0e195cdb277c2e82d9cab215002591e9586838 Mon Sep 17 00:00:00 2001 From: metesynnada <100111937+metesynnada@users.noreply.github.com> Date: Fri, 14 Apr 2023 11:28:36 +0300 Subject: [PATCH 08/15] Merge resolution --- .../src/physical_plan/joins/symmetric_hash_join.rs | 3 +-- .../physical-expr/src/intervals/interval_aritmetic.rs | 10 +++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs b/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs index 5fd290d9642c..7e7b93780aa3 100644 --- a/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs +++ b/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs @@ -1554,8 +1554,7 @@ impl SymmetricHashJoinStream { mod tests { use std::fs::File; - - use arrow::array::{ArrayRef, IntervalDayTimeArray, Float64Array}; + use arrow::array::{ArrayRef, Float64Array, IntervalDayTimeArray}; use arrow::array::{Int32Array, TimestampMillisecondArray}; use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, IntervalUnit, Schema, TimeUnit}; diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index 872e8c201b1d..a38c3fc9e4c0 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -206,9 +206,9 @@ impl Interval { ) -> Result { if lhs.is_null() || rhs.is_null() { ScalarValue::try_from(coerce_types( - lhs.get_datatype(), - &Operator::Minus, - rhs.get_datatype(), + &lhs.get_datatype(), + &Operator::Plus, + &rhs.get_datatype(), )?) } else if matches!(lhs.get_datatype(), DataType::Float64 | DataType::Float32) { @@ -236,9 +236,9 @@ impl Interval { ) -> Result { if lhs.is_null() || rhs.is_null() { ScalarValue::try_from(coerce_types( - lhs.get_datatype(), + &lhs.get_datatype(), &Operator::Minus, - rhs.get_datatype(), + &rhs.get_datatype(), )?) } else if matches!(lhs.get_datatype(), DataType::Float64 | DataType::Float32) { From 95339de7baafc4935b0eda3f414c6a82055de520 Mon Sep 17 00:00:00 2001 From: metesynnada <100111937+metesynnada@users.noreply.github.com> Date: Mon, 17 Apr 2023 20:22:11 +0300 Subject: [PATCH 09/15] Merge conflicts --- .../joins/symmetric_hash_join.rs | 56 +++++- .../physical-expr/src/intervals/cp_solver.rs | 166 ++++++------------ .../src/intervals/interval_aritmetic.rs | 74 +++++--- 3 files changed, 154 insertions(+), 142 deletions(-) diff --git a/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs b/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs index 30ad13129d96..513d3ebbc5b8 100644 --- a/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs +++ b/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs @@ -1734,6 +1734,7 @@ mod tests { ScalarValue::$SCALAR(Some(5 as $type)), ScalarValue::$SCALAR(Some(3 as $type)), ScalarValue::$SCALAR(Some(10 as $type)), + (Operator::Gt, Operator::Lt), ), // left_col - 1 > right_col + 5 AND left_col + 3 < right_col + 10 1 => gen_conjunctive_numerical_expr( @@ -1749,6 +1750,7 @@ mod tests { ScalarValue::$SCALAR(Some(5 as $type)), ScalarValue::$SCALAR(Some(3 as $type)), ScalarValue::$SCALAR(Some(10 as $type)), + (Operator::Gt, Operator::Lt), ), // left_col - 1 > right_col + 5 AND left_col - 3 < right_col + 10 2 => gen_conjunctive_numerical_expr( @@ -1764,6 +1766,7 @@ mod tests { ScalarValue::$SCALAR(Some(5 as $type)), ScalarValue::$SCALAR(Some(3 as $type)), ScalarValue::$SCALAR(Some(10 as $type)), + (Operator::Gt, Operator::Lt), ), // left_col - 10 > right_col - 5 AND left_col - 3 < right_col + 10 3 => gen_conjunctive_numerical_expr( @@ -1779,6 +1782,7 @@ mod tests { ScalarValue::$SCALAR(Some(5 as $type)), ScalarValue::$SCALAR(Some(3 as $type)), ScalarValue::$SCALAR(Some(10 as $type)), + (Operator::Gt, Operator::Lt), ), // left_col - 10 > right_col - 5 AND left_col - 30 < right_col - 3 4 => gen_conjunctive_numerical_expr( @@ -1794,6 +1798,55 @@ mod tests { ScalarValue::$SCALAR(Some(5 as $type)), ScalarValue::$SCALAR(Some(30 as $type)), ScalarValue::$SCALAR(Some(3 as $type)), + (Operator::Gt, Operator::Lt), + ), + // left_col - 2 >= right_col - 5 AND left_col - 7 <= right_col - 3 + 5 => gen_conjunctive_numerical_expr( + left_col, + right_col, + ( + Operator::Minus, + Operator::Plus, + Operator::Plus, + Operator::Minus, + ), + ScalarValue::$SCALAR(Some(2 as $type)), + ScalarValue::$SCALAR(Some(5 as $type)), + ScalarValue::$SCALAR(Some(7 as $type)), + ScalarValue::$SCALAR(Some(3 as $type)), + (Operator::GtEq, Operator::LtEq), + ), + // left_col - 28 >= right_col - 11 AND left_col - 21 <= right_col - 39 + 6 => gen_conjunctive_numerical_expr( + left_col, + right_col, + ( + Operator::Plus, + Operator::Minus, + Operator::Plus, + Operator::Plus, + ), + ScalarValue::$SCALAR(Some(28 as $type)), + ScalarValue::$SCALAR(Some(11 as $type)), + ScalarValue::$SCALAR(Some(21 as $type)), + ScalarValue::$SCALAR(Some(39 as $type)), + (Operator::Gt, Operator::LtEq), + ), + // left_col - 28 >= right_col - 11 AND left_col - 21 <= right_col - 39 + 7 => gen_conjunctive_numerical_expr( + left_col, + right_col, + ( + Operator::Plus, + Operator::Minus, + Operator::Minus, + Operator::Plus, + ), + ScalarValue::$SCALAR(Some(28 as $type)), + ScalarValue::$SCALAR(Some(11 as $type)), + ScalarValue::$SCALAR(Some(21 as $type)), + ScalarValue::$SCALAR(Some(39 as $type)), + (Operator::GtEq, Operator::Lt), ), _ => panic!("No case"), } @@ -2832,6 +2885,7 @@ mod tests { ScalarValue::Int32(Some(3)), ScalarValue::Int32(Some(0)), ScalarValue::Int32(Some(3)), + (Operator::Gt, Operator::Lt), ); let column_indices = vec![ ColumnIndex { @@ -3071,7 +3125,7 @@ mod tests { (99, 12), )] cardinality: (i32, i32), - #[values(0, 1, 2, 3, 4)] case_expr: usize, + #[values(0, 1, 2, 3, 4, 5, 6, 7)] case_expr: usize, ) -> Result<()> { let config = SessionConfig::new().with_repartition_joins(false); let session_ctx = SessionContext::with_config(config); diff --git a/datafusion/physical-expr/src/intervals/cp_solver.rs b/datafusion/physical-expr/src/intervals/cp_solver.rs index e95a79f8f8eb..0bbce1e6b5fe 100644 --- a/datafusion/physical-expr/src/intervals/cp_solver.rs +++ b/datafusion/physical-expr/src/intervals/cp_solver.rs @@ -597,8 +597,8 @@ mod tests { assert_eq!(exp_result, result); col_stat_nodes.iter().zip(expected_nodes.iter()).for_each( |((_, calculated_interval_node), (_, expected))| { - assert!(calculated_interval_node.lower <= expected.lower); - assert!(calculated_interval_node.upper >= expected.upper); + assert!(calculated_interval_node.lower.value <= expected.lower.value); + assert!(calculated_interval_node.upper.value >= expected.upper.value); }, ); Ok(()) @@ -663,22 +663,26 @@ mod tests { experiment( expr, (left_col, right_col), - Interval { - lower: ScalarValue::$SCALAR(left_interval.0), - upper: ScalarValue::$SCALAR(left_interval.1), - }, - Interval { - lower: ScalarValue::$SCALAR(right_interval.0), - upper: ScalarValue::$SCALAR(right_interval.1), - }, - Interval { - lower: ScalarValue::$SCALAR(left_waited.0), - upper: ScalarValue::$SCALAR(left_waited.1), - }, - Interval { - lower: ScalarValue::$SCALAR(right_waited.0), - upper: ScalarValue::$SCALAR(right_waited.1), - }, + Interval::make( + left_interval.0.map(|value| value as $type), + left_interval.1.map(|value| value as $type), + (true, true), + ), + Interval::make( + right_interval.0.map(|value| value as $type), + right_interval.1.map(|value| value as $type), + (true, true), + ), + Interval::make( + left_waited.0.map(|value| value as $type), + left_waited.1.map(|value| value as $type), + (true, true), + ), + Interval::make( + right_waited.0.map(|value| value as $type), + right_waited.1.map(|value| value as $type), + (true, true), + ), PropagationResult::Success, )?; Ok(()) @@ -733,6 +737,8 @@ mod tests { fn $test_func( #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 125, 211, 215, 4123)] seed: u64, + #[values(Operator::Gt, Operator::GtEq)] greater_op: Operator, + #[values(Operator::Lt, Operator::LtEq)] less_op: Operator, ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); @@ -751,7 +757,7 @@ mod tests { ScalarValue::$SCALAR(Some(11 as $type)), ScalarValue::$SCALAR(Some(3 as $type)), ScalarValue::$SCALAR(Some(33 as $type)), - (Operator::GtEq, Operator::Lt) + (greater_op, less_op), ); // l > r + 10 AND r > l - 30 let l_gt_r = 10 as $type; @@ -788,6 +794,8 @@ mod tests { fn $test_func( #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 125, 211, 215, 4123)] seed: u64, + #[values(Operator::Gt, Operator::GtEq)] greater_op: Operator, + #[values(Operator::Lt, Operator::LtEq)] less_op: Operator, ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); @@ -806,7 +814,7 @@ mod tests { ScalarValue::$SCALAR(Some(5 as $type)), ScalarValue::$SCALAR(Some(3 as $type)), ScalarValue::$SCALAR(Some(10 as $type)), - (Operator::GtEq, Operator::Lt) + (greater_op, less_op), ); // l > r + 6 AND r > l - 7 let l_gt_r = 6 as $type; @@ -843,6 +851,8 @@ mod tests { fn $test_func( #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 125, 211, 215, 4123)] seed: u64, + #[values(Operator::Gt, Operator::GtEq)] greater_op: Operator, + #[values(Operator::Lt, Operator::LtEq)] less_op: Operator, ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); @@ -861,7 +871,7 @@ mod tests { ScalarValue::$SCALAR(Some(5 as $type)), ScalarValue::$SCALAR(Some(3 as $type)), ScalarValue::$SCALAR(Some(10 as $type)), - (Operator::GtEq, Operator::Lt) + (greater_op, less_op), ); // l > r + 6 AND r > l - 13 let l_gt_r = 6 as $type; @@ -898,6 +908,8 @@ mod tests { fn $test_func( #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 125, 211, 215, 4123)] seed: u64, + #[values(Operator::Gt, Operator::GtEq)] greater_op: Operator, + #[values(Operator::Lt, Operator::LtEq)] less_op: Operator, ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); @@ -916,7 +928,7 @@ mod tests { ScalarValue::$SCALAR(Some(5 as $type)), ScalarValue::$SCALAR(Some(3 as $type)), ScalarValue::$SCALAR(Some(10 as $type)), - (Operator::GtEq, Operator::Lt) + (greater_op, less_op), ); // l > r + 5 AND r > l - 13 let l_gt_r = 5 as $type; @@ -953,6 +965,8 @@ mod tests { fn $test_func( #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 125, 211, 215, 4123)] seed: u64, + #[values(Operator::Gt, Operator::GtEq)] greater_op: Operator, + #[values(Operator::Lt, Operator::LtEq)] less_op: Operator, ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); @@ -971,7 +985,7 @@ mod tests { ScalarValue::$SCALAR(Some(5 as $type)), ScalarValue::$SCALAR(Some(30 as $type)), ScalarValue::$SCALAR(Some(3 as $type)), - (Operator::GtEq, Operator::Lt) + (greater_op, less_op), ); // l > r + 5 AND r > l - 27 let l_gt_r = 5 as $type; @@ -1006,23 +1020,25 @@ mod tests { fn case_6( #[values(0, 1, 2, 123, 756, 63, 345, 6443, 12341, 142, 123, 8900)] seed: u64, #[values(Operator::Gt, Operator::GtEq)] greater_op: Operator, - #[values(Operator::Lt, Operator::LtEq)] less_op: Operator + #[values(Operator::Lt, Operator::LtEq)] less_op: Operator, ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); // left_watermark - 1 >= right_watermark + 5 AND left_watermark - 10 <= right_watermark + 3 - let expr = gen_conjunctive_numeric_expr( + let expr = gen_conjunctive_numerical_expr( left_col.clone(), right_col.clone(), - Operator::Minus, - Operator::Plus, - Operator::Minus, - Operator::Plus, - 1, - 5, - 10, - 3, + ( + Operator::Minus, + Operator::Plus, + Operator::Minus, + Operator::Plus, + ), + ScalarValue::Int32(Some(1)), + ScalarValue::Int32(Some(5)), + ScalarValue::Int32(Some(10)), + ScalarValue::Int32(Some(3)), (greater_op, less_op), ); // l >= r + 6 AND r >= l - 13 @@ -1034,90 +1050,6 @@ mod tests { Ok(()) } - #[rstest] - #[test] - fn case_7( - #[values(0, 1, 2, 123, 77, 93, 104, 624, 115, 613, 8365, 9345)] seed: u64, - ) -> Result<()> { - let left_col = Arc::new(Column::new("left_watermark", 0)); - let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark + 4 >= right_watermark + 5 AND left_watermark - 20 < right_watermark - 5 - - let expr = gen_conjunctive_numeric_expr( - left_col.clone(), - right_col.clone(), - Operator::Plus, - Operator::Plus, - Operator::Minus, - Operator::Minus, - 4, - 5, - 20, - 5, - (Operator::GtEq, Operator::Lt), - ); - // l >= r + 1 AND r > l - 15 - let l_gt_r = 1; - let r_gt_l = -15; - generate_case::( - expr.clone(), - left_col.clone(), - right_col.clone(), - seed, - l_gt_r, - r_gt_l, - )?; - // Descending tests - // l >= r + 1 AND r > l - 15 - let r_lt_l = -l_gt_r; - let l_lt_r = -r_gt_l; - generate_case::(expr, left_col, right_col, seed, l_lt_r, r_lt_l)?; - - Ok(()) - } - - #[rstest] - #[test] - fn case_8( - #[values(0, 1, 2, 24, 53, 412, 364, 345, 737, 1010, 52, 1554)] seed: u64, - ) -> Result<()> { - let left_col = Arc::new(Column::new("left_watermark", 0)); - let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark + 4 >= right_watermark + 5 AND left_watermark - 20 < right_watermark - 5 - - let expr = gen_conjunctive_numeric_expr( - left_col.clone(), - right_col.clone(), - Operator::Plus, - Operator::Plus, - Operator::Minus, - Operator::Minus, - 4, - 5, - 20, - 5, - (Operator::Gt, Operator::LtEq), - ); - // l >= r + 1 AND r > l - 15 - let l_gt_r = 1; - let r_gt_l = -15; - generate_case::( - expr.clone(), - left_col.clone(), - right_col.clone(), - seed, - l_gt_r, - r_gt_l, - )?; - // Descending tests - // l >= r + 1 AND r > l - 15 - let r_lt_l = -l_gt_r; - let l_lt_r = -r_gt_l; - generate_case::(expr, left_col, right_col, seed, l_lt_r, r_lt_l)?; - - Ok(()) - } - #[test] fn test_gather_node_indices_dont_remove() -> Result<()> { // Expression: a@0 + b@1 + 1 > a@0 - b@1, given a@0 + b@1. diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index 42fad0b225e3..fabce0c4887f 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -74,22 +74,28 @@ impl IntervalBound { /// The result is unbounded if either is; otherwise, their values are /// added. The result is closed if both original bounds are closed, or open /// otherwise. - pub fn add>(&self, other: T) -> Result { - if self.is_unbounded() || other.is_unbounded() { + pub fn add>( + &self, + other: T, + ) -> Result { + let rhs = other.borrow(); + if self.is_unbounded() || rhs.is_unbounded() { IntervalBound::make_unbounded(coerce_types( - &lhs.get_datatype(), + &self.get_datatype(), &Operator::Plus, &rhs.get_datatype(), )?) - } else if matches!(self.get_datatype(), DataType::Float64 | DataType::Float32) - { - alter_round_mode_for_float_operation::(&self.value, &other.value, |lhs, rhs| { - lhs.add(rhs) - }).map(|v| IntervalBound::new(v, self.open || other.open)) + } else if matches!(self.get_datatype(), DataType::Float64 | DataType::Float32) { + alter_round_mode_for_float_operation::( + &self.value, + &rhs.value, + |lhs, rhs| lhs.add(rhs), + ) + .map(|v| IntervalBound::new(v, self.open || rhs.open)) } else { self.value - .add(&other.value) - .map(|v| IntervalBound::new(v, self.open || other.open)) + .add(&rhs.value) + .map(|v| IntervalBound::new(v, self.open || rhs.open)) } } @@ -97,7 +103,10 @@ impl IntervalBound { /// The result is unbounded if either is; otherwise, their values are /// subtracted. The result is closed if both original bounds are closed, /// or open otherwise. - pub fn sub>(&self, other: T) -> Result { + pub fn sub>( + &self, + other: T, + ) -> Result { let rhs = other.borrow(); if self.is_unbounded() || rhs.is_unbounded() { IntervalBound::make_unbounded(coerce_types( @@ -105,11 +114,14 @@ impl IntervalBound { &Operator::Minus, &rhs.get_datatype(), )?) - } else if matches!(self.get_datatype(), DataType::Float64 | DataType::Float32){ - alter_round_mode_for_float_operation::(&self.value, &other.value, |lhs, rhs| { - lhs.sub(rhs) - }).map(|v| IntervalBound::new(v, self.open || other.open)) - }else { + } else if matches!(self.get_datatype(), DataType::Float64 | DataType::Float32) { + alter_round_mode_for_float_operation::( + &self.value, + &rhs.value, + |lhs, rhs| lhs.sub(rhs), + ) + .map(|v| IntervalBound::new(v, self.open || rhs.open)) + } else { self.value .sub(&rhs.value) .map(|v| IntervalBound::new(v, self.open || rhs.open)) @@ -421,6 +433,7 @@ impl Interval { /// Note that this represents all possible values the difference can take /// if one can choose single values arbitrarily from each of the operands. pub fn sub>(&self, other: T) -> Result { + let rhs = other.borrow(); Ok(Interval::new( self.lower.sub::(&rhs.upper)?, self.upper.sub::(&rhs.lower)?, @@ -1092,10 +1105,7 @@ mod tests { macro_rules! create_interval { ($test_func:ident, $type:ty, $SCALAR:ident) => { fn $test_func(lower: $type, upper: $type) -> Interval { - Interval { - lower: ScalarValue::$SCALAR(Some(lower)), - upper: ScalarValue::$SCALAR(Some(upper)), - } + Interval::make(Some(lower as $type), Some(upper as $type), (true, true)) } }; } @@ -1116,12 +1126,28 @@ mod tests { ) { ( Interval { - lower: ScalarValue::$SCALAR(Some(result_lower)), - upper: ScalarValue::$SCALAR(Some(result_upper)), + lower: + IntervalBound { + value: ScalarValue::$SCALAR(Some(result_lower)), + .. + }, + upper: + IntervalBound { + value: ScalarValue::$SCALAR(Some(result_upper)), + .. + }, }, Interval { - lower: ScalarValue::$SCALAR(Some(without_fe_lower)), - upper: ScalarValue::$SCALAR(Some(without_fe_upper)), + lower: + IntervalBound { + value: ScalarValue::$SCALAR(Some(without_fe_lower)), + .. + }, + upper: + IntervalBound { + value: ScalarValue::$SCALAR(Some(without_fe_upper)), + .. + }, }, ) => { if waiting_change.0 { From af8c7d22d8691a8f93820126531bbf57ecd5355e Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Mon, 17 Apr 2023 18:04:16 -0500 Subject: [PATCH 10/15] Separate rounding mode code into a module --- .../physical_optimizer/pipeline_checker.rs | 3 +- .../src/physical_optimizer/pipeline_fixer.rs | 2 +- .../src/intervals/interval_aritmetic.rs | 54 ++- datafusion/physical-expr/src/intervals/mod.rs | 387 +---------------- .../physical-expr/src/intervals/rounding.rs | 402 ++++++++++++++++++ 5 files changed, 435 insertions(+), 413 deletions(-) create mode 100644 datafusion/physical-expr/src/intervals/rounding.rs diff --git a/datafusion/core/src/physical_optimizer/pipeline_checker.rs b/datafusion/core/src/physical_optimizer/pipeline_checker.rs index d10aa5a67dbb..1dbc59a63aa1 100644 --- a/datafusion/core/src/physical_optimizer/pipeline_checker.rs +++ b/datafusion/core/src/physical_optimizer/pipeline_checker.rs @@ -27,8 +27,7 @@ use crate::physical_plan::{with_new_children_if_necessary, ExecutionPlan}; use datafusion_common::config::OptimizerOptions; use datafusion_common::tree_node::{Transformed, TreeNode, VisitRecursion}; use datafusion_common::DataFusionError; -use datafusion_physical_expr::intervals::cp_solver::check_support; -use datafusion_physical_expr::intervals::is_datatype_supported; +use datafusion_physical_expr::intervals::{check_support, is_datatype_supported}; use std::sync::Arc; /// The PipelineChecker rule rejects non-runnable query plans that use diff --git a/datafusion/core/src/physical_optimizer/pipeline_fixer.rs b/datafusion/core/src/physical_optimizer/pipeline_fixer.rs index ef46d48b15e7..77ffb9fb41d6 100644 --- a/datafusion/core/src/physical_optimizer/pipeline_fixer.rs +++ b/datafusion/core/src/physical_optimizer/pipeline_fixer.rs @@ -245,7 +245,7 @@ fn apply_subrules_and_check_finiteness_requirements( mod util_tests { use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{BinaryExpr, Column, NegativeExpr}; - use datafusion_physical_expr::intervals::cp_solver::check_support; + use datafusion_physical_expr::intervals::check_support; use datafusion_physical_expr::PhysicalExpr; use std::sync::Arc; diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index fabce0c4887f..5c984bf21a5f 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -16,18 +16,20 @@ // under the License. //! Interval arithmetic library + use std::borrow::Borrow; use std::fmt; use std::fmt::{Display, Formatter}; -use crate::aggregate::min_max::{max, min}; -use crate::intervals::alter_round_mode_for_float_operation; use arrow::compute::{cast_with_options, CastOptions}; use arrow::datatypes::DataType; use datafusion_common::{DataFusionError, Result, ScalarValue}; use datafusion_expr::type_coercion::binary::coerce_types; use datafusion_expr::Operator; +use crate::aggregate::min_max::{max, min}; +use crate::intervals::alter_fp_rounding_mode; + /// This type represents a single endpoint of an [`Interval`]. An endpoint can /// be open or closed, denoting whether the interval includes or excludes the /// endpoint itself. @@ -80,23 +82,21 @@ impl IntervalBound { ) -> Result { let rhs = other.borrow(); if self.is_unbounded() || rhs.is_unbounded() { - IntervalBound::make_unbounded(coerce_types( + return IntervalBound::make_unbounded(coerce_types( &self.get_datatype(), &Operator::Plus, &rhs.get_datatype(), - )?) - } else if matches!(self.get_datatype(), DataType::Float64 | DataType::Float32) { - alter_round_mode_for_float_operation::( - &self.value, - &rhs.value, - |lhs, rhs| lhs.add(rhs), - ) - .map(|v| IntervalBound::new(v, self.open || rhs.open)) - } else { - self.value - .add(&rhs.value) - .map(|v| IntervalBound::new(v, self.open || rhs.open)) + )?); + } + match self.get_datatype() { + DataType::Float64 | DataType::Float32 => { + alter_fp_rounding_mode::(&self.value, &rhs.value, |lhs, rhs| { + lhs.add(rhs) + }) + } + _ => self.value.add(&rhs.value), } + .map(|v| IntervalBound::new(v, self.open || rhs.open)) } /// This function subtracts the given `IntervalBound` from `self`. @@ -109,23 +109,21 @@ impl IntervalBound { ) -> Result { let rhs = other.borrow(); if self.is_unbounded() || rhs.is_unbounded() { - IntervalBound::make_unbounded(coerce_types( + return IntervalBound::make_unbounded(coerce_types( &self.get_datatype(), &Operator::Minus, &rhs.get_datatype(), - )?) - } else if matches!(self.get_datatype(), DataType::Float64 | DataType::Float32) { - alter_round_mode_for_float_operation::( - &self.value, - &rhs.value, - |lhs, rhs| lhs.sub(rhs), - ) - .map(|v| IntervalBound::new(v, self.open || rhs.open)) - } else { - self.value - .sub(&rhs.value) - .map(|v| IntervalBound::new(v, self.open || rhs.open)) + )?); + } + match self.get_datatype() { + DataType::Float64 | DataType::Float32 => { + alter_fp_rounding_mode::(&self.value, &rhs.value, |lhs, rhs| { + lhs.sub(rhs) + }) + } + _ => self.value.sub(&rhs.value), } + .map(|v| IntervalBound::new(v, self.open || rhs.open)) } /// This function chooses one of the given `IntervalBound`s according to diff --git a/datafusion/physical-expr/src/intervals/mod.rs b/datafusion/physical-expr/src/intervals/mod.rs index 15f68fbb3996..390d13d4685b 100644 --- a/datafusion/physical-expr/src/intervals/mod.rs +++ b/datafusion/physical-expr/src/intervals/mod.rs @@ -15,391 +15,14 @@ // specific language governing permissions and limitations // under the License. -//! Interval calculations -//! +//! Interval arithmetic and constraint propagation library pub mod cp_solver; pub mod interval_aritmetic; +mod rounding; + pub mod test_utils; -pub use cp_solver::ExprIntervalGraph; -use datafusion_common::Result; -use datafusion_common::ScalarValue; +pub use cp_solver::{check_support, ExprIntervalGraph}; pub use interval_aritmetic::*; -use std::ops::{Add, BitAnd, Sub}; - -// Define constants for ARM -#[cfg(all(target_arch = "aarch64", not(target_os = "windows")))] -const FE_UPWARD: i32 = 0x00400000; -#[cfg(all(target_arch = "aarch64", not(target_os = "windows")))] -const FE_DOWNWARD: i32 = 0x00800000; - -// Define constants for x86_64 -#[cfg(all(target_arch = "x86_64", not(target_os = "windows")))] -const FE_UPWARD: i32 = 0x0800; -#[cfg(all(target_arch = "x86_64", not(target_os = "windows")))] -const FE_DOWNWARD: i32 = 0x0400; - -#[cfg(all( - any(target_arch = "x86_64", target_arch = "aarch64"), - not(target_os = "windows") -))] -extern crate libc; - -#[cfg(all( - any(target_arch = "x86_64", target_arch = "aarch64"), - not(target_os = "windows") -))] -extern "C" { - fn fesetround(rount: i32); - fn fegetround() -> i32; -} - -/// A trait to represent floating-point types with bitwise operations. -/// -/// This trait provides methods to convert a floating-point value to its bitwise representation -/// and back, as well as methods to check for NaN, infinity, and negative infinity. -pub trait FloatBits { - /// The integer type used for bitwise operations. - type Item: Copy - + PartialEq - + BitAnd - + Add - + Sub; - - /// The smallest positive floating-point value representable by this type. - const TINY_BITS: Self::Item; - - /// The smallest (in magnitude) negative floating-point value representable by this type. - const NEG_TINY_BITS: Self::Item; - - /// A mask to clear the sign bit of the floating-point value's bitwise representation. - const CLEAR_SIGN_MASK: Self::Item; - - /// The integer value 1, used in bitwise operations. - const ONE: Self::Item; - - /// The integer value 0, used in bitwise operations. - const ZERO: Self::Item; - - /// Converts the floating-point value to its bitwise representation. - fn to_bits(self) -> Self::Item; - - /// Converts the bitwise representation to the corresponding floating-point value. - fn from_bits(bits: Self::Item) -> Self; - - /// Returns true if the floating-point value is NaN (not a number). - fn float_is_nan(self) -> bool; - - /// Returns the positive infinity value for this floating-point type. - fn infinity() -> Self; - - /// Returns the negative infinity value for this floating-point type. - fn neg_infinity() -> Self; -} - -impl FloatBits for f32 { - type Item = u32; - const TINY_BITS: u32 = 0x1; // Smallest positive f32. - const NEG_TINY_BITS: u32 = 0x8000_0001; // Smallest (in magnitude) negative f32. - const CLEAR_SIGN_MASK: u32 = 0x7fff_ffff; - const ONE: Self::Item = 1; - const ZERO: Self::Item = 0; - - fn to_bits(self) -> Self::Item { - self.to_bits() - } - - fn from_bits(bits: Self::Item) -> Self { - f32::from_bits(bits) - } - - fn float_is_nan(self) -> bool { - self.is_nan() - } - - fn infinity() -> Self { - f32::INFINITY - } - - fn neg_infinity() -> Self { - f32::NEG_INFINITY - } -} - -impl FloatBits for f64 { - type Item = u64; - const TINY_BITS: u64 = 0x1; - const NEG_TINY_BITS: u64 = 0x8000_0000_0000_0001; - // Smallest positive f64. - const CLEAR_SIGN_MASK: u64 = 0x7fff_ffff_ffff_ffff; - const ONE: Self::Item = 1; - const ZERO: Self::Item = 0; - - fn to_bits(self) -> Self::Item { - self.to_bits() - } - - fn from_bits(bits: Self::Item) -> Self { - f64::from_bits(bits) - } - - fn float_is_nan(self) -> bool { - self.is_nan() - } - - fn infinity() -> Self { - f64::INFINITY - } - - fn neg_infinity() -> Self { - f64::NEG_INFINITY - } -} -/// Returns the next representable floating-point value greater than the input value. -/// -/// This function takes a floating-point value that implements the FloatBits trait, -/// calculates the next representable value greater than the input, and returns it. -/// -/// If the input value is NaN or positive infinity, the function returns the input value. -/// -/// # Examples -/// -/// ``` -/// use datafusion_physical_expr::intervals::next_up; -/// use datafusion_physical_expr::intervals::FloatBits; -/// -/// let f: f32 = 1.0; -/// let next_f = next_up(f); -/// assert_eq!(next_f, 1.0000001); -/// ``` -pub fn next_up(float: F) -> F { - let bits = float.to_bits(); - if float.float_is_nan() || bits == F::infinity().to_bits() { - return float; - } - - let abs = bits & F::CLEAR_SIGN_MASK; - let next_bits = if abs == F::ZERO { - F::TINY_BITS - } else if bits == abs { - bits + F::ONE - } else { - bits - F::ONE - }; - F::from_bits(next_bits) -} - -/// Returns the next representable floating-point value smaller than the input value. -/// -/// This function takes a floating-point value that implements the FloatBits trait, -/// calculates the next representable value smaller than the input, and returns it. -/// -/// If the input value is NaN or negative infinity, the function returns the input value. -/// -/// # Examples -/// -/// ``` -/// use datafusion_physical_expr::intervals::next_down; -/// use datafusion_physical_expr::intervals::FloatBits; -/// -/// let f: f32 = 1.0; -/// let next_f = next_down(f); -/// assert_eq!(next_f, 0.99999994); -/// ``` -pub fn next_down(float: F) -> F { - let bits = float.to_bits(); - if float.float_is_nan() || bits == F::neg_infinity().to_bits() { - return float; - } - let abs = bits & F::CLEAR_SIGN_MASK; - let next_bits = if abs == F::ZERO { - F::NEG_TINY_BITS - } else if bits == abs { - bits - F::ONE - } else { - bits + F::ONE - }; - F::from_bits(next_bits) -} - -#[cfg(any( - not(any(target_arch = "x86_64", target_arch = "aarch64")), - target_os = "windows" -))] -pub fn alter_round_mode_for_other_arch( - lhs: &ScalarValue, - rhs: &ScalarValue, - cls: F, -) -> Result -where - F: FnOnce(&ScalarValue, &ScalarValue) -> Result, -{ - let mut res = cls(lhs, rhs)?; - match &mut res { - ScalarValue::Float64(Some(val)) => { - if UPPER { - *val = next_up(*val) - } else { - *val = next_down(*val) - } - } - ScalarValue::Float32(Some(val)) => { - if UPPER { - *val = next_up(*val) - } else { - *val = next_down(*val) - } - } - _ => {} - }; - Ok(res) -} - -pub fn alter_round_mode_for_float_operation( - lhs: &ScalarValue, - rhs: &ScalarValue, - cls: F, -) -> Result -where - F: FnOnce(&ScalarValue, &ScalarValue) -> Result, -{ - #[cfg(all( - any(target_arch = "x86_64", target_arch = "aarch64"), - not(target_os = "windows") - ))] - unsafe { - let current = fegetround(); - fesetround(if UPPER { FE_UPWARD } else { FE_DOWNWARD }); - let res = cls(lhs, rhs); - fesetround(current); - res - } - #[cfg(any( - not(any(target_arch = "x86_64", target_arch = "aarch64")), - target_os = "windows" - ))] - alter_round_mode_for_other_arch::(lhs, rhs, cls) -} - -#[cfg(test)] -mod tests { - use crate::intervals::{next_down, next_up}; - - #[test] - fn test_next_down() { - let x = 1.0f64; - // Clamp value into range [0, 1). - let clamped = x.clamp(0.0, next_down(1.0f64)); - assert!(clamped < 1.0); - assert_eq!(next_up(clamped), 1.0); - } - - #[test] - fn test_next_up_small_positive() { - let value: f64 = 1.0; - let result = next_up(value); - assert_eq!(result, 1.0000000000000002); - } - - #[test] - fn test_next_up_small_negative() { - let value: f64 = -1.0; - let result = next_up(value); - assert_eq!(result, -0.9999999999999999); - } - - #[test] - fn test_next_up_pos_infinity() { - let value: f64 = f64::INFINITY; - let result = next_up(value); - assert_eq!(result, f64::INFINITY); - } - - #[test] - fn test_next_up_nan() { - let value: f64 = f64::NAN; - let result = next_up(value); - assert!(result.is_nan()); - } - - #[test] - fn test_next_down_small_positive() { - let value: f64 = 1.0; - let result = next_down(value); - assert_eq!(result, 0.9999999999999999); - } - - #[test] - fn test_next_down_small_negative() { - let value: f64 = -1.0; - let result = next_down(value); - assert_eq!(result, -1.0000000000000002); - } - - #[test] - fn test_next_down_neg_infinity() { - let value: f64 = f64::NEG_INFINITY; - let result = next_down(value); - assert_eq!(result, f64::NEG_INFINITY); - } - - #[test] - fn test_next_down_nan() { - let value: f64 = f64::NAN; - let result = next_down(value); - assert!(result.is_nan()); - } - - #[test] - fn test_next_up_small_positive_f32() { - let value: f32 = 1.0; - let result = next_up(value); - assert_eq!(result, 1.0000001); - } - - #[test] - fn test_next_up_small_negative_f32() { - let value: f32 = -1.0; - let result = next_up(value); - assert_eq!(result, -0.99999994); - } - - #[test] - fn test_next_up_pos_infinity_f32() { - let value: f32 = f32::INFINITY; - let result = next_up(value); - assert_eq!(result, f32::INFINITY); - } - - #[test] - fn test_next_up_nan_f32() { - let value: f32 = f32::NAN; - let result = next_up(value); - assert!(result.is_nan()); - } - #[test] - fn test_next_down_small_positive_f32() { - let value: f32 = 1.0; - let result = next_down(value); - assert_eq!(result, 0.99999994); - } - #[test] - fn test_next_down_small_negative_f32() { - let value: f32 = -1.0; - let result = next_down(value); - assert_eq!(result, -1.0000001); - } - #[test] - fn test_next_down_neg_infinity_f32() { - let value: f32 = f32::NEG_INFINITY; - let result = next_down(value); - assert_eq!(result, f32::NEG_INFINITY); - } - #[test] - fn test_next_down_nan_f32() { - let value: f32 = f32::NAN; - let result = next_down(value); - assert!(result.is_nan()); - } -} +pub use rounding::alter_fp_rounding_mode; diff --git a/datafusion/physical-expr/src/intervals/rounding.rs b/datafusion/physical-expr/src/intervals/rounding.rs new file mode 100644 index 000000000000..685c4a13632b --- /dev/null +++ b/datafusion/physical-expr/src/intervals/rounding.rs @@ -0,0 +1,402 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Floating point rounding mode utility library +//! TODO: Remove this custom implementation and the "libc" dependency when +//! floating-point rounding mode manipulation functions become available +//! in Rust. + +use std::ops::{Add, BitAnd, Sub}; + +use datafusion_common::Result; +use datafusion_common::ScalarValue; + +// Define constants for ARM +#[cfg(all(target_arch = "aarch64", not(target_os = "windows")))] +const FE_UPWARD: i32 = 0x00400000; +#[cfg(all(target_arch = "aarch64", not(target_os = "windows")))] +const FE_DOWNWARD: i32 = 0x00800000; + +// Define constants for x86_64 +#[cfg(all(target_arch = "x86_64", not(target_os = "windows")))] +const FE_UPWARD: i32 = 0x0800; +#[cfg(all(target_arch = "x86_64", not(target_os = "windows")))] +const FE_DOWNWARD: i32 = 0x0400; + +#[cfg(all( + any(target_arch = "x86_64", target_arch = "aarch64"), + not(target_os = "windows") +))] +extern crate libc; + +#[cfg(all( + any(target_arch = "x86_64", target_arch = "aarch64"), + not(target_os = "windows") +))] +extern "C" { + fn fesetround(round: i32); + fn fegetround() -> i32; +} + +/// A trait to manipulate floating-point types with bitwise operations. +/// Provides functions to convert a floating-point value to/from its bitwise +/// representation as well as utility methods to handle special values. +pub trait FloatBits { + /// The integer type used for bitwise operations. + type Item: Copy + + PartialEq + + BitAnd + + Add + + Sub; + + /// The smallest positive floating-point value representable by this type. + const TINY_BITS: Self::Item; + + /// The smallest (in magnitude) negative floating-point value representable by this type. + const NEG_TINY_BITS: Self::Item; + + /// A mask to clear the sign bit of the floating-point value's bitwise representation. + const CLEAR_SIGN_MASK: Self::Item; + + /// The integer value 1, used in bitwise operations. + const ONE: Self::Item; + + /// The integer value 0, used in bitwise operations. + const ZERO: Self::Item; + + /// Converts the floating-point value to its bitwise representation. + fn to_bits(self) -> Self::Item; + + /// Converts the bitwise representation to the corresponding floating-point value. + fn from_bits(bits: Self::Item) -> Self; + + /// Returns true if the floating-point value is NaN (not a number). + fn float_is_nan(self) -> bool; + + /// Returns the positive infinity value for this floating-point type. + fn infinity() -> Self; + + /// Returns the negative infinity value for this floating-point type. + fn neg_infinity() -> Self; +} + +impl FloatBits for f32 { + type Item = u32; + const TINY_BITS: u32 = 0x1; // Smallest positive f32. + const NEG_TINY_BITS: u32 = 0x8000_0001; // Smallest (in magnitude) negative f32. + const CLEAR_SIGN_MASK: u32 = 0x7fff_ffff; + const ONE: Self::Item = 1; + const ZERO: Self::Item = 0; + + fn to_bits(self) -> Self::Item { + self.to_bits() + } + + fn from_bits(bits: Self::Item) -> Self { + f32::from_bits(bits) + } + + fn float_is_nan(self) -> bool { + self.is_nan() + } + + fn infinity() -> Self { + f32::INFINITY + } + + fn neg_infinity() -> Self { + f32::NEG_INFINITY + } +} + +impl FloatBits for f64 { + type Item = u64; + const TINY_BITS: u64 = 0x1; + const NEG_TINY_BITS: u64 = 0x8000_0000_0000_0001; + // Smallest positive f64. + const CLEAR_SIGN_MASK: u64 = 0x7fff_ffff_ffff_ffff; + const ONE: Self::Item = 1; + const ZERO: Self::Item = 0; + + fn to_bits(self) -> Self::Item { + self.to_bits() + } + + fn from_bits(bits: Self::Item) -> Self { + f64::from_bits(bits) + } + + fn float_is_nan(self) -> bool { + self.is_nan() + } + + fn infinity() -> Self { + f64::INFINITY + } + + fn neg_infinity() -> Self { + f64::NEG_INFINITY + } +} + +/// Returns the next representable floating-point value greater than the input value. +/// +/// This function takes a floating-point value that implements the FloatBits trait, +/// calculates the next representable value greater than the input, and returns it. +/// +/// If the input value is NaN or positive infinity, the function returns the input value. +/// +/// # Examples +/// +/// ``` +/// use datafusion_physical_expr::intervals::rounding::next_up; +/// +/// let f: f32 = 1.0; +/// let next_f = next_up(f); +/// assert_eq!(next_f, 1.0000001); +/// ``` +#[allow(dead_code)] +pub fn next_up(float: F) -> F { + let bits = float.to_bits(); + if float.float_is_nan() || bits == F::infinity().to_bits() { + return float; + } + + let abs = bits & F::CLEAR_SIGN_MASK; + let next_bits = if abs == F::ZERO { + F::TINY_BITS + } else if bits == abs { + bits + F::ONE + } else { + bits - F::ONE + }; + F::from_bits(next_bits) +} + +/// Returns the next representable floating-point value smaller than the input value. +/// +/// This function takes a floating-point value that implements the FloatBits trait, +/// calculates the next representable value smaller than the input, and returns it. +/// +/// If the input value is NaN or negative infinity, the function returns the input value. +/// +/// # Examples +/// +/// ``` +/// use datafusion_physical_expr::intervals::rounding::next_down; +/// +/// let f: f32 = 1.0; +/// let next_f = next_down(f); +/// assert_eq!(next_f, 0.99999994); +/// ``` +#[allow(dead_code)] +pub fn next_down(float: F) -> F { + let bits = float.to_bits(); + if float.float_is_nan() || bits == F::neg_infinity().to_bits() { + return float; + } + let abs = bits & F::CLEAR_SIGN_MASK; + let next_bits = if abs == F::ZERO { + F::NEG_TINY_BITS + } else if bits == abs { + bits - F::ONE + } else { + bits + F::ONE + }; + F::from_bits(next_bits) +} + +#[cfg(any( + not(any(target_arch = "x86_64", target_arch = "aarch64")), + target_os = "windows" +))] +fn alter_fp_rounding_mode_conservative( + lhs: &ScalarValue, + rhs: &ScalarValue, + cls: F, +) -> Result +where + F: FnOnce(&ScalarValue, &ScalarValue) -> Result, +{ + let mut res = cls(lhs, rhs)?; + match &mut res { + ScalarValue::Float64(Some(val)) => { + if UPPER { + *val = next_up(*val) + } else { + *val = next_down(*val) + } + } + ScalarValue::Float32(Some(val)) => { + if UPPER { + *val = next_up(*val) + } else { + *val = next_down(*val) + } + } + _ => {} + }; + Ok(res) +} + +pub fn alter_fp_rounding_mode( + lhs: &ScalarValue, + rhs: &ScalarValue, + cls: F, +) -> Result +where + F: FnOnce(&ScalarValue, &ScalarValue) -> Result, +{ + #[cfg(all( + any(target_arch = "x86_64", target_arch = "aarch64"), + not(target_os = "windows") + ))] + unsafe { + let current = fegetround(); + fesetround(if UPPER { FE_UPWARD } else { FE_DOWNWARD }); + let result = cls(lhs, rhs); + fesetround(current); + result + } + #[cfg(any( + not(any(target_arch = "x86_64", target_arch = "aarch64")), + target_os = "windows" + ))] + alter_fp_rounding_mode_conservative::(lhs, rhs, cls) +} + +#[cfg(test)] +mod tests { + use super::{next_down, next_up}; + + #[test] + fn test_next_down() { + let x = 1.0f64; + // Clamp value into range [0, 1). + let clamped = x.clamp(0.0, next_down(1.0f64)); + assert!(clamped < 1.0); + assert_eq!(next_up(clamped), 1.0); + } + + #[test] + fn test_next_up_small_positive() { + let value: f64 = 1.0; + let result = next_up(value); + assert_eq!(result, 1.0000000000000002); + } + + #[test] + fn test_next_up_small_negative() { + let value: f64 = -1.0; + let result = next_up(value); + assert_eq!(result, -0.9999999999999999); + } + + #[test] + fn test_next_up_pos_infinity() { + let value: f64 = f64::INFINITY; + let result = next_up(value); + assert_eq!(result, f64::INFINITY); + } + + #[test] + fn test_next_up_nan() { + let value: f64 = f64::NAN; + let result = next_up(value); + assert!(result.is_nan()); + } + + #[test] + fn test_next_down_small_positive() { + let value: f64 = 1.0; + let result = next_down(value); + assert_eq!(result, 0.9999999999999999); + } + + #[test] + fn test_next_down_small_negative() { + let value: f64 = -1.0; + let result = next_down(value); + assert_eq!(result, -1.0000000000000002); + } + + #[test] + fn test_next_down_neg_infinity() { + let value: f64 = f64::NEG_INFINITY; + let result = next_down(value); + assert_eq!(result, f64::NEG_INFINITY); + } + + #[test] + fn test_next_down_nan() { + let value: f64 = f64::NAN; + let result = next_down(value); + assert!(result.is_nan()); + } + + #[test] + fn test_next_up_small_positive_f32() { + let value: f32 = 1.0; + let result = next_up(value); + assert_eq!(result, 1.0000001); + } + + #[test] + fn test_next_up_small_negative_f32() { + let value: f32 = -1.0; + let result = next_up(value); + assert_eq!(result, -0.99999994); + } + + #[test] + fn test_next_up_pos_infinity_f32() { + let value: f32 = f32::INFINITY; + let result = next_up(value); + assert_eq!(result, f32::INFINITY); + } + + #[test] + fn test_next_up_nan_f32() { + let value: f32 = f32::NAN; + let result = next_up(value); + assert!(result.is_nan()); + } + #[test] + fn test_next_down_small_positive_f32() { + let value: f32 = 1.0; + let result = next_down(value); + assert_eq!(result, 0.99999994); + } + #[test] + fn test_next_down_small_negative_f32() { + let value: f32 = -1.0; + let result = next_down(value); + assert_eq!(result, -1.0000001); + } + #[test] + fn test_next_down_neg_infinity_f32() { + let value: f32 = f32::NEG_INFINITY; + let result = next_down(value); + assert_eq!(result, f32::NEG_INFINITY); + } + #[test] + fn test_next_down_nan_f32() { + let value: f32 = f32::NAN; + let result = next_down(value); + assert!(result.is_nan()); + } +} From 3045a1bf74a436bf80f6ded0d378fa5a16ffaa4a Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Mon, 17 Apr 2023 18:54:28 -0500 Subject: [PATCH 11/15] Refactor IA tests, fix doctest --- .../src/intervals/interval_aritmetic.rs | 61 +++++++++---------- datafusion/physical-expr/src/intervals/mod.rs | 4 +- 2 files changed, 31 insertions(+), 34 deletions(-) diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index 5c984bf21a5f..5441368799c2 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -28,7 +28,7 @@ use datafusion_expr::type_coercion::binary::coerce_types; use datafusion_expr::Operator; use crate::aggregate::min_max::{max, min}; -use crate::intervals::alter_fp_rounding_mode; +use crate::intervals::rounding::alter_fp_rounding_mode; /// This type represents a single endpoint of an [`Interval`]. An endpoint can /// be open or closed, denoting whether the interval includes or excludes the @@ -1060,7 +1060,7 @@ mod tests { // This function tests if valid constructions produce standardized objects // ([false, false], [false, true], [true, true]) for boolean intervals. #[test] - fn non_standard_interval_constructs() -> Result<()> { + fn non_standard_interval_constructs() { let cases = vec![ ( IntervalBound::new(Boolean(None), true), @@ -1097,31 +1097,32 @@ mod tests { for case in cases { assert_eq!(Interval::new(case.0, case.1), case.2) } - Ok(()) } - macro_rules! create_interval { - ($test_func:ident, $type:ty, $SCALAR:ident) => { - fn $test_func(lower: $type, upper: $type) -> Interval { - Interval::make(Some(lower as $type), Some(upper as $type), (true, true)) + macro_rules! capture_mode_change { + ($TEST_FN_NAME:ident, $TYPE:ty, $SCALAR:ident) => { + paste::item! { + capture_mode_change_helper!($TEST_FN_NAME, + [], + $TYPE, + $SCALAR); } }; } - create_interval!(create_f32_interval, f32, Float32); - create_interval!(create_f64_interval, f64, Float64); + macro_rules! capture_mode_change_helper { + ($TEST_FN_NAME:ident, $CREATE_FN_NAME:ident, $TYPE:ty, $SCALAR:ident) => { + fn $CREATE_FN_NAME(lower: $TYPE, upper: $TYPE) -> Interval { + Interval::make(Some(lower as $TYPE), Some(upper as $TYPE), (true, true)) + } - macro_rules! capture_mode_change { - ($test_func:ident, $interval_create:ident, $type:ty, $SCALAR:ident) => { - fn $test_func(input: ($type, $type), waiting_change: (bool, bool)) { - assert!(waiting_change.0 || waiting_change.1); - let interval1 = $interval_create(input.0, input.1); - let interval2 = $interval_create(input.1, input.0); + fn $TEST_FN_NAME(input: ($TYPE, $TYPE), expect_low: bool, expect_high: bool) { + assert!(expect_low || expect_high); + let interval1 = $CREATE_FN_NAME(input.0, input.0); + let interval2 = $CREATE_FN_NAME(input.1, input.1); let result = interval1.add(&interval2).unwrap(); - match ( - result, - $interval_create(input.0 + input.1, input.0 + input.1), - ) { + let without_fe = $CREATE_FN_NAME(input.0 + input.1, input.0 + input.1); + match (result, without_fe) { ( Interval { lower: @@ -1148,12 +1149,10 @@ mod tests { }, }, ) => { - if waiting_change.0 { - assert!(result_lower < without_fe_lower); - } - if waiting_change.1 { - assert!(result_upper > without_fe_upper); - } + assert!( + (!expect_low || result_lower < without_fe_lower) + && (!expect_high || result_upper > without_fe_upper) + ); } _ => unreachable!(), } @@ -1161,8 +1160,8 @@ mod tests { }; } - capture_mode_change!(capture_mode_change_f32, create_f32_interval, f32, Float32); - capture_mode_change!(capture_mode_change_f64, create_f64_interval, f64, Float64); + capture_mode_change!(capture_mode_change_f32, f32, Float32); + capture_mode_change!(capture_mode_change_f64, f64, Float64); #[cfg(all( any(target_arch = "x86_64", target_arch = "aarch64"), @@ -1173,22 +1172,22 @@ mod tests { // Lower is affected let lower = f32::from_bits(1073741887); //1000000000000000000000000111111 let upper = f32::from_bits(1098907651); //1000001100000000000000000000011 - capture_mode_change_f32((lower, upper), (true, false)); + capture_mode_change_f32((lower, upper), true, false); // Upper is affected let lower = f32::from_bits(1072693248); //111111111100000000000000000000 let upper = f32::from_bits(715827883); //101010101010101010101010101011 - capture_mode_change_f32((lower, upper), (false, true)); + capture_mode_change_f32((lower, upper), false, true); // Lower is affected let lower = 1.0; // 0x3FF0000000000000 let upper = 0.3; // 0x3FD3333333333333 - capture_mode_change_f64((lower, upper), (true, false)); + capture_mode_change_f64((lower, upper), true, false); // Upper is affected let lower = 1.4999999999999998; // 0x3FF7FFFFFFFFFFFF let upper = 0.000_000_000_000_000_022_044_604_925_031_31; // 0x3C796A6B413BB21F - capture_mode_change_f64((lower, upper), (false, true)); + capture_mode_change_f64((lower, upper), false, true); } #[cfg(any( diff --git a/datafusion/physical-expr/src/intervals/mod.rs b/datafusion/physical-expr/src/intervals/mod.rs index 390d13d4685b..a9255752fea4 100644 --- a/datafusion/physical-expr/src/intervals/mod.rs +++ b/datafusion/physical-expr/src/intervals/mod.rs @@ -19,10 +19,8 @@ pub mod cp_solver; pub mod interval_aritmetic; - -mod rounding; +pub mod rounding; pub mod test_utils; pub use cp_solver::{check_support, ExprIntervalGraph}; pub use interval_aritmetic::*; -pub use rounding::alter_fp_rounding_mode; From 3178315937837e23993a249e4f94766299341882 Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Mon, 17 Apr 2023 19:18:17 -0500 Subject: [PATCH 12/15] Fix Windows tests --- datafusion/physical-expr/src/intervals/interval_aritmetic.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index 5441368799c2..4aecb1347015 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -1198,10 +1198,10 @@ mod tests { fn test_next_impl_add_intervals_f64() { let lower = 1.5; let upper = 1.5; - capture_mode_change_f64((lower, upper), (true, true)); + capture_mode_change_f64((lower, upper), true, true); let lower = 1.5; let upper = 1.5; - capture_mode_change_f32((lower, upper), (true, true)); + capture_mode_change_f32((lower, upper), true, true); } } From 10a708d3092848f20fc4bd9b4a6139761ddf87fe Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Mon, 17 Apr 2023 20:32:14 -0500 Subject: [PATCH 13/15] Simplify test macros --- .../src/intervals/interval_aritmetic.rs | 51 ++++--------------- .../physical-expr/src/intervals/rounding.rs | 31 ++++++----- 2 files changed, 25 insertions(+), 57 deletions(-) diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index 4aecb1347015..6c2d1b0f418f 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -1100,18 +1100,17 @@ mod tests { } macro_rules! capture_mode_change { - ($TEST_FN_NAME:ident, $TYPE:ty, $SCALAR:ident) => { + ($TYPE:ty) => { paste::item! { - capture_mode_change_helper!($TEST_FN_NAME, + capture_mode_change_helper!([], [], - $TYPE, - $SCALAR); + $TYPE); } }; } macro_rules! capture_mode_change_helper { - ($TEST_FN_NAME:ident, $CREATE_FN_NAME:ident, $TYPE:ty, $SCALAR:ident) => { + ($TEST_FN_NAME:ident, $CREATE_FN_NAME:ident, $TYPE:ty) => { fn $CREATE_FN_NAME(lower: $TYPE, upper: $TYPE) -> Interval { Interval::make(Some(lower as $TYPE), Some(upper as $TYPE), (true, true)) } @@ -1122,46 +1121,16 @@ mod tests { let interval2 = $CREATE_FN_NAME(input.1, input.1); let result = interval1.add(&interval2).unwrap(); let without_fe = $CREATE_FN_NAME(input.0 + input.1, input.0 + input.1); - match (result, without_fe) { - ( - Interval { - lower: - IntervalBound { - value: ScalarValue::$SCALAR(Some(result_lower)), - .. - }, - upper: - IntervalBound { - value: ScalarValue::$SCALAR(Some(result_upper)), - .. - }, - }, - Interval { - lower: - IntervalBound { - value: ScalarValue::$SCALAR(Some(without_fe_lower)), - .. - }, - upper: - IntervalBound { - value: ScalarValue::$SCALAR(Some(without_fe_upper)), - .. - }, - }, - ) => { - assert!( - (!expect_low || result_lower < without_fe_lower) - && (!expect_high || result_upper > without_fe_upper) - ); - } - _ => unreachable!(), - } + assert!( + (!expect_low || result.lower.value < without_fe.lower.value) + && (!expect_high || result.upper.value > without_fe.upper.value) + ); } }; } - capture_mode_change!(capture_mode_change_f32, f32, Float32); - capture_mode_change!(capture_mode_change_f64, f64, Float64); + capture_mode_change!(f32); + capture_mode_change!(f64); #[cfg(all( any(target_arch = "x86_64", target_arch = "aarch64"), diff --git a/datafusion/physical-expr/src/intervals/rounding.rs b/datafusion/physical-expr/src/intervals/rounding.rs index 685c4a13632b..06c4f9e8a957 100644 --- a/datafusion/physical-expr/src/intervals/rounding.rs +++ b/datafusion/physical-expr/src/intervals/rounding.rs @@ -125,9 +125,8 @@ impl FloatBits for f32 { impl FloatBits for f64 { type Item = u64; - const TINY_BITS: u64 = 0x1; - const NEG_TINY_BITS: u64 = 0x8000_0000_0000_0001; - // Smallest positive f64. + const TINY_BITS: u64 = 0x1; // Smallest positive f64. + const NEG_TINY_BITS: u64 = 0x8000_0000_0000_0001; // Smallest (in magnitude) negative f64. const CLEAR_SIGN_MASK: u64 = 0x7fff_ffff_ffff_ffff; const ONE: Self::Item = 1; const ZERO: Self::Item = 0; @@ -227,36 +226,36 @@ pub fn next_down(float: F) -> F { fn alter_fp_rounding_mode_conservative( lhs: &ScalarValue, rhs: &ScalarValue, - cls: F, + operation: F, ) -> Result where F: FnOnce(&ScalarValue, &ScalarValue) -> Result, { - let mut res = cls(lhs, rhs)?; - match &mut res { - ScalarValue::Float64(Some(val)) => { + let mut result = operation(lhs, rhs)?; + match &mut result { + ScalarValue::Float64(Some(value)) => { if UPPER { - *val = next_up(*val) + *value = next_up(*value) } else { - *val = next_down(*val) + *value = next_down(*value) } } - ScalarValue::Float32(Some(val)) => { + ScalarValue::Float32(Some(value)) => { if UPPER { - *val = next_up(*val) + *value = next_up(*value) } else { - *val = next_down(*val) + *value = next_down(*value) } } _ => {} }; - Ok(res) + Ok(result) } pub fn alter_fp_rounding_mode( lhs: &ScalarValue, rhs: &ScalarValue, - cls: F, + operation: F, ) -> Result where F: FnOnce(&ScalarValue, &ScalarValue) -> Result, @@ -268,7 +267,7 @@ where unsafe { let current = fegetround(); fesetround(if UPPER { FE_UPWARD } else { FE_DOWNWARD }); - let result = cls(lhs, rhs); + let result = operation(lhs, rhs); fesetround(current); result } @@ -276,7 +275,7 @@ where not(any(target_arch = "x86_64", target_arch = "aarch64")), target_os = "windows" ))] - alter_fp_rounding_mode_conservative::(lhs, rhs, cls) + alter_fp_rounding_mode_conservative::(lhs, rhs, operation) } #[cfg(test)] From 7c33018c5543714c9543b1a18b8bc7c47feaee73 Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Mon, 17 Apr 2023 21:22:05 -0500 Subject: [PATCH 14/15] Simplify cp_solver tests --- .../joins/symmetric_hash_join.rs | 6 +- .../physical-expr/src/intervals/cp_solver.rs | 242 +++++++----------- 2 files changed, 94 insertions(+), 154 deletions(-) diff --git a/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs b/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs index 513d3ebbc5b8..670a25837a22 100644 --- a/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs +++ b/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs @@ -1883,9 +1883,9 @@ mod tests { type Item = f64; fn next(&mut self) -> Option { - let new_value = self.rng.gen_range(self.prev..self.max); - self.prev = new_value; - Some(new_value) + let value = self.rng.gen_range(self.prev..self.max); + self.prev = value; + Some(value) } } diff --git a/datafusion/physical-expr/src/intervals/cp_solver.rs b/datafusion/physical-expr/src/intervals/cp_solver.rs index 0bbce1e6b5fe..ec0ec45e3e40 100644 --- a/datafusion/physical-expr/src/intervals/cp_solver.rs +++ b/datafusion/physical-expr/src/intervals/cp_solver.rs @@ -605,87 +605,46 @@ mod tests { } macro_rules! generate_cases { - ($func_name:ident, $type:ty, $SCALAR:ident) => { - fn $func_name( + ($FUNC_NAME:ident, $TYPE:ty, $SCALAR:ident) => { + fn $FUNC_NAME( expr: Arc, left_col: Arc, right_col: Arc, seed: u64, - expr_left: $type, - expr_right: $type, + expr_left: $TYPE, + expr_right: $TYPE, ) -> Result<()> { let mut r = StdRng::seed_from_u64(seed); - let (left_interval, right_interval, left_waited, right_waited) = if ASC { - let left = (Some(r.gen_range((0 as $type)..(1000 as $type))), None); - let right = (Some(r.gen_range((0 as $type)..(1000 as $type))), None); + let (left_given, right_given, left_expected, right_expected) = if ASC { + let left = r.gen_range((0 as $TYPE)..(1000 as $TYPE)); + let right = r.gen_range((0 as $TYPE)..(1000 as $TYPE)); ( - left, - right, - ( - Some(<$type>::max( - left.0.unwrap(), - right.0.unwrap() + expr_left, - )), - None, - ), - ( - Some(<$type>::max( - right.0.unwrap(), - left.0.unwrap() + expr_right, - )), - None, - ), + (Some(left), None), + (Some(right), None), + (Some(<$TYPE>::max(left, right + expr_left)), None), + (Some(<$TYPE>::max(right, left + expr_right)), None), ) } else { - let left = (None, Some(r.gen_range((0 as $type)..(1000 as $type)))); - let right = (None, Some(r.gen_range((0 as $type)..(1000 as $type)))); + let left = r.gen_range((0 as $TYPE)..(1000 as $TYPE)); + let right = r.gen_range((0 as $TYPE)..(1000 as $TYPE)); ( - left, - right, - ( - None, - Some(<$type>::min( - left.1.unwrap(), - right.1.unwrap() + expr_left, - )), - ), - ( - None, - Some(<$type>::min( - right.1.unwrap(), - left.1.unwrap() + expr_right, - )), - ), + (None, Some(left)), + (None, Some(right)), + (None, Some(<$TYPE>::min(left, right + expr_left))), + (None, Some(<$TYPE>::min(right, left + expr_right))), ) }; experiment( expr, (left_col, right_col), - Interval::make( - left_interval.0.map(|value| value as $type), - left_interval.1.map(|value| value as $type), - (true, true), - ), - Interval::make( - right_interval.0.map(|value| value as $type), - right_interval.1.map(|value| value as $type), - (true, true), - ), - Interval::make( - left_waited.0.map(|value| value as $type), - left_waited.1.map(|value| value as $type), - (true, true), - ), - Interval::make( - right_waited.0.map(|value| value as $type), - right_waited.1.map(|value| value as $type), - (true, true), - ), + Interval::make(left_given.0, left_given.1, (true, true)), + Interval::make(right_given.0, right_given.1, (true, true)), + Interval::make(left_expected.0, left_expected.1, (true, true)), + Interval::make(right_expected.0, right_expected.1, (true, true)), PropagationResult::Success, - )?; - Ok(()) + ) } }; } @@ -698,8 +657,8 @@ mod tests { fn testing_not_possible() -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark > right_watermark + 5 + // left_watermark > right_watermark + 5 let left_and_1 = Arc::new(BinaryExpr::new( left_col.clone(), Operator::Plus, @@ -709,32 +668,19 @@ mod tests { experiment( expr, (left_col, right_col), - Interval { - lower: IntervalBound::new(ScalarValue::Int32(Some(10)), true), - upper: IntervalBound::new(ScalarValue::Int32(Some(20)), true), - }, - Interval { - lower: IntervalBound::new(ScalarValue::Int32(Some(100)), true), - upper: IntervalBound::new(ScalarValue::Int32(None), true), - }, - Interval { - lower: IntervalBound::new(ScalarValue::Int32(Some(10)), true), - upper: IntervalBound::new(ScalarValue::Int32(Some(20)), true), - }, - Interval { - lower: IntervalBound::new(ScalarValue::Int32(Some(100)), true), - upper: IntervalBound::new(ScalarValue::Int32(None), true), - }, + Interval::make(Some(10), Some(20), (true, true)), + Interval::make(Some(100), None, (true, true)), + Interval::make(Some(10), Some(20), (true, true)), + Interval::make(Some(100), None, (true, true)), PropagationResult::Infeasible, - )?; - Ok(()) + ) } macro_rules! integer_float_case_1 { - ($test_func:ident, $generate_case_func:ident, $type:ty, $SCALAR:ident) => { + ($TEST_FUNC_NAME:ident, $GENERATE_CASE_FUNC_NAME:ident, $TYPE:ty, $SCALAR:ident) => { #[rstest] #[test] - fn $test_func( + fn $TEST_FUNC_NAME( #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 125, 211, 215, 4123)] seed: u64, #[values(Operator::Gt, Operator::GtEq)] greater_op: Operator, @@ -742,8 +688,8 @@ mod tests { ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark + 1 > right_watermark + 11 AND left_watermark + 3 < right_watermark + 33 + // left_watermark + 1 > right_watermark + 11 AND left_watermark + 3 < right_watermark + 33 let expr = gen_conjunctive_numerical_expr( left_col.clone(), right_col.clone(), @@ -753,16 +699,16 @@ mod tests { Operator::Plus, Operator::Plus, ), - ScalarValue::$SCALAR(Some(1 as $type)), - ScalarValue::$SCALAR(Some(11 as $type)), - ScalarValue::$SCALAR(Some(3 as $type)), - ScalarValue::$SCALAR(Some(33 as $type)), + ScalarValue::$SCALAR(Some(1 as $TYPE)), + ScalarValue::$SCALAR(Some(11 as $TYPE)), + ScalarValue::$SCALAR(Some(3 as $TYPE)), + ScalarValue::$SCALAR(Some(33 as $TYPE)), (greater_op, less_op), ); // l > r + 10 AND r > l - 30 - let l_gt_r = 10 as $type; - let r_gt_l = -30 as $type; - $generate_case_func::( + let l_gt_r = 10 as $TYPE; + let r_gt_l = -30 as $TYPE; + $GENERATE_CASE_FUNC_NAME::( expr.clone(), left_col.clone(), right_col.clone(), @@ -774,10 +720,9 @@ mod tests { // r < l - 10 AND l < r + 30 let r_lt_l = -l_gt_r; let l_lt_r = -r_gt_l; - $generate_case_func::( + $GENERATE_CASE_FUNC_NAME::( expr, left_col, right_col, seed, l_lt_r, r_lt_l, - )?; - Ok(()) + ) } }; } @@ -788,10 +733,10 @@ mod tests { integer_float_case_1!(case_1_f32, generate_case_f32, f32, Float32); macro_rules! integer_float_case_2 { - ($test_func:ident, $generate_case_func:ident, $type:ty, $SCALAR:ident) => { + ($TEST_FUNC_NAME:ident, $GENERATE_CASE_FUNC_NAME:ident, $TYPE:ty, $SCALAR:ident) => { #[rstest] #[test] - fn $test_func( + fn $TEST_FUNC_NAME( #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 125, 211, 215, 4123)] seed: u64, #[values(Operator::Gt, Operator::GtEq)] greater_op: Operator, @@ -799,8 +744,8 @@ mod tests { ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark - 1 > right_watermark + 5 AND left_watermark + 3 < right_watermark + 10 + // left_watermark - 1 > right_watermark + 5 AND left_watermark + 3 < right_watermark + 10 let expr = gen_conjunctive_numerical_expr( left_col.clone(), right_col.clone(), @@ -810,16 +755,16 @@ mod tests { Operator::Plus, Operator::Plus, ), - ScalarValue::$SCALAR(Some(1 as $type)), - ScalarValue::$SCALAR(Some(5 as $type)), - ScalarValue::$SCALAR(Some(3 as $type)), - ScalarValue::$SCALAR(Some(10 as $type)), + ScalarValue::$SCALAR(Some(1 as $TYPE)), + ScalarValue::$SCALAR(Some(5 as $TYPE)), + ScalarValue::$SCALAR(Some(3 as $TYPE)), + ScalarValue::$SCALAR(Some(10 as $TYPE)), (greater_op, less_op), ); // l > r + 6 AND r > l - 7 - let l_gt_r = 6 as $type; - let r_gt_l = -7 as $type; - $generate_case_func::( + let l_gt_r = 6 as $TYPE; + let r_gt_l = -7 as $TYPE; + $GENERATE_CASE_FUNC_NAME::( expr.clone(), left_col.clone(), right_col.clone(), @@ -831,10 +776,9 @@ mod tests { // r < l - 6 AND l < r + 7 let r_lt_l = -l_gt_r; let l_lt_r = -r_gt_l; - $generate_case_func::( + $GENERATE_CASE_FUNC_NAME::( expr, left_col, right_col, seed, l_lt_r, r_lt_l, - )?; - Ok(()) + ) } }; } @@ -845,10 +789,10 @@ mod tests { integer_float_case_2!(case_2_f32, generate_case_f32, f32, Float32); macro_rules! integer_float_case_3 { - ($test_func:ident, $generate_case_func:ident, $type:ty, $SCALAR:ident) => { + ($TEST_FUNC_NAME:ident, $GENERATE_CASE_FUNC_NAME:ident, $TYPE:ty, $SCALAR:ident) => { #[rstest] #[test] - fn $test_func( + fn $TEST_FUNC_NAME( #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 125, 211, 215, 4123)] seed: u64, #[values(Operator::Gt, Operator::GtEq)] greater_op: Operator, @@ -856,8 +800,8 @@ mod tests { ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark - 1 > right_watermark + 5 AND left_watermark - 3 < right_watermark + 10 + // left_watermark - 1 > right_watermark + 5 AND left_watermark - 3 < right_watermark + 10 let expr = gen_conjunctive_numerical_expr( left_col.clone(), right_col.clone(), @@ -867,16 +811,16 @@ mod tests { Operator::Minus, Operator::Plus, ), - ScalarValue::$SCALAR(Some(1 as $type)), - ScalarValue::$SCALAR(Some(5 as $type)), - ScalarValue::$SCALAR(Some(3 as $type)), - ScalarValue::$SCALAR(Some(10 as $type)), + ScalarValue::$SCALAR(Some(1 as $TYPE)), + ScalarValue::$SCALAR(Some(5 as $TYPE)), + ScalarValue::$SCALAR(Some(3 as $TYPE)), + ScalarValue::$SCALAR(Some(10 as $TYPE)), (greater_op, less_op), ); // l > r + 6 AND r > l - 13 - let l_gt_r = 6 as $type; - let r_gt_l = -13 as $type; - $generate_case_func::( + let l_gt_r = 6 as $TYPE; + let r_gt_l = -13 as $TYPE; + $GENERATE_CASE_FUNC_NAME::( expr.clone(), left_col.clone(), right_col.clone(), @@ -888,10 +832,9 @@ mod tests { // r < l - 6 AND l < r + 13 let r_lt_l = -l_gt_r; let l_lt_r = -r_gt_l; - $generate_case_func::( + $GENERATE_CASE_FUNC_NAME::( expr, left_col, right_col, seed, l_lt_r, r_lt_l, - )?; - Ok(()) + ) } }; } @@ -902,10 +845,10 @@ mod tests { integer_float_case_3!(case_3_f32, generate_case_f32, f32, Float32); macro_rules! integer_float_case_4 { - ($test_func:ident, $generate_case_func:ident, $type:ty, $SCALAR:ident) => { + ($TEST_FUNC_NAME:ident, $GENERATE_CASE_FUNC_NAME:ident, $TYPE:ty, $SCALAR:ident) => { #[rstest] #[test] - fn $test_func( + fn $TEST_FUNC_NAME( #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 125, 211, 215, 4123)] seed: u64, #[values(Operator::Gt, Operator::GtEq)] greater_op: Operator, @@ -913,8 +856,8 @@ mod tests { ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark - 10 > right_watermark - 5 AND left_watermark - 30 < right_watermark - 3 + // left_watermark - 10 > right_watermark - 5 AND left_watermark - 30 < right_watermark - 3 let expr = gen_conjunctive_numerical_expr( left_col.clone(), right_col.clone(), @@ -924,16 +867,16 @@ mod tests { Operator::Minus, Operator::Plus, ), - ScalarValue::$SCALAR(Some(10 as $type)), - ScalarValue::$SCALAR(Some(5 as $type)), - ScalarValue::$SCALAR(Some(3 as $type)), - ScalarValue::$SCALAR(Some(10 as $type)), + ScalarValue::$SCALAR(Some(10 as $TYPE)), + ScalarValue::$SCALAR(Some(5 as $TYPE)), + ScalarValue::$SCALAR(Some(3 as $TYPE)), + ScalarValue::$SCALAR(Some(10 as $TYPE)), (greater_op, less_op), ); // l > r + 5 AND r > l - 13 - let l_gt_r = 5 as $type; - let r_gt_l = -13 as $type; - $generate_case_func::( + let l_gt_r = 5 as $TYPE; + let r_gt_l = -13 as $TYPE; + $GENERATE_CASE_FUNC_NAME::( expr.clone(), left_col.clone(), right_col.clone(), @@ -945,10 +888,9 @@ mod tests { // r < l - 5 AND l < r + 13 let r_lt_l = -l_gt_r; let l_lt_r = -r_gt_l; - $generate_case_func::( + $GENERATE_CASE_FUNC_NAME::( expr, left_col, right_col, seed, l_lt_r, r_lt_l, - )?; - Ok(()) + ) } }; } @@ -959,10 +901,10 @@ mod tests { integer_float_case_4!(case_4_f32, generate_case_f32, f32, Float32); macro_rules! integer_float_case_5 { - ($test_func:ident, $generate_case_func:ident, $type:ty, $SCALAR:ident) => { + ($TEST_FUNC_NAME:ident, $GENERATE_CASE_FUNC_NAME:ident, $TYPE:ty, $SCALAR:ident) => { #[rstest] #[test] - fn $test_func( + fn $TEST_FUNC_NAME( #[values(0, 1, 2, 3, 4, 12, 32, 314, 3124, 123, 125, 211, 215, 4123)] seed: u64, #[values(Operator::Gt, Operator::GtEq)] greater_op: Operator, @@ -970,8 +912,8 @@ mod tests { ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark - 10 > right_watermark - 5 AND left_watermark - 30 < right_watermark - 3 + // left_watermark - 10 > right_watermark - 5 AND left_watermark - 30 < right_watermark - 3 let expr = gen_conjunctive_numerical_expr( left_col.clone(), right_col.clone(), @@ -981,16 +923,16 @@ mod tests { Operator::Minus, Operator::Minus, ), - ScalarValue::$SCALAR(Some(10 as $type)), - ScalarValue::$SCALAR(Some(5 as $type)), - ScalarValue::$SCALAR(Some(30 as $type)), - ScalarValue::$SCALAR(Some(3 as $type)), + ScalarValue::$SCALAR(Some(10 as $TYPE)), + ScalarValue::$SCALAR(Some(5 as $TYPE)), + ScalarValue::$SCALAR(Some(30 as $TYPE)), + ScalarValue::$SCALAR(Some(3 as $TYPE)), (greater_op, less_op), ); // l > r + 5 AND r > l - 27 - let l_gt_r = 5 as $type; - let r_gt_l = -27 as $type; - $generate_case_func::( + let l_gt_r = 5 as $TYPE; + let r_gt_l = -27 as $TYPE; + $GENERATE_CASE_FUNC_NAME::( expr.clone(), left_col.clone(), right_col.clone(), @@ -1002,10 +944,9 @@ mod tests { // r < l - 5 AND l < r + 27 let r_lt_l = -l_gt_r; let l_lt_r = -r_gt_l; - $generate_case_func::( + $GENERATE_CASE_FUNC_NAME::( expr, left_col, right_col, seed, l_lt_r, r_lt_l, - )?; - Ok(()) + ) } }; } @@ -1024,8 +965,8 @@ mod tests { ) -> Result<()> { let left_col = Arc::new(Column::new("left_watermark", 0)); let right_col = Arc::new(Column::new("right_watermark", 0)); - // left_watermark - 1 >= right_watermark + 5 AND left_watermark - 10 <= right_watermark + 3 + // left_watermark - 1 >= right_watermark + 5 AND left_watermark - 10 <= right_watermark + 3 let expr = gen_conjunctive_numerical_expr( left_col.clone(), right_col.clone(), @@ -1045,9 +986,7 @@ mod tests { let l_gt_r = 6; let r_gt_l = -13; - generate_case_i32::(expr, left_col, right_col, seed, l_gt_r, r_gt_l)?; - - Ok(()) + generate_case_i32::(expr, left_col, right_col, seed, l_gt_r, r_gt_l) } #[test] @@ -1089,6 +1028,7 @@ mod tests { assert_eq!(prev_node_count, final_node_count); Ok(()) } + #[test] fn test_gather_node_indices_remove() -> Result<()> { // Expression: a@0 + b@1 + 1 > y@0 - z@1, given a@0 + b@1. From 5c22af672d6e14c47b0c8eaebb4f4ddcf0d5bc70 Mon Sep 17 00:00:00 2001 From: metesynnada <100111937+metesynnada@users.noreply.github.com> Date: Tue, 18 Apr 2023 09:52:30 +0300 Subject: [PATCH 15/15] Latests improvements on code --- .../physical-expr/src/intervals/cp_solver.rs | 35 ++----------------- 1 file changed, 2 insertions(+), 33 deletions(-) diff --git a/datafusion/physical-expr/src/intervals/cp_solver.rs b/datafusion/physical-expr/src/intervals/cp_solver.rs index ec0ec45e3e40..3a682049a08f 100644 --- a/datafusion/physical-expr/src/intervals/cp_solver.rs +++ b/datafusion/physical-expr/src/intervals/cp_solver.rs @@ -597,6 +597,8 @@ mod tests { assert_eq!(exp_result, result); col_stat_nodes.iter().zip(expected_nodes.iter()).for_each( |((_, calculated_interval_node), (_, expected))| { + // NOTE: These randomized tests only check for conservative containment, + // not openness/closedness of endpoints. assert!(calculated_interval_node.lower.value <= expected.lower.value); assert!(calculated_interval_node.upper.value >= expected.upper.value); }, @@ -956,39 +958,6 @@ mod tests { integer_float_case_5!(case_5_f64, generate_case_f64, f64, Float64); integer_float_case_5!(case_5_f32, generate_case_f32, f32, Float32); - #[rstest] - #[test] - fn case_6( - #[values(0, 1, 2, 123, 756, 63, 345, 6443, 12341, 142, 123, 8900)] seed: u64, - #[values(Operator::Gt, Operator::GtEq)] greater_op: Operator, - #[values(Operator::Lt, Operator::LtEq)] less_op: Operator, - ) -> Result<()> { - let left_col = Arc::new(Column::new("left_watermark", 0)); - let right_col = Arc::new(Column::new("right_watermark", 0)); - - // left_watermark - 1 >= right_watermark + 5 AND left_watermark - 10 <= right_watermark + 3 - let expr = gen_conjunctive_numerical_expr( - left_col.clone(), - right_col.clone(), - ( - Operator::Minus, - Operator::Plus, - Operator::Minus, - Operator::Plus, - ), - ScalarValue::Int32(Some(1)), - ScalarValue::Int32(Some(5)), - ScalarValue::Int32(Some(10)), - ScalarValue::Int32(Some(3)), - (greater_op, less_op), - ); - // l >= r + 6 AND r >= l - 13 - let l_gt_r = 6; - let r_gt_l = -13; - - generate_case_i32::(expr, left_col, right_col, seed, l_gt_r, r_gt_l) - } - #[test] fn test_gather_node_indices_dont_remove() -> Result<()> { // Expression: a@0 + b@1 + 1 > a@0 - b@1, given a@0 + b@1.