From a3839925900e407c94c09350b8ad33659d2f7bb2 Mon Sep 17 00:00:00 2001 From: Ddupg Date: Fri, 22 Aug 2025 19:04:02 +0800 Subject: [PATCH] feat: support GEO type Change-Id: Ie3f0bbee0f4dd89ff42d2e7eaca6597887b2c252 --- Cargo.lock | 422 +++++++++++++++++++++++-- Cargo.toml | 8 +- deny.toml | 1 + java/lance-jni/Cargo.lock | 515 +++++++++++++++++++++++++++++-- python/Cargo.lock | 393 ++++++++++++++++++++++- python/pyproject.toml | 4 + python/python/tests/test_geo.py | 106 +++++++ rust/lance-datafusion/Cargo.toml | 1 + rust/lance-datafusion/src/udf.rs | 2 + rust/lance-geo/Cargo.toml | 22 ++ rust/lance-geo/src/lib.rs | 8 + rust/lance-geo/src/udf.rs | 20 ++ rust/lance/Cargo.toml | 4 + rust/lance/src/dataset.rs | 142 +++++++++ 14 files changed, 1596 insertions(+), 52 deletions(-) create mode 100644 python/python/tests/test_geo.py create mode 100644 rust/lance-geo/Cargo.toml create mode 100644 rust/lance-geo/src/lib.rs create mode 100644 rust/lance-geo/src/udf.rs diff --git a/Cargo.lock b/Cargo.lock index 58bd1500565..13f8ae5e0c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -432,15 +432,19 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.34" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e86f6d3dc9dc4352edeea6b8e499e13e3f5dc3b964d7ca5fd411415a3498473" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" dependencies = [ - "compression-codecs", - "compression-core", + "bzip2 0.5.2", + "flate2", "futures-core", + "memchr", "pin-project-lite", "tokio", + "xz2", + "zstd", + "zstd-safe", ] [[package]] @@ -1280,6 +1284,34 @@ dependencies = [ "either", ] +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + +[[package]] +name = "bzip2" +version = "0.6.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" +dependencies = [ + "libbz2-rs-sys", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "cast" version = "0.3.0" @@ -1461,23 +1493,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "compression-codecs" -version = "0.4.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "302266479cb963552d11bd042013a58ef1adc56768016c8b82b4199488f2d4ad" -dependencies = [ - "compression-core", - "flate2", - "memchr", -] - -[[package]] -name = "compression-core" -version = "0.4.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" - [[package]] name = "concurrent-queue" version = "2.5.0" @@ -1907,6 +1922,7 @@ dependencies = [ "arrow-schema", "async-trait", "bytes", + "bzip2 0.6.1", "chrono", "datafusion-catalog", "datafusion-catalog-listing", @@ -1932,6 +1948,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "datafusion-sql", + "flate2", "futures", "itertools 0.14.0", "log", @@ -1945,6 +1962,8 @@ dependencies = [ "tokio", "url", "uuid", + "xz2", + "zstd", ] [[package]] @@ -2015,6 +2034,7 @@ dependencies = [ "object_store", "parquet", "paste", + "recursive", "sqlparser", "tokio", "web-time", @@ -2038,8 +2058,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86" dependencies = [ "arrow", + "async-compression", "async-trait", "bytes", + "bzip2 0.6.1", "chrono", "datafusion-common", "datafusion-common-runtime", @@ -2050,6 +2072,7 @@ dependencies = [ "datafusion-physical-expr-common", 
"datafusion-physical-plan", "datafusion-session", + "flate2", "futures", "glob", "itertools 0.14.0", @@ -2059,7 +2082,10 @@ dependencies = [ "rand 0.9.2", "tempfile", "tokio", + "tokio-util", "url", + "xz2", + "zstd", ] [[package]] @@ -2188,6 +2214,7 @@ dependencies = [ "datafusion-physical-expr-common", "indexmap", "paste", + "recursive", "serde_json", "sqlparser", ] @@ -2360,6 +2387,7 @@ dependencies = [ "indexmap", "itertools 0.14.0", "log", + "recursive", "regex", "regex-syntax", ] @@ -2433,6 +2461,7 @@ dependencies = [ "datafusion-pruning", "itertools 0.14.0", "log", + "recursive", ] [[package]] @@ -2520,6 +2549,7 @@ dependencies = [ "datafusion-expr", "indexmap", "log", + "recursive", "regex", "sqlparser", ] @@ -2788,6 +2818,16 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "earcutr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79127ed59a85d7687c409e9978547cffb7dc79675355ed22da6b66fd5f6ead01" +dependencies = [ + "itertools 0.11.0", + "num-traits", +] + [[package]] name = "ecdsa" version = "0.14.8" @@ -3094,6 +3134,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float_next_after" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" + [[package]] name = "fnv" version = "1.0.7" @@ -3304,6 +3350,128 @@ dependencies = [ "version_check", ] +[[package]] +name = "geo" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fc1a1678e54befc9b4bcab6cd43b8e7f834ae8ea121118b0fd8c42747675b4a" +dependencies = [ + "earcutr", + "float_next_after", + "geo-types", + "geographiclib-rs", + "i_overlay", + "log", + "num-traits", + "robust", + "rstar", + "spade", +] + +[[package]] +name = "geo-traits" +version = "0.3.0" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7c353d12a704ccfab1ba8bfb1a7fe6cb18b665bf89d37f4f7890edcd260206" +dependencies = [ + "geo-types", +] + +[[package]] +name = "geo-types" +version = "0.7.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75a4dcd69d35b2c87a7c83bce9af69fd65c9d68d3833a0ded568983928f3fc99" +dependencies = [ + "approx", + "num-traits", + "rayon", + "rstar", + "serde", +] + +[[package]] +name = "geoarrow-array" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d1884b17253d8572e88833c282fcbb442365e4ae5f9052ced2831608253436c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-schema", + "geo-traits", + "geoarrow-schema", + "num-traits", + "wkb", + "wkt", +] + +[[package]] +name = "geoarrow-expr-geo" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a67d3b543bc3ebeffdc204b67d69b8f9fcd33d76269ddd4a4618df99f053a934" +dependencies = [ + "arrow-array", + "arrow-buffer", + "geo", + "geo-traits", + "geoarrow-array", + "geoarrow-schema", +] + +[[package]] +name = "geoarrow-schema" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02f1b18b1c9a44ecd72be02e53d6e63bbccfdc8d1765206226af227327e2be6e" +dependencies = [ + "arrow-schema", + "geo-traits", + "serde", + "serde_json", + "thiserror 1.0.69", +] + +[[package]] +name = "geodatafusion" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83d676b8d8b5f391ab4270ba31e9b599ee2c3d780405a38e272a0a7565ea189c" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-schema", + "datafusion", + "geo", + "geo-traits", + "geoarrow-array", + "geoarrow-expr-geo", + "geoarrow-schema", + "geohash", + "thiserror 1.0.69", + "wkt", +] + +[[package]] +name = "geographiclib-rs" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "f611040a2bb37eaa29a78a128d1e92a378a03e0b6e66ae27398d42b1ba9a7841" +dependencies = [ + "libm", +] + +[[package]] +name = "geohash" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fb94b1a65401d6cbf22958a9040aa364812c26674f841bee538b12c135db1e6" +dependencies = [ + "geo-types", + "libm", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -3416,6 +3584,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "hash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47d60b12902ba28e2730cd37e95b8c9223af2808df9e902d4df49588d1470606" +dependencies = [ + "byteorder", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -3448,6 +3625,16 @@ dependencies = [ "foldhash 0.2.0", ] +[[package]] +name = "heapless" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad" +dependencies = [ + "hash32", + "stable_deref_trait", +] + [[package]] name = "heck" version = "0.5.0" @@ -3719,6 +3906,49 @@ dependencies = [ "serde", ] +[[package]] +name = "i_float" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "010025c2c532c8d82e42d0b8bb5184afa449fa6f06c709ea9adcb16c49ae405b" +dependencies = [ + "libm", +] + +[[package]] +name = "i_key_sort" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9190f86706ca38ac8add223b2aed8b1330002b5cdbbce28fb58b10914d38fc27" + +[[package]] +name = "i_overlay" +version = "4.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fcccbd4e4274e0f80697f5fbc6540fdac533cce02f2081b328e68629cce24f9" +dependencies = [ + "i_float", + "i_key_sort", + "i_shape", + "i_tree", + "rayon", +] + +[[package]] +name = "i_shape" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1ea154b742f7d43dae2897fcd5ead86bc7b5eefcedd305a7ebf9f69d44d61082" +dependencies = [ + "i_float", +] + +[[package]] +name = "i_tree" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e6d558e6d4c7b82bc51d9c771e7a927862a161a7d87bf2b0541450e0e20915" + [[package]] name = "iana-time-zone" version = "0.1.64" @@ -4195,6 +4425,9 @@ dependencies = [ "either", "env_logger", "futures", + "geo-types", + "geoarrow-array", + "geoarrow-schema", "half", "humantime", "itertools 0.13.0", @@ -4204,6 +4437,7 @@ dependencies = [ "lance-datagen", "lance-encoding", "lance-file", + "lance-geo", "lance-index", "lance-io", "lance-linalg", @@ -4330,6 +4564,7 @@ dependencies = [ "lance-arrow", "lance-core", "lance-datagen", + "lance-geo", "log", "pin-project", "prost", @@ -4472,6 +4707,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "lance-geo" +version = "1.0.0-beta.12" +dependencies = [ + "datafusion", + "geo-types", + "geoarrow-array", + "geoarrow-schema", + "geodatafusion", +] + [[package]] name = "lance-index" version = "1.0.0-beta.12" @@ -4843,6 +5089,12 @@ dependencies = [ "lexical-util", ] +[[package]] +name = "libbz2-rs-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" + [[package]] name = "libc" version = "0.2.177" @@ -5535,6 +5787,28 @@ dependencies = [ "libc", ] +[[package]] +name = "num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.111", +] + 
[[package]] name = "number_prefix" version = "0.4.0" @@ -6367,6 +6641,15 @@ dependencies = [ "cmake", ] +[[package]] +name = "psm" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e66fcd288453b748497d8fb18bccc83a16b0518e3906d4b8df0a8d42d93dbb1c" +dependencies = [ + "cc", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -6631,6 +6914,26 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.111", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -6845,6 +7148,12 @@ dependencies = [ "byteorder", ] +[[package]] +name = "robust" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e27ee8bb91ca0adcf0ecb116293afa12d393f9c2b9b9cd54d33e8078fe19839" + [[package]] name = "rsa" version = "0.9.9" @@ -6866,6 +7175,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rstar" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "421400d13ccfd26dfa5858199c30a5d76f9c54e0dba7575273025b43c5175dbb" +dependencies = [ + "heapless", + "num-traits", + "smallvec", +] + [[package]] name = "rstest" version = "0.23.0" @@ -7536,6 +7856,18 @@ dependencies = [ "winapi", ] +[[package]] +name = "spade" +version = "2.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb313e1c8afee5b5647e00ee0fe6855e3d529eb863a0fdae1d60006c4d1e9990" +dependencies = [ + "hashbrown 0.15.5", + "num-traits", + "robust", + 
"smallvec", +] + [[package]] name = "spin" version = "0.9.8" @@ -7590,6 +7922,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" dependencies = [ "log", + "recursive", "sqlparser_derive", ] @@ -7610,6 +7943,19 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "stacker" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + [[package]] name = "std_prelude" version = "0.2.12" @@ -9300,6 +9646,31 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "wkb" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a120b336c7ad17749026d50427c23d838ecb50cd64aaea6254b5030152f890a9" +dependencies = [ + "byteorder", + "geo-traits", + "num_enum", + "thiserror 1.0.69", +] + +[[package]] +name = "wkt" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efb2b923ccc882312e559ffaa832a055ba9d1ac0cc8e86b3e25453247e4b81d7" +dependencies = [ + "geo-traits", + "geo-types", + "log", + "num-traits", + "thiserror 1.0.69", +] + [[package]] name = "writeable" version = "0.6.2" @@ -9337,6 +9708,15 @@ version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" 
+dependencies = [ + "lzma-sys", +] + [[package]] name = "yada" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index 790bf1d477f..71e7add5367 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ members = [ "rust/lance-datagen", "rust/lance-encoding", "rust/lance-file", + "rust/lance-geo", "rust/lance-index", "rust/lance-io", "rust/lance-linalg", @@ -56,6 +57,7 @@ lance-datafusion = { version = "=1.0.0-beta.12", path = "./rust/lance-datafusion lance-datagen = { version = "=1.0.0-beta.12", path = "./rust/lance-datagen" } lance-encoding = { version = "=1.0.0-beta.12", path = "./rust/lance-encoding" } lance-file = { version = "=1.0.0-beta.12", path = "./rust/lance-file" } +lance-geo = { version = "=1.0.0-beta.12", path = "./rust/lance-geo" } lance-index = { version = "=1.0.0-beta.12", path = "./rust/lance-index" } lance-io = { version = "=1.0.0-beta.12", path = "./rust/lance-io", default-features = false } lance-linalg = { version = "=1.0.0-beta.12", path = "./rust/lance-linalg" } @@ -130,6 +132,10 @@ either = "1.0" fst = { version = "0.4.7", features = ["levenshtein"] } fsst = { version = "=1.0.0-beta.12", path = "./rust/compression/fsst" } futures = "0.3" +geoarrow-array = "0.6" +geoarrow-schema = "0.6" +geodatafusion = "0.1.1" +geo-types = "0.7.16" http = "1.1.0" humantime = "2.2.0" hyperloglogplus = { version = "0.4.1", features = ["const-loop"] } @@ -179,7 +185,7 @@ tokio = { version = "1.23", features = [ "sync", ] } tokio-stream = "0.1.14" -tokio-util = { version = "0.7.10" } +tokio-util = { version = "0.7.16" } tower = "0.5" tower-http = "0.5" tracing = "0.1" diff --git a/deny.toml b/deny.toml index e192267b97b..948349e9f7c 100644 --- a/deny.toml +++ b/deny.toml @@ -113,6 +113,7 @@ allow = [ "Zlib", "CC0-1.0", "CDLA-Permissive-2.0", + "bzip2-1.0.6", ] # The confidence threshold for detecting a license from license text. 
# The higher the value, the more closely the license text must be to the diff --git a/java/lance-jni/Cargo.lock b/java/lance-jni/Cargo.lock index 526b29b6353..181b3ee2efb 100644 --- a/java/lance-jni/Cargo.lock +++ b/java/lance-jni/Cargo.lock @@ -128,6 +128,24 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + +[[package]] +name = "ar_archive_writer" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +dependencies = [ + "object", +] + [[package]] name = "arc-swap" version = "1.7.1" @@ -377,15 +395,19 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.34" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e86f6d3dc9dc4352edeea6b8e499e13e3f5dc3b964d7ca5fd411415a3498473" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" dependencies = [ - "compression-codecs", - "compression-core", + "bzip2 0.5.2", + "flate2", "futures-core", + "memchr", "pin-project-lite", "tokio", + "xz2", + "zstd", + "zstd-safe", ] [[package]] @@ -1053,6 +1075,34 @@ dependencies = [ "either", ] +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + +[[package]] +name = "bzip2" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" +dependencies = [ + "libbz2-rs-sys", 
+] + +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "cbc" version = "0.1.2" @@ -1168,23 +1218,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "compression-codecs" -version = "0.4.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "302266479cb963552d11bd042013a58ef1adc56768016c8b82b4199488f2d4ad" -dependencies = [ - "compression-core", - "flate2", - "memchr", -] - -[[package]] -name = "compression-core" -version = "0.4.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" - [[package]] name = "concurrent-queue" version = "2.5.0" @@ -1409,6 +1442,7 @@ dependencies = [ "arrow-schema", "async-trait", "bytes", + "bzip2 0.6.1", "chrono", "datafusion-catalog", "datafusion-catalog-listing", @@ -1434,6 +1468,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "datafusion-sql", + "flate2", "futures", "itertools 0.14.0", "log", @@ -1447,6 +1482,8 @@ dependencies = [ "tokio", "url", "uuid", + "xz2", + "zstd", ] [[package]] @@ -1517,6 +1554,7 @@ dependencies = [ "object_store", "parquet", "paste", + "recursive", "sqlparser", "tokio", "web-time", @@ -1540,8 +1578,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86" dependencies = [ "arrow", + "async-compression", "async-trait", "bytes", + "bzip2 0.6.1", "chrono", "datafusion-common", "datafusion-common-runtime", @@ -1552,6 +1592,7 @@ dependencies = [ "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", + "flate2", "futures", "glob", "itertools 0.14.0", @@ -1561,7 +1602,10 @@ dependencies = [ "rand 0.9.2", "tempfile", "tokio", + 
"tokio-util", "url", + "xz2", + "zstd", ] [[package]] @@ -1690,6 +1734,7 @@ dependencies = [ "datafusion-physical-expr-common", "indexmap", "paste", + "recursive", "serde_json", "sqlparser", ] @@ -1862,6 +1907,7 @@ dependencies = [ "indexmap", "itertools 0.14.0", "log", + "recursive", "regex", "regex-syntax", ] @@ -1935,6 +1981,7 @@ dependencies = [ "datafusion-pruning", "itertools 0.14.0", "log", + "recursive", ] [[package]] @@ -2022,6 +2069,7 @@ dependencies = [ "datafusion-expr", "indexmap", "log", + "recursive", "regex", "sqlparser", ] @@ -2158,6 +2206,16 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "earcutr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79127ed59a85d7687c409e9978547cffb7dc79675355ed22da6b66fd5f6ead01" +dependencies = [ + "itertools 0.11.0", + "num-traits", +] + [[package]] name = "either" version = "1.15.0" @@ -2290,6 +2348,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float_next_after" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" + [[package]] name = "fnv" version = "1.0.7" @@ -2469,6 +2533,128 @@ dependencies = [ "version_check", ] +[[package]] +name = "geo" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fc1a1678e54befc9b4bcab6cd43b8e7f834ae8ea121118b0fd8c42747675b4a" +dependencies = [ + "earcutr", + "float_next_after", + "geo-types", + "geographiclib-rs", + "i_overlay", + "log", + "num-traits", + "robust", + "rstar", + "spade", +] + +[[package]] +name = "geo-traits" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7c353d12a704ccfab1ba8bfb1a7fe6cb18b665bf89d37f4f7890edcd260206" +dependencies = [ + "geo-types", +] + 
+[[package]] +name = "geo-types" +version = "0.7.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75a4dcd69d35b2c87a7c83bce9af69fd65c9d68d3833a0ded568983928f3fc99" +dependencies = [ + "approx", + "num-traits", + "rayon", + "rstar", + "serde", +] + +[[package]] +name = "geoarrow-array" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d1884b17253d8572e88833c282fcbb442365e4ae5f9052ced2831608253436c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-schema", + "geo-traits", + "geoarrow-schema", + "num-traits", + "wkb", + "wkt", +] + +[[package]] +name = "geoarrow-expr-geo" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a67d3b543bc3ebeffdc204b67d69b8f9fcd33d76269ddd4a4618df99f053a934" +dependencies = [ + "arrow-array", + "arrow-buffer", + "geo", + "geo-traits", + "geoarrow-array", + "geoarrow-schema", +] + +[[package]] +name = "geoarrow-schema" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02f1b18b1c9a44ecd72be02e53d6e63bbccfdc8d1765206226af227327e2be6e" +dependencies = [ + "arrow-schema", + "geo-traits", + "serde", + "serde_json", + "thiserror 1.0.69", +] + +[[package]] +name = "geodatafusion" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83d676b8d8b5f391ab4270ba31e9b599ee2c3d780405a38e272a0a7565ea189c" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-schema", + "datafusion", + "geo", + "geo-traits", + "geoarrow-array", + "geoarrow-expr-geo", + "geoarrow-schema", + "geohash", + "thiserror 1.0.69", + "wkt", +] + +[[package]] +name = "geographiclib-rs" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f611040a2bb37eaa29a78a128d1e92a378a03e0b6e66ae27398d42b1ba9a7841" +dependencies = [ + "libm", +] + +[[package]] +name = "geohash" +version = "0.13.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fb94b1a65401d6cbf22958a9040aa364812c26674f841bee538b12c135db1e6" +dependencies = [ + "geo-types", + "libm", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -2545,6 +2731,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "hash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47d60b12902ba28e2730cd37e95b8c9223af2808df9e902d4df49588d1470606" +dependencies = [ + "byteorder", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -2577,6 +2772,16 @@ dependencies = [ "foldhash 0.2.0", ] +[[package]] +name = "heapless" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad" +dependencies = [ + "hash32", + "stable_deref_trait", +] + [[package]] name = "heck" version = "0.5.0" @@ -2766,6 +2971,49 @@ dependencies = [ "serde", ] +[[package]] +name = "i_float" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "010025c2c532c8d82e42d0b8bb5184afa449fa6f06c709ea9adcb16c49ae405b" +dependencies = [ + "libm", +] + +[[package]] +name = "i_key_sort" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9190f86706ca38ac8add223b2aed8b1330002b5cdbbce28fb58b10914d38fc27" + +[[package]] +name = "i_overlay" +version = "4.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fcccbd4e4274e0f80697f5fbc6540fdac533cce02f2081b328e68629cce24f9" +dependencies = [ + "i_float", + "i_key_sort", + "i_shape", + "i_tree", + "rayon", +] + +[[package]] +name = "i_shape" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ea154b742f7d43dae2897fcd5ead86bc7b5eefcedd305a7ebf9f69d44d61082" +dependencies = [ + "i_float", +] + +[[package]] +name = "i_tree" +version = "0.16.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e6d558e6d4c7b82bc51d9c771e7a927862a161a7d87bf2b0541450e0e20915" + [[package]] name = "iana-time-zone" version = "0.1.64" @@ -2946,6 +3194,15 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -3125,6 +3382,7 @@ dependencies = [ "lance-datafusion", "lance-encoding", "lance-file", + "lance-geo", "lance-index", "lance-io", "lance-linalg", @@ -3236,6 +3494,7 @@ dependencies = [ "lance-arrow", "lance-core", "lance-datagen", + "lance-geo", "log", "pin-project", "prost", @@ -3330,6 +3589,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "lance-geo" +version = "1.0.0-beta.12" +dependencies = [ + "datafusion", + "geo-types", + "geoarrow-array", + "geoarrow-schema", + "geodatafusion", +] + [[package]] name = "lance-index" version = "1.0.0-beta.12" @@ -3643,6 +3913,12 @@ dependencies = [ "lexical-util", ] +[[package]] +name = "libbz2-rs-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" + [[package]] name = "libc" version = "0.2.177" @@ -3763,6 +4039,17 @@ dependencies = [ "twox-hash", ] +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "matchers" version = "0.2.0" @@ -4056,6 +4343,37 @@ dependencies = [ "libc", ] +[[package]] +name = "num_enum" +version = "0.7.5" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.111", +] + +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + [[package]] name = "object_store" version = "0.12.4" @@ -4538,6 +4856,15 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.103" @@ -4599,6 +4926,16 @@ dependencies = [ "prost", ] +[[package]] +name = "psm" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" +dependencies = [ + "ar_archive_writer", + "cc", +] + [[package]] name = "quick-xml" version = "0.37.5" @@ -4828,6 +5165,26 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.111", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -4996,6 +5353,12 @@ dependencies = [ "byteorder", ] +[[package]] +name = "robust" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e27ee8bb91ca0adcf0ecb116293afa12d393f9c2b9b9cd54d33e8078fe19839" + [[package]] name = "rsa" version = "0.9.9" @@ -5017,6 +5380,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rstar" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "421400d13ccfd26dfa5858199c30a5d76f9c54e0dba7575273025b43c5175dbb" +dependencies = [ + "heapless", + "num-traits", + "smallvec", +] + [[package]] name = "rust-ini" version = "0.21.3" @@ -5527,6 +5901,18 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "spade" +version = "2.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb313e1c8afee5b5647e00ee0fe6855e3d529eb863a0fdae1d60006c4d1e9990" +dependencies = [ + "hashbrown 0.15.5", + "num-traits", + "robust", + "smallvec", +] + [[package]] name = "spin" version = "0.9.8" @@ -5550,6 +5936,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" dependencies = [ "log", + "recursive", "sqlparser_derive", ] @@ -5570,6 +5957,19 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "stacker" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + [[package]] name = "std_prelude" version = "0.2.12" @@ -6050,6 +6450,36 @@ dependencies = 
[ "tokio", ] +[[package]] +name = "toml_datetime" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" +dependencies = [ + "winnow", +] + [[package]] name = "tower" version = "0.5.2" @@ -6889,12 +7319,46 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winnow" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen" version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "wkb" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a120b336c7ad17749026d50427c23d838ecb50cd64aaea6254b5030152f890a9" +dependencies = [ + "byteorder", + "geo-traits", + "num_enum", + "thiserror 1.0.69", +] + +[[package]] +name = "wkt" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efb2b923ccc882312e559ffaa832a055ba9d1ac0cc8e86b3e25453247e4b81d7" +dependencies = [ + "geo-traits", + "geo-types", + "log", + "num-traits", + "thiserror 1.0.69", +] + 
[[package]] name = "writeable" version = "0.6.2" @@ -6922,6 +7386,15 @@ version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "yoke" version = "0.8.1" diff --git a/python/Cargo.lock b/python/Cargo.lock index ae6c111f300..a9f1a979fc1 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -182,6 +182,15 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + [[package]] name = "ar_archive_writer" version = "0.2.0" @@ -2499,6 +2508,16 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "earcutr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79127ed59a85d7687c409e9978547cffb7dc79675355ed22da6b66fd5f6ead01" +dependencies = [ + "itertools 0.11.0", + "num-traits", +] + [[package]] name = "either" version = "1.15.0" @@ -2716,6 +2735,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float_next_after" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" + [[package]] name = "fnv" version = "1.0.7" @@ -2904,6 +2929,128 @@ dependencies = [ "version_check", ] +[[package]] +name = "geo" 
+version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fc1a1678e54befc9b4bcab6cd43b8e7f834ae8ea121118b0fd8c42747675b4a" +dependencies = [ + "earcutr", + "float_next_after", + "geo-types", + "geographiclib-rs", + "i_overlay", + "log", + "num-traits", + "robust", + "rstar", + "spade", +] + +[[package]] +name = "geo-traits" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7c353d12a704ccfab1ba8bfb1a7fe6cb18b665bf89d37f4f7890edcd260206" +dependencies = [ + "geo-types", +] + +[[package]] +name = "geo-types" +version = "0.7.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75a4dcd69d35b2c87a7c83bce9af69fd65c9d68d3833a0ded568983928f3fc99" +dependencies = [ + "approx", + "num-traits", + "rayon", + "rstar", + "serde", +] + +[[package]] +name = "geoarrow-array" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d1884b17253d8572e88833c282fcbb442365e4ae5f9052ced2831608253436c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-schema", + "geo-traits", + "geoarrow-schema", + "num-traits", + "wkb", + "wkt", +] + +[[package]] +name = "geoarrow-expr-geo" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a67d3b543bc3ebeffdc204b67d69b8f9fcd33d76269ddd4a4618df99f053a934" +dependencies = [ + "arrow-array", + "arrow-buffer", + "geo", + "geo-traits", + "geoarrow-array", + "geoarrow-schema", +] + +[[package]] +name = "geoarrow-schema" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02f1b18b1c9a44ecd72be02e53d6e63bbccfdc8d1765206226af227327e2be6e" +dependencies = [ + "arrow-schema", + "geo-traits", + "serde", + "serde_json", + "thiserror 1.0.69", +] + +[[package]] +name = "geodatafusion" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"83d676b8d8b5f391ab4270ba31e9b599ee2c3d780405a38e272a0a7565ea189c" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-schema", + "datafusion", + "geo", + "geo-traits", + "geoarrow-array", + "geoarrow-expr-geo", + "geoarrow-schema", + "geohash", + "thiserror 1.0.69", + "wkt", +] + +[[package]] +name = "geographiclib-rs" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f611040a2bb37eaa29a78a128d1e92a378a03e0b6e66ae27398d42b1ba9a7841" +dependencies = [ + "libm", +] + +[[package]] +name = "geohash" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fb94b1a65401d6cbf22958a9040aa364812c26674f841bee538b12c135db1e6" +dependencies = [ + "geo-types", + "libm", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -2999,6 +3146,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "hash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47d60b12902ba28e2730cd37e95b8c9223af2808df9e902d4df49588d1470606" +dependencies = [ + "byteorder", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -3031,6 +3187,16 @@ dependencies = [ "foldhash 0.2.0", ] +[[package]] +name = "heapless" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad" +dependencies = [ + "hash32", + "stable_deref_trait", +] + [[package]] name = "heck" version = "0.5.0" @@ -3260,6 +3426,49 @@ dependencies = [ "serde", ] +[[package]] +name = "i_float" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "010025c2c532c8d82e42d0b8bb5184afa449fa6f06c709ea9adcb16c49ae405b" +dependencies = [ + "libm", +] + +[[package]] +name = "i_key_sort" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9190f86706ca38ac8add223b2aed8b1330002b5cdbbce28fb58b10914d38fc27" + 
+[[package]] +name = "i_overlay" +version = "4.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fcccbd4e4274e0f80697f5fbc6540fdac533cce02f2081b328e68629cce24f9" +dependencies = [ + "i_float", + "i_key_sort", + "i_shape", + "i_tree", + "rayon", +] + +[[package]] +name = "i_shape" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ea154b742f7d43dae2897fcd5ead86bc7b5eefcedd305a7ebf9f69d44d61082" +dependencies = [ + "i_float", +] + +[[package]] +name = "i_tree" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e6d558e6d4c7b82bc51d9c771e7a927862a161a7d87bf2b0541450e0e20915" + [[package]] name = "iana-time-zone" version = "0.1.64" @@ -3486,6 +3695,15 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -3676,6 +3894,7 @@ dependencies = [ "lance-datafusion", "lance-encoding", "lance-file", + "lance-geo", "lance-index", "lance-io", "lance-linalg", @@ -3787,6 +4006,7 @@ dependencies = [ "lance-arrow", "lance-core", "lance-datagen", + "lance-geo", "log", "pin-project", "prost", @@ -3881,6 +4101,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "lance-geo" +version = "1.0.0-beta.12" +dependencies = [ + "datafusion", + "geo-types", + "geoarrow-array", + "geoarrow-schema", + "geodatafusion", +] + [[package]] name = "lance-index" version = "1.0.0-beta.12" @@ -4305,7 +4536,7 @@ dependencies = [ "reqwest", "serde", "tar", - "thiserror", + "thiserror 2.0.17", "tokio", "yada", ] @@ -4781,6 +5012,28 @@ dependencies = [ "libc", ] +[[package]] +name = 
"num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "object" version = "0.32.2" @@ -4819,7 +5072,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "thiserror", + "thiserror 2.0.17", "tokio", "tracing", "url", @@ -5311,6 +5564,15 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -5564,7 +5826,7 @@ dependencies = [ "rustc-hash", "rustls 0.23.35", "socket2 0.6.1", - "thiserror", + "thiserror 2.0.17", "tokio", "tracing", "web-time", @@ -5585,7 +5847,7 @@ dependencies = [ "rustls 0.23.35", "rustls-pki-types", "slab", - "thiserror", + "thiserror 2.0.17", "tinyvec", "tracing", "web-time", @@ -5796,7 +6058,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ "getrandom 0.2.16", "libredox", - "thiserror", + "thiserror 2.0.17", ] [[package]] @@ -5962,6 +6224,12 @@ dependencies = [ "byteorder", ] +[[package]] +name = "robust" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e27ee8bb91ca0adcf0ecb116293afa12d393f9c2b9b9cd54d33e8078fe19839" + [[package]] name = "rsa" version = "0.9.9" @@ -5983,6 +6251,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rstar" +version = "0.12.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "421400d13ccfd26dfa5858199c30a5d76f9c54e0dba7575273025b43c5175dbb" +dependencies = [ + "heapless", + "num-traits", + "smallvec", +] + [[package]] name = "rust-ini" version = "0.21.3" @@ -6491,7 +6770,7 @@ checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" dependencies = [ "num-bigint", "num-traits", - "thiserror", + "thiserror 2.0.17", "time", ] @@ -6569,6 +6848,18 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "spade" +version = "2.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb313e1c8afee5b5647e00ee0fe6855e3d529eb863a0fdae1d60006c4d1e9990" +dependencies = [ + "hashbrown 0.15.5", + "num-traits", + "robust", + "smallvec", +] + [[package]] name = "spin" version = "0.9.8" @@ -6812,7 +7103,7 @@ dependencies = [ "tantivy-stacker", "tantivy-tokenizer-api", "tempfile", - "thiserror", + "thiserror 2.0.17", "time", "uuid", "winapi", @@ -6948,13 +7239,33 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + [[package]] name = "thiserror" version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.17", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", ] [[package]] @@ -7134,6 +7445,36 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml_datetime" +version = "0.7.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" +dependencies = [ + "winnow", +] + [[package]] name = "tower" version = "0.5.2" @@ -7344,7 +7685,7 @@ dependencies = [ "serde", "serde_json", "syn 2.0.111", - "thiserror", + "thiserror 2.0.17", "unicode-ident", ] @@ -7978,12 +8319,46 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winnow" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen" version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "wkb" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a120b336c7ad17749026d50427c23d838ecb50cd64aaea6254b5030152f890a9" +dependencies = [ + "byteorder", + "geo-traits", + "num_enum", + "thiserror 1.0.69", +] + +[[package]] +name = "wkt" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efb2b923ccc882312e559ffaa832a055ba9d1ac0cc8e86b3e25453247e4b81d7" +dependencies = [ + "geo-traits", + 
"geo-types", + "log", + "num-traits", + "thiserror 1.0.69", +] + [[package]] name = "writeable" version = "0.6.2" diff --git a/python/pyproject.toml b/python/pyproject.toml index bfbcd0c3a9e..788d747269f 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -65,6 +65,10 @@ tests = [ dev = ["ruff==0.4.1", "pyright"] benchmarks = ["pytest-benchmark"] torch = ["torch"] +geo = [ + "geoarrow-rust-core", + "geoarrow-rust-io", +] [tool.ruff] lint.select = ["F", "E", "W", "I", "G", "TCH", "PERF", "B019"] diff --git a/python/python/tests/test_geo.py b/python/python/tests/test_geo.py new file mode 100644 index 00000000000..5a2ee8f7582 --- /dev/null +++ b/python/python/tests/test_geo.py @@ -0,0 +1,106 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright The Lance Authors + +from pathlib import Path + +import lance +import numpy as np +import pyarrow as pa +import pytest + +pytest.importorskip("geoarrow.rust.core") +from geoarrow.rust.core import ( + linestring, + linestrings, + point, + points, + polygon, + polygons, +) + + +def test_geo_types(tmp_path: Path): + uri = str(tmp_path / "test_geo_types.lance") + # Points + points_2d = points([np.random.rand(3), np.random.rand(3)]) + + # LineStrings + line_offsets = np.array([0, 2, 6, 10], dtype=np.int32) + linestrings_2d = linestrings([np.random.rand(10), np.random.rand(10)], line_offsets) + + # Polygons + ring_offsets = np.array([0, 3, 7, 12], dtype=np.int32) + geom_offsets = np.array([0, 1, 2, 3], dtype=np.int32) + polygons_2d = polygons( + [np.random.rand(12), np.random.rand(12)], + ring_offsets=ring_offsets, + geom_offsets=geom_offsets, + ) + + schema = pa.schema( + [ + pa.field(point("xy")).with_name("geometry_points"), + pa.field(linestring("xy")).with_name("geometry_lines"), + pa.field(polygon("xy")).with_name("geometry_polygons_2d"), + ] + ) + table = pa.Table.from_arrays( + [points_2d, linestrings_2d, polygons_2d], schema=schema + ) + lance.write_dataset(table, uri) + ds = 
lance.dataset(uri) + assert ds.schema.field(0) == table.schema.field(0) + assert ds.schema.field(1) == table.schema.field(1) + assert ds.schema.field(2) == table.schema.field(2) + + read_table = ds.to_table() + assert read_table.schema.field(0) == table.schema.field(0) + assert read_table.schema.field(1) == table.schema.field(1) + assert read_table.schema.field(2) == table.schema.field(2) + + assert ( + read_table.schema.field(0).metadata[b"ARROW:extension:name"] + == b"geoarrow.point" + ) + assert ( + read_table.schema.field(1).metadata[b"ARROW:extension:name"] + == b"geoarrow.linestring" + ) + assert ( + read_table.schema.field(2).metadata[b"ARROW:extension:name"] + == b"geoarrow.polygon" + ) + + assert read_table.num_rows == 3 + + +def test_geo_sql(tmp_path: Path): + # Points + points_2d = points([np.array([1.0]), np.array([2.0])]) + + # LineStrings + line_offsets = np.array([0, 2], dtype=np.int32) + linestrings_2d = linestrings( + [np.array([3.0, 4.0]), np.array([5.0, 0.0])], line_offsets + ) + + schema = pa.schema( + [ + pa.field(point("xy")).with_name("point"), + pa.field(linestring("xy")).with_name("linestring"), + ] + ) + table = pa.Table.from_arrays([points_2d, linestrings_2d], schema=schema) + ds = lance.write_dataset(table, str(tmp_path / "test_geo_udf_distance.lance")) + + batches = ( + ds.sql("SELECT St_Distance(point, linestring) as dist FROM dataset") + .build() + .to_batch_records() + ) + assert len(batches) == 1 + result = batches[0].to_pydict() + assert result["dist"] + assert np.allclose( + np.array(result["dist"]), np.array([2.5495097567963922]), atol=1e-8 + ) diff --git a/rust/lance-datafusion/Cargo.toml b/rust/lance-datafusion/Cargo.toml index 80213621d88..47315ce4712 100644 --- a/rust/lance-datafusion/Cargo.toml +++ b/rust/lance-datafusion/Cargo.toml @@ -27,6 +27,7 @@ jsonb = {workspace = true} lance-arrow.workspace = true lance-core = {workspace = true, features = ["datafusion"]} lance-datagen.workspace = true +lance-geo = {workspace = true} 
chrono.workspace = true log.workspace = true pin-project.workspace = true diff --git a/rust/lance-datafusion/src/udf.rs b/rust/lance-datafusion/src/udf.rs index 24366077c66..9117b67f82e 100644 --- a/rust/lance-datafusion/src/udf.rs +++ b/rust/lance-datafusion/src/udf.rs @@ -26,6 +26,8 @@ pub fn register_functions(ctx: &SessionContext) { ctx.register_udf(json::json_get_bool_udf()); ctx.register_udf(json::json_array_contains_udf()); ctx.register_udf(json::json_array_length_udf()); + // GEO functions + lance_geo::register_functions(ctx); } /// This method checks whether a string contains all specified tokens. The tokens are separated by diff --git a/rust/lance-geo/Cargo.toml b/rust/lance-geo/Cargo.toml new file mode 100644 index 00000000000..898ddea6159 --- /dev/null +++ b/rust/lance-geo/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "lance-geo" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true +keywords.workspace = true +categories.workspace = true +rust-version.workspace = true +description = "Lance's geospatial extension providing geospatial UDFs." 
+ +[dependencies] +datafusion.workspace = true +geoarrow-array.workspace = true +geoarrow-schema.workspace = true +geodatafusion.workspace = true +geo-types.workspace = true + +[lints] +workspace = true diff --git a/rust/lance-geo/src/lib.rs b/rust/lance-geo/src/lib.rs new file mode 100644 index 00000000000..209f52b74cf --- /dev/null +++ b/rust/lance-geo/src/lib.rs @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +use datafusion::prelude::SessionContext; + +pub fn register_functions(ctx: &SessionContext) { + geodatafusion::register(ctx); +} diff --git a/rust/lance-geo/src/udf.rs b/rust/lance-geo/src/udf.rs new file mode 100644 index 00000000000..0a93c2a31b8 --- /dev/null +++ b/rust/lance-geo/src/udf.rs @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +use datafusion::prelude::SessionContext; + +/// Register UDF functions to datafusion context. +pub fn register_functions(ctx: &SessionContext) { + ctx.register_udf(geodatafusion::udf::geo::measurement::Area::new().into()); + ctx.register_udf(geodatafusion::udf::geo::measurement::Distance::new().into()); + ctx.register_udf(geodatafusion::udf::geo::measurement::Length::new().into()); + ctx.register_udf(geodatafusion::udf::geo::relationships::Contains::new().into()); + ctx.register_udf(geodatafusion::udf::geo::relationships::CoveredBy::new().into()); + ctx.register_udf(geodatafusion::udf::geo::relationships::Covers::new().into()); + ctx.register_udf(geodatafusion::udf::geo::relationships::Disjoint::new().into()); + ctx.register_udf(geodatafusion::udf::geo::relationships::Intersects::new().into()); + ctx.register_udf(geodatafusion::udf::geo::relationships::Overlaps::new().into()); + ctx.register_udf(geodatafusion::udf::geo::relationships::Touches::new().into()); + ctx.register_udf(geodatafusion::udf::geo::relationships::Within::new().into()); + 
ctx.register_udf(geodatafusion::udf::geo::validation::IsValid::new().into()); +} diff --git a/rust/lance/Cargo.toml b/rust/lance/Cargo.toml index 1d04c5ea797..570d467c220 100644 --- a/rust/lance/Cargo.toml +++ b/rust/lance/Cargo.toml @@ -26,6 +26,7 @@ lance-linalg = { workspace = true } lance-index = { workspace = true } lance-namespace = { workspace = true } lance-table = { workspace = true } +lance-geo = { workspace = true } arrow-arith = { workspace = true } arrow-array = { workspace = true } arrow-buffer = { workspace = true } @@ -108,6 +109,9 @@ tracking-allocator = { version = "0.4", features = ["tracing-compat"] } # For S3 / DynamoDB tests aws-config = { workspace = true } aws-sdk-s3 = { workspace = true } +geoarrow-array = { workspace = true } +geoarrow-schema = { workspace = true } +geo-types = { workspace = true } [features] diff --git a/rust/lance/src/dataset.rs b/rust/lance/src/dataset.rs index 992bbd48be6..4fecf55b25e 100644 --- a/rust/lance/src/dataset.rs +++ b/rust/lance/src/dataset.rs @@ -9973,4 +9973,146 @@ mod tests { .unwrap(); assert_eq!(1, batch.num_rows()); } + + #[tokio::test] + async fn test_geo_types() { + use geo_types::{coord, line_string, Rect}; + use geoarrow_array::{ + builder::{LineStringBuilder, PointBuilder, PolygonBuilder}, + GeoArrowArray, + }; + use geoarrow_schema::{Dimension, LineStringType, PointType, PolygonType}; + + // 1. Creates arrow table with spatial data. 
+ let point_type = PointType::new(Dimension::XY, Default::default()); + let line_string_type = LineStringType::new(Dimension::XY, Default::default()); + let polygon_type = PolygonType::new(Dimension::XY, Default::default()); + + let schema = arrow_schema::Schema::new(vec![ + point_type.clone().to_field("point", true), + line_string_type.clone().to_field("linestring", true), + polygon_type.clone().to_field("polygon", true), + ]); + let schema = Arc::new(schema) as arrow_schema::SchemaRef; + + let mut point_builder = PointBuilder::new(point_type.clone()); + point_builder.push_point(Some(&geo_types::point!(x: -72.1235, y: 42.3521))); + let point_arr = point_builder.finish(); + + let mut line_string_builder = LineStringBuilder::new(line_string_type.clone()); + line_string_builder + .push_line_string(Some(&line_string![ + (x: -72.1260, y: 42.45), + (x: -72.123, y: 42.1546), + (x: -73.123, y: 43.1546), + ])) + .unwrap(); + let line_arr = line_string_builder.finish(); + + let mut polygon_builder = PolygonBuilder::new(polygon_type.clone()); + let rect = Rect::new( + coord! { x: -72.123, y: 42.146 }, + coord! { x: -72.126, y: 42.45 }, + ); + polygon_builder.push_rect(Some(&rect)).unwrap(); + let polygon_arr = polygon_builder.finish(); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + point_arr.to_array_ref(), + line_arr.to_array_ref(), + polygon_arr.to_array_ref(), + ], + ) + .unwrap(); + + // 2. Write to lance + let lance_path = TempStrDir::default(); + let reader = RecordBatchIterator::new(vec![batch].into_iter().map(Ok), schema.clone()); + let dataset = Dataset::write(reader, &lance_path, Some(Default::default())) + .await + .unwrap(); + + // 3. 
Verifies that the schema fields and extension metadata are preserved + assert_eq!(dataset.schema().fields.len(), 3); + let fields = &dataset.schema().fields; + assert_eq!( + fields.first().unwrap().metadata.get("ARROW:extension:name"), + Some(&"geoarrow.point".to_owned()) + ); + assert_eq!( + fields.get(1).unwrap().metadata.get("ARROW:extension:name"), + Some(&"geoarrow.linestring".to_owned()) + ); + assert_eq!( + fields.get(2).unwrap().metadata.get("ARROW:extension:name"), + Some(&"geoarrow.polygon".to_owned()) + ); + } + + #[tokio::test] + async fn test_geo_sql() { + use arrow_array::types::Float64Type; + use geo_types::line_string; + use geoarrow_array::{ + builder::{LineStringBuilder, PointBuilder}, + GeoArrowArray, + }; + use geoarrow_schema::{Dimension, LineStringType, PointType}; + + // 1. Creates arrow table with point and linestring spatial data + let point_type = PointType::new(Dimension::XY, Default::default()); + let line_string_type = LineStringType::new(Dimension::XY, Default::default()); + + let schema = arrow_schema::Schema::new(vec![ + point_type.clone().to_field("point", true), + line_string_type.clone().to_field("linestring", true), + ]); + let schema = Arc::new(schema) as arrow_schema::SchemaRef; + + let mut point_builder = PointBuilder::new(point_type.clone()); + point_builder.push_point(Some(&geo_types::point!(x: -72.1235, y: 42.3521))); + let point_arr = point_builder.finish(); + + let mut line_string_builder = LineStringBuilder::new(line_string_type.clone()); + line_string_builder + .push_line_string(Some(&line_string![ + (x: -72.1260, y: 42.45), + (x: -72.123, y: 42.1546), + (x: -73.123, y: 43.1546), + ])) + .unwrap(); + let line_arr = line_string_builder.finish(); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![point_arr.to_array_ref(), line_arr.to_array_ref()], + ) + .unwrap(); + + // 2. 
Write to lance + let lance_path = TempStrDir::default(); + let reader = RecordBatchIterator::new(vec![batch].into_iter().map(Ok), schema.clone()); + let dataset = Dataset::write(reader, &lance_path, Some(Default::default())) + .await + .unwrap(); + + // 3. Executes a SQL query with St_Distance function + let batches = execute_sql( + "SELECT ST_Distance(point, linestring) AS dist FROM dataset", + "dataset".to_owned(), + Arc::new(dataset.clone()), + ) + .await + .unwrap(); + assert_eq!(batches.len(), 1); + let batch = batches.first().unwrap(); + assert_eq!(batch.num_columns(), 1); + assert_eq!(batch.num_rows(), 1); + approx::assert_relative_eq!( + batch.column(0).as_primitive::<Float64Type>().value(0), + 0.0015056772638228177 + ); + } }