diff --git a/Cargo.lock b/Cargo.lock index 9bb7417d2..21da04d0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -97,6 +97,18 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "arrow" version = "55.0.0" @@ -174,6 +186,7 @@ dependencies = [ "atoi", "base64 0.22.1", "chrono", + "comfy-table", "half", "lexical-core 1.0.5", "num", @@ -313,6 +326,23 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "async-compression" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +dependencies = [ + "bzip2", + "flate2", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", +] + [[package]] name = "async-stream" version = "0.3.6" @@ -400,6 +430,19 @@ version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89e25b6adfb930f02d1981565a6e5d9c547ac15a96606256d3b59040e5cd4ca3" +[[package]] +name = "bigdecimal" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "bindgen" version = "0.66.1" @@ -461,6 +504,28 @@ dependencies = [ "serde", ] +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -515,6 +580,25 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "c_vec" version = "2.0.0" @@ -667,6 +751,16 @@ dependencies = [ "cc", ] +[[package]] +name = "comfy-table" +version = "7.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" +dependencies = [ + "unicode-segmentation", + "unicode-width", +] + [[package]] name = "const-oid" version = "0.9.6" @@ -693,6 +787,12 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + [[package]] name = "core-foundation" version = "0.10.0" @@ -884,6 +984,590 @@ dependencies = [ "syn 2.0.100", ] +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "arrow-ipc", + "arrow-schema", + "async-trait", + "bytes", + "bzip2", + "chrono", + "datafusion-catalog", + "datafusion-catalog-listing", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-table", + "datafusion-functions-window", + "datafusion-macros", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", + "flate2", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "parquet", + "rand 0.8.5", + "regex", + "sqlparser", + "tempfile", + "tokio", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-catalog" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "log", + "object_store", + "tokio", +] + +[[package]] +name = "datafusion-common" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "ahash", + "arrow", + "arrow-ipc", + "base64 0.22.1", + "half", + "hashbrown 0.14.5", + "indexmap 2.9.0", + "libc", + "log", + "object_store", + "parquet", + "paste", + "recursive", + "sqlparser", + "tokio", + "web-time", +] + +[[package]] +name = "datafusion-common-runtime" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "futures", + "log", + "tokio", +] + +[[package]] +name = "datafusion-datasource" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "flate2", + "futures", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "parquet", + "rand 0.8.5", + "tempfile", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-datasource-csv" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "regex", + "tokio", +] + +[[package]] +name = "datafusion-datasource-json" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "serde_json", + "tokio", +] + +[[package]] +name = "datafusion-datasource-parquet" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "parquet", + "rand 0.8.5", + "tokio", +] + +[[package]] +name = "datafusion-doc" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" + +[[package]] +name = "datafusion-execution" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "log", + "object_store", + "parking_lot", + "rand 0.8.5", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap 2.9.0", + "paste", + "recursive", + "serde_json", + "sqlparser", +] + +[[package]] +name = "datafusion-expr-common" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "datafusion-common", + "indexmap 2.9.0", + "itertools 0.14.0", + "paste", +] + +[[package]] +name = "datafusion-functions" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "arrow-buffer", + "base64 0.22.1", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", + "hex", + "itertools 0.14.0", + "log", + "md-5", + "rand 0.8.5", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-functions-nested" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "arrow-ord", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-macros", + "datafusion-physical-expr-common", + "itertools 0.14.0", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-table" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", + "paste", +] + +[[package]] +name = "datafusion-functions-window" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "datafusion-common", + "datafusion-doc", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-macros" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "datafusion-expr", + "quote", + "syn 2.0.100", +] + +[[package]] +name = "datafusion-optimizer" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "indexmap 2.9.0", + "itertools 0.14.0", + "log", + "recursive", + "regex", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap 2.9.0", + "itertools 0.14.0", + "log", + "paste", + "petgraph", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "itertools 0.14.0", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "itertools 0.14.0", + "log", + "recursive", +] + +[[package]] +name = "datafusion-physical-plan" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "ahash", + "arrow", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap 2.9.0", + "itertools 0.14.0", + "log", + "parking_lot", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "datafusion-session" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + +[[package]] +name = "datafusion-sql" +version = "47.0.0" +source = "git+https://github.com/apache/datafusion?rev=2d801940c3cb0cec3209aa890688590ded791865#2d801940c3cb0cec3209aa890688590ded791865" +dependencies = [ + "arrow", + "bigdecimal", + "datafusion-common", + "datafusion-expr", + "indexmap 2.9.0", + "log", + "recursive", + "regex", + "sqlparser", +] + [[package]] name = "dbase" version = "0.5.0" @@ -1032,6 +1716,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + [[package]] name = "flatbuffers" version = "24.12.23" @@ -1475,6 +2165,30 @@ dependencies = [ "wkt 0.12.0 (git+https://github.com/georust/wkt?rev=270ffe0eaf5ba5255c364dbade39c451562a9e9b)", ] +[[package]] +name = "geodatafusion" +version = "0.1.0-dev" +dependencies = [ + "approx", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "async-stream", + "async-trait", + "datafusion", + "geo 0.30.0", + "geo-traits", + "geoarrow-array", + "geoarrow-schema", + "geohash", + "thiserror 1.0.69", + "tokio", +] + [[package]] name = "geographiclib-rs" version = "0.2.4" @@ -1484,6 +2198,16 @@ dependencies = [ "libm", ] +[[package]] +name = "geohash" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fb94b1a65401d6cbf22958a9040aa364812c26674f841bee538b12c135db1e6" +dependencies = [ + "geo-types", + "libm", +] + [[package]] name = "geojson" version = "0.24.2" @@ -2416,6 +3140,17 @@ dependencies = [ "twox-hash 1.6.3", ] +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "matrixmultiply" version = "0.3.9" @@ -2764,6 +3499,7 @@ dependencies = [ "object_store", "paste", "seq-macro", + "simdutf8", "snap", "thrift", "tokio", @@ -2807,6 +3543,16 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "petgraph" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset", + "indexmap 2.9.0", +] + [[package]] name = "phf" version = "0.11.3" @@ -3021,6 +3767,15 @@ dependencies = [ "tar", ] +[[package]] +name = "psm" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" +dependencies = [ + "cc", +] + [[package]] name = "pyo3" version = "0.24.1" @@ -3296,6 +4051,26 @@ dependencies = [ "log", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.100", +] + [[package]] name = "redox_syscall" version = "0.5.11" @@ -3862,6 +4637,28 @@ dependencies = [ "unicode_categories", ] +[[package]] +name = "sqlparser" +version = "0.55.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" +dependencies = [ + "log", + "recursive", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "sqlx" version = "0.7.4" @@ -4069,6 +4866,19 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "stacker" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -4494,6 +5304,12 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + [[package]] name = "unicode_categories" version = "0.1.1" @@ -4541,6 +5357,17 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "uuid" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" +dependencies = [ + "getrandom 0.3.2", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "vcpkg" version = "0.2.15" @@ -5113,6 +5940,15 @@ dependencies = [ "rustix 1.0.5", ] +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "yoke" version = "0.7.5" diff --git a/Cargo.toml b/Cargo.toml index ef299d82e..0583760b4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,9 +9,7 @@ members = [ "rust/geoarrow-test", "rust/geoarrow", "rust/pyo3-geoarrow", - # Comment out until datafusion 47 release so that the workspace can upgrade - # to arrow 55 - # "rust/geodatafusion", + "rust/geodatafusion", ] exclude = ["js"] resolver = "2" @@ -40,8 +38,8 @@ arrow-schema = "55" async-stream = "0.3" async-trait = "0.1" bytes = "1.10.0" -# 47 rc https://lists.apache.org/thread/zrq9x9gf51r8b6m9qokf2q75kh251rm6 -datafusion = { git = "https://github.com/apache/datafusion", rev = "e4433049b04ca2c1e2031eb05d1a0990210f11d6" } +# https://github.com/apache/datafusion/pull/15646 +datafusion = { git = "https://github.com/apache/datafusion", rev = "2d801940c3cb0cec3209aa890688590ded791865" } flatgeobuf = { version = "4.6", default-features = false } futures = "0.3" geo = "0.30.0" diff --git a/rust/geoarrow-array/src/array/geometry.rs b/rust/geoarrow-array/src/array/geometry.rs index ad99d03bb..4e3cbf869 100644 --- a/rust/geoarrow-array/src/array/geometry.rs +++ b/rust/geoarrow-array/src/array/geometry.rs @@ -375,8 +375,6 @@ impl GeometryArray { pub fn into_coord_type(self, coord_type: CoordType) -> Self { Self { data_type: self.data_type.with_coord_type(coord_type), - type_ids: self.type_ids, - offsets: self.offsets, points: self.points.map(|arr| arr.into_coord_type(coord_type)), line_strings: self.line_strings.map(|arr| arr.into_coord_type(coord_type)), polygons: self.polygons.map(|arr| arr.into_coord_type(coord_type)), @@ -386,6 +384,15 @@ impl GeometryArray { .map(|arr| arr.into_coord_type(coord_type)), mpolygons: self.mpolygons.map(|arr| arr.into_coord_type(coord_type)), gcs: self.gcs.map(|arr| arr.into_coord_type(coord_type)), + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self } } @@ -486,6 +493,10 @@ impl GeoArrowArray for GeometryArray { fn slice(&self, offset: usize, length: usize) -> Arc { Arc::new(self.slice(offset, length)) } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } } impl<'a> ArrayAccessor<'a> for GeometryArray { diff --git a/rust/geoarrow-array/src/array/geometrycollection.rs b/rust/geoarrow-array/src/array/geometrycollection.rs index 60005497a..fcf41ec82 100644 --- a/rust/geoarrow-array/src/array/geometrycollection.rs +++ b/rust/geoarrow-array/src/array/geometrycollection.rs @@ -100,8 +100,15 @@ impl GeometryCollectionArray { Self { data_type: self.data_type.with_coord_type(coord_type), array: self.array.into_coord_type(coord_type), - geom_offsets: self.geom_offsets, - nulls: self.nulls, + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self } } } @@ -149,6 +156,10 @@ impl GeoArrowArray for GeometryCollectionArray { fn slice(&self, offset: usize, length: usize) -> Arc { Arc::new(self.slice(offset, length)) } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } } impl<'a> ArrayAccessor<'a> for GeometryCollectionArray { diff --git a/rust/geoarrow-array/src/array/linestring.rs b/rust/geoarrow-array/src/array/linestring.rs index 41005b574..5dbdb3682 100644 --- a/rust/geoarrow-array/src/array/linestring.rs +++ b/rust/geoarrow-array/src/array/linestring.rs @@ -154,8 +154,15 @@ impl LineStringArray { Self { data_type: self.data_type.with_coord_type(coord_type), coords: self.coords.into_coord_type(coord_type), - geom_offsets: self.geom_offsets, - nulls: self.nulls, + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self } } } @@ -202,6 +209,10 @@ impl GeoArrowArray for LineStringArray { fn slice(&self, offset: usize, length: usize) -> Arc { Arc::new(self.slice(offset, length)) } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } } impl<'a> ArrayAccessor<'a> for LineStringArray { diff --git a/rust/geoarrow-array/src/array/mixed.rs b/rust/geoarrow-array/src/array/mixed.rs index 3b9a02af9..add16bdbc 100644 --- a/rust/geoarrow-array/src/array/mixed.rs +++ b/rust/geoarrow-array/src/array/mixed.rs @@ -6,7 +6,7 @@ use arrow_array::{Array, ArrayRef, UnionArray}; use arrow_buffer::ScalarBuffer; use arrow_schema::{DataType, UnionMode}; use geoarrow_schema::{ - CoordType, Dimension, GeometryCollectionType, LineStringType, Metadata, MultiLineStringType, + CoordType, Dimension, GeometryCollectionType, LineStringType, MultiLineStringType, MultiPointType, MultiPolygonType, PointType, PolygonType, }; @@ -65,8 +65,6 @@ pub struct MixedGeometryArray { pub(crate) coord_type: CoordType, pub(crate) dim: Dimension, - pub(crate) metadata: Arc, - /// Invariant: every item in `type_ids` is `> 0 && < fields.len()` if `type_ids` are not provided. If `type_ids` exist in the NativeType, then every item in `type_ids` is `> 0 && ` pub(crate) type_ids: ScalarBuffer, @@ -107,7 +105,6 @@ impl MixedGeometryArray { multi_points: Option, multi_line_strings: Option, multi_polygons: Option, - metadata: Arc, ) -> Self { let mut coord_types = HashSet::new(); if let Some(points) = &points { @@ -191,7 +188,6 @@ impl MixedGeometryArray { )) .finish(), ), - metadata, slice_offset: 0, } } @@ -424,7 +420,6 @@ impl MixedGeometryArray { multi_points: self.multi_points.clone(), multi_line_strings: self.multi_line_strings.clone(), multi_polygons: self.multi_polygons.clone(), - metadata: self.metadata.clone(), slice_offset: self.slice_offset + offset, } } @@ -432,17 +427,13 @@ impl MixedGeometryArray { pub fn into_coord_type(self, coord_type: CoordType) -> Self { Self { coord_type, - dim: self.dim, - metadata: self.metadata, - type_ids: self.type_ids, - offsets: self.offsets, points: self.points.into_coord_type(coord_type), line_strings: self.line_strings.into_coord_type(coord_type), polygons: self.polygons.into_coord_type(coord_type), multi_points: self.multi_points.into_coord_type(coord_type), multi_line_strings: self.multi_line_strings.into_coord_type(coord_type), multi_polygons: self.multi_polygons.into_coord_type(coord_type), - slice_offset: self.slice_offset, + ..self } } @@ -660,9 +651,6 @@ impl TryFrom<(&UnionArray, Dimension, CoordType)> for MixedGeometryArray { multi_points, multi_line_strings, multi_polygons, - // Mixed array is only used inside of GeometryCollectionArray, and this array does not - // hold its own metadata - Default::default(), )) } } @@ -684,7 +672,6 @@ impl TryFrom<(&dyn Array, Dimension, CoordType)> for MixedGeometryArray { impl PartialEq for MixedGeometryArray { fn eq(&self, other: &Self) -> bool { self.dim == other.dim - && self.metadata == other.metadata && self.type_ids == other.type_ids && self.offsets == other.offsets && self.points == other.points diff --git a/rust/geoarrow-array/src/array/multilinestring.rs b/rust/geoarrow-array/src/array/multilinestring.rs index 2a2e9b12a..8d01f29bf 100644 --- a/rust/geoarrow-array/src/array/multilinestring.rs +++ b/rust/geoarrow-array/src/array/multilinestring.rs @@ -181,9 +181,15 @@ impl MultiLineStringArray { Self { data_type: self.data_type.with_coord_type(coord_type), coords: self.coords.into_coord_type(coord_type), - geom_offsets: self.geom_offsets, - ring_offsets: self.ring_offsets, - nulls: self.nulls, + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self } } } @@ -231,6 +237,10 @@ impl GeoArrowArray for MultiLineStringArray { fn slice(&self, offset: usize, length: usize) -> Arc { Arc::new(self.slice(offset, length)) } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } } impl<'a> ArrayAccessor<'a> for MultiLineStringArray { diff --git a/rust/geoarrow-array/src/array/multipoint.rs b/rust/geoarrow-array/src/array/multipoint.rs index 25aa4ef16..e0d537c9a 100644 --- a/rust/geoarrow-array/src/array/multipoint.rs +++ b/rust/geoarrow-array/src/array/multipoint.rs @@ -158,8 +158,15 @@ impl MultiPointArray { Self { data_type: self.data_type.with_coord_type(coord_type), coords: self.coords.into_coord_type(coord_type), - geom_offsets: self.geom_offsets, - nulls: self.nulls, + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self } } } @@ -207,6 +214,10 @@ impl GeoArrowArray for MultiPointArray { fn slice(&self, offset: usize, length: usize) -> Arc { Arc::new(self.slice(offset, length)) } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } } impl<'a> ArrayAccessor<'a> for MultiPointArray { diff --git a/rust/geoarrow-array/src/array/multipolygon.rs b/rust/geoarrow-array/src/array/multipolygon.rs index 774025ec1..ba600e7c9 100644 --- a/rust/geoarrow-array/src/array/multipolygon.rs +++ b/rust/geoarrow-array/src/array/multipolygon.rs @@ -234,10 +234,15 @@ impl MultiPolygonArray { Self { data_type: self.data_type.with_coord_type(coord_type), coords: self.coords.into_coord_type(coord_type), - geom_offsets: self.geom_offsets, - polygon_offsets: self.polygon_offsets, - ring_offsets: self.ring_offsets, - nulls: self.nulls, + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self } } } @@ -285,6 +290,10 @@ impl GeoArrowArray for MultiPolygonArray { fn slice(&self, offset: usize, length: usize) -> Arc { Arc::new(self.slice(offset, length)) } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } } impl<'a> ArrayAccessor<'a> for MultiPolygonArray { diff --git a/rust/geoarrow-array/src/array/point.rs b/rust/geoarrow-array/src/array/point.rs index 1431297a5..90b72a854 100644 --- a/rust/geoarrow-array/src/array/point.rs +++ b/rust/geoarrow-array/src/array/point.rs @@ -119,7 +119,15 @@ impl PointArray { Self { data_type: self.data_type.with_coord_type(coord_type), coords: self.coords.into_coord_type(coord_type), - nulls: self.nulls, + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self } } } @@ -167,6 +175,10 @@ impl GeoArrowArray for PointArray { fn slice(&self, offset: usize, length: usize) -> Arc { Arc::new(self.slice(offset, length)) } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } } impl<'a> ArrayAccessor<'a> for PointArray { diff --git a/rust/geoarrow-array/src/array/polygon.rs b/rust/geoarrow-array/src/array/polygon.rs index 360b99f17..4a94a7041 100644 --- a/rust/geoarrow-array/src/array/polygon.rs +++ b/rust/geoarrow-array/src/array/polygon.rs @@ -185,9 +185,15 @@ impl PolygonArray { Self { data_type: self.data_type.with_coord_type(coord_type), coords: self.coords.into_coord_type(coord_type), - geom_offsets: self.geom_offsets, - ring_offsets: self.ring_offsets, - nulls: self.nulls, + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self } } } @@ -235,6 +241,10 @@ impl GeoArrowArray for PolygonArray { fn slice(&self, offset: usize, length: usize) -> Arc { Arc::new(self.slice(offset, length)) } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } } impl<'a> ArrayAccessor<'a> for PolygonArray { diff --git a/rust/geoarrow-array/src/array/rect.rs b/rust/geoarrow-array/src/array/rect.rs index eb1d5e0f0..1070e1d8e 100644 --- a/rust/geoarrow-array/src/array/rect.rs +++ b/rust/geoarrow-array/src/array/rect.rs @@ -86,6 +86,14 @@ impl RectArray { nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)), } } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self + } + } } impl GeoArrowArray for RectArray { @@ -131,6 +139,10 @@ impl GeoArrowArray for RectArray { fn slice(&self, offset: usize, length: usize) -> Arc { Arc::new(self.slice(offset, length)) } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } } impl<'a> ArrayAccessor<'a> for RectArray { diff --git a/rust/geoarrow-array/src/array/wkb.rs b/rust/geoarrow-array/src/array/wkb.rs index ca6b66d2d..416057216 100644 --- a/rust/geoarrow-array/src/array/wkb.rs +++ b/rust/geoarrow-array/src/array/wkb.rs @@ -131,6 +131,10 @@ impl GeoArrowArray for WkbArray { fn slice(&self, offset: usize, length: usize) -> Arc { Arc::new(self.slice(offset, length)) } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(Self::with_metadata(&self, metadata)) + } } impl<'a, O: OffsetSizeTrait> ArrayAccessor<'a> for WkbArray { diff --git a/rust/geoarrow-array/src/array/wkt.rs b/rust/geoarrow-array/src/array/wkt.rs index ab37909ae..2efff2aa5 100644 --- a/rust/geoarrow-array/src/array/wkt.rs +++ b/rust/geoarrow-array/src/array/wkt.rs @@ -122,6 +122,10 @@ impl GeoArrowArray for WktArray { fn slice(&self, offset: usize, length: usize) -> Arc { Arc::new(self.slice(offset, length)) } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(Self::with_metadata(&self, metadata)) + } } impl<'a, O: OffsetSizeTrait> ArrayAccessor<'a> for WktArray { diff --git a/rust/geoarrow-array/src/builder/geometrycollection.rs b/rust/geoarrow-array/src/builder/geometrycollection.rs index 0afd4e569..2db22fd25 100644 --- a/rust/geoarrow-array/src/builder/geometrycollection.rs +++ b/rust/geoarrow-array/src/builder/geometrycollection.rs @@ -45,7 +45,6 @@ impl<'a> GeometryCollectionBuilder { typ.dimension(), capacity.mixed_capacity, typ.coord_type(), - typ.metadata().clone(), ), geom_offsets: OffsetsBuilder::with_capacity(capacity.geom_capacity), validity: NullBufferBuilder::new(capacity.geom_capacity), diff --git a/rust/geoarrow-array/src/builder/mixed.rs b/rust/geoarrow-array/src/builder/mixed.rs index 271522117..8f1cdc9b1 100644 --- a/rust/geoarrow-array/src/builder/mixed.rs +++ b/rust/geoarrow-array/src/builder/mixed.rs @@ -1,9 +1,7 @@ -use std::sync::Arc; - use geo_traits::*; use geoarrow_schema::{ - CoordType, Dimension, LineStringType, Metadata, MultiLineStringType, MultiPointType, - MultiPolygonType, PointType, PolygonType, + CoordType, Dimension, LineStringType, MultiLineStringType, MultiPointType, MultiPolygonType, + PointType, PolygonType, }; use crate::array::MixedGeometryArray; @@ -32,8 +30,6 @@ pub(crate) const DEFAULT_PREFER_MULTI: bool = false; /// - All arrays must have the same coordinate layout (interleaved or separated) #[derive(Debug)] pub(crate) struct MixedGeometryBuilder { - metadata: Arc, - /// The dimension of this builder. /// /// All underlying arrays must contain a coordinate buffer of this same dimension. @@ -67,11 +63,9 @@ impl MixedGeometryBuilder { dim: Dimension, capacity: MixedCapacity, coord_type: CoordType, - metadata: Arc, ) -> Self { // Don't store array metadata on child arrays Self { - metadata, dim, types: vec![], points: PointBuilder::with_capacity( @@ -145,7 +139,6 @@ impl MixedGeometryBuilder { Some(self.multi_points.finish()), Some(self.multi_line_strings.finish()), Some(self.multi_polygons.finish()), - self.metadata, ) } diff --git a/rust/geoarrow-array/src/trait_.rs b/rust/geoarrow-array/src/trait_.rs index 396349d60..9decfde4a 100644 --- a/rust/geoarrow-array/src/trait_.rs +++ b/rust/geoarrow-array/src/trait_.rs @@ -6,6 +6,7 @@ use arrow_array::{Array, ArrayRef}; use arrow_buffer::NullBuffer; use arrow_schema::extension::ExtensionType; use geo_traits::GeometryTrait; +use geoarrow_schema::Metadata; use crate::datatypes::GeoArrowType; use crate::error::Result; @@ -214,6 +215,9 @@ pub trait GeoArrowArray: Debug + Send + Sync { /// This function panics iff `offset + length > self.len()`. #[must_use] fn slice(&self, offset: usize, length: usize) -> Arc; + + /// Change the [`Metadata`] of this array. + fn with_metadata(self, metadata: Arc) -> Arc; } /// A trait for accessing the values of a [`GeoArrowArray`]. diff --git a/rust/geodatafusion/Cargo.toml b/rust/geodatafusion/Cargo.toml index b93665a31..e6c131e09 100644 --- a/rust/geodatafusion/Cargo.toml +++ b/rust/geodatafusion/Cargo.toml @@ -30,4 +30,5 @@ thiserror = { workspace = true } [dev-dependencies] approx = { workspace = true } +geoarrow-array = { workspace = true, features = ["test-data"] } tokio = { workspace = true, features = ["macros", "fs", "rt-multi-thread"] } diff --git a/rust/geodatafusion/src/data_types.rs b/rust/geodatafusion/src/data_types.rs index 4d62c839d..ac267a069 100644 --- a/rust/geodatafusion/src/data_types.rs +++ b/rust/geodatafusion/src/data_types.rs @@ -5,7 +5,10 @@ use datafusion::error::DataFusionError; use datafusion::logical_expr::{Signature, Volatility}; use geoarrow_array::array::{GeometryArray, PointArray, RectArray}; use geoarrow_array::{GeoArrowArray, GeoArrowType}; -use geoarrow_schema::{BoxType, CoordType, Dimension, GeometryType, PointType}; +use geoarrow_schema::{ + BoxType, CoordType, Dimension, GeometryCollectionType, GeometryType, LineStringType, + MultiLineStringType, MultiPointType, MultiPolygonType, PointType, PolygonType, +}; use crate::error::GeoDataFusionResult; @@ -43,17 +46,42 @@ pub fn GEOMETRY_TYPE() -> GeoArrowType { } pub(crate) fn any_single_geometry_type_input() -> Signature { - Signature::uniform( - 1, - vec![ - POINT2D_TYPE().into(), - POINT3D_TYPE().into(), - BOX2D_TYPE().into(), - BOX3D_TYPE().into(), - GEOMETRY_TYPE().into(), - ], - Volatility::Immutable, - ) + let mut valid_types = vec![]; + + for coord_type in [CoordType::Separated, CoordType::Interleaved] { + for dim in [ + Dimension::XY, + Dimension::XYZ, + Dimension::XYM, + Dimension::XYZM, + ] { + valid_types.push(PointType::new(coord_type, dim, Default::default()).data_type()); + valid_types.push(LineStringType::new(coord_type, dim, Default::default()).data_type()); + valid_types.push(PolygonType::new(coord_type, dim, Default::default()).data_type()); + valid_types.push(MultiPointType::new(coord_type, dim, Default::default()).data_type()); + valid_types + .push(MultiLineStringType::new(coord_type, dim, Default::default()).data_type()); + valid_types + .push(MultiPolygonType::new(coord_type, dim, Default::default()).data_type()); + valid_types + .push(GeometryCollectionType::new(coord_type, dim, Default::default()).data_type()); + } + } + + for coord_type in [CoordType::Separated, CoordType::Interleaved] { + valid_types.push(GeometryType::new(coord_type, Default::default()).data_type()); + } + + for dim in [ + Dimension::XY, + Dimension::XYZ, + Dimension::XYM, + Dimension::XYZM, + ] { + valid_types.push(BoxType::new(dim, Default::default()).data_type()); + } + + Signature::uniform(1, valid_types, Volatility::Immutable) } /// This will not cast a PointArray to a GeometryArray diff --git a/rust/geodatafusion/src/lib.rs b/rust/geodatafusion/src/lib.rs index 4aae57762..6e784a601 100644 --- a/rust/geodatafusion/src/lib.rs +++ b/rust/geodatafusion/src/lib.rs @@ -3,4 +3,4 @@ pub(crate) mod data_types; pub(crate) mod error; -// pub mod udf; +pub mod udf; diff --git a/rust/geodatafusion/src/udf/native/io/mod.rs b/rust/geodatafusion/src/udf/native/io/mod.rs index ea85c0473..49f7c6b7a 100644 --- a/rust/geodatafusion/src/udf/native/io/mod.rs +++ b/rust/geodatafusion/src/udf/native/io/mod.rs @@ -1,18 +1,18 @@ //! Geometry Input and Output -mod geohash; +// mod geohash; mod wkb; -mod wkt; +// mod wkt; -use datafusion::prelude::SessionContext; +// use datafusion::prelude::SessionContext; -/// Register all provided functions for geometry input and output -pub fn register_udfs(ctx: &SessionContext) { - ctx.register_udf(geohash::Box2DFromGeoHash::new().into()); - ctx.register_udf(geohash::GeoHash::new().into()); - ctx.register_udf(geohash::PointFromGeoHash::new().into()); - ctx.register_udf(wkb::AsBinary::new().into()); - ctx.register_udf(wkb::GeomFromWKB::new().into()); - ctx.register_udf(wkt::AsText::new().into()); - ctx.register_udf(wkt::GeomFromText::new().into()); -} +// /// Register all provided functions for geometry input and output +// pub fn register_udfs(ctx: &SessionContext) { +// ctx.register_udf(geohash::Box2DFromGeoHash::new().into()); +// ctx.register_udf(geohash::GeoHash::new().into()); +// ctx.register_udf(geohash::PointFromGeoHash::new().into()); +// ctx.register_udf(wkb::AsBinary::new().into()); +// ctx.register_udf(wkb::GeomFromWKB::new().into()); +// ctx.register_udf(wkt::AsText::new().into()); +// ctx.register_udf(wkt::GeomFromText::new().into()); +// } diff --git a/rust/geodatafusion/src/udf/native/io/wkb.rs b/rust/geodatafusion/src/udf/native/io/wkb.rs index 31fbefa18..c6ef51603 100644 --- a/rust/geodatafusion/src/udf/native/io/wkb.rs +++ b/rust/geodatafusion/src/udf/native/io/wkb.rs @@ -1,20 +1,19 @@ use std::any::Any; use std::sync::OnceLock; -use arrow_array::cast::AsArray; -use arrow_schema::DataType; +use arrow_schema::{DataType, Field}; +use datafusion::error::{DataFusionError, Result}; use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; use datafusion::logical_expr::{ - ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, + ColumnarValue, Documentation, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, }; -use geoarrow::ArrayBase; -use geoarrow::io::wkb::{from_wkb, to_wkb}; -use geoarrow_array::GeoArrowType; -use geoarrow_array::array::WkbArray; -use geoarrow_schema::{CoordType, GeometryType}; +use geoarrow_array::array::from_arrow_array; +use geoarrow_array::cast::to_wkb; +use geoarrow_array::{GeoArrowArray, GeoArrowType}; +use geoarrow_schema::WkbType; -use crate::data_types::{GEOMETRY_TYPE, any_single_geometry_type_input, parse_to_native_array}; -use crate::error::GeoDataFusionResult; +use crate::data_types::any_single_geometry_type_input; +use crate::error::GeoDataFusionError; #[derive(Debug)] pub(super) struct AsBinary { @@ -45,12 +44,26 @@ impl ScalarUDFImpl for AsBinary { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { - Ok(DataType::Binary) + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Err(DataFusionError::Internal("return_type".to_string())) } - fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { - Ok(as_binary_impl(args)?) + fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result { + let field = &args.arg_fields[0]; + let data_type = GeoArrowType::try_from(field).map_err(GeoDataFusionError::GeoArrow)?; + let wkb_type = WkbType::new(data_type.metadata().clone()); + Ok(wkb_type.to_field(field.name(), field.is_nullable(), false)) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + let array = ColumnarValue::values_to_arrays(&args.args)? + .into_iter() + .next() + .unwrap(); + let field = args.arg_fields[0]; + let geo_array = from_arrow_array(&array, field).map_err(GeoDataFusionError::GeoArrow)?; + let wkb_arr = to_wkb::(geo_array.as_ref()).map_err(GeoDataFusionError::GeoArrow)?; + Ok(ColumnarValue::Array(wkb_arr.into_array_ref())) } fn documentation(&self) -> Option<&Documentation> { @@ -66,71 +79,108 @@ impl ScalarUDFImpl for AsBinary { } } -fn as_binary_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { - let array = ColumnarValue::values_to_arrays(args)? - .into_iter() - .next() - .unwrap(); - let native_array = parse_to_native_array(array)?; - let wkb_arr = to_wkb::(native_array.as_ref()); - Ok(wkb_arr.into_array_ref().into()) -} - -#[derive(Debug)] -pub(super) struct GeomFromWKB { - signature: Signature, -} - -impl GeomFromWKB { - pub fn new() -> Self { - Self { - signature: Signature::exact(vec![DataType::Binary], Volatility::Immutable), - } - } -} - -static GEOM_FROM_WKB_DOC: OnceLock = OnceLock::new(); - -impl ScalarUDFImpl for GeomFromWKB { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "st_geomfromwkb" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { - Ok(GEOMETRY_TYPE().into()) +// #[derive(Debug)] +// pub(super) struct GeomFromWKB { +// signature: Signature, +// } + +// impl GeomFromWKB { +// pub fn new() -> Self { +// Self { +// signature: Signature::exact(vec![DataType::Binary], Volatility::Immutable), +// } +// } +// } + +// static GEOM_FROM_WKB_DOC: OnceLock = OnceLock::new(); + +// impl ScalarUDFImpl for GeomFromWKB { +// fn as_any(&self) -> &dyn Any { +// self +// } + +// fn name(&self) -> &str { +// "st_geomfromwkb" +// } + +// fn signature(&self) -> &Signature { +// &self.signature +// } + +// fn return_type(&self, _arg_types: &[DataType]) -> Result { +// Ok(GEOMETRY_TYPE().into()) +// } + +// fn invoke(&self, args: &[ColumnarValue]) -> Result { +// Ok(geom_from_wkb_impl(args)?) +// } + +// fn documentation(&self) -> Option<&Documentation> { +// Some(GEOM_FROM_WKB_DOC.get_or_init(|| { +// Documentation::builder(DOC_SECTION_OTHER, "Takes a well-known binary representation of a geometry and a Spatial Reference System ID (SRID) and creates an instance of the appropriate geometry type", "ST_GeomFromWKB(buffer)") +// .with_argument("geom", "WKB buffers") +// .build() +// })) +// } +// } + +// fn geom_from_wkb_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { +// let array = ColumnarValue::values_to_arrays(args)? +// .into_iter() +// .next() +// .unwrap(); +// let wkb_arr = WkbArray::new(array.as_binary::().clone(), Default::default()); +// let native_arr = from_wkb( +// &wkb_arr, +// GeoArrowType::Geometry(GeometryType::new(CoordType::Separated, Default::default())), +// false, +// )?; +// Ok(native_arr.to_array_ref().into()) +// } + +#[cfg(test)] +mod test { + use std::sync::Arc; + + use arrow_array::RecordBatch; + use arrow_schema::Schema; + use datafusion::prelude::SessionContext; + use geoarrow_array::test::point; + use geoarrow_schema::{CoordType, Crs, Dimension, Metadata}; + + use super::*; + + #[tokio::test] + async fn test_as_binary() { + let ctx = SessionContext::new(); + + let crs = Crs::from_srid("4326".to_string()); + let metadata = Arc::new(Metadata::new(crs.clone(), Default::default())); + + let geo_arr = point::array(CoordType::Separated, Dimension::XY).with_metadata(metadata); + + let arr = geo_arr.to_array_ref(); + let field = geo_arr.data_type().to_field("geometry", true); + let schema = Schema::new([Arc::new(field)]); + let batch = RecordBatch::try_new(Arc::new(schema), vec![arr]).unwrap(); + + ctx.register_batch("t", batch).unwrap(); + + ctx.register_udf(AsBinary::new().into()); + + let sql_df = ctx + .sql("SELECT ST_AsBinary(geometry) FROM t;") + .await + .unwrap(); + + let output_batches = sql_df.collect().await.unwrap(); + assert_eq!(output_batches.len(), 1); + let output_batch = &output_batches[0]; + + let output_schema = output_batch.schema(); + let output_field = output_schema.field(0); + let output_wkb_type = output_field.try_extension_type::().unwrap(); + + assert_eq!(&crs, output_wkb_type.metadata().crs()); } - - fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { - Ok(geom_from_wkb_impl(args)?) - } - - fn documentation(&self) -> Option<&Documentation> { - Some(GEOM_FROM_WKB_DOC.get_or_init(|| { - Documentation::builder(DOC_SECTION_OTHER, "Takes a well-known binary representation of a geometry and a Spatial Reference System ID (SRID) and creates an instance of the appropriate geometry type", "ST_GeomFromWKB(buffer)") - .with_argument("geom", "WKB buffers") - .build() - })) - } -} - -fn geom_from_wkb_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { - let array = ColumnarValue::values_to_arrays(args)? - .into_iter() - .next() - .unwrap(); - let wkb_arr = WkbArray::new(array.as_binary::().clone(), Default::default()); - let native_arr = from_wkb( - &wkb_arr, - GeoArrowType::Geometry(GeometryType::new(CoordType::Separated, Default::default())), - false, - )?; - Ok(native_arr.to_array_ref().into()) } diff --git a/rust/geodatafusion/src/udf/native/mod.rs b/rust/geodatafusion/src/udf/native/mod.rs index 604a1687b..8cef73beb 100644 --- a/rust/geodatafusion/src/udf/native/mod.rs +++ b/rust/geodatafusion/src/udf/native/mod.rs @@ -1,20 +1,20 @@ //! User-defined functions that wrap native Rust implementations. -mod accessors; -mod bounding_box; -mod constructors; +// mod accessors; +// mod bounding_box; +// mod constructors; mod io; -mod measurement; -mod processing; +// mod measurement; +// mod processing; -use datafusion::prelude::SessionContext; +// use datafusion::prelude::SessionContext; -/// Register all provided native-Rust functions -pub fn register_native(ctx: &SessionContext) { - accessors::register_udfs(ctx); - bounding_box::register_udfs(ctx); - constructors::register_udfs(ctx); - io::register_udfs(ctx); - measurement::register_udfs(ctx); - processing::register_udfs(ctx); -} +// /// Register all provided native-Rust functions +// pub fn register_native(ctx: &SessionContext) { +// accessors::register_udfs(ctx); +// bounding_box::register_udfs(ctx); +// constructors::register_udfs(ctx); +// io::register_udfs(ctx); +// measurement::register_udfs(ctx); +// processing::register_udfs(ctx); +// }