diff --git a/dask_planner/Cargo.lock b/dask_planner/Cargo.lock index 52e3ed467..72482d972 100644 --- a/dask_planner/Cargo.lock +++ b/dask_planner/Cargo.lock @@ -58,11 +58,12 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow" -version = "22.0.0" +version = "23.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5936b4185aa57cb9790d8742aab22859045ce5cc6a3023796240cd101c19335" +checksum = "fedc767fbaa36ea50f086215f54f1a007d22046fc4754b0448c657bcbe9f8413" dependencies = [ "ahash 0.8.0", + "arrow-buffer", "bitflags", "chrono", "comfy-table", @@ -81,6 +82,16 @@ dependencies = [ "serde_json", ] +[[package]] +name = "arrow-buffer" +version = "23.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d290050c6e12a81a24ad08525cef2203c4156a6350f75508d49885d677e88ea9" +dependencies = [ + "half", + "num", +] + [[package]] name = "async-trait" version = "0.1.57" @@ -304,20 +315,17 @@ dependencies = [ [[package]] name = "datafusion-common" version = "12.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7721fd550f6a28ad7235b62462aa51e9a43b08f8346d5cbe4d61f1e83f5df511" +source = "git+https://github.com/apache/arrow-datafusion/?rev=1261741af2a5e142fa0c7916e759859cc18ea59a#1261741af2a5e142fa0c7916e759859cc18ea59a" dependencies = [ "arrow", "ordered-float", - "serde_json", "sqlparser", ] [[package]] name = "datafusion-expr" version = "12.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d81255d043dc594c0ded6240e8a9be6ce8d7c22777a5093357cdb97af3d29ce" +source = "git+https://github.com/apache/arrow-datafusion/?rev=1261741af2a5e142fa0c7916e759859cc18ea59a#1261741af2a5e142fa0c7916e759859cc18ea59a" dependencies = [ "ahash 0.8.0", "arrow", @@ -328,8 +336,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "12.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71b39f8c75163691fff72b4a71816ad5a912e7c6963ee55f29ed1910b5a6993f" +source = "git+https://github.com/apache/arrow-datafusion/?rev=1261741af2a5e142fa0c7916e759859cc18ea59a#1261741af2a5e142fa0c7916e759859cc18ea59a" dependencies = [ "arrow", "async-trait", @@ -344,8 +351,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "12.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "109c4138220a109feafb63bf05418b86b17a42ece4bf047c38e4fd417572a9f7" +source = "git+https://github.com/apache/arrow-datafusion/?rev=1261741af2a5e142fa0c7916e759859cc18ea59a#1261741af2a5e142fa0c7916e759859cc18ea59a" dependencies = [ "ahash 0.8.0", "arrow", @@ -369,8 +375,7 @@ dependencies = [ [[package]] name = "datafusion-row" version = "12.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87a178fc0fd7693d9c9f608f7b605823eb982c6731ede0cccd99e2319cacabbc" +source = "git+https://github.com/apache/arrow-datafusion/?rev=1261741af2a5e142fa0c7916e759859cc18ea59a#1261741af2a5e142fa0c7916e759859cc18ea59a" dependencies = [ "arrow", "datafusion-common", @@ -381,8 +386,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "12.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "148cb56e7635faff3b16019393c49b988188c3fdadd1ca90eadb322a80aa1128" +source = "git+https://github.com/apache/arrow-datafusion/?rev=1261741af2a5e142fa0c7916e759859cc18ea59a#1261741af2a5e142fa0c7916e759859cc18ea59a" dependencies = [ "ahash 0.8.0", "arrow", @@ -406,9 +410,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3" +checksum = "c90bf5f19754d10198ccb95b70664fc925bd1fc090a0fd9a6ebc54acc8cd6272" dependencies = [ "atty", "humantime", @@ -467,6 +471,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad6a9459c9c30b177b925162351f97e7d967c7ea8bab3b8352805327daf45554" dependencies = [ "crunchy", + "num-traits", ] [[package]] @@ -625,6 +630,12 @@ version = "0.2.132" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5" +[[package]] +name = "libm" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "292a948cd991e376cf75541fe5b97a1081d713c618b4f1b9500f8844e49eb565" + [[package]] name = "libmimalloc-sys" version = "0.1.25" @@ -780,6 +791,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" dependencies = [ "autocfg", + "libm", ] [[package]] diff --git a/dask_planner/Cargo.toml b/dask_planner/Cargo.toml index 06cf5addf..51826ab96 100644 --- a/dask_planner/Cargo.toml +++ b/dask_planner/Cargo.toml @@ -9,12 +9,12 @@ edition = "2021" rust-version = "1.62" [dependencies] -arrow = { version = "22.0.0", features = ["prettyprint"] } +arrow = { version = "23.0.0", features = ["prettyprint"] } async-trait = "0.1.41" -datafusion-common = "12.0.0" -datafusion-expr = "12.0.0" -datafusion-optimizer = "12.0.0" -datafusion-sql = "12.0.0" +datafusion-common = { git = "https://github.com/apache/arrow-datafusion/", rev = "1261741af2a5e142fa0c7916e759859cc18ea59a" } +datafusion-expr = { git = "https://github.com/apache/arrow-datafusion/", rev = "1261741af2a5e142fa0c7916e759859cc18ea59a" } +datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion/", rev = "1261741af2a5e142fa0c7916e759859cc18ea59a" } +datafusion-sql = { git = "https://github.com/apache/arrow-datafusion/", rev = "1261741af2a5e142fa0c7916e759859cc18ea59a" } env_logger = "0.9" log = "^0.4" mimalloc = { version = "*", default-features = false } diff --git a/dask_planner/src/expression.rs b/dask_planner/src/expression.rs index a070b3321..cfb910283 100644 --- a/dask_planner/src/expression.rs +++ b/dask_planner/src/expression.rs @@ -529,10 +529,10 @@ impl PyExpr { /// TODO: I can't express how much I dislike explicity listing all of these methods out /// but PyO3 makes it necessary since its annotations cannot be used in trait impl blocks #[pyo3(name = "getFloat32Value")] - pub fn float_32_value(&mut self) -> PyResult { + pub fn float_32_value(&mut self) -> PyResult> { match &self.expr { Expr::Literal(scalar_value) => match scalar_value { - ScalarValue::Float32(iv) => Ok(iv.unwrap()), + ScalarValue::Float32(iv) => Ok(*iv), _ => Err(py_type_err("getValue() - Unexpected value")), }, _ => Err(py_type_err("getValue() - Non literal value encountered")), @@ -540,10 +540,23 @@ impl PyExpr { } #[pyo3(name = "getFloat64Value")] - pub fn float_64_value(&mut self) -> PyResult { + pub fn float_64_value(&mut self) -> PyResult> { match &self.expr { Expr::Literal(scalar_value) => match scalar_value { - ScalarValue::Float64(iv) => Ok(iv.unwrap()), + ScalarValue::Float64(iv) => Ok(*iv), + _ => Err(py_type_err("getValue() - Unexpected value")), + }, + _ => Err(py_type_err("getValue() - Non literal value encountered")), + } + } + + #[pyo3(name = "getDecimal128Value")] + pub fn decimal_128_value(&mut self) -> PyResult<(Option, u8, u8)> { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Decimal128(value, precision, scale) => { + Ok((*value, *precision, *scale)) + } _ => Err(py_type_err("getValue() - Unexpected value")), }, _ => Err(py_type_err("getValue() - Non literal value encountered")), @@ -551,10 +564,10 @@ impl PyExpr { } #[pyo3(name = "getInt8Value")] - pub fn int_8_value(&mut self) -> PyResult { + pub fn int_8_value(&mut self) -> PyResult> { match &self.expr { Expr::Literal(scalar_value) => match scalar_value { - ScalarValue::Int8(iv) => Ok(iv.unwrap()), + ScalarValue::Int8(iv) => Ok(*iv), _ => Err(py_type_err("getValue() - Unexpected value")), }, _ => Err(py_type_err("getValue() - Non literal value encountered")), @@ -562,10 +575,10 @@ impl PyExpr { } #[pyo3(name = "getInt16Value")] - pub fn int_16_value(&mut self) -> PyResult { + pub fn int_16_value(&mut self) -> PyResult> { match &self.expr { Expr::Literal(scalar_value) => match scalar_value { - ScalarValue::Int16(iv) => Ok(iv.unwrap()), + ScalarValue::Int16(iv) => Ok(*iv), _ => Err(py_type_err("getValue() - Unexpected value")), }, _ => Err(py_type_err("getValue() - Non literal value encountered")), @@ -573,10 +586,10 @@ impl PyExpr { } #[pyo3(name = "getInt32Value")] - pub fn int_32_value(&mut self) -> PyResult { + pub fn int_32_value(&mut self) -> PyResult> { match &self.expr { Expr::Literal(scalar_value) => match scalar_value { - ScalarValue::Int32(iv) => Ok(iv.unwrap()), + ScalarValue::Int32(iv) => Ok(*iv), _ => Err(py_type_err("getValue() - Unexpected value")), }, _ => Err(py_type_err("getValue() - Non literal value encountered")), @@ -584,10 +597,10 @@ impl PyExpr { } #[pyo3(name = "getInt64Value")] - pub fn int_64_value(&mut self) -> PyResult { + pub fn int_64_value(&mut self) -> PyResult> { match &self.expr { Expr::Literal(scalar_value) => match scalar_value { - ScalarValue::Int64(iv) => Ok(iv.unwrap()), + ScalarValue::Int64(iv) => Ok(*iv), _ => Err(py_type_err("getValue() - Unexpected value")), }, _ => Err(py_type_err("getValue() - Non literal value encountered")), @@ -595,10 +608,10 @@ impl PyExpr { } #[pyo3(name = "getUInt8Value")] - pub fn uint_8_value(&mut self) -> PyResult { + pub fn uint_8_value(&mut self) -> PyResult> { match &self.expr { Expr::Literal(scalar_value) => match scalar_value { - ScalarValue::UInt8(iv) => Ok(iv.unwrap()), + ScalarValue::UInt8(iv) => Ok(*iv), _ => Err(py_type_err("getValue() - Unexpected value")), }, _ => Err(py_type_err("getValue() - Non literal value encountered")), @@ -606,10 +619,10 @@ impl PyExpr { } #[pyo3(name = "getUInt16Value")] - pub fn uint_16_value(&mut self) -> PyResult { + pub fn uint_16_value(&mut self) -> PyResult> { match &self.expr { Expr::Literal(scalar_value) => match scalar_value { - ScalarValue::UInt16(iv) => Ok(iv.unwrap()), + ScalarValue::UInt16(iv) => Ok(*iv), _ => Err(py_type_err("getValue() - Unexpected value")), }, _ => Err(py_type_err("getValue() - Non literal value encountered")), @@ -617,10 +630,10 @@ impl PyExpr { } #[pyo3(name = "getUInt32Value")] - pub fn uint_32_value(&mut self) -> PyResult { + pub fn uint_32_value(&mut self) -> PyResult> { match &self.expr { Expr::Literal(scalar_value) => match scalar_value { - ScalarValue::UInt32(iv) => Ok(iv.unwrap()), + ScalarValue::UInt32(iv) => Ok(*iv), _ => Err(py_type_err("getValue() - Unexpected value")), }, _ => Err(py_type_err("getValue() - Non literal value encountered")), @@ -628,10 +641,57 @@ impl PyExpr { } #[pyo3(name = "getUInt64Value")] - pub fn uint_64_value(&mut self) -> PyResult { + pub fn uint_64_value(&mut self) -> PyResult> { match &self.expr { Expr::Literal(scalar_value) => match scalar_value { - ScalarValue::UInt64(iv) => Ok(iv.unwrap()), + ScalarValue::UInt64(iv) => Ok(*iv), + _ => Err(py_type_err("getValue() - Unexpected value")), + }, + _ => Err(py_type_err("getValue() - Non literal value encountered")), + } + } + + #[pyo3(name = "getDate32Value")] + pub fn date_32_value(&mut self) -> PyResult> { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Date32(iv) => Ok(*iv), + _ => Err(py_type_err("getValue() - Unexpected value")), + }, + _ => Err(py_type_err("getValue() - Non literal value encountered")), + } + } + + #[pyo3(name = "getDate64Value")] + pub fn date_64_value(&mut self) -> PyResult> { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Date64(iv) => Ok(*iv), + _ => Err(py_type_err("getValue() - Unexpected value")), + }, + _ => Err(py_type_err("getValue() - Non literal value encountered")), + } + } + + #[pyo3(name = "getTime64Value")] + pub fn time_64_value(&mut self) -> PyResult> { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Time64(iv) => Ok(*iv), + _ => Err(py_type_err("getValue() - Unexpected value")), + }, + _ => Err(py_type_err("getValue() - Non literal value encountered")), + } + } + + #[pyo3(name = "getTimestampValue")] + pub fn timestamp_value(&mut self) -> PyResult<(Option, Option)> { + match &self.expr { + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::TimestampNanosecond(iv, tz) + | ScalarValue::TimestampMicrosecond(iv, tz) + | ScalarValue::TimestampMillisecond(iv, tz) + | ScalarValue::TimestampSecond(iv, tz) => Ok((*iv, tz.clone())), _ => Err(py_type_err("getValue() - Unexpected value")), }, _ => Err(py_type_err("getValue() - Non literal value encountered")), @@ -639,10 +699,10 @@ impl PyExpr { } #[pyo3(name = "getBoolValue")] - pub fn bool_value(&mut self) -> PyResult { + pub fn bool_value(&mut self) -> PyResult> { match &self.expr { Expr::Literal(scalar_value) => match scalar_value { - ScalarValue::Boolean(Some(iv)) => Ok(*iv), + ScalarValue::Boolean(iv) => Ok(*iv), _ => Err(py_type_err("getValue() - Unexpected value")), }, _ => Err(py_type_err("getValue() - Non literal value encountered")), @@ -650,10 +710,10 @@ impl PyExpr { } #[pyo3(name = "getStringValue")] - pub fn string_value(&mut self) -> PyResult { + pub fn string_value(&mut self) -> PyResult> { match &self.expr { Expr::Literal(scalar_value) => match scalar_value { - ScalarValue::Utf8(iv) => Ok(iv.clone().unwrap()), + ScalarValue::Utf8(iv) => Ok(iv.clone()), _ => Err(py_type_err("getValue() - Unexpected value")), }, _ => Err(py_type_err("getValue() - Non literal value encountered")), @@ -661,20 +721,19 @@ impl PyExpr { } #[pyo3(name = "getIntervalDayTimeValue")] - pub fn interval_day_time_value(&mut self) -> (i32, i32) { + pub fn interval_day_time_value(&mut self) -> PyResult> { match &self.expr { Expr::Literal(scalar_value) => match scalar_value { - ScalarValue::IntervalDayTime(iv) => { - let interval = iv.unwrap() as u64; + ScalarValue::IntervalDayTime(Some(iv)) => { + let interval = *iv as u64; let days = (interval >> 32) as i32; let ms = interval as i32; - (days, ms) - } - _ => { - panic!("getValue() - Unexpected value") + Ok(Some((days, ms))) } + ScalarValue::IntervalDayTime(None) => Ok(None), + _ => Err(py_type_err("getValue() - Unexpected value")), }, - _ => panic!("getValue() - Non literal value encountered"), + _ => Err(py_type_err("getValue() - Non literal value encountered")), } } diff --git a/dask_sql/mappings.py b/dask_sql/mappings.py index 6c41e13d0..c72c457a0 100644 --- a/dask_sql/mappings.py +++ b/dask_sql/mappings.py @@ -173,15 +173,9 @@ def sql_to_python_value(sql_type: "SqlTypeName", literal_value: Any) -> Any: if str(literal_value) == "None": # NULL time return pd.NaT # pragma: no cover - - tz = literal_value.getTimeZone().getID() - assert str(tz) == "UTC", "The code can currently only handle UTC timezones" - - dt = np.datetime64(literal_value.getTimeInMillis(), "ms") - if sql_type == SqlTypeName.DATE: - return dt.astype("= TIMESTAMP '2014-08-01 23:00:00' + CAST(timezone AS TIMESTAMP) >= TIMESTAMP '2014-08-01 23:00:00+00' """ ) diff --git a/tests/unit/test_queries.py b/tests/unit/test_queries.py index 8c29d5f60..606c7a3ad 100644 --- a/tests/unit/test_queries.py +++ b/tests/unit/test_queries.py @@ -22,7 +22,6 @@ 24, 27, 28, - 32, 34, 35, 36,