From eb1c5135030d54dbb5c73a92e545399dc0d313d1 Mon Sep 17 00:00:00 2001 From: Sevenannn Date: Tue, 16 Jul 2024 10:49:51 -0700 Subject: [PATCH 1/4] Add dialect param to use double precision for float64 in Postgres --- datafusion/sql/src/unparser/dialect.rs | 23 ++++++++++++++++++++ datafusion/sql/src/unparser/expr.rs | 30 +++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index eca2eb4fd0ec..9cc5816e21dc 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -45,6 +45,12 @@ pub trait Dialect { fn interval_style(&self) -> IntervalStyle { IntervalStyle::PostgresVerbose } + + // Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE? + // E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE + fn use_double_precision_for_float64(&self) -> bool { + false + } } /// `IntervalStyle` to use for unparsing @@ -118,6 +124,7 @@ pub struct CustomDialect { supports_nulls_first_in_sort: bool, use_timestamp_for_date64: bool, interval_style: IntervalStyle, + use_double_precision_for_float64: bool, } impl Default for CustomDialect { @@ -127,6 +134,7 @@ impl Default for CustomDialect { supports_nulls_first_in_sort: true, use_timestamp_for_date64: false, interval_style: IntervalStyle::SQLStandard, + use_double_precision_for_float64: false, } } } @@ -158,6 +166,10 @@ impl Dialect for CustomDialect { fn interval_style(&self) -> IntervalStyle { self.interval_style } + + fn use_double_precision_for_float64(&self) -> bool { + self.use_double_precision_for_float64 + } } /// `CustomDialectBuilder` to build `CustomDialect` using builder pattern @@ -179,6 +191,7 @@ pub struct CustomDialectBuilder { supports_nulls_first_in_sort: bool, use_timestamp_for_date64: bool, interval_style: IntervalStyle, + use_double_precision_for_float64: bool, } impl Default for CustomDialectBuilder { @@ -194,6 +207,7 @@ impl CustomDialectBuilder { supports_nulls_first_in_sort: true, use_timestamp_for_date64: false, interval_style: IntervalStyle::PostgresVerbose, + use_double_precision_for_float64: false, } } @@ -203,6 +217,7 @@ impl CustomDialectBuilder { supports_nulls_first_in_sort: self.supports_nulls_first_in_sort, use_timestamp_for_date64: self.use_timestamp_for_date64, interval_style: self.interval_style, + use_double_precision_for_float64: self.use_double_precision_for_float64, } } @@ -235,4 +250,12 @@ impl CustomDialectBuilder { self.interval_style = interval_style; self } + + pub fn with_use_double_precision_for_float64( + mut self, + use_double_precision_for_float64: bool, + ) -> Self { + self.use_double_precision_for_float64 = use_double_precision_for_float64; + self + } } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 6b7775ee3d4d..0c79f7327812 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1237,7 +1237,11 @@ impl Unparser<'_> { not_impl_err!("Unsupported DataType: conversion: {data_type:?}") } DataType::Float32 => Ok(ast::DataType::Float(None)), - DataType::Float64 => Ok(ast::DataType::Double), + DataType::Float64 => Ok(if self.dialect.use_double_precision_for_float64() { + ast::DataType::DoublePrecision + } else { + ast::DataType::Double + }), DataType::Timestamp(_, tz) => { let tz_info = match tz { Some(_) => TimezoneInfo::WithTimeZone, @@ -1819,6 +1823,30 @@ mod tests { Ok(()) } + #[test] + fn custom_dialect_use_double_precision_for_float64() -> Result<()> { + for (use_double_precision_for_float64, identifier) in + [(false, "DOUBLE"), (true, "DOUBLE PRECISION")] + { + let dialect = CustomDialectBuilder::new() + .with_use_double_precision_for_float64(use_double_precision_for_float64) + .build(); + let unparser = Unparser::new(&dialect); + + let expr = Expr::Cast(Cast { + expr: Box::new(col("a")), + data_type: DataType::Float64, + }); + let ast = unparser.expr_to_sql(&expr)?; + + let actual = format!("{}", ast); + + let expected = format!(r#"CAST(a AS {identifier})"#); + assert_eq!(actual, expected); + } + Ok(()) + } + #[test] fn customer_dialect_support_nulls_first_in_ort() -> Result<()> { let tests: Vec<(Expr, &str, bool)> = vec![ From d0d8b1478b1e9d1a23641823e7e5dcf0c08ca231 Mon Sep 17 00:00:00 2001 From: Sevenannn Date: Tue, 16 Jul 2024 15:28:19 -0700 Subject: [PATCH 2/4] return ast data type instead of bool --- datafusion/sql/src/unparser/dialect.rs | 28 +++++++++++++++----------- datafusion/sql/src/unparser/expr.rs | 20 +++++++++--------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index 9cc5816e21dc..6660010e0f78 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -48,8 +48,8 @@ pub trait Dialect { // Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE? // E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE - fn use_double_precision_for_float64(&self) -> bool { - false + fn float64_ast_dtype(&self) -> sqlparser::ast::DataType { + sqlparser::ast::DataType::Double } } @@ -93,6 +93,10 @@ impl Dialect for PostgreSqlDialect { fn interval_style(&self) -> IntervalStyle { IntervalStyle::PostgresVerbose } + + fn float64_ast_dtype(&self) -> sqlparser::ast::DataType { + sqlparser::ast::DataType::DoublePrecision + } } pub struct MySqlDialect {} @@ -124,7 +128,7 @@ pub struct CustomDialect { supports_nulls_first_in_sort: bool, use_timestamp_for_date64: bool, interval_style: IntervalStyle, - use_double_precision_for_float64: bool, + float64_ast_dtype: sqlparser::ast::DataType, } impl Default for CustomDialect { @@ -134,7 +138,7 @@ impl Default for CustomDialect { supports_nulls_first_in_sort: true, use_timestamp_for_date64: false, interval_style: IntervalStyle::SQLStandard, - use_double_precision_for_float64: false, + float64_ast_dtype: sqlparser::ast::DataType::Double, } } } @@ -167,8 +171,8 @@ impl Dialect for CustomDialect { self.interval_style } - fn use_double_precision_for_float64(&self) -> bool { - self.use_double_precision_for_float64 + fn float64_ast_dtype(&self) -> sqlparser::ast::DataType { + self.float64_ast_dtype.clone() } } @@ -191,7 +195,7 @@ pub struct CustomDialectBuilder { supports_nulls_first_in_sort: bool, use_timestamp_for_date64: bool, interval_style: IntervalStyle, - use_double_precision_for_float64: bool, + float64_ast_dtype: sqlparser::ast::DataType, } impl Default for CustomDialectBuilder { @@ -207,7 +211,7 @@ impl CustomDialectBuilder { supports_nulls_first_in_sort: true, use_timestamp_for_date64: false, interval_style: IntervalStyle::PostgresVerbose, - use_double_precision_for_float64: false, + float64_ast_dtype: sqlparser::ast::DataType::Double, } } @@ -217,7 +221,7 @@ impl CustomDialectBuilder { supports_nulls_first_in_sort: self.supports_nulls_first_in_sort, use_timestamp_for_date64: self.use_timestamp_for_date64, interval_style: self.interval_style, - use_double_precision_for_float64: self.use_double_precision_for_float64, + float64_ast_dtype: self.float64_ast_dtype, } } @@ -251,11 +255,11 @@ impl CustomDialectBuilder { self } - pub fn with_use_double_precision_for_float64( + pub fn with_float64_ast_dtype( mut self, - use_double_precision_for_float64: bool, + float64_ast_dtype: sqlparser::ast::DataType, ) -> Self { - self.use_double_precision_for_float64 = use_double_precision_for_float64; + self.float64_ast_dtype = float64_ast_dtype; self } } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 0c79f7327812..23ec6b3bcccd 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1237,11 +1237,7 @@ impl Unparser<'_> { not_impl_err!("Unsupported DataType: conversion: {data_type:?}") } DataType::Float32 => Ok(ast::DataType::Float(None)), - DataType::Float64 => Ok(if self.dialect.use_double_precision_for_float64() { - ast::DataType::DoublePrecision - } else { - ast::DataType::Double - }), + DataType::Float64 => Ok(self.dialect.float64_ast_dtype()), DataType::Timestamp(_, tz) => { let tz_info = match tz { Some(_) => TimezoneInfo::WithTimeZone, @@ -1824,12 +1820,16 @@ mod tests { } #[test] - fn custom_dialect_use_double_precision_for_float64() -> Result<()> { - for (use_double_precision_for_float64, identifier) in - [(false, "DOUBLE"), (true, "DOUBLE PRECISION")] - { + fn custom_dialect_float64_ast_dtype() -> Result<()> { + for (float64_ast_dtype, identifier) in [ + (sqlparser::ast::DataType::Double, "DOUBLE"), + ( + sqlparser::ast::DataType::DoublePrecision, + "DOUBLE PRECISION", + ), + ] { let dialect = CustomDialectBuilder::new() - .with_use_double_precision_for_float64(use_double_precision_for_float64) + .with_float64_ast_dtype(float64_ast_dtype) .build(); let unparser = Unparser::new(&dialect); From 7405dc8d568347d13d6c28aae1194fb1a84b757f Mon Sep 17 00:00:00 2001 From: Sevenannn Date: Thu, 18 Jul 2024 13:41:09 -0700 Subject: [PATCH 3/4] Fix errors in merging --- datafusion/sql/src/unparser/dialect.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index 865a14e5e1d8..4abe1000d577 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -50,13 +50,14 @@ pub trait Dialect { // E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE fn float64_ast_dtype(&self) -> sqlparser::ast::DataType { sqlparser::ast::DataType::Double + } // The SQL type to use for Arrow Utf8 unparsing // Most dialects use VARCHAR, but some, like MySQL, require CHAR fn utf8_cast_dtype(&self) -> ast::DataType { ast::DataType::Varchar(None) } - + // The SQL type to use for Arrow LargeUtf8 unparsing // Most dialects use TEXT, but some, like MySQL, require CHAR fn large_utf8_cast_dtype(&self) -> ast::DataType { @@ -196,6 +197,7 @@ impl Dialect for CustomDialect { fn float64_ast_dtype(&self) -> sqlparser::ast::DataType { self.float64_ast_dtype.clone() + } fn utf8_cast_dtype(&self) -> ast::DataType { self.utf8_cast_dtype.clone() @@ -203,7 +205,6 @@ impl Dialect for CustomDialect { fn large_utf8_cast_dtype(&self) -> ast::DataType { self.large_utf8_cast_dtype.clone() - } } @@ -297,10 +298,11 @@ impl CustomDialectBuilder { float64_ast_dtype: sqlparser::ast::DataType, ) -> Self { self.float64_ast_dtype = float64_ast_dtype; - - pub fn with_utf8_cast_dtype(mut self, utf8_cast_dtype: ast::DataType) -> Self { - self.utf8_cast_dtype = utf8_cast_dtype; - self + + pub fn with_utf8_cast_dtype(mut self, utf8_cast_dtype: ast::DataType) -> Self { + self.utf8_cast_dtype = utf8_cast_dtype; + self + } } pub fn with_large_utf8_cast_dtype( From 4d2b8e218142a6bcbe872fb488296d5541a92335 Mon Sep 17 00:00:00 2001 From: Sevenannn Date: Thu, 18 Jul 2024 16:28:50 -0700 Subject: [PATCH 4/4] fix --- datafusion/sql/src/unparser/dialect.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index 4abe1000d577..c6ed28cd74b5 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -298,11 +298,12 @@ impl CustomDialectBuilder { float64_ast_dtype: sqlparser::ast::DataType, ) -> Self { self.float64_ast_dtype = float64_ast_dtype; + self + } - pub fn with_utf8_cast_dtype(mut self, utf8_cast_dtype: ast::DataType) -> Self { - self.utf8_cast_dtype = utf8_cast_dtype; - self - } + pub fn with_utf8_cast_dtype(mut self, utf8_cast_dtype: ast::DataType) -> Self { + self.utf8_cast_dtype = utf8_cast_dtype; + self } pub fn with_large_utf8_cast_dtype(