Skip to content

Commit

Permalink
Add use_char_for_utf8_cast Dialect param
Browse files Browse the repository at this point in the history
  • Loading branch information
sgrebnov committed Jul 12, 2024
1 parent 068057d commit 107c02a
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 2 deletions.
25 changes: 25 additions & 0 deletions datafusion/sql/src/unparser/dialect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ pub trait Dialect {
// Dialect flag consulted for `Date64` handling; the trait default is `false`.
// NOTE(review): the code that reads this flag is outside this excerpt — presumably
// it selects TIMESTAMP as the SQL type for Date64; confirm against the unparser.
fn use_timestamp_for_date64(&self) -> bool {
false
}

// Does the dialect use CHAR as the cast target for Utf8 and LargeUtf8, rather than
// the default VARCHAR (Utf8) and TEXT (LargeUtf8)?
// E.g. MySQL requires CHAR instead of TEXT and automatically produces a string with
// the VARCHAR, TEXT or LONGTEXT data type based on the length of the string.
// Defaults to `false`; dialects that need CHAR override this to return `true`.
fn use_char_for_utf8_cast(&self) -> bool {
false
}
}
// Field-less dialect type.
// NOTE(review): its `Dialect` impl is outside this excerpt — presumably it relies on
// the trait's default method bodies; confirm.
pub struct DefaultDialect {}

Expand Down Expand Up @@ -75,6 +82,10 @@ impl Dialect for MySqlDialect {
// MySQL reports no support for NULLS FIRST in sort expressions.
// NOTE(review): the consumer of this flag is not visible here — presumably the
// unparser omits the NULLS FIRST/LAST clause when this is `false`; confirm.
fn supports_nulls_first_in_sort(&self) -> bool {
false
}

// MySQL requires CHAR as the target type when casting to a string, so opt in here
// instead of keeping the trait default of `false`.
fn use_char_for_utf8_cast(&self) -> bool {
true
}
}

// Field-less dialect type for SQLite.
// NOTE(review): its `Dialect` impl is outside this excerpt; which defaults it
// overrides cannot be determined from here.
pub struct SqliteDialect {}
Expand All @@ -89,6 +100,7 @@ pub struct CustomDialect {
identifier_quote_style: Option<char>,
supports_nulls_first_in_sort: bool,
use_timestamp_for_date64: bool,
use_char_for_utf8_cast: bool,
}

impl Default for CustomDialect {
Expand All @@ -97,6 +109,7 @@ impl Default for CustomDialect {
identifier_quote_style: None,
supports_nulls_first_in_sort: true,
use_timestamp_for_date64: false,
use_char_for_utf8_cast: false,
}
}
}
Expand All @@ -123,13 +136,18 @@ impl Dialect for CustomDialect {
// Returns the value stored in this dialect's `use_timestamp_for_date64` field
// rather than a hard-coded answer.
fn use_timestamp_for_date64(&self) -> bool {
self.use_timestamp_for_date64
}

// Returns the value stored in this dialect's `use_char_for_utf8_cast` field
// rather than a hard-coded answer.
fn use_char_for_utf8_cast(&self) -> bool {
self.use_char_for_utf8_cast
}
}

// Builder for `CustomDialect`. It carries the same settings as the dialect itself;
// each one is copied into the resulting `CustomDialect` when the builder is finished.
// Typical use:
//   CustomDialectBuilder::new().with_use_char_for_utf8_cast(true).build()
pub struct CustomDialectBuilder {
// Optional quote character for identifiers.
// NOTE(review): presumably `None` means identifiers are left unquoted — confirm.
identifier_quote_style: Option<char>,
// Value the built dialect reports from `supports_nulls_first_in_sort()`.
supports_nulls_first_in_sort: bool,
// Value the built dialect reports from `use_timestamp_for_date64()`.
use_timestamp_for_date64: bool,
// Value the built dialect reports from `use_char_for_utf8_cast()`; when `true`,
// casts to Utf8/LargeUtf8 are emitted as CHAR instead of VARCHAR/TEXT.
use_char_for_utf8_cast: bool,
}

impl CustomDialectBuilder {
Expand All @@ -138,6 +156,7 @@ impl CustomDialectBuilder {
identifier_quote_style: None,
supports_nulls_first_in_sort: true,
use_timestamp_for_date64: false,
use_char_for_utf8_cast: false,
}
}

Expand All @@ -146,6 +165,7 @@ impl CustomDialectBuilder {
identifier_quote_style: self.identifier_quote_style,
supports_nulls_first_in_sort: self.supports_nulls_first_in_sort,
use_timestamp_for_date64: self.use_timestamp_for_date64,
use_char_for_utf8_cast: self.use_char_for_utf8_cast,
}
}

Expand All @@ -169,4 +189,9 @@ impl CustomDialectBuilder {
self.use_timestamp_for_date64 = use_timestamp_for_date64;
self
}

pub fn with_use_char_for_utf8_cast(mut self, use_char_for_utf8_cast: bool) -> Self {
self.use_char_for_utf8_cast = use_char_for_utf8_cast;
self
}
}
39 changes: 37 additions & 2 deletions datafusion/sql/src/unparser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1000,8 +1000,16 @@ impl Unparser<'_> {
DataType::BinaryView => {
not_impl_err!("Unsupported DataType: conversion: {data_type:?}")
}
DataType::Utf8 => Ok(ast::DataType::Varchar(None)),
DataType::LargeUtf8 => Ok(ast::DataType::Text),
DataType::Utf8 => Ok(if self.dialect.use_char_for_utf8_cast() {
ast::DataType::Char(None)
} else {
ast::DataType::Varchar(None)
}),
DataType::LargeUtf8 => Ok(if self.dialect.use_char_for_utf8_cast() {
ast::DataType::Char(None)
} else {
ast::DataType::Text
}),
DataType::Utf8View => {
not_impl_err!("Unsupported DataType: conversion: {data_type:?}")
}
Expand Down Expand Up @@ -1582,4 +1590,31 @@ mod tests {

Ok(())
}

#[test]
fn custom_dialect_use_char_for_utf8_cast() -> Result<()> {
    // Each case: (dialect flag, Arrow type being cast to, SQL type expected in the output).
    let cases = [
        (false, DataType::Utf8, "VARCHAR"),
        (true, DataType::Utf8, "CHAR"),
        (false, DataType::LargeUtf8, "TEXT"),
        (true, DataType::LargeUtf8, "CHAR"),
    ];

    for (use_char_for_utf8_cast, data_type, identifier) in cases {
        // Build a dialect that differs from the defaults only in the flag under test.
        let dialect = CustomDialectBuilder::new()
            .with_use_char_for_utf8_cast(use_char_for_utf8_cast)
            .build();
        let unparser = Unparser::new(&dialect);

        let cast_expr = Expr::Cast(Cast {
            expr: Box::new(col("a")),
            data_type,
        });

        // Render the unparsed AST back to SQL text and compare with the expected cast.
        let actual = unparser.expr_to_sql(&cast_expr)?.to_string();
        let expected = format!(r#"CAST(a AS {identifier})"#);

        assert_eq!(actual, expected);
    }

    Ok(())
}
}

0 comments on commit 107c02a

Please sign in to comment.