Skip to content

Commit

Permalink
Add dialect param to use CHAR instead of TEXT for Utf8 unparsing for MySQL (#12)
Browse files Browse the repository at this point in the history
  • Loading branch information
sgrebnov committed Jul 16, 2024
1 parent f11bdf0 commit f992066
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 2 deletions.
25 changes: 25 additions & 0 deletions datafusion/sql/src/unparser/dialect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ pub trait Dialect {
/// The `IntervalStyle` this dialect uses for unparsing.
///
/// Defaults to `IntervalStyle::PostgresVerbose`; dialects override it as needed.
fn interval_style(&self) -> IntervalStyle {
IntervalStyle::PostgresVerbose
}

/// Whether casts to string types should be unparsed with `CHAR`.
///
/// When this returns `true`, the unparser emits `CHAR` as the cast target for both
/// `Utf8` and `LargeUtf8`, instead of `VARCHAR` and `TEXT` respectively.
/// E.g. MySQL requires `CHAR` instead of `TEXT` and automatically produces a string with
/// the `VARCHAR`, `TEXT` or `LONGTEXT` data type based on the length of the string.
///
/// Defaults to `false`.
fn use_char_for_utf8_cast(&self) -> bool {
false
}
}

/// `IntervalStyle` to use for unparsing
Expand Down Expand Up @@ -103,6 +110,10 @@ impl Dialect for MySqlDialect {
/// The MySQL dialect unparses with `IntervalStyle::MySQL`.
fn interval_style(&self) -> IntervalStyle {
IntervalStyle::MySQL
}

/// The MySQL dialect opts in to `CHAR` as the cast target for string types,
/// overriding the trait default of `false`.
fn use_char_for_utf8_cast(&self) -> bool {
true
}
}

pub struct SqliteDialect {}
Expand All @@ -118,6 +129,7 @@ pub struct CustomDialect {
supports_nulls_first_in_sort: bool,
use_timestamp_for_date64: bool,
interval_style: IntervalStyle,
use_char_for_utf8_cast: bool,
}

impl Default for CustomDialect {
Expand All @@ -127,6 +139,7 @@ impl Default for CustomDialect {
supports_nulls_first_in_sort: true,
use_timestamp_for_date64: false,
interval_style: IntervalStyle::SQLStandard,
use_char_for_utf8_cast: false,
}
}
}
Expand Down Expand Up @@ -158,6 +171,10 @@ impl Dialect for CustomDialect {
/// Returns the interval style stored on this `CustomDialect`.
fn interval_style(&self) -> IntervalStyle {
self.interval_style
}

/// Returns the `use_char_for_utf8_cast` flag stored on this `CustomDialect`
/// (set through `CustomDialectBuilder::with_use_char_for_utf8_cast`, `false` by default).
fn use_char_for_utf8_cast(&self) -> bool {
self.use_char_for_utf8_cast
}
}

/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern
Expand All @@ -179,6 +196,7 @@ pub struct CustomDialectBuilder {
supports_nulls_first_in_sort: bool,
use_timestamp_for_date64: bool,
interval_style: IntervalStyle,
use_char_for_utf8_cast: bool,
}

impl Default for CustomDialectBuilder {
Expand All @@ -194,6 +212,7 @@ impl CustomDialectBuilder {
supports_nulls_first_in_sort: true,
use_timestamp_for_date64: false,
interval_style: IntervalStyle::PostgresVerbose,
use_char_for_utf8_cast: false,
}
}

Expand All @@ -203,6 +222,7 @@ impl CustomDialectBuilder {
supports_nulls_first_in_sort: self.supports_nulls_first_in_sort,
use_timestamp_for_date64: self.use_timestamp_for_date64,
interval_style: self.interval_style,
use_char_for_utf8_cast: self.use_char_for_utf8_cast,
}
}

Expand Down Expand Up @@ -235,4 +255,9 @@ impl CustomDialectBuilder {
self.interval_style = interval_style;
self
}

pub fn with_use_char_for_utf8_cast(mut self, use_char_for_utf8_cast: bool) -> Self {
self.use_char_for_utf8_cast = use_char_for_utf8_cast;
self
}
}
39 changes: 37 additions & 2 deletions datafusion/sql/src/unparser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1272,8 +1272,16 @@ impl Unparser<'_> {
DataType::BinaryView => {
not_impl_err!("Unsupported DataType: conversion: {data_type:?}")
}
DataType::Utf8 => Ok(ast::DataType::Varchar(None)),
DataType::LargeUtf8 => Ok(ast::DataType::Text),
DataType::Utf8 => Ok(if self.dialect.use_char_for_utf8_cast() {
ast::DataType::Char(None)
} else {
ast::DataType::Varchar(None)
}),
DataType::LargeUtf8 => Ok(if self.dialect.use_char_for_utf8_cast() {
ast::DataType::Char(None)
} else {
ast::DataType::Text
}),
DataType::Utf8View => {
not_impl_err!("Unsupported DataType: conversion: {data_type:?}")
}
Expand Down Expand Up @@ -1933,4 +1941,31 @@ mod tests {
assert_eq!(actual, expected);
}
}

/// Verifies that the `use_char_for_utf8_cast` dialect flag selects the SQL type
/// emitted when unparsing a cast to `Utf8` or `LargeUtf8`.
#[test]
fn custom_dialect_use_char_for_utf8_cast() -> Result<()> {
    // (flag value, Arrow type being cast to, SQL type name expected in the output)
    let cases = [
        (false, DataType::Utf8, "VARCHAR"),
        (true, DataType::Utf8, "CHAR"),
        (false, DataType::LargeUtf8, "TEXT"),
        (true, DataType::LargeUtf8, "CHAR"),
    ];

    for (use_char, target_type, sql_type) in cases {
        let dialect = CustomDialectBuilder::new()
            .with_use_char_for_utf8_cast(use_char)
            .build();
        let unparser = Unparser::new(&dialect);

        let cast = Expr::Cast(Cast {
            expr: Box::new(col("a")),
            data_type: target_type,
        });

        // Render the unparsed AST back to SQL text for comparison.
        let actual = unparser.expr_to_sql(&cast)?.to_string();
        let expected = format!("CAST(a AS {sql_type})");

        assert_eq!(actual, expected);
    }

    Ok(())
}
}

0 comments on commit f992066

Please sign in to comment.