Skip to content

Commit

Permalink
adding config to control Varchar behavior (#11090)
Browse files Browse the repository at this point in the history
* adding config to control Varchar behavior

* fix failed tests

* fix config_md

* format md

* optimize code

* format md

* format md

* adding config

* Tweak documentation

* Update sqllogictest

* tweaks strings

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
Lordworms and alamb committed Jun 28, 2024
1 parent 3bd7200 commit ca9c322
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 1 deletion.
6 changes: 6 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,11 @@ config_namespace! {
/// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
pub dialect: String, default = "generic".to_string()

/// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but
/// ignore the length. If false, error if a `VARCHAR` with a length is
/// specified. The Arrow type system does not have a notion of maximum
/// string length and thus DataFusion can not enforce such limits.
pub support_varchar_with_length: bool, default = true
}
}

Expand Down Expand Up @@ -303,6 +308,7 @@ config_namespace! {
/// statistics into the same file groups.
/// Currently experimental
pub split_file_groups_by_statistics: bool, default = false

}
}

Expand Down
1 change: 1 addition & 0 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,7 @@ impl SessionState {
ParserOptions {
parse_float_as_decimal: sql_parser_options.parse_float_as_decimal,
enable_ident_normalization: sql_parser_options.enable_ident_normalization,
support_varchar_with_length: sql_parser_options.support_varchar_with_length,
}
}

Expand Down
9 changes: 8 additions & 1 deletion datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,15 @@ pub trait ContextProvider {
pub struct ParserOptions {
/// When true, the SQL parser parses float as decimal type.
pub parse_float_as_decimal: bool,
/// When true, the SQL parser normalizes identifiers (converting them to
/// lowercase when they are not quoted).
pub enable_ident_normalization: bool,
/// When true, a `VARCHAR` length such as `VARCHAR(20)` is accepted but
/// ignored; when false, specifying a `VARCHAR` with a length is an error.
pub support_varchar_with_length: bool,
}

impl Default for ParserOptions {
fn default() -> Self {
Self {
parse_float_as_decimal: false,
enable_ident_normalization: true,
support_varchar_with_length: true,
}
}
}
Expand Down Expand Up @@ -404,12 +406,17 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLDataType::UnsignedInt(_) | SQLDataType::UnsignedInteger(_) | SQLDataType::UnsignedInt4(_) => {
Ok(DataType::UInt32)
}
SQLDataType::Varchar(length) => {
match (length, self.options.support_varchar_with_length) {
(Some(_), false) => plan_err!("does not support Varchar with length, please set `support_varchar_with_length` to be true"),
_ => Ok(DataType::Utf8),
}
}
SQLDataType::UnsignedBigInt(_) | SQLDataType::UnsignedInt8(_) => Ok(DataType::UInt64),
SQLDataType::Float(_) => Ok(DataType::Float32),
SQLDataType::Real | SQLDataType::Float4 => Ok(DataType::Float32),
SQLDataType::Double | SQLDataType::DoublePrecision | SQLDataType::Float8 => Ok(DataType::Float64),
SQLDataType::Char(_)
| SQLDataType::Varchar(_)
| SQLDataType::Text
| SQLDataType::String(_) => Ok(DataType::Utf8),
SQLDataType::Timestamp(None, tz_info) => {
Expand Down
2 changes: 2 additions & 0 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ fn parse_decimals() {
ParserOptions {
parse_float_as_decimal: true,
enable_ident_normalization: false,
support_varchar_with_length: false,
},
);
}
Expand Down Expand Up @@ -137,6 +138,7 @@ fn parse_ident_normalization() {
ParserOptions {
parse_float_as_decimal: false,
enable_ident_normalization,
support_varchar_with_length: false,
},
);
if plan.is_ok() {
Expand Down
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/information_schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ datafusion.optimizer.top_down_join_key_reordering true
datafusion.sql_parser.dialect generic
datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.parse_float_as_decimal false
datafusion.sql_parser.support_varchar_with_length true

# show all variables with verbose
query TTT rowsort
Expand Down Expand Up @@ -318,6 +319,7 @@ datafusion.optimizer.top_down_join_key_reordering true When set to true, the phy
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.

# show_variable_in_config_options
query TT
Expand Down
49 changes: 49 additions & 0 deletions datafusion/sqllogictest/test_files/strings.slt
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,52 @@ e1
p2
p2e1
p2m1e1

## VARCHAR with length support

# Lengths can be used by default
query T
SELECT '12345'::VARCHAR(2);
----
12345

# Lengths cannot be used when the config setting is disabled

statement ok
set datafusion.sql_parser.support_varchar_with_length = false;

query error
SELECT '12345'::VARCHAR(2);

query error
SELECT s::VARCHAR(2) FROM (VALUES ('12345')) t(s);

statement ok
create table vals(s char) as values('abc'), ('def');

query error
SELECT s::VARCHAR(2) FROM vals

# Lengths can be used when the config setting is enabled

statement ok
set datafusion.sql_parser.support_varchar_with_length = true;

query T
SELECT '12345'::VARCHAR(2)
----
12345

query T
SELECT s::VARCHAR(2) FROM (VALUES ('12345')) t(s)
----
12345

query T
SELECT s::VARCHAR(2) FROM vals
----
abc
def

statement ok
drop table vals;
1 change: 1 addition & 0 deletions docs/source/user-guide/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,4 @@ Environment variables are read during `SessionConfig` initialisation so they mus
| datafusion.sql_parser.parse_float_as_decimal | false | When set to true, SQL parser will parse float as decimal type |
| datafusion.sql_parser.enable_ident_normalization | true | When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) |
| datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. |
| datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |

0 comments on commit ca9c322

Please sign in to comment.