Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,9 @@ config_namespace! {

/// Specifies the recursion depth limit when parsing complex SQL Queries
pub recursion_limit: usize, default = 50

/// When set to true, the SQL parser will parse literal bytes as `FixedSizeBinary` type
pub parse_hex_as_fixed_size_binary: bool, default = false
}
}

Expand Down
2 changes: 2 additions & 0 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,8 @@ impl SessionState {
support_varchar_with_length: sql_parser_options.support_varchar_with_length,
map_varchar_to_utf8view: sql_parser_options.map_varchar_to_utf8view,
collect_spans: sql_parser_options.collect_spans,
parse_hex_as_fixed_size_binary: sql_parser_options
.parse_hex_as_fixed_size_binary,
}
}

Expand Down
15 changes: 14 additions & 1 deletion datafusion/sql/src/expr/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,20 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
}
Value::HexStringLiteral(s) => {
if let Some(v) = try_decode_hex_literal(&s) {
Ok(lit(v))
if self.options.parse_hex_as_fixed_size_binary {
let safe_len = i32::try_from(v.len()).map_err(|_| {
DataFusionError::from(ParserError(format!(
"HexStringLiteral too long, length: {}",
v.len(),
)))
})?;
Ok(Expr::Literal(ScalarValue::FixedSizeBinary(
safe_len,
Some(v),
)))
} else {
Ok(lit(v))
}
} else {
plan_err!("Invalid HexStringLiteral '{s}'")
}
Expand Down
4 changes: 4 additions & 0 deletions datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ pub struct ParserOptions {
pub collect_spans: bool,
/// Whether `VARCHAR` is mapped to `Utf8View` during SQL planning.
pub map_varchar_to_utf8view: bool,
/// Whether hex string literals (e.g. `X'0102'`) are planned as `FixedSizeBinary` instead of `Binary`
pub parse_hex_as_fixed_size_binary: bool,
}

impl ParserOptions {
Expand All @@ -75,6 +77,7 @@ impl ParserOptions {
map_varchar_to_utf8view: false,
enable_options_value_normalization: false,
collect_spans: false,
parse_hex_as_fixed_size_binary: false,
}
}

Expand Down Expand Up @@ -147,6 +150,7 @@ impl From<&SqlParserOptions> for ParserOptions {
enable_options_value_normalization: options
.enable_options_value_normalization,
collect_spans: options.collect_spans,
parse_hex_as_fixed_size_binary: options.parse_hex_as_fixed_size_binary,
}
}
}
Expand Down
3 changes: 3 additions & 0 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3363,6 +3363,7 @@ fn parse_decimals_parser_options() -> ParserOptions {
map_varchar_to_utf8view: false,
enable_options_value_normalization: false,
collect_spans: false,
parse_hex_as_fixed_size_binary: false,
}
}

Expand All @@ -3374,6 +3375,7 @@ fn ident_normalization_parser_options_no_ident_normalization() -> ParserOptions
map_varchar_to_utf8view: false,
enable_options_value_normalization: false,
collect_spans: false,
parse_hex_as_fixed_size_binary: false,
}
}

Expand All @@ -3385,6 +3387,7 @@ fn ident_normalization_parser_options_ident_normalization() -> ParserOptions {
map_varchar_to_utf8view: false,
enable_options_value_normalization: false,
collect_spans: false,
parse_hex_as_fixed_size_binary: false,
}
}

Expand Down
15 changes: 15 additions & 0 deletions datafusion/sqllogictest/test_files/binary.slt
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,21 @@ SELECT column1, column1 = arrow_cast(X'0102', 'FixedSizeBinary(2)') FROM t
query error DataFusion error: Error during planning: Cannot infer common argument type for comparison operation FixedSizeBinary\(3\) = Binary
SELECT column1, column1 = X'0102' FROM t

statement ok
set datafusion.sql_parser.parse_hex_as_fixed_size_binary = true;

query ?B
SELECT column1, column1 = X'000102' FROM t
----
000102 true
003102 false
NULL NULL
ff0102 false
000102 true

statement ok
set datafusion.sql_parser.parse_hex_as_fixed_size_binary = false;

statement ok
drop table t_source

Expand Down
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/information_schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.enable_options_value_normalization false
datafusion.sql_parser.map_varchar_to_utf8view false
datafusion.sql_parser.parse_float_as_decimal false
datafusion.sql_parser.parse_hex_as_fixed_size_binary false
datafusion.sql_parser.recursion_limit 50
datafusion.sql_parser.support_varchar_with_length true

Expand Down Expand Up @@ -364,6 +365,7 @@ datafusion.sql_parser.enable_ident_normalization true When set to true, SQL pars
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
datafusion.sql_parser.map_varchar_to_utf8view false If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false.
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
datafusion.sql_parser.parse_hex_as_fixed_size_binary false When set to true, the SQL parser will parse literal bytes as `FixedSizeBinary` type
datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit when parsing complex SQL Queries
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.

Expand Down
1 change: 1 addition & 0 deletions docs/source/user-guide/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,3 +131,4 @@ Environment variables are read during `SessionConfig` initialisation so they mus
| datafusion.sql_parser.map_varchar_to_utf8view | false | If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false. |
| datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. |
| datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries |
| datafusion.sql_parser.parse_hex_as_fixed_size_binary | false | When set to true, the SQL parser will parse literal bytes as `FixedSizeBinary` type |