Skip to content

Commit b6366ad

Browse files
committed
add config parse_hex_as_fixed_size_binary
1 parent 4a4163a commit b6366ad

File tree

7 files changed

+43
-1
lines changed

7 files changed

+43
-1
lines changed

datafusion/common/src/config.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,9 @@ config_namespace! {
272272

273273
/// Specifies the recursion depth limit when parsing complex SQL Queries
274274
pub recursion_limit: usize, default = 50
275+
276+
/// When set to true, the SQL parser will parse literal bytes as `FixedSizeBinary` type
277+
pub parse_hex_as_fixed_size_binary: bool, default = false
275278
}
276279
}
277280

datafusion/core/src/execution/session_state.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,8 @@ impl SessionState {
496496
support_varchar_with_length: sql_parser_options.support_varchar_with_length,
497497
map_varchar_to_utf8view: sql_parser_options.map_varchar_to_utf8view,
498498
collect_spans: sql_parser_options.collect_spans,
499+
parse_hex_as_fixed_size_binary: sql_parser_options
500+
.parse_hex_as_fixed_size_binary,
499501
}
500502
}
501503

datafusion/sql/src/expr/value.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,20 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
5757
}
5858
Value::HexStringLiteral(s) => {
5959
if let Some(v) = try_decode_hex_literal(&s) {
60-
Ok(lit(v))
60+
if self.options.parse_hex_as_fixed_size_binary {
61+
let safe_len = i32::try_from(v.len()).map_err(|_| {
62+
DataFusionError::from(ParserError(format!(
63+
"HexStringLiteral too long, length: {}",
64+
v.len(),
65+
)))
66+
})?;
67+
Ok(Expr::Literal(ScalarValue::FixedSizeBinary(
68+
safe_len,
69+
Some(v),
70+
)))
71+
} else {
72+
Ok(lit(v))
73+
}
6174
} else {
6275
plan_err!("Invalid HexStringLiteral '{s}'")
6376
}

datafusion/sql/src/planner.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ pub struct ParserOptions {
5454
pub collect_spans: bool,
5555
/// Whether `VARCHAR` is mapped to `Utf8View` during SQL planning.
5656
pub map_varchar_to_utf8view: bool,
57+
/// Whether to parse hex string literals (e.g. `X'0102'`) as `FixedSizeBinary` instead of `Binary`
58+
pub parse_hex_as_fixed_size_binary: bool,
5759
}
5860

5961
impl ParserOptions {
@@ -75,6 +77,7 @@ impl ParserOptions {
7577
map_varchar_to_utf8view: false,
7678
enable_options_value_normalization: false,
7779
collect_spans: false,
80+
parse_hex_as_fixed_size_binary: false,
7881
}
7982
}
8083

@@ -147,6 +150,7 @@ impl From<&SqlParserOptions> for ParserOptions {
147150
enable_options_value_normalization: options
148151
.enable_options_value_normalization,
149152
collect_spans: options.collect_spans,
153+
parse_hex_as_fixed_size_binary: options.parse_hex_as_fixed_size_binary,
150154
}
151155
}
152156
}

datafusion/sql/tests/sql_integration.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3363,6 +3363,7 @@ fn parse_decimals_parser_options() -> ParserOptions {
33633363
map_varchar_to_utf8view: false,
33643364
enable_options_value_normalization: false,
33653365
collect_spans: false,
3366+
parse_hex_as_fixed_size_binary: false,
33663367
}
33673368
}
33683369

@@ -3374,6 +3375,7 @@ fn ident_normalization_parser_options_no_ident_normalization() -> ParserOptions
33743375
map_varchar_to_utf8view: false,
33753376
enable_options_value_normalization: false,
33763377
collect_spans: false,
3378+
parse_hex_as_fixed_size_binary: false,
33773379
}
33783380
}
33793381

@@ -3385,6 +3387,7 @@ fn ident_normalization_parser_options_ident_normalization() -> ParserOptions {
33853387
map_varchar_to_utf8view: false,
33863388
enable_options_value_normalization: false,
33873389
collect_spans: false,
3390+
parse_hex_as_fixed_size_binary: false,
33883391
}
33893392
}
33903393

datafusion/sqllogictest/test_files/binary.slt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,21 @@ SELECT column1, column1 = arrow_cast(X'0102', 'FixedSizeBinary(2)') FROM t
150150
query error DataFusion error: Error during planning: Cannot infer common argument type for comparison operation FixedSizeBinary\(3\) = Binary
151151
SELECT column1, column1 = X'0102' FROM t
152152

153+
statement ok
154+
set datafusion.sql_parser.parse_hex_as_fixed_size_binary = true;
155+
156+
query ?B
157+
SELECT column1, column1 = X'000102' FROM t
158+
----
159+
000102 true
160+
003102 false
161+
NULL NULL
162+
ff0102 false
163+
000102 true
164+
165+
statement ok
166+
set datafusion.sql_parser.parse_hex_as_fixed_size_binary = false;
167+
153168
statement ok
154169
drop table t_source
155170

datafusion/sqllogictest/test_files/information_schema.slt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ datafusion.sql_parser.enable_ident_normalization true
265265
datafusion.sql_parser.enable_options_value_normalization false
266266
datafusion.sql_parser.map_varchar_to_utf8view false
267267
datafusion.sql_parser.parse_float_as_decimal false
268+
datafusion.sql_parser.parse_hex_as_fixed_size_binary false
268269
datafusion.sql_parser.recursion_limit 50
269270
datafusion.sql_parser.support_varchar_with_length true
270271

@@ -364,6 +365,7 @@ datafusion.sql_parser.enable_ident_normalization true When set to true, SQL pars
364365
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
365366
datafusion.sql_parser.map_varchar_to_utf8view false If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false.
366367
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
368+
datafusion.sql_parser.parse_hex_as_fixed_size_binary false When set to true, the SQL parser will parse literal bytes as `FixedSizeBinary` type
367369
datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit when parsing complex SQL Queries
368370
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.
369371

0 commit comments

Comments
 (0)