diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs index c4159cba86f3..fd34e77b0fdc 100644 --- a/datafusion/functions/src/string/starts_with.rs +++ b/datafusion/functions/src/string/starts_with.rs @@ -132,15 +132,18 @@ impl ScalarUDFImpl for StartsWithFunc { ) -> Result { if let Expr::Literal(scalar_value, _) = &args[1] { // Convert starts_with(col, 'prefix') to col LIKE 'prefix%' with proper escaping - // Example: starts_with(col, 'ja%') -> col LIKE 'ja\%%' - // 1. 'ja%' (input pattern) - // 2. 'ja\%' (escape special char '%') - // 3. 'ja\%%' (add suffix for starts_with) + // Escapes pattern characters: starts_with(col, 'j\_a%') -> col LIKE 'j\\\_a\%%' + // 1. 'j\_a%' (input pattern) + // 2. 'j\\\_a\%' (escape special chars '%', '_' and '\') + // 3. 'j\\\_a\%%' (add unescaped % suffix for starts_with) let like_expr = match scalar_value { ScalarValue::Utf8(Some(pattern)) | ScalarValue::LargeUtf8(Some(pattern)) | ScalarValue::Utf8View(Some(pattern)) => { - let escaped_pattern = pattern.replace("%", "\\%"); + let escaped_pattern = pattern + .replace("\\", "\\\\") + .replace("%", "\\%") + .replace("_", "\\_"); let like_pattern = format!("{escaped_pattern}%"); Expr::Literal(ScalarValue::Utf8(Some(like_pattern)), None) } diff --git a/datafusion/sqllogictest/test_files/string/string_literal.slt b/datafusion/sqllogictest/test_files/string/string_literal.slt index f5138ab3f734..6cf02218872d 100644 --- a/datafusion/sqllogictest/test_files/string/string_literal.slt +++ b/datafusion/sqllogictest/test_files/string/string_literal.slt @@ -207,6 +207,25 @@ SELECT ends_with('foobar', 'foo') ---- false +query B +SELECT ends_with(a, '%bar') from (values ('foobar'), ('foo%bar')) as t(a); +---- +false +true + +query B +SELECT ends_with(a, '_bar') from (values ('foobar'), ('foo_bar')) as t(a); +---- +false +true + +query B +SELECT ends_with(a, '\_bar') from (values ('foobar'), ('foo\\bar'), ('foo\_bar')) as t(a); +---- +false +false +true + query I SELECT levenshtein('kitten', 'sitting') ---- @@ -846,6 +865,26 @@ SELECT starts_with('foobar', 'bar') ---- false + +query B +SELECT starts_with(a, 'foo%') from (values ('foobar'), ('foo%bar')) as t(a); +---- +false +true + +query B +SELECT starts_with(a, 'foo\_') from (values ('foobar'), ('foo\\_bar'), ('foo\_bar')) as t(a); +---- +false +false +true + +query B +SELECT starts_with(a, 'foo_') from (values ('foobar'), ('foo_bar')) as t(a); +---- +false +true + query TT select ' ', '|' ---- diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt index 4d30f572ad6f..13b0aba653ef 100644 --- a/datafusion/sqllogictest/test_files/string/string_view.slt +++ b/datafusion/sqllogictest/test_files/string/string_view.slt @@ -370,7 +370,7 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: test.column1_utf8 LIKE Utf8("foo\%%") AS c1, test.column1_large_utf8 LIKE LargeUtf8("foo\%%") AS c2, test.column1_utf8view LIKE Utf8View("foo\%%") AS c3, test.column1_utf8 LIKE Utf8("f_o%") AS c4, test.column1_large_utf8 LIKE LargeUtf8("f_o%") AS c5, test.column1_utf8view LIKE Utf8View("f_o%") AS c6 +01)Projection: test.column1_utf8 LIKE Utf8("foo\%%") AS c1, test.column1_large_utf8 LIKE LargeUtf8("foo\%%") AS c2, test.column1_utf8view LIKE Utf8View("foo\%%") AS c3, test.column1_utf8 LIKE Utf8("f\_o%") AS c4, test.column1_large_utf8 LIKE LargeUtf8("f\_o%") AS c5, test.column1_utf8view LIKE Utf8View("f\_o%") AS c6 02)--TableScan: test projection=[column1_utf8, column1_large_utf8, column1_utf8view] ## Test STARTS_WITH works with column arguments