diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index dc4f195d6be..6eba043b179 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -713,7 +713,6 @@ void populate() { registerOperator(REGEXP, SqlLibraryOperators.REGEXP); registerOperator(CONCAT, SqlLibraryOperators.CONCAT_FUNCTION); registerOperator(CONCAT_WS, SqlLibraryOperators.CONCAT_WS); - registerOperator(LIKE, SqlLibraryOperators.ILIKE); registerOperator(CONCAT_WS, SqlLibraryOperators.CONCAT_WS); registerOperator(REVERSE, SqlLibraryOperators.REVERSE); registerOperator(RIGHT, SqlLibraryOperators.RIGHT); @@ -992,6 +991,18 @@ void populate() { builder.makeLiteral(" "), arg))), PPLTypeChecker.family(SqlTypeFamily.ANY))); + register( + LIKE, + createFunctionImpWithTypeChecker( + (builder, arg1, arg2) -> + builder.makeCall( + SqlLibraryOperators.ILIKE, + arg1, + arg2, + // TODO: Figure out escaping solution. '\\' is used for JSON input but is not + // necessary for SQL function input + builder.makeLiteral("\\")), + PPLTypeChecker.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING))); } } diff --git a/docs/user/ppl/functions/string.rst b/docs/user/ppl/functions/string.rst index a2e9f81580a..d0d38d8c72f 100644 --- a/docs/user/ppl/functions/string.rst +++ b/docs/user/ppl/functions/string.rst @@ -105,6 +105,8 @@ Example:: +-------------------------------+ +Limitation: The pushdown of the LIKE function to a DSL wildcard query is supported only for keyword fields. 
+ LOCATE ------- diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java index 917561984a3..4df20014be3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java @@ -6,66 +6,22 @@ package org.opensearch.sql.calcite.remote; import java.io.IOException; -import org.junit.Ignore; +import org.junit.Assume; import org.junit.Test; import org.opensearch.sql.ppl.LikeQueryIT; -// TODO Like function behaviour in V2 is not correct. Remove when it was fixed in V2. public class CalciteLikeQueryIT extends LikeQueryIT { @Override public void init() throws Exception { super.init(); enableCalcite(); - // TODO: "https://github.com/opensearch-project/sql/issues/3428" - // disallowCalciteFallback(); + disallowCalciteFallback(); } @Override @Test - @Ignore("https://github.com/opensearch-project/sql/issues/3428") - public void test_like_with_escaped_percent() throws IOException, IOException { - super.test_like_with_escaped_percent(); - } - - @Override - @Test - @Ignore("https://github.com/opensearch-project/sql/issues/3428") - public void test_like_in_where_with_escaped_underscore() throws IOException { - super.test_like_in_where_with_escaped_underscore(); - } - - @Override - @Test - @Ignore("https://github.com/opensearch-project/sql/issues/3428") - public void test_like_on_text_field_with_one_word() throws IOException { - super.test_like_on_text_field_with_one_word(); - } - - @Override - @Test - @Ignore("https://github.com/opensearch-project/sql/issues/3428") - public void test_like_on_text_keyword_field_with_one_word() throws IOException { - super.test_like_on_text_keyword_field_with_one_word(); - } - - @Override - @Test - @Ignore("https://github.com/opensearch-project/sql/issues/3428") - public void 
test_like_on_text_keyword_field_with_greater_than_one_word() throws IOException { - super.test_like_on_text_keyword_field_with_greater_than_one_word(); - } - - @Override - @Test - @Ignore("https://github.com/opensearch-project/sql/issues/3428") - public void test_like_on_text_field_with_greater_than_one_word() throws IOException { - super.test_like_on_text_field_with_greater_than_one_word(); - } - - @Override - @Test - @Ignore("https://github.com/opensearch-project/sql/issues/3428") public void test_convert_field_text_to_keyword() throws IOException { + Assume.assumeTrue("Pushdown is not enabled, skipping this test.", isPushdownEnabled()); super.test_convert_field_text_to_keyword(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java index 5c07f06bbc9..04cad7aee8d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java @@ -5,7 +5,6 @@ package org.opensearch.sql.calcite.remote; -import java.io.IOException; import org.opensearch.sql.ppl.WhereCommandIT; public class CalciteWhereCommandIT extends WhereCommandIT { @@ -16,19 +15,6 @@ public void init() throws Exception { disallowCalciteFallback(); } - @Override - public void testIsNotNullFunction() throws IOException { - withFallbackEnabled( - () -> { - try { - super.testIsNotNullFunction(); - } catch (IOException e) { - throw new RuntimeException(e); - } - }, - "https://github.com/opensearch-project/sql/issues/3428"); - } - @Override protected String getIncompatibleTypeErrMsg() { return "In expression types are incompatible: fields type LONG, values type [INTEGER, INTEGER," diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 06d1120b859..60b0e4b6719 100644 
--- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -435,6 +435,24 @@ public void testMultiFieldsRelevanceQueryFunctionExplain() throws IOException { + " default_operator='or', analyzer=english)")); } + @Test + public void testKeywordLikeFunctionExplain() throws IOException { + String expected = loadExpectedPlan("explain_keyword_like_function.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account | where like(firstname, '%mbe%')")); + } + + @Test + public void testTextLikeFunctionExplain() throws IOException { + String expected = loadExpectedPlan("explain_text_like_function.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account | where like(address, '%Holmes%')")); + } + @Ignore("The serialized string is unstable because of function properties") @Test public void testFilterScriptPushDownExplain() throws Exception { diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java index f8c4741312f..31ea92d54e4 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java @@ -62,9 +62,9 @@ public void test_like_in_where_with_escaped_underscore() throws IOException { @Test public void test_like_on_text_field_with_one_word() throws IOException { String query = - "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test*') | fields TextBody"; + "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test%') | fields TextBody"; JSONObject result = executeQuery(query); - assertEquals(9, result.getInt("total")); + assertEquals(8, result.getInt("total")); } @Test @@ -72,7 +72,7 @@ public void test_like_on_text_keyword_field_with_one_word() throws IOException { String query = "source=" + 
TEST_INDEX_WILDCARD - + " | WHERE Like(TextKeywordBody, 'test*') | fields TextKeywordBody"; + + " | WHERE Like(TextKeywordBody, 'test%') | fields TextKeywordBody"; JSONObject result = executeQuery(query); assertEquals(8, result.getInt("total")); } @@ -82,7 +82,7 @@ public void test_like_on_text_keyword_field_with_greater_than_one_word() throws String query = "source=" + TEST_INDEX_WILDCARD - + " | WHERE Like(TextKeywordBody, 'test wild*') | fields TextKeywordBody"; + + " | WHERE Like(TextKeywordBody, 'test wild%') | fields TextKeywordBody"; JSONObject result = executeQuery(query); assertEquals(7, result.getInt("total")); } @@ -90,9 +90,9 @@ public void test_like_on_text_keyword_field_with_greater_than_one_word() throws @Test public void test_like_on_text_field_with_greater_than_one_word() throws IOException { String query = - "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test wild*') | fields TextBody"; + "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test wild%') | fields TextBody"; JSONObject result = executeQuery(query); - assertEquals(0, result.getInt("total")); + assertEquals(7, result.getInt("total")); } @Test diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java index 4e07b619b11..4a5414fa5e6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java @@ -88,6 +88,16 @@ public void testLikeFunction() throws IOException { verifyDataRows(result, rows("Amber")); } + @Test + public void testLikeFunctionNoHit() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | where like(firstname, 'Duk_') | fields lastname", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + assertEquals(0, result.getInt("total")); + } + @Test public void testIsNullFunction() throws IOException { JSONObject result = diff --git 
a/integ-test/src/test/java/org/opensearch/sql/sql/LikeQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/LikeQueryIT.java index c5ff50898a1..118dd9849b4 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/LikeQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/LikeQueryIT.java @@ -117,14 +117,14 @@ public void test_like_in_where_with_escaped_underscore() throws IOException { @Test public void test_like_on_text_field_with_one_word() throws IOException { - String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextBody LIKE 'test*'"; + String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextBody LIKE 'test%'"; JSONObject result = executeJdbcRequest(query); - assertEquals(9, result.getInt("total")); + assertEquals(8, result.getInt("total")); } @Test public void test_like_on_text_keyword_field_with_one_word() throws IOException { - String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextKeywordBody LIKE 'test*'"; + String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextKeywordBody LIKE 'test%'"; JSONObject result = executeJdbcRequest(query); assertEquals(8, result.getInt("total")); } @@ -134,7 +134,7 @@ public void test_like_on_text_keyword_field_with_greater_than_one_word() throws String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextKeywordBody LIKE 'test wild*'"; JSONObject result = executeJdbcRequest(query); - assertEquals(7, result.getInt("total")); + assertEquals(0, result.getInt("total")); } @Test diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function.json new file mode 100644 index 00000000000..001d0a1dfbb --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], 
gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($1, '%mbe%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->ILIKE($1, '%mbe%':VARCHAR, '\\')], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"wildcard\":{\"firstname.keyword\":{\"wildcard\":\"*mbe*\",\"case_insensitive\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.json new file mode 100644 index 00000000000..79d34ebe32b --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($2, '%Holmes%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableCalc(expr#0..10=[{inputs}], expr#11=['%Holmes%':VARCHAR], expr#12=['\\'], expr#13=[ILIKE($t2, $t11, $t12)], proj#0..10=[{exprs}], $condition=[$t13])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, 
firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_like_function.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_like_function.json new file mode 100644 index 00000000000..ecb1d8d46d9 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_like_function.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($1, '%mbe%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableCalc(expr#0..16=[{inputs}], expr#17=['%mbe%':VARCHAR], expr#18=['\\'], expr#19=[ILIKE($t1, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_like_function.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_like_function.json new file mode 100644 index 00000000000..8d76d300373 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_like_function.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], 
email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($2, '%Holmes%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableCalc(expr#0..16=[{inputs}], expr#17=['%Holmes%':VARCHAR], expr#18=['\\'], expr#19=[ILIKE($t2, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + } +} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function.json new file mode 100644 index 00000000000..66a84d645f3 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function.json @@ -0,0 +1,15 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]" + }, + "children": [{ + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"wildcard\":{\"firstname.keyword\":{\"wildcard\":\"*mbe*\",\"case_insensitive\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + }, + "children": [] + }] + } +} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function.json new file mode 100644 index 00000000000..224b2835ab5 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function.json 
@@ -0,0 +1,21 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]" + }, + "children": [{ + "name": "FilterOperator", + "description": { + "conditions": "like(address, \"%Holmes%\")" + }, + "children": [{ + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + }, + "children": [] + }] + }] + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index c26f06706d1..873a7a93586 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -37,6 +37,7 @@ import static org.opensearch.index.query.QueryBuilders.regexpQuery; import static org.opensearch.index.query.QueryBuilders.termQuery; import static org.opensearch.index.query.QueryBuilders.termsQuery; +import static org.opensearch.index.query.QueryBuilders.wildcardQuery; import static org.opensearch.script.Script.DEFAULT_SCRIPT_TYPE; import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.MULTI_FIELDS_RELEVANCE_FUNCTION_SET; import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.SINGLE_FIELD_RELEVANCE_FUNCTION_SET; @@ -93,6 +94,7 @@ import org.opensearch.sql.opensearch.storage.script.CalciteScriptEngine.ReferenceFieldVisitor; import org.opensearch.sql.opensearch.storage.script.CalciteScriptEngine.UnsupportedScriptException; import org.opensearch.sql.opensearch.storage.script.CompoundedScriptEngine.ScriptEngineType; +import 
org.opensearch.sql.opensearch.storage.script.StringUtils; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchBoolPrefixQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhrasePrefixQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhraseQuery; @@ -325,7 +327,8 @@ public Expression visitCall(RexCall call) { case SPECIAL: return switch (call.getKind()) { case CAST -> toCastExpression(call); - case LIKE, CONTAINS -> binary(call); + case CONTAINS -> binary(call); + case LIKE -> like(call); default -> { String message = format(Locale.ROOT, "Unsupported call: [%s]", call); throw new PredicateAnalyzerException(message); @@ -533,8 +536,6 @@ private QueryExpression binary(RexCall call) { switch (call.getKind()) { case CONTAINS: return QueryExpression.create(pair.getKey()).contains(pair.getValue()); - case LIKE: - throw new UnsupportedOperationException("LIKE not yet supported"); case EQUALS: return QueryExpression.create(pair.getKey()).equals(pair.getValue()); case NOT_EQUALS: @@ -580,6 +581,16 @@ private QueryExpression binary(RexCall call) { throw new PredicateAnalyzerException(message); } + private QueryExpression like(RexCall call) { + // The third operand (the default escape) is not used here. Escaping is handled by + // StringUtils.convertSqlWildcardToLuceneSafe + checkState(call.getOperands().size() == 3); + final Expression a = call.getOperands().get(0).accept(this); + final Expression b = call.getOperands().get(1).accept(this); + final SwapResult pair = swap(a, b); + return QueryExpression.create(pair.getKey()).like(pair.getValue()); + } + private static QueryExpression constructQueryExpressionForSearch( RexCall call, SwapResult pair) { if (isSearchWithComplementedPoints(call)) { @@ -1137,10 +1148,24 @@ public QueryExpression notExists() { return this; } + /* + * Prefer to run wildcard query for keyword type field. 
For text type field, it doesn't support + * cross term match because OpenSearch internally breaks text into multiple terms and applies + * wildcard matching per term, which differs from the regular like function without pushdown. + */ @Override public QueryExpression like(LiteralExpression literal) { - builder = regexpQuery(getFieldReference(), literal.stringValue()); - return this; + String fieldName = getFieldReference(); + String keywordField = OpenSearchTextType.toKeywordSubField(fieldName, this.rel.getExprType()); + boolean isKeywordField = keywordField != null; + if (isKeywordField) { + builder = + wildcardQuery( + keywordField, StringUtils.convertSqlWildcardToLuceneSafe(literal.stringValue())) + .caseInsensitive(true); + return this; + } + throw new UnsupportedOperationException("Like query is not supported for text field"); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/StringUtils.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/StringUtils.java index a485296b524..05e0907d934 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/StringUtils.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/StringUtils.java @@ -10,12 +10,32 @@ @UtilityClass public class StringUtils { /** - * Converts sql wildcard character % and _ to * and ?. + * Converts sql wildcard character % and _ to * and ?. Also, DSL specific wildcard (* and ?) is + * still supported. This function is used for legacy SQL WILDCARDQUERY and WILDCARD_QUERY + * functions. * * @param text string to be converted * @return converted string */ + @Deprecated public static String convertSqlWildcardToLucene(String text) { + return convert(text, false); + } + + /** + * Transforms a SQL like pattern into a Lucene/OpenSearch wildcard pattern. + * + * 

It replaces '%' with '*' and '_' with '?' and escapes any literal '*' or '?' so they are + * treated as ordinary characters. + * + * @param text string to be converted + * @return converted string + */ + public static String convertSqlWildcardToLuceneSafe(String text) { + return convert(text, true); + } + + private static String convert(String text, boolean escapeStarQuestion) { final char DEFAULT_ESCAPE = '\\'; StringBuilder convertedString = new StringBuilder(text.length()); boolean escaped = false; @@ -44,6 +64,14 @@ public static String convertSqlWildcardToLucene(String text) { } escaped = false; break; + case '*': + case '?': + if (escapeStarQuestion && !escaped) { + convertedString.append(DEFAULT_ESCAPE); + } + convertedString.append(currentChar); + escaped = false; + break; default: convertedString.append(currentChar); escaped = false; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LikeQuery.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LikeQuery.java index 44c1c302001..eb4aaa8600b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LikeQuery.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LikeQuery.java @@ -10,8 +10,12 @@ import org.opensearch.index.query.WildcardQueryBuilder; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.LiteralExpression; +import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; import org.opensearch.sql.opensearch.storage.script.StringUtils; +import org.opensearch.sql.opensearch.storage.script.filter.FilterQueryBuilder.ScriptQueryUnSupportedException; public class LikeQuery extends LuceneQuery { @Override @@ -26,7 +30,35 @@ public QueryBuilder 
doBuild(String fieldName, ExprType fieldType, ExprValue lite * ReferenceExpression while wildcard_query are of type NamedArgumentExpression */ protected WildcardQueryBuilder createBuilder(String field, String query) { - String matchText = StringUtils.convertSqlWildcardToLucene(query); + String matchText = StringUtils.convertSqlWildcardToLuceneSafe(query); return QueryBuilders.wildcardQuery(field, matchText).caseInsensitive(true); } + + /** + * Verify if the function supports like/wildcard query. Prefer to run wildcard query for keyword + * type field. For text type field, it doesn't support cross term match because OpenSearch + * internally breaks text into multiple terms and applies wildcard matching per term, which is + * not the same behavior as the regular like function without pushdown. + * + * @param func the input function expression + * @return true if pushdown is supported; false if the arguments are not (field, literal) + */ + @Override + public boolean canSupport(FunctionExpression func) { + if (func.getArguments().size() == 2 + && (func.getArguments().get(0) instanceof ReferenceExpression) + && (func.getArguments().get(1) instanceof LiteralExpression + || literalExpressionWrappedByCast(func))) { + ReferenceExpression ref = (ReferenceExpression) func.getArguments().get(0); + // Only support keyword type field + if (OpenSearchTextType.toKeywordSubField(ref.getRawPath(), ref.getType()) != null) { + return true; + } else { + // Script pushdown is not supported for text type field + throw new ScriptQueryUnSupportedException( + "text field wildcard doesn't support script query"); + } + } + return false; + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java index 16e3a79780c..cdaf4af3440 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java +++ 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java @@ -88,7 +88,7 @@ private boolean isMultiParameterQuery(FunctionExpression func) { /** * Check if the second argument of the function is a literal expression wrapped by cast function. */ - private boolean literalExpressionWrappedByCast(FunctionExpression func) { + protected boolean literalExpressionWrappedByCast(FunctionExpression func) { if (func.getArguments().get(1) instanceof FunctionExpression) { FunctionExpression expr = (FunctionExpression) func.getArguments().get(1); return castMap.containsKey(expr.getFunctionName()) diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java index 72aacebb250..5a2a7544998 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java @@ -41,6 +41,7 @@ import org.opensearch.index.query.SimpleQueryStringBuilder; import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.index.query.TermsQueryBuilder; +import org.opensearch.index.query.WildcardQueryBuilder; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLFuncImpTable; @@ -569,6 +570,38 @@ void multiMatchRelevanceQueryFunction_generatesMultiMatchQuery() result.toString()); } + @Test + void likeFunction_keywordField_generatesWildcardQuery() throws ExpressionNotAnalyzableException { + List arguments = Arrays.asList(field2, builder.makeLiteral("%Hi%")); + RexNode call = + PPLFuncImpTable.INSTANCE.resolve(builder, "like", arguments.toArray(new RexNode[0])); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + assertInstanceOf(WildcardQueryBuilder.class, result); 
+ assertEquals( + """ + { + "wildcard" : { + "b.keyword" : { + "wildcard" : "*Hi*", + "case_insensitive" : true, + "boost" : 1.0 + } + } + }""", + result.toString()); + } + + @Test + void likeFunction_textField_throwsException() throws ExpressionNotAnalyzableException { + RexInputRef field3 = builder.makeInputRef(typeFactory.createSqlType(SqlTypeName.VARCHAR), 2); + List arguments = Arrays.asList(field3, builder.makeLiteral("%Hi%")); + RexNode call = + PPLFuncImpTable.INSTANCE.resolve(builder, "like", arguments.toArray(new RexNode[0])); + assertThrows( + ExpressionNotAnalyzableException.class, + () -> PredicateAnalyzer.analyze(call, schema, fieldTypes)); + } + @Test void andOrNot_generatesCompoundQuery() throws ExpressionNotAnalyzableException { RexNode call1 = builder.makeCall(SqlStdOperatorTable.EQUALS, field1, numericLiteral); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/StringUtilsTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/StringUtilsTest.java index 24c809ebabd..24ee9b12907 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/StringUtilsTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/StringUtilsTest.java @@ -26,4 +26,25 @@ public void test_escaping_sql_wildcards() { assertEquals("?_?", StringUtils.convertSqlWildcardToLucene("_\\__")); assertEquals("%\\*_\\?", StringUtils.convertSqlWildcardToLucene("\\%\\*\\_\\?")); } + + @Test + public void test_escape_sql_wildcards_safe() { + assertEquals("%", StringUtils.convertSqlWildcardToLuceneSafe("\\%")); + assertEquals("\\*", StringUtils.convertSqlWildcardToLuceneSafe("\\*")); + assertEquals("_", StringUtils.convertSqlWildcardToLuceneSafe("\\_")); + assertEquals("\\?", StringUtils.convertSqlWildcardToLuceneSafe("\\?")); + assertEquals("%*", StringUtils.convertSqlWildcardToLuceneSafe("\\%%")); + assertEquals("*%", StringUtils.convertSqlWildcardToLuceneSafe("%\\%")); + 
assertEquals("%*%", StringUtils.convertSqlWildcardToLuceneSafe("\\%%\\%")); + assertEquals("*%*", StringUtils.convertSqlWildcardToLuceneSafe("%\\%%")); + assertEquals("_?", StringUtils.convertSqlWildcardToLuceneSafe("\\__")); + assertEquals("?_", StringUtils.convertSqlWildcardToLuceneSafe("_\\_")); + assertEquals("_?_", StringUtils.convertSqlWildcardToLuceneSafe("\\__\\_")); + assertEquals("?_?", StringUtils.convertSqlWildcardToLuceneSafe("_\\__")); + assertEquals("%\\*_\\?", StringUtils.convertSqlWildcardToLuceneSafe("\\%\\*\\_\\?")); + assertEquals("\\*", StringUtils.convertSqlWildcardToLuceneSafe("*")); + assertEquals("\\?", StringUtils.convertSqlWildcardToLuceneSafe("?")); + assertEquals("foo\\*bar", StringUtils.convertSqlWildcardToLuceneSafe("foo*bar")); + assertEquals("foo\\?bar", StringUtils.convertSqlWildcardToLuceneSafe("foo?bar")); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java index d61de04d420..a2a70918f1f 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java @@ -53,14 +53,17 @@ public void testLike() { String expectedLogical = "" + "LogicalAggregate(group=[{}], cnt=[COUNT()])\n" - + " LogicalFilter(condition=[ILIKE($2, 'SALE%':VARCHAR)])\n" + + " LogicalFilter(condition=[ILIKE($2, 'SALE%':VARCHAR, '\\')])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = "cnt=4\n"; verifyResult(root, expectedResult); String expectedSparkSql = - "" + "SELECT COUNT(*) `cnt`\n" + "FROM `scott`.`EMP`\n" + "WHERE `JOB` ILIKE 'SALE%'"; + "" + + "SELECT COUNT(*) `cnt`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `JOB` ILIKE 'SALE%' ESCAPE '\\'"; verifyPPLToSparkSQL(root, expectedSparkSql); } }