diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index dc4f195d6be..6eba043b179 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -713,7 +713,6 @@ void populate() { registerOperator(REGEXP, SqlLibraryOperators.REGEXP); registerOperator(CONCAT, SqlLibraryOperators.CONCAT_FUNCTION); registerOperator(CONCAT_WS, SqlLibraryOperators.CONCAT_WS); - registerOperator(LIKE, SqlLibraryOperators.ILIKE); registerOperator(CONCAT_WS, SqlLibraryOperators.CONCAT_WS); registerOperator(REVERSE, SqlLibraryOperators.REVERSE); registerOperator(RIGHT, SqlLibraryOperators.RIGHT); @@ -992,6 +991,18 @@ void populate() { builder.makeLiteral(" "), arg))), PPLTypeChecker.family(SqlTypeFamily.ANY))); + register( + LIKE, + createFunctionImpWithTypeChecker( + (builder, arg1, arg2) -> + builder.makeCall( + SqlLibraryOperators.ILIKE, + arg1, + arg2, + // TODO: Figure out escaping solution. '\\' is used for JSON input but is not + // necessary for SQL function input + builder.makeLiteral("\\")), + PPLTypeChecker.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING))); } } diff --git a/docs/user/ppl/functions/string.rst b/docs/user/ppl/functions/string.rst index a2e9f81580a..d0d38d8c72f 100644 --- a/docs/user/ppl/functions/string.rst +++ b/docs/user/ppl/functions/string.rst @@ -105,6 +105,8 @@ Example:: +-------------------------------+ +Limitation: The pushdown of the LIKE function to a DSL wildcard query is supported only for keyword fields. 
+ LOCATE ------- diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java index 917561984a3..4df20014be3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java @@ -6,66 +6,22 @@ package org.opensearch.sql.calcite.remote; import java.io.IOException; -import org.junit.Ignore; +import org.junit.Assume; import org.junit.Test; import org.opensearch.sql.ppl.LikeQueryIT; -// TODO Like function behaviour in V2 is not correct. Remove when it was fixed in V2. public class CalciteLikeQueryIT extends LikeQueryIT { @Override public void init() throws Exception { super.init(); enableCalcite(); - // TODO: "https://github.com/opensearch-project/sql/issues/3428" - // disallowCalciteFallback(); + disallowCalciteFallback(); } @Override @Test - @Ignore("https://github.com/opensearch-project/sql/issues/3428") - public void test_like_with_escaped_percent() throws IOException, IOException { - super.test_like_with_escaped_percent(); - } - - @Override - @Test - @Ignore("https://github.com/opensearch-project/sql/issues/3428") - public void test_like_in_where_with_escaped_underscore() throws IOException { - super.test_like_in_where_with_escaped_underscore(); - } - - @Override - @Test - @Ignore("https://github.com/opensearch-project/sql/issues/3428") - public void test_like_on_text_field_with_one_word() throws IOException { - super.test_like_on_text_field_with_one_word(); - } - - @Override - @Test - @Ignore("https://github.com/opensearch-project/sql/issues/3428") - public void test_like_on_text_keyword_field_with_one_word() throws IOException { - super.test_like_on_text_keyword_field_with_one_word(); - } - - @Override - @Test - @Ignore("https://github.com/opensearch-project/sql/issues/3428") - public void 
test_like_on_text_keyword_field_with_greater_than_one_word() throws IOException { - super.test_like_on_text_keyword_field_with_greater_than_one_word(); - } - - @Override - @Test - @Ignore("https://github.com/opensearch-project/sql/issues/3428") - public void test_like_on_text_field_with_greater_than_one_word() throws IOException { - super.test_like_on_text_field_with_greater_than_one_word(); - } - - @Override - @Test - @Ignore("https://github.com/opensearch-project/sql/issues/3428") public void test_convert_field_text_to_keyword() throws IOException { + Assume.assumeTrue("Pushdown is not enabled, skipping this test.", isPushdownEnabled()); super.test_convert_field_text_to_keyword(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java index 5c07f06bbc9..04cad7aee8d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java @@ -5,7 +5,6 @@ package org.opensearch.sql.calcite.remote; -import java.io.IOException; import org.opensearch.sql.ppl.WhereCommandIT; public class CalciteWhereCommandIT extends WhereCommandIT { @@ -16,19 +15,6 @@ public void init() throws Exception { disallowCalciteFallback(); } - @Override - public void testIsNotNullFunction() throws IOException { - withFallbackEnabled( - () -> { - try { - super.testIsNotNullFunction(); - } catch (IOException e) { - throw new RuntimeException(e); - } - }, - "https://github.com/opensearch-project/sql/issues/3428"); - } - @Override protected String getIncompatibleTypeErrMsg() { return "In expression types are incompatible: fields type LONG, values type [INTEGER, INTEGER," diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 06d1120b859..60b0e4b6719 100644 
--- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -435,6 +435,24 @@ public void testMultiFieldsRelevanceQueryFunctionExplain() throws IOException { + " default_operator='or', analyzer=english)")); } + @Test + public void testKeywordLikeFunctionExplain() throws IOException { + String expected = loadExpectedPlan("explain_keyword_like_function.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account | where like(firstname, '%mbe%')")); + } + + @Test + public void testTextLikeFunctionExplain() throws IOException { + String expected = loadExpectedPlan("explain_text_like_function.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account | where like(address, '%Holmes%')")); + } + @Ignore("The serialized string is unstable because of function properties") @Test public void testFilterScriptPushDownExplain() throws Exception { diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java index f8c4741312f..31ea92d54e4 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java @@ -62,9 +62,9 @@ public void test_like_in_where_with_escaped_underscore() throws IOException { @Test public void test_like_on_text_field_with_one_word() throws IOException { String query = - "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test*') | fields TextBody"; + "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test%') | fields TextBody"; JSONObject result = executeQuery(query); - assertEquals(9, result.getInt("total")); + assertEquals(8, result.getInt("total")); } @Test @@ -72,7 +72,7 @@ public void test_like_on_text_keyword_field_with_one_word() throws IOException { String query = "source=" + 
TEST_INDEX_WILDCARD - + " | WHERE Like(TextKeywordBody, 'test*') | fields TextKeywordBody"; + + " | WHERE Like(TextKeywordBody, 'test%') | fields TextKeywordBody"; JSONObject result = executeQuery(query); assertEquals(8, result.getInt("total")); } @@ -82,7 +82,7 @@ public void test_like_on_text_keyword_field_with_greater_than_one_word() throws String query = "source=" + TEST_INDEX_WILDCARD - + " | WHERE Like(TextKeywordBody, 'test wild*') | fields TextKeywordBody"; + + " | WHERE Like(TextKeywordBody, 'test wild%') | fields TextKeywordBody"; JSONObject result = executeQuery(query); assertEquals(7, result.getInt("total")); } @@ -90,9 +90,9 @@ public void test_like_on_text_keyword_field_with_greater_than_one_word() throws @Test public void test_like_on_text_field_with_greater_than_one_word() throws IOException { String query = - "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test wild*') | fields TextBody"; + "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test wild%') | fields TextBody"; JSONObject result = executeQuery(query); - assertEquals(0, result.getInt("total")); + assertEquals(7, result.getInt("total")); } @Test diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java index 4e07b619b11..4a5414fa5e6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java @@ -88,6 +88,16 @@ public void testLikeFunction() throws IOException { verifyDataRows(result, rows("Amber")); } + @Test + public void testLikeFunctionNoHit() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | where like(firstname, 'Duk_') | fields lastname", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + assertEquals(0, result.getInt("total")); + } + @Test public void testIsNullFunction() throws IOException { JSONObject result = diff --git 
a/integ-test/src/test/java/org/opensearch/sql/sql/LikeQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/LikeQueryIT.java index c5ff50898a1..118dd9849b4 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/LikeQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/LikeQueryIT.java @@ -117,14 +117,14 @@ public void test_like_in_where_with_escaped_underscore() throws IOException { @Test public void test_like_on_text_field_with_one_word() throws IOException { - String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextBody LIKE 'test*'"; + String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextBody LIKE 'test%'"; JSONObject result = executeJdbcRequest(query); - assertEquals(9, result.getInt("total")); + assertEquals(8, result.getInt("total")); } @Test public void test_like_on_text_keyword_field_with_one_word() throws IOException { - String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextKeywordBody LIKE 'test*'"; + String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextKeywordBody LIKE 'test%'"; JSONObject result = executeJdbcRequest(query); assertEquals(8, result.getInt("total")); } @@ -134,7 +134,7 @@ public void test_like_on_text_keyword_field_with_greater_than_one_word() throws String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextKeywordBody LIKE 'test wild*'"; JSONObject result = executeJdbcRequest(query); - assertEquals(7, result.getInt("total")); + assertEquals(0, result.getInt("total")); } @Test diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function.json new file mode 100644 index 00000000000..001d0a1dfbb --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], 
gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($1, '%mbe%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->ILIKE($1, '%mbe%':VARCHAR, '\\')], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"wildcard\":{\"firstname.keyword\":{\"wildcard\":\"*mbe*\",\"case_insensitive\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.json new file mode 100644 index 00000000000..79d34ebe32b --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($2, '%Holmes%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableCalc(expr#0..10=[{inputs}], expr#11=['%Holmes%':VARCHAR], expr#12=['\\'], expr#13=[ILIKE($t2, $t11, $t12)], proj#0..10=[{exprs}], $condition=[$t13])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, 
firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_like_function.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_like_function.json new file mode 100644 index 00000000000..ecb1d8d46d9 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_like_function.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($1, '%mbe%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableCalc(expr#0..16=[{inputs}], expr#17=['%mbe%':VARCHAR], expr#18=['\\'], expr#19=[ILIKE($t1, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_like_function.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_like_function.json new file mode 100644 index 00000000000..8d76d300373 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_like_function.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], 
email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($2, '%Holmes%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableCalc(expr#0..16=[{inputs}], expr#17=['%Holmes%':VARCHAR], expr#18=['\\'], expr#19=[ILIKE($t2, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + } +} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function.json new file mode 100644 index 00000000000..66a84d645f3 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function.json @@ -0,0 +1,15 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]" + }, + "children": [{ + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"wildcard\":{\"firstname.keyword\":{\"wildcard\":\"*mbe*\",\"case_insensitive\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + }, + "children": [] + }] + } +} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function.json new file mode 100644 index 00000000000..224b2835ab5 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function.json 
@@ -0,0 +1,21 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]" + }, + "children": [{ + "name": "FilterOperator", + "description": { + "conditions": "like(address, \"%Holmes%\")" + }, + "children": [{ + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + }, + "children": [] + }] + }] + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index c26f06706d1..873a7a93586 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -37,6 +37,7 @@ import static org.opensearch.index.query.QueryBuilders.regexpQuery; import static org.opensearch.index.query.QueryBuilders.termQuery; import static org.opensearch.index.query.QueryBuilders.termsQuery; +import static org.opensearch.index.query.QueryBuilders.wildcardQuery; import static org.opensearch.script.Script.DEFAULT_SCRIPT_TYPE; import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.MULTI_FIELDS_RELEVANCE_FUNCTION_SET; import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.SINGLE_FIELD_RELEVANCE_FUNCTION_SET; @@ -93,6 +94,7 @@ import org.opensearch.sql.opensearch.storage.script.CalciteScriptEngine.ReferenceFieldVisitor; import org.opensearch.sql.opensearch.storage.script.CalciteScriptEngine.UnsupportedScriptException; import org.opensearch.sql.opensearch.storage.script.CompoundedScriptEngine.ScriptEngineType; +import 
org.opensearch.sql.opensearch.storage.script.StringUtils; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchBoolPrefixQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhrasePrefixQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhraseQuery; @@ -325,7 +327,8 @@ public Expression visitCall(RexCall call) { case SPECIAL: return switch (call.getKind()) { case CAST -> toCastExpression(call); - case LIKE, CONTAINS -> binary(call); + case CONTAINS -> binary(call); + case LIKE -> like(call); default -> { String message = format(Locale.ROOT, "Unsupported call: [%s]", call); throw new PredicateAnalyzerException(message); @@ -533,8 +536,6 @@ private QueryExpression binary(RexCall call) { switch (call.getKind()) { case CONTAINS: return QueryExpression.create(pair.getKey()).contains(pair.getValue()); - case LIKE: - throw new UnsupportedOperationException("LIKE not yet supported"); case EQUALS: return QueryExpression.create(pair.getKey()).equals(pair.getValue()); case NOT_EQUALS: @@ -580,6 +581,16 @@ private QueryExpression binary(RexCall call) { throw new PredicateAnalyzerException(message); } + private QueryExpression like(RexCall call) { + // The third default escape is not used here. It's handled by + // StringUtils.convertSqlWildcardToLucene + checkState(call.getOperands().size() == 3); + final Expression a = call.getOperands().get(0).accept(this); + final Expression b = call.getOperands().get(1).accept(this); + final SwapResult pair = swap(a, b); + return QueryExpression.create(pair.getKey()).like(pair.getValue()); + } + private static QueryExpression constructQueryExpressionForSearch( RexCall call, SwapResult pair) { if (isSearchWithComplementedPoints(call)) { @@ -1137,10 +1148,24 @@ public QueryExpression notExists() { return this; } + /* + * Prefer to run wildcard query for keyword type field. 
For text type field, it doesn't support + * cross term match because OpenSearch internally break text to multiple terms and apply wildcard + * matching one by one, which is not same behavior with regular like function without pushdown. + */ @Override public QueryExpression like(LiteralExpression literal) { - builder = regexpQuery(getFieldReference(), literal.stringValue()); - return this; + String fieldName = getFieldReference(); + String keywordField = OpenSearchTextType.toKeywordSubField(fieldName, this.rel.getExprType()); + boolean isKeywordField = keywordField != null; + if (isKeywordField) { + builder = + wildcardQuery( + keywordField, StringUtils.convertSqlWildcardToLuceneSafe(literal.stringValue())) + .caseInsensitive(true); + return this; + } + throw new UnsupportedOperationException("Like query is not supported for text field"); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/StringUtils.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/StringUtils.java index a485296b524..05e0907d934 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/StringUtils.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/StringUtils.java @@ -10,12 +10,32 @@ @UtilityClass public class StringUtils { /** - * Converts sql wildcard character % and _ to * and ?. + * Converts sql wildcard character % and _ to * and ?. Also, DSL specific wildcard (* and ?) is + * still supported. This function is used for legacy SQL WILDCARDQUERY and WILDCARD_QUERY + * functions. * * @param text string to be converted * @return converted string */ + @Deprecated public static String convertSqlWildcardToLucene(String text) { + return convert(text, false); + } + + /** + * Transforms a SQL like pattern into a Lucene/OpenSearch wildcard pattern. + * + *
It replaces '%' with '*' and '_' with '?' and escapes any literal '*' or '?' so they are
+ * treated as ordinary characters.
+ *
+ * @param text string to be converted
+ * @return converted string
+ */
+ public static String convertSqlWildcardToLuceneSafe(String text) {
+ return convert(text, true); // true: escape unescaped literal '*' and '?'
+ }
+
+ private static String convert(String text, boolean escapeStarQuestion) {
final char DEFAULT_ESCAPE = '\\';
StringBuilder convertedString = new StringBuilder(text.length());
boolean escaped = false;
@@ -44,6 +64,14 @@ public static String convertSqlWildcardToLucene(String text) {
}
escaped = false;
break;
+ case '*':
+ case '?':
+ if (escapeStarQuestion && !escaped) {
+ convertedString.append(DEFAULT_ESCAPE);
+ }
+ convertedString.append(currentChar);
+ escaped = false;
+ break;
default:
convertedString.append(currentChar);
escaped = false;
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LikeQuery.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LikeQuery.java
index 44c1c302001..eb4aaa8600b 100644
--- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LikeQuery.java
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LikeQuery.java
@@ -10,8 +10,12 @@
import org.opensearch.index.query.WildcardQueryBuilder;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.type.ExprType;
+import org.opensearch.sql.expression.FunctionExpression;
+import org.opensearch.sql.expression.LiteralExpression;
+import org.opensearch.sql.expression.ReferenceExpression;
import org.opensearch.sql.opensearch.data.type.OpenSearchTextType;
import org.opensearch.sql.opensearch.storage.script.StringUtils;
+import org.opensearch.sql.opensearch.storage.script.filter.FilterQueryBuilder.ScriptQueryUnSupportedException;
public class LikeQuery extends LuceneQuery {
@Override
@@ -26,7 +30,35 @@ public QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue lite
* ReferenceExpression while wildcard_query are of type NamedArgumentExpression
*/
protected WildcardQueryBuilder createBuilder(String field, String query) {
- String matchText = StringUtils.convertSqlWildcardToLucene(query);
+ String matchText = StringUtils.convertSqlWildcardToLuceneSafe(query);
return QueryBuilders.wildcardQuery(field, matchText).caseInsensitive(true);
}
+
+ /**
+ * Checks whether a LIKE call can be pushed down as a wildcard query. The call must have the
+ * shape (field reference, literal or cast-wrapped literal) and the field must map to a keyword
+ * field; OpenSearch matches wildcards on text fields term by term, unlike LIKE without pushdown.
+ *
+ * @param func function expression to check
+ * @return true if both conditions hold; false if the argument shape does not match
+ * @throws ScriptQueryUnSupportedException if the shape matches but no keyword field is found
+ */
+ @Override
+ public boolean canSupport(FunctionExpression func) {
+ if (func.getArguments().size() == 2
+ && (func.getArguments().get(0) instanceof ReferenceExpression)
+ && (func.getArguments().get(1) instanceof LiteralExpression
+ || literalExpressionWrappedByCast(func))) {
+ ReferenceExpression ref = (ReferenceExpression) func.getArguments().get(0);
+ // Pushdown is allowed only when toKeywordSubField resolves a keyword field for the reference
+ if (OpenSearchTextType.toKeywordSubField(ref.getRawPath(), ref.getType()) != null) {
+ return true;
+ } else {
+ // Script pushdown is not supported for text type fields, so fail explicitly
+ throw new ScriptQueryUnSupportedException(
+ "text field wildcard doesn't support script query");
+ }
+ }
+ return false;
+ }
}
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java
index 16e3a79780c..cdaf4af3440 100644
--- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java
@@ -88,7 +88,7 @@ private boolean isMultiParameterQuery(FunctionExpression func) {
/**
* Check if the second argument of the function is a literal expression wrapped by cast function.
*/
- private boolean literalExpressionWrappedByCast(FunctionExpression func) {
+ protected boolean literalExpressionWrappedByCast(FunctionExpression func) {
if (func.getArguments().get(1) instanceof FunctionExpression) {
FunctionExpression expr = (FunctionExpression) func.getArguments().get(1);
return castMap.containsKey(expr.getFunctionName())
diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java
index 72aacebb250..5a2a7544998 100644
--- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java
+++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java
@@ -41,6 +41,7 @@
import org.opensearch.index.query.SimpleQueryStringBuilder;
import org.opensearch.index.query.TermQueryBuilder;
import org.opensearch.index.query.TermsQueryBuilder;
+import org.opensearch.index.query.WildcardQueryBuilder;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.expression.function.BuiltinFunctionName;
import org.opensearch.sql.expression.function.PPLFuncImpTable;
@@ -569,6 +570,38 @@ void multiMatchRelevanceQueryFunction_generatesMultiMatchQuery()
result.toString());
}
+ @Test
+ void likeFunction_keywordField_generatesWildcardQuery() throws ExpressionNotAnalyzableException {
+ List