diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index f5fd1e33153..3b601f98a3d 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -715,6 +715,10 @@ public static FunctionExpression match_bool_prefix(Expression... args) { return compile(FunctionProperties.None, BuiltinFunctionName.MATCH_BOOL_PREFIX, args); } + public static FunctionExpression wildcard_query(Expression... args) { + return compile(FunctionProperties.None,BuiltinFunctionName.WILDCARD_QUERY, args); + } + public static FunctionExpression now(FunctionProperties functionProperties, Expression... args) { return compile(functionProperties, BuiltinFunctionName.NOW, args); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index b09f3b0c747..0b7701d8a9a 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -228,7 +228,9 @@ public enum BuiltinFunctionName { MATCHQUERY(FunctionName.of("matchquery")), MULTI_MATCH(FunctionName.of("multi_match")), MULTIMATCH(FunctionName.of("multimatch")), - MULTIMATCHQUERY(FunctionName.of("multimatchquery")); + MULTIMATCHQUERY(FunctionName.of("multimatchquery")), + WILDCARDQUERY(FunctionName.of("wildcardquery")), + WILDCARD_QUERY(FunctionName.of("wildcard_query")); private final FunctionName name; diff --git a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java index 2041b9762e4..d8efe426402 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java +++ 
b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java @@ -42,6 +42,8 @@ public void register(BuiltinFunctionRepository repository) { repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE)); repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASEQUERY)); repository.register(match_phrase_prefix()); + repository.register(wildcard_query(BuiltinFunctionName.WILDCARD_QUERY)); + repository.register(wildcard_query(BuiltinFunctionName.WILDCARDQUERY)); } private static FunctionResolver match_bool_prefix() { @@ -83,6 +85,11 @@ private static FunctionResolver query_string() { return new RelevanceFunctionResolver(funcName, STRUCT); } + private static FunctionResolver wildcard_query(BuiltinFunctionName wildcardQuery) { + FunctionName funcName = wildcardQuery.getName(); + return new RelevanceFunctionResolver(funcName, STRING); + } + public static class OpenSearchFunction extends FunctionExpression { private final FunctionName functionName; private final List arguments; diff --git a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java index dfb7a7239fb..7114b220ab7 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java @@ -540,6 +540,34 @@ void query_string_expression_two_fields() { AstDSL.unresolvedArg("query", stringLiteral("query_value")))); } + @Test + void wildcard_query_expression() { + assertAnalyzeEqual( + DSL.wildcard_query( + DSL.namedArgument("field", DSL.literal("test")), + DSL.namedArgument("query", DSL.literal("query_value*"))), + AstDSL.function("wildcard_query", + unresolvedArg("field", stringLiteral("test")), + unresolvedArg("query", stringLiteral("query_value*")))); + } + + @Test + void wildcard_query_expression_all_params() { + assertAnalyzeEqual( + DSL.wildcard_query( + DSL.namedArgument("field", 
DSL.literal("test")), + DSL.namedArgument("query", DSL.literal("query_value*")), + DSL.namedArgument("boost", DSL.literal("1.5")), + DSL.namedArgument("case_insensitive", DSL.literal("true")), + DSL.namedArgument("rewrite", DSL.literal("scoring_boolean"))), + AstDSL.function("wildcard_query", + unresolvedArg("field", stringLiteral("test")), + unresolvedArg("query", stringLiteral("query_value*")), + unresolvedArg("boost", stringLiteral("1.5")), + unresolvedArg("case_insensitive", stringLiteral("true")), + unresolvedArg("rewrite", stringLiteral("scoring_boolean")))); + } + @Test public void match_phrase_prefix_all_params() { assertAnalyzeEqual( diff --git a/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java index 787ca016c96..6e4fff2fb0b 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java @@ -197,4 +197,12 @@ void query_string() { fields.getValue(), query.getValue()), expr.toString()); } + + @Test + void wildcard_query() { + FunctionExpression expr = DSL.wildcard_query(field, query); + assertEquals(String.format("wildcard_query(field=%s, query=%s)", + field.getValue(), query.getValue()), + expr.toString()); + } } diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index c4c2fa988b6..7be50ccffba 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -3299,6 +3299,59 @@ Example searching for field Tags:: | [Winnie-the-Pooh] | +----------------------------------------------+ +WILDCARD_QUERY +-------------- + +Description +>>>>>>>>>>> + +``wildcard_query(field_expression, query_expression[, option=<option_value>]*)`` + +The ``wildcard_query`` function maps to the ``wildcard`` query used in search engine. It returns documents that match provided text in the specified field. 
+OpenSearch supports wildcard characters ``*`` and ``?``. See the full description here: https://opensearch.org/docs/latest/opensearch/query-dsl/term/#wildcards. +You may include a backslash ``\`` to escape SQL wildcard characters ``\%`` and ``\_``. + +Available parameters include: + +- boost +- case_insensitive +- rewrite + +For backward compatibility, ``wildcardquery`` is also supported and behaves identically to ``wildcard_query``. + +Example with only ``field`` and ``query`` expressions, with all other parameters set to their default values:: + + os> select Body from wildcard where wildcard_query(Body, 'test wildcard*'); + fetched rows / total rows = 7/7 + +-------------------------------------------+ + | Body | + |-------------------------------------------| + | test wildcard | + | test wildcard in the end of the text% | + | test wildcard in % the middle of the text | + | test wildcard %% beside each other | + | test wildcard in the end of the text_ | + | test wildcard in _ the middle of the text | + | test wildcard __ beside each other | + +-------------------------------------------+ + +Another example to show how to set custom values for the optional parameters:: + + os> select Body from wildcard where wildcard_query(Body, 'test wildcard*', boost=0.7, case_insensitive=true, rewrite='constant_score'); + fetched rows / total rows = 8/8 + +-------------------------------------------+ + | Body | + |-------------------------------------------| + | test wildcard | + | test wildcard in the end of the text% | + | test wildcard in % the middle of the text | + | test wildcard %% beside each other | + | test wildcard in the end of the text_ | + | test wildcard in _ the middle of the text | + | test wildcard __ beside each other | + | tEsT wIlDcArD sensitive cases | + +-------------------------------------------+ + System Functions ================ @@ -3323,3 +3376,5 @@ Example:: |----------------+---------------+-----------------+------------------| | DATE | INTEGER | 
DATETIME | STRUCT | +----------------+---------------+-----------------+------------------+ + + diff --git a/doctest/test_data/wildcard.json b/doctest/test_data/wildcard.json new file mode 100644 index 00000000000..c91778d8abc --- /dev/null +++ b/doctest/test_data/wildcard.json @@ -0,0 +1,22 @@ +{"index":{"_id":"0"}} +{"Body":"test wildcard"} +{"index":{"_id":"1"}} +{"Body":"test wildcard in the end of the text%"} +{"index":{"_id":"2"}} +{"Body":"%test wildcard in the beginning of the text"} +{"index":{"_id":"3"}} +{"Body":"test wildcard in % the middle of the text"} +{"index":{"_id":"4"}} +{"Body":"test wildcard %% beside each other"} +{"index":{"_id":"5"}} +{"Body":"test wildcard in the end of the text_"} +{"index":{"_id":"6"}} +{"Body":"_test wildcard in the beginning of the text"} +{"index":{"_id":"7"}} +{"Body":"test wildcard in _ the middle of the text"} +{"index":{"_id":"8"}} +{"Body":"test wildcard __ beside each other"} +{"index":{"_id":"9"}} +{"Body":"test backslash wildcard \\_"} +{"index":{"_id":"10"}} +{"Body":"tEsT wIlDcArD sensitive cases"} diff --git a/doctest/test_docs.py b/doctest/test_docs.py index 6d2538196ae..b5edf46de9a 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -26,6 +26,7 @@ NYC_TAXI = "nyc_taxi" BOOKS = "books" APACHE = "apache" +WILDCARD = "wildcard" class DocTestConnection(OpenSearchConnection): @@ -92,6 +93,7 @@ def set_up_test_indices(test): load_file("nyc_taxi.json", index_name=NYC_TAXI) load_file("books.json", index_name=BOOKS) load_file("apache.json", index_name=APACHE) + load_file("wildcard.json", index_name=WILDCARD) def load_file(filename, index_name): @@ -120,7 +122,7 @@ def set_up(test): def tear_down(test): # drop leftover tables after each test - test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE], ignore_unavailable=True) + test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD], 
ignore_unavailable=True) docsuite = partial(doctest.DocFileSuite, diff --git a/doctest/test_mapping/wildcard.json b/doctest/test_mapping/wildcard.json new file mode 100644 index 00000000000..670a774ae14 --- /dev/null +++ b/doctest/test_mapping/wildcard.json @@ -0,0 +1,9 @@ +{ + "mappings" : { + "properties" : { + "Body" : { + "type" : "keyword" + } + } + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index f03acbbbfd7..80348b2a8ba 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -584,7 +584,11 @@ public enum Index { CALCS(TestsConstants.TEST_INDEX_CALCS, "calcs", getMappingFile("calcs_index_mappings.json"), - "src/test/resources/calcs.json"),; + "src/test/resources/calcs.json"), + WILDCARD(TestsConstants.TEST_INDEX_WILDCARD, + "wildcard", + getMappingFile("wildcard_index_mappings.json"), + "src/test/resources/wildcard.json"),; private final String name; private final String type; diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index a9f81c68fee..aff269fcceb 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -53,6 +53,7 @@ public class TestsConstants { public final static String TEST_INDEX_BEER = TEST_INDEX + "_beer"; public final static String TEST_INDEX_NULL_MISSING = TEST_INDEX + "_null_missing"; public final static String TEST_INDEX_CALCS = TEST_INDEX + "_calcs"; + public final static String TEST_INDEX_WILDCARD = TEST_INDEX + "_wildcard"; public final static String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; public final static String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS"; diff --git 
a/integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java new file mode 100644 index 00000000000..67ad5536893 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java @@ -0,0 +1,88 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + + +package org.opensearch.sql.ppl; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WILDCARD; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; + +public class LikeQueryIT extends PPLIntegTestCase { + + @Override + public void init() throws IOException { + loadIndex(Index.WILDCARD); + } + + @Test + public void test_like_with_percent() throws IOException { + String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(KeywordBody, 'test wildcard%') | fields KeywordBody"; + JSONObject result = executeQuery(query); + verifyDataRows(result, + rows("test wildcard"), + rows("test wildcard in the end of the text%"), + rows("test wildcard in % the middle of the text"), + rows("test wildcard %% beside each other"), + rows("test wildcard in the end of the text_"), + rows("test wildcard in _ the middle of the text"), + rows("test wildcard __ beside each other")); + } + + @Test + public void test_like_with_escaped_percent() throws IOException { + String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(KeywordBody, '\\\\%test wildcard%') | fields KeywordBody"; + JSONObject result = executeQuery(query); + verifyDataRows(result, + rows("%test wildcard in the beginning of the text")); + } + + @Test + public void test_like_in_where_with_escaped_underscore() throws IOException { + String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(KeywordBody, '\\\\_test wildcard%') | fields KeywordBody"; + JSONObject result = 
executeQuery(query); + verifyDataRows(result, + rows("_test wildcard in the beginning of the text")); + } + + @Test + public void test_like_on_text_field_with_one_word() throws IOException { + String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test*') | fields TextBody"; + JSONObject result = executeQuery(query); + assertEquals(9, result.getInt("total")); + } + + @Test + public void test_like_on_text_keyword_field_with_one_word() throws IOException { + String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextKeywordBody, 'test*') | fields TextKeywordBody"; + JSONObject result = executeQuery(query); + assertEquals(8, result.getInt("total")); + } + + @Test + public void test_like_on_text_keyword_field_with_greater_than_one_word() throws IOException { + String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextKeywordBody, 'test wild*') | fields TextKeywordBody"; + JSONObject result = executeQuery(query); + assertEquals(7, result.getInt("total")); + } + + @Test + public void test_like_on_text_field_with_greater_than_one_word() throws IOException { + String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test wild*') | fields TextBody"; + JSONObject result = executeQuery(query); + assertEquals(0, result.getInt("total")); + } + + @Test + public void test_convert_field_text_to_keyword() throws IOException { + String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextKeywordBody, '*') | fields TextKeywordBody"; + String result = explainQueryToString(query); + assertTrue(result.contains("TextKeywordBody.keyword")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/LikeQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/LikeQueryIT.java new file mode 100644 index 00000000000..f0e82adb6fe --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/LikeQueryIT.java @@ -0,0 +1,140 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + 
*/ + + +package org.opensearch.sql.sql; + +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; + +import java.io.IOException; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WILDCARD; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; + +public class LikeQueryIT extends SQLIntegTestCase { + @Override + protected void init() throws Exception { + loadIndex(Index.WILDCARD); + } + + @Test + public void test_like_in_select() throws IOException { + String query = "SELECT KeywordBody, KeywordBody LIKE 'test wildcard%' FROM " + TEST_INDEX_WILDCARD; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, + rows("test wildcard", true), + rows("test wildcard in the end of the text%", true), + rows("%test wildcard in the beginning of the text", false), + rows("test wildcard in % the middle of the text", true), + rows("test wildcard %% beside each other", true), + rows("test wildcard in the end of the text_", true), + rows("_test wildcard in the beginning of the text", false), + rows("test wildcard in _ the middle of the text", true), + rows("test wildcard __ beside each other", true), + rows("test backslash wildcard \\_", false)); + } + + @Test + public void test_like_in_select_with_escaped_percent() throws IOException { + String query = "SELECT KeywordBody, KeywordBody LIKE '\\\\%test wildcard%' FROM " + TEST_INDEX_WILDCARD; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, + rows("test wildcard", false), + rows("test wildcard in the end of the text%", false), + rows("%test wildcard in the beginning of the text", true), + rows("test wildcard in % the middle of the text", false), + rows("test wildcard %% beside each other", false), + rows("test wildcard in the end of the text_", false), + rows("_test wildcard in the beginning of the text", false), + rows("test wildcard in _ the middle of 
the text", false), + rows("test wildcard __ beside each other", false), + rows("test backslash wildcard \\_", false)); + } + + @Test + public void test_like_in_select_with_escaped_underscore() throws IOException { + String query = "SELECT KeywordBody, KeywordBody LIKE '\\\\_test wildcard%' FROM " + TEST_INDEX_WILDCARD; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, + rows("test wildcard", false), + rows("test wildcard in the end of the text%", false), + rows("%test wildcard in the beginning of the text", false), + rows("test wildcard in % the middle of the text", false), + rows("test wildcard %% beside each other", false), + rows("test wildcard in the end of the text_", false), + rows("_test wildcard in the beginning of the text", true), + rows("test wildcard in _ the middle of the text", false), + rows("test wildcard __ beside each other", false), + rows("test backslash wildcard \\_", false)); + } + + @Test + public void test_like_in_where() throws IOException { + String query = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE KeywordBody LIKE 'test wildcard%'"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, + rows("test wildcard"), + rows("test wildcard in the end of the text%"), + rows("test wildcard in % the middle of the text"), + rows("test wildcard %% beside each other"), + rows("test wildcard in the end of the text_"), + rows("test wildcard in _ the middle of the text"), + rows("test wildcard __ beside each other")); + } + + @Test + public void test_like_in_where_with_escaped_percent() throws IOException { + String query = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE KeywordBody LIKE '\\\\%test wildcard%'"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, + rows("%test wildcard in the beginning of the text")); + } + + @Test + public void test_like_in_where_with_escaped_underscore() throws IOException { + String query = "SELECT KeywordBody FROM " + 
TEST_INDEX_WILDCARD + " WHERE KeywordBody LIKE '\\\\_test wildcard%'"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, + rows("_test wildcard in the beginning of the text")); + } + + @Test + public void test_like_on_text_field_with_one_word() throws IOException { + String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextBody LIKE 'test*'"; + JSONObject result = executeJdbcRequest(query); + assertEquals(9, result.getInt("total")); + } + + @Test + public void test_like_on_text_keyword_field_with_one_word() throws IOException { + String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextKeywordBody LIKE 'test*'"; + JSONObject result = executeJdbcRequest(query); + assertEquals(8, result.getInt("total")); + } + + @Test + public void test_like_on_text_keyword_field_with_greater_than_one_word() throws IOException { + String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextKeywordBody LIKE 'test wild*'"; + JSONObject result = executeJdbcRequest(query); + assertEquals(7, result.getInt("total")); + } + + @Test + public void test_like_on_text_field_with_greater_than_one_word() throws IOException { + String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextBody LIKE 'test wild*'"; + JSONObject result = executeJdbcRequest(query); + assertEquals(0, result.getInt("total")); + } + + @Test + public void test_convert_field_text_to_keyword() throws IOException { + String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextKeywordBody LIKE '*'"; + String result = explainQuery(query); + assertTrue(result.contains("TextKeywordBody.keyword")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/WildcardQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/WildcardQueryIT.java new file mode 100644 index 00000000000..ee636ed5ce3 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/WildcardQueryIT.java @@ -0,0 +1,183 @@ +/* + * Copyright OpenSearch Contributors + * 
SPDX-License-Identifier: Apache-2.0 + */ + + +package org.opensearch.sql.sql; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WILDCARD; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; + +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; + +public class WildcardQueryIT extends SQLIntegTestCase { + @Override + protected void init() throws Exception { + loadIndex(Index.WILDCARD); + } + + @Test + public void test_wildcard_query_asterisk_function() throws IOException { + String expected = "test wildcard"; + + String query1 = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(KeywordBody, 't*') LIMIT 1"; + JSONObject result1 = executeJdbcRequest(query1); + verifyDataRows(result1, rows(expected)); + + String query2 = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcardquery(KeywordBody, 't*') LIMIT 1"; + JSONObject result2 = executeJdbcRequest(query2); + verifyDataRows(result2, rows(expected)); + } + + @Test + public void test_wildcard_query_question_mark_function() throws IOException { + String expected = "test wildcard"; + + String query1 = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(KeywordBody, 'test wild??rd')"; + JSONObject result1 = executeJdbcRequest(query1); + verifyDataRows(result1, rows(expected)); + + String query2 = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcardquery(KeywordBody, 'test wild??rd')"; + JSONObject result2 = executeJdbcRequest(query2); + verifyDataRows(result2, rows(expected)); + } + + // SQL uses % 
as a wildcard which is converted to * in WildcardQuery.java + @Test + public void test_wildcard_query_sql_wildcard_percent_conversion() throws IOException { + String query1 = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(KeywordBody, 'test%')"; + JSONObject result1 = executeJdbcRequest(query1); + assertEquals(8, result1.getInt("total")); + + String query2 = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(KeywordBody, 'test*')"; + JSONObject result2 = executeJdbcRequest(query2); + assertEquals(result1.getInt("total"), result2.getInt("total")); + } + + // SQL uses _ as a wildcard which is converted to ? in WildcardQuery.java + @Test + public void test_wildcard_query_sql_wildcard_underscore_conversion() throws IOException { + String query1 = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(KeywordBody, 'test wild_ard*')"; + JSONObject result1 = executeJdbcRequest(query1); + assertEquals(7, result1.getInt("total")); + + String query2 = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(KeywordBody, 'test wild?ard*')"; + JSONObject result2 = executeJdbcRequest(query2); + assertEquals(result1.getInt("total"), result2.getInt("total")); + } + + @Test + public void test_escaping_wildcard_percent_in_the_beginning_of_text() throws IOException { + String query = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(KeywordBody, '\\\\%*')"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, rows("%test wildcard in the beginning of the text")); + } + + @Test + public void test_escaping_wildcard_percent_in_text() throws IOException { + String query = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(KeywordBody, '*\\\\%%')"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, rows("test wildcard in % the middle of the text"), + rows("test wildcard %% beside each other"), + rows("test wildcard 
in the end of the text%"), + rows("%test wildcard in the beginning of the text")); + } + + @Test + public void test_escaping_wildcard_percent_in_the_end_of_text() throws IOException { + String query = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(KeywordBody, '*\\\\%')"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, rows("test wildcard in the end of the text%")); + } + + @Test + public void test_double_escaped_wildcard_percent() throws IOException { + String query = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(KeywordBody, '*\\\\%\\\\%*')"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, rows("test wildcard %% beside each other")); + } + + @Test + public void test_escaping_wildcard_underscore_in_the_beginning_of_text() throws IOException { + String query = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(KeywordBody, '\\\\_*')"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, rows("_test wildcard in the beginning of the text")); + } + + @Test + public void test_escaping_wildcard_underscore_in_text() throws IOException { + String query = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(KeywordBody, '*\\\\_*')"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, rows("test wildcard in _ the middle of the text"), + rows("test wildcard __ beside each other"), + rows("test wildcard in the end of the text_"), + rows("_test wildcard in the beginning of the text"), + rows("test backslash wildcard \\_")); + } + + @Test + public void test_escaping_wildcard_underscore_in_the_end_of_text() throws IOException { + String query = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(KeywordBody, '*\\\\_')"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, + rows("test wildcard in the end of the text_"), + rows("test backslash 
wildcard \\_")); + } + + @Test + public void test_double_escaped_wildcard_underscore() throws IOException { + String query = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(KeywordBody, '*\\\\_\\\\_*')"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, rows("test wildcard __ beside each other")); + } + + @Test + public void test_backslash_wildcard() throws IOException { + String query = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(KeywordBody, '*\\\\\\\\\\\\_')"; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, rows("test backslash wildcard \\_")); + } + + @Test + public void all_params_test() throws IOException { + String query = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD + + " WHERE wildcard_query(KeywordBody, 'test*', boost = 0.9," + + " case_insensitive=true, rewrite='constant_score')"; + JSONObject result = executeJdbcRequest(query); + assertEquals(8, result.getInt("total")); + } + + @Test + public void test_wildcard_query_on_text_field_with_one_word() throws IOException { + String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(TextBody, 'test*')"; + JSONObject result = executeJdbcRequest(query); + assertEquals(9, result.getInt("total")); + } + + @Test + public void test_wildcard_query_on_text_keyword_field_with_one_word() throws IOException { + String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(TextKeywordBody, 'test*')"; + JSONObject result = executeJdbcRequest(query); + assertEquals(9, result.getInt("total")); + } + + @Test + public void test_wildcard_query_on_text_field_with_greater_than_one_word() throws IOException { + String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(TextBody, 'test wild*')"; + JSONObject result = executeJdbcRequest(query); + assertEquals(0, result.getInt("total")); + } + + @Test + public void 
test_wildcard_query_on_text_keyword_field_with_greater_than_one_word() throws IOException { + String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(TextKeywordBody, 'test wild*')"; + JSONObject result = executeJdbcRequest(query); + assertEquals(0, result.getInt("total")); + } +} diff --git a/integ-test/src/test/resources/indexDefinitions/wildcard_index_mappings.json b/integ-test/src/test/resources/indexDefinitions/wildcard_index_mappings.json new file mode 100644 index 00000000000..b9974e9548b --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/wildcard_index_mappings.json @@ -0,0 +1,21 @@ +{ + "mappings" : { + "properties" : { + "KeywordBody" : { + "type" : "keyword" + }, + "TextKeywordBody" : { + "type" : "text", + "fields" : { + "keyword" : { + "type" : "keyword", + "ignore_above":256 + } + } + }, + "TextBody" : { + "type" : "text" + } + } + } +} diff --git a/integ-test/src/test/resources/wildcard.json b/integ-test/src/test/resources/wildcard.json new file mode 100644 index 00000000000..b25772a47e5 --- /dev/null +++ b/integ-test/src/test/resources/wildcard.json @@ -0,0 +1,20 @@ +{"index":{"_id":"0"}} +{"KeywordBody":"test wildcard", "TextKeywordBody":"test wildcard", "TextBody":"test wildcard"} +{"index":{"_id":"1"}} +{"KeywordBody":"test wildcard in the end of the text%", "TextKeywordBody":"test wildcard in the end of the text%", "TextBody":"test wildcard in the end of the text%"} +{"index":{"_id":"2"}} +{"KeywordBody":"%test wildcard in the beginning of the text", "TextKeywordBody":"%test wildcard in the beginning of the text", "TextBody":"%test wildcard in the beginning of the text"} +{"index":{"_id":"3"}} +{"KeywordBody":"test wildcard in % the middle of the text", "TextKeywordBody":"test wildcard in % the middle of the text", "TextBody":"test wildcard in % the middle of the text"} +{"index":{"_id":"4"}} +{"KeywordBody":"test wildcard %% beside each other", "TextKeywordBody":"test wildcard %% beside each other", 
/**
 * Static helpers for translating SQL-style string patterns into Lucene wildcard syntax.
 *
 * <p>The class is not instantiable; every member is static.
 */
public final class StringUtils {

  /** Character that escapes the SQL wildcards {@code %} and {@code _}. */
  private static final char DEFAULT_ESCAPE = '\\';

  private StringUtils() {
    throw new UnsupportedOperationException("This is a utility class and cannot be instantiated");
  }

  /**
   * Converts sql wildcard character % and _ to * and ?.
   *
   * <ul>
   *   <li>an unescaped {@code %} becomes {@code *}, an unescaped {@code _} becomes {@code ?};</li>
   *   <li>{@code \%} and {@code \_} become the literal {@code %} and {@code _}
   *       (the backslash is dropped);</li>
   *   <li>a backslash in front of any other character is copied through unchanged together
   *       with that character, so {@code \*} and {@code \?} stay as they are;</li>
   *   <li>{@code \\} is copied through as two backslashes and does <em>not</em> escape the
   *       character that follows it, so {@code \\%} becomes {@code \\*}.</li>
   * </ul>
   *
   * @param text string to be converted
   * @return converted string
   * @throws NullPointerException if {@code text} is {@code null}
   */
  public static String convertSqlWildcardToLucene(String text) {
    StringBuilder converted = new StringBuilder(text.length());
    // True only while the previous character was a backslash that has not itself been escaped.
    boolean escaped = false;

    for (char current : text.toCharArray()) {
      switch (current) {
        case DEFAULT_ESCAPE:
          converted.append(current);
          // Toggle instead of forcing true: the second backslash of a "\\" pair closes the
          // escape, so a wildcard right after the pair is still treated as a wildcard.
          escaped = !escaped;
          break;
        case '%':
        case '_':
          if (escaped) {
            // Overwrite the pending backslash with the literal SQL wildcard character.
            converted.setCharAt(converted.length() - 1, current);
          } else {
            converted.append(current == '%' ? '*' : '?');
          }
          escaped = false;
          break;
        default:
          converted.append(current);
          escaped = false;
      }
    }
    return converted.toString();
  }
}
org.opensearch.sql.opensearch.storage.script.filter.lucene.TermQuery; -import org.opensearch.sql.opensearch.storage.script.filter.lucene.WildcardQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchBoolPrefixQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhrasePrefixQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhraseQuery; @@ -37,6 +37,7 @@ import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.QueryQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.QueryStringQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.SimpleQueryStringQuery; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.WildcardQuery; import org.opensearch.sql.opensearch.storage.serialization.ExpressionSerializer; @RequiredArgsConstructor @@ -57,7 +58,7 @@ public class FilterQueryBuilder extends ExpressionNodeVisitor b.quoteFieldSuffix(v.stringValue())) .build(); + public static final Map> + WildcardQueryBuildActions = ImmutableMap.>builder() + .put("boost", (b, v) -> b.boost(convertFloatValue(v, "boost"))) + .put("case_insensitive", (b, v) -> b.caseInsensitive(convertBoolValue(v, "case_insensitive"))) + .put("rewrite", (b, v) -> b.rewrite(checkRewrite(v, "rewrite"))) + .build(); + public static final Map ArgumentLimitations = ImmutableMap.builder() .put("boost", "Accepts only floating point values greater than 0.") diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/WildcardQuery.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/WildcardQuery.java new file mode 100644 index 00000000000..9fd37e3de73 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/WildcardQuery.java @@ -0,0 +1,35 @@ +/* + * Copyright 
OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + + +package org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance; + +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.WildcardQueryBuilder; +import org.opensearch.sql.opensearch.storage.script.StringUtils; + +/** + * Lucene query that builds wildcard query. + */ +public class WildcardQuery extends SingleFieldQuery { + /** + * Default constructor for WildcardQuery configures how RelevanceQuery.build() handles + * named arguments. + */ + public WildcardQuery() { + super(FunctionParameterRepository.WildcardQueryBuildActions); + } + + @Override + protected String getQueryName() { + return WildcardQueryBuilder.NAME; + } + + @Override + protected WildcardQueryBuilder createBuilder(String field, String query) { + String matchText = StringUtils.convertSqlWildcardToLucene(query); + return QueryBuilders.wildcardQuery(field, matchText); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/StringUtilsTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/StringUtilsTest.java new file mode 100644 index 00000000000..24c809ebabd --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/StringUtilsTest.java @@ -0,0 +1,29 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script; + +import static org.junit.Assert.assertEquals; + +import org.junit.jupiter.api.Test; + +public class StringUtilsTest { + @Test + public void test_escaping_sql_wildcards() { + assertEquals("%", StringUtils.convertSqlWildcardToLucene("\\%")); + assertEquals("\\*", StringUtils.convertSqlWildcardToLucene("\\*")); + assertEquals("_", StringUtils.convertSqlWildcardToLucene("\\_")); + assertEquals("\\?", StringUtils.convertSqlWildcardToLucene("\\?")); + assertEquals("%*", 
StringUtils.convertSqlWildcardToLucene("\\%%")); + assertEquals("*%", StringUtils.convertSqlWildcardToLucene("%\\%")); + assertEquals("%*%", StringUtils.convertSqlWildcardToLucene("\\%%\\%")); + assertEquals("*%*", StringUtils.convertSqlWildcardToLucene("%\\%%")); + assertEquals("_?", StringUtils.convertSqlWildcardToLucene("\\__")); + assertEquals("?_", StringUtils.convertSqlWildcardToLucene("_\\_")); + assertEquals("_?_", StringUtils.convertSqlWildcardToLucene("\\__\\_")); + assertEquals("?_?", StringUtils.convertSqlWildcardToLucene("_\\__")); + assertEquals("%\\*_\\?", StringUtils.convertSqlWildcardToLucene("\\%\\*\\_\\?")); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java index 737e61f54bb..cea4e2488a2 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java @@ -631,6 +631,130 @@ void should_build_match_phrase_query_with_custom_parameters() { DSL.namedArgument("zero_terms_query", literal("ALL"))))); } + @Test + void wildcard_query_invalid_parameter() { + FunctionExpression expr = DSL.wildcard_query( + DSL.namedArgument("field", literal("field")), + DSL.namedArgument("query", literal("search query*")), + DSL.namedArgument("invalid_parameter", literal("invalid_value"))); + assertThrows(SemanticCheckException.class, () -> buildQuery(expr), + "Parameter invalid_parameter is invalid for wildcard_query function."); + } + + @Test + void wildcard_query_convert_sql_wildcard_to_lucene() { + // Test conversion of % wildcard to * + assertJsonEquals("{\n" + + " \"wildcard\" : {\n" + + " \"field\" : {\n" + + " \"wildcard\" : \"search query*\",\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + "}", + 
buildQuery(DSL.wildcard_query( + DSL.namedArgument("field", literal("field")), + DSL.namedArgument("query", literal("search query%"))))); + + assertJsonEquals("{\n" + + " \"wildcard\" : {\n" + + " \"field\" : {\n" + + " \"wildcard\" : \"search query?\",\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + "}", + buildQuery(DSL.wildcard_query( + DSL.namedArgument("field", literal("field")), + DSL.namedArgument("query", literal("search query_"))))); + } + + @Test + void wildcard_query_escape_wildcards_characters() { + assertJsonEquals("{\n" + + " \"wildcard\" : {\n" + + " \"field\" : {\n" + + " \"wildcard\" : \"search query%\",\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + "}", + buildQuery(DSL.wildcard_query( + DSL.namedArgument("field", literal("field")), + DSL.namedArgument("query", literal("search query\\%"))))); + + assertJsonEquals("{\n" + + " \"wildcard\" : {\n" + + " \"field\" : {\n" + + " \"wildcard\" : \"search query_\",\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + "}", + buildQuery(DSL.wildcard_query( + DSL.namedArgument("field", literal("field")), + DSL.namedArgument("query", literal("search query\\_"))))); + + assertJsonEquals("{\n" + + " \"wildcard\" : {\n" + + " \"field\" : {\n" + + " \"wildcard\" : \"search query\\\\*\",\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + "}", + buildQuery(DSL.wildcard_query( + DSL.namedArgument("field", literal("field")), + DSL.namedArgument("query", literal("search query\\*"))))); + + assertJsonEquals("{\n" + + " \"wildcard\" : {\n" + + " \"field\" : {\n" + + " \"wildcard\" : \"search query\\\\?\",\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + "}", + buildQuery(DSL.wildcard_query( + DSL.namedArgument("field", literal("field")), + DSL.namedArgument("query", literal("search query\\?"))))); + } + + @Test + void should_build_wildcard_query_with_default_parameters() { + assertJsonEquals("{\n" + + " \"wildcard\" : {\n" + + " \"field\" : {\n" + + " \"wildcard\" : \"search query*\",\n" + + " 
\"boost\" : 1.0\n" + + " }\n" + + " }\n" + + "}", + buildQuery(DSL.wildcard_query( + DSL.namedArgument("field", literal("field")), + DSL.namedArgument("query", literal("search query*"))))); + } + + @Test + void should_build_wildcard_query_query_with_custom_parameters() { + assertJsonEquals("{\n" + + " \"wildcard\" : {\n" + + " \"field\" : {\n" + + " \"wildcard\" : \"search query*\",\n" + + " \"boost\" : 0.6,\n" + + " \"case_insensitive\" : true,\n" + + " \"rewrite\" : \"constant_score_boolean\"\n" + + " }\n" + + " }\n" + + "}", + buildQuery(DSL.wildcard_query( + DSL.namedArgument("field", literal("field")), + DSL.namedArgument("query", literal("search query*")), + DSL.namedArgument("boost", literal("0.6")), + DSL.namedArgument("case_insensitive", literal("true")), + DSL.namedArgument("rewrite", literal("constant_score_boolean"))))); + } + @Test void query_invalid_parameter() { FunctionExpression expr = DSL.query( diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/WildcardQueryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/WildcardQueryTest.java new file mode 100644 index 00000000000..ce7a39d91a9 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/WildcardQueryTest.java @@ -0,0 +1,94 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.filter.lucene; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.opensearch.sql.expression.DSL.namedArgument; + +import java.util.List; +import java.util.stream.Stream; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import 
org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.env.Environment; +import org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.WildcardQuery; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +class WildcardQueryTest { + private final WildcardQuery wildcardQueryQuery = new WildcardQuery(); + private static final FunctionName wildcardQueryFunc = FunctionName.of("wildcard_query"); + + static Stream> generateValidData() { + return Stream.of( + List.of( + namedArgument("field", "title"), + namedArgument("query", "query_value*"), + namedArgument("boost", "0.7"), + namedArgument("case_insensitive", "false"), + namedArgument("rewrite", "constant_score_boolean") + ) + ); + } + + @ParameterizedTest + @MethodSource("generateValidData") + public void test_valid_parameters(List validArgs) { + Assertions.assertNotNull(wildcardQueryQuery.build( + new WildcardQueryExpression(validArgs))); + } + + @Test + public void test_SyntaxCheckException_when_no_arguments() { + List arguments = List.of(); + assertThrows(SyntaxCheckException.class, + () -> wildcardQueryQuery.build(new WildcardQueryExpression(arguments))); + } + + @Test + public void test_SyntaxCheckException_when_one_argument() { + List arguments = List.of(namedArgument("field", "title")); + assertThrows(SyntaxCheckException.class, + () -> wildcardQueryQuery.build(new WildcardQueryExpression(arguments))); + } + + @Test + public void test_SemanticCheckException_when_invalid_parameter() { + List arguments = List.of( + namedArgument("field", "title"), + namedArgument("query", "query_value*"), + namedArgument("unsupported", 
"unsupported_value")); + Assertions.assertThrows(SemanticCheckException.class, + () -> wildcardQueryQuery.build(new WildcardQueryExpression(arguments))); + } + + private class WildcardQueryExpression extends FunctionExpression { + public WildcardQueryExpression(List arguments) { + super(WildcardQueryTest.this.wildcardQueryFunc, arguments); + } + + @Override + public ExprValue valueOf(Environment valueEnv) { + throw new UnsupportedOperationException("Invalid function call, " + + "valueOf function need implementation only to support Expression interface"); + } + + @Override + public ExprType type() { + throw new UnsupportedOperationException("Invalid function call, " + + "type function need implementation only to support Expression interface"); + } + } +} diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index c416c784324..a18aee8f103 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -338,6 +338,7 @@ ANALYZER: 'ANALYZER'; ANALYZE_WILDCARD: 'ANALYZE_WILDCARD'; AUTO_GENERATE_SYNONYMS_PHRASE_QUERY:'AUTO_GENERATE_SYNONYMS_PHRASE_QUERY'; BOOST: 'BOOST'; +CASE_INSENSITIVE: 'CASE_INSENSITIVE'; CUTOFF_FREQUENCY: 'CUTOFF_FREQUENCY'; DEFAULT_FIELD: 'DEFAULT_FIELD'; DEFAULT_OPERATOR: 'DEFAULT_OPERATOR'; diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index e6ae551fa2b..6e8e0e08fe9 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -473,6 +473,7 @@ singleFieldRelevanceFunctionName : MATCH | MATCHQUERY | MATCH_QUERY | MATCH_PHRASE | MATCHPHRASE | MATCHPHRASEQUERY | MATCH_BOOL_PREFIX | MATCH_PHRASE_PREFIX + | WILDCARD_QUERY | WILDCARDQUERY ; multiFieldRelevanceFunctionName @@ -502,7 +503,7 @@ highlightArg relevanceArgName : ALLOW_LEADING_WILDCARD | ANALYZER | ANALYZE_WILDCARD | AUTO_GENERATE_SYNONYMS_PHRASE_QUERY - | BOOST | CUTOFF_FREQUENCY | DEFAULT_FIELD | DEFAULT_OPERATOR | 
ENABLE_POSITION_INCREMENTS + | BOOST | CASE_INSENSITIVE | CUTOFF_FREQUENCY | DEFAULT_FIELD | DEFAULT_OPERATOR | ENABLE_POSITION_INCREMENTS | ESCAPE | FIELDS | FLAGS | FUZZINESS | FUZZY_MAX_EXPANSIONS | FUZZY_PREFIX_LENGTH | FUZZY_REWRITE | FUZZY_TRANSPOSITIONS | LENIENT | LOW_FREQ_OPERATOR | MAX_DETERMINIZED_STATES | MAX_EXPANSIONS | MINIMUM_SHOULD_MATCH | OPERATOR | PHRASE_SLOP | PREFIX_LENGTH diff --git a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java index bf2c9af623c..bfd0f93ec98 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java @@ -15,7 +15,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.Iterator; -import java.util.List; import java.util.Map; import java.util.Random; import java.util.stream.Stream; @@ -449,6 +448,21 @@ public void can_parse_match_phrase_relevance_function() { assertNotNull(parser.parse("SELECT * FROM test WHERE match_phrase(column, 100500)")); } + @Test + public void can_parse_wildcard_query_relevance_function() { + assertNotNull( + parser.parse("SELECT * FROM test WHERE wildcard_query(column, \"this is a test*\")")); + assertNotNull( + parser.parse("SELECT * FROM test WHERE wildcard_query(column, 'this is a test*')")); + assertNotNull( + parser.parse("SELECT * FROM test WHERE wildcard_query(`column`, \"this is a test*\")")); + assertNotNull( + parser.parse("SELECT * FROM test WHERE wildcard_query(`column`, 'this is a test*')")); + assertNotNull( + parser.parse("SELECT * FROM test WHERE wildcard_query(`column`, 'this is a test*', " + + "boost=1.5, case_insensitive=true, rewrite=\"scoring_boolean\")")); + } + @ParameterizedTest @MethodSource({ "matchPhraseComplexQueries", diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java 
b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java index ac68d146b28..9af4119fdf2 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java @@ -637,6 +637,19 @@ public void relevanceQuery_string() { + "analyzer='keyword', time_zone='Canada/Pacific', tie_breaker='1.3')")); } + @Test + public void relevanceWildcard_query() { + assertEquals(AstDSL.function("wildcard_query", + unresolvedArg("field", stringLiteral("field")), + unresolvedArg("query", stringLiteral("search query*")), + unresolvedArg("boost", stringLiteral("1.5")), + unresolvedArg("case_insensitive", stringLiteral("true")), + unresolvedArg("rewrite", stringLiteral("scoring_boolean"))), + buildExprAst("wildcard_query(field, 'search query*', boost=1.5," + + "case_insensitive=true, rewrite='scoring_boolean'))") + ); + } + @Test public void relevanceQuery() { assertEquals(AstDSL.function("query",