diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 51d91eb3723..8ceabf8b48f 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -220,7 +220,8 @@ public enum BuiltinFunctionName { QUERY(FunctionName.of("query")), MATCH_QUERY(FunctionName.of("match_query")), MATCHQUERY(FunctionName.of("matchquery")), - MULTI_MATCH(FunctionName.of("multi_match")); + MULTI_MATCH(FunctionName.of("multi_match")), + MATCHPHRASEPREFIX(FunctionName.of("matchphraseprefix")); private final FunctionName name; diff --git a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java index 97afe3675ed..51e62627feb 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java @@ -36,7 +36,8 @@ public void register(BuiltinFunctionRepository repository) { // compatibility. 
repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE)); repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE)); - repository.register(match_phrase_prefix()); + repository.register(match_phrase_prefix(BuiltinFunctionName.MATCH_PHRASE_PREFIX)); + repository.register(match_phrase_prefix(BuiltinFunctionName.MATCHPHRASEPREFIX)); } private static FunctionResolver match_bool_prefix() { @@ -49,8 +50,8 @@ private static FunctionResolver match() { return new RelevanceFunctionResolver(funcName, STRING); } - private static FunctionResolver match_phrase_prefix() { - FunctionName funcName = BuiltinFunctionName.MATCH_PHRASE_PREFIX.getName(); + private static FunctionResolver match_phrase_prefix(BuiltinFunctionName matchPhrasePrefix) { + FunctionName funcName = matchPhrasePrefix.getName(); return new RelevanceFunctionResolver(funcName, STRING); } diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index 788cac04334..2f5386d950b 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -2851,6 +2851,49 @@ Another example to show how to set custom values for the optional parameters:: +----------------------+--------------------------+ +MATCHPHRASEPREFIX +----------------- + +Description +>>>>>>>>>>> + +``matchphraseprefix(field_expression, query_expression[, option=<option_value>]*)`` + +The matchphraseprefix function maps to the match_phrase_prefix query used in the search engine, +to return the documents that match a provided text with a given field. +It is an alternate syntax for the `match_phrase_prefix`_ function. 
+Available parameters include: + +- analyzer +- slop +- zero_terms_query +- max_expansions +- boost + + +Example with only ``field`` and ``query`` expressions, with all other parameters set to their default values:: + + os> SELECT author, title FROM books WHERE matchphraseprefix(author, 'Alexander Mil'); + fetched rows / total rows = 2/2 + +----------------------+--------------------------+ + | author | title | + |----------------------+--------------------------| + | Alan Alexander Milne | The House at Pooh Corner | + | Alan Alexander Milne | Winnie-the-Pooh | + +----------------------+--------------------------+ + +Another example to show how to set custom values for the optional parameters:: + + os> SELECT author, title FROM books WHERE matchphraseprefix(author, 'Alan Mil', slop = 2); + fetched rows / total rows = 2/2 + +----------------------+--------------------------+ + | author | title | + |----------------------+--------------------------| + | Alan Alexander Milne | The House at Pooh Corner | + | Alan Alexander Milne | Winnie-the-Pooh | + +----------------------+--------------------------+ + + MULTI_MATCH ----------- diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/MatchPhrasePrefixIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/MatchPhrasePrefixIT.java index dd2a8384d6d..8459b45c2f6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/MatchPhrasePrefixIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/MatchPhrasePrefixIT.java @@ -107,4 +107,90 @@ public void slop_is_3() throws IOException { rows("taste draught gas"), rows("taste gas")); } + + @Test + public void required_parameters_alternate_syntax() throws IOException { + String query = "SELECT Title FROM %s WHERE matchphraseprefix(Title, 'champagne be')"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, + rows("Can old flat champagne be used for vinegar?"), + rows("Elder flower champagne best to use natural yeast 
or add a wine yeast?")); + } + + @Test + public void all_optional_parameters_alternate_syntax() throws IOException { + // The values for optional parameters are valid but arbitrary. + String query = "SELECT Title FROM %s " + + "WHERE matchphraseprefix(Title, 'flat champ', boost = 1.0, zero_terms_query='ALL', " + + "max_expansions = 2, analyzer=standard, slop=0)"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, rows("Can old flat champagne be used for vinegar?")); + } + + @Test + public void max_expansions_is_3_alternate_syntax() throws IOException { + // max_expansions applies to the last term in the query -- 'bottl' + // It tells OpenSearch to consider only the first 3 terms that start with 'bottl' + // In this dataset these are 'bottle-conditioning', 'bottling', 'bottles'. + + String query = "SELECT Tags FROM %s " + + "WHERE matchphraseprefix(Tags, 'draught bottl', max_expansions=3)"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, rows("brewing draught bottling"), + rows("draught bottles")); + } + + @Test + public void analyzer_english_alternate_syntax() throws IOException { + // English analyzer removes 'in' and 'to' as they are common words. + // This results in an empty query. + String query = "SELECT Title FROM %s " + + "WHERE matchphraseprefix(Title, 'in to', analyzer=english)"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + assertTrue("Expect English analyzer to filter out common words 'in' and 'to'", + result.getInt("total") == 0); + } + + @Test + public void analyzer_standard_alternate_syntax() throws IOException { + // Standard analyzer does not treat 'in' and 'to' as special terms. + // This results in 'to' being used as a phrase prefix giving us 'Tokyo'. 
+ String query = "SELECT Title FROM %s " + + "WHERE matchphraseprefix(Title, 'in to', analyzer=standard)"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, rows("Local microbreweries and craft beer in Tokyo")); + } + + @Test + public void zero_term_query_all_alternate_syntax() throws IOException { + // English analyzer removes 'in' and 'to' as they are common words. + // zero_terms_query of 'ALL' causes all rows to be returned. + // ORDER BY ... LIMIT helps make the test understandable. + String query = "SELECT Title FROM %s" + + " WHERE matchphraseprefix(Title, 'in to', analyzer=english, zero_terms_query='ALL')" + + " ORDER BY Title DESC" + + " LIMIT 1"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, rows("was working great, now all foam")); + } + + + @Test + public void slop_is_2_alternate_syntax() throws IOException { + // When slop is 2, the terms are matched exactly in the order specified. + // 'ta' is used to match the prefix of the last term. + String query = "SELECT Tags from %s where matchphraseprefix(Tags, 'gas ta', slop=2)"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, rows("taste gas")); + } + + @Test + public void slop_is_3_alternate_syntax() throws IOException { + // When slop is 3, results will include phrases where the query terms are transposed. 
+ String query = "SELECT Tags from %s where matchphraseprefix(Tags, 'gas ta', slop=3)"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, + rows("taste draught gas"), + rows("taste gas")); + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java index ab8fb562dac..5f809ee924b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java @@ -69,6 +69,7 @@ public class FilterQueryBuilder extends ExpressionNodeVisitor arguments = List.of(); - assertThrows(SyntaxCheckException.class, - () -> matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + for (FunctionName funcName: functionNames) { + assertThrows(SyntaxCheckException.class, + () -> matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName))); + } } @Test public void test_SyntaxCheckException_when_one_argument() { List arguments = List.of(dsl.namedArgument("field", "test")); - assertThrows(SyntaxCheckException.class, - () -> matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + for (FunctionName funcName: functionNames) { + assertThrows(SyntaxCheckException.class, + () -> matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName))); + } } @Test @@ -52,8 +59,10 @@ public void test_SyntaxCheckException_when_invalid_parameter() { dsl.namedArgument("field", "test"), dsl.namedArgument("query", "test2"), dsl.namedArgument("unsupported", "3")); - Assertions.assertThrows(SemanticCheckException.class, - () -> matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + for (FunctionName funcName: functionNames) { + Assertions.assertThrows(SemanticCheckException.class, + 
() -> matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName))); + } } @Test @@ -63,7 +72,9 @@ public void test_analyzer_parameter() { dsl.namedArgument("query", "t2"), dsl.namedArgument("analyzer", "standard") ); - Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + for (FunctionName funcName: functionNames) { + Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName))); + } } @Test @@ -71,7 +82,9 @@ public void build_succeeds_with_two_arguments() { List arguments = List.of( dsl.namedArgument("field", "test"), dsl.namedArgument("query", "test2")); - Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + for (FunctionName funcName: functionNames) { + Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName))); + } } @Test @@ -81,7 +94,9 @@ public void test_slop_parameter() { dsl.namedArgument("query", "t2"), dsl.namedArgument("slop", "2") ); - Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + for (FunctionName funcName: functionNames) { + Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName))); + } } @Test @@ -91,7 +106,9 @@ public void test_zero_terms_query_parameter() { dsl.namedArgument("query", "t2"), dsl.namedArgument("zero_terms_query", "ALL") ); - Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + for (FunctionName funcName: functionNames) { + Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName))); + } } @Test @@ -101,7 +118,9 @@ public void test_zero_terms_query_parameter_lower_case() { dsl.namedArgument("query", "t2"), dsl.namedArgument("zero_terms_query", "all") ); - Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + for (FunctionName 
funcName: functionNames) { + Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName))); + } } @Test @@ -111,12 +130,14 @@ public void test_boost_parameter() { dsl.namedArgument("query", "t2"), dsl.namedArgument("boost", "0.1") ); - Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + for (FunctionName funcName: functionNames) { + Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName))); + } } private class MatchPhraseExpression extends FunctionExpression { - public MatchPhraseExpression(List arguments) { - super(MatchPhrasePrefixQueryTest.this.matchPhrasePrefix, arguments); + public MatchPhraseExpression(List arguments, FunctionName funcName) { + super(funcName, arguments); } @Override diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index 9e0a4094019..c8afde801c7 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -298,6 +298,7 @@ MATCH_PHRASE: 'MATCH_PHRASE'; SIMPLE_QUERY_STRING: 'SIMPLE_QUERY_STRING'; QUERY_STRING: 'QUERY_STRING'; MATCH_PHRASE_PREFIX: 'MATCH_PHRASE_PREFIX'; +MATCHPHRASEPREFIX: 'MATCHPHRASEPREFIX'; MATCHQUERY: 'MATCHQUERY'; MATCH_QUERY: 'MATCH_QUERY'; MINUTE_OF_DAY: 'MINUTE_OF_DAY'; diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index c803f2b5c36..31d913de55c 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -427,6 +427,7 @@ systemFunctionName singleFieldRelevanceFunctionName : MATCH | MATCH_PHRASE | MATCHPHRASE | MATCH_BOOL_PREFIX | MATCH_PHRASE_PREFIX + | MATCHPHRASEPREFIX ; multiFieldRelevanceFunctionName diff --git a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java index 6b78376d452..ac1125a290d 100644 --- 
a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java @@ -398,7 +398,8 @@ public void canParseComplexMatchPhraseArgsTest(String query) { @ParameterizedTest @MethodSource({ - "generateMatchPhrasePrefixQueries" + "generateMatchPhrasePrefixQueries", + "generateMatchPhrasePrefixQueries_alternateSyntax" }) public void canParseComplexMatchPhrasePrefixQueries(String query) { assertNotNull(parser.parse(query)); @@ -458,6 +459,16 @@ private static Stream generateMatchPhrasePrefixQueries() { .build()); } + private static Stream generateMatchPhrasePrefixQueries_alternateSyntax() { + return generateQueries("matchphraseprefix", ImmutableMap.builder() + .put("analyzer", new String[] {"standard", "stop", "english"}) + .put("slop", new Integer[] {0, 1, 2}) + .put("max_expansions", new Integer[] {0, 3, 10}) + .put("zero_terms_query", new String[] {"NONE", "ALL", "NULL"}) + .put("boost", new Float[] {-0.5f, 1.0f, 1.2f}) + .build()); + } + private static Stream generateQueries(String function, Map functionArgs) { var rand = new Random(0); diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java index cb00ea2f185..e6ef6504a61 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java @@ -469,6 +469,24 @@ public void matchPhrasePrefixAllParameters() { ); } + @Test + public void matchPhrasePrefixAllParameters_alternateSyntax() { + assertEquals( + AstDSL.function("matchphraseprefix", + unresolvedArg("field", stringLiteral("test")), + unresolvedArg("query", stringLiteral("search query")), + unresolvedArg("slop", stringLiteral("3")), + unresolvedArg("boost", stringLiteral("1.5")), + unresolvedArg("analyzer", stringLiteral("standard")), + 
unresolvedArg("max_expansions", stringLiteral("4")), + unresolvedArg("zero_terms_query", stringLiteral("NONE")) + ), + buildExprAst("matchphraseprefix(test, 'search query', slop = 3, boost = 1.5" + + ", analyzer = 'standard', max_expansions = 4, zero_terms_query='NONE'" + + ")") + ); + } + @Test public void relevanceMatch() { assertEquals(AstDSL.function("match",