diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java index 88e0e1658ba..dd3b0636cc4 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java @@ -59,6 +59,7 @@ import org.opensearch.sql.ast.expression.RelevanceFieldList; import org.opensearch.sql.ast.expression.Span; import org.opensearch.sql.ast.expression.SpanUnit; +import org.opensearch.sql.ast.expression.UnresolvedArgument; import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.expression.When; import org.opensearch.sql.ast.expression.WindowFunction; @@ -669,6 +670,28 @@ public RexNode visitWhen(When node, CalcitePlanContext context) { @Override public RexNode visitRelevanceFieldList(RelevanceFieldList node, CalcitePlanContext context) { - throw new CalciteUnsupportedException("Relevance fields expression is unsupported in Calcite"); + List<RexNode> varArgRexNodeList = new ArrayList<>(); + node.getFieldList() + .forEach( + (k, v) -> { + varArgRexNodeList.add( + context.rexBuilder.makeLiteral( + k, + context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), + true)); + varArgRexNodeList.add( + context.rexBuilder.makeLiteral( + v, + context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.DOUBLE), + true)); + }); + return context.rexBuilder.makeCall( + SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR, varArgRexNodeList); + } + + @Override + public RexNode visitUnresolvedArgument(UnresolvedArgument node, CalcitePlanContext context) { + RexNode value = analyze(node.getValue(), context); + return context.relBuilder.alias(value, node.getArgName()); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java index 5baf9983450..9f691479f5d 100644 
--- a/core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java @@ -10,11 +10,13 @@ import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.*; import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT.*; +import com.google.common.collect.ImmutableSet; import java.time.Instant; import java.time.ZoneId; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Set; import java.util.TimeZone; import javax.annotation.Nullable; import org.apache.calcite.DataContext; @@ -70,6 +72,10 @@ public class UserDefinedFunctionUtils { TYPE_FACTORY.createMapType( TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR), createArrayType(TYPE_FACTORY, TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR), false)); + public static final Set<String> SINGLE_FIELD_RELEVANCE_FUNCTION_SET = + ImmutableSet.of("match", "match_phrase", "match_bool_prefix", "match_phrase_prefix"); + public static final Set<String> MULTI_FIELDS_RELEVANCE_FUNCTION_SET = + ImmutableSet.of("simple_query_string", "query_string", "multi_match"); public static RelBuilder.AggCall TransferUserDefinedAggFunction( Class UDAF, diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java index 1b3a2b7dba8..966a5ee2e22 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java @@ -47,6 +47,7 @@ import org.opensearch.sql.expression.function.jsonUDF.JsonSetFunctionImpl; import org.opensearch.sql.expression.function.udf.CryptographicFunction; import org.opensearch.sql.expression.function.udf.GrokFunction; +import org.opensearch.sql.expression.function.udf.RelevanceQueryFunction; import org.opensearch.sql.expression.function.udf.SpanFunction; 
import org.opensearch.sql.expression.function.udf.condition.EarliestFunction; import org.opensearch.sql.expression.function.udf.condition.LatestFunction; @@ -327,6 +328,22 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable { public static final SqlOperator TRANSFORM = new TransformFunctionImpl().toUDF("transform"); public static final SqlOperator REDUCE = new ReduceFunctionImpl().toUDF("reduce"); + private static final RelevanceQueryFunction RELEVANCE_QUERY_FUNCTION_INSTANCE = + new RelevanceQueryFunction(); + public static final SqlOperator MATCH = RELEVANCE_QUERY_FUNCTION_INSTANCE.toUDF("match"); + public static final SqlOperator MATCH_PHRASE = + RELEVANCE_QUERY_FUNCTION_INSTANCE.toUDF("match_phrase"); + public static final SqlOperator MATCH_BOOL_PREFIX = + RELEVANCE_QUERY_FUNCTION_INSTANCE.toUDF("match_bool_prefix"); + public static final SqlOperator MATCH_PHRASE_PREFIX = + RELEVANCE_QUERY_FUNCTION_INSTANCE.toUDF("match_phrase_prefix"); + public static final SqlOperator SIMPLE_QUERY_STRING = + RELEVANCE_QUERY_FUNCTION_INSTANCE.toUDF("simple_query_string"); + public static final SqlOperator QUERY_STRING = + RELEVANCE_QUERY_FUNCTION_INSTANCE.toUDF("query_string"); + public static final SqlOperator MULTI_MATCH = + RELEVANCE_QUERY_FUNCTION_INSTANCE.toUDF("multi_match"); + /** * Invoking an implementor registered in {@link RexImpTable}, need to use reflection since they're * all private Use method directly in {@link BuiltInMethod} if possible, most operators' diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 0019e6819d6..84e11d170f1 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -119,6 +119,10 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.LTRIM; import static 
org.opensearch.sql.expression.function.BuiltinFunctionName.MAKEDATE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAKETIME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_BOOL_PREFIX; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_PHRASE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_PHRASE_PREFIX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MD5; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MICROSECOND; @@ -133,6 +137,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTHNAME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTH_OF_YEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTI_MATCH; import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOTEQUAL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOW; @@ -146,6 +151,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.POW; import static org.opensearch.sql.expression.function.BuiltinFunctionName.POWER; import static org.opensearch.sql.expression.function.BuiltinFunctionName.QUARTER; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.QUERY_STRING; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RADIANS; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RAND; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REDUCE; @@ -161,6 +167,7 @@ import static 
org.opensearch.sql.expression.function.BuiltinFunctionName.SHA1; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SHA2; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SIGN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SIMPLE_QUERY_STRING; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SIN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPAN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SQRT; @@ -693,6 +700,13 @@ void populate() { registerOperator(SHA2, PPLBuiltinOperators.SHA2); registerOperator(CIDRMATCH, PPLBuiltinOperators.CIDRMATCH); registerOperator(INTERNAL_GROK, PPLBuiltinOperators.GROK); + registerOperator(MATCH, PPLBuiltinOperators.MATCH); + registerOperator(MATCH_PHRASE, PPLBuiltinOperators.MATCH_PHRASE); + registerOperator(MATCH_BOOL_PREFIX, PPLBuiltinOperators.MATCH_BOOL_PREFIX); + registerOperator(MATCH_PHRASE_PREFIX, PPLBuiltinOperators.MATCH_PHRASE_PREFIX); + registerOperator(SIMPLE_QUERY_STRING, PPLBuiltinOperators.SIMPLE_QUERY_STRING); + registerOperator(QUERY_STRING, PPLBuiltinOperators.QUERY_STRING); + registerOperator(MULTI_MATCH, PPLBuiltinOperators.MULTI_MATCH); // Register PPL Datetime UDF operator registerOperator(TIMESTAMP, PPLBuiltinOperators.TIMESTAMP); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/RelevanceQueryFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/RelevanceQueryFunction.java new file mode 100644 index 00000000000..c7adf5f73b0 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/RelevanceQueryFunction.java @@ -0,0 +1,98 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import 
org.apache.calcite.adapter.enumerable.NotNullImplementor; +import org.apache.calcite.adapter.enumerable.NullPolicy; +import org.apache.calcite.adapter.enumerable.RexToLixTranslator; +import org.apache.calcite.linq4j.tree.Expression; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.sql.type.CompositeOperandTypeChecker; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.ReturnTypes; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeFamily; +import org.opensearch.sql.expression.function.ImplementorUDF; +import org.opensearch.sql.expression.function.UDFOperandMetadata; + +public class RelevanceQueryFunction extends ImplementorUDF { + + public RelevanceQueryFunction() { + super(new RelevanceQueryImplementor(), NullPolicy.ANY); + } + + @Override + public SqlReturnTypeInference getReturnTypeInference() { + return ReturnTypes.BOOLEAN; + } + + /* + * Starting from the 3rd parameter, they are optional parameters for relevance queries. 
+ * Each query has its own parameter set, which is validated in its dedicated query builder. + */ + @Override + public UDFOperandMetadata getOperandMetadata() { + return UDFOperandMetadata.wrap( + (CompositeOperandTypeChecker) + OperandTypes.family( + ImmutableList.of( + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING), + i -> i > 1 && i < 14) // Parameters 3-14 are optional + .or( + OperandTypes.family( + ImmutableList.of( + SqlTypeFamily.MAP, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING, + SqlTypeFamily.STRING), + i -> i > 1 && i < 25))); // Parameters 3-25 are optional + } + + public static class RelevanceQueryImplementor implements NotNullImplementor { + @Override + public Expression implement( + RexToLixTranslator translator, RexCall call, List translatedOperands) { + throw new UnsupportedOperationException( + "Relevance search query functions are only supported when they are pushed down"); + } + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/pushdown/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/pushdown/CalciteNoPushdownIT.java index 14ae021e66a..865f652384b 100644 --- 
a/integ-test/src/test/java/org/opensearch/sql/calcite/pushdown/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/pushdown/CalciteNoPushdownIT.java @@ -47,9 +47,6 @@ CalciteJsonFunctionsIT.class, CalciteLegacyAPICompatibilityIT.class, CalciteLikeQueryIT.class, - CalciteMatchBoolPrefixIT.class, - CalciteMatchIT.class, - CalciteMatchPhraseIT.class, CalciteMathematicalFunctionIT.class, CalciteNewAddedCommandsIT.class, CalciteNowLikeFunctionIT.class, @@ -87,15 +84,12 @@ CalcitePPLTrendlineIT.class, CalcitePrometheusDataSourceCommandsIT.class, CalciteQueryAnalysisIT.class, - CalciteQueryStringIT.class, CalciteRareCommandIT.class, - CalciteRelevanceFunctionIT.class, CalciteRenameCommandIT.class, CalciteResourceMonitorIT.class, CalciteSearchCommandIT.class, CalciteSettingsIT.class, CalciteShowDataSourcesCommandIT.class, - CalciteSimpleQueryStringIT.class, CalciteSortCommandIT.class, CalciteStatsCommandIT.class, CalciteSystemFunctionIT.class, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMatchBoolPrefixIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMatchBoolPrefixIT.java index f1fdee66b8f..8dde7c6775a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMatchBoolPrefixIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMatchBoolPrefixIT.java @@ -12,7 +12,6 @@ public class CalciteMatchBoolPrefixIT extends MatchBoolPrefixIT { public void init() throws Exception { super.init(); enableCalcite(); - // TODO: "https://github.com/opensearch-project/sql/issues/3462" - // disallowCalciteFallback(); + disallowCalciteFallback(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMatchIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMatchIT.java index 19dfe09f2b6..d9ba581e40e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMatchIT.java +++ 
b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMatchIT.java @@ -12,7 +12,6 @@ public class CalciteMatchIT extends MatchIT { public void init() throws Exception { super.init(); enableCalcite(); - // TODO: "https://github.com/opensearch-project/sql/issues/3462" - // disallowCalciteFallback(); + disallowCalciteFallback(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMatchPhraseIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMatchPhraseIT.java index f50d03498a4..b324d859171 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMatchPhraseIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMatchPhraseIT.java @@ -12,7 +12,6 @@ public class CalciteMatchPhraseIT extends MatchPhraseIT { public void init() throws Exception { super.init(); enableCalcite(); - // TODO: "https://github.com/opensearch-project/sql/issues/3462" - // disallowCalciteFallback(); + disallowCalciteFallback(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMatchPhrasePrefixIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMatchPhrasePrefixIT.java new file mode 100644 index 00000000000..5ca7d8487e9 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMatchPhrasePrefixIT.java @@ -0,0 +1,17 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import org.opensearch.sql.ppl.MatchPhrasePrefixIT; + +public class CalciteMatchPhrasePrefixIT extends MatchPhrasePrefixIT { + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + disallowCalciteFallback(); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultiMatchIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultiMatchIT.java new file mode 100644 index 
00000000000..be5bd3d194f --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultiMatchIT.java @@ -0,0 +1,17 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import org.opensearch.sql.ppl.MultiMatchIT; + +public class CalciteMultiMatchIT extends MultiMatchIT { + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + disallowCalciteFallback(); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteQueryStringIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteQueryStringIT.java index 04f492987ad..cb16e5c8b1c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteQueryStringIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteQueryStringIT.java @@ -12,7 +12,6 @@ public class CalciteQueryStringIT extends QueryStringIT { public void init() throws Exception { super.init(); enableCalcite(); - // TODO: "https://github.com/opensearch-project/sql/issues/3462" - // disallowCalciteFallback(); + disallowCalciteFallback(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRelevanceFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRelevanceFunctionIT.java index 5f3760e4b1a..85ac2554e09 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRelevanceFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRelevanceFunctionIT.java @@ -12,7 +12,6 @@ public class CalciteRelevanceFunctionIT extends RelevanceFunctionIT { public void init() throws Exception { super.init(); enableCalcite(); - // TODO: "https://github.com/opensearch-project/sql/issues/3462" - // disallowCalciteFallback(); + disallowCalciteFallback(); } } diff --git 
a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteSimpleQueryStringIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteSimpleQueryStringIT.java index 9ddd649d94f..b1fa95e0e2d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteSimpleQueryStringIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteSimpleQueryStringIT.java @@ -12,8 +12,6 @@ public class CalciteSimpleQueryStringIT extends SimpleQueryStringIT { public void init() throws Exception { super.init(); enableCalcite(); - // Search Functions are not supported - // TODO: "https://github.com/opensearch-project/sql/issues/3462" - // disallowCalciteFallback(); + disallowCalciteFallback(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 07216e601f8..dbba294a5f9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -449,4 +449,33 @@ public void testStatsByTimeSpan() throws IOException { explainQueryToString( String.format("source=%s | stats count() by span(birthdate,1M)", TEST_INDEX_BANK))); } + + @Test + public void testSingleFieldRelevanceQueryFunctionExplain() throws IOException { + String expected = + isCalciteEnabled() + ? loadFromFile("expectedOutput/calcite/explain_single_field_relevance_push.json") + : loadFromFile("expectedOutput/ppl/explain_single_field_relevance_push.json"); + + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account" + + "| where match(email, '*@gmail.com', boost=1.0)")); + } + + @Test + public void testMultiFieldsRelevanceQueryFunctionExplain() throws IOException { + String expected = + isCalciteEnabled() + ? 
loadFromFile("expectedOutput/calcite/explain_multi_fields_relevance_push.json") + : loadFromFile("expectedOutput/ppl/explain_multi_fields_relevance_push.json"); + + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account" + + "| where simple_query_string(['email', name 4.0], 'gmail'," + + " default_operator='or', analyzer=english)")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/MatchPhrasePrefixIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/MatchPhrasePrefixIT.java index 8fc2b07cdc4..4023cc92d79 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/MatchPhrasePrefixIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/MatchPhrasePrefixIT.java @@ -89,9 +89,9 @@ public void zero_term_query_all() throws IOException { // ORDER BY ... LIMIT helps make the test understandable. String query = "source = %s| WHERE match_phrase_prefix(Title, 'in to', analyzer=english," - + " zero_terms_query='ALL') | sort -Title | head 1 | fields Title"; + + " zero_terms_query='ALL') | head 1 | fields Title"; JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER)); - verifyDataRows(result, rows("was working great, now all foam")); + verifyDataRows(result, rows("How do you mull beer?")); } @Test diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/RelevanceFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/RelevanceFunctionIT.java index 8533b9ee0cc..86228c53acc 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/RelevanceFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/RelevanceFunctionIT.java @@ -128,4 +128,29 @@ public void verify_operator_in_match() throws IOException { assertEquals(16, result1.getInt("total")); assertEquals(4, result2.getInt("total")); } + + @Test + public void test_mixed_relevance_function_and_normal_filter() throws IOException { + String query1 = + "SOURCE=" + + TEST_INDEX_BEER + + " | WHERE 
simple_query_string(['Tags'], 'taste') and AcceptedAnswerId > 200 | fields" + + " Id"; + var result1 = executeQuery(query1); + String query2 = + "SOURCE=" + TEST_INDEX_BEER + " | WHERE simple_query_string(['Tags'], 'taste') | fields Id"; + var result2 = executeQuery(query2); + assertEquals(5, result1.getInt("total")); + assertEquals(8, result2.getInt("total")); + } + + @Test + public void not_pushdown_throws_exception() throws IOException { + String query1 = + "SOURCE=" + + TEST_INDEX_BEER + + " | STATS count(AcceptedAnswerId) as idCount" + + " | WHERE simple_query_string(['Tags'], 'taste') and idCount > 200"; + assertThrows(Exception.class, () -> executeQuery(query1)); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_multi_fields_relevance_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_multi_fields_relevance_push.json new file mode 100644 index 00000000000..254b22b34b1 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_multi_fields_relevance_push.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[simple_query_string(AS(MAP('name':VARCHAR, 4.0E0:DOUBLE, 'email':VARCHAR, 1.0E0:DOUBLE), 'fields'), AS('gmail':VARCHAR, 'query'), AS('or':VARCHAR, 'default_operator'), AS('english':VARCHAR, 'analyzer'))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->simple_query_string(AS(MAP('name':VARCHAR, 4.0E0:DOUBLE, 'email':VARCHAR, 1.0E0:DOUBLE), 'fields'), AS('gmail':VARCHAR, 'query'), AS('or':VARCHAR, 'default_operator'), 
AS('english':VARCHAR, 'analyzer'))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"simple_query_string\":{\"query\":\"gmail\",\"fields\":[\"name^4.0\",\"email^1.0\"],\"analyzer\":\"english\",\"flags\":-1,\"default_operator\":\"or\",\"analyze_wildcard\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_single_field_relevance_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_single_field_relevance_push.json new file mode 100644 index 00000000000..6ad7b285050 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_single_field_relevance_push.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[match(AS($9, 'field'), AS('*@gmail.com':VARCHAR, 'query'), AS('1.0':VARCHAR, 'boost'))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->match(AS($9, 'field'), AS('*@gmail.com':VARCHAR, 'query'), AS('1.0':VARCHAR, 'boost'))], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"match\":{\"email\":{\"query\":\"*@gmail.com\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_multi_fields_relevance_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_multi_fields_relevance_push.json new file mode 100644 index 00000000000..5f9602109a4 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_multi_fields_relevance_push.json @@ -0,0 +1,15 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]" + }, + "children": [{ + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"simple_query_string\":{\"query\":\"gmail\",\"fields\":[\"name^4.0\",\"email^1.0\"],\"analyzer\":\"english\",\"flags\":-1,\"default_operator\":\"or\",\"analyze_wildcard\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, needClean=true, 
searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + }, + "children": [] + }] + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_single_field_relevance_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_single_field_relevance_push.json new file mode 100644 index 00000000000..93d84073f77 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_single_field_relevance_push.json @@ -0,0 +1,15 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]" + }, + "children": [{ + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"match\":{\"email\":{\"query\":\"*@gmail.com\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + }, + "children": [] + }] + } +} \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 0adb0992b5d..7111e5d6c7b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -37,6 +37,8 @@ import 
static org.opensearch.index.query.QueryBuilders.regexpQuery; import static org.opensearch.index.query.QueryBuilders.termQuery; import static org.opensearch.index.query.QueryBuilders.termsQuery; +import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.MULTI_FIELDS_RELEVANCE_FUNCTION_SET; +import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.SINGLE_FIELD_RELEVANCE_FUNCTION_SET; import com.google.common.base.Throwables; import com.google.common.collect.Range; @@ -44,7 +46,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.GregorianCalendar; -import java.util.LinkedHashMap; +import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -57,7 +59,9 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.SqlSyntax; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.NlsString; @@ -69,6 +73,13 @@ import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType; import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchBoolPrefixQuery; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhrasePrefixQuery; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhraseQuery; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchQuery; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MultiMatchQuery; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.QueryStringQuery; +import 
org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.SimpleQueryStringQuery; /** * Query predicate analyzer. Uses visitor pattern to traverse existing expression and convert it to @@ -278,14 +289,7 @@ public Expression visitCall(RexCall call) { } }; case FUNCTION: - if (call.getOperator().getName().equalsIgnoreCase("CONTAINS")) { - List operands = visitList(call.getOperands()); - String query = - convertQueryString( - operands.subList(0, operands.size() - 1), operands.get(operands.size() - 1)); - return QueryExpression.create(new NamedFieldExpression()).queryString(query); - } - // fall through + return visitRelevanceFunc(call); default: String message = format(Locale.ROOT, "Unsupported syntax [%s] for call: [%s]", syntax, call); @@ -293,23 +297,123 @@ public Expression visitCall(RexCall call) { } } - private static String convertQueryString(List fields, Expression query) { - int index = 0; - checkArgument(query instanceof LiteralExpression, "Query string must be a string literal"); - String queryString = ((LiteralExpression) query).stringValue(); - @SuppressWarnings("ModifiedButNotUsed") - Map fieldMap = new LinkedHashMap<>(); - for (Expression expr : fields) { - if (expr instanceof NamedFieldExpression) { - NamedFieldExpression field = (NamedFieldExpression) expr; - String fieldIndexString = format(Locale.ROOT, "$%d", index++); - fieldMap.put(fieldIndexString, field.getReference()); + private QueryExpression visitRelevanceFunc(RexCall call) { + String funcName = call.getOperator().getName().toLowerCase(Locale.ROOT); + List ops = call.getOperands(); + assert ops.size() >= 2 : "Relevance query function should at least have 2 operands"; + + if (SINGLE_FIELD_RELEVANCE_FUNCTION_SET.contains(funcName)) { + List fieldQueryOperands = + visitList( + List.of( + AliasPair.from(ops.get(0), funcName).value, + AliasPair.from(ops.get(1), funcName).value)); + NamedFieldExpression namedFieldExpression = + (NamedFieldExpression) fieldQueryOperands.get(0); + String 
queryLiteralOperand = ((LiteralExpression) fieldQueryOperands.get(1)).stringValue(); + Map optionalArguments = + parseRelevanceFunctionOptionalArguments(ops, funcName); + + return SINGLE_FIELD_RELEVANCE_FUNCTION_HANDLERS + .get(funcName) + .apply(namedFieldExpression, queryLiteralOperand, optionalArguments); + } else if (MULTI_FIELDS_RELEVANCE_FUNCTION_SET.contains(funcName)) { + RexCall fieldsRexCall = (RexCall) AliasPair.from(ops.get(0), funcName).value; + String queryLiteralOperand = + ((LiteralExpression) + visitList(List.of(AliasPair.from(ops.get(1), funcName).value)).get(0)) + .stringValue(); + Map optionalArguments = + parseRelevanceFunctionOptionalArguments(ops, funcName); + + return MULTI_FIELDS_RELEVANCE_FUNCTION_HANDLERS + .get(funcName) + .apply(fieldsRexCall, queryLiteralOperand, optionalArguments); + } + + throw new PredicateAnalyzerException( + String.format(Locale.ROOT, "Unsupported search relevance function: [%s]", funcName)); + } + + @FunctionalInterface + private interface SingleFieldRelevanceFunctionHandler { + QueryExpression apply(NamedFieldExpression field, String query, Map opts); + } + + @FunctionalInterface + private interface MultiFieldsRelevanceFunctionHandler { + QueryExpression apply(RexCall fields, String query, Map opts); + } + + private static final Map + SINGLE_FIELD_RELEVANCE_FUNCTION_HANDLERS = + Map.of( + "match", (f, q, o) -> QueryExpression.create(f).match(q, o), + "match_phrase", (f, q, o) -> QueryExpression.create(f).matchPhrase(q, o), + "match_bool_prefix", (f, q, o) -> QueryExpression.create(f).matchBoolPrefix(q, o), + "match_phrase_prefix", + (f, q, o) -> QueryExpression.create(f).matchPhrasePrefix(q, o)); + + private static final Map + MULTI_FIELDS_RELEVANCE_FUNCTION_HANDLERS = + Map.of( + "simple_query_string", + (c, q, o) -> + QueryExpression.create(new NamedFieldExpression()) + .simpleQueryString(c, q, o), + "query_string", + (c, q, o) -> + QueryExpression.create(new NamedFieldExpression()).queryString(c, q, o), + 
"multi_match", + (c, q, o) -> + QueryExpression.create(new NamedFieldExpression()).multiMatch(c, q, o)); + + private Map parseRelevanceFunctionOptionalArguments( + List operands, String funcName) { + Map optionalArguments = new HashMap<>(); + + for (int i = 2; i < operands.size(); i++) { + AliasPair aliasPair = AliasPair.from(operands.get(i), funcName); + String key = ((RexLiteral) aliasPair.alias).getValueAs(String.class); + if (optionalArguments.containsKey(key)) { + throw new PredicateAnalyzerException( + String.format( + Locale.ROOT, + "Parameter '%s' can only be specified once for function [%s].", + key, + funcName)); } + optionalArguments.put(key, ((RexLiteral) aliasPair.value).getValueAs(String.class)); + } + + return optionalArguments; + } + + private static RexCall expectCall(RexNode node, SqlOperator op, String funcName) { + if (!(node instanceof RexCall call) || call.getOperator() != op) { + throw new IllegalArgumentException( + String.format( + Locale.ROOT, + "Expect [%s] RexCall but get [%s] for function [%s]", + op.getName(), + node.toString(), + funcName)); } - try { - return queryString; - } catch (Exception e) { - throw new PredicateAnalyzerException(e); + return call; + } + + private static class AliasPair { + final RexNode value; + final RexNode alias; + + static AliasPair from(RexNode node, String funcName) { + RexCall as = expectCall(node, SqlStdOperatorTable.AS, funcName); + return new AliasPair(as.getOperands().get(0), as.getOperands().get(1)); + } + + private AliasPair(RexNode value, RexNode alias) { + this.value = value; + this.alias = alias; } } @@ -608,7 +712,25 @@ public boolean isPartial() { public abstract QueryExpression lte(LiteralExpression literal); - public abstract QueryExpression queryString(String query); + public abstract QueryExpression match(String query, Map optionalArguments); + + public abstract QueryExpression matchPhrase( + String query, Map optionalArguments); + + public abstract QueryExpression matchBoolPrefix( + 
String query, Map optionalArguments); + + public abstract QueryExpression matchPhrasePrefix( + String query, Map optionalArguments); + + public abstract QueryExpression simpleQueryString( + RexCall fieldsRexCall, String query, Map optionalArguments); + + public abstract QueryExpression queryString( + RexCall fieldsRexCall, String query, Map optionalArguments); + + public abstract QueryExpression multiMatch( + RexCall fieldsRexCall, String query, Map optionalArguments); public abstract QueryExpression isTrue(); @@ -621,7 +743,7 @@ public static QueryExpression create(TerminalExpression expression) { return new SimpleQueryExpression((NamedFieldExpression) expression); } else { String message = format(Locale.ROOT, "Unsupported expression: [%s]", expression); - throw new PredicateAnalyzerException(message); + throw new PredicateAnalyzer.PredicateAnalyzerException(message); } } } @@ -748,11 +870,49 @@ public QueryExpression lte(LiteralExpression literal) { } @Override - public QueryExpression queryString(String query) { + public QueryExpression match(String query, Map optionalArguments) { + throw new PredicateAnalyzerException("Match " + "cannot be applied to a compound expression"); + } + + @Override + public QueryExpression matchPhrase(String query, Map optionalArguments) { + throw new PredicateAnalyzerException( + "MatchPhrase " + "cannot be applied to a compound expression"); + } + + @Override + public QueryExpression matchBoolPrefix(String query, Map optionalArguments) { + throw new PredicateAnalyzerException( + "MatchBoolPrefix " + "cannot be applied to a compound expression"); + } + + @Override + public QueryExpression matchPhrasePrefix(String query, Map optionalArguments) { + throw new PredicateAnalyzerException( + "MatchPhrasePrefix " + "cannot be applied to a compound expression"); + } + + @Override + public QueryExpression simpleQueryString( + RexCall fieldsRexCall, String query, Map optionalArguments) { + throw new PredicateAnalyzerException( + 
"SimpleQueryString " + "cannot be applied to a compound expression"); + } + + @Override + public QueryExpression queryString( + RexCall fieldsRexCall, String query, Map optionalArguments) { throw new PredicateAnalyzerException( "QueryString " + "cannot be applied to a compound expression"); } + @Override + public QueryExpression multiMatch( + RexCall fieldsRexCall, String query, Map optionalArguments) { + throw new PredicateAnalyzerException( + "MultiMatch " + "cannot be applied to a compound expression"); + } + @Override public QueryExpression isTrue() { throw new PredicateAnalyzerException("isTrue cannot be applied to a compound expression"); @@ -898,8 +1058,48 @@ public QueryExpression lte(LiteralExpression literal) { } @Override - public QueryExpression queryString(String query) { - throw new UnsupportedOperationException("QueryExpression not yet supported: " + query); + public QueryExpression match(String query, Map optionalArguments) { + builder = new MatchQuery().build(getFieldReference(), query, optionalArguments); + return this; + } + + @Override + public QueryExpression matchPhrase(String query, Map optionalArguments) { + builder = new MatchPhraseQuery().build(getFieldReference(), query, optionalArguments); + return this; + } + + @Override + public QueryExpression matchBoolPrefix(String query, Map optionalArguments) { + builder = new MatchBoolPrefixQuery().build(getFieldReference(), query, optionalArguments); + return this; + } + + @Override + public QueryExpression matchPhrasePrefix(String query, Map optionalArguments) { + builder = new MatchPhrasePrefixQuery().build(getFieldReference(), query, optionalArguments); + return this; + } + + @Override + public QueryExpression simpleQueryString( + RexCall fieldsRexCall, String query, Map optionalArguments) { + builder = new SimpleQueryStringQuery().build(fieldsRexCall, query, optionalArguments); + return this; + } + + @Override + public QueryExpression queryString( + RexCall fieldsRexCall, String query, Map 
optionalArguments) { + builder = new QueryStringQuery().build(fieldsRexCall, query, optionalArguments); + return this; + } + + @Override + public QueryExpression multiMatch( + RexCall fieldsRexCall, String query, Map optionalArguments) { + builder = new MultiMatchQuery().build(fieldsRexCall, query, optionalArguments); + return this; } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/MultiFieldQuery.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/MultiFieldQuery.java index b6e854a3f8b..c05bdef1df4 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/MultiFieldQuery.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/MultiFieldQuery.java @@ -8,6 +8,12 @@ import com.google.common.collect.ImmutableMap; import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.stream.IntStream; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.NlsString; import org.opensearch.index.query.QueryBuilder; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.NamedArgumentExpression; @@ -46,4 +52,35 @@ public T createQueryBuilder(List arguments) { } protected abstract T createBuilder(ImmutableMap fields, String query); + + /** + * Build multi-fields relevance query builder based on Calcite function's operands. For + * MultiFieldQuery, fields with weights and query string parameter are required. 
+ * + * @param fieldsRexCall Calcite MAP RexCall that wraps multi-fields and corresponding weights + * @param query String query to search + * @param optionalArguments Map contains optional relevance query argument key value pairs + * @return Final QueryBuilder + */ + public T build(RexCall fieldsRexCall, String query, Map optionalArguments) { + List fieldAndWeightNodes = fieldsRexCall.getOperands(); + ImmutableMap fields = + IntStream.range(0, fieldsRexCall.getOperands().size() / 2) + .map(i -> i * 2) + .mapToObj( + i -> { + RexLiteral fieldLiteral = (RexLiteral) fieldAndWeightNodes.get(i); + RexLiteral weightLiteral = (RexLiteral) fieldAndWeightNodes.get(i + 1); + String field = + ((NlsString) Objects.requireNonNull(fieldLiteral.getValue())).getValue(); + Float weight = + ((Double) Objects.requireNonNull(weightLiteral.getValue())).floatValue(); + return Map.entry(field, weight); + }) + .collect(ImmutableMap.toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)); + + T queryBuilder = createBuilder(fields, query); + + return applyArguments(queryBuilder, optionalArguments); + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/RelevanceQuery.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/RelevanceQuery.java index 87faf320ec6..6ff4ad83689 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/RelevanceQuery.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/RelevanceQuery.java @@ -14,6 +14,7 @@ import lombok.RequiredArgsConstructor; import org.opensearch.index.query.QueryBuilder; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.data.model.ExprStringValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.exception.SemanticCheckException; import 
org.opensearch.sql.expression.FunctionExpression; @@ -86,6 +87,27 @@ public QueryBuilder build(FunctionExpression func) { return loadArguments(arguments); } + /** + * Enrich initially created opensearch index query builder with optional arguments that are + * wrapped in Calcite MAP RexCall. + * + * @param queryBuilder queryBuilder Initially created opensearch index relevance query builder + * @param optionalArguments Map contains optional relevance query argument key value pairs + * @return enriched QueryBuilder + */ + protected T applyArguments(T queryBuilder, Map optionalArguments) { + if (optionalArguments != null && !optionalArguments.isEmpty()) { + optionalArguments.forEach( + (k, v) -> { + checkValidArguments(k, queryBuilder); + (Objects.requireNonNull(getQueryBuildActions().get(k))) + .apply(queryBuilder, new ExprStringValue(v)); + }); + } + + return queryBuilder; + } + protected abstract T createQueryBuilder(List arguments); protected abstract String getQueryName(); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/SingleFieldQuery.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/SingleFieldQuery.java index 086aaddc5ee..e46702720c0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/SingleFieldQuery.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/SingleFieldQuery.java @@ -44,4 +44,18 @@ protected T createQueryBuilder(List arguments) { } protected abstract T createBuilder(String field, String query); + + /** + * Build single field relevance query builder based on Calcite function's operands. For + * SingleFieldQuery, field and query string parameter are required. 
+ * + * @param field Target field name + * @param query String query to search + * @param optionalArguments Map contains optional relevance query argument key value pairs + * @return Final QueryBuilder + */ + public T build(String field, String query, Map optionalArguments) { + T queryBuilder = createBuilder(field, query); + return applyArguments(queryBuilder, optionalArguments); + } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java index 7a9bfc64f89..29c0452d429 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java @@ -11,6 +11,7 @@ import com.google.common.collect.ImmutableList; import java.math.BigDecimal; +import java.util.Arrays; import java.util.List; import java.util.Map; import org.apache.calcite.rel.type.RelDataTypeFactory; @@ -26,12 +27,19 @@ import org.junit.jupiter.api.Test; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.ExistsQueryBuilder; +import org.opensearch.index.query.MatchBoolPrefixQueryBuilder; +import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder; +import org.opensearch.index.query.MatchPhraseQueryBuilder; import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.MultiMatchQueryBuilder; import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryStringQueryBuilder; import org.opensearch.index.query.RangeQueryBuilder; +import org.opensearch.index.query.SimpleQueryStringBuilder; import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.index.query.TermsQueryBuilder; import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.expression.function.PPLFuncImpTable; import 
org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType; import org.opensearch.sql.opensearch.request.PredicateAnalyzer.ExpressionNotAnalyzableException; @@ -53,6 +61,10 @@ public class PredicateAnalyzerTest { builder.makeInputRef(typeFactory.createSqlType(SqlTypeName.VARCHAR), 1); final RexLiteral numericLiteral = builder.makeExactLiteral(new BigDecimal(12)); final RexLiteral stringLiteral = builder.makeLiteral("Hi"); + final RexNode aliasedField2 = + builder.makeCall(SqlStdOperatorTable.AS, field2, builder.makeLiteral("field")); + final RexNode aliasedStringLiteral = + builder.makeCall(SqlStdOperatorTable.AS, stringLiteral, builder.makeLiteral("query")); @Test void equals_generatesTermQuery() throws ExpressionNotAnalyzableException { @@ -289,6 +301,265 @@ void contains_generatesMatchQuery() throws ExpressionNotAnalyzableException { result.toString()); } + @Test + void matchRelevanceQueryFunction_generatesMatchQuery() throws ExpressionNotAnalyzableException { + List arguments = Arrays.asList(aliasedField2, aliasedStringLiteral); + RexNode call = + PPLFuncImpTable.INSTANCE.resolve(builder, "match", arguments.toArray(new RexNode[0])); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + assertInstanceOf(MatchQueryBuilder.class, result); + assertEquals( + """ + { + "match" : { + "b" : { + "query" : "Hi", + "operator" : "OR", + "prefix_length" : 0, + "max_expansions" : 50, + "fuzzy_transpositions" : true, + "lenient" : false, + "zero_terms_query" : "NONE", + "auto_generate_synonyms_phrase_query" : true, + "boost" : 1.0 + } + } + }""", + result.toString()); + } + + @Test + void matchPhraseRelevanceQueryFunction_generatesMatchPhraseQuery() + throws ExpressionNotAnalyzableException { + List arguments = + Arrays.asList( + aliasedField2, + aliasedStringLiteral, + builder.makeCall( + SqlStdOperatorTable.AS, builder.makeLiteral("2"), builder.makeLiteral("slop"))); + 
RexNode call = + PPLFuncImpTable.INSTANCE.resolve( + builder, "match_phrase", arguments.toArray(new RexNode[0])); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + assertInstanceOf(MatchPhraseQueryBuilder.class, result); + assertEquals( + """ + { + "match_phrase" : { + "b" : { + "query" : "Hi", + "slop" : 2, + "zero_terms_query" : "NONE", + "boost" : 1.0 + } + } + }""", + result.toString()); + } + + @Test + void matchBoolPrefixRelevanceQueryFunction_generatesMatchBoolPrefixQuery() + throws ExpressionNotAnalyzableException { + List arguments = + Arrays.asList( + aliasedField2, + aliasedStringLiteral, + builder.makeCall( + SqlStdOperatorTable.AS, + builder.makeLiteral("1"), + builder.makeLiteral("minimum_should_match"))); + RexNode call = + PPLFuncImpTable.INSTANCE.resolve( + builder, "match_bool_prefix", arguments.toArray(new RexNode[0])); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + assertInstanceOf(MatchBoolPrefixQueryBuilder.class, result); + assertEquals( + """ + { + "match_bool_prefix" : { + "b" : { + "query" : "Hi", + "operator" : "OR", + "minimum_should_match" : "1", + "prefix_length" : 0, + "max_expansions" : 50, + "fuzzy_transpositions" : true, + "boost" : 1.0 + } + } + }""", + result.toString()); + } + + @Test + void matchPhrasePrefixRelevanceQueryFunction_generatesMatchPhrasePrefixQuery() + throws ExpressionNotAnalyzableException { + List arguments = + Arrays.asList( + aliasedField2, + aliasedStringLiteral, + builder.makeCall( + SqlStdOperatorTable.AS, + builder.makeLiteral("standard"), + builder.makeLiteral("analyzer"))); + RexNode call = + PPLFuncImpTable.INSTANCE.resolve( + builder, "match_phrase_prefix", arguments.toArray(new RexNode[0])); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + assertInstanceOf(MatchPhrasePrefixQueryBuilder.class, result); + assertEquals( + """ + { + "match_phrase_prefix" : { + "b" : { + "query" : "Hi", + "analyzer" : "standard", + 
"slop" : 0, + "max_expansions" : 50, + "zero_terms_query" : "NONE", + "boost" : 1.0 + } + } + }""", + result.toString()); + } + + @Test + void queryStringRelevanceQueryFunction_generatesQueryStringQuery() + throws ExpressionNotAnalyzableException { + List arguments = + Arrays.asList( + builder.makeCall( + SqlStdOperatorTable.AS, + builder.makeCall( + SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR, + builder.makeLiteral("b"), + builder.makeLiteral( + 1.0, builder.getTypeFactory().createSqlType(SqlTypeName.DOUBLE), true), + builder.makeLiteral("c"), + builder.makeLiteral( + 2.5, builder.getTypeFactory().createSqlType(SqlTypeName.DOUBLE), true)), + builder.makeLiteral("fields")), + aliasedStringLiteral, + builder.makeCall( + SqlStdOperatorTable.AS, + builder.makeLiteral("1"), + builder.makeLiteral("fuzziness"))); + RexNode call = + PPLFuncImpTable.INSTANCE.resolve( + builder, "query_string", arguments.toArray(new RexNode[0])); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + assertInstanceOf(QueryStringQueryBuilder.class, result); + assertEquals( + """ + { + "query_string" : { + "query" : "Hi", + "fields" : [ + "b^1.0", + "c^2.5" + ], + "type" : "best_fields", + "default_operator" : "or", + "max_determinized_states" : 10000, + "enable_position_increments" : true, + "fuzziness" : "1", + "fuzzy_prefix_length" : 0, + "fuzzy_max_expansions" : 50, + "phrase_slop" : 0, + "escape" : false, + "auto_generate_synonyms_phrase_query" : true, + "fuzzy_transpositions" : true, + "boost" : 1.0 + } + }""", + result.toString()); + } + + @Test + void simpleQueryStringRelevanceQueryFunction_generatesSimpleQueryStringQuery() + throws ExpressionNotAnalyzableException { + List arguments = + Arrays.asList( + builder.makeCall( + SqlStdOperatorTable.AS, + builder.makeCall( + SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR, + builder.makeLiteral("b*"), + builder.makeLiteral( + 1.0, builder.getTypeFactory().createSqlType(SqlTypeName.DOUBLE), true)), + 
builder.makeLiteral("fields")), + aliasedStringLiteral); + RexNode call = + PPLFuncImpTable.INSTANCE.resolve( + builder, "simple_query_string", arguments.toArray(new RexNode[0])); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + assertInstanceOf(SimpleQueryStringBuilder.class, result); + assertEquals( + """ + { + "simple_query_string" : { + "query" : "Hi", + "fields" : [ + "b*^1.0" + ], + "flags" : -1, + "default_operator" : "or", + "analyze_wildcard" : false, + "auto_generate_synonyms_phrase_query" : true, + "fuzzy_prefix_length" : 0, + "fuzzy_max_expansions" : 50, + "fuzzy_transpositions" : true, + "boost" : 1.0 + } + }""", + result.toString()); + } + + @Test + void multiMatchRelevanceQueryFunction_generatesMultiMatchQuery() + throws ExpressionNotAnalyzableException { + List arguments = + Arrays.asList( + builder.makeCall( + SqlStdOperatorTable.AS, + builder.makeCall( + SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR, + builder.makeLiteral("b*"), + builder.makeLiteral( + 1.0, builder.getTypeFactory().createSqlType(SqlTypeName.DOUBLE), true)), + builder.makeLiteral("fields")), + aliasedStringLiteral, + builder.makeCall( + SqlStdOperatorTable.AS, + builder.makeLiteral("25"), + builder.makeLiteral("max_expansions"))); + RexNode call = + PPLFuncImpTable.INSTANCE.resolve(builder, "multi_match", arguments.toArray(new RexNode[0])); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + assertInstanceOf(MultiMatchQueryBuilder.class, result); + assertEquals( + """ + { + "multi_match" : { + "query" : "Hi", + "fields" : [ + "b*^1.0" + ], + "type" : "best_fields", + "operator" : "OR", + "slop" : 0, + "prefix_length" : 0, + "max_expansions" : 25, + "zero_terms_query" : "NONE", + "auto_generate_synonyms_phrase_query" : true, + "fuzzy_transpositions" : true, + "boost" : 1.0 + } + }""", + result.toString()); + } + @Test void andOrNot_generatesCompoundQuery() throws ExpressionNotAnalyzableException { RexNode call1 = 
builder.makeCall(SqlStdOperatorTable.EQUALS, field1, numericLiteral); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java index 8b4649a4473..bec11a22ac5 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java @@ -39,7 +39,7 @@ public void testTakeAgg() { String expectedLogical = "" + "LogicalAggregate(group=[{}], c=[TAKE($0, $1)])\n" - + " LogicalProject(JOB=[$2], $f1=[2])\n" + + " LogicalProject(JOB=[$2], size=[2])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = "c=[CLERK, SALESMAN]\n";