-
Notifications
You must be signed in to change notification settings - Fork 180
Convert like function call to wildcard query for Calcite filter pushdown #3915
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
44a6b48
aaa83f2
381c4d7
0ddb5b2
2f2bb48
698c8ce
a7ca28f
ea9b43e
b457cb8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -435,6 +435,15 @@ public void testMultiFieldsRelevanceQueryFunctionExplain() throws IOException { | |
| + " default_operator='or', analyzer=english)")); | ||
| } | ||
|
|
||
| @Test | ||
| public void testLikeFunctionExplain() throws IOException { | ||
| String expected = loadExpectedPlan("explain_like_function.json"); | ||
| assertJsonEqualsIgnoreId( | ||
| expected, | ||
| explainQueryToString( | ||
| "source=opensearch-sql_test_index_account | where like(email, '%gmail%')")); | ||
| } | ||
|
||
|
|
||
| @Ignore("The serialized string is unstable because of function properties") | ||
| @Test | ||
| public void testFilterScriptPushDownExplain() throws Exception { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -62,17 +62,17 @@ public void test_like_in_where_with_escaped_underscore() throws IOException { | |
| @Test | ||
| public void test_like_on_text_field_with_one_word() throws IOException { | ||
| String query = | ||
| "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test*') | fields TextBody"; | ||
| "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test%') | fields TextBody"; | ||
| JSONObject result = executeQuery(query); | ||
| assertEquals(9, result.getInt("total")); | ||
| assertEquals(8, result.getInt("total")); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Question:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes |
||
| } | ||
|
|
||
| @Test | ||
| public void test_like_on_text_keyword_field_with_one_word() throws IOException { | ||
| String query = | ||
| "source=" | ||
| + TEST_INDEX_WILDCARD | ||
| + " | WHERE Like(TextKeywordBody, 'test*') | fields TextKeywordBody"; | ||
| + " | WHERE Like(TextKeywordBody, 'test%') | fields TextKeywordBody"; | ||
| JSONObject result = executeQuery(query); | ||
| assertEquals(8, result.getInt("total")); | ||
| } | ||
|
|
@@ -82,17 +82,17 @@ public void test_like_on_text_keyword_field_with_greater_than_one_word() throws | |
| String query = | ||
| "source=" | ||
| + TEST_INDEX_WILDCARD | ||
| + " | WHERE Like(TextKeywordBody, 'test wild*') | fields TextKeywordBody"; | ||
| + " | WHERE Like(TextKeywordBody, 'test wild%') | fields TextKeywordBody"; | ||
| JSONObject result = executeQuery(query); | ||
| assertEquals(7, result.getInt("total")); | ||
| } | ||
|
|
||
| @Test | ||
| public void test_like_on_text_field_with_greater_than_one_word() throws IOException { | ||
| String query = | ||
| "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test wild*') | fields TextBody"; | ||
| "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test wild%') | fields TextBody"; | ||
| JSONObject result = executeQuery(query); | ||
| assertEquals(0, result.getInt("total")); | ||
| assertEquals(7, result.getInt("total")); | ||
| } | ||
|
|
||
| @Test | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| { | ||
| "calcite": { | ||
| "logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($9, '%gmail%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", | ||
| "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->ILIKE($9, '%gmail%':VARCHAR, '\\')], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"wildcard\":{\"email.keyword\":{\"wildcard\":\"*gmail*\",\"case_insensitive\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| { | ||
| "calcite": { | ||
| "logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($9, '%gmail%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", | ||
| "physical": "EnumerableCalc(expr#0..16=[{inputs}], expr#17=['%gmail%':VARCHAR], expr#18=['\\'], expr#19=[ILIKE($t9, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| { | ||
| "root": { | ||
| "name": "ProjectOperator", | ||
| "description": { | ||
| "fields": "[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]" | ||
| }, | ||
| "children": [{ | ||
| "name": "OpenSearchIndexScan", | ||
| "description": { | ||
| "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"wildcard\":{\"email.keyword\":{\"wildcard\":\"*gmail*\",\"case_insensitive\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" | ||
| }, | ||
| "children": [] | ||
| }] | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,6 +37,7 @@ | |
| import static org.opensearch.index.query.QueryBuilders.regexpQuery; | ||
| import static org.opensearch.index.query.QueryBuilders.termQuery; | ||
| import static org.opensearch.index.query.QueryBuilders.termsQuery; | ||
| import static org.opensearch.index.query.QueryBuilders.wildcardQuery; | ||
| import static org.opensearch.script.Script.DEFAULT_SCRIPT_TYPE; | ||
| import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.MULTI_FIELDS_RELEVANCE_FUNCTION_SET; | ||
| import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.SINGLE_FIELD_RELEVANCE_FUNCTION_SET; | ||
|
|
@@ -93,6 +94,7 @@ | |
| import org.opensearch.sql.opensearch.storage.script.CalciteScriptEngine.ReferenceFieldVisitor; | ||
| import org.opensearch.sql.opensearch.storage.script.CalciteScriptEngine.UnsupportedScriptException; | ||
| import org.opensearch.sql.opensearch.storage.script.CompoundedScriptEngine.ScriptEngineType; | ||
| import org.opensearch.sql.opensearch.storage.script.StringUtils; | ||
| import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchBoolPrefixQuery; | ||
| import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhrasePrefixQuery; | ||
| import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhraseQuery; | ||
|
|
@@ -325,7 +327,8 @@ public Expression visitCall(RexCall call) { | |
| case SPECIAL: | ||
| return switch (call.getKind()) { | ||
| case CAST -> toCastExpression(call); | ||
| case LIKE, CONTAINS -> binary(call); | ||
| case CONTAINS -> binary(call); | ||
| case LIKE -> like(call); | ||
| default -> { | ||
| String message = format(Locale.ROOT, "Unsupported call: [%s]", call); | ||
| throw new PredicateAnalyzerException(message); | ||
|
|
@@ -533,8 +536,6 @@ private QueryExpression binary(RexCall call) { | |
| switch (call.getKind()) { | ||
| case CONTAINS: | ||
| return QueryExpression.create(pair.getKey()).contains(pair.getValue()); | ||
| case LIKE: | ||
| throw new UnsupportedOperationException("LIKE not yet supported"); | ||
| case EQUALS: | ||
| return QueryExpression.create(pair.getKey()).equals(pair.getValue()); | ||
| case NOT_EQUALS: | ||
|
|
@@ -580,6 +581,16 @@ private QueryExpression binary(RexCall call) { | |
| throw new PredicateAnalyzerException(message); | ||
| } | ||
|
|
||
| private QueryExpression like(RexCall call) { | ||
| // The third default escape is not used here. It's handled by | ||
| // StringUtils.convertSqlWildcardToLucene | ||
| checkState(call.getOperands().size() == 3); | ||
| final Expression a = call.getOperands().get(0).accept(this); | ||
| final Expression b = call.getOperands().get(1).accept(this); | ||
| final SwapResult pair = swap(a, b); | ||
| return QueryExpression.create(pair.getKey()).like(pair.getValue()); | ||
| } | ||
|
|
||
| private static QueryExpression constructQueryExpressionForSearch( | ||
| RexCall call, SwapResult pair) { | ||
| if (isSearchWithComplementedPoints(call)) { | ||
|
|
@@ -1137,10 +1148,24 @@ public QueryExpression notExists() { | |
| return this; | ||
| } | ||
|
|
||
| /* | ||
| * Prefer to run wildcard query for keyword type field. For text type field, it doesn't support | ||
| * cross term match because OpenSearch internally break text to multiple terms and apply wildcard | ||
| * matching one by one, which is not same behavior with regular like function without pushdown. | ||
| */ | ||
| @Override | ||
| public QueryExpression like(LiteralExpression literal) { | ||
| builder = regexpQuery(getFieldReference(), literal.stringValue()); | ||
| return this; | ||
| String fieldName = getFieldReference(); | ||
| String keywordField = OpenSearchTextType.toKeywordSubField(fieldName, this.rel.getExprType()); | ||
| boolean isKeywordField = keywordField != null; | ||
| if (isKeywordField) { | ||
| builder = | ||
| wildcardQuery( | ||
| keywordField, StringUtils.convertSqlWildcardToLucene(literal.stringValue())) | ||
| .caseInsensitive(true); | ||
| return this; | ||
| } | ||
| throw new UnsupportedOperationException("Like query is not supported for text field"); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Text field can also be push down as script, Can we track it as enhancement issue? And add a Notes to explain current limitation on Text field support in LIKE.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added limitation to LIKE function doc. I tried pushdown like function script for text field. However, getting ScriptDocValues for text field throws exception to not recommend to do it.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, text data does not have doc values, we should use source, e.g.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Makes sense. Created a tracking issue: #3950. Hopefully, we can find a way to read script values from source. |
||
| } | ||
|
|
||
| @Override | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,8 +10,12 @@ | |
| import org.opensearch.index.query.WildcardQueryBuilder; | ||
| import org.opensearch.sql.data.model.ExprValue; | ||
| import org.opensearch.sql.data.type.ExprType; | ||
| import org.opensearch.sql.expression.FunctionExpression; | ||
| import org.opensearch.sql.expression.LiteralExpression; | ||
| import org.opensearch.sql.expression.ReferenceExpression; | ||
| import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; | ||
| import org.opensearch.sql.opensearch.storage.script.StringUtils; | ||
| import org.opensearch.sql.opensearch.storage.script.filter.FilterQueryBuilder.ScriptQueryUnSupportedException; | ||
|
|
||
| public class LikeQuery extends LuceneQuery { | ||
| @Override | ||
|
|
@@ -29,4 +33,32 @@ protected WildcardQueryBuilder createBuilder(String field, String query) { | |
| String matchText = StringUtils.convertSqlWildcardToLucene(query); | ||
| return QueryBuilders.wildcardQuery(field, matchText).caseInsensitive(true); | ||
| } | ||
|
|
||
| /** | ||
| * Verify if the function supports like/wildcard query. Prefer to run wildcard query for keyword | ||
| * type field. For text type field, it doesn't support cross term match because OpenSearch | ||
| * internally break text to multiple terms and apply wildcard matching one by one, which is not | ||
| * same behavior with regular like function without pushdown. | ||
| * | ||
| * @param func function Input function expression | ||
| * @return boolean | ||
| */ | ||
| @Override | ||
| public boolean canSupport(FunctionExpression func) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it for solving existing bug?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, DSL wildcard matching for text field has different behavior with SQL like function because of tokenization. |
||
| if (func.getArguments().size() == 2 | ||
| && (func.getArguments().get(0) instanceof ReferenceExpression) | ||
| && (func.getArguments().get(1) instanceof LiteralExpression | ||
| || literalExpressionWrappedByCast(func))) { | ||
| ReferenceExpression ref = (ReferenceExpression) func.getArguments().get(0); | ||
| // Only support keyword type field | ||
| if (OpenSearchTextType.toKeywordSubField(ref.getRawPath(), ref.getType()) != null) { | ||
| return true; | ||
| } else { | ||
| // Script pushdown is not supported for text type field | ||
| throw new ScriptQueryUnSupportedException( | ||
| "text field wildcard doesn't support script query"); | ||
| } | ||
| } | ||
| return false; | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[Minor] Change to use
Assumeto skip the test ifisPushdownEnabledis false.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done