Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,6 @@ void populate() {
registerOperator(REGEXP, SqlLibraryOperators.REGEXP);
registerOperator(CONCAT, SqlLibraryOperators.CONCAT_FUNCTION);
registerOperator(CONCAT_WS, SqlLibraryOperators.CONCAT_WS);
registerOperator(LIKE, SqlLibraryOperators.ILIKE);
registerOperator(CONCAT_WS, SqlLibraryOperators.CONCAT_WS);
registerOperator(REVERSE, SqlLibraryOperators.REVERSE);
registerOperator(RIGHT, SqlLibraryOperators.RIGHT);
Expand Down Expand Up @@ -992,6 +991,18 @@ void populate() {
builder.makeLiteral(" "),
arg))),
PPLTypeChecker.family(SqlTypeFamily.ANY)));
register(
LIKE,
createFunctionImpWithTypeChecker(
(builder, arg1, arg2) ->
builder.makeCall(
SqlLibraryOperators.ILIKE,
arg1,
arg2,
// TODO: Figure out escaping solution. '\\' is used for JSON input but is not
// necessary for SQL function input
builder.makeLiteral("\\")),
PPLTypeChecker.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING)));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,66 +6,23 @@
package org.opensearch.sql.calcite.remote;

import java.io.IOException;
import org.junit.Ignore;
import org.junit.Test;
import org.opensearch.sql.ppl.LikeQueryIT;

// TODO Like function behaviour in V2 is not correct. Remove when it was fixed in V2.
public class CalciteLikeQueryIT extends LikeQueryIT {
@Override
public void init() throws Exception {
super.init();
enableCalcite();
// TODO: "https://github.com/opensearch-project/sql/issues/3428"
// disallowCalciteFallback();
disallowCalciteFallback();
}

@Override
@Test
@Ignore("https://github.com/opensearch-project/sql/issues/3428")
public void test_like_with_escaped_percent() throws IOException, IOException {
super.test_like_with_escaped_percent();
}

@Override
@Test
@Ignore("https://github.com/opensearch-project/sql/issues/3428")
public void test_like_in_where_with_escaped_underscore() throws IOException {
super.test_like_in_where_with_escaped_underscore();
}

@Override
@Test
@Ignore("https://github.com/opensearch-project/sql/issues/3428")
public void test_like_on_text_field_with_one_word() throws IOException {
super.test_like_on_text_field_with_one_word();
}

@Override
@Test
@Ignore("https://github.com/opensearch-project/sql/issues/3428")
public void test_like_on_text_keyword_field_with_one_word() throws IOException {
super.test_like_on_text_keyword_field_with_one_word();
}

@Override
@Test
@Ignore("https://github.com/opensearch-project/sql/issues/3428")
public void test_like_on_text_keyword_field_with_greater_than_one_word() throws IOException {
super.test_like_on_text_keyword_field_with_greater_than_one_word();
}

@Override
@Test
@Ignore("https://github.com/opensearch-project/sql/issues/3428")
public void test_like_on_text_field_with_greater_than_one_word() throws IOException {
super.test_like_on_text_field_with_greater_than_one_word();
}

@Override
@Test
@Ignore("https://github.com/opensearch-project/sql/issues/3428")
public void test_convert_field_text_to_keyword() throws IOException {
super.test_convert_field_text_to_keyword();
// This test case is not applied to no pushdown case
if (isPushdownEnabled()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Minor] Change to use Assume to skip the test if isPushdownEnabled is false.

Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled());

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

super.test_convert_field_text_to_keyword();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

package org.opensearch.sql.calcite.remote;

import java.io.IOException;
import org.opensearch.sql.ppl.WhereCommandIT;

public class CalciteWhereCommandIT extends WhereCommandIT {
Expand All @@ -16,19 +15,6 @@ public void init() throws Exception {
disallowCalciteFallback();
}

@Override
public void testIsNotNullFunction() throws IOException {
withFallbackEnabled(
() -> {
try {
super.testIsNotNullFunction();
} catch (IOException e) {
throw new RuntimeException(e);
}
},
"https://github.com/opensearch-project/sql/issues/3428");
}

@Override
protected String getIncompatibleTypeErrMsg() {
return "In expression types are incompatible: fields type LONG, values type [INTEGER, INTEGER,"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,15 @@ public void testMultiFieldsRelevanceQueryFunctionExplain() throws IOException {
+ " default_operator='or', analyzer=english)"));
}

@Test
public void testLikeFunctionExplain() throws IOException {
String expected = loadExpectedPlan("explain_like_function.json");
assertJsonEqualsIgnoreId(
expected,
explainQueryToString(
"source=opensearch-sql_test_index_account | where like(email, '%gmail%')"));
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the email a keyword or not? please add two tests for

  • keyword (e.g. firstname)
  • text (e.g. address)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


@Ignore("The serialized string is unstable because of function properties")
@Test
public void testFilterScriptPushDownExplain() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,17 @@ public void test_like_in_where_with_escaped_underscore() throws IOException {
@Test
public void test_like_on_text_field_with_one_word() throws IOException {
String query =
"source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test*') | fields TextBody";
"source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test%') | fields TextBody";
JSONObject result = executeQuery(query);
assertEquals(9, result.getInt("total"));
assertEquals(8, result.getInt("total"));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question:
With the patch, both in v2 and v3, predicate WHERE Like(TextKeywordBody, 'test%') will trigger wildcard query pushdown but predicate WHERE Like(TextBody, 'test%') won't trigger any pushdown. right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes

}

@Test
public void test_like_on_text_keyword_field_with_one_word() throws IOException {
String query =
"source="
+ TEST_INDEX_WILDCARD
+ " | WHERE Like(TextKeywordBody, 'test*') | fields TextKeywordBody";
+ " | WHERE Like(TextKeywordBody, 'test%') | fields TextKeywordBody";
JSONObject result = executeQuery(query);
assertEquals(8, result.getInt("total"));
}
Expand All @@ -82,17 +82,17 @@ public void test_like_on_text_keyword_field_with_greater_than_one_word() throws
String query =
"source="
+ TEST_INDEX_WILDCARD
+ " | WHERE Like(TextKeywordBody, 'test wild*') | fields TextKeywordBody";
+ " | WHERE Like(TextKeywordBody, 'test wild%') | fields TextKeywordBody";
JSONObject result = executeQuery(query);
assertEquals(7, result.getInt("total"));
}

@Test
public void test_like_on_text_field_with_greater_than_one_word() throws IOException {
String query =
"source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test wild*') | fields TextBody";
"source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test wild%') | fields TextBody";
JSONObject result = executeQuery(query);
assertEquals(0, result.getInt("total"));
assertEquals(7, result.getInt("total"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,16 @@ public void testLikeFunction() throws IOException {
verifyDataRows(result, rows("Amber"));
}

@Test
public void testLikeFunctionNoHit() throws IOException {
JSONObject result =
executeQuery(
String.format(
"source=%s | where like(firstname, 'Duk_') | fields lastname",
TEST_INDEX_BANK_WITH_NULL_VALUES));
assertEquals(0, result.getInt("total"));
}

@Test
public void testIsNullFunction() throws IOException {
JSONObject result =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,9 @@ public void test_like_in_where_with_escaped_underscore() throws IOException {

@Test
public void test_like_on_text_field_with_one_word() throws IOException {
String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextBody LIKE 'test*'";
String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextBody LIKE 'test%'";
JSONObject result = executeJdbcRequest(query);
assertEquals(9, result.getInt("total"));
assertEquals(8, result.getInt("total"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"calcite": {
"logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($9, '%gmail%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
"physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->ILIKE($9, '%gmail%':VARCHAR, '\\')], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"wildcard\":{\"email.keyword\":{\"wildcard\":\"*gmail*\",\"case_insensitive\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"calcite": {
"logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($9, '%gmail%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
"physical": "EnumerableCalc(expr#0..16=[{inputs}], expr#17=['%gmail%':VARCHAR], expr#18=['\\'], expr#19=[ILIKE($t9, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"root": {
"name": "ProjectOperator",
"description": {
"fields": "[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]"
},
"children": [{
"name": "OpenSearchIndexScan",
"description": {
"request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"wildcard\":{\"email.keyword\":{\"wildcard\":\"*gmail*\",\"case_insensitive\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)"
},
"children": []
}]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import static org.opensearch.index.query.QueryBuilders.regexpQuery;
import static org.opensearch.index.query.QueryBuilders.termQuery;
import static org.opensearch.index.query.QueryBuilders.termsQuery;
import static org.opensearch.index.query.QueryBuilders.wildcardQuery;
import static org.opensearch.script.Script.DEFAULT_SCRIPT_TYPE;
import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.MULTI_FIELDS_RELEVANCE_FUNCTION_SET;
import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.SINGLE_FIELD_RELEVANCE_FUNCTION_SET;
Expand Down Expand Up @@ -93,6 +94,7 @@
import org.opensearch.sql.opensearch.storage.script.CalciteScriptEngine.ReferenceFieldVisitor;
import org.opensearch.sql.opensearch.storage.script.CalciteScriptEngine.UnsupportedScriptException;
import org.opensearch.sql.opensearch.storage.script.CompoundedScriptEngine.ScriptEngineType;
import org.opensearch.sql.opensearch.storage.script.StringUtils;
import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchBoolPrefixQuery;
import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhrasePrefixQuery;
import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhraseQuery;
Expand Down Expand Up @@ -325,7 +327,8 @@ public Expression visitCall(RexCall call) {
case SPECIAL:
return switch (call.getKind()) {
case CAST -> toCastExpression(call);
case LIKE, CONTAINS -> binary(call);
case CONTAINS -> binary(call);
case LIKE -> like(call);
default -> {
String message = format(Locale.ROOT, "Unsupported call: [%s]", call);
throw new PredicateAnalyzerException(message);
Expand Down Expand Up @@ -533,8 +536,6 @@ private QueryExpression binary(RexCall call) {
switch (call.getKind()) {
case CONTAINS:
return QueryExpression.create(pair.getKey()).contains(pair.getValue());
case LIKE:
throw new UnsupportedOperationException("LIKE not yet supported");
case EQUALS:
return QueryExpression.create(pair.getKey()).equals(pair.getValue());
case NOT_EQUALS:
Expand Down Expand Up @@ -580,6 +581,16 @@ private QueryExpression binary(RexCall call) {
throw new PredicateAnalyzerException(message);
}

private QueryExpression like(RexCall call) {
// The third default escape is not used here. It's handled by
// StringUtils.convertSqlWildcardToLucene
checkState(call.getOperands().size() == 3);
final Expression a = call.getOperands().get(0).accept(this);
final Expression b = call.getOperands().get(1).accept(this);
final SwapResult pair = swap(a, b);
return QueryExpression.create(pair.getKey()).like(pair.getValue());
}

private static QueryExpression constructQueryExpressionForSearch(
RexCall call, SwapResult pair) {
if (isSearchWithComplementedPoints(call)) {
Expand Down Expand Up @@ -1137,10 +1148,24 @@ public QueryExpression notExists() {
return this;
}

/*
* Prefer to run wildcard query for keyword type field. For text type field, it doesn't support
* cross term match because OpenSearch internally break text to multiple terms and apply wildcard
* matching one by one, which is not same behavior with regular like function without pushdown.
*/
@Override
public QueryExpression like(LiteralExpression literal) {
builder = regexpQuery(getFieldReference(), literal.stringValue());
return this;
String fieldName = getFieldReference();
String keywordField = OpenSearchTextType.toKeywordSubField(fieldName, this.rel.getExprType());
boolean isKeywordField = keywordField != null;
if (isKeywordField) {
builder =
wildcardQuery(
keywordField, StringUtils.convertSqlWildcardToLucene(literal.stringValue()))
.caseInsensitive(true);
return this;
}
throw new UnsupportedOperationException("Like query is not supported for text field");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Text field can also be push down as script, Can we track it as enhancement issue?

And add a Notes to explain current limitation on Text field support in LIKE.

###
PUT {{baseUrl}}/demo-index
Content-Type: application/x-ndjson

{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  },
  "mappings": {
    "properties": {
      "region": { "type": "text" },
      "sales": { "type": "double" }
    }
  }
}

###
POST {{baseUrl}}/demo-index/_bulk
Content-Type: application/x-ndjson

{ "index": {} }
{ "region": "us-east", "sales": 100.12 }
{ "index": {} }
{ "region": "us-east", "sales": 200.34 }
{ "index": {} }
{ "region": "us-west", "sales": 300.65 }
{ "index": {} }
{ "region": "us-west", "sales": 400.78 }

###
POST {{baseUrl}}/demo-index/_search
Content-Type: application/x-ndjson

{
  "derived": {
    "region_runtime": {
      "type": "keyword",
      "script": {
        "source": "emit(params._source['region'])"
      }
    }
  },
  "query": {
    "term": {
      "region_runtime": {
        "value": "us-east"
      }
    }
  }
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added limitation to LIKE function doc.

I tried pushdown like function script for text field. However, getting ScriptDocValues for text field throws exception to not recommend to do it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, text data does not have doc values, we should use source, e.g. params._source['region']. Let's track it in issue, not high priority.

Copy link
Contributor Author

@songkant-aws songkant-aws Jul 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. Created a tracking issue: #3950. Hopefully, we can find a way to read script values from source.

}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@
import org.opensearch.index.query.WildcardQueryBuilder;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.expression.FunctionExpression;
import org.opensearch.sql.expression.LiteralExpression;
import org.opensearch.sql.expression.ReferenceExpression;
import org.opensearch.sql.opensearch.data.type.OpenSearchTextType;
import org.opensearch.sql.opensearch.storage.script.StringUtils;
import org.opensearch.sql.opensearch.storage.script.filter.FilterQueryBuilder.ScriptQueryUnSupportedException;

public class LikeQuery extends LuceneQuery {
@Override
Expand All @@ -29,4 +33,32 @@ protected WildcardQueryBuilder createBuilder(String field, String query) {
String matchText = StringUtils.convertSqlWildcardToLucene(query);
return QueryBuilders.wildcardQuery(field, matchText).caseInsensitive(true);
}

/**
* Verify if the function supports like/wildcard query. Prefer to run wildcard query for keyword
* type field. For text type field, it doesn't support cross term match because OpenSearch
* internally break text to multiple terms and apply wildcard matching one by one, which is not
* same behavior with regular like function without pushdown.
*
* @param func function Input function expression
* @return boolean
*/
@Override
public boolean canSupport(FunctionExpression func) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it for solving existing bug?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, DSL wildcard matching for text field has different behavior with SQL like function because of tokenization.

if (func.getArguments().size() == 2
&& (func.getArguments().get(0) instanceof ReferenceExpression)
&& (func.getArguments().get(1) instanceof LiteralExpression
|| literalExpressionWrappedByCast(func))) {
ReferenceExpression ref = (ReferenceExpression) func.getArguments().get(0);
// Only support keyword type field
if (OpenSearchTextType.toKeywordSubField(ref.getRawPath(), ref.getType()) != null) {
return true;
} else {
// Script pushdown is not supported for text type field
throw new ScriptQueryUnSupportedException(
"text field wildcard doesn't support script query");
}
}
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ private boolean isMultiParameterQuery(FunctionExpression func) {
/**
* Check if the second argument of the function is a literal expression wrapped by cast function.
*/
private boolean literalExpressionWrappedByCast(FunctionExpression func) {
protected boolean literalExpressionWrappedByCast(FunctionExpression func) {
if (func.getArguments().get(1) instanceof FunctionExpression) {
FunctionExpression expr = (FunctionExpression) func.getArguments().get(1);
return castMap.containsKey(expr.getFunctionName())
Expand Down
Loading
Loading