From 1f43dbadc70a472b8a3e96fb41ac753b23386988 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Tue, 15 Jul 2025 17:48:43 +0800 Subject: [PATCH 1/2] [BugFix] Fix incorrect push down for Sarg with nullAs is TRUE Signed-off-by: Heng Qian --- .../rest-api-spec/test/issues/3477.yml | 2 + .../rest-api-spec/test/issues/3881.yml | 58 ++++++++++++ .../opensearch/request/PredicateAnalyzer.java | 94 ++++++++++++------- 3 files changed, 118 insertions(+), 36 deletions(-) create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3881.yml diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3477.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3477.yml index 819e367493c..8f5bdc17bc5 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3477.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3477.yml @@ -49,6 +49,8 @@ teardown: - match: {"datarows": [[{ "json" : { "status": "SUCCESS", "time": 100} }], [{ "json" : { "status": "SUCCESS", "time": 100} }], [{ "json" : { "status": "SUCCESS", "time": 100} }], [{ "json" : { "status": "SUCCESS", "time": 100} }], [{ "json" : { "status": "SUCCESS", "time": 100} }]]} - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' headers: Content-Type: 'application/json' ppl: diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3881.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3881.yml new file mode 100644 index 00000000000..ec997fa935e --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3881.yml @@ -0,0 +1,58 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + plugins.calcite.fallback.allowed : false + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + plugins.calcite.fallback.allowed : true + +--- +"Handle flattened document value": + - skip: + features: + - headers + - allowed_warnings + - do: + bulk: + index: test + refresh: true + body: + - '{"index": {}}' + - '{"a": 1}' + - '{"index": {}}' + - '{"a": 2}' + - '{"index": {}}' + - '{}' + + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'source=test | where a = 1 or a = 2 or isNull(a)' + - match: {"total": 3} + - match: {"schema": [{"name": "a", "type": "bigint"}]} + - match: {"datarows": [[1], [2], [null]]} + + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'source=test | where (a = 1 or a = 2) and isNotNull(a)' + - match: { "total": 2 } + - match: { "schema": [ { "name": "a", "type": "bigint" } ] } + - match: {"datarows": [[1], [2]]} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 81aa41721eb..0ef7192f15d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -58,6 +58,7 @@ import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUnknownAs; import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; @@ -74,7 +75,7 @@ import org.opensearch.index.query.RangeQueryBuilder; import org.opensearch.sql.calcite.plan.OpenSearchConstants; import org.opensearch.sql.calcite.type.ExprSqlType; -import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; import org.opensearch.sql.data.model.ExprTimestampValue; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; @@ -257,6 +258,12 @@ static boolean isSearchWithComplementedPoints(RexCall search) { return sarg.isComplementedPoints(); } + static RexUnknownAs getNullAsForSearch(RexCall search) { + RexLiteral literal = (RexLiteral) search.getOperands().get(1); + final Sarg sarg = requireNonNull(literal.getValueAs(Sarg.class), "Sarg"); + return sarg.nullAs; + } + @Override public Expression visitCall(RexCall call) { @@ -326,7 +333,7 @@ private QueryExpression visitRelevanceFunc(RexCall call) { } throw new PredicateAnalyzerException( - String.format(Locale.ROOT, "Unsupported search relevance function: [%s]", funcName)); + format(Locale.ROOT, "Unsupported search relevance function: [%s]", funcName)); } @FunctionalInterface @@ -371,7 +378,7 @@ private Map parseRelevanceFunctionOptionalArguments( String key = ((RexLiteral) aliasPair.alias).getValueAs(String.class); if (optionalArguments.containsKey(key)) { throw new PredicateAnalyzerException( - String.format( + format( Locale.ROOT, "Parameter '%s' can only be specified once for function [%s].", key, @@ -386,7 +393,7 @@ private Map parseRelevanceFunctionOptionalArguments( private static RexCall expectCall(RexNode node, SqlOperator op, String funcName) { if (!(node instanceof RexCall call) || call.getOperator() != op) { throw new IllegalArgumentException( - String.format( + format( Locale.ROOT, "Expect [%s] RexCall but get [%s] for function [%s]", op.getName(), @@ -508,31 +515,19 @@ private QueryExpression binary(RexCall call) { } return QueryExpression.create(pair.getKey()).lte(pair.getValue()); case SEARCH: - if (isSearchWithComplementedPoints(call)) { - return QueryExpression.create(pair.getKey()).notIn(pair.getValue()); - } else if (isSearchWithPoints(call)) { - return QueryExpression.create(pair.getKey()).in(pair.getValue()); - } else { - Sarg sarg = pair.getValue().literal.getValueAs(Sarg.class); - Set> rangeSet = requireNonNull(sarg).rangeSet.asRanges(); - boolean isTimeStamp = - (pair.getKey() instanceof NamedFieldExpression namedField) - && namedField.isTimeStampType(); - List queryExpressions = - rangeSet.stream() - .map( - range -> - RangeSets.isPoint(range) - ? QueryExpression.create(pair.getKey()) - .equals(sargPointValue(range.lowerEndpoint()), isTimeStamp) - : QueryExpression.create(pair.getKey()).between(range, isTimeStamp)) - .toList(); - if (queryExpressions.size() == 1) { - return queryExpressions.getFirst(); - } else { - return CompoundQueryExpression.or(queryExpressions.toArray(new QueryExpression[0])); - } - } + QueryExpression expression = constructQueryExpressionForSearch(call, pair); + RexUnknownAs nullAs = getNullAsForSearch(call); + return switch (nullAs) { + // e.g. where isNotNull(a) and (a = 1 or a = 2) + // TODO: For this case, seems return `expression` should be equivalent + case FALSE -> CompoundQueryExpression.and( + false, expression, QueryExpression.create(pair.getKey()).exists()); + // e.g. where isNull(a) or a = 1 or a = 2 + case TRUE -> CompoundQueryExpression.or( + expression, QueryExpression.create(pair.getKey()).notExists()); + // e.g. where a = 1 or a = 2 + case UNKNOWN -> expression; + }; default: break; } @@ -540,6 +535,35 @@ private QueryExpression binary(RexCall call) { throw new PredicateAnalyzerException(message); } + private static QueryExpression constructQueryExpressionForSearch( + RexCall call, SwapResult pair) { + if (isSearchWithComplementedPoints(call)) { + return QueryExpression.create(pair.getKey()).notIn(pair.getValue()); + } else if (isSearchWithPoints(call)) { + return QueryExpression.create(pair.getKey()).in(pair.getValue()); + } else { + Sarg sarg = pair.getValue().literal.getValueAs(Sarg.class); + Set> rangeSet = requireNonNull(sarg).rangeSet.asRanges(); + boolean isTimeStamp = + (pair.getKey() instanceof NamedFieldExpression namedField) + && namedField.isTimeStampType(); + List queryExpressions = + rangeSet.stream() + .map( + range -> + RangeSets.isPoint(range) + ? QueryExpression.create(pair.getKey()) + .equals(sargPointValue(range.lowerEndpoint()), isTimeStamp) + : QueryExpression.create(pair.getKey()).between(range, isTimeStamp)) + .toList(); + if (queryExpressions.size() == 1) { + return queryExpressions.getFirst(); + } else { + return CompoundQueryExpression.or(queryExpressions.toArray(new QueryExpression[0])); + } + } + } + private QueryExpression andOr(RexCall call) { QueryExpression[] expressions = new QueryExpression[call.getOperands().size()]; PredicateAnalyzerException firstError = null; @@ -717,13 +741,11 @@ public boolean isPartial() { public abstract QueryExpression notIn(LiteralExpression literal); public QueryExpression between(Range literal, boolean isTimeStamp) { - throw new PredicateAnalyzer.PredicateAnalyzerException( - "between cannot be applied to " + this.getClass()); + throw new PredicateAnalyzerException("between cannot be applied to " + this.getClass()); } public QueryExpression equals(Object point, boolean isTimeStamp) { - throw new PredicateAnalyzer.PredicateAnalyzerException( - "equals cannot be applied to " + this.getClass()); + throw new PredicateAnalyzerException("equals cannot be applied to " + this.getClass()); } public abstract QueryExpression notEquals(LiteralExpression literal); @@ -767,7 +789,7 @@ public static QueryExpression create(TerminalExpression expression) { return new SimpleQueryExpression((NamedFieldExpression) expression); } else { String message = format(Locale.ROOT, "Unsupported expression: [%s]", expression); - throw new PredicateAnalyzer.PredicateAnalyzerException(message); + throw new PredicateAnalyzerException(message); } } } @@ -1368,7 +1390,7 @@ public boolean isSarg() { public boolean isTimestamp() { if (literal.getType() instanceof ExprSqlType exprSqlType) { - return exprSqlType.getUdt() == OpenSearchTypeFactory.ExprUDT.EXPR_TIMESTAMP; + return exprSqlType.getUdt() == ExprUDT.EXPR_TIMESTAMP; } return false; } @@ -1439,7 +1461,7 @@ private static void checkForIncompatibleDateTimeOperands(RexCall call) { || (SqlTypeFamily.TIMESTAMP.contains(op2) && !SqlTypeFamily.TIMESTAMP.contains(op1)) || (SqlTypeFamily.TIME.contains(op1) && !SqlTypeFamily.TIME.contains(op2)) || (SqlTypeFamily.TIME.contains(op2) && !SqlTypeFamily.TIME.contains(op1))) { - throw new PredicateAnalyzer.PredicateAnalyzerException( + throw new PredicateAnalyzerException( "Cannot handle " + call.getKind() + " expression for _id field, " + call); } } From d31c01b9f4d095d610584a50a95d382c0ae7f2ef Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Wed, 16 Jul 2025 10:31:00 +0800 Subject: [PATCH 2/2] Add allow warning for 3570.xml Signed-off-by: Heng Qian --- .../yamlRestTest/resources/rest-api-spec/test/issues/3570.yml | 4 ++++ .../yamlRestTest/resources/rest-api-spec/test/issues/3881.yml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3570.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3570.yml index d52a4c35d67..3176817123c 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3570.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3570.yml @@ -101,7 +101,11 @@ teardown: - skip: features: - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' headers: Content-Type: 'application/json' ppl: diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3881.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3881.yml index ec997fa935e..d9ced656493 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3881.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3881.yml @@ -16,7 +16,7 @@ teardown: plugins.calcite.fallback.allowed : true --- -"Handle flattened document value": +"Handle sag with nullAs": - skip: features: - headers