diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index c57b33ab6d..0d4d0717b5 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2444,4 +2444,57 @@ public void testAggFilterOnNestedFields() throws IOException { "source=%s | stats count(eval(author.name < 'K')) as george_and_jk", TEST_INDEX_CASCADED_NESTED))); } + + // Only for Calcite - Test for issue #5054: query_string combined with boolean comparison + // This mimics the query pattern: "source=test url=http | where is_internal=true" + // where query_string search is combined with boolean field comparison. + @Test + public void testFilterQueryStringWithBooleanFieldPushDown() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + // Verifies that query_string combined with boolean field comparison produces pure DSL query + // without embedded script. The boolean comparison should be pushed down as a term query. + String query = + StringUtils.format( + "source=%s firstname=Amber | where male = true | fields firstname", TEST_INDEX_BANK); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testFilterBooleanFieldWithTRUE() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + // Test boolean literal with uppercase TRUE + String query = + StringUtils.format( + "source=%s firstname=Amber | where male = TRUE | fields firstname", TEST_INDEX_BANK); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testFilterBooleanFieldWithStringLiteral() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + // Test boolean field with string literal 'TRUE' - Calcite converts to boolean true + // and generates same term query as boolean literal + String query = + StringUtils.format( + "source=%s firstname=Amber | where male = 'TRUE' | fields firstname", TEST_INDEX_BANK); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testFilterBooleanFieldFalse() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + // Test boolean field with false - generates term query with false value + String query = + StringUtils.format( + "source=%s firstname=Amber | where male = false | fields firstname", TEST_INDEX_BANK); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_filter_query_string_with_boolean_false.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean.yaml new file mode 100644 index 0000000000..78ae3956b4 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(firstname=[$1]) + LogicalFilter(condition=[$12]) + LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), $1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_false.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_false.yaml new file mode 100644 index 0000000000..422c776914 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_false.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(firstname=[$1]) + LogicalFilter(condition=[NOT($12)]) + LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), NOT($1)), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":false,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5054.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5054.yml new file mode 100644 index 0000000000..bc19974972 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5054.yml @@ -0,0 +1,55 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Fix boolean field comparison pushdown": + - skip: + features: + - headers + - allowed_warnings + - do: + indices.create: + index: test-boolean + body: + mappings: + properties: + is_internal: + type: boolean + name: + type: keyword + + - do: + bulk: + index: test-boolean + refresh: true + body: + - '{"index": {}}' + - '{ "is_internal": true, "name": "doc1" }' + - '{"index": {}}' + - '{ "is_internal": false, "name": "doc2" }' + - '{"index": {}}' + - '{ "is_internal": true, "name": "doc3" }' + + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test-boolean | where is_internal=true | fields name + + - match: { total: 2 } + - length: { datarows: 2 } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 355262b2d6..48e73aee4a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -225,7 +225,13 @@ public static QueryExpression analyzeExpression( requireNonNull(expression, "expression"); try { // visits expression tree - QueryExpression queryExpression = (QueryExpression) expression.accept(visitor); + Expression result = expression.accept(visitor); + // When a boolean field is used directly as a filter condition (e.g., `where male` after + // Calcite simplifies `where male = true`), convert NamedFieldExpression to a term query. + if (result instanceof NamedFieldExpression namedField && namedField.isBooleanType()) { + return QueryExpression.create(namedField).isTrue(); + } + QueryExpression queryExpression = (QueryExpression) result; return queryExpression; } catch (Throwable e) { if (e instanceof UnsupportedScriptException) { @@ -566,7 +572,18 @@ private QueryExpression prefix(RexCall call) { throw new PredicateAnalyzerException(message); } - QueryExpression expr = (QueryExpression) call.getOperands().get(0).accept(this); + Expression operandExpr = call.getOperands().get(0).accept(this); + // Handle NOT(boolean_field) - generate term query with false value + // This covers cases like: male = false -> NOT($12) + if (operandExpr instanceof NamedFieldExpression namedField && namedField.isBooleanType()) { + return QueryExpression.create(namedField).isFalse(); + } + QueryExpression expr = (QueryExpression) operandExpr; + // Handle NOT(IS_TRUE(boolean_field)) - convert to term query with false value + // This covers cases where IS_TRUE was explicitly applied + if (expr instanceof SimpleQueryExpression simpleExpr && simpleExpr.isBooleanFieldIsTrue()) { + return QueryExpression.create(simpleExpr.rel).isFalse(); + } return expr.not(); } @@ -582,6 +599,12 @@ private QueryExpression postfix(RexCall call) { if (call.getKind() == SqlKind.IS_TRUE) { Expression qe = call.getOperands().get(0).accept(this); + // When IS_TRUE is applied to a boolean field reference (e.g., IS_TRUE(boolean_field)), + // create a QueryExpression from the NamedFieldExpression and call isTrue(). + // When IS_TRUE is applied to a predicate (already evaluated), qe is a QueryExpression. + if (qe instanceof NamedFieldExpression namedField && namedField.isBooleanType()) { + return QueryExpression.create(namedField).isTrue(); + } return ((QueryExpression) qe).isTrue(); } @@ -797,8 +820,12 @@ private QueryExpression andOr(RexCall call) { public Expression tryAnalyzeOperand(RexNode node) { try { Expression expr = node.accept(this); - if (expr instanceof NamedFieldExpression) { - return expr; + // When a boolean field is used directly as a filter condition (e.g., `where male` after + // Calcite simplifies `where male = true`), convert NamedFieldExpression to a term query. + if (expr instanceof NamedFieldExpression namedField && namedField.isBooleanType()) { + QueryExpression qe = QueryExpression.create(namedField).isTrue(); + qe.updateAnalyzedNodes(node); + return qe; } QueryExpression qe = (QueryExpression) expr; if (!qe.isPartial()) { @@ -1057,6 +1084,10 @@ QueryExpression isTrue() { throw new PredicateAnalyzerException("isTrue cannot be applied to " + this.getClass()); } + QueryExpression isFalse() { + throw new PredicateAnalyzerException("isFalse cannot be applied to " + this.getClass()); + } + QueryExpression in(LiteralExpression literal) { throw new PredicateAnalyzerException("in cannot be applied to " + this.getClass()); } @@ -1182,6 +1213,8 @@ static class SimpleQueryExpression extends QueryExpression { private RexNode analyzedRexNode; private final NamedFieldExpression rel; private QueryBuilder builder; + // Flag indicating this expression represents IS_TRUE on a boolean field + private boolean isBooleanFieldIsTrue = false; private String getFieldReference() { return rel.getReference(); @@ -1393,11 +1426,29 @@ public QueryExpression multiMatch( @Override public QueryExpression isTrue() { - // Ignore istrue if ISTRUE(predicate) and will support ISTRUE(field) later. - // builder = termQuery(getFieldReferenceForTermQuery(), true); + // When IS_TRUE is called on a boolean field directly (e.g., IS_TRUE(field)), + // generate a term query with value true. + // When called on an already-evaluated predicate (builder already set), + // return as-is. + if (builder == null) { + builder = termQuery(getFieldReferenceForTermQuery(), true); + isBooleanFieldIsTrue = true; + } return this; } + @Override + public QueryExpression isFalse() { + // When IS_FALSE or NOT(IS_TRUE) is called on a boolean field, + // generate a term query with value false. + builder = termQuery(getFieldReferenceForTermQuery(), false); + return this; + } + + boolean isBooleanFieldIsTrue() { + return isBooleanFieldIsTrue; + } + @Override public QueryExpression in(LiteralExpression literal) { Collection collection = (Collection) literal.value(); @@ -1659,6 +1710,17 @@ boolean isTextType() { return type != null && type.getOriginalExprType() instanceof OpenSearchTextType; } + boolean isBooleanType() { + if (type == null) { + return false; + } + // Check if the type is a boolean type. For OpenSearchDataType, check exprCoreType. + if (type instanceof OpenSearchDataType osType) { + return ExprCoreType.BOOLEAN.equals(osType.getExprCoreType()); + } + return ExprCoreType.BOOLEAN.equals(type); + } + boolean isMetaField() { return OpenSearchConstants.METADATAFIELD_TYPE_MAP.containsKey(getRootName()); } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java index 2e79da953b..b31bee2ce4 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java @@ -489,9 +489,11 @@ void analyze_aggCall_complexScriptFilter() throws ExpressionNotAnalyzableExcepti b.call(SqlStdOperatorTable.LIKE, b.field("c"), b.literal("%test%")))), "filter_complex_count")) .expectDslTemplate( - "[{\"filter_bool_count\":{\"filter\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"*\\\"}\"," - + "\"lang\":\"opensearch_compounded_script\",\"params\":{*}},\"boost\":1.0}}," + // filter_bool_count: Boolean field IS_TRUE is now pushed down as term query (issue + // #5054 fix) + "[{\"filter_bool_count\":{\"filter\":{\"term\":{\"d\":{\"value\":true,\"boost\":1.0}}}," + "\"aggregations\":{\"filter_bool_count\":{\"value_count\":{\"field\":\"_index\"}}}}}," + // filter_complex_count: Complex expression still uses script query + " {\"filter_complex_count\":{\"filter\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"*\\\"}\"," + "\"lang\":\"opensearch_compounded_script\",\"params\":{*}},\"boost\":1.0}}," + "\"aggregations\":{\"filter_complex_count\":{\"value_count\":{\"field\":\"_index\"}}}}}]") diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java index 0c1aef9ddf..5c31a36cfc 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java @@ -59,7 +59,7 @@ public class PredicateAnalyzerTest { final OpenSearchTypeFactory typeFactory = OpenSearchTypeFactory.TYPE_FACTORY; final RexBuilder builder = new RexBuilder(typeFactory); final RelOptCluster cluster = RelOptCluster.create(new VolcanoPlanner(), builder); - final List schema = List.of("a", "b", "c", "d"); + final List schema = List.of("a", "b", "c", "d", "e"); final Map fieldTypes = Map.of( "a", OpenSearchDataType.of(MappingType.Integer), @@ -67,12 +67,15 @@ public class PredicateAnalyzerTest { OpenSearchDataType.of( MappingType.Text, Map.of("fields", Map.of("keyword", Map.of("type", "keyword")))), "c", OpenSearchDataType.of(MappingType.Text), // Text without keyword cannot be push down - "d", OpenSearchDataType.of(MappingType.Date)); + "d", OpenSearchDataType.of(MappingType.Date), + "e", OpenSearchDataType.of(MappingType.Boolean)); final RexInputRef field1 = builder.makeInputRef(typeFactory.createSqlType(SqlTypeName.INTEGER), 0); final RexInputRef field2 = builder.makeInputRef(typeFactory.createSqlType(SqlTypeName.VARCHAR), 1); final RexInputRef field4 = builder.makeInputRef(typeFactory.createUDT(ExprUDT.EXPR_TIMESTAMP), 3); + final RexInputRef field5 = + builder.makeInputRef(typeFactory.createSqlType(SqlTypeName.BOOLEAN), 4); final RexLiteral numericLiteral = builder.makeExactLiteral(new BigDecimal(12)); final RexLiteral stringLiteral = builder.makeLiteral("Hi"); final RexNode dateTimeLiteral = @@ -1121,4 +1124,65 @@ void gte_generatesRangeQueryWithFormatForDateTime() throws ExpressionNotAnalyzab """, result.toString()); } + + @Test + void isTrue_booleanField_generatesTermQuery() throws ExpressionNotAnalyzableException { + // IS_TRUE(boolean_field) should generate a term query with value true + RexNode call = builder.makeCall(SqlStdOperatorTable.IS_TRUE, field5); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + + assertInstanceOf(TermQueryBuilder.class, result); + assertEquals( + """ + { + "term" : { + "e" : { + "value" : true, + "boost" : 1.0 + } + } + }\ + """, + result.toString()); + } + + @Test + void isTrue_booleanFieldCombinedWithOtherCondition_generatesCompoundQuery() + throws ExpressionNotAnalyzableException { + // IS_TRUE(boolean_field) AND other_condition + RexNode isTrueCall = builder.makeCall(SqlStdOperatorTable.IS_TRUE, field5); + RexNode equalsCall = builder.makeCall(SqlStdOperatorTable.EQUALS, field1, numericLiteral); + RexNode andCall = builder.makeCall(SqlStdOperatorTable.AND, isTrueCall, equalsCall); + QueryBuilder result = PredicateAnalyzer.analyze(andCall, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "must" : [ + { + "term" : { + "e" : { + "value" : true, + "boost" : 1.0 + } + } + }, + { + "term" : { + "a" : { + "value" : 12, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, + result.toString()); + } }