Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2444,4 +2444,57 @@ public void testAggFilterOnNestedFields() throws IOException {
"source=%s | stats count(eval(author.name < 'K')) as george_and_jk",
TEST_INDEX_CASCADED_NESTED)));
}

// Only for Calcite - Test for issue #5054: query_string combined with boolean comparison
// This mimics the query pattern: "source=test url=http | where is_internal=true"
// where query_string search is combined with boolean field comparison.
@Test
public void testFilterQueryStringWithBooleanFieldPushDown() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// Verifies that query_string combined with boolean field comparison produces pure DSL query
// without embedded script. The boolean comparison should be pushed down as a term query.
String query =
StringUtils.format(
"source=%s firstname=Amber | where male = true | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldWithTRUE() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// Test boolean literal with uppercase TRUE
String query =
StringUtils.format(
"source=%s firstname=Amber | where male = TRUE | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldWithStringLiteral() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// Test boolean field with string literal 'TRUE' - Calcite converts to boolean true
// and generates same term query as boolean literal
String query =
StringUtils.format(
"source=%s firstname=Amber | where male = 'TRUE' | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldFalse() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// Test boolean field with false - generates term query with false value
String query =
StringUtils.format(
"source=%s firstname=Amber | where male = false | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean_false.yaml");
assertYamlEqualsIgnoreId(expected, result);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
calcite:
logical: |
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
LogicalProject(firstname=[$1])
LogicalFilter(condition=[$12])
LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))])
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
physical: |
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), $1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
calcite:
logical: |
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
LogicalProject(firstname=[$1])
LogicalFilter(condition=[NOT($12)])
LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))])
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
physical: |
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), NOT($1)), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":false,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
setup:
- do:
query.settings:
body:
transient:
plugins.calcite.enabled: true

---
teardown:
- do:
query.settings:
body:
transient:
plugins.calcite.enabled: false

---
"Fix boolean field comparison pushdown":
- skip:
features:
- headers
- allowed_warnings
- do:
indices.create:
index: test-boolean
body:
mappings:
properties:
is_internal:
type: boolean
name:
type: keyword

- do:
bulk:
index: test-boolean
refresh: true
body:
- '{"index": {}}'
- '{ "is_internal": true, "name": "doc1" }'
- '{"index": {}}'
- '{ "is_internal": false, "name": "doc2" }'
- '{"index": {}}'
- '{ "is_internal": true, "name": "doc3" }'

- do:
allowed_warnings:
- 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled'
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test-boolean | where is_internal=true | fields name
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using failed query source=test url=http | where is_internal=true
in #5054


- match: { total: 2 }
- length: { datarows: 2 }
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,13 @@ public static QueryExpression analyzeExpression(
requireNonNull(expression, "expression");
try {
// visits expression tree
QueryExpression queryExpression = (QueryExpression) expression.accept(visitor);
Expression result = expression.accept(visitor);
// When a boolean field is used directly as a filter condition (e.g., `where male` after
// Calcite simplifies `where male = true`), convert NamedFieldExpression to a term query.
if (result instanceof NamedFieldExpression namedField && namedField.isBooleanType()) {
return QueryExpression.create(namedField).isTrue();
}
QueryExpression queryExpression = (QueryExpression) result;
return queryExpression;
} catch (Throwable e) {
if (e instanceof UnsupportedScriptException) {
Expand Down Expand Up @@ -566,7 +572,18 @@ private QueryExpression prefix(RexCall call) {
throw new PredicateAnalyzerException(message);
}

QueryExpression expr = (QueryExpression) call.getOperands().get(0).accept(this);
Expression operandExpr = call.getOperands().get(0).accept(this);
// Handle NOT(boolean_field) - generate term query with false value
// This covers cases like: male = false -> NOT($12)
if (operandExpr instanceof NamedFieldExpression namedField && namedField.isBooleanType()) {
return QueryExpression.create(namedField).isFalse();
}
QueryExpression expr = (QueryExpression) operandExpr;
// Handle NOT(IS_TRUE(boolean_field)) - convert to term query with false value
// This covers cases where IS_TRUE was explicitly applied
if (expr instanceof SimpleQueryExpression simpleExpr && simpleExpr.isBooleanFieldIsTrue()) {
return QueryExpression.create(simpleExpr.rel).isFalse();
}
Comment on lines +582 to +586
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • (NOT boolean_field = true) will return fields include ture, null and missing fields
  • but boolean_field=false only return fields has false value.

return expr.not();
}

Expand All @@ -582,6 +599,12 @@ private QueryExpression postfix(RexCall call) {

if (call.getKind() == SqlKind.IS_TRUE) {
Expression qe = call.getOperands().get(0).accept(this);
// When IS_TRUE is applied to a boolean field reference (e.g., IS_TRUE(boolean_field)),
// create a QueryExpression from the NamedFieldExpression and call isTrue().
// When IS_TRUE is applied to a predicate (already evaluated), qe is a QueryExpression.
if (qe instanceof NamedFieldExpression namedField && namedField.isBooleanType()) {
return QueryExpression.create(namedField).isTrue();
}
return ((QueryExpression) qe).isTrue();
}

Expand Down Expand Up @@ -797,8 +820,12 @@ private QueryExpression andOr(RexCall call) {
public Expression tryAnalyzeOperand(RexNode node) {
try {
Expression expr = node.accept(this);
if (expr instanceof NamedFieldExpression) {
return expr;
// When a boolean field is used directly as a filter condition (e.g., `where male` after
// Calcite simplifies `where male = true`), convert NamedFieldExpression to a term query.
if (expr instanceof NamedFieldExpression namedField && namedField.isBooleanType()) {
QueryExpression qe = QueryExpression.create(namedField).isTrue();
qe.updateAnalyzedNodes(node);
return qe;
}
QueryExpression qe = (QueryExpression) expr;
if (!qe.isPartial()) {
Expand Down Expand Up @@ -1057,6 +1084,10 @@ QueryExpression isTrue() {
throw new PredicateAnalyzerException("isTrue cannot be applied to " + this.getClass());
}

QueryExpression isFalse() {
throw new PredicateAnalyzerException("isFalse cannot be applied to " + this.getClass());
}

QueryExpression in(LiteralExpression literal) {
throw new PredicateAnalyzerException("in cannot be applied to " + this.getClass());
}
Expand Down Expand Up @@ -1182,6 +1213,8 @@ static class SimpleQueryExpression extends QueryExpression {
private RexNode analyzedRexNode;
private final NamedFieldExpression rel;
private QueryBuilder builder;
// Flag indicating this expression represents IS_TRUE on a boolean field
private boolean isBooleanFieldIsTrue = false;

private String getFieldReference() {
return rel.getReference();
Expand Down Expand Up @@ -1393,11 +1426,29 @@ public QueryExpression multiMatch(

@Override
public QueryExpression isTrue() {
// Ignore istrue if ISTRUE(predicate) and will support ISTRUE(field) later.
// builder = termQuery(getFieldReferenceForTermQuery(), true);
// When IS_TRUE is called on a boolean field directly (e.g., IS_TRUE(field)),
// generate a term query with value true.
// When called on an already-evaluated predicate (builder already set),
// return as-is.
if (builder == null) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to override isTrue and not API for NamedFieldExpression instead of changing SimpleQueryExpression?

builder = termQuery(getFieldReferenceForTermQuery(), true);
isBooleanFieldIsTrue = true;
}
return this;
}

@Override
public QueryExpression isFalse() {
// When IS_FALSE or NOT(IS_TRUE) is called on a boolean field,
// generate a term query with value false.
builder = termQuery(getFieldReferenceForTermQuery(), false);
return this;
}

boolean isBooleanFieldIsTrue() {
return isBooleanFieldIsTrue;
}

@Override
public QueryExpression in(LiteralExpression literal) {
Collection<?> collection = (Collection<?>) literal.value();
Expand Down Expand Up @@ -1659,6 +1710,17 @@ boolean isTextType() {
return type != null && type.getOriginalExprType() instanceof OpenSearchTextType;
}

boolean isBooleanType() {
if (type == null) {
return false;
}
// Check if the type is a boolean type. For OpenSearchDataType, check exprCoreType.
if (type instanceof OpenSearchDataType osType) {
return ExprCoreType.BOOLEAN.equals(osType.getExprCoreType());
}
return ExprCoreType.BOOLEAN.equals(type);
}

boolean isMetaField() {
return OpenSearchConstants.METADATAFIELD_TYPE_MAP.containsKey(getRootName());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -489,9 +489,11 @@ void analyze_aggCall_complexScriptFilter() throws ExpressionNotAnalyzableExcepti
b.call(SqlStdOperatorTable.LIKE, b.field("c"), b.literal("%test%")))),
"filter_complex_count"))
.expectDslTemplate(
"[{\"filter_bool_count\":{\"filter\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"*\\\"}\","
+ "\"lang\":\"opensearch_compounded_script\",\"params\":{*}},\"boost\":1.0}},"
// filter_bool_count: Boolean field IS_TRUE is now pushed down as term query (issue
// #5054 fix)
"[{\"filter_bool_count\":{\"filter\":{\"term\":{\"d\":{\"value\":true,\"boost\":1.0}}},"
+ "\"aggregations\":{\"filter_bool_count\":{\"value_count\":{\"field\":\"_index\"}}}}},"
// filter_complex_count: Complex expression still uses script query
+ " {\"filter_complex_count\":{\"filter\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"*\\\"}\","
+ "\"lang\":\"opensearch_compounded_script\",\"params\":{*}},\"boost\":1.0}},"
+ "\"aggregations\":{\"filter_complex_count\":{\"value_count\":{\"field\":\"_index\"}}}}}]")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,23 @@ public class PredicateAnalyzerTest {
final OpenSearchTypeFactory typeFactory = OpenSearchTypeFactory.TYPE_FACTORY;
final RexBuilder builder = new RexBuilder(typeFactory);
final RelOptCluster cluster = RelOptCluster.create(new VolcanoPlanner(), builder);
final List<String> schema = List.of("a", "b", "c", "d");
final List<String> schema = List.of("a", "b", "c", "d", "e");
final Map<String, ExprType> fieldTypes =
Map.of(
"a", OpenSearchDataType.of(MappingType.Integer),
"b",
OpenSearchDataType.of(
MappingType.Text, Map.of("fields", Map.of("keyword", Map.of("type", "keyword")))),
"c", OpenSearchDataType.of(MappingType.Text), // Text without keyword cannot be push down
"d", OpenSearchDataType.of(MappingType.Date));
"d", OpenSearchDataType.of(MappingType.Date),
"e", OpenSearchDataType.of(MappingType.Boolean));
final RexInputRef field1 =
builder.makeInputRef(typeFactory.createSqlType(SqlTypeName.INTEGER), 0);
final RexInputRef field2 =
builder.makeInputRef(typeFactory.createSqlType(SqlTypeName.VARCHAR), 1);
final RexInputRef field4 = builder.makeInputRef(typeFactory.createUDT(ExprUDT.EXPR_TIMESTAMP), 3);
final RexInputRef field5 =
builder.makeInputRef(typeFactory.createSqlType(SqlTypeName.BOOLEAN), 4);
final RexLiteral numericLiteral = builder.makeExactLiteral(new BigDecimal(12));
final RexLiteral stringLiteral = builder.makeLiteral("Hi");
final RexNode dateTimeLiteral =
Expand Down Expand Up @@ -1121,4 +1124,65 @@ void gte_generatesRangeQueryWithFormatForDateTime() throws ExpressionNotAnalyzab
""",
result.toString());
}

@Test
void isTrue_booleanField_generatesTermQuery() throws ExpressionNotAnalyzableException {
// IS_TRUE(boolean_field) should generate a term query with value true
RexNode call = builder.makeCall(SqlStdOperatorTable.IS_TRUE, field5);
QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes);

assertInstanceOf(TermQueryBuilder.class, result);
assertEquals(
"""
{
"term" : {
"e" : {
"value" : true,
"boost" : 1.0
}
}
}\
""",
result.toString());
}

@Test
void isTrue_booleanFieldCombinedWithOtherCondition_generatesCompoundQuery()
throws ExpressionNotAnalyzableException {
// IS_TRUE(boolean_field) AND other_condition
RexNode isTrueCall = builder.makeCall(SqlStdOperatorTable.IS_TRUE, field5);
RexNode equalsCall = builder.makeCall(SqlStdOperatorTable.EQUALS, field1, numericLiteral);
RexNode andCall = builder.makeCall(SqlStdOperatorTable.AND, isTrueCall, equalsCall);
QueryBuilder result = PredicateAnalyzer.analyze(andCall, schema, fieldTypes);

assertInstanceOf(BoolQueryBuilder.class, result);
assertEquals(
"""
{
"bool" : {
"must" : [
{
"term" : {
"e" : {
"value" : true,
"boost" : 1.0
}
}
},
{
"term" : {
"a" : {
"value" : 12,
"boost" : 1.0
}
}
}
],
"adjust_pure_negative" : true,
"boost" : 1.0
}
}\
""",
result.toString());
}
}
Loading