Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,12 @@
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLambdaRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlIntervalQualifier;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.ArraySqlType;
import org.apache.calcite.sql.type.SqlTypeName;
Expand Down Expand Up @@ -190,8 +193,15 @@ public RexNode visitXor(Xor node, CalcitePlanContext context) {

@Override
public RexNode visitNot(Not node, CalcitePlanContext context) {
final RexNode expr = analyze(node.getExpression(), context);
return context.relBuilder.not(expr);
// Special handling for NOT(boolean_field = true/false) - see boolean comparison helpers below
UnresolvedExpression inner = node.getExpression();
if (inner instanceof Compare compare && "=".equals(compare.getOperator())) {
RexNode result = tryMakeBooleanNotEquals(compare, context);
if (result != null) {
return result;
}
}
return context.relBuilder.not(analyze(node.getExpression(), context));
}

@Override
Expand Down Expand Up @@ -221,7 +231,15 @@ public RexNode visitIn(In node, CalcitePlanContext context) {
public RexNode visitCompare(Compare node, CalcitePlanContext context) {
RexNode left = analyze(node.getLeft(), context);
RexNode right = analyze(node.getRight(), context);
return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, node.getOperator(), left, right);
String op = node.getOperator();
// Handle boolean_field != literal -> IS_NOT_TRUE/IS_NOT_FALSE
if ("!=".equals(op) || "<>".equals(op)) {
RexNode result = tryMakeBooleanNotEquals(left, right, context);
if (result != null) {
return result;
}
}
return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, op, left, right);
}

@Override
Expand Down Expand Up @@ -251,6 +269,65 @@ public RexNode visitEqualTo(EqualTo node, CalcitePlanContext context) {
return context.rexBuilder.equals(left, right);
}

// ==================== Boolean NOT comparison helpers ====================
// Calcite's RexSimplify transforms:
// - "field = true" -> "field" (handled by PredicateAnalyzer detecting boolean field)
// - "field = false" -> "NOT(field)" (handled by PredicateAnalyzer.prefix())
// - "NOT(field = true)" -> "NOT(field)" -> would generate term{false}, have conflicted semantics
// - "NOT(field = false)" -> "NOT(NOT(field))" -> "field" -> would generate term{true}, have
// conflicted semantics
// We intercept NOT(field = true/false) at AST level before Calcite optimization:
// - "NOT(field = true)" -> IS_NOT_TRUE(field): matches false, null, missing
// - "NOT(field = false)" -> IS_NOT_FALSE(field): matches true, null, missing

/**
* Try to convert boolean_field != literal or NOT(boolean_field = literal) to
* IS_NOT_TRUE/IS_NOT_FALSE. This preserves correct null-handling semantics.
*/
private RexNode tryMakeBooleanNotEquals(RexNode left, RexNode right, CalcitePlanContext context) {
BooleanFieldComparison cmp = extractBooleanFieldComparison(left, right);
if (cmp == null) {
return null;
}
SqlOperator op =
Boolean.FALSE.equals(cmp.literalValue)
? SqlStdOperatorTable.IS_NOT_FALSE
: SqlStdOperatorTable.IS_NOT_TRUE;
return context.rexBuilder.makeCall(op, cmp.field);
}

/** Overload for NOT(Compare) AST pattern. */
private RexNode tryMakeBooleanNotEquals(Compare compare, CalcitePlanContext context) {
return tryMakeBooleanNotEquals(
analyze(compare.getLeft(), context), analyze(compare.getRight(), context), context);
}

/** Represents a comparison between a boolean field and a boolean literal. */
private record BooleanFieldComparison(RexNode field, Boolean literalValue) {}

/**
* Extract boolean field and literal value from a comparison, normalizing operand order. Returns
* null if the comparison is not between a boolean field and a boolean literal.
*/
private BooleanFieldComparison extractBooleanFieldComparison(RexNode left, RexNode right) {
if (isBooleanField(left) && isBooleanLiteral(right)) {
return new BooleanFieldComparison(left, ((RexLiteral) right).getValueAs(Boolean.class));
}
if (isBooleanField(right) && isBooleanLiteral(left)) {
return new BooleanFieldComparison(right, ((RexLiteral) left).getValueAs(Boolean.class));
}
return null;
}

private boolean isBooleanField(RexNode node) {
// Only match actual field references, not arbitrary boolean expressions like CASE
return node instanceof RexInputRef && node.getType().getSqlTypeName() == SqlTypeName.BOOLEAN;
}

private boolean isBooleanLiteral(RexNode node) {
return node instanceof RexLiteral && node.getType().getSqlTypeName() == SqlTypeName.BOOLEAN;
}

/** Resolve qualified name. Note, the name should be case-sensitive. */
@Override
public RexNode visitQualifiedName(QualifiedName node, CalcitePlanContext context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2497,4 +2497,85 @@ public void testExplainMvCombine() throws IOException {
String expected = loadExpectedPlan("explain_mvcombine.yaml");
assertYamlEqualsIgnoreId(expected, actual);
}

// Only for Calcite - Test for issue #5054: query_string combined with boolean comparison
// This mimics the query pattern: "source=test url=http | where is_internal=true"
// where query_string search is combined with boolean field comparison.
@Test
public void testFilterQueryStringWithBooleanFieldPushDown() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// Verifies that query_string combined with boolean field comparison produces pure DSL query
// without embedded script. The boolean comparison should be pushed down as a term query.
String query =
StringUtils.format(
"source=%s firstname=Amber | where male = true | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldWithTRUE() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// Test boolean literal with uppercase TRUE
String query =
StringUtils.format(
"source=%s firstname=Amber | where male = TRUE | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldWithStringLiteral() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// Test boolean field with string literal 'TRUE' - Calcite converts to boolean true
// and generates same term query as boolean literal
String query =
StringUtils.format(
"source=%s firstname=Amber | where male = 'TRUE' | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldFalse() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// male = false is converted to IS_FALSE(male) which generates term query {value: false}.
// This only matches documents where male is explicitly false (not null or missing).
String query =
StringUtils.format(
"source=%s firstname=Amber | where male = false | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean_false.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldNotTrue() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// NOT(male = true) generates IS_NOT_TRUE which produces mustNot(term query {value: true})
// This matches documents where male is false, null, or missing
String query =
StringUtils.format(
"source=%s firstname=Amber | where NOT male = true | fields firstname",
TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean_not_true.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldNotEquals() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// male != true generates IS_NOT_TRUE which produces mustNot(term query {value: true})
// This matches documents where male is false, null, or missing
String query =
StringUtils.format(
"source=%s firstname=Amber | where male != true | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean_not_true.yaml");
assertYamlEqualsIgnoreId(expected, result);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
calcite:
logical: |
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
LogicalProject(firstname=[$1])
LogicalFilter(condition=[$12])
LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))])
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
physical: |
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), $1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
calcite:
logical: |
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
LogicalProject(firstname=[$1])
LogicalFilter(condition=[NOT($12)])
LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))])
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
physical: |
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), NOT($1)), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":false,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
calcite:
logical: |
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
LogicalProject(firstname=[$1])
LogicalFilter(condition=[IS NOT TRUE($12)])
LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))])
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
physical: |
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), IS NOT TRUE($1)), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"bool":{"must_not":[{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
Expand Up @@ -42,24 +42,10 @@ teardown:
ppl:
body:
query: 'source=hdfs_logs | patterns content method=brain mode=aggregation max_sample_count=2 variable_count_threshold=3'
- match: {"total": 2}
- match: {"schema": [{"name": "patterns_field", "type": "string"}, {"name": "pattern_count", "type": "bigint"}, {"name": "sample_logs", "type": "array"}]}
- match: {"datarows": [
[
"PacketResponder failed for blk_<*>",
2,
[
"PacketResponder failed for blk_6996194389878584395",
"PacketResponder failed for blk_-1547954353065580372"
]
],
[
"BLOCK* NameSystem.addStoredBlock: blockMap updated: <*IP*> is added to blk_<*> size <*>",
2,
[
"BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.31.85:50010 is added to blk_-7017553867379051457 size 67108864",
"BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.107.19:50010 is added to blk_-3249711809227781266 size 67108864"
]
]
]}
- match: { total: 2 }
- match: { schema: [{"name": "patterns_field", "type": "string"}, {"name": "pattern_count", "type": "bigint"}, {"name": "sample_logs", "type": "array"}] }
- length: { datarows: 2 }
# Verify each pattern has exactly 2 sample logs (max_sample_count=2)
- length: { datarows.0.2: 2 }
- length: { datarows.1.2: 2 }

Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
setup:
- do:
query.settings:
body:
transient:
plugins.calcite.enabled: true

---
teardown:
- do:
query.settings:
body:
transient:
plugins.calcite.enabled: false

Comment on lines +1 to +15
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Ensure index isolation by cleaning up test before/after use.
Right now the test can fail or leak state if an index named test already exists or is reused. Add a cleanup step (or use a unique index name) to keep this test independent.

🧹 Suggested cleanup (align with existing YAML REST test patterns)
 setup:
   - do:
       query.settings:
         body:
           transient:
             plugins.calcite.enabled: true
+  - do:
+      indices.delete:
+        index: test
+        ignore: 404

 ---
 teardown:
   - do:
       query.settings:
         body:
           transient:
             plugins.calcite.enabled: false
+  - do:
+      indices.delete:
+        index: test
+        ignore: 404

As per coding guidelines: Tests must not rely on execution order; ensure test independence.

Also applies to: 23-34

🤖 Prompt for AI Agents
In `@integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5054.yml`
around lines 1 - 15, The test uses an index named "test" and currently doesn't
clean it up; update the YAML to ensure index isolation by adding explicit delete
steps for the "test" index in both the setup and teardown blocks (or replace
"test" with a generated unique name), e.g., add a do: delete index action before
the test runs and another delete after the test completes so the index cannot
leak state or conflict with other tests; reference the existing setup/teardown
blocks and the index name "test" when making these changes.

---
"Fix boolean field comparison pushdown":
- skip:
features:
- headers
- allowed_warnings
- do:
indices.create:
index: test
body:
mappings:
properties:
"@timestamp":
type: date
url:
type: text
is_internal:
type: boolean


- do:
bulk:
index: test
refresh: true
body:
- '{"index": {}}'
- '{ "@timestamp":"2025-08-19T04:51:24Z", "url": "http" }'
- '{"index": {}}'
- '{ "@timestamp":"2025-08-19T04:51:24Z", "url": "http", "is_internal": true }'
- '{"index": {}}'
- '{ "@timestamp":"2025-08-19T04:51:24Z", "url": "http", "is_internal": false }'

# Test is_internal=true: should return only documents where is_internal is explicitly true
- do:
allowed_warnings:
- 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled'
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test url=http | where is_internal=true

- match: { total: 1 }
- length: { datarows: 1 }

# Test is_internal=false: should return only documents where is_internal is explicitly false
# NOT documents where is_internal is null or missing
- do:
allowed_warnings:
- 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled'
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test url=http | where is_internal=false

- match: { total: 1 }
- length: { datarows: 1 }

# Test NOT(is_internal=false): should return documents where is_internal is true OR null/missing
# This is the negation of "is_internal=false", so it should return 2 documents
- do:
allowed_warnings:
- 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled'
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test url=http | where not is_internal=false

- match: { total: 2 }
- length: { datarows: 2 }

# Test NOT(is_internal=true): should return documents where is_internal is false OR null/missing
# This is the negation of "is_internal=true", so it should return 2 documents
- do:
allowed_warnings:
- 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled'
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test url=http | where not is_internal=true

- match: { total: 2 }
- length: { datarows: 2 }
Loading
Loading