From f5a33d192eead520415c1e11055a47e715d959f5 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Thu, 20 Nov 2025 20:44:46 -0800 Subject: [PATCH 01/12] [BugFix] Fix Regex OOM when there are 10+ regex clauses Signed-off-by: Jialiang Liang --- .../sql/calcite/CalcitePlanContext.java | 55 ++++++++++++++++ .../sql/calcite/CalciteRelNodeVisitor.java | 66 ++++++++++++++++++- 2 files changed, 118 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java index 669d8452dc0..9c5e4bee595 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java @@ -8,6 +8,7 @@ import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; import java.sql.Connection; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -61,6 +62,12 @@ public class CalcitePlanContext { @Getter public Map rexLambdaRefMap; + /** Accumulated filter conditions to prevent deep Filter node chains */ + private final List pendingFilterConditions = new ArrayList<>(); + + /** Flag to indicate if filter accumulation mode is active */ + @Getter @Setter private boolean filterAccumulationEnabled = false; + private CalcitePlanContext(FrameworkConfig config, SysLimit sysLimit, QueryType queryType) { this.config = config; this.sysLimit = sysLimit; @@ -134,4 +141,52 @@ public static boolean isLegacyPreferred() { public void putRexLambdaRefMap(Map candidateMap) { this.rexLambdaRefMap.putAll(candidateMap); } + + /** + * Adds a filter condition to the accumulation list instead of creating immediate Filter RelNode. + * This prevents deep Filter node chains that cause memory explosion. + */ + public void addFilterCondition(RexNode condition) { + pendingFilterConditions.add(condition); + } + + /** + * Applies all accumulated filter conditions as a single Filter RelNode with AND operations. This + * creates a single Filter node instead of a deep chain of Filter nodes. + */ + public void flushFilterConditions() { + if (pendingFilterConditions.isEmpty()) { + return; + } + + if (pendingFilterConditions.size() == 1) { + relBuilder.filter(pendingFilterConditions.get(0)); + } else { + // Combine all filter conditions with AND + RexNode combinedCondition = relBuilder.and(pendingFilterConditions); + relBuilder.filter(combinedCondition); + } + pendingFilterConditions.clear(); + } + + /** + * Enables filter accumulation mode to prevent deep Filter node chains. Should be called before + * processing multiple filter operations. + */ + public void enableFilterAccumulation() { + filterAccumulationEnabled = true; + } + + /** + * Disables filter accumulation mode. Should be called after processing multiple filter + * operations. + */ + public void disableFilterAccumulation() { + filterAccumulationEnabled = false; + } + + /** Returns true if there are pending filter conditions that need to be flushed. */ + public boolean hasPendingFilterConditions() { + return !pendingFilterConditions.isEmpty(); + } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 8049afce825..1ce7d4509ab 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -176,7 +176,20 @@ public CalciteRelNodeVisitor(DataSourceService dataSourceService) { } public RelNode analyze(UnresolvedPlan unresolved, CalcitePlanContext context) { - return unresolved.accept(this, context); + // Enable filter accumulation if this plan contains multiple filtering operations + // that could create deep Filter RelNode chains + if (countFilteringOperations(unresolved) >= 2) { + context.enableFilterAccumulation(); + try { + unresolved.accept(this, context); + context.flushFilterConditions(); // Flush accumulated conditions before returning + return context.relBuilder.peek(); // Get the result after flushing + } finally { + context.disableFilterAccumulation(); + } + } else { + return unresolved.accept(this, context); + } } @Override @@ -244,7 +257,12 @@ public RelNode visitFilter(Filter node, CalcitePlanContext context) { context.relBuilder.filter(ImmutableList.of(v.get().id), condition); context.popCorrelVar(); } else { - context.relBuilder.filter(condition); + // Use filter accumulation to prevent deep Filter node chains + if (context.isFilterAccumulationEnabled()) { + context.addFilterCondition(condition); + } else { + context.relBuilder.filter(condition); + } } return context.relBuilder.peek(); } @@ -293,7 +311,12 @@ public RelNode visitRegex(Regex node, CalcitePlanContext context) { regexCondition = context.rexBuilder.makeCall(SqlStdOperatorTable.NOT, regexCondition); } - context.relBuilder.filter(regexCondition); + // Use filter accumulation to prevent deep Filter node chains + if (context.isFilterAccumulationEnabled()) { + context.addFilterCondition(regexCondition); + } else { + context.relBuilder.filter(regexCondition); + } return context.relBuilder.peek(); } @@ -384,6 +407,11 @@ private boolean containsSubqueryExpression(Node expr) { public RelNode visitProject(Project node, CalcitePlanContext context) { visitChildren(node, context); + // Flush accumulated filter conditions before schema-changing operations + if (context.isFilterAccumulationEnabled() && context.hasPendingFilterConditions()) { + context.flushFilterConditions(); + } + if (isSingleAllFieldsProject(node)) { return handleAllFieldsProject(node, context); } @@ -3239,4 +3267,36 @@ private RexNode createOptimizedTransliteration( throw new RuntimeException("Failed to optimize sed expression: " + sedExpression, e); } } + + /** + * Counts the number of filtering operations in an UnresolvedPlan tree that would create Filter + * RelNodes. This is used to detect queries with multiple regex/filter operations that could cause + * deep Filter RelNode chains and memory exhaustion. + * + * @param plan the UnresolvedPlan to analyze + * @return the count of filtering operations found + */ + private int countFilteringOperations(UnresolvedPlan plan) { + if (plan == null) { + return 0; + } + + int count = 0; + + // Count this node if it's a filtering operation + if (plan instanceof Regex || plan instanceof Filter) { + count = 1; + } + + // Recursively count filtering operations in children + if (plan.getChild() != null) { + for (Node child : plan.getChild()) { + if (child instanceof UnresolvedPlan) { + count += countFilteringOperations((UnresolvedPlan) child); + } + } + } + + return count; + } } From 8521325d13352dc0cd75727e9a296178b5f918f0 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Thu, 20 Nov 2025 21:32:21 -0800 Subject: [PATCH 02/12] fix unit tests Signed-off-by: Jialiang Liang --- .../ppl/calcite/CalcitePPLMultisearchTest.java | 17 ++++++++++------- .../sql/ppl/calcite/CalcitePPLRegexTest.java | 11 +++++------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMultisearchTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMultisearchTest.java index 7185c85aa1d..d31c25c57e9 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMultisearchTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMultisearchTest.java @@ -183,26 +183,29 @@ public void testMultisearchWithStats() { + " LogicalAggregate(group=[{0}], count=[COUNT()])\n" + " LogicalProject(type=[$8])\n" + " LogicalUnion(all=[true])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], type=['accounting':VARCHAR])\n" - + " LogicalFilter(condition=[=($7, 10)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], type=['research':VARCHAR])\n" - + " LogicalFilter(condition=[=($7, 20)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); + // SparkSQL reflects Filter above Project due to flush logic String expectedSparkSql = "SELECT COUNT(*) `count`, `type`\n" + + "FROM (SELECT *\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " 'accounting' `type`\n" - + "FROM `scott`.`EMP`\n" + + "FROM `scott`.`EMP`) `t`\n" + "WHERE `DEPTNO` = 10\n" + "UNION ALL\n" - + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " 'research' `type`\n" - + "FROM `scott`.`EMP`\n" + + "FROM `scott`.`EMP`) `t1`\n" + "WHERE `DEPTNO` = 20) `t3`\n" + "GROUP BY `type`"; verifyPPLToSparkSQL(root, expectedSparkSql); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRegexTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRegexTest.java index cfc0722bcfb..7e9d8a5c7bb 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRegexTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRegexTest.java @@ -38,19 +38,18 @@ public void testRegexBasic() { public void testRegexChainedFilters() { String ppl = "source=EMP | regex ENAME='A.*' | regex JOB='.*CLERK' | fields ENAME, JOB"; RelNode root = getRelNode(ppl); + // Filter accumulation combines multiple regex conditions into a single Filter with AND String expectedLogical = "LogicalProject(ENAME=[$1], JOB=[$2])\n" - + " LogicalFilter(condition=[REGEXP_CONTAINS($2, '.*CLERK':VARCHAR)])\n" - + " LogicalFilter(condition=[REGEXP_CONTAINS($1, 'A.*':VARCHAR)])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + + " LogicalFilter(condition=[AND(REGEXP_CONTAINS($1, 'A.*':VARCHAR)," + + " REGEXP_CONTAINS($2, '.*CLERK':VARCHAR))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `ENAME`, `JOB`\n" - + "FROM (SELECT *\n" + "FROM `scott`.`EMP`\n" - + "WHERE REGEXP_CONTAINS(`ENAME`, 'A.*')) `t`\n" - + "WHERE REGEXP_CONTAINS(`JOB`, '.*CLERK')"; + + "WHERE REGEXP_CONTAINS(`ENAME`, 'A.*') AND REGEXP_CONTAINS(`JOB`, '.*CLERK')"; verifyPPLToSparkSQL(root, expectedSparkSql); } From c85ed67b974b438f36eeadc9cdb190ba224e53a2 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 21 Nov 2025 00:24:46 -0800 Subject: [PATCH 03/12] fix tests Signed-off-by: Jialiang Liang --- .../sql/calcite/CalciteRelNodeVisitor.java | 94 +++++++++++++++++-- .../calcite/explain_filter_push.yaml | 6 +- ...plain_filter_push_compare_date_string.yaml | 7 +- ..._filter_push_compare_timestamp_string.yaml | 5 +- .../calcite/CalcitePPLMultisearchTest.java | 17 ++-- 5 files changed, 99 insertions(+), 30 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 1ce7d4509ab..b484e11cd8c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -182,8 +182,8 @@ public RelNode analyze(UnresolvedPlan unresolved, CalcitePlanContext context) { context.enableFilterAccumulation(); try { unresolved.accept(this, context); - context.flushFilterConditions(); // Flush accumulated conditions before returning - return context.relBuilder.peek(); // Get the result after flushing + context.flushFilterConditions(); + return context.relBuilder.peek(); } finally { context.disableFilterAccumulation(); } @@ -192,6 +192,17 @@ public RelNode analyze(UnresolvedPlan unresolved, CalcitePlanContext context) { } } + /** + * Flushes accumulated filter conditions before schema-changing operations. This prevents + * RexInputRef index mismatches that occur when filters reference field indices from the old + * schema. + */ + private void flushFiltersBeforeSchemaChange(CalcitePlanContext context) { + if (context.isFilterAccumulationEnabled() && context.hasPendingFilterConditions()) { + context.flushFilterConditions(); + } + } + @Override public RelNode visitRelation(Relation node, CalcitePlanContext context) { DataSourceSchemaIdentifierNameResolver nameResolver = @@ -407,10 +418,7 @@ private boolean containsSubqueryExpression(Node expr) { public RelNode visitProject(Project node, CalcitePlanContext context) { visitChildren(node, context); - // Flush accumulated filter conditions before schema-changing operations - if (context.isFilterAccumulationEnabled() && context.hasPendingFilterConditions()) { - context.flushFilterConditions(); - } + flushFiltersBeforeSchemaChange(context); if (isSingleAllFieldsProject(node)) { return handleAllFieldsProject(node, context); @@ -886,6 +894,9 @@ public RelNode visitPatterns(Patterns node, CalcitePlanContext context) { @Override public RelNode visitEval(Eval node, CalcitePlanContext context) { visitChildren(node, context); + + flushFiltersBeforeSchemaChange(context); + node.getExpressionList() .forEach( expr -> { @@ -1155,6 +1166,9 @@ private Pair, List> resolveAttributesForAggregation( /** Visits an aggregation for stats command */ @Override public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { + // Flush accumulated filter conditions before schema-changing aggregation operations + flushFiltersBeforeSchemaChange(context); + Argument.ArgumentMap statsArgs = Argument.ArgumentMap.of(node.getArgExprList()); Boolean bucketNullable = (Boolean) statsArgs.get(Argument.BUCKET_NULLABLE).getValue(); int nGroup = node.getGroupExprList().size() + (Objects.nonNull(node.getSpan()) ? 1 : 0); @@ -2273,10 +2287,26 @@ private RelNode mergeTableAndResolveColumnConflict( @Override public RelNode visitMultisearch(Multisearch node, CalcitePlanContext context) { List subsearchNodes = new ArrayList<>(); + // Save the current filter accumulation state - we'll process each subsearch independently + boolean wasFilterAccumulationEnabled = context.isFilterAccumulationEnabled(); + for (UnresolvedPlan subsearch : node.getSubsearches()) { UnresolvedPlan prunedSubSearch = subsearch.accept(new EmptySourcePropagateVisitor(), null); - prunedSubSearch.accept(this, context); + + // Temporarily disable filter accumulation so each subsearch gets its own independent + // lifecycle via analyze(). This prevents filter state from bleeding across branches. + if (wasFilterAccumulationEnabled) { + context.disableFilterAccumulation(); + } + + // Use analyze() to let each subsearch determine its own filter accumulation needs + analyze(prunedSubSearch, context); subsearchNodes.add(context.relBuilder.build()); + + // Restore filter accumulation state for the next iteration + if (wasFilterAccumulationEnabled) { + context.enableFilterAccumulation(); + } } // Use shared schema merging logic that handles type conflicts via field renaming @@ -3273,8 +3303,12 @@ private RexNode createOptimizedTransliteration( * RelNodes. This is used to detect queries with multiple regex/filter operations that could cause * deep Filter RelNode chains and memory exhaustion. * + *

Stops counting at schema-changing operations (like Aggregation, Project with computed + * expressions) to avoid enabling filter accumulation across schema boundaries, which would cause + * RexInputRef index mismatches. + * * @param plan the UnresolvedPlan to analyze - * @return the count of filtering operations found + * @return the count of filtering operations found before the first schema-changing operation */ private int countFilteringOperations(UnresolvedPlan plan) { if (plan == null) { @@ -3284,8 +3318,25 @@ private int countFilteringOperations(UnresolvedPlan plan) { int count = 0; // Count this node if it's a filtering operation - if (plan instanceof Regex || plan instanceof Filter) { + // BUT: Don't count Filter nodes that contain function calls, as they can cause + // type mismatches when accumulated and flushed later + if (plan instanceof Regex) { count = 1; + } else if (plan instanceof Filter) { + Filter filterNode = (Filter) plan; + if (!containsFunctionCall(filterNode.getCondition())) { + count = 1; + } + } + + // Stop counting at schema-changing operations to prevent accumulation across schema boundaries + // Schema-changing operations include: Aggregation, Eval, Project (with computed expressions), + // Window, StreamWindow, etc. + if (plan instanceof Aggregation + || plan instanceof Eval + || plan instanceof Window + || plan instanceof StreamWindow) { + return count; // Don't recurse into children beyond schema changes } // Recursively count filtering operations in children @@ -3299,4 +3350,29 @@ private int countFilteringOperations(UnresolvedPlan plan) { return count; } + + /** + * Checks if an expression contains any function calls. Filter expressions with function calls can + * cause type mismatches when accumulated and flushed later, so we exclude them from filter + * accumulation. + */ + private boolean containsFunctionCall(UnresolvedExpression expr) { + if (expr == null) { + return false; + } + + if (expr instanceof org.opensearch.sql.ast.expression.Function) { + return true; + } + + // Check children recursively + for (Node child : expr.getChild()) { + if (child instanceof UnresolvedExpression + && containsFunctionCall((UnresolvedExpression) child)) { + return true; + } + } + + return false; + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push.yaml index 11bf9baa46c..eb020cf0f92 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push.yaml @@ -2,9 +2,7 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(age=[$8]) - LogicalFilter(condition=[>($3, 10000)]) - LogicalFilter(condition=[<($8, 40)]) - LogicalFilter(condition=[>($8, 30)]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalFilter(condition=[AND(SEARCH($8, Sarg[(30..40)]), >($3, 10000))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, age], FILTER->AND(SEARCH($1, Sarg[(30..40)]), >($0, 10000)), PROJECT->[age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"range":{"age":{"from":30.0,"to":40.0,"include_lower":false,"include_upper":false,"boost":1.0}}},{"range":{"balance":{"from":10000,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml index 55951816ff7..7f604d806ee 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml @@ -1,9 +1,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalFilter(condition=[<($0, DATE('2018-11-09 00:00:00.000000000':VARCHAR))]) - LogicalFilter(condition=[>($0, DATE('2016-12-08 00:00:00.123456789':VARCHAR))]) - LogicalProject(yyyy-MM-dd=[$83]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) + LogicalFilter(condition=[AND(>($0, DATE('2016-12-08 00:00:00.123456789':VARCHAR)), <($0, DATE('2018-11-09 00:00:00.000000000':VARCHAR)))]) + LogicalProject(yyyy-MM-dd=[$83]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[yyyy-MM-dd], FILTER->SEARCH($0, Sarg[('2016-12-08':VARCHAR..'2018-11-09':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"yyyy-MM-dd":{"from":"2016-12-08","to":"2018-11-09","include_lower":false,"include_upper":false,"boost":1.0}}},"_source":{"includes":["yyyy-MM-dd"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml index e0a3fc8a7d3..954fab09d05 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml @@ -2,8 +2,7 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) - LogicalFilter(condition=[<($3, TIMESTAMP('2018-11-09 00:00:00.000000000':VARCHAR))]) - LogicalFilter(condition=[>($3, TIMESTAMP('2016-12-08 00:00:00.000000000':VARCHAR))]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalFilter(condition=[AND(>($3, TIMESTAMP('2016-12-08 00:00:00.000000000':VARCHAR)), <($3, TIMESTAMP('2018-11-09 00:00:00.000000000':VARCHAR)))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->SEARCH($3, Sarg[('2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"birthdate":{"from":"2016-12-08T00:00:00.000Z","to":"2018-11-09T00:00:00.000Z","include_lower":false,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMultisearchTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMultisearchTest.java index d31c25c57e9..7185c85aa1d 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMultisearchTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMultisearchTest.java @@ -183,29 +183,26 @@ public void testMultisearchWithStats() { + " LogicalAggregate(group=[{0}], count=[COUNT()])\n" + " LogicalProject(type=[$8])\n" + " LogicalUnion(all=[true])\n" - + " LogicalFilter(condition=[=($7, 10)])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], type=['accounting':VARCHAR])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n" - + " LogicalFilter(condition=[=($7, 20)])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], type=['research':VARCHAR])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - // SparkSQL reflects Filter above Project due to flush logic String expectedSparkSql = "SELECT COUNT(*) `count`, `type`\n" - + "FROM (SELECT *\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " 'accounting' `type`\n" - + "FROM `scott`.`EMP`) `t`\n" + + "FROM `scott`.`EMP`\n" + "WHERE `DEPTNO` = 10\n" + "UNION ALL\n" - + "SELECT *\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " 'research' `type`\n" - + "FROM `scott`.`EMP`) `t1`\n" + + "FROM `scott`.`EMP`\n" + "WHERE `DEPTNO` = 20) `t3`\n" + "GROUP BY `type`"; verifyPPLToSparkSQL(root, expectedSparkSql); From 19a82a03e0cc2be684f2f1fab825c0133de2647b Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 21 Nov 2025 01:39:31 -0800 Subject: [PATCH 04/12] fix explain tests and corresponding commands Signed-off-by: Jialiang Liang --- .../org/opensearch/sql/calcite/CalciteRelNodeVisitor.java | 5 +++++ .../calcite/explain_filter_push_compare_time_string.yaml | 7 +++---- .../calcite_no_pushdown/explain_filter_push.yaml | 6 ++---- .../explain_filter_push_compare_date_string.yaml | 7 +++---- .../explain_filter_push_compare_time_string.yaml | 7 +++---- .../explain_filter_push_compare_timestamp_string.yaml | 5 ++--- 6 files changed, 18 insertions(+), 19 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index b484e11cd8c..0b0527d3836 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -334,6 +334,8 @@ public RelNode visitRegex(Regex node, CalcitePlanContext context) { public RelNode visitRex(Rex node, CalcitePlanContext context) { visitChildren(node, context); + flushFiltersBeforeSchemaChange(context); + RexNode fieldRex = rexVisitor.analyze(node.getField(), context); String patternStr = (String) node.getPattern().getValue(); @@ -734,6 +736,8 @@ public RelNode visitReverse( public RelNode visitBin(Bin node, CalcitePlanContext context) { visitChildren(node, context); + flushFiltersBeforeSchemaChange(context); + RexNode fieldExpr = rexVisitor.analyze(node.getField(), context); String fieldName = BinUtils.extractFieldName(node); @@ -748,6 +752,7 @@ public RelNode visitBin(Bin node, CalcitePlanContext context) { @Override public RelNode visitParse(Parse node, CalcitePlanContext context) { visitChildren(node, context); + flushFiltersBeforeSchemaChange(context); buildParseRelNode(node, context); return context.relBuilder.peek(); } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml index faf6a3764c5..475bc85ecb2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml @@ -1,9 +1,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalFilter(condition=[<($0, TIME('2018-11-09 19:00:00.123456789':VARCHAR))]) - LogicalFilter(condition=[>($0, TIME('2016-12-08 12:00:00.123456789':VARCHAR))]) - LogicalProject(custom_time=[$49]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) + LogicalFilter(condition=[AND(>($0, TIME('2016-12-08 12:00:00.123456789':VARCHAR)), <($0, TIME('2018-11-09 19:00:00.123456789':VARCHAR)))]) + LogicalProject(custom_time=[$49]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[custom_time], FILTER->SEARCH($0, Sarg[('12:00:00.123456789':VARCHAR..'19:00:00.123456789':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"custom_time":{"from":"12:00:00.123456789","to":"19:00:00.123456789","include_lower":false,"include_upper":false,"boost":1.0}}},"_source":{"includes":["custom_time"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push.yaml index ff9e2ed0ec1..798a7bbc778 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push.yaml @@ -2,10 +2,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(age=[$8]) - LogicalFilter(condition=[>($3, 10000)]) - LogicalFilter(condition=[<($8, 40)]) - LogicalFilter(condition=[>($8, 30)]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalFilter(condition=[AND(>($8, 30), <($8, 40), >($3, 10000))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..16=[{inputs}], expr#17=[Sarg[(30..40)]], expr#18=[SEARCH($t8, $t17)], expr#19=[10000], expr#20=[>($t3, $t19)], expr#21=[AND($t18, $t20)], age=[$t8], $condition=[$t21]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_date_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_date_string.yaml index a8f52a8ac7e..f8fcc3a23a7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_date_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_date_string.yaml @@ -1,10 +1,9 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalFilter(condition=[<($0, DATE('2018-11-09 00:00:00.000000000':VARCHAR))]) - LogicalFilter(condition=[>($0, DATE('2016-12-08 00:00:00.123456789':VARCHAR))]) - LogicalProject(yyyy-MM-dd=[$83]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) + LogicalFilter(condition=[AND(>($0, DATE('2016-12-08 00:00:00.123456789':VARCHAR)), <($0, DATE('2018-11-09 00:00:00.000000000':VARCHAR)))]) + LogicalProject(yyyy-MM-dd=[$83]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..94=[{inputs}], expr#95=[Sarg[('2016-12-08':VARCHAR..'2018-11-09':VARCHAR)]:VARCHAR], expr#96=[SEARCH($t83, $t95)], yyyy-MM-dd=[$t83], $condition=[$t96]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_time_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_time_string.yaml index 72c738eaed0..4634cfaaa47 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_time_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_time_string.yaml @@ -1,10 +1,9 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalFilter(condition=[<($0, TIME('2018-11-09 19:00:00.123456789':VARCHAR))]) - LogicalFilter(condition=[>($0, TIME('2016-12-08 12:00:00.123456789':VARCHAR))]) - LogicalProject(custom_time=[$49]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) + LogicalFilter(condition=[AND(>($0, TIME('2016-12-08 12:00:00.123456789':VARCHAR)), <($0, TIME('2018-11-09 19:00:00.123456789':VARCHAR)))]) + LogicalProject(custom_time=[$49]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..94=[{inputs}], expr#95=[Sarg[('12:00:00.123456789':VARCHAR..'19:00:00.123456789':VARCHAR)]:VARCHAR], expr#96=[SEARCH($t49, $t95)], custom_time=[$t49], $condition=[$t96]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_timestamp_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_timestamp_string.yaml index 424444f8dc4..20f0cbf4238 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_timestamp_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_timestamp_string.yaml @@ -2,9 +2,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) - LogicalFilter(condition=[<($3, TIMESTAMP('2018-11-09 00:00:00.000000000':VARCHAR))]) - LogicalFilter(condition=[>($3, TIMESTAMP('2016-12-08 00:00:00.000000000':VARCHAR))]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalFilter(condition=[AND(>($3, TIMESTAMP('2016-12-08 00:00:00.000000000':VARCHAR)), <($3, TIMESTAMP('2018-11-09 00:00:00.000000000':VARCHAR)))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..18=[{inputs}], expr#19=[Sarg[('2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR], expr#20=[SEARCH($t3, $t19)], proj#0..12=[{exprs}], $condition=[$t20]) From 35c56c5e4a11c9301edd4436c0b0514c9c1884e3 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 21 Nov 2025 02:17:32 -0800 Subject: [PATCH 05/12] fix explain tests for testFilterPushDownExplain Signed-off-by: Jialiang Liang --- .../expectedOutput/calcite_no_pushdown/explain_filter_push.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push.yaml index 798a7bbc778..d1f0cead8f6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push.yaml @@ -2,7 +2,7 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(age=[$8]) - LogicalFilter(condition=[AND(>($8, 30), <($8, 40), >($3, 10000))]) + LogicalFilter(condition=[AND(SEARCH($8, Sarg[(30..40)]), >($3, 10000))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) From 3dfd44b476c21395f0a7a8e763be15435e5535ab Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 1 Dec 2025 12:02:21 -0800 Subject: [PATCH 06/12] peng - isolate the fix logic to its own visitor class Signed-off-by: Jialiang Liang --- .../sql/calcite/CalcitePlanContext.java | 55 ------- .../sql/calcite/CalciteRelNodeVisitor.java | 149 +----------------- .../sql/calcite/FilterMergeVisitor.java | 83 ++++++++++ 3 files changed, 91 insertions(+), 196 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/calcite/FilterMergeVisitor.java diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java index 9c5e4bee595..669d8452dc0 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java @@ -8,7 +8,6 @@ import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; import java.sql.Connection; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -62,12 +61,6 @@ public class CalcitePlanContext { @Getter public Map rexLambdaRefMap; - /** Accumulated filter conditions to prevent deep Filter node chains */ - private final List pendingFilterConditions = new ArrayList<>(); - - /** Flag to indicate if filter accumulation mode is active */ - @Getter @Setter private boolean filterAccumulationEnabled = false; - private CalcitePlanContext(FrameworkConfig config, SysLimit sysLimit, QueryType queryType) { this.config = config; this.sysLimit = sysLimit; @@ -141,52 +134,4 @@ public static boolean isLegacyPreferred() { public void putRexLambdaRefMap(Map candidateMap) { this.rexLambdaRefMap.putAll(candidateMap); } - - /** - * Adds a filter condition to the accumulation list instead of creating immediate Filter RelNode. - * This prevents deep Filter node chains that cause memory explosion. - */ - public void addFilterCondition(RexNode condition) { - pendingFilterConditions.add(condition); - } - - /** - * Applies all accumulated filter conditions as a single Filter RelNode with AND operations. This - * creates a single Filter node instead of a deep chain of Filter nodes. - */ - public void flushFilterConditions() { - if (pendingFilterConditions.isEmpty()) { - return; - } - - if (pendingFilterConditions.size() == 1) { - relBuilder.filter(pendingFilterConditions.get(0)); - } else { - // Combine all filter conditions with AND - RexNode combinedCondition = relBuilder.and(pendingFilterConditions); - relBuilder.filter(combinedCondition); - } - pendingFilterConditions.clear(); - } - - /** - * Enables filter accumulation mode to prevent deep Filter node chains. Should be called before - * processing multiple filter operations. - */ - public void enableFilterAccumulation() { - filterAccumulationEnabled = true; - } - - /** - * Disables filter accumulation mode. Should be called after processing multiple filter - * operations. - */ - public void disableFilterAccumulation() { - filterAccumulationEnabled = false; - } - - /** Returns true if there are pending filter conditions that need to be flushed. */ - public boolean hasPendingFilterConditions() { - return !pendingFilterConditions.isEmpty(); - } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 0b0527d3836..c35599f53da 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -176,31 +176,13 @@ public CalciteRelNodeVisitor(DataSourceService dataSourceService) { } public RelNode analyze(UnresolvedPlan unresolved, CalcitePlanContext context) { - // Enable filter accumulation if this plan contains multiple filtering operations - // that could create deep Filter RelNode chains - if (countFilteringOperations(unresolved) >= 2) { - context.enableFilterAccumulation(); - try { - unresolved.accept(this, context); - context.flushFilterConditions(); - return context.relBuilder.peek(); - } finally { - context.disableFilterAccumulation(); - } - } else { - return unresolved.accept(this, context); - } - } + // Build the RelNode tree (may contain deep Filter chains) + RelNode relNode = unresolved.accept(this, context); - /** - * Flushes accumulated filter conditions before schema-changing operations. This prevents - * RexInputRef index mismatches that occur when filters reference field indices from the old - * schema. - */ - private void flushFiltersBeforeSchemaChange(CalcitePlanContext context) { - if (context.isFilterAccumulationEnabled() && context.hasPendingFilterConditions()) { - context.flushFilterConditions(); - } + // Apply filter merge optimization as post-processing + // This merges consecutive LogicalFilter nodes to prevent OOM with deep chains + FilterMergeVisitor filterMergeVisitor = new FilterMergeVisitor(); + return relNode.accept(filterMergeVisitor); } @Override @@ -268,12 +250,7 @@ public RelNode visitFilter(Filter node, CalcitePlanContext context) { context.relBuilder.filter(ImmutableList.of(v.get().id), condition); context.popCorrelVar(); } else { - // Use filter accumulation to prevent deep Filter node chains - if (context.isFilterAccumulationEnabled()) { - context.addFilterCondition(condition); - } else { - context.relBuilder.filter(condition); - } + context.relBuilder.filter(condition); } return context.relBuilder.peek(); } @@ -322,20 +299,13 @@ public RelNode visitRegex(Regex node, CalcitePlanContext context) { regexCondition = context.rexBuilder.makeCall(SqlStdOperatorTable.NOT, regexCondition); } - // Use filter accumulation to prevent deep Filter node chains - if (context.isFilterAccumulationEnabled()) { - context.addFilterCondition(regexCondition); - } else { - context.relBuilder.filter(regexCondition); - } + context.relBuilder.filter(regexCondition); return context.relBuilder.peek(); } public RelNode visitRex(Rex node, CalcitePlanContext context) { visitChildren(node, context); - flushFiltersBeforeSchemaChange(context); - RexNode fieldRex = rexVisitor.analyze(node.getField(), context); String patternStr = (String) node.getPattern().getValue(); @@ -420,8 +390,6 @@ private boolean containsSubqueryExpression(Node expr) { public RelNode visitProject(Project node, CalcitePlanContext context) { visitChildren(node, context); - flushFiltersBeforeSchemaChange(context); - if (isSingleAllFieldsProject(node)) { return handleAllFieldsProject(node, context); } @@ -736,8 +704,6 @@ public RelNode visitReverse( public RelNode visitBin(Bin node, CalcitePlanContext context) { visitChildren(node, context); - flushFiltersBeforeSchemaChange(context); - RexNode fieldExpr = rexVisitor.analyze(node.getField(), context); String fieldName = BinUtils.extractFieldName(node); @@ -752,7 +718,6 @@ public RelNode visitBin(Bin node, CalcitePlanContext context) { @Override public RelNode visitParse(Parse node, CalcitePlanContext context) { visitChildren(node, context); - flushFiltersBeforeSchemaChange(context); buildParseRelNode(node, context); return context.relBuilder.peek(); } @@ -900,8 +865,6 @@ public RelNode visitPatterns(Patterns node, CalcitePlanContext context) { public RelNode visitEval(Eval node, CalcitePlanContext context) { visitChildren(node, context); - flushFiltersBeforeSchemaChange(context); - node.getExpressionList() .forEach( expr -> { @@ -1171,9 +1134,6 @@ private Pair, List> resolveAttributesForAggregation( /** Visits an aggregation for stats command */ @Override public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { - // Flush accumulated filter conditions before schema-changing aggregation operations - flushFiltersBeforeSchemaChange(context); - Argument.ArgumentMap statsArgs = Argument.ArgumentMap.of(node.getArgExprList()); Boolean bucketNullable = (Boolean) statsArgs.get(Argument.BUCKET_NULLABLE).getValue(); int nGroup = node.getGroupExprList().size() + (Objects.nonNull(node.getSpan()) ? 1 : 0); @@ -2292,26 +2252,11 @@ private RelNode mergeTableAndResolveColumnConflict( @Override public RelNode visitMultisearch(Multisearch node, CalcitePlanContext context) { List subsearchNodes = new ArrayList<>(); - // Save the current filter accumulation state - we'll process each subsearch independently - boolean wasFilterAccumulationEnabled = context.isFilterAccumulationEnabled(); for (UnresolvedPlan subsearch : node.getSubsearches()) { UnresolvedPlan prunedSubSearch = subsearch.accept(new EmptySourcePropagateVisitor(), null); - - // Temporarily disable filter accumulation so each subsearch gets its own independent - // lifecycle via analyze(). This prevents filter state from bleeding across branches. - if (wasFilterAccumulationEnabled) { - context.disableFilterAccumulation(); - } - - // Use analyze() to let each subsearch determine its own filter accumulation needs analyze(prunedSubSearch, context); subsearchNodes.add(context.relBuilder.build()); - - // Restore filter accumulation state for the next iteration - if (wasFilterAccumulationEnabled) { - context.enableFilterAccumulation(); - } } // Use shared schema merging logic that handles type conflicts via field renaming @@ -3302,82 +3247,4 @@ private RexNode createOptimizedTransliteration( throw new RuntimeException("Failed to optimize sed expression: " + sedExpression, e); } } - - /** - * Counts the number of filtering operations in an UnresolvedPlan tree that would create Filter - * RelNodes. This is used to detect queries with multiple regex/filter operations that could cause - * deep Filter RelNode chains and memory exhaustion. - * - *

Stops counting at schema-changing operations (like Aggregation, Project with computed - * expressions) to avoid enabling filter accumulation across schema boundaries, which would cause - * RexInputRef index mismatches. - * - * @param plan the UnresolvedPlan to analyze - * @return the count of filtering operations found before the first schema-changing operation - */ - private int countFilteringOperations(UnresolvedPlan plan) { - if (plan == null) { - return 0; - } - - int count = 0; - - // Count this node if it's a filtering operation - // BUT: Don't count Filter nodes that contain function calls, as they can cause - // type mismatches when accumulated and flushed later - if (plan instanceof Regex) { - count = 1; - } else if (plan instanceof Filter) { - Filter filterNode = (Filter) plan; - if (!containsFunctionCall(filterNode.getCondition())) { - count = 1; - } - } - - // Stop counting at schema-changing operations to prevent accumulation across schema boundaries - // Schema-changing operations include: Aggregation, Eval, Project (with computed expressions), - // Window, StreamWindow, etc. - if (plan instanceof Aggregation - || plan instanceof Eval - || plan instanceof Window - || plan instanceof StreamWindow) { - return count; // Don't recurse into children beyond schema changes - } - - // Recursively count filtering operations in children - if (plan.getChild() != null) { - for (Node child : plan.getChild()) { - if (child instanceof UnresolvedPlan) { - count += countFilteringOperations((UnresolvedPlan) child); - } - } - } - - return count; - } - - /** - * Checks if an expression contains any function calls. Filter expressions with function calls can - * cause type mismatches when accumulated and flushed later, so we exclude them from filter - * accumulation. - */ - private boolean containsFunctionCall(UnresolvedExpression expr) { - if (expr == null) { - return false; - } - - if (expr instanceof org.opensearch.sql.ast.expression.Function) { - return true; - } - - // Check children recursively - for (Node child : expr.getChild()) { - if (child instanceof UnresolvedExpression - && containsFunctionCall((UnresolvedExpression) child)) { - return true; - } - } - - return false; - } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/FilterMergeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/FilterMergeVisitor.java new file mode 100644 index 00000000000..f698ea7d444 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/FilterMergeVisitor.java @@ -0,0 +1,83 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite; + +import java.util.ArrayList; +import java.util.List; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelShuttleImpl; +import org.apache.calcite.rel.logical.LogicalFilter; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; + +/** + * A RelNode visitor that merges consecutive LogicalFilter nodes into a single filter with combined + * AND conditions. This prevents deep Filter RelNode chains that cause memory exhaustion (OOM) with + * multiple filter operations. + * + *

Example transformation: + * + *

+ * BEFORE:
+ *   LogicalFilter(age > 30)
+ *     LogicalFilter(age < 40)
+ *       LogicalFilter(balance > 10000)
+ *         TableScan
+ *
+ * AFTER:
+ *   LogicalFilter(AND(age > 30, age < 40, balance > 10000))
+ *     TableScan
+ * 
+ * + * This is a post-processing optimization that runs after the RelNode tree is constructed by + * CalciteRelNodeVisitor. + */ +public class FilterMergeVisitor extends RelShuttleImpl { + + /** + * Visits a LogicalFilter node and merges it with consecutive child LogicalFilter nodes. + * + * @param filter the LogicalFilter node to visit + * @return the merged filter or the original filter if no merging is needed + */ + @Override + public RelNode visit(LogicalFilter filter) { + RelNode newInput = filter.getInput().accept(this); + + List conditions = new ArrayList<>(); + conditions.add(filter.getCondition()); + + RelNode current = newInput; + while (current instanceof LogicalFilter) { + LogicalFilter childFilter = (LogicalFilter) current; + conditions.add(childFilter.getCondition()); + current = childFilter.getInput(); + } + + // If we collected multiple conditions, merge them + if (conditions.size() > 1) { + RelOptCluster cluster = filter.getCluster(); + RexBuilder rexBuilder = cluster.getRexBuilder(); + + // Combine all conditions with AND + RexNode combinedCondition = + rexBuilder.makeCall(org.apache.calcite.sql.fun.SqlStdOperatorTable.AND, conditions); + + // Simplify the combined condition (e.g., remove redundant TRUE, optimize) + combinedCondition = org.apache.calcite.rex.RexUtil.simplify(rexBuilder, combinedCondition); + + // Create a new filter with the combined condition + return LogicalFilter.create(current, combinedCondition); + } + + if (newInput != filter.getInput()) { + return filter.copy(filter.getTraitSet(), newInput, filter.getCondition()); + } + + return filter; + } +} From ad43837802ace79eaff2499580b859967db4ef0e Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 1 Dec 2025 14:08:51 -0800 Subject: [PATCH 07/12] Directly apply Calcite CoreRules.FILTER_MERGE before VolcanoPlanner plan Co-authored-by: Peng Huo Signed-off-by: Jialiang Liang --- .../sql/calcite/CalciteRelNodeVisitor.java | 8 +- .../sql/calcite/FilterMergeVisitor.java | 83 ------------------- .../opensearch/sql/executor/QueryService.java | 18 ++++ 3 files changed, 19 insertions(+), 90 deletions(-) delete mode 100644 core/src/main/java/org/opensearch/sql/calcite/FilterMergeVisitor.java diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index c35599f53da..0316f2b37b4 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -176,13 +176,7 @@ public CalciteRelNodeVisitor(DataSourceService dataSourceService) { } public RelNode analyze(UnresolvedPlan unresolved, CalcitePlanContext context) { - // Build the RelNode tree (may contain deep Filter chains) - RelNode relNode = unresolved.accept(this, context); - - // Apply filter merge optimization as post-processing - // This merges consecutive LogicalFilter nodes to prevent OOM with deep chains - FilterMergeVisitor filterMergeVisitor = new FilterMergeVisitor(); - return relNode.accept(filterMergeVisitor); + return unresolved.accept(this, context); } @Override diff --git a/core/src/main/java/org/opensearch/sql/calcite/FilterMergeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/FilterMergeVisitor.java deleted file mode 100644 index f698ea7d444..00000000000 --- a/core/src/main/java/org/opensearch/sql/calcite/FilterMergeVisitor.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.calcite; - -import java.util.ArrayList; -import java.util.List; -import org.apache.calcite.plan.RelOptCluster; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.RelShuttleImpl; -import org.apache.calcite.rel.logical.LogicalFilter; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexNode; - -/** - * A RelNode visitor that merges consecutive LogicalFilter nodes into a single filter with combined - * AND conditions. This prevents deep Filter RelNode chains that cause memory exhaustion (OOM) with - * multiple filter operations. - * - *

Example transformation: - * - *

- * BEFORE:
- *   LogicalFilter(age > 30)
- *     LogicalFilter(age < 40)
- *       LogicalFilter(balance > 10000)
- *         TableScan
- *
- * AFTER:
- *   LogicalFilter(AND(age > 30, age < 40, balance > 10000))
- *     TableScan
- * 
- * - * This is a post-processing optimization that runs after the RelNode tree is constructed by - * CalciteRelNodeVisitor. - */ -public class FilterMergeVisitor extends RelShuttleImpl { - - /** - * Visits a LogicalFilter node and merges it with consecutive child LogicalFilter nodes. - * - * @param filter the LogicalFilter node to visit - * @return the merged filter or the original filter if no merging is needed - */ - @Override - public RelNode visit(LogicalFilter filter) { - RelNode newInput = filter.getInput().accept(this); - - List conditions = new ArrayList<>(); - conditions.add(filter.getCondition()); - - RelNode current = newInput; - while (current instanceof LogicalFilter) { - LogicalFilter childFilter = (LogicalFilter) current; - conditions.add(childFilter.getCondition()); - current = childFilter.getInput(); - } - - // If we collected multiple conditions, merge them - if (conditions.size() > 1) { - RelOptCluster cluster = filter.getCluster(); - RexBuilder rexBuilder = cluster.getRexBuilder(); - - // Combine all conditions with AND - RexNode combinedCondition = - rexBuilder.makeCall(org.apache.calcite.sql.fun.SqlStdOperatorTable.AND, conditions); - - // Simplify the combined condition (e.g., remove redundant TRUE, optimize) - combinedCondition = org.apache.calcite.rex.RexUtil.simplify(rexBuilder, combinedCondition); - - // Create a new filter with the combined condition - return LogicalFilter.create(current, combinedCondition); - } - - if (newInput != filter.getInput()) { - return filter.copy(filter.getTraitSet(), newInput, filter.getCondition()); - } - - return filter; - } -} diff --git a/core/src/main/java/org/opensearch/sql/executor/QueryService.java b/core/src/main/java/org/opensearch/sql/executor/QueryService.java index e4a6c5da21c..37ffe739af9 100644 --- a/core/src/main/java/org/opensearch/sql/executor/QueryService.java +++ b/core/src/main/java/org/opensearch/sql/executor/QueryService.java @@ -16,11 +16,15 @@ import lombok.extern.log4j.Log4j2; import org.apache.calcite.jdbc.CalciteSchema; import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.plan.hep.HepProgramBuilder; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.logical.LogicalSort; +import org.apache.calcite.rel.rules.FilterMergeRule; import org.apache.calcite.schema.SchemaPlus; import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.tools.FrameworkConfig; @@ -100,6 +104,7 @@ public void executeWithCalcite( CalcitePlanContext.create( buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType); RelNode relNode = analyze(plan, context); + relNode = mergeAdjacentFilters(relNode); RelNode optimized = optimize(relNode, context); RelNode calcitePlan = convertToCalcitePlan(optimized); executionEngine.execute(calcitePlan, context, listener); @@ -145,6 +150,7 @@ public void explainWithCalcite( context.run( () -> { RelNode relNode = analyze(plan, context); + relNode = mergeAdjacentFilters(relNode); RelNode optimized = optimize(relNode, context); RelNode calcitePlan = convertToCalcitePlan(optimized); executionEngine.explain(calcitePlan, format, context, listener); @@ -259,6 +265,18 @@ public RelNode analyze(UnresolvedPlan plan, CalcitePlanContext context) { return getRelNodeVisitor().analyze(plan, context); } + /** + * Run Calcite FILTER_MERGE once so adjacent filters created during analysis can collapse before + * the rest of optimization. + */ + private RelNode mergeAdjacentFilters(RelNode relNode) { + HepProgram program = + new HepProgramBuilder().addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()).build(); + HepPlanner planner = new HepPlanner(program); + planner.setRoot(relNode); + return planner.findBestExp(); + } + /** Analyze {@link UnresolvedPlan}. */ public LogicalPlan analyze(UnresolvedPlan plan, QueryType queryType) { return analyzer.analyze(plan, new AnalysisContext(queryType)); From ae78fdadde08999bd8a04ab44bdd49ea744ee0d0 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 1 Dec 2025 14:30:21 -0800 Subject: [PATCH 08/12] fix the UTs Signed-off-by: Jialiang Liang --- .../sql/ppl/calcite/CalcitePPLAbstractTest.java | 13 +++++++++++++ .../sql/ppl/calcite/CalcitePPLTrendlineTest.java | 9 +++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java index 9dd01b30df5..ab07cd9b5c1 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java @@ -22,9 +22,13 @@ import lombok.Getter; import org.apache.calcite.plan.Contexts; import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.plan.hep.HepProgramBuilder; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.rel2sql.RelToSqlConverter; import org.apache.calcite.rel.rel2sql.SqlImplementor; +import org.apache.calcite.rel.rules.FilterMergeRule; import org.apache.calcite.schema.SchemaPlus; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.parser.SqlParser; @@ -101,10 +105,19 @@ public RelNode getRelNode(String ppl) { Query query = (Query) plan(pplParser, ppl); planTransformer.analyze(query.getPlan(), context); RelNode root = context.relBuilder.build(); + root = mergeAdjacentFilters(root); System.out.println(root.explain()); return root; } + private RelNode mergeAdjacentFilters(RelNode relNode) { + HepProgram program = + new HepProgramBuilder().addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()).build(); + HepPlanner planner = new HepPlanner(program); + planner.setRoot(relNode); + return planner.findBestExp(); + } + private Node plan(PPLSyntaxParser parser, String query) { final AstStatementBuilder builder = new AstStatementBuilder( diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java index b036a4b5906..3c23af4b7a6 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java @@ -76,9 +76,8 @@ public void testTrendlineMultipleFields() { + " DEPTNO_trendline=[CASE(>(COUNT() OVER (ROWS 1 PRECEDING), 1), /(SUM($7) OVER (ROWS" + " 1 PRECEDING), CAST(COUNT($7) OVER (ROWS 1 PRECEDING)):DOUBLE NOT NULL)," + " null:NULL)])\n" - + " LogicalFilter(condition=[IS NOT NULL($7)])\n" - + " LogicalFilter(condition=[IS NOT NULL($5)])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + + " LogicalFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($7))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = @@ -89,10 +88,8 @@ public void testTrendlineMultipleFields() { + " BETWEEN 1 PRECEDING AND CURRENT ROW)) > 1 THEN (SUM(`DEPTNO`) OVER (ROWS BETWEEN 1" + " PRECEDING AND CURRENT ROW)) / CAST(COUNT(`DEPTNO`) OVER (ROWS BETWEEN 1 PRECEDING" + " AND CURRENT ROW) AS DOUBLE) ELSE NULL END `DEPTNO_trendline`\n" - + "FROM (SELECT *\n" + "FROM `scott`.`EMP`\n" - + "WHERE `SAL` IS NOT NULL) `t`\n" - + "WHERE `DEPTNO` IS NOT NULL"; + + "WHERE `SAL` IS NOT NULL AND `DEPTNO` IS NOT NULL"; verifyPPLToSparkSQL(root, expectedSparkSql); } } From 5e9ce99db74895314861a2897e8f5a143d784a60 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 1 Dec 2025 15:12:14 -0800 Subject: [PATCH 09/12] fix the ITs after rebase Signed-off-by: Jialiang Liang --- .../calcite/big5/composite_date_histogram_daily.yaml | 5 ++--- .../expectedOutput/calcite/big5/composite_terms.yaml | 5 ++--- .../expectedOutput/calcite/big5/composite_terms_keyword.yaml | 5 ++--- .../calcite/big5/date_histogram_minute_agg.yaml | 5 ++--- .../expectedOutput/calcite/big5/keyword_in_range.yaml | 5 ++--- .../expectedOutput/calcite/big5/multi_terms_keyword.yaml | 5 ++--- .../expectedOutput/calcite/explain_filter_with_search.yaml | 5 ++--- 7 files changed, 14 insertions(+), 21 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml index 10023133a38..0a6cd247367 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(count()=[$1], span(`@timestamp`,1d)=[$0]) LogicalAggregate(group=[{0}], count()=[COUNT()]) LogicalProject(span(`@timestamp`,1d)=[SPAN($17, 1, 'd')]) - LogicalFilter(condition=[IS NOT NULL($17)]) - LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))]) - CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) + LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)), IS NOT NULL($17))]) + CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml index cc3af323ddf..1b6fc72198d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(count()=[$2], process.name=[$0], cloud.region=[$1]) LogicalAggregate(group=[{0, 1}], count()=[COUNT()]) LogicalProject(process.name=[$7], cloud.region=[$14]) - LogicalFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($14))]) - LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)))]) - CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) + LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)), IS NOT NULL($7), IS NOT NULL($14))]) + CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml index 9e546a26dbf..8885be7967d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(count()=[$3], process.name=[$0], cloud.region=[$1], aws.cloudwatch.log_stream=[$2]) LogicalAggregate(group=[{0, 1, 2}], count()=[COUNT()]) LogicalProject(process.name=[$7], cloud.region=[$14], aws.cloudwatch.log_stream=[$34]) - LogicalFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($14), IS NOT NULL($34))]) - LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)))]) - CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) + LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)), IS NOT NULL($7), IS NOT NULL($14), IS NOT NULL($34))]) + CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp, aws.cloudwatch.log_stream], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), process.name, cloud.region, aws.cloudwatch.log_stream], SORT->[1 DESC LAST, 2 ASC FIRST, 3 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml index 44b15522967..ef9698b93e3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml @@ -4,8 +4,7 @@ calcite: LogicalProject(count()=[$1], span(`@timestamp`,1m)=[$0]) LogicalAggregate(group=[{0}], count()=[COUNT()]) LogicalProject(span(`@timestamp`,1m)=[SPAN($17, 1, 'm')]) - LogicalFilter(condition=[IS NOT NULL($17)]) - LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) - CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) + LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)), IS NOT NULL($17))]) + CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml index 85c08cf100c..e1f0873de61 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml @@ -3,8 +3,7 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(agent=[$0], process=[$6], log=[$8], message=[$11], tags=[$12], cloud=[$13], input=[$15], @timestamp=[$17], ecs=[$18], data_stream=[$20], meta=[$24], host=[$26], metrics=[$27], aws=[$30], event=[$35]) LogicalSort(fetch=[10]) - LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) - LogicalFilter(condition=[query_string(MAP('query', 'process.name:kernel':VARCHAR))]) - CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) + LogicalFilter(condition=[AND(query_string(MAP('query', 'process.name:kernel':VARCHAR)), >=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) + CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->AND(query_string(MAP('query', 'process.name:kernel':VARCHAR)), SEARCH($7, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR)), LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml index 89708ca4d4b..0470fe7872d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(count()=[$2], process.name=[$0], cloud.region=[$1]) LogicalAggregate(group=[{0, 1}], count()=[COUNT()]) LogicalProject(process.name=[$7], cloud.region=[$14]) - LogicalFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($14))]) - LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-05 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-05 05:00:00':VARCHAR)))]) - CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) + LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-05 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-05 05:00:00':VARCHAR)), IS NOT NULL($7), IS NOT NULL($14))]) + CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), process.name, cloud.region], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml index bd8114a7989..3b779911368 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml @@ -4,8 +4,7 @@ calcite: LogicalProject(count()=[$1], span(birthdate,1d)=[$0]) LogicalAggregate(group=[{0}], count()=[COUNT()]) LogicalProject(span(birthdate,1d)=[SPAN($3, 1, 'd')]) - LogicalFilter(condition=[IS NOT NULL($3)]) - LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($3, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($3, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)), IS NOT NULL($3))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file From 3dc994b87ed27ccd54115343602bc182ce1f0c2b Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 1 Dec 2025 17:16:36 -0800 Subject: [PATCH 10/12] fix clickbench IT and more ITs Signed-off-by: Jialiang Liang --- .../calcite/big5/composite_date_histogram_daily.yaml | 4 ++-- .../expectedOutput/calcite/big5/composite_terms.yaml | 2 +- .../calcite/big5/composite_terms_keyword.yaml | 2 +- .../calcite/big5/date_histogram_minute_agg.yaml | 4 ++-- .../expectedOutput/calcite/big5/multi_terms_keyword.yaml | 2 +- .../resources/expectedOutput/calcite/clickbench/q11.yaml | 5 ++--- .../resources/expectedOutput/calcite/clickbench/q12.yaml | 5 ++--- .../resources/expectedOutput/calcite/clickbench/q13.yaml | 5 ++--- .../resources/expectedOutput/calcite/clickbench/q14.yaml | 5 ++--- .../resources/expectedOutput/calcite/clickbench/q15.yaml | 5 ++--- .../resources/expectedOutput/calcite/clickbench/q22.yaml | 5 ++--- .../resources/expectedOutput/calcite/clickbench/q23.yaml | 5 ++--- .../resources/expectedOutput/calcite/clickbench/q28.yaml | 5 ++--- .../resources/expectedOutput/calcite/clickbench/q31.yaml | 7 +++---- .../resources/expectedOutput/calcite/clickbench/q32.yaml | 7 +++---- .../resources/expectedOutput/calcite/clickbench/q37.yaml | 5 ++--- .../resources/expectedOutput/calcite/clickbench/q38.yaml | 5 ++--- .../resources/expectedOutput/calcite/clickbench/q39.yaml | 7 +++---- .../resources/expectedOutput/calcite/clickbench/q41.yaml | 7 +++---- .../resources/expectedOutput/calcite/clickbench/q42.yaml | 7 +++---- .../resources/expectedOutput/calcite/clickbench/q43.yaml | 7 +++---- .../resources/expectedOutput/calcite/clickbench/q8.yaml | 5 ++--- .../expectedOutput/calcite/explain_filter_with_search.yaml | 4 ++-- 23 files changed, 49 insertions(+), 66 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml index 0a6cd247367..56dec15223d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml @@ -5,7 +5,7 @@ calcite: LogicalProject(count()=[$1], span(`@timestamp`,1d)=[$0]) LogicalAggregate(group=[{0}], count()=[COUNT()]) LogicalProject(span(`@timestamp`,1d)=[SPAN($17, 1, 'd')]) - LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)), IS NOT NULL($17))]) + LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml index 1b6fc72198d..6d3ef26ee3e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)), IS NOT NULL($7), IS NOT NULL($14))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->AND(SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml index 8885be7967d..2b48025c015 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)), IS NOT NULL($7), IS NOT NULL($14), IS NOT NULL($34))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp, aws.cloudwatch.log_stream], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), process.name, cloud.region, aws.cloudwatch.log_stream], SORT->[1 DESC LAST, 2 ASC FIRST, 3 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp, aws.cloudwatch.log_stream], FILTER->AND(SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), process.name, cloud.region, aws.cloudwatch.log_stream], SORT->[1 DESC LAST, 2 ASC FIRST, 3 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}},{"exists":{"field":"aws.cloudwatch.log_stream","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["process.name","cloud.region","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml index ef9698b93e3..3d5639f94c0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml @@ -4,7 +4,7 @@ calcite: LogicalProject(count()=[$1], span(`@timestamp`,1m)=[$0]) LogicalAggregate(group=[{0}], count()=[COUNT()]) LogicalProject(span(`@timestamp`,1m)=[SPAN($17, 1, 'm')]) - LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)), IS NOT NULL($17))]) + LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml index 0470fe7872d..5659aca68dd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-05 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-05 05:00:00':VARCHAR)), IS NOT NULL($7), IS NOT NULL($14))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), process.name, cloud.region], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->AND(SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), process.name, cloud.region], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml index f21f57a583e..0f137b73605 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(u=[$1], MobilePhoneModel=[$0]) LogicalAggregate(group=[{0}], u=[COUNT(DISTINCT $1)]) LogicalProject(MobilePhoneModel=[$31], UserID=[$84]) - LogicalFilter(condition=[IS NOT NULL($31)]) - LogicalFilter(condition=[<>($31, '')]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[<>($31, '')]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($31, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[u, MobilePhoneModel], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"MobilePhoneModel":{"terms":{"field":"MobilePhoneModel","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml index 9164e61b3e8..6b47ac718b6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(u=[$2], MobilePhone=[$0], MobilePhoneModel=[$1]) LogicalAggregate(group=[{0, 1}], u=[COUNT(DISTINCT $2)]) LogicalProject(MobilePhone=[$62], MobilePhoneModel=[$31], UserID=[$84]) - LogicalFilter(condition=[AND(IS NOT NULL($62), IS NOT NULL($31))]) - LogicalFilter(condition=[<>($31, '')]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[AND(<>($31, ''), IS NOT NULL($62))]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[MobilePhoneModel, MobilePhone, UserID], FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},u=COUNT(DISTINCT $2)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[u, MobilePhone, MobilePhoneModel], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"MobilePhone","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["MobilePhoneModel","MobilePhone","UserID"],"excludes":[]},"aggregations":{"MobilePhone|MobilePhoneModel":{"multi_terms":{"terms":[{"field":"MobilePhone"},{"field":"MobilePhoneModel"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml index 8c7797a4ad7..3c2c0f9dfb5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(c=[$1], SearchPhrase=[$0]) LogicalAggregate(group=[{0}], c=[COUNT()]) LogicalProject(SearchPhrase=[$63]) - LogicalFilter(condition=[IS NOT NULL($63)]) - LogicalFilter(condition=[<>($63, '')]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[<>($63, '')]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($63, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml index aa980934e37..c3678470be8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(u=[$1], SearchPhrase=[$0]) LogicalAggregate(group=[{0}], u=[COUNT(DISTINCT $1)]) LogicalProject(SearchPhrase=[$63], UserID=[$84]) - LogicalFilter(condition=[IS NOT NULL($63)]) - LogicalFilter(condition=[<>($63, '')]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[<>($63, '')]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($63, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[u, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml index 3b0000ec80a..7218a23bc29 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(c=[$2], SearchEngineID=[$0], SearchPhrase=[$1]) LogicalAggregate(group=[{0, 1}], c=[COUNT()]) LogicalProject(SearchEngineID=[$65], SearchPhrase=[$63]) - LogicalFilter(condition=[AND(IS NOT NULL($65), IS NOT NULL($63))]) - LogicalFilter(condition=[<>($63, '')]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[AND(<>($63, ''), IS NOT NULL($65))]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID], FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, SearchEngineID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID"],"excludes":[]},"aggregations":{"SearchEngineID|SearchPhrase":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml index 513568e063b..a4cc2248e1e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(c=[$1], SearchPhrase=[$0]) LogicalAggregate(group=[{0}], c=[COUNT()]) LogicalProject(SearchPhrase=[$63]) - LogicalFilter(condition=[IS NOT NULL($63)]) - LogicalFilter(condition=[AND(LIKE($26, '%google%', '\'), <>($63, ''))]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[AND(LIKE($26, '%google%', '\'), <>($63, ''))]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, SearchPhrase], FILTER->AND(LIKE($0, '%google%', '\'), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"URL":{"wildcard":"*google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","SearchPhrase"],"excludes":[]},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml index d38438ad4e9..5ec734b13ea 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(c=[$1], dc(UserID)=[$2], SearchPhrase=[$0]) LogicalAggregate(group=[{0}], c=[COUNT()], dc(UserID)=[COUNT(DISTINCT $1)]) LogicalProject(SearchPhrase=[$63], UserID=[$84]) - LogicalFilter(condition=[IS NOT NULL($63)]) - LogicalFilter(condition=[AND(LIKE($97, '%Google%', '\'), <>($63, ''), NOT(LIKE($26, '%.google.%', '\')))]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[AND(LIKE($97, '%Google%', '\'), <>($63, ''), NOT(LIKE($26, '%.google.%', '\')))]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, SearchPhrase, UserID, Title], FILTER->AND(LIKE($3, '%Google%', '\'), <>($1, ''), NOT(LIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT(),dc(UserID)=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, dc(UserID), SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","SearchPhrase","UserID","Title"],"excludes":[]},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml index daa53e1c368..2bf72da9393 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml @@ -6,9 +6,8 @@ calcite: LogicalProject(l=[$1], c=[$2], CounterID=[$0]) LogicalAggregate(group=[{0}], l=[AVG($1)], c=[COUNT()]) LogicalProject(CounterID=[$103], $f2=[CHAR_LENGTH($26)]) - LogicalFilter(condition=[IS NOT NULL($103)]) - LogicalFilter(condition=[<>($26, '')]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[AND(<>($26, ''), IS NOT NULL($103))]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(fetch=[25]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml index 0f9c18436cd..b6a513d31ee 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(c=[$2], sum(IsRefresh)=[$3], avg(ResolutionWidth)=[$4], SearchEngineID=[$0], ClientIP=[$1]) LogicalAggregate(group=[{0, 1}], c=[COUNT()], sum(IsRefresh)=[SUM($2)], avg(ResolutionWidth)=[AVG($3)]) LogicalProject(SearchEngineID=[$65], ClientIP=[$76], IsRefresh=[$72], ResolutionWidth=[$80]) - LogicalFilter(condition=[AND(IS NOT NULL($65), IS NOT NULL($76))]) - LogicalFilter(condition=[<>($63, '')]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[AND(<>($63, ''), IS NOT NULL($65), IS NOT NULL($76))]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID, IsRefresh, ClientIP, ResolutionWidth], FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"SearchEngineID|ClientIP":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID, IsRefresh, ClientIP, ResolutionWidth], FILTER->AND(<>($0, ''), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"SearchEngineID|ClientIP":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml index 6016bd287c6..e20758eed71 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(c=[$2], sum(IsRefresh)=[$3], avg(ResolutionWidth)=[$4], WatchID=[$0], ClientIP=[$1]) LogicalAggregate(group=[{0, 1}], c=[COUNT()], sum(IsRefresh)=[SUM($2)], avg(ResolutionWidth)=[AVG($3)]) LogicalProject(WatchID=[$41], ClientIP=[$76], IsRefresh=[$72], ResolutionWidth=[$80]) - LogicalFilter(condition=[AND(IS NOT NULL($41), IS NOT NULL($76))]) - LogicalFilter(condition=[<>($63, '')]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[AND(<>($63, ''), IS NOT NULL($41), IS NOT NULL($76))]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[WatchID, SearchPhrase, IsRefresh, ClientIP, ResolutionWidth], FILTER-><>($1, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["WatchID","SearchPhrase","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[WatchID, SearchPhrase, IsRefresh, ClientIP, ResolutionWidth], FILTER->AND(<>($1, ''), IS NOT NULL($0), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"WatchID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["WatchID","SearchPhrase","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml index ffe16fc8444..cea77806c98 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(PageViews=[$1], URL=[$0]) LogicalAggregate(group=[{0}], PageViews=[COUNT()]) LogicalProject(URL=[$26]) - LogicalFilter(condition=[IS NOT NULL($26)]) - LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($26, ''))]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($26, ''))]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URL, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($2, 0), =($3, 0), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URL","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml index 97b0fd3db2f..711cf5bc29e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(PageViews=[$1], Title=[$0]) LogicalAggregate(group=[{0}], PageViews=[COUNT()]) LogicalProject(Title=[$97]) - LogicalFilter(condition=[IS NOT NULL($97)]) - LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($97, ''))]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($97, ''))]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, DontCountHits, IsRefresh, Title, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($1, 0), =($2, 0), <>($3, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, Title], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"Title","boost":1.0}}],"must_not":[{"term":{"Title":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","DontCountHits","IsRefresh","Title","CounterID"],"excludes":[]},"aggregations":{"Title":{"terms":{"field":"Title","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml index 8b26cbbb03b..08ea4e0a45b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml @@ -5,10 +5,9 @@ calcite: LogicalProject(PageViews=[$1], URL=[$0]) LogicalAggregate(group=[{0}], PageViews=[COUNT()]) LogicalProject(URL=[$26]) - LogicalFilter(condition=[IS NOT NULL($26)]) - LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($72, 0), <>($49, 0), =($35, 0))]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($72, 0), <>($49, 0), =($35, 0), IS NOT NULL($26))]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URL, IsDownload, IsLink, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), <>($3, 0), =($2, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"IsLink","boost":1.0}}],"must_not":[{"term":{"IsLink":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsDownload":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URL","IsDownload","IsLink","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URL":{"terms":{"field":"URL","size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URL, IsDownload, IsLink, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), <>($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"IsLink","boost":1.0}}],"must_not":[{"term":{"IsLink":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsDownload":{"value":0,"boost":1.0}}},{"exists":{"field":"URL","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URL","IsDownload","IsLink","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URL":{"terms":{"field":"URL","size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml index 8c8a2359f51..cb31cbd45db 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml @@ -5,10 +5,9 @@ calcite: LogicalProject(PageViews=[$2], URLHash=[$0], EventDate=[$1]) LogicalAggregate(group=[{0, 1}], PageViews=[COUNT()]) LogicalProject(URLHash=[$25], EventDate=[$0]) - LogicalFilter(condition=[AND(IS NOT NULL($25), IS NOT NULL($0))]) - LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($72, 0), SEARCH($12, Sarg[-1, 6]), =($11, 3594120000172545465))]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($72, 0), SEARCH($12, Sarg[-1, 6]), =($11, 3594120000172545465), IS NOT NULL($25))]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[100], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, RefererHash, TraficSourceID, URLHash, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, URLHash, EventDate]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","RefererHash","TraficSourceID","URLHash","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URLHash|EventDate":{"multi_terms":{"terms":[{"field":"URLHash"},{"field":"EventDate","value_type":"long"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, RefererHash, TraficSourceID, URLHash, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, URLHash, EventDate]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}},{"exists":{"field":"URLHash","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","RefererHash","TraficSourceID","URLHash","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URLHash|EventDate":{"multi_terms":{"terms":[{"field":"URLHash"},{"field":"EventDate","value_type":"long"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml index 2f5e72fc1a0..6e9ced0f691 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml @@ -5,10 +5,9 @@ calcite: LogicalProject(PageViews=[$2], WindowClientWidth=[$0], WindowClientHeight=[$1]) LogicalAggregate(group=[{0, 1}], PageViews=[COUNT()]) LogicalProject(WindowClientWidth=[$104], WindowClientHeight=[$57]) - LogicalFilter(condition=[AND(IS NOT NULL($104), IS NOT NULL($57))]) - LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($72, 0), =($42, 0), =($25, 2868770270353813622))]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($72, 0), =($42, 0), =($25, 2868770270353813622), IS NOT NULL($104), IS NOT NULL($57))]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[10000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URLHash, DontCountHits, WindowClientHeight, IsRefresh, CounterID, WindowClientWidth], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, WindowClientWidth, WindowClientHeight]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URLHash","DontCountHits","WindowClientHeight","IsRefresh","CounterID","WindowClientWidth"],"excludes":[]},"aggregations":{"WindowClientWidth|WindowClientHeight":{"multi_terms":{"terms":[{"field":"WindowClientWidth"},{"field":"WindowClientHeight"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URLHash, DontCountHits, WindowClientHeight, IsRefresh, CounterID, WindowClientWidth], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622), IS NOT NULL($6), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, WindowClientWidth, WindowClientHeight]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}},{"exists":{"field":"WindowClientWidth","boost":1.0}},{"exists":{"field":"WindowClientHeight","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URLHash","DontCountHits","WindowClientHeight","IsRefresh","CounterID","WindowClientWidth"],"excludes":[]},"aggregations":{"WindowClientWidth|WindowClientHeight":{"multi_terms":{"terms":[{"field":"WindowClientWidth"},{"field":"WindowClientHeight"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml index 3bba5e1ed82..820898e401d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml @@ -5,11 +5,10 @@ calcite: LogicalProject(PageViews=[$1], M=[$0]) LogicalAggregate(group=[{0}], PageViews=[COUNT()]) LogicalProject(M=[SPAN($17, 1, 'm')]) - LogicalFilter(condition=[IS NOT NULL($17)]) - LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-15 00:00:00':VARCHAR)), =($72, 0), =($42, 0))]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-15 00:00:00':VARCHAR)), =($72, 0), =($42, 0), IS NOT NULL($17))]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | EnumerableCalc(expr#0..1=[{inputs}], PageViews=[$t1], M=[$t0]) EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, EventTime, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-15 00:00:00':VARCHAR]]:VARCHAR), =($3, 0), =($2, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT->[0 ASC FIRST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","EventTime","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"date_histogram":{"field":"EventTime","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, EventTime, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-15 00:00:00':VARCHAR]]:VARCHAR), =($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT->[0 ASC FIRST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"exists":{"field":"EventTime","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","EventTime","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"date_histogram":{"field":"EventTime","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml index 8ebc6067a0a..343aade48c4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(count()=[$1], AdvEngineID=[$0]) LogicalAggregate(group=[{0}], count()=[COUNT()]) LogicalProject(AdvEngineID=[$19]) - LogicalFilter(condition=[IS NOT NULL($19)]) - LogicalFilter(condition=[<>($19, 0)]) - CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + LogicalFilter(condition=[<>($19, 0)]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($19, 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[count(), AdvEngineID], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"AdvEngineID","boost":1.0}}],"must_not":[{"term":{"AdvEngineID":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"AdvEngineID":{"terms":{"field":"AdvEngineID","size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml index 3b779911368..dd1f6444d09 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml @@ -4,7 +4,7 @@ calcite: LogicalProject(count()=[$1], span(birthdate,1d)=[$0]) LogicalAggregate(group=[{0}], count()=[COUNT()]) LogicalProject(span(birthdate,1d)=[SPAN($3, 1, 'd')]) - LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($3, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)), IS NOT NULL($3))]) + LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($3, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file From 8568c2f95ebc892b718e33243b95374872210c9a Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 1 Dec 2025 17:26:16 -0800 Subject: [PATCH 11/12] address comments from peng Signed-off-by: Jialiang Liang --- .../org/opensearch/sql/calcite/CalciteRelNodeVisitor.java | 4 +--- .../java/org/opensearch/sql/executor/QueryService.java | 7 ++++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 0316f2b37b4..8049afce825 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -858,7 +858,6 @@ public RelNode visitPatterns(Patterns node, CalcitePlanContext context) { @Override public RelNode visitEval(Eval node, CalcitePlanContext context) { visitChildren(node, context); - node.getExpressionList() .forEach( expr -> { @@ -2246,10 +2245,9 @@ private RelNode mergeTableAndResolveColumnConflict( @Override public RelNode visitMultisearch(Multisearch node, CalcitePlanContext context) { List subsearchNodes = new ArrayList<>(); - for (UnresolvedPlan subsearch : node.getSubsearches()) { UnresolvedPlan prunedSubSearch = subsearch.accept(new EmptySourcePropagateVisitor(), null); - analyze(prunedSubSearch, context); + prunedSubSearch.accept(this, context); subsearchNodes.add(context.relBuilder.build()); } diff --git a/core/src/main/java/org/opensearch/sql/executor/QueryService.java b/core/src/main/java/org/opensearch/sql/executor/QueryService.java index 37ffe739af9..d23430e7c4a 100644 --- a/core/src/main/java/org/opensearch/sql/executor/QueryService.java +++ b/core/src/main/java/org/opensearch/sql/executor/QueryService.java @@ -56,6 +56,9 @@ @AllArgsConstructor @Log4j2 public class QueryService { + private static final HepProgram FILTER_MERGE_PROGRAM = + new HepProgramBuilder().addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()).build(); + private final Analyzer analyzer; private final ExecutionEngine executionEngine; private final Planner planner; @@ -270,9 +273,7 @@ public RelNode analyze(UnresolvedPlan plan, CalcitePlanContext context) { * the rest of optimization. */ private RelNode mergeAdjacentFilters(RelNode relNode) { - HepProgram program = - new HepProgramBuilder().addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()).build(); - HepPlanner planner = new HepPlanner(program); + HepPlanner planner = new HepPlanner(FILTER_MERGE_PROGRAM); planner.setRoot(relNode); return planner.findBestExp(); } From 4804ada95f2c19f404704127de47cf2d14054dd7 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Tue, 2 Dec 2025 12:16:16 -0800 Subject: [PATCH 12/12] add yaml test Signed-off-by: Jialiang Liang --- .../rest-api-spec/test/issues/4842.yml | 171 ++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4842.yml diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4842.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4842.yml new file mode 100644 index 00000000000..3a0425b4a3f --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4842.yml @@ -0,0 +1,171 @@ +setup: + - do: + indices.create: + index: test_filter_merge + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + name: + type: keyword + age: + type: integer + email: + type: keyword + status: + type: keyword + score: + type: double + city: + type: keyword + department: + type: keyword + active: + type: boolean + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + +--- +"Filter merge with multiple consecutive where clauses": + - skip: + features: + - headers + - allowed_warnings + - do: + bulk: + index: test_filter_merge + refresh: true + body: + - '{"index": {}}' + - '{"name": "Alice", "age": 30, "email": "alice@example.com", "status": "active", "score": 95.5, "city": "Seattle", "department": "Engineering", "active": true}' + - '{"index": {}}' + - '{"name": "Bob", "age": 25, "email": "bob@example.com", "status": "active", "score": 88.0, "city": "Portland", "department": "Sales", "active": true}' + - '{"index": {}}' + - '{"name": "Charlie", "age": 35, "email": "charlie@example.com", "status": "inactive", "score": 72.5, "city": "Seattle", "department": "Engineering", "active": false}' + - '{"index": {}}' + - '{"name": "Diana", "age": 28, "email": "diana@example.com", "status": "active", "score": 91.0, "city": "Seattle", "department": "Marketing", "active": true}' + - '{"index": {}}' + - '{"name": "Eve", "age": 32, "email": "eve@example.com", "status": "active", "score": 85.5, "city": "Portland", "department": "Engineering", "active": true}' + + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: | + source=test_filter_merge + | where name != "" + | where email != "" + | where status = "active" + | where age > 25 + | where age < 40 + | where score > 80 + | where score < 100 + | where city = "Seattle" + | where department = "Engineering" + | where active = true + | fields name, age, email, score + + - match: {"total": 1} + - match: {"datarows": [["Alice", 30, "alice@example.com", 95.5]]} + +--- +"Filter merge with IS NOT NULL checks": + - skip: + features: + - headers + - allowed_warnings + - do: + bulk: + index: test_filter_merge + refresh: true + body: + - '{"index": {}}' + - '{"name": "Frank", "age": 40, "email": "frank@example.com", "status": "active", "score": 78.0, "city": "Boston", "department": "Sales", "active": true}' + - '{"index": {}}' + - '{"name": "Grace", "age": 35, "email": "", "status": "active", "score": 92.0, "city": "Boston", "department": "Sales", "active": true}' + - '{"index": {}}' + - '{"name": "", "age": 29, "email": "helen@example.com", "status": "active", "score": 80.0, "city": "Boston", "department": "Sales", "active": true}' + + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: | + source=test_filter_merge + | where name != "" + | where email != "" + | where status = "active" + | where isnotnull(score) + | where age > 30 + | where age < 50 + | where score > 70 + | where city = "Boston" + | where department = "Sales" + | where active = true + | fields name, email, status, score + + - match: {"total": 1} + - match: {"datarows": [["Frank", "frank@example.com", "active", 78.0]]} + +--- +"Filter merge with range and equality checks": + - skip: + features: + - headers + - allowed_warnings + - do: + bulk: + index: test_filter_merge + refresh: true + body: + - '{"index": {}}' + - '{"name": "Ivan", "age": 27, "email": "ivan@example.com", "status": "active", "score": 85.0, "city": "Seattle", "department": "HR", "active": true}' + - '{"index": {}}' + - '{"name": "Julia", "age": 33, "email": "julia@example.com", "status": "active", "score": 90.0, "city": "Portland", "department": "HR", "active": true}' + - '{"index": {}}' + - '{"name": "Kevin", "age": 45, "email": "kevin@example.com", "status": "active", "score": 88.0, "city": "Seattle", "department": "HR", "active": false}' + - '{"index": {}}' + - '{"name": "Laura", "age": 26, "email": "laura@example.com", "status": "inactive", "score": 75.0, "city": "Seattle", "department": "HR", "active": true}' + + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: | + source=test_filter_merge + | where status = "active" + | where age >= 25 + | where age <= 35 + | where score >= 85 + | where score <= 95 + | where city = "Seattle" + | where name != "" + | where email != "" + | where department = "HR" + | where active = true + | stats count() by city + + - match: {"total": 1} + - match: {"datarows": [[1, "Seattle"]]}