opensearch-project · penghuo · Dec 2, 2025 · Nov 21, 2025 · Nov 21, 2025 · Nov 21, 2025
@@ -16,11 +16,15 @@
 import lombok.extern.log4j.Log4j2;
 import org.apache.calcite.jdbc.CalciteSchema;
 import org.apache.calcite.plan.RelTraitDef;
+import org.apache.calcite.plan.hep.HepPlanner;
+import org.apache.calcite.plan.hep.HepProgram;
+import org.apache.calcite.plan.hep.HepProgramBuilder;
 import org.apache.calcite.rel.RelCollation;
 import org.apache.calcite.rel.RelCollations;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Sort;
 import org.apache.calcite.rel.logical.LogicalSort;
+import org.apache.calcite.rel.rules.FilterMergeRule;
 import org.apache.calcite.schema.SchemaPlus;
 import org.apache.calcite.sql.parser.SqlParser;
 import org.apache.calcite.tools.FrameworkConfig;
@@ -52,6 +56,9 @@
 @AllArgsConstructor
 @Log4j2
 public class QueryService {
+  private static final HepProgram FILTER_MERGE_PROGRAM =
+      new HepProgramBuilder().addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()).build();
+
   private final Analyzer analyzer;
   private final ExecutionEngine executionEngine;
   private final Planner planner;
@@ -100,6 +107,7 @@ public void executeWithCalcite(
                           CalcitePlanContext.create(
                               buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType);
                       RelNode relNode = analyze(plan, context);
+                      relNode = mergeAdjacentFilters(relNode);
                       RelNode optimized = optimize(relNode, context);
                       RelNode calcitePlan = convertToCalcitePlan(optimized);
                       executionEngine.execute(calcitePlan, context, listener);
@@ -145,6 +153,7 @@ public void explainWithCalcite(
                       context.run(
                           () -> {
                             RelNode relNode = analyze(plan, context);
+                            relNode = mergeAdjacentFilters(relNode);
                             RelNode optimized = optimize(relNode, context);
                             RelNode calcitePlan = convertToCalcitePlan(optimized);
                             executionEngine.explain(calcitePlan, format, context, listener);
@@ -259,6 +268,16 @@ public RelNode analyze(UnresolvedPlan plan, CalcitePlanContext context) {
     return getRelNodeVisitor().analyze(plan, context);
   }
 
+  /**
+   * Run Calcite FILTER_MERGE once so adjacent filters created during analysis can collapse before
+   * the rest of optimization.
+   */
+  private RelNode mergeAdjacentFilters(RelNode relNode) {
+    HepPlanner planner = new HepPlanner(FILTER_MERGE_PROGRAM);
+    planner.setRoot(relNode);
+    return planner.findBestExp();
+  }
+
   /** Analyze {@link UnresolvedPlan}. */
   public LogicalPlan analyze(UnresolvedPlan plan, QueryType queryType) {
     return analyzer.analyze(plan, new AnalysisContext(queryType));

@@ -5,8 +5,7 @@ calcite:
         LogicalProject(count()=[$1], span(`@timestamp`,1d)=[$0])
           LogicalAggregate(group=[{0}], count()=[COUNT()])
             LogicalProject(span(`@timestamp`,1d)=[SPAN($17, 1, 'd')])
-              LogicalFilter(condition=[IS NOT NULL($17)])
-                LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
+              LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(count()=[$2], process.name=[$0], cloud.region=[$1])
           LogicalAggregate(group=[{0, 1}], count()=[COUNT()])
             LogicalProject(process.name=[$7], cloud.region=[$14])
-              LogicalFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($14))])
-                LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)))])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
+              LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)), IS NOT NULL($7), IS NOT NULL($14))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->AND(SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(count()=[$3], process.name=[$0], cloud.region=[$1], aws.cloudwatch.log_stream=[$2])
           LogicalAggregate(group=[{0, 1, 2}], count()=[COUNT()])
             LogicalProject(process.name=[$7], cloud.region=[$14], aws.cloudwatch.log_stream=[$34])
-              LogicalFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($14), IS NOT NULL($34))])
-                LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)))])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
+              LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)), IS NOT NULL($7), IS NOT NULL($14), IS NOT NULL($34))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp, aws.cloudwatch.log_stream], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), process.name, cloud.region, aws.cloudwatch.log_stream], SORT->[1 DESC LAST, 2 ASC FIRST, 3 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp, aws.cloudwatch.log_stream], FILTER->AND(SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), process.name, cloud.region, aws.cloudwatch.log_stream], SORT->[1 DESC LAST, 2 ASC FIRST, 3 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}},{"exists":{"field":"aws.cloudwatch.log_stream","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["process.name","cloud.region","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
@@ -4,8 +4,7 @@ calcite:
       LogicalProject(count()=[$1], span(`@timestamp`,1m)=[$0])
         LogicalAggregate(group=[{0}], count()=[COUNT()])
           LogicalProject(span(`@timestamp`,1m)=[SPAN($17, 1, 'm')])
-            LogicalFilter(condition=[IS NOT NULL($17)])
-              LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))])
-                CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
+            LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))])
+              CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
@@ -3,8 +3,7 @@ calcite:
     LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
       LogicalProject(agent=[$0], process=[$6], log=[$8], message=[$11], tags=[$12], cloud=[$13], input=[$15], @timestamp=[$17], ecs=[$18], data_stream=[$20], meta=[$24], host=[$26], metrics=[$27], aws=[$30], event=[$35])
         LogicalSort(fetch=[10])
-          LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))])
-            LogicalFilter(condition=[query_string(MAP('query', 'process.name:kernel':VARCHAR))])
-              CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
+          LogicalFilter(condition=[AND(query_string(MAP('query', 'process.name:kernel':VARCHAR)), >=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))])
+            CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->AND(query_string(MAP('query', 'process.name:kernel':VARCHAR)), SEARCH($7, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR)), LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)])