diff --git a/common/src/main/java/org/opensearch/sql/common/setting/Settings.java b/common/src/main/java/org/opensearch/sql/common/setting/Settings.java index 8b80dbf8cea..5dcd7702f0d 100644 --- a/common/src/main/java/org/opensearch/sql/common/setting/Settings.java +++ b/common/src/main/java/org/opensearch/sql/common/setting/Settings.java @@ -32,6 +32,7 @@ public enum Key { PATTERN_MAX_SAMPLE_COUNT("plugins.ppl.pattern.max.sample.count"), PATTERN_BUFFER_LIMIT("plugins.ppl.pattern.buffer.limit"), PPL_REX_MAX_MATCH_LIMIT("plugins.ppl.rex.max_match.limit"), + PPL_SYNTAX_LEGACY_PREFERRED("plugins.ppl.syntax.legacy.preferred"), /** Enable Calcite as execution engine */ CALCITE_ENGINE_ENABLED("plugins.calcite.enabled"), diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index da1fa8e68f1..69a54f06c19 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -879,7 +879,11 @@ private LogicalAggregation analyzeAggregation( groupBys.forEach( group -> newEnv.define(new Symbol(Namespace.FIELD_NAME, group.getNameOrAlias()), group.type())); - return new LogicalAggregation(child, aggregators, groupBys); + + Argument.ArgumentMap statsArgs = Argument.ArgumentMap.of(node.getArgExprList()); + boolean bucketNullable = + (Boolean) statsArgs.getOrDefault(Argument.BUCKET_NULLABLE, Literal.TRUE).getValue(); + return new LogicalAggregation(child, aggregators, groupBys, bucketNullable); } private Aggregation analyzePatternsAgg(Patterns node) { diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 1bf890a55c6..fb1c64674e2 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -451,6 +451,7 @@ public static List defaultStatsArgs() { argument("partitions", 
intLiteral(1)), argument("allnum", booleanLiteral(false)), argument("delim", stringLiteral(" ")), + argument(Argument.BUCKET_NULLABLE, booleanLiteral(true)), argument("dedupsplit", booleanLiteral(false))); } diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/Argument.java b/core/src/main/java/org/opensearch/sql/ast/expression/Argument.java index 08bb3a4a418..0e0e032e22b 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/Argument.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/Argument.java @@ -20,6 +20,8 @@ @RequiredArgsConstructor @EqualsAndHashCode(callSuper = false) public class Argument extends UnresolvedExpression { + public static final String BUCKET_NULLABLE = "bucket_nullable"; + private final String argName; private final Literal value; @@ -66,5 +68,9 @@ public static ArgumentMap empty() { public Literal get(String name) { return map.get(name); } + + public Literal getOrDefault(String name, Literal literal) { + return map.getOrDefault(name, literal); + } } } diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/Literal.java b/core/src/main/java/org/opensearch/sql/ast/expression/Literal.java index 3d61d5dc5a3..6f3aa517dda 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/Literal.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/Literal.java @@ -49,5 +49,6 @@ public String toString() { public static Literal TRUE = new Literal(true, DataType.BOOLEAN); public static Literal FALSE = new Literal(false, DataType.BOOLEAN); - public static Literal ZERO = new Literal(Integer.valueOf("0"), DataType.INTEGER); + public static Literal ZERO = new Literal(0, DataType.INTEGER); + public static Literal ONE = new Literal(1, DataType.INTEGER); } diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 0a20100c624..5a6d1a08663 100644 --- 
a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -41,6 +41,9 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.hint.HintStrategyTable; +import org.apache.calcite.rel.hint.RelHint; +import org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.rel.logical.LogicalValues; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexCall; @@ -847,6 +850,41 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { groupExprList.addAll(node.getGroupExprList()); Pair, List> aggregationAttributes = aggregateWithTrimming(groupExprList, aggExprList, context); + // Add group by columns + List aliasedGroupByList = + aggregationAttributes.getLeft().stream() + .map(this::extractAliasLiteral) + .flatMap(Optional::stream) + .map(ref -> ((RexLiteral) ref).getValueAs(String.class)) + .map(context.relBuilder::field) + .map(f -> (RexNode) f) + .collect(Collectors.toList()); + + // add stats hint to LogicalAggregation + Argument.ArgumentMap statsArgs = Argument.ArgumentMap.of(node.getArgExprList()); + Boolean bucketNullable = + (Boolean) statsArgs.getOrDefault(Argument.BUCKET_NULLABLE, Literal.TRUE).getValue(); + if (!bucketNullable && !aliasedGroupByList.isEmpty()) { + final RelHint statHits = + RelHint.builder("stats_args").hintOption(Argument.BUCKET_NULLABLE, "false").build(); + assert context.relBuilder.peek() instanceof LogicalAggregate + : "Stats hits should be added to LogicalAggregate"; + context.relBuilder.hints(statHits); + context + .relBuilder + .getCluster() + .setHintStrategies( + HintStrategyTable.builder() + .hintStrategy( + "stats_args", + (hint, rel) -> { + return rel instanceof LogicalAggregate; + }) + .build()); + context.relBuilder.filter( + 
aliasedGroupByList.stream().map(context.relBuilder::isNotNull) + .collect(Collectors.toList())); + } // schema reordering // As an example, in command `stats count() by colA, colB`, @@ -859,15 +897,6 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { List aggRexList = outputFields.subList(numOfOutputFields - numOfAggList, numOfOutputFields); reordered.addAll(aggRexList); - // Add group by columns - List aliasedGroupByList = - aggregationAttributes.getLeft().stream() - .map(this::extractAliasLiteral) - .flatMap(Optional::stream) - .map(ref -> ((RexLiteral) ref).getValueAs(String.class)) - .map(context.relBuilder::field) - .map(f -> (RexNode) f) - .collect(Collectors.toList()); reordered.addAll(aliasedGroupByList); context.relBuilder.project(reordered); diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalAggregation.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalAggregation.java index ecbcece6236..8a8e5dbdb26 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalAggregation.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalAggregation.java @@ -14,20 +14,27 @@ import org.opensearch.sql.expression.aggregation.NamedAggregator; /** Logical Aggregation. */ +@Getter @ToString @EqualsAndHashCode(callSuper = true) public class LogicalAggregation extends LogicalPlan { - @Getter private final List aggregatorList; + private final List aggregatorList; - @Getter private final List groupByList; + private final List groupByList; + + private final boolean bucketNullable; /** Constructor of LogicalAggregation. 
*/ public LogicalAggregation( - LogicalPlan child, List aggregatorList, List groupByList) { + LogicalPlan child, + List aggregatorList, + List groupByList, + boolean bucketNullable) { super(Collections.singletonList(child)); this.aggregatorList = aggregatorList; this.groupByList = groupByList; + this.bucketNullable = bucketNullable; } @Override diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java index 7dbc6009bb8..6d17ccd85a8 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java @@ -38,9 +38,19 @@ public static LogicalPlan write(LogicalPlan input, Table table, List col return new LogicalWrite(input, table, columns); } + /** Build a logical aggregation with nullable bucket always true. */ public static LogicalPlan aggregation( LogicalPlan input, List aggregatorList, List groupByList) { - return new LogicalAggregation(input, aggregatorList, groupByList); + return new LogicalAggregation(input, aggregatorList, groupByList, true); + } + + /** Build a logical aggregation with nullable bucket parameter */ + public static LogicalPlan aggregation( + LogicalPlan input, + List aggregatorList, + List groupByList, + boolean bucketNullable) { + return new LogicalAggregation(input, aggregatorList, groupByList, bucketNullable); } public static LogicalPlan filter(LogicalPlan input, Expression expression) { diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index db97379d565..f60bbde6f2f 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -19,6 +19,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.booleanLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.compare; 
import static org.opensearch.sql.ast.dsl.AstDSL.computation; +import static org.opensearch.sql.ast.dsl.AstDSL.exprList; import static org.opensearch.sql.ast.dsl.AstDSL.field; import static org.opensearch.sql.ast.dsl.AstDSL.filter; import static org.opensearch.sql.ast.dsl.AstDSL.filteredAggregate; @@ -434,7 +435,7 @@ public void stats_source() { ImmutableList.of(DSL.named("string_value", DSL.ref("string_value", STRING)))), AstDSL.agg( AstDSL.relation("schema"), - AstDSL.exprList( + exprList( AstDSL.alias( "avg(integer_value)", AstDSL.aggregate("avg", field("integer_value")))), null, @@ -486,7 +487,7 @@ public void rename_to_invalid_expression() { AstDSL.rename( AstDSL.agg( AstDSL.relation("schema"), - AstDSL.exprList( + exprList( AstDSL.alias( "avg(integer_value)", AstDSL.aggregate("avg", field("integer_value")))), @@ -1956,4 +1957,28 @@ public void rex_command_throws_unsupported_operation_exception_in_legacy_engine( .attach(relation("schema")))); assertEquals("Rex is supported only when plugins.calcite.enabled=true", exception.getMessage()); } + + @Test + public void stats_non_bucket_nullable_test() { + assertAnalyzeEqual( + LogicalPlanDSL.aggregation( + LogicalPlanDSL.relation("schema", table), + ImmutableList.of( + DSL.named("avg(integer_value)", DSL.avg(DSL.ref("integer_value", INTEGER)))), + ImmutableList.of(DSL.named("string_value", DSL.ref("string_value", STRING))), + false), + AstDSL.agg( + AstDSL.relation("schema"), + exprList( + AstDSL.alias( + "avg(integer_value)", AstDSL.aggregate("avg", field("integer_value")))), + null, + ImmutableList.of(AstDSL.alias("string_value", field("string_value"))), + exprList( + argument("partitions", intLiteral(1)), + argument("allnum", booleanLiteral(false)), + argument("delim", stringLiteral(" ")), + argument(Argument.BUCKET_NULLABLE, booleanLiteral(false)), + argument("dedupsplit", booleanLiteral(false))))); + } } diff --git a/docs/user/ppl/admin/settings.rst b/docs/user/ppl/admin/settings.rst index 
b15fc8159a7..61345f0fe45 100644 --- a/docs/user/ppl/admin/settings.rst +++ b/docs/user/ppl/admin/settings.rst @@ -189,3 +189,40 @@ PPL query:: } } } + +plugins.ppl.syntax.legacy.preferred +=================================== + +Description +----------- + +This configuration was introduced in 3.3.0 and is used to switch some behaviours in PPL syntax. The current default value is ``true``. +The behaviours it controls include: + +- The default value of the argument ``bucket_nullable`` in the ``stats`` command. Check `stats command <../cmd/stats.rst>`_ for details. + +Example +------- + +You can update the setting with a new value like this. + +PPL query:: + + sh$ curl -sS -H 'Content-Type: application/json' \ + ... -X PUT localhost:9200/_plugins/_query/settings \ + ... -d '{"transient" : {"plugins.ppl.syntax.legacy.preferred" : "false"}}' + { + "acknowledged": true, + "persistent": {}, + "transient": { + "plugins": { + "ppl": { + "syntax": { + "legacy": { + "preferred": "false" + } + } + } + } + } + } diff --git a/docs/user/ppl/cmd/stats.rst b/docs/user/ppl/cmd/stats.rst index 369e7a83e68..f53d3ceb645 100644 --- a/docs/user/ppl/cmd/stats.rst +++ b/docs/user/ppl/cmd/stats.rst @@ -38,11 +38,16 @@ The following table dataSources the aggregation functions and also indicates how Syntax ============ -stats ... [by-clause] +stats [bucket_nullable=bool] ... [by-clause] * aggregation: mandatory. A aggregation function. The argument of aggregation must be field. +* bucket_nullable: optional (since 3.3.0). Controls whether the stats command includes null buckets in group-by aggregations. When set to ``false``, the aggregation ignores records where the group-by field is null, resulting in faster performance by excluding the null bucket.
The default value of ``bucket_nullable`` is determined by ``plugins.ppl.syntax.legacy.preferred``: + + * When ``plugins.ppl.syntax.legacy.preferred=true``, ``bucket_nullable`` defaults to ``true`` + * When ``plugins.ppl.syntax.legacy.preferred=false``, ``bucket_nullable`` defaults to ``false`` + * by-clause: optional. * Syntax: by [span-expression,] [field,]... @@ -793,7 +798,7 @@ PPL query:: +-----+----------+--------+ Example 14: Collect all values in a field using LIST -===================================================== +==================================================== The example shows how to collect all firstname values, preserving duplicates and order. @@ -806,3 +811,22 @@ PPL query:: |-------------------------------------| | ["Amber","Hattie","Nanette","Dale"] | +-------------------------------------+ + + +Example 15: Ignore null bucket +============================== + +Note: This argument requires version 3.3.0 or above. + +PPL query:: + + PPL> source=accounts | stats bucket_nullable=false count() as cnt by email; + fetched rows / total rows = 3/3 + +-----+-----------------------+ + | cnt | email | + |-----+-----------------------| + | 1 | amberduke@pyrami.com | + | 1 | daleadams@boink.com | + | 1 | hattiebond@netagy.com | + +-----+-----------------------+ + diff --git a/docs/user/ppl/interfaces/endpoint.rst b/docs/user/ppl/interfaces/endpoint.rst index adcae17c967..f9775919875 100644 --- a/docs/user/ppl/interfaces/endpoint.rst +++ b/docs/user/ppl/interfaces/endpoint.rst @@ -91,7 +91,7 @@ The following PPL query demonstrated that where and stats command were pushed do { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":10,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}],\"aggregations\":{\"avg(age)\":{\"avg\":{\"field\":\"age\"}}}}, 
searchDone=false)" + "request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":10,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"sort\":[],\"aggregations\":{\"avg(age)\":{\"avg\":{\"field\":\"age\"}}}}, searchDone=false)" }, "children": [] } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index ac6e1d9bd6c..d94bea7be77 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -20,6 +20,7 @@ */ @RunWith(Suite.class) @Suite.SuiteClasses({ + CalciteExplainIT.class, CalciteArrayFunctionIT.class, CalciteBinCommandIT.class, CalciteConvertTZFunctionIT.class, @@ -31,7 +32,6 @@ CalciteDedupCommandIT.class, CalciteDescribeCommandIT.class, CalciteExpandCommandIT.class, - CalciteExplainIT.class, CalciteFieldsCommandIT.class, CalciteFillNullCommandIT.class, CalciteFlattenCommandIT.class, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index e92d871e915..873e796726b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -116,7 +116,7 @@ public void supportPushDownSortMergeJoin() throws IOException { @Ignore("We've supported script push down on text field") @Test public void supportPartialPushDown() throws IOException { - Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled()); + enabledOnlyWhenPushdownIsEnabled(); // field `address` is text type without keyword subfield, so we cannot push it down. 
String query = "source=opensearch-sql_test_index_account | where (state = 'Seattle' or age < 10) and (age" @@ -130,7 +130,7 @@ public void supportPartialPushDown() throws IOException { @Ignore("We've supported script push down on text field") @Test public void supportPartialPushDown_NoPushIfAllFailed() throws IOException { - Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled()); + enabledOnlyWhenPushdownIsEnabled(); // field `address` is text type without keyword subfield, so we cannot push it down. String query = "source=opensearch-sql_test_index_account | where (address = '671 Bristol Street' or age <" @@ -188,7 +188,7 @@ public void testExplainIsNullOrOthers() throws IOException { @Ignore("We've supported script push down on text field") @Test public void supportPartialPushDownScript() throws IOException { - Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled()); + enabledOnlyWhenPushdownIsEnabled(); // field `address` is text type without keyword subfield, so we cannot push it down. // But the second condition can be translated to script, so the second one is pushed down. String query = @@ -216,7 +216,7 @@ public void testPartialPushdownFilterWithIsNull() throws IOException { @Test public void testSkipScriptEncodingOnExtendedFormat() throws IOException { - Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled()); + enabledOnlyWhenPushdownIsEnabled(); String query = "source=opensearch-sql_test_index_account | where address = '671 Bristol Street' and age -" + " 2 = 30 | fields firstname, age, address"; @@ -255,7 +255,7 @@ public void testExplainWithReverse() throws IOException { public void testExplainWithTimechartAvg() throws IOException { var result = explainQueryToString("source=events | timechart span=1m avg(cpu_usage) by host"); String expected = - isPushdownEnabled() + !isPushdownDisabled() ? 
loadFromFile("expectedOutput/calcite/explain_timechart.json") : loadFromFile("expectedOutput/calcite/explain_timechart_no_pushdown.json"); assertJsonEqualsIgnoreId(expected, result); @@ -265,7 +265,7 @@ public void testExplainWithTimechartAvg() throws IOException { public void testExplainWithTimechartCount() throws IOException { var result = explainQueryToString("source=events | timechart span=1m count() by host"); String expected = - isPushdownEnabled() + !isPushdownDisabled() ? loadFromFile("expectedOutput/calcite/explain_timechart_count.json") : loadFromFile("expectedOutput/calcite/explain_timechart_count_no_pushdown.json"); assertJsonEqualsIgnoreId(expected, result); @@ -273,7 +273,7 @@ public void testExplainWithTimechartCount() throws IOException { @Test public void noPushDownForAggOnWindow() throws IOException { - Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled()); + enabledOnlyWhenPushdownIsEnabled(); String query = "source=opensearch-sql_test_index_account | patterns address method=BRAIN | stats count()" + " by patterns_field"; @@ -285,7 +285,7 @@ public void noPushDownForAggOnWindow() throws IOException { // Only for Calcite @Test public void supportPushDownScriptOnTextField() throws IOException { - Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled()); + enabledOnlyWhenPushdownIsEnabled(); String result = explainQueryToString( "explain source=opensearch-sql_test_index_account | where length(address) > 0 | eval" @@ -315,7 +315,7 @@ public void testExplainCountEvalComplex() throws IOException { @Test public void testEventstatsDistinctCountExplain() throws IOException { - Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled()); + enabledOnlyWhenPushdownIsEnabled(); String query = "source=opensearch-sql_test_index_account | eventstats dc(state) as distinct_states"; var result = explainQueryToString(query); @@ -325,7 +325,7 @@ public void testEventstatsDistinctCountExplain() 
throws IOException { @Test public void testEventstatsDistinctCountFunctionExplain() throws IOException { - Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled()); + enabledOnlyWhenPushdownIsEnabled(); String query = "source=opensearch-sql_test_index_account | eventstats distinct_count(state) as" + " distinct_states by gender"; @@ -393,7 +393,7 @@ public void testExplainOnAggregationWithSumEnhancement() throws IOException { @Test public void testExplainRegexMatchInWhereWithScriptPushdown() throws IOException { - Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled()); + enabledOnlyWhenPushdownIsEnabled(); String query = String.format("source=%s | where regex_match(name, 'hello')", TEST_INDEX_STRINGS); var result = explainQueryToString(query); @@ -403,7 +403,7 @@ public void testExplainRegexMatchInWhereWithScriptPushdown() throws IOException @Test public void testExplainRegexMatchInEvalWithOutScriptPushdown() throws IOException { - Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled()); + enabledOnlyWhenPushdownIsEnabled(); String query = String.format( "source=%s |eval has_hello = regex_match(name, 'hello') | fields has_hello", @@ -522,7 +522,7 @@ public void testSimpleSortExpressionPushDownWithOnlyExprProjected() throws Excep @Test public void testPushdownLimitIntoAggregation() throws IOException { - Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled()); + enabledOnlyWhenPushdownIsEnabled(); String expected = loadExpectedPlan("explain_limit_agg_pushdown.json"); assertJsonEqualsIgnoreId( expected, @@ -548,6 +548,20 @@ public void testPushdownLimitIntoAggregation() throws IOException { "source=opensearch-sql_test_index_account | stats count() by state | sort state | head" + " 100 | head 10 from 10 ")); + expected = loadExpectedPlan("explain_limit_agg_pushdown_bucket_nullable1.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + 
"source=opensearch-sql_test_index_account | stats bucket_nullable=false count() by" + + " state | head 100 | head 10 from 10 ")); + + expected = loadExpectedPlan("explain_limit_agg_pushdown_bucket_nullable2.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account | stats bucket_nullable=false count() by" + + " state | sort state | head 100 | head 10 from 10 ")); + // Don't pushdown the combination of limit and sort expected = loadExpectedPlan("explain_limit_agg_pushdown5.json"); assertJsonEqualsIgnoreId( @@ -557,6 +571,25 @@ public void testPushdownLimitIntoAggregation() throws IOException { + " head 100 | head 10 from 10 ")); } + @Test + public void testExplainSortOnMetricsNoBucketNullable() throws IOException { + // TODO enhancement later: https://github.com/opensearch-project/sql/issues/4282 + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_agg_sort_on_metrics1.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account | stats bucket_nullable=false count() by" + + " state | sort `count()`")); + + expected = loadExpectedPlan("explain_agg_sort_on_metrics2.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account | stats bucket_nullable=false count() by" + + " gender, state | sort `count()`")); + } + /** * Executes the PPL query and returns the result as a string with windows-style line breaks * replaced with Unix-style ones. 
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java index a0bc25b8caa..a207ffd9b18 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java @@ -20,7 +20,7 @@ public void init() throws Exception { @Override @Test public void test_convert_field_text_to_keyword() throws IOException { - Assume.assumeTrue("Pushdown is not enabled, skipping this test.", isPushdownEnabled()); + enabledOnlyWhenPushdownIsEnabled(); super.test_convert_field_text_to_keyword(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java index f5b4eb4f57b..b3bb4bfe1ae 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java @@ -892,12 +892,12 @@ public void testSumGroupByNullValue() throws IOException { verifySchema(response, schema("a", null, "bigint"), schema("age", null, "int")); verifyDataRows( response, - rows(isPushdownEnabled() ? 0 : null, null), + rows(isPushdownDisabled() ? null : 0, null), rows(32838, 28), rows(39225, 32), rows(4180, 33), rows(48086, 34), - rows(isPushdownEnabled() ? 0 : null, 36)); + rows(isPushdownDisabled() ? null : 0, 36)); } @Test @@ -961,7 +961,7 @@ public void testSumNull() throws IOException { + " ],\n" + " \"datarows\": [\n" + " [\n" - + (isPushdownEnabled() ? " 0\n" : " null\n") + + (isPushdownDisabled() ? 
" null\n" : " 0\n") + " ]\n" + " ],\n" + " \"total\": 1,\n" diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLExplainIT.java index 22372f31d3f..cd4e2f5d694 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLExplainIT.java @@ -35,7 +35,7 @@ public void init() throws Exception { public void testExplainCommand() throws IOException { var result = explainQueryToString("source=test | where age = 20 | fields name, age"); String expected = - isPushdownEnabled() + !isPushdownDisabled() ? loadFromFile("expectedOutput/calcite/explain_filter_w_pushdown.json") : loadFromFile("expectedOutput/calcite/explain_filter_wo_pushdown.json"); @@ -58,7 +58,7 @@ public void testExplainCommandExtendedWithCodegen() throws IOException { public void testExplainCommandExtendedWithoutCodegen() throws IOException { var result = executeWithReplace("explain extended source=test | where age = 20 | fields name, age"); - if (isPushdownEnabled()) { + if (!isPushdownDisabled()) { assertFalse( result.contains( "public org.apache.calcite.linq4j.Enumerable bind(final" @@ -75,7 +75,7 @@ public void testExplainCommandExtendedWithoutCodegen() throws IOException { public void testExplainCommandCost() throws IOException { var result = executeWithReplace("explain cost source=test | where age = 20 | fields name, age"); String expected = - isPushdownEnabled() + !isPushdownDisabled() ? 
loadFromFile("expectedOutput/calcite/explain_filter_cost_w_pushdown.txt") : loadFromFile("expectedOutput/calcite/explain_filter_cost_wo_pushdown.txt"); assertTrue( diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRelevanceFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRelevanceFunctionIT.java index 03fca9bc615..b9bd4537971 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRelevanceFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRelevanceFunctionIT.java @@ -22,7 +22,7 @@ public void init() throws Exception { // optimization rule `FilterProjectTransposeRule` to push down the filter through the project. @Override public void not_pushdown_throws_exception() throws IOException { - Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled()); + enabledOnlyWhenPushdownIsEnabled(); String query1 = "SOURCE=" + TEST_INDEX_BEER diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteSortCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteSortCommandIT.java index db08219a738..13245ec8c94 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteSortCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteSortCommandIT.java @@ -40,7 +40,7 @@ public void testPushdownSortPlusExpression() throws IOException { "source=%s | eval age2 = age + 2 | sort age2 | fields age | head 2", TEST_INDEX_BANK); String explained = explainQueryToString(ppl); - if (isPushdownEnabled()) { + if (!isPushdownDisabled()) { assertTrue( explained.contains( "[SORT->[{\\n" @@ -63,7 +63,7 @@ public void testPushdownSortMinusExpression() throws IOException { "source=%s | eval age2 = 1 - age | sort age2 | fields age | head 2", TEST_INDEX_BANK); String explained = explainQueryToString(ppl); - if (isPushdownEnabled()) { + if (!isPushdownDisabled()) { 
assertTrue( explained.contains( "[SORT->[{\\n" @@ -86,7 +86,7 @@ public void testPushdownSortTimesExpression() throws IOException { "source=%s | eval age2 = 5 * age | sort age2 | fields age | head 2", TEST_INDEX_BANK); String explained = explainQueryToString(ppl); - if (isPushdownEnabled()) { + if (!isPushdownDisabled()) { assertTrue( explained.contains( "[SORT->[{\\n" @@ -109,7 +109,7 @@ public void testPushdownSortByMultiExpressions() throws IOException { "source=%s | eval age2 = 5 * age | sort gender, age2 | fields gender, age | head 2", TEST_INDEX_BANK); String explained = explainQueryToString(ppl); - if (isPushdownEnabled()) { + if (!isPushdownDisabled()) { assertTrue( explained.contains( "[SORT->[{\\n" @@ -137,7 +137,7 @@ public void testPushdownSortCastExpression() throws IOException { "source=%s | eval age2 = cast(age * 5 as long) | sort age2 | fields age | head 2", TEST_INDEX_BANK); String explained = explainQueryToString(ppl); - if (isPushdownEnabled()) { + if (!isPushdownDisabled()) { assertTrue( explained.contains( "[SORT->[{\\n" @@ -160,7 +160,7 @@ public void testPushdownSortCastToDoubleExpression() throws IOException { "source=%s | eval age2 = cast(age as double) | sort age2 | fields age, age2 | head 2", TEST_INDEX_BANK); String explained = explainQueryToString(ppl); - if (isPushdownEnabled()) { + if (!isPushdownDisabled()) { assertTrue( explained.contains( "SORT->[{\\n" diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java index d48de12ae30..6bc3c07f69a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java @@ -135,40 +135,6 @@ public List getSettings() { }; } - protected Settings enablePushdown() { - 
System.out.println(Settings.Key.CALCITE_PUSHDOWN_ENABLED.name() + " enabled"); - return new Settings() { - private final Map defaultSettings = - new ImmutableMap.Builder() - .put(Key.QUERY_SIZE_LIMIT, 200) - .put(Key.SQL_CURSOR_KEEP_ALIVE, TimeValue.timeValueMinutes(1)) - .put(Key.FIELD_TYPE_TOLERANCE, true) - .put(Key.CALCITE_ENGINE_ENABLED, true) - .put(Key.CALCITE_FALLBACK_ALLOWED, false) - .put(Key.CALCITE_PUSHDOWN_ENABLED, true) - .put(Key.CALCITE_PUSHDOWN_ROWCOUNT_ESTIMATION_FACTOR, 0.9) - .put(Key.PATTERN_METHOD, "SIMPLE_PATTERN") - .put(Key.PATTERN_MODE, "LABEL") - .put(Key.PATTERN_MAX_SAMPLE_COUNT, 10) - .put(Key.PATTERN_BUFFER_LIMIT, 100000) - .build(); - - @Override - public T getSettingValue(Key key) { - return (T) defaultSettings.get(key); - } - - @Override - public List getSettings() { - return (List) defaultSettings; - } - }; - } - - public boolean isPushdownEnabled() { - return getSettings().getSettingValue(Settings.Key.CALCITE_PUSHDOWN_ENABLED); - } - protected String execute(String query) { AtomicReference actual = new AtomicReference<>(); pplService.execute( diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchIT.java index e1491194645..97790fa7904 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchIT.java @@ -63,45 +63,45 @@ public void testQ1() throws IOException { "A", "F", 37474, - isPushdownEnabled() ? 37569624.64 : 37569624.63999998, - isPushdownEnabled() ? 35676192.097 : 35676192.096999995, - isPushdownEnabled() ? 37101416.222424 : 37101416.22242404, + isPushdownDisabled() ? 37569624.63999998 : 37569624.64, + isPushdownDisabled() ? 35676192.096999995 : 35676192.097, + isPushdownDisabled() ? 37101416.22242404 : 37101416.222424, 25.354533152909337, - isPushdownEnabled() ? 
25419.231826792962 : 25419.231826792948, - isPushdownEnabled() ? 0.0508660351826793 : 0.050866035182679493, + isPushdownDisabled() ? 25419.231826792948 : 25419.231826792962, + isPushdownDisabled() ? 0.050866035182679493 : 0.0508660351826793, 1478), closeTo( "N", "F", 1041, 1041301.07, - isPushdownEnabled() ? 999060.898 : 999060.8979999998, - isPushdownEnabled() ? 1036450.8022800001 : 1036450.80228, + isPushdownDisabled() ? 999060.8979999998 : 999060.898, + isPushdownDisabled() ? 1036450.80228 : 1036450.8022800001, 27.394736842105264, 27402.659736842103, - isPushdownEnabled() ? 0.04289473684210526 : 0.042894736842105284, + isPushdownDisabled() ? 0.042894736842105284 : 0.04289473684210526, 38), closeTo( "N", "O", 75168, - isPushdownEnabled() ? 75384955.37 : 75384955.36999969, - isPushdownEnabled() ? 71653166.3034 : 71653166.30340016, - isPushdownEnabled() ? 74498798.133073 : 74498798.13307281, + isPushdownDisabled() ? 75384955.36999969 : 75384955.37, + isPushdownDisabled() ? 71653166.30340016 : 71653166.3034, + isPushdownDisabled() ? 74498798.13307281 : 74498798.133073, 25.558653519211152, - isPushdownEnabled() ? 25632.42277116627 : 25632.422771166166, - isPushdownEnabled() ? 0.049697381842910573 : 0.04969738184291069, + isPushdownDisabled() ? 25632.422771166166 : 25632.42277116627, + isPushdownDisabled() ? 0.04969738184291069 : 0.049697381842910573, 2941), closeTo( "R", "F", 36511, 36570841.24, - isPushdownEnabled() ? 34738472.8758 : 34738472.87580004, - isPushdownEnabled() ? 36169060.112193 : 36169060.11219294, + isPushdownDisabled() ? 34738472.87580004 : 34738472.8758, + isPushdownDisabled() ? 36169060.11219294 : 36169060.112193, 25.059025394646532, 25100.09693891558, - isPushdownEnabled() ? 0.05002745367192862 : 0.050027453671928686, + isPushdownDisabled() ? 
0.050027453671928686 : 0.05002745367192862, 1457)); } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 4c3a3d740cb..b819974d331 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -259,8 +259,6 @@ public void testMultiSortPushDownExplain() throws IOException { @Test public void testSortThenAggregatePushDownExplain() throws IOException { - // TODO: Remove pushed-down sort in DSL in expectedOutput/ppl/explain_sort_then_agg_push.json - // existing collations should be eliminated when pushing down aggregations (v2) String expected = loadExpectedPlan("explain_sort_then_agg_push.json"); assertJsonEqualsIgnoreId( expected, @@ -490,6 +488,18 @@ public void testStatsBySpan() throws IOException { String.format("source=%s | stats count() by span(age,10)", TEST_INDEX_BANK))); } + @Test + public void testStatsBySpanNonBucketNullable() throws IOException { + // TODO isNotNull(Span) pushdown to script, can be optimized to exist() + String expected = loadExpectedPlan("explain_stats_by_span_non_bucket_nullable.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + String.format( + "source=%s | stats bucket_nullable=false count() by span(age,10)", + TEST_INDEX_BANK))); + } + @Test public void testStatsByTimeSpan() throws IOException { String expected = loadExpectedPlan("explain_stats_by_timespan.json"); @@ -537,11 +547,7 @@ public void testDedupKeepEmptyFalsePushdown() throws IOException { @Test public void testSingleFieldRelevanceQueryFunctionExplain() throws IOException { - // This test is only applicable if pushdown is enabled - if (!isPushdownEnabled()) { - return; - } - + enabledOnlyWhenPushdownIsEnabled(); String expected = isCalciteEnabled() ? 
loadFromFile("expectedOutput/calcite/explain_single_field_relevance_push.json") @@ -556,11 +562,7 @@ public void testSingleFieldRelevanceQueryFunctionExplain() throws IOException { @Test public void testMultiFieldsRelevanceQueryFunctionExplain() throws IOException { - // This test is only applicable if pushdown is enabled - if (!isPushdownEnabled()) { - return; - } - + enabledOnlyWhenPushdownIsEnabled(); String expected = isCalciteEnabled() ? loadFromFile("expectedOutput/calcite/explain_multi_fields_relevance_push.json") @@ -663,10 +665,10 @@ public void testExplainOnAggregationWithFunction() throws IOException { protected String loadExpectedPlan(String fileName) throws IOException { String prefix; if (isCalciteEnabled()) { - if (isPushdownEnabled()) { - prefix = "expectedOutput/calcite/"; - } else { + if (isPushdownDisabled()) { prefix = "expectedOutput/calcite_no_pushdown/"; + } else { + prefix = "expectedOutput/calcite/"; } } else { prefix = "expectedOutput/ppl/"; diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java index 32fbf7ca63d..07b2bb0619c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java @@ -20,6 +20,7 @@ import org.json.JSONException; import org.json.JSONObject; import org.junit.Assert; +import org.junit.Assume; import org.junit.Rule; import org.opensearch.client.Request; import org.opensearch.client.RequestOptions; @@ -289,15 +290,24 @@ public static class GlobalPushdownConfig { public static boolean enabled = true; } - public boolean isPushdownEnabled() throws IOException { - return Boolean.parseBoolean( - getClusterSetting(Settings.Key.CALCITE_PUSHDOWN_ENABLED.getKeyValue(), "transient")); + /** + * We check pushdown disabled instead enabled because enabled is the default value of pushdown + * config whatever calcite is enabled or not. 
+ */ + public boolean isPushdownDisabled() throws IOException { + return isCalciteEnabled() + && !Boolean.parseBoolean( + getClusterSetting(Settings.Key.CALCITE_PUSHDOWN_ENABLED.getKeyValue(), "transient")); + } + + protected void enabledOnlyWhenPushdownIsEnabled() throws IOException { + Assume.assumeTrue("This test is only for when push down is enabled", !isPushdownDisabled()); } public void updatePushdownSettings() throws IOException { String pushdownEnabled = String.valueOf(GlobalPushdownConfig.enabled); assert !pushdownEnabled.isBlank() : "Pushdown enabled setting cannot be empty"; - if (isPushdownEnabled() != GlobalPushdownConfig.enabled) { + if (isPushdownDisabled() == GlobalPushdownConfig.enabled) { LOG.info( "Updating {} to {}", Settings.Key.CALCITE_PUSHDOWN_ENABLED.getKeyValue(), diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/RareCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/RareCommandIT.java index 8b7355993ca..f8c5093a8d6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/RareCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/RareCommandIT.java @@ -59,7 +59,7 @@ public void testRareWithGroup() throws IOException { rows("F", "OK", 7), rows("F", "KS", 7), rows("F", "CO", 7), - isPushdownEnabled() ? rows("F", "AR", 8) : rows("F", "NV", 8), + isPushdownDisabled() ? 
rows("F", "NV", 8) : rows("F", "AR", 8), rows("M", "NE", 5), rows("M", "RI", 5), rows("M", "NV", 5), diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java index 02ce879f894..11079b215ba 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java @@ -17,6 +17,7 @@ import java.io.IOException; import org.json.JSONObject; import org.junit.jupiter.api.Test; +import org.opensearch.sql.common.setting.Settings; public class StatsCommandIT extends PPLIntegTestCase { @@ -207,6 +208,23 @@ public void testAvgGroupByNullValue() throws IOException { rows(null, 36)); } + @Test + public void testAvgGroupByNullValueNonNullBucket() throws IOException { + JSONObject response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false avg(balance) as a by age", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchema(response, schema("a", null, "double"), schema("age", null, "int")); + verifyDataRows( + response, + rows(32838D, 28), + rows(39225D, 32), + rows(4180D, 33), + rows(48086D, 34), + rows(null, 36)); + } + @Test public void testMinGroupByNullValue() throws IOException { JSONObject response = @@ -223,6 +241,22 @@ public void testMinGroupByNullValue() throws IOException { rows(null, 36)); } + @Test + public void testMinGroupByNullValueNonNullBucket() throws IOException { + JSONObject response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false min(balance) as a by age", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRows( + response, + rows(32838D, 28), + rows(39225D, 32), + rows(4180D, 33), + rows(48086D, 34), + rows(null, 36)); + } + @Test public void testMaxGroupByNullValue() throws IOException { JSONObject response = @@ -239,6 +273,22 @@ public void testMaxGroupByNullValue() throws IOException { rows(null, 36)); } + @Test + public void 
testMaxGroupByNullValueNonNullBucket() throws IOException { + JSONObject response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false max(balance) as a by age", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRows( + response, + rows(32838D, 28), + rows(39225D, 32), + rows(4180D, 33), + rows(48086D, 34), + rows(null, 36)); + } + @Test public void testSumGroupByNullValue() throws IOException { JSONObject response = @@ -256,6 +306,23 @@ public void testSumGroupByNullValue() throws IOException { rows(null, 36)); } + @Test + public void testSumGroupByNullValueNonNullBucket() throws IOException { + JSONObject response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false avg(balance) as a by age", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchema(response, schema("a", null, "double"), schema("age", null, "int")); + verifyDataRows( + response, + rows(32838D, 28), + rows(39225D, 32), + rows(4180D, 33), + rows(48086D, 34), + rows(null, 36)); + } + @Test public void testStatsWithLimit() throws IOException { // The original rows count is 6 if no head 5. See the test `testSumGroupByNullValue`. @@ -267,7 +334,7 @@ public void testStatsWithLimit() throws IOException { verifySchema(response, schema("a", null, "double"), schema("age", null, "int")); // If push down disabled, the final results will no longer be stable. In DSL, the order is // guaranteed because we always sort by bucket field, while we don't add sort in the plan. 
- if (isPushdownEnabled()) { + if (!isPushdownDisabled()) { verifyDataRows( response, rows(null, null), @@ -285,7 +352,7 @@ public void testStatsWithLimit() throws IOException { "source=%s | stats avg(balance) as a by age | head 5 | head 2 from 1", TEST_INDEX_BANK_WITH_NULL_VALUES)); verifySchema(response, schema("a", null, "double"), schema("age", null, "int")); - if (isPushdownEnabled()) { + if (!isPushdownDisabled()) { verifyDataRows(response, rows(32838D, 28), rows(39225D, 32)); } else { assert ((Integer) response.get("size") == 2); @@ -297,7 +364,7 @@ public void testStatsWithLimit() throws IOException { "source=%s | stats avg(balance) as a by age | sort - age | head 5 | head 2 from 1", TEST_INDEX_BANK_WITH_NULL_VALUES)); verifySchema(response, schema("a", null, "double"), schema("age", null, "int")); - if (isPushdownEnabled()) { + if (!isPushdownDisabled()) { verifyDataRows(response, rows(48086D, 34), rows(4180D, 33)); } else { assert ((Integer) response.get("size") == 2); @@ -309,7 +376,7 @@ public void testStatsWithLimit() throws IOException { "source=%s | stats avg(balance) as a by age | sort - a | head 5 | head 2 from 1", TEST_INDEX_BANK_WITH_NULL_VALUES)); verifySchema(response, schema("a", null, "double"), schema("age", null, "int")); - if (isPushdownEnabled()) { + if (!isPushdownDisabled()) { verifyDataRows(response, rows(39225D, 32), rows(32838D, 28)); } else { assert ((Integer) response.get("size") == 2); @@ -333,6 +400,17 @@ public void testStddevSampGroupByNullValue() throws IOException { rows(null, 36)); } + @Test + public void testStddevSampGroupByNullValueNonNullBucket() throws IOException { + JSONObject response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false STDDEV_SAMP(balance) as a by age", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRows( + response, rows(null, 28), rows(null, 32), rows(null, 33), rows(null, 34), rows(null, 36)); + } + @Test public void testStddevPopGroupByNullValue() throws IOException { 
JSONObject response = @@ -350,6 +428,16 @@ public void testStddevPopGroupByNullValue() throws IOException { rows(null, 36)); } + @Test + public void testStddevPopGroupByNullValueNonNullBucket() throws IOException { + JSONObject response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false STDDEV_POP(balance) as a by age", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRows(response, rows(0, 28), rows(0, 32), rows(0, 33), rows(0, 34), rows(null, 36)); + } + @Test public void testVarSampGroupByNullValue() throws IOException { JSONObject response = @@ -367,6 +455,17 @@ public void testVarSampGroupByNullValue() throws IOException { rows(null, 36)); } + @Test + public void testVarSampGroupByNullValueNonNullBucket() throws IOException { + JSONObject response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false VAR_SAMP(balance) as a by age", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRows( + response, rows(null, 28), rows(null, 32), rows(null, 33), rows(null, 34), rows(null, 36)); + } + @Test public void testVarPopGroupByNullValue() throws IOException { JSONObject response = @@ -384,6 +483,16 @@ public void testVarPopGroupByNullValue() throws IOException { rows(null, 36)); } + @Test + public void testVarPopGroupByNullValueNonNullBucket() throws IOException { + JSONObject response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false VAR_POP(balance) as a by age", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRows(response, rows(0, 28), rows(0, 32), rows(0, 33), rows(0, 34), rows(null, 36)); + } + // Todo. The column of agg function is in random order. This is because we create the project // all operator from the symbol table which can't maintain the original column order. 
@Test @@ -402,6 +511,17 @@ public void testStatsWithNull() throws IOException { verifyDataRows(response, rows(33.166666666666664)); } + @Test + public void testStatsWithNullNonNullBucket() throws IOException { + JSONObject response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false avg(age)", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchema(response, schema("avg(age)", null, "double")); + verifyDataRows(response, rows(33.166666666666664)); + } + @Test public void testSumWithNull() throws IOException { JSONObject response = @@ -413,7 +533,7 @@ public void testSumWithNull() throws IOException { // TODO: Fix -- temporary workaround for the pushdown issue: // The current pushdown implementation will return 0 for sum when getting null values as input. // Returning null should be the expected behavior. - Integer expectedValue = isPushdownEnabled() ? 0 : null; + Integer expectedValue = isPushdownDisabled() ? null : 0; verifyDataRows(response, rows(expectedValue)); } @@ -559,12 +679,29 @@ public void testStatsPercentileByNullValue() throws IOException { verifySchema(response, schema("p50", null, "bigint"), schema("age", null, "int")); verifyDataRows( response, - rows(isCalciteEnabled() && !isPushdownEnabled() ? null : 0, null), + rows(isPushdownDisabled() ? null : 0, null), + rows(32838, 28), + rows(39225, 32), + rows(4180, 33), + rows(48086, 34), + rows(isPushdownDisabled() ? null : 0, 36)); + } + + @Test + public void testStatsPercentileByNullValueNonNullBucket() throws IOException { + JSONObject response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false percentile(balance, 50) as p50 by age", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchema(response, schema("p50", null, "bigint"), schema("age", null, "int")); + verifyDataRows( + response, rows(32838, 28), rows(39225, 32), rows(4180, 33), rows(48086, 34), - rows(isCalciteEnabled() && !isPushdownEnabled() ? null : 0, 36)); + rows(isPushdownDisabled() ? 
null : 0, 36)); } @Test @@ -577,4 +714,31 @@ public void testStatsPercentileBySpan() throws IOException { verifySchema(response, schema("p50", null, "bigint"), schema("age_bucket", null, "int")); verifyDataRows(response, rows(32838, 20), rows(27821, 30)); } + + @Test + public void testDisableLegacyPreferred() throws IOException { + withSettings( + Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED, + "false", + () -> { + JSONObject response = null; + try { + response = + executeQuery( + String.format( + "source=%s | stats avg(balance) as a by age", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + } catch (IOException e) { + throw new RuntimeException(e); + } + verifySchema(response, schema("a", null, "double"), schema("age", null, "int")); + verifyDataRows( + response, + rows(32838D, 28), + rows(39225D, 32), + rows(4180D, 33), + rows(48086D, 34), + rows(null, 36)); + }); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_metrics1.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_metrics1.json new file mode 100644 index 00000000000..fa5f3c8f879 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_metrics1.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n LogicalProject(count()=[$1], state=[$0])\n LogicalFilter(condition=[IS NOT NULL($0)])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(state=[$7])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$0], dir0=[ASC-nulls-first])\n EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], state=[$t0])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->IS NOT NULL($7), 
AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT())], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"exists\":{\"field\":\"state\",\"boost\":1.0}},\"sort\":[],\"aggregations\":{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"size\":1000,\"min_doc_count\":1,\"shard_min_doc_count\":0,\"show_term_doc_count_error\":false,\"order\":{\"_key\":\"asc\"}},\"aggregations\":{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_metrics2.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_metrics2.json new file mode 100644 index 00000000000..e6c9e3069fc --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_metrics2.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n LogicalProject(count()=[$2], gender=[$0], state=[$1])\n LogicalFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])\n LogicalAggregate(group=[{0, 1}], count()=[COUNT()])\n LogicalProject(gender=[$4], state=[$7])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$0], dir0=[ASC-nulls-first])\n EnumerableCalc(expr#0..2=[{inputs}], count()=[$t2], gender=[$t0], state=[$t1])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->AND(IS NOT NULL($4), IS NOT NULL($7)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT())], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"must\":[{\"exists\":{\"field\":\"gender\",\"boost\":1.0}},{\"exists\":{\"field\":\"state\",\"boost\":1.0}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"sort\":[],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"gender\":{\"terms\":{\"field\":\"gender.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_append_command.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_append_command.json index 6ebf3d56641..01d4e557bcb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_append_command.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_append_command.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalUnion(all=[true])\n LogicalProject(cnt=[$1], gender=[$0])\n LogicalAggregate(group=[{0}], cnt=[COUNT($1)])\n LogicalProject(gender=[$4], balance=[$7])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n LogicalProject(cnt=[$0], gender=[null:VARCHAR])\n LogicalAggregate(group=[{}], cnt=[COUNT()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableUnion(all=[true])\n EnumerableCalc(expr#0..1=[{inputs}], cnt=[$t1], gender=[$t0])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"gender\":{\"terms\":{\"field\":\"gender.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"cnt\":{\"value_count\":{\"field\":\"balance\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0=[{inputs}], expr#1=[null:VARCHAR], proj#0..1=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},cnt=COUNT())], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"cnt\":{\"value_count\":{\"field\":\"_index\"}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableUnion(all=[true])\n EnumerableCalc(expr#0..1=[{inputs}], cnt=[$t1], gender=[$t0])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"gender\":{\"terms\":{\"field\":\"gender.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"cnt\":{\"value_count\":{\"field\":\"balance\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n EnumerableCalc(expr#0=[{inputs}], expr#1=[null:VARCHAR], proj#0..1=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},cnt=COUNT()), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"cnt\":{\"value_count\":{\"field\":\"_index\"}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_count_agg_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_count_agg_push.json index b852426b518..63b4c6d8ab2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_count_agg_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_count_agg_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], cnt=[COUNT()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},cnt=COUNT())], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"cnt\":{\"value_count\":{\"field\":\"_index\"}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},cnt=COUNT()), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"cnt\":{\"value_count\":{\"field\":\"_index\"}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } -} +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.json 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.json index dce0adb013f..2bc95c47061 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(avg_age=[$2], state=[$0], city=[$1])\n LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)])\n LogicalProject(state=[$7], city=[$5], age=[$8])\n LogicalFilter(condition=[>($8, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableCalc(expr#0..2=[{inputs}], avg_age=[$t2], state=[$t0], city=[$t1])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[city, state, age], FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"city\",\"state\",\"age\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableCalc(expr#0..2=[{inputs}], avg_age=[$t2], state=[$t0], city=[$t1])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], 
PushDownContext=[[PROJECT->[city, state, age], FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"city\",\"state\",\"age\"],\"excludes\":[]},\"sort\":[],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } -} +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable1.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable1.json new file mode 100644 index 00000000000..a44c4af7550 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable1.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalSort(offset=[10], fetch=[10])\n LogicalSort(fetch=[100])\n LogicalProject(count()=[$1], state=[$0])\n LogicalFilter(condition=[IS NOT NULL($0)])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(state=[$7])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], state=[$t0])\n EnumerableLimit(offset=[10], fetch=[10])\n CalciteEnumerableIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_account]], PushDownContext=[[FILTER->IS NOT NULL($7), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), LIMIT->100, LIMIT->[10 from 10]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"exists\":{\"field\":\"state\",\"boost\":1.0}},\"sort\":[],\"aggregations\":{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"size\":20,\"min_doc_count\":1,\"shard_min_doc_count\":0,\"show_term_doc_count_error\":false,\"order\":{\"_key\":\"asc\"}},\"aggregations\":{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable2.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable2.json new file mode 100644 index 00000000000..4898344e498 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable2.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalSort(offset=[10], fetch=[10])\n LogicalSort(sort0=[$1], dir0=[ASC-nulls-first], fetch=[100])\n LogicalProject(count()=[$1], state=[$0])\n LogicalFilter(condition=[IS NOT NULL($0)])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(state=[$7])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], state=[$t0])\n EnumerableLimit(offset=[10], fetch=[10])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->IS NOT NULL($7), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT->[0 ASC FIRST], LIMIT->100, LIMIT->[10 from 10]], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"exists\":{\"field\":\"state\",\"boost\":1.0}},\"sort\":[],\"aggregations\":{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"size\":20,\"min_doc_count\":1,\"shard_min_doc_count\":0,\"show_term_doc_count_error\":false,\"order\":{\"_key\":\"asc\"}},\"aggregations\":{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_output.json index 0e64b6580d9..668803ec7c1 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age2=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $2)])\n LogicalFilter(condition=[IS NOT NULL($2)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)])\n LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n LogicalProject(avg_age=[$2], state=[$0], city=[$1])\n LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)])\n LogicalProject(state=[$7], city=[$5], age=[$8])\n LogicalFilter(condition=[>($8, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], age2=[$t1], $condition=[$t4])\n EnumerableWindow(window#0=[window(partition {1} order by [1] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=[2], expr#4=[+($t2, $t3)], expr#5=[IS NOT NULL($t2)], state=[$t0], age2=[$t4], 
$condition=[$t5])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[city, state, age], FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), SORT->[0 ASC FIRST]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"city\",\"state\",\"age\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], age2=[$t1], $condition=[$t4])\n EnumerableWindow(window#0=[window(partition {1} order by [1] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=[2], expr#4=[+($t2, $t3)], expr#5=[IS NOT NULL($t2)], state=[$t0], age2=[$t4], $condition=[$t5])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[city, state, age], FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), SORT->[0 ASC FIRST]], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"city\",\"state\",\"age\"],\"excludes\":[]},\"sort\":[],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } -} +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_percentile.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_percentile.json index 114b5c983bc..ffe6fae5517 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_percentile.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_percentile.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], p50=[percentile_approx($0, $1, $2)], p90=[percentile_approx($0, $3, $2)])\n LogicalProject(balance=[$3], $f2=[50], $f3=[FLAG(BIGINT)], $f4=[90])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},p50=percentile_approx($0, $1, $2),p90=percentile_approx($0, $3, $2))], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"p50\":{\"percentiles\":{\"field\":\"balance\",\"percents\":[50.0],\"keyed\":true,\"tdigest\":{\"compression\":100.0}}},\"p90\":{\"percentiles\":{\"field\":\"balance\",\"percents\":[90.0],\"keyed\":true,\"tdigest\":{\"compression\":100.0}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},p50=percentile_approx($0, $1, $2),p90=percentile_approx($0, $3, $2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"p50\":{\"percentiles\":{\"field\":\"balance\",\"percents\":[50.0],\"keyed\":true,\"tdigest\":{\"compression\":100.0}}},\"p90\":{\"percentiles\":{\"field\":\"balance\",\"percents\":[90.0],\"keyed\":true,\"tdigest\":{\"compression\":100.0}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_script_push_on_text.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_script_push_on_text.json index d001b428cae..3c5b3b632aa 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_script_push_on_text.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_script_push_on_text.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], address_length=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(address_length=[CHAR_LENGTH($2)])\n LogicalFilter(condition=[>(CHAR_LENGTH($2), 0)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], address_length=[$t0])\n 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[address], SCRIPT->>(CHAR_LENGTH($0), 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJhZGRyZXNzIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AbN7CiAgIm9wIjogewogICAgIm5hbWUiOiAiPiIsCiAgICAia2luZCI6ICJHUkVBVEVSX1RIQU4iLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgImtpbmQiOiAiQ0hBUl9MRU5HVEgiLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAwLAogICAgICAgICAgIm5hbWUiOiAiJDAiCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AAdhZGRyZXNzc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+AAt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubG
FuZy5FbnVtAAAAAAAAAAASAAB4cHQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgARdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGHhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGgAAAABzcQB+AAAAAAADdwQAAAAAeHh4\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"boost\":1.0}},\"_source\":{\"includes\":[\"address\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"address_length\":{\"terms\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJhZGRyZXNzIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AKZ7CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0hBUl9MRU5HVEgiLAogICAgImtpbmQiOiAiQ0hBUl9MRU5HVEgiLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABdqYXZhLnV0aWwuTGlua2VkSGFzaE1hcDTATlwQbMD7AgABWgALYWNjZXNzT3JkZXJ4cgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAC3QADmFjY291bnRfbnVtYmVyfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QACWZpcnN0bmFtZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXB
lO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgAReHB+cQB+AAt0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4ADHQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABx4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+AB4AAAAAc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+ABJ+cQB+AAt0AAZTVFJJTkd+cQB+ABh0AAdLZXl3b3JkcQB+AB14dAAHYWRkcmVzc3NxAH4AEHEAfgAWcQB+ABlxAH4AHXNxAH4AAAAAAAN3BAAAAAB4dAAHYmFsYW5jZXEAfgANdAAGZ2VuZGVyc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAEY2l0eXNxAH4AEHEAfgAWcQB+ABlxAH4AHXNxAH4AAAAAAAN3BAAAAAJxAH4AInEAfgAjeHQACGVtcGxveWVyc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAFc3RhdGVzcQB+ABBxAH4AFnEAfgAZcQB+AB1zcQB+AAAAAAADdwQAAAACcQB+ACJxAH4AI3h0AANhZ2VxAH4ADXQABWVtYWlsc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAIbGFzdG5hbWVzcQB+ABBxAH4AFnEAfgAZcQB+AB1zcQB+AAAAAAADdwQAAAACcQB+ACJxAH4AI3h4AHg=\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], address_length=[$t0])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[address], SCRIPT->>(CHAR_LENGTH($0), 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJhZGRyZXNzIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AbN7CiAgIm9wIjogewogICAgIm5hbWUiOiAiPiIsCiAgICAia2luZCI6ICJHUkVBVEVSX1RIQU4iLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgImtpbmQiOiAiQ0hBUl9MRU5HVEgiLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAwLAogICAgICAgICAgIm5hbWUiOiAiJDAiCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AAdhZGRyZXNzc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+AAt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgARdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm
0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGHhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGgAAAABzcQB+AAAAAAADdwQAAAAAeHh4\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"boost\":1.0}},\"_source\":{\"includes\":[\"address\"],\"excludes\":[]},\"sort\":[],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"address_length\":{\"terms\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJhZGRyZXNzIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AKZ7CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0hBUl9MRU5HVEgiLAogICAgImtpbmQiOiAiQ0hBUl9MRU5HVEgiLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABdqYXZhLnV0aWwuTGlua2VkSGFzaE1hcDTATlwQbMD7AgABWgALYWNjZXNzT3JkZXJ4cgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAC3QADmFjY291bnRfbnVtYmVyfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QACWZpcnN0bmFtZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgAReHB+cQB+AAt0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxA
H4ADHQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABx4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+AB4AAAAAc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+ABJ+cQB+AAt0AAZTVFJJTkd+cQB+ABh0AAdLZXl3b3JkcQB+AB14dAAHYWRkcmVzc3NxAH4AEHEAfgAWcQB+ABlxAH4AHXNxAH4AAAAAAAN3BAAAAAB4dAAHYmFsYW5jZXEAfgANdAAGZ2VuZGVyc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAEY2l0eXNxAH4AEHEAfgAWcQB+ABlxAH4AHXNxAH4AAAAAAAN3BAAAAAJxAH4AInEAfgAjeHQACGVtcGxveWVyc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAFc3RhdGVzcQB+ABBxAH4AFnEAfgAZcQB+AB1zcQB+AAAAAAADdwQAAAACcQB+ACJxAH4AI3h0AANhZ2VxAH4ADXQABWVtYWlsc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAIbGFzdG5hbWVzcQB+ABBxAH4AFnEAfgAZcQB+AB1zcQB+AAAAAAADdwQAAAACcQB+ACJxAH4AI3h4AHg=\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } -} +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_span_non_bucket_nullable.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_span_non_bucket_nullable.json new file mode 100644 index 00000000000..498092d47a2 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_span_non_bucket_nullable.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], span(age,10)=[$0])\n LogicalFilter(condition=[IS NOT NULL($0)])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(span(age,10)=[SPAN($10, 10, null:NULL)])\n CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_bank]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], span(age,10)=[$t0])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[SCRIPT->IS NOT NULL(SPAN($10, 10, null:NULL)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT())], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQHjnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJiaXJ0aGRhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJnZW5kZXIiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJjaXR5IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAibGFzdG5hbWUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZW1wbG95ZXIiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGx
hYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJzdGF0ZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhZ2UiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJlbWFpbCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIkJPT0xFQU4iLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJtYWxlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2lkIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2luZGV4IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiUkVBTCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogIl9zY29yZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfbWF4c2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc29ydCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9yb3V0aW5nIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogdHJ1ZQp9dAAEZXhwcnQDOXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJJUyBOT1QgTlVMTCIsCiAgICAia2luZCI6ICJJU19OT1RfTlVMTCIsCiAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJTUEFOIiwKICAgICAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDEwLAogICAgICAgICAgIm5hbWUiOiAiJDEwIgogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogbnVsbCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJOVUxMIiwKICAgICAgICAgICAgIm51bGx
hYmxlIjogdHJ1ZQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXSwKICAgICAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0sCiAgICAgICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAgICAgImR5bmFtaWMiOiBmYWxzZQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAYdwgAAAAgAAAADXQADmFjY291bnRfbnVtYmVyfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QACWZpcnN0bmFtZX5xAH4ACnQABlNUUklOR3QAB2FkZHJlc3NzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hUZXh0VHlwZa2Do5ME4zFEAgABTAAGZmllbGRzdAAPTGphdmEvdXRpbC9NYXA7eHIAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGXCY7zKAvoFNQIAA0wADGV4cHJDb3JlVHlwZXQAK0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJDb3JlVHlwZTtMAAttYXBwaW5nVHlwZXQASExvcmcvb3BlbnNlYXJjaC9zcWwvb3BlbnNlYXJjaC9kYXRhL3R5cGUvT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlO0wACnByb3BlcnRpZXNxAH4AE3hwfnEAfgAKdAAHVU5LTk9XTn5yAEZvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlAAAAAAAAAAASAAB4cQB+AAt0AARUZXh0c3IAPHNoYWRlZC5jb20uZ29vZ2xlLmNvbW1vbi5jb2xsZWN0LkltbXV0YWJsZU1hcCRTZXJpYWxpemVkRm9ybQAAAAAAAAAAAgACTAAEa2V5c3QAEkxqYXZhL2xhbmcvT2JqZWN0O0wABnZhbHVlc3EAfgAeeHB1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAAAdXEAfgAgAAAAAHNxAH4AAAAAAAN3BAAAAAB4dAAJYmlydGhkYXRlc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0ZVR5cGWeLVKuEH3KrwIAAUwAB2Zvcm1hdHN0ABBMamF2YS91dGlsL0xpc3Q7eHEAfgAUfnEAfgAKdAAJVElNRVNUQU1QfnEAfgAadAAERGF0ZXEAfgAfc3EAfgAAAAAAAXcEAAAAAHh0AAZnZW5kZXJzcQB+ABJxAH4AGHEAfgAbcQB+AB9zcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4AFHEAfgAPfnEAfgAadAAHS2V5d29yZHEAfgAfeHQABGNpdHlxAH4AD3QACGxhc3RuYW1lcQB+AA90AAdiYWxhbmNlcQB+AAx0AAhlbXBsb3llcnNxAH4AEnEAfgAYcQB+ABtxAH4AH3EAfgAjdAAFc3RhdGV
zcQB+ABJxAH4AGHEAfgAbcQB+AB9zcQB+AAAAAAADdwQAAAACcQB+ADBxAH4AMXh0AANhZ2V+cQB+AAp0AAdJTlRFR0VSdAAFZW1haWxzcQB+ABJxAH4AGHEAfgAbcQB+AB9xAH4AI3QABG1hbGV+cQB+AAp0AAdCT09MRUFOeHg=\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"boost\":1.0}},\"sort\":[],\"aggregations\":{\"span(age,10)\":{\"histogram\":{\"field\":\"age\",\"interval\":10.0,\"offset\":0.0,\"order\":{\"_key\":\"asc\"},\"keyed\":false,\"min_doc_count\":0},\"aggregations\":{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_span_non_bucket_nullable.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_span_non_bucket_nullable.json new file mode 100644 index 00000000000..8b383eea6e0 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_span_non_bucket_nullable.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], span(age,10)=[$0])\n LogicalFilter(condition=[IS NOT NULL($0)])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(span(age,10)=[SPAN($10, 10, null:NULL)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t0)], count()=[$t1], span(age,10)=[$t0], $condition=[$t2])\n EnumerableAggregate(group=[{0}], count()=[COUNT()])\n EnumerableCalc(expr#0..18=[{inputs}], expr#19=[10], expr#20=[null:NULL], expr#21=[SPAN($t10, $t19, $t20)], span(age,10)=[$t21])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n" + } +} \ No newline at end of file diff --git 
a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_agg_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_agg_push.json index 02c44277079..8d20573e03d 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_agg_push.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_agg_push.json @@ -4,14 +4,12 @@ "description": { "fields": "[avg_age, state, city]" }, - "children": [ - { - "name": "OpenSearchIndexScan", - "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, needClean=true, searchDone=false, pitId=null, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - }, - "children": [] - } - ] + "children": [{ + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, 
sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"sort\":[],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + }, + "children": [] + }] } } diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_output.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_output.json index 6e43ec0660e..a3d91813787 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_output.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_output.json @@ -4,43 +4,35 @@ "description": { "fields": "[age2]" }, - "children": [ - { - "name": "DedupeOperator", + "children": [{ + "name": "DedupeOperator", + "description": { + "dedupeList": "[age2]", + "allowedDuplication": 1, + "keepEmpty": false, + "consecutive": false + }, + "children": [{ + "name": "OpenSearchEvalOperator", "description": { - "dedupeList": "[age2]", - "allowedDuplication": 1, - "keepEmpty": false, - "consecutive": false + "expressions": { + "age2": "+(avg_age, 2)" + } }, - "children": [ - { - "name": "OpenSearchEvalOperator", + "children": [{ + "name": "RemoveOperator", + "description": { + "removeList": "[city]" + }, + "children": [{ + "name": "OpenSearchIndexScan", "description": { - "expressions": { - "age2": "+(avg_age, 2)" - } + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, 
sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"sort\":[],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" }, - "children": [ - { - "name": "RemoveOperator", - "description": { - "removeList": "[city]" - }, - "children": [ - { - "name": "OpenSearchIndexScan", - "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, needClean=true, searchDone=false, pitId=null, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - }, - "children": [] - } - ] - } - ] - } - ] - } - ] + "children": [] + }] + }] + }] + }] } } diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_then_agg_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_then_agg_push.json index f23aad17a60..42dbbf3483a 100644 --- 
a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_then_agg_push.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_then_agg_push.json @@ -4,14 +4,12 @@ "description": { "fields": "[avg(balance), state]" }, - "children": [ - { - "name": "OpenSearchIndexScan", - "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"sort\":[{\"balance\":{\"order\":\"asc\",\"missing\":\"_first\"}},{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg(balance)\":{\"avg\":{\"field\":\"balance\"}}}}}}, needClean=true, searchDone=false, pitId=null, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - }, - "children": [] - } - ] + "children": [{ + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"sort\":[],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg(balance)\":{\"avg\":{\"field\":\"balance\"}}}}}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + }, + "children": [] + }] } } diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_span_non_bucket_nullable.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_span_non_bucket_nullable.json new file mode 100644 index 00000000000..bdc1b0c7405 --- /dev/null +++ 
b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_span_non_bucket_nullable.json @@ -0,0 +1,15 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[count(), span(age,10)]" + }, + "children": [{ + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_bank, sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"span(age,10)\":{\"histogram\":{\"field\":\"age\",\"interval\":10.0,\"offset\":0.0,\"order\":{\"_key\":\"asc\"},\"keyed\":false,\"min_doc_count\":0},\"aggregations\":{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + }, + "children": [] + }] + } +} \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 87f807e194d..e8664fb8ff3 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -56,21 +56,25 @@ import org.opensearch.search.aggregations.AggregationBuilders; import org.opensearch.search.aggregations.AggregatorFactories; import org.opensearch.search.aggregations.AggregatorFactories.Builder; +import org.opensearch.search.aggregations.BucketOrder; import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.missing.MissingOrder; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.opensearch.search.aggregations.metrics.ExtendedStats; import org.opensearch.search.aggregations.metrics.PercentilesAggregationBuilder; import 
org.opensearch.search.aggregations.metrics.TopHitsAggregationBuilder; import org.opensearch.search.aggregations.support.ValueType; import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; import org.opensearch.search.sort.SortOrder; +import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.opensearch.request.PredicateAnalyzer.NamedFieldExpression; import org.opensearch.sql.opensearch.response.agg.ArgMaxMinParser; +import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; import org.opensearch.sql.opensearch.response.agg.MetricParser; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; @@ -80,6 +84,7 @@ import org.opensearch.sql.opensearch.response.agg.StatsParser; import org.opensearch.sql.opensearch.response.agg.TopHitsParser; import org.opensearch.sql.opensearch.storage.script.aggregation.dsl.BucketAggregationBuilder; +import org.opensearch.sql.opensearch.storage.script.aggregation.dsl.CompositeAggregationBuilder; /** * Aggregate analyzer. 
Convert aggregate to AggregationBuilder {@link AggregationBuilder} and its @@ -177,6 +182,13 @@ public static Pair, OpenSearchAggregationResponseParser throws ExpressionNotAnalyzableException { requireNonNull(aggregate, "aggregate"); try { + boolean bucketNullable = + Boolean.parseBoolean( + aggregate.getHints().stream() + .filter(hits -> hits.hintName.equals("stats_args")) + .map(hint -> hint.kvOptions.getOrDefault(Argument.BUCKET_NULLABLE, "true")) + .findFirst() + .orElseGet(() -> "true")); List groupList = aggregate.getGroupSet().asList(); AggregateBuilderHelper helper = new AggregateBuilderHelper(rowType, fieldTypes, cluster); List aggFieldNames = outputFields.subList(groupList.size(), outputFields.size()); @@ -190,6 +202,14 @@ public static Pair, OpenSearchAggregationResponseParser return Pair.of( ImmutableList.copyOf(metricBuilder.getAggregatorFactories()), new NoBucketAggregationParser(metricParserList)); + } else if (aggregate.getGroupSet().length() == 1 && !bucketNullable) { + // one bucket, use values source bucket builder for getting better performance + // TODO for multiple buckets, use MultiTermsAggregationBuilder + ValuesSourceAggregationBuilder bucketBuilder = + createBucketAggregation(groupList.get(0), project, helper); + return Pair.of( + Collections.singletonList(bucketBuilder.subAggregations(metricBuilder)), + new BucketAggregationParser(metricParserList)); } else { List> buckets = createCompositeBuckets(groupList, project, helper); @@ -378,15 +398,21 @@ private static Pair createRegularAggregation( } } + private static ValuesSourceAggregationBuilder createBucketAggregation( + Integer group, Project project, AggregateAnalyzer.AggregateBuilderHelper helper) { + return createBucket(group, project, helper); + } + private static List> createCompositeBuckets( List groupList, Project project, AggregateAnalyzer.AggregateBuilderHelper helper) { ImmutableList.Builder> resultBuilder = ImmutableList.builder(); - groupList.forEach(groupIndex -> 
resultBuilder.add(createBucket(groupIndex, project, helper))); + groupList.forEach( + groupIndex -> resultBuilder.add(createCompositeBucket(groupIndex, project, helper))); return resultBuilder.build(); } - private static CompositeValuesSourceBuilder createBucket( - Integer groupIndex, Project project, AggregateAnalyzer.AggregateBuilderHelper helper) { + private static ValuesSourceAggregationBuilder createBucket( + Integer groupIndex, Project project, AggregateBuilderHelper helper) { RexNode rex = project.getProjects().get(groupIndex); String bucketName = project.getRowType().getFieldList().get(groupIndex).getName(); if (rex instanceof RexCall @@ -397,6 +423,27 @@ private static CompositeValuesSourceBuilder createBucket( && ((RexCall) rex).getOperands().get(1) instanceof RexLiteral && ((RexCall) rex).getOperands().get(2) instanceof RexLiteral) { return BucketAggregationBuilder.buildHistogram( + bucketName, + helper.inferNamedField(((RexCall) rex).getOperands().get(0)).getRootName(), + ((RexLiteral)((RexCall) rex).getOperands().get(1)).getValueAs(Double.class), + SpanUnit.of(((RexLiteral)((RexCall) rex).getOperands().get(2)).getValueAs(String.class))); + } else { + return createTermsAggregationBuilder(bucketName, rex, helper); + } + } + + private static CompositeValuesSourceBuilder createCompositeBucket( + Integer groupIndex, Project project, AggregateBuilderHelper helper) { + RexNode rex = project.getProjects().get(groupIndex); + String bucketName = project.getRowType().getFieldList().get(groupIndex).getName(); + if (rex instanceof RexCall + && rex.getKind() == SqlKind.OTHER_FUNCTION + && ((RexCall) rex).getOperator().getName().equalsIgnoreCase(BuiltinFunctionName.SPAN.name()) + && ((RexCall) rex).getOperands().size() == 3 + && ((RexCall) rex).getOperands().get(0) instanceof RexInputRef + && ((RexCall) rex).getOperands().get(1) instanceof RexLiteral + && ((RexCall) rex).getOperands().get(2) instanceof RexLiteral) { + return 
CompositeAggregationBuilder.buildHistogram( bucketName, helper.inferNamedField(((RexCall) rex).getOperands().get(0)).getRootName(), ((RexLiteral)((RexCall) rex).getOperands().get(1)).getValueAs(Double.class), @@ -425,4 +472,21 @@ private static CompositeValuesSourceBuilder createTermsSourceBuilder( return sourceBuilder; } + + private static ValuesSourceAggregationBuilder createTermsAggregationBuilder( + String bucketName, RexNode group, AggregateBuilderHelper helper) { + TermsAggregationBuilder sourceBuilder = + helper.build( + group, + new TermsAggregationBuilder(bucketName) + .size(AGGREGATION_BUCKET_SIZE) + .order(BucketOrder.key(true))); + // Time types values are converted to LONG in ExpressionAggregationScript::execute + if (List.of(TIMESTAMP, TIME, DATE) + .contains(OpenSearchTypeFactory.convertRelDataTypeToExprType(group.getType()))) { + sourceBuilder.userValueTypeHint(ValueType.LONG); + } + + return sourceBuilder; + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index 0f594b5b7ea..3f8a00e4d74 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -221,6 +221,10 @@ public void pushDownAggregation( aggregationBuilder.getLeft().forEach(sourceBuilder::aggregation); sourceBuilder.size(0); exprValueFactory.setParser(aggregationBuilder.getRight()); + // no need to sort docs for aggregation + if (sourceBuilder.sorts() != null) { + sourceBuilder.sorts().clear(); + } } /** diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java new file mode 100644 index 00000000000..833d0246304 --- /dev/null +++ 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java @@ -0,0 +1,47 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.response.agg; + +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import lombok.EqualsAndHashCode; +import org.opensearch.search.aggregations.Aggregation; +import org.opensearch.search.aggregations.Aggregations; +import org.opensearch.search.aggregations.bucket.MultiBucketsAggregation; + +/** + * Use BucketAggregationParser only when there is a single group-by key, it returns multiple + * buckets. {@link CompositeAggregationParser} is used for multiple group by keys + */ +@EqualsAndHashCode +public class BucketAggregationParser implements OpenSearchAggregationResponseParser { + private final MetricParserHelper metricsParser; + + public BucketAggregationParser(MetricParser... 
metricParserList) { + metricsParser = new MetricParserHelper(Arrays.asList(metricParserList)); + } + + public BucketAggregationParser(List metricParserList) { + metricsParser = new MetricParserHelper(metricParserList); + } + + @Override + public List> parse(Aggregations aggregations) { + Aggregation agg = aggregations.asList().get(0); + return ((MultiBucketsAggregation) agg) + .getBuckets().stream().map(b -> parse(b, agg.getName())).collect(Collectors.toList()); + } + + private Map parse(MultiBucketsAggregation.Bucket bucket, String keyName) { + Map resultMap = new LinkedHashMap<>(); + resultMap.put(keyName, bucket.getKey()); + resultMap.putAll(metricsParser.parse(bucket.getAggregations())); + return resultMap; + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/CompositeAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/CompositeAggregationParser.java index 56018ce688e..3f36ffa0f8e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/CompositeAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/CompositeAggregationParser.java @@ -23,7 +23,11 @@ import org.opensearch.search.aggregations.Aggregations; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregation; -/** Composite Aggregation Parser which include composite aggregation and metric parsers. */ +/** + * Composite Aggregation Parser which include composite aggregation and metric parsers. This is only + * for the aggregation with multiple group-by keys. Use {@link BucketAggregationParser} when there + * is only one group-by key. 
+ */ @Getter @EqualsAndHashCode public class CompositeAggregationParser implements OpenSearchAggregationResponseParser { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java index 5f2894d16d3..0bb51b6a993 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java @@ -85,6 +85,13 @@ public class OpenSearchSettings extends Settings { Setting.Property.NodeScope, Setting.Property.Dynamic); + public static final Setting PPL_SYNTAX_LEGACY_PREFERRED_SETTING = + Setting.boolSetting( + Key.PPL_SYNTAX_LEGACY_PREFERRED.getKeyValue(), + true, + Setting.Property.NodeScope, + Setting.Property.Dynamic); + public static final Setting DEFAULT_PATTERN_METHOD_SETTING = Setting.simpleString( Key.PATTERN_METHOD.getKeyValue(), @@ -350,11 +357,17 @@ public OpenSearchSettings(ClusterSettings clusterSettings) { PPL_ENABLED_SETTING, new Updater(Key.PPL_ENABLED)); register( - settingBuilder, - clusterSettings, - Key.PATTERN_METHOD, - DEFAULT_PATTERN_METHOD_SETTING, - new Updater(Key.PATTERN_METHOD)); + settingBuilder, + clusterSettings, + Key.PPL_SYNTAX_LEGACY_PREFERRED, + PPL_SYNTAX_LEGACY_PREFERRED_SETTING, + new Updater(Key.PPL_SYNTAX_LEGACY_PREFERRED)); + register( + settingBuilder, + clusterSettings, + Key.PATTERN_METHOD, + DEFAULT_PATTERN_METHOD_SETTING, + new Updater(Key.PATTERN_METHOD)); register( settingBuilder, clusterSettings, @@ -583,6 +596,7 @@ public static List> pluginSettings() { .add(SQL_DELETE_ENABLED_SETTING) .add(SQL_PAGINATION_API_SEARCH_AFTER_SETTING) .add(PPL_ENABLED_SETTING) + .add(PPL_SYNTAX_LEGACY_PREFERRED_SETTING) .add(CALCITE_ENGINE_ENABLED_SETTING) .add(CALCITE_FALLBACK_ALLOWED_SETTING) .add(CALCITE_PUSHDOWN_ENABLED_SETTING) diff --git 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index a38614979f9..01e8a6923ac 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -45,9 +45,12 @@ import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; import org.opensearch.search.aggregations.AggregatorFactories.Builder; +import org.opensearch.search.aggregations.BucketOrder; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.missing.MissingOrder; +import org.opensearch.search.aggregations.bucket.terms.MultiTermsAggregationBuilder; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; import org.opensearch.search.sort.ScoreSortBuilder; import org.opensearch.search.sort.SortBuilder; @@ -451,59 +454,99 @@ public void apply(OpenSearchRequestBuilder requestBuilder) { } public void pushDownSortIntoAggBucket(List collations) { - // It will always use a single CompositeAggregationBuilder for the aggregation with GroupBy - // See {@link AggregateAnalyzer} - CompositeAggregationBuilder compositeAggregationBuilder = - (CompositeAggregationBuilder) aggregationBuilder.getLeft().get(0); - List> buckets = - ((CompositeAggregationBuilder) aggregationBuilder.getLeft().get(0)).sources(); - List> newBuckets = new ArrayList<>(buckets.size()); + AggregationBuilder builder = aggregationBuilder.getLeft().get(0); List selected = new ArrayList<>(collations.size()); - // Have to 
put the collation required buckets first, then the rest of buckets. - collations.forEach( - collation -> { - CompositeValuesSourceBuilder bucket = buckets.get(collation.getFieldIndex()); - Direction direction = collation.getDirection(); - NullDirection nullDirection = collation.nullDirection; - SortOrder order = - Direction.DESCENDING.equals(direction) ? SortOrder.DESC : SortOrder.ASC; - MissingOrder missingOrder; - switch (nullDirection) { - case FIRST: - missingOrder = MissingOrder.FIRST; - break; - case LAST: - missingOrder = MissingOrder.LAST; - break; - default: - missingOrder = MissingOrder.DEFAULT; - break; - } - newBuckets.add(bucket.order(order).missingOrder(missingOrder)); - selected.add(collation.getFieldIndex()); - }); - IntStream.range(0, buckets.size()) - .filter(i -> !selected.contains(i)) - .forEach(i -> newBuckets.add(buckets.get(i))); - Builder newAggBuilder = new Builder(); - compositeAggregationBuilder.getSubAggregations().forEach(newAggBuilder::addAggregator); - aggregationBuilder = - Pair.of( - Collections.singletonList( - AggregationBuilders.composite("composite_buckets", newBuckets) - .subAggregations(newAggBuilder) - .size(AGGREGATION_BUCKET_SIZE)), - aggregationBuilder.getRight()); + if (builder instanceof CompositeAggregationBuilder) { + // It will always use a single CompositeAggregationBuilder for the aggregation with GroupBy + // See {@link AggregateAnalyzer} + CompositeAggregationBuilder compositeAggBuilder = (CompositeAggregationBuilder) builder; + List> buckets = compositeAggBuilder.sources(); + List> newBuckets = new ArrayList<>(buckets.size()); + // Have to put the collation required buckets first, then the rest of buckets. + collations.forEach( + collation -> { + CompositeValuesSourceBuilder bucket = buckets.get(collation.getFieldIndex()); + Direction direction = collation.getDirection(); + NullDirection nullDirection = collation.nullDirection; + SortOrder order = + Direction.DESCENDING.equals(direction) ? 
SortOrder.DESC : SortOrder.ASC; + MissingOrder missingOrder; + switch (nullDirection) { + case FIRST: + missingOrder = MissingOrder.FIRST; + break; + case LAST: + missingOrder = MissingOrder.LAST; + break; + default: + missingOrder = MissingOrder.DEFAULT; + break; + } + newBuckets.add(bucket.order(order).missingOrder(missingOrder)); + selected.add(collation.getFieldIndex()); + }); + IntStream.range(0, buckets.size()) + .filter(i -> !selected.contains(i)) + .forEach(i -> newBuckets.add(buckets.get(i))); + Builder newAggBuilder = new Builder(); + compositeAggBuilder.getSubAggregations().forEach(newAggBuilder::addAggregator); + aggregationBuilder = + Pair.of( + Collections.singletonList( + AggregationBuilders.composite("composite_buckets", newBuckets) + .subAggregations(newAggBuilder) + .size(AGGREGATION_BUCKET_SIZE)), + aggregationBuilder.getRight()); + } + if (builder instanceof TermsAggregationBuilder) { + TermsAggregationBuilder termsAggBuilder = (TermsAggregationBuilder) builder; + termsAggBuilder.order( + BucketOrder.key(!collations.get(0).getDirection().isDescending())); + } + // TODO for MultiTermsAggregationBuilder } + /** + * Check if the limit can be pushed down into aggregation bucket when the limit size is less + * than bucket number. 
+ */ public boolean pushDownLimitIntoBucketSize(Integer size) { - CompositeAggregationBuilder compositeAggregationBuilder = - (CompositeAggregationBuilder) aggregationBuilder.getLeft().get(0); - if (size < compositeAggregationBuilder.size()) { - compositeAggregationBuilder.size(size); + AggregationBuilder builder = aggregationBuilder.getLeft().get(0); + if (builder instanceof CompositeAggregationBuilder) { + CompositeAggregationBuilder compositeAggBuilder = (CompositeAggregationBuilder) builder; + if (size < compositeAggBuilder.size()) { + compositeAggBuilder.size(size); + return true; + } else { + return false; + } + } + if (builder instanceof TermsAggregationBuilder) { + TermsAggregationBuilder termsAggBuilder = (TermsAggregationBuilder) builder; + if (size < termsAggBuilder.size()) { + termsAggBuilder.size(size); + return true; + } else { + return false; + } + } + if (builder instanceof MultiTermsAggregationBuilder) { + MultiTermsAggregationBuilder multiTermsAggBuilder = (MultiTermsAggregationBuilder) builder; + if (size < multiTermsAggBuilder.size()) { + multiTermsAggBuilder.size(size); + return true; + } else { + return false; + } + } + // now we only have Composite, Terms and MultiTerms bucket aggregations, + // add code here when we could support more in the future. + if (builder instanceof ValuesSourceAggregationBuilder.LeafOnly) { + // Note: all metric aggregations will be treated as pushed since it generates only one row. 
return true; } - return false; + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Unknown aggregation builder " + builder.getClass().getSimpleName()); } } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java index 77716184565..7d54e409acd 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java @@ -40,11 +40,15 @@ class OpenSearchIndexScanAggregationBuilder implements PushDownQueryBuilder { /** Sorting items pushed down. */ private List> sortList; + /** When false, ignore aggregation values for null bucket. */ + private final boolean bucketNullable; + OpenSearchIndexScanAggregationBuilder( OpenSearchRequestBuilder requestBuilder, LogicalAggregation aggregation) { this.requestBuilder = requestBuilder; aggregatorList = aggregation.getAggregatorList(); groupByList = aggregation.getGroupByList(); + this.bucketNullable = aggregation.isBucketNullable(); } @Override @@ -52,7 +56,7 @@ public OpenSearchRequestBuilder build() { AggregationQueryBuilder builder = new AggregationQueryBuilder(new DefaultExpressionSerializer()); Pair, OpenSearchAggregationResponseParser> aggregationBuilder = - builder.buildAggregationBuilder(aggregatorList, groupByList, sortList); + builder.buildAggregationBuilder(aggregatorList, groupByList, sortList, bucketNullable); requestBuilder.pushDownAggregation(aggregationBuilder); requestBuilder.pushTypeMapping(builder.buildTypeMapping(aggregatorList, groupByList)); return requestBuilder; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java index d93f7e8c234..6a45dc0a8fe 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java @@ -29,11 +29,13 @@ import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.expression.aggregation.NamedAggregator; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; import org.opensearch.sql.opensearch.response.agg.MetricParser; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; import org.opensearch.sql.opensearch.storage.script.aggregation.dsl.BucketAggregationBuilder; +import org.opensearch.sql.opensearch.storage.script.aggregation.dsl.CompositeAggregationBuilder; import org.opensearch.sql.opensearch.storage.script.aggregation.dsl.MetricAggregationBuilder; import org.opensearch.sql.opensearch.storage.serde.ExpressionSerializer; @@ -50,12 +52,16 @@ public class AggregationQueryBuilder extends ExpressionNodeVisitor namedAggregatorList, List groupByList, - List> sortList) { + List> sortList, + boolean bucketNullable) { final Pair> metrics = metricBuilder.build(namedAggregatorList); @@ -74,13 +81,21 @@ public AggregationQueryBuilder(ExpressionSerializer serializer) { return Pair.of( ImmutableList.copyOf(metrics.getLeft().getAggregatorFactories()), new NoBucketAggregationParser(metrics.getRight())); + } else if (groupByList.size() == 1 && !bucketNullable) { + // one bucket, use values source bucket builder for getting better performance + // TODO for multiple buckets, use 
MultiTermsAggregationBuilder + return Pair.of( + Collections.singletonList( + bucketBuilder.build(groupByList.get(0)).subAggregations(metrics.getLeft())), + new BucketAggregationParser(metrics.getRight())); } else { + // multiple bucket, use composite builder GroupSortOrder groupSortOrder = new GroupSortOrder(sortList); return Pair.of( Collections.singletonList( AggregationBuilders.composite( "composite_buckets", - bucketBuilder.build( + compositeBuilder.build( groupByList.stream() .sorted(groupSortOrder) .map( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java index 5b1fa67b5c0..a8229c94814 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java @@ -9,18 +9,19 @@ import static org.opensearch.sql.data.type.ExprCoreType.DATETIME; import static org.opensearch.sql.data.type.ExprCoreType.TIME; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; +import static org.opensearch.sql.opensearch.storage.script.aggregation.AggregationQueryBuilder.AGGREGATION_BUCKET_SIZE; -import com.google.common.collect.ImmutableList; import java.util.List; -import org.apache.commons.lang3.tuple.Triple; -import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder; -import org.opensearch.search.aggregations.bucket.composite.DateHistogramValuesSourceBuilder; -import org.opensearch.search.aggregations.bucket.composite.HistogramValuesSourceBuilder; -import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; +import java.util.stream.Collectors; +import org.opensearch.search.aggregations.BucketOrder; +import 
org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; -import org.opensearch.search.aggregations.bucket.missing.MissingOrder; +import org.opensearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder; +import org.opensearch.search.aggregations.bucket.terms.MultiTermsAggregationBuilder; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.opensearch.search.aggregations.support.MultiTermsValuesSourceConfig; import org.opensearch.search.aggregations.support.ValueType; -import org.opensearch.search.sort.SortOrder; +import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.span.SpanExpression; @@ -36,66 +37,77 @@ public BucketAggregationBuilder(ExpressionSerializer serializer) { this.helper = new AggregationBuilderHelper(serializer); } - /** Build the list of CompositeValuesSourceBuilder. */ - public List> build( - List> groupList) { - ImmutableList.Builder> resultBuilder = - new ImmutableList.Builder<>(); - for (Triple groupPair : groupList) { - resultBuilder.add( - buildCompositeValuesSourceBuilder( - groupPair.getLeft(), groupPair.getMiddle(), groupPair.getRight())); - } - return resultBuilder.build(); - } - - // todo, Expression should implement buildCompositeValuesSourceBuilder() interface. - private CompositeValuesSourceBuilder buildCompositeValuesSourceBuilder( - NamedExpression expr, SortOrder sortOrder, MissingOrder missingOrder) { + /** Build the ValuesSourceAggregationBuilder. 
*/ + public ValuesSourceAggregationBuilder build(NamedExpression expr) { if (expr.getDelegated() instanceof SpanExpression) { SpanExpression spanExpr = (SpanExpression) expr.getDelegated(); return buildHistogram( expr.getName(), spanExpr.getField().toString(), spanExpr.getValue().valueOf().doubleValue(), - spanExpr.getUnit(), - missingOrder); + spanExpr.getUnit()); } else { - CompositeValuesSourceBuilder sourceBuilder = - new TermsValuesSourceBuilder(expr.getName()) - .missingBucket(true) - .missingOrder(missingOrder) - .order(sortOrder); + TermsAggregationBuilder sourceBuilder = new TermsAggregationBuilder(expr.getName()); + sourceBuilder.size(AGGREGATION_BUCKET_SIZE); + sourceBuilder.order(BucketOrder.key(true)); // Time types values are converted to LONG in ExpressionAggregationScript::execute if ((expr.getDelegated().type() instanceof OpenSearchDateType && List.of(TIMESTAMP, TIME, DATE, DATETIME) .contains(((OpenSearchDateType) expr.getDelegated().type()).getExprCoreType())) || List.of(TIMESTAMP, TIME, DATE, DATETIME).contains(expr.getDelegated().type())) { - sourceBuilder.userValuetypeHint(ValueType.LONG); + sourceBuilder.userValueTypeHint(ValueType.LONG); } return helper.build(expr.getDelegated(), sourceBuilder::field, sourceBuilder::script); } } - public static CompositeValuesSourceBuilder buildHistogram( - String name, String field, Double value, SpanUnit unit, MissingOrder missingOrder) { + /** Build the MultiTermsAggregationBuilder. 
*/ + public MultiTermsAggregationBuilder buildMultipleTerms(List exprs) { + MultiTermsAggregationBuilder sourceBuilder = + new MultiTermsAggregationBuilder( + exprs.stream().map(NamedExpression::getName).collect(Collectors.joining("_"))); + sourceBuilder.terms( + exprs.stream() + .map( + expr -> { + MultiTermsValuesSourceConfig.Builder config = + new MultiTermsValuesSourceConfig.Builder(); + config.setFieldName(expr.getName()); + // Time types values are converted to LONG in ExpressionAggregationScript::execute + if ((expr.getDelegated().type() instanceof OpenSearchDateType + && List.of(TIMESTAMP, TIME, DATE) + .contains( + ((OpenSearchDateType) expr.getDelegated().type()) + .getExprCoreType())) + || List.of(TIMESTAMP, TIME, DATE).contains(expr.getDelegated().type())) { + config.setUserValueTypeHint(ValueType.LONG); + } + return config.build(); + }) + .collect(Collectors.toList())); + sourceBuilder.size(AGGREGATION_BUCKET_SIZE); + return sourceBuilder; + } + + public static ValuesSourceAggregationBuilder buildHistogram( + String name, String field, Double value, SpanUnit unit) { switch (unit) { case NONE: - return new HistogramValuesSourceBuilder(name) - .field(field) - .interval(value) - .missingBucket(true) - .missingOrder(missingOrder); + HistogramAggregationBuilder builder = new HistogramAggregationBuilder(name); + builder.field(field).interval(value); + return builder; case UNKNOWN: throw new IllegalStateException("Invalid span unit"); default: - return buildDateHistogram(name, field, value.intValue(), unit, missingOrder); + return buildDateHistogram(name, field, value.intValue(), unit); } } - public static CompositeValuesSourceBuilder buildDateHistogram( - String name, String field, Integer value, SpanUnit unit, MissingOrder missingOrder) { + public static ValuesSourceAggregationBuilder buildDateHistogram( + String name, String field, Integer value, SpanUnit unit) { String spanValue = value + unit.getName(); + DateHistogramAggregationBuilder builder = new 
DateHistogramAggregationBuilder(name); + builder.field(field); switch (unit) { case MILLISECOND: case MS: @@ -107,17 +119,11 @@ public static CompositeValuesSourceBuilder buildDateHistogram( case H: case DAY: case D: - return new DateHistogramValuesSourceBuilder(name) - .field(field) - .missingBucket(true) - .missingOrder(missingOrder) - .fixedInterval(new DateHistogramInterval(spanValue)); + builder.fixedInterval(new DateHistogramInterval(spanValue)); + break; default: - return new DateHistogramValuesSourceBuilder(name) - .field(field) - .missingBucket(true) - .missingOrder(missingOrder) - .calendarInterval(new DateHistogramInterval(spanValue)); + builder.calendarInterval(new DateHistogramInterval(spanValue)); } + return builder; } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java new file mode 100644 index 00000000000..e569bbabbf1 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java @@ -0,0 +1,122 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.aggregation.dsl; + +import static org.opensearch.sql.data.type.ExprCoreType.DATE; +import static org.opensearch.sql.data.type.ExprCoreType.TIME; +import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import org.apache.commons.lang3.tuple.Triple; +import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.composite.DateHistogramValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.composite.HistogramValuesSourceBuilder; +import 
org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; +import org.opensearch.search.aggregations.bucket.missing.MissingOrder; +import org.opensearch.search.aggregations.support.ValueType; +import org.opensearch.search.sort.SortOrder; +import org.opensearch.sql.ast.expression.SpanUnit; +import org.opensearch.sql.expression.NamedExpression; +import org.opensearch.sql.expression.span.SpanExpression; +import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; +import org.opensearch.sql.opensearch.storage.serde.ExpressionSerializer; + +/** Composite Aggregation Builder. */ +public class CompositeAggregationBuilder { + + private final AggregationBuilderHelper helper; + + public CompositeAggregationBuilder(ExpressionSerializer serializer) { + this.helper = new AggregationBuilderHelper(serializer); + } + + /** Build the list of CompositeValuesSourceBuilder. */ + public List> build( + List> groupList) { + ImmutableList.Builder> resultBuilder = + new ImmutableList.Builder<>(); + for (Triple groupPair : groupList) { + resultBuilder.add( + buildCompositeValuesSourceBuilder( + groupPair.getLeft(), groupPair.getMiddle(), groupPair.getRight())); + } + return resultBuilder.build(); + } + + // todo, Expression should implement buildCompositeValuesSourceBuilder() interface. 
+ private CompositeValuesSourceBuilder buildCompositeValuesSourceBuilder( + NamedExpression expr, SortOrder sortOrder, MissingOrder missingOrder) { + if (expr.getDelegated() instanceof SpanExpression) { + SpanExpression spanExpr = (SpanExpression) expr.getDelegated(); + return buildHistogram( + expr.getName(), + spanExpr.getField().toString(), + spanExpr.getValue().valueOf().doubleValue(), + spanExpr.getUnit(), + missingOrder); + } else { + CompositeValuesSourceBuilder sourceBuilder = + new TermsValuesSourceBuilder(expr.getName()) + .missingBucket(true) + .missingOrder(missingOrder) + .order(sortOrder); + // Time types values are converted to LONG in ExpressionAggregationScript::execute + if ((expr.getDelegated().type() instanceof OpenSearchDateType + && List.of(TIMESTAMP, TIME, DATE) + .contains(((OpenSearchDateType) expr.getDelegated().type()).getExprCoreType())) + || List.of(TIMESTAMP, TIME, DATE).contains(expr.getDelegated().type())) { + sourceBuilder.userValuetypeHint(ValueType.LONG); + } + return helper.build(expr.getDelegated(), sourceBuilder::field, sourceBuilder::script); + } + } + + public static CompositeValuesSourceBuilder buildHistogram( + String name, String field, Double value, SpanUnit unit, MissingOrder missingOrder) { + switch (unit) { + case NONE: + return new HistogramValuesSourceBuilder(name) + .field(field) + .interval(value) + .missingBucket(true) + .missingOrder(missingOrder); + case UNKNOWN: + throw new IllegalStateException("Invalid span unit"); + default: + return buildDateHistogram(name, field, value.intValue(), unit, missingOrder); + } + } + + public static CompositeValuesSourceBuilder buildDateHistogram( + String name, String field, Integer value, SpanUnit unit, MissingOrder missingOrder) { + String spanValue = value + unit.getName(); + switch (unit) { + case MILLISECOND: + case MS: + case SECOND: + case S: + case MINUTE: + case m: + case HOUR: + case H: + case DAY: + case D: + return new DateHistogramValuesSourceBuilder(name) + 
.field(field) + .missingBucket(true) + .missingOrder(missingOrder) + .fixedInterval(new DateHistogramInterval(spanValue)); + default: + return new DateHistogramValuesSourceBuilder(name) + .field(field) + .missingBucket(true) + .missingOrder(missingOrder) + .calendarInterval(new DateHistogramInterval(spanValue)); + } + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java index c2824a7f95a..e304f504242 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java @@ -579,6 +579,7 @@ private Aggregate createMockAggregate(List calls, ImmutableBitSet Aggregate agg = mock(Aggregate.class); when(agg.getGroupSet()).thenReturn(groups); when(agg.getAggCallList()).thenReturn(calls); + when(agg.getHints()).thenReturn(ImmutableList.of()); return agg; } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java index 9ae76f88438..57f7c4ea044 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java @@ -22,6 +22,7 @@ import org.junit.jupiter.api.DisplayNameGenerator; import org.junit.jupiter.api.Test; import org.opensearch.search.aggregations.metrics.ExtendedStats; +import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; import org.opensearch.sql.opensearch.response.agg.FilterParser; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; 
@@ -64,24 +65,19 @@ void no_bucket_two_metric_should_pass() { void one_bucket_one_metric_should_pass() { String response = "{\n" - + " \"composite#composite_buckets\": {\n" - + " \"after_key\": {\n" - + " \"type\": \"sale\"\n" - + " },\n" + + " \"sterms#type\": {\n" + + " \"doc_count_error_upper_bound\": 0,\n" + + " \"sum_other_doc_count\": 0,\n" + " \"buckets\": [\n" + " {\n" - + " \"key\": {\n" - + " \"type\": \"cost\"\n" - + " },\n" + + " \"key\": \"cost\",\n" + " \"doc_count\": 2,\n" + " \"avg#avg\": {\n" + " \"value\": 20\n" + " }\n" + " },\n" + " {\n" - + " \"key\": {\n" - + " \"type\": \"sale\"\n" - + " },\n" + + " \"key\": \"sale\",\n" + " \"doc_count\": 2,\n" + " \"avg#avg\": {\n" + " \"value\": 105\n" @@ -92,7 +88,7 @@ void one_bucket_one_metric_should_pass() { + "}"; OpenSearchAggregationResponseParser parser = - new CompositeAggregationParser(new SingleValueParser("avg")); + new BucketAggregationParser(new SingleValueParser("avg")); assertThat( parse(parser, response), containsInAnyOrder( @@ -182,15 +178,12 @@ void filter_aggregation_should_pass() { void filter_aggregation_group_by_should_pass() { String response = "{\n" - + " \"composite#composite_buckets\":{\n" - + " \"after_key\":{\n" - + " \"gender\":\"m\"\n" - + " },\n" + + " \"sterms#gender\": {\n" + + " \"doc_count_error_upper_bound\": 0,\n" + + " \"sum_other_doc_count\": 0,\n" + " \"buckets\":[\n" + " {\n" - + " \"key\":{\n" - + " \"gender\":\"f\"\n" - + " },\n" + + " \"key\":\"f\",\n" + " \"doc_count\":3,\n" + " \"filter#filter\":{\n" + " \"doc_count\":1,\n" @@ -200,9 +193,7 @@ void filter_aggregation_group_by_should_pass() { + " }\n" + " },\n" + " {\n" - + " \"key\":{\n" - + " \"gender\":\"m\"\n" - + " },\n" + + " \"key\":\"m\",\n" + " \"doc_count\":4,\n" + " \"filter#filter\":{\n" + " \"doc_count\":2,\n" @@ -215,7 +206,7 @@ void filter_aggregation_group_by_should_pass() { + " }\n" + "}"; OpenSearchAggregationResponseParser parser = - new CompositeAggregationParser( + new 
BucketAggregationParser( FilterParser.builder() .name("filter") .metricsParser(new SingleValueParser("avg")) @@ -352,15 +343,12 @@ void no_bucket_two_metric_percentile_should_pass() { void one_bucket_one_metric_percentile_should_pass() { String response = "{\n" - + " \"composite#composite_buckets\": {\n" - + " \"after_key\": {\n" - + " \"type\": \"sale\"\n" - + " },\n" + + " \"sterms#type\": {\n" + + " \"doc_count_error_upper_bound\": 0,\n" + + " \"sum_other_doc_count\": 0,\n" + " \"buckets\": [\n" + " {\n" - + " \"key\": {\n" - + " \"type\": \"cost\"\n" - + " },\n" + + " \"key\": \"cost\",\n" + " \"doc_count\": 2,\n" + " \"percentiles#percentile\": {\n" + " \"values\": {\n" @@ -369,9 +357,7 @@ void one_bucket_one_metric_percentile_should_pass() { + " }\n" + " },\n" + " {\n" - + " \"key\": {\n" - + " \"type\": \"sale\"\n" - + " },\n" + + " \"key\": \"sale\",\n" + " \"doc_count\": 2,\n" + " \"percentiles#percentile\": {\n" + " \"values\": {\n" @@ -384,7 +370,7 @@ void one_bucket_one_metric_percentile_should_pass() { + "}"; OpenSearchAggregationResponseParser parser = - new CompositeAggregationParser(new SinglePercentileParser("percentile")); + new BucketAggregationParser(new SinglePercentileParser("percentile")); assertThat( parse(parser, response), containsInAnyOrder( @@ -470,15 +456,12 @@ void no_bucket_percentiles_should_pass() { void one_bucket_percentiles_should_pass() { String response = "{\n" - + " \"composite#composite_buckets\": {\n" - + " \"after_key\": {\n" - + " \"type\": \"sale\"\n" - + " },\n" + + " \"sterms#type\": {\n" + + " \"doc_count_error_upper_bound\": 0,\n" + + " \"sum_other_doc_count\": 0,\n" + " \"buckets\": [\n" + " {\n" - + " \"key\": {\n" - + " \"type\": \"cost\"\n" - + " },\n" + + " \"key\": \"cost\",\n" + " \"doc_count\": 2,\n" + " \"percentiles#percentiles\": {\n" + " \"values\": {\n" @@ -493,9 +476,7 @@ void one_bucket_percentiles_should_pass() { + " }\n" + " },\n" + " {\n" - + " \"key\": {\n" - + " \"type\": \"sale\"\n" - + " },\n" + 
+ " \"key\": \"sale\",\n" + " \"doc_count\": 2,\n" + " \"percentiles#percentiles\": {\n" + " \"values\": {\n" @@ -514,7 +495,7 @@ void one_bucket_percentiles_should_pass() { + "}"; OpenSearchAggregationResponseParser parser = - new CompositeAggregationParser(new PercentilesParser("percentiles")); + new BucketAggregationParser(new PercentilesParser("percentiles")); assertThat( parse(parser, response), containsInAnyOrder( diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java index 49adaf8189d..2b0f77e5744 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java @@ -19,6 +19,7 @@ import static org.opensearch.sql.data.type.ExprCoreType.LONG; import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.expression.DSL.literal; +import static org.opensearch.sql.opensearch.storage.script.aggregation.AggregationQueryBuilder.AGGREGATION_BUCKET_SIZE; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.aggregation; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.filter; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.highlight; @@ -56,8 +57,9 @@ import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; -import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; +import org.opensearch.search.aggregations.BucketOrder; import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import 
org.opensearch.search.sort.NestedSortBuilder; import org.opensearch.search.sort.SortBuilder; import org.opensearch.search.sort.SortBuilders; @@ -75,6 +77,7 @@ import org.opensearch.sql.expression.function.OpenSearchFunctions; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; import org.opensearch.sql.opensearch.response.agg.SingleValueParser; @@ -258,6 +261,31 @@ void test_aggregation_push_down() { DSL.named("AVG(intV)", DSL.ref("AVG(intV)", DOUBLE)))); } + @Test + void test_aggregation_push_down_non_bucket_nullable() { + assertEqualsAfterOptimization( + project( + indexScanAggBuilder( + false, + withAggregationPushedDown( + false, + aggregate("AVG(intV)") + .aggregateBy("intV") + .groupBy("longV") + .resultTypes( + Map.of( + "AVG(intV)", DOUBLE, + "longV", LONG)))), + DSL.named("AVG(intV)", DSL.ref("AVG(intV)", DOUBLE))), + project( + aggregation( + relation("schema", table), + ImmutableList.of(DSL.named("AVG(intV)", DSL.avg(DSL.ref("intV", INTEGER)))), + ImmutableList.of(DSL.named("longV", DSL.ref("longV", LONG))), + false), + DSL.named("AVG(intV)", DSL.ref("AVG(intV)", DOUBLE)))); + } + /* @Disabled("This test should be enabled once https://github.com/opensearch-project/sql/issues/912 is fixed") @Test @@ -386,6 +414,34 @@ void test_aggregation_filter_push_down() { DSL.named("AVG(intV)", DSL.ref("AVG(intV)", DOUBLE)))); } + @Test + void test_aggregation_filter_push_down_non_bucket_nullable() { + assertEqualsAfterOptimization( + project( + indexScanAggBuilder( + false, + withFilterPushedDown(QueryBuilders.termQuery("intV", 1)), + withAggregationPushedDown( + false, + aggregate("AVG(intV)") + .aggregateBy("intV") + .groupBy("longV") + 
.resultTypes( + Map.of( + "AVG(intV)", DOUBLE, + "longV", LONG)))), + DSL.named("AVG(intV)", DSL.ref("AVG(intV)", DOUBLE))), + project( + aggregation( + filter( + relation("schema", table), + DSL.equal(DSL.ref("intV", INTEGER), DSL.literal(integerValue(1)))), + ImmutableList.of(DSL.named("AVG(intV)", DSL.avg(DSL.ref("intV", INTEGER)))), + ImmutableList.of(DSL.named("longV", DSL.ref("longV", LONG))), + false), + DSL.named("AVG(intV)", DSL.ref("AVG(intV)", DOUBLE)))); + } + /** Sort - Filter - Relation --> IndexScan. */ @Test void test_sort_filter_push_down() { @@ -427,6 +483,34 @@ void test_sort_aggregation_push_down() { DSL.named("AVG(intV)", DSL.ref("AVG(intV)", DOUBLE)))); } + @Test + void test_sort_aggregation_push_down_non_bucket_nullable() { + assertEqualsAfterOptimization( + project( + indexScanAggBuilder( + false, + withAggregationPushedDown( + false, + aggregate("AVG(intV)") + .aggregateBy("intV") + .groupBy("stringV") + .sortBy(SortOption.DEFAULT_DESC) + .resultTypes( + Map.of( + "AVG(intV)", DOUBLE, + "stringV", STRING)))), + DSL.named("AVG(intV)", DSL.ref("AVG(intV)", DOUBLE))), + project( + sort( + aggregation( + relation("schema", table), + ImmutableList.of(DSL.named("AVG(intV)", DSL.avg(DSL.ref("intV", INTEGER)))), + ImmutableList.of(DSL.named("stringV", DSL.ref("stringV", STRING))), + false), + Pair.of(SortOption.DEFAULT_DESC, DSL.ref("stringV", STRING))), + DSL.named("AVG(intV)", DSL.ref("AVG(intV)", DOUBLE)))); + } + @Test void test_limit_sort_filter_push_down() { assertEqualsAfterOptimization( @@ -547,6 +631,31 @@ void sort_with_expression_cannot_merge_with_aggregation() { Pair.of(SortOption.DEFAULT_ASC, DSL.abs(DSL.ref("intV", INTEGER))))); } + @Test + void sort_with_expression_cannot_merge_with_aggregation_non_bucket_nullable() { + assertEqualsAfterOptimization( + sort( + indexScanAggBuilder( + false, + withAggregationPushedDown( + false, + aggregate("AVG(intV)") + .aggregateBy("intV") + .groupBy("stringV") + .resultTypes( + Map.of( + 
"AVG(intV)", DOUBLE, + "stringV", STRING)))), + Pair.of(SortOption.DEFAULT_ASC, DSL.abs(DSL.ref("intV", INTEGER)))), + sort( + aggregation( + relation("schema", table), + ImmutableList.of(DSL.named("AVG(intV)", DSL.avg(DSL.ref("intV", INTEGER)))), + ImmutableList.of(DSL.named("stringV", DSL.ref("stringV", STRING))), + false), + Pair.of(SortOption.DEFAULT_ASC, DSL.abs(DSL.ref("intV", INTEGER))))); + } + @Test void aggregation_cant_merge_index_scan_with_limit() { assertEqualsAfterOptimization( @@ -594,6 +703,35 @@ void sort_refer_to_aggregator_should_not_merge_with_indexAgg() { DSL.named("AVG(intV)", DSL.ref("AVG(intV)", DOUBLE)))); } + @Test + void sort_refer_to_aggregator_should_not_merge_with_indexAgg_non_bucket_nullable() { + assertEqualsAfterOptimization( + project( + sort( + indexScanAggBuilder( + false, + withAggregationPushedDown( + false, + aggregate("AVG(intV)") + .aggregateBy("intV") + .groupBy("stringV") + .resultTypes( + Map.of( + "AVG(intV)", DOUBLE, + "stringV", STRING)))), + Pair.of(SortOption.DEFAULT_ASC, DSL.ref("AVG(intV)", INTEGER))), + DSL.named("AVG(intV)", DSL.ref("AVG(intV)", DOUBLE))), + project( + sort( + aggregation( + relation("schema", table), + ImmutableList.of(DSL.named("AVG(intV)", DSL.avg(DSL.ref("intV", INTEGER)))), + ImmutableList.of(DSL.named("stringV", DSL.ref("stringV", STRING))), + false), + Pair.of(SortOption.DEFAULT_ASC, DSL.ref("AVG(intV)", INTEGER))), + DSL.named("AVG(intV)", DSL.ref("AVG(intV)", DOUBLE)))); + } + @Test void project_literal_should_not_be_pushed_down() { assertEqualsAfterOptimization( @@ -608,9 +746,15 @@ private OpenSearchIndexScanBuilder indexScanBuilder(Runnable... verifyPushDownCa } private OpenSearchIndexScanBuilder indexScanAggBuilder(Runnable... verifyPushDownCalls) { + return indexScanAggBuilder(true, verifyPushDownCalls); + } + + private OpenSearchIndexScanBuilder indexScanAggBuilder( + boolean bucketNullable, Runnable... 
verifyPushDownCalls) { this.verifyPushDownCalls = verifyPushDownCalls; - var aggregationBuilder = - new OpenSearchIndexScanAggregationBuilder(requestBuilder, mock(LogicalAggregation.class)); + LogicalAggregation mockAgg = mock(LogicalAggregation.class); + when(mockAgg.isBucketNullable()).thenReturn(bucketNullable); + var aggregationBuilder = new OpenSearchIndexScanAggregationBuilder(requestBuilder, mockAgg); return new OpenSearchIndexScanBuilder(aggregationBuilder, builder -> indexScan); } @@ -635,25 +779,44 @@ private Runnable withFilterPushedDown(QueryBuilder filteringCondition) { private Runnable withAggregationPushedDown( AggregationAssertHelper.AggregationAssertHelperBuilder aggregation) { + return withAggregationPushedDown(true, aggregation); + } - // Assume single term bucket and AVG metric in all tests in this suite - CompositeAggregationBuilder aggBuilder = - AggregationBuilders.composite( - "composite_buckets", - Collections.singletonList( - new TermsValuesSourceBuilder(aggregation.groupBy) - .field(aggregation.groupBy) - .order(aggregation.sortBy.getSortOrder() == ASC ? "asc" : "desc") - .missingOrder( - aggregation.sortBy.getNullOrder() == NULL_FIRST ? "first" : "last") - .missingBucket(true))) - .subAggregation( - AggregationBuilders.avg(aggregation.aggregateName).field(aggregation.aggregateBy)) - .size(AggregationQueryBuilder.AGGREGATION_BUCKET_SIZE); + private Runnable withAggregationPushedDown( + boolean bucketNullable, AggregationAssertHelper.AggregationAssertHelperBuilder aggregation) { + // Assume single term bucket and AVG metric in all tests in this suite + AggregationBuilder aggBuilder; + OpenSearchAggregationResponseParser responseParser; + if (bucketNullable) { + aggBuilder = + AggregationBuilders.composite( + "composite_buckets", + Collections.singletonList( + new TermsValuesSourceBuilder(aggregation.groupBy) + .field(aggregation.groupBy) + .order(aggregation.sortBy.getSortOrder() == ASC ? 
"asc" : "desc") + .missingOrder( + aggregation.sortBy.getNullOrder() == NULL_FIRST ? "first" : "last") + .missingBucket(true))) + .subAggregation( + AggregationBuilders.avg(aggregation.aggregateName).field(aggregation.aggregateBy)) + .size(AggregationQueryBuilder.AGGREGATION_BUCKET_SIZE); + } else { + aggBuilder = + new TermsAggregationBuilder(aggregation.groupBy) + .field(aggregation.groupBy) + .size(AGGREGATION_BUCKET_SIZE) + .order(BucketOrder.key(true)) + .subAggregation( + AggregationBuilders.avg(aggregation.aggregateName) + .field(aggregation.aggregateBy)); + } List aggBuilders = Collections.singletonList(aggBuilder); - OpenSearchAggregationResponseParser responseParser = - new CompositeAggregationParser(new SingleValueParser(aggregation.aggregateName)); + responseParser = + bucketNullable + ? new CompositeAggregationParser(new SingleValueParser(aggregation.aggregateName)) + : new BucketAggregationParser(new SingleValueParser(aggregation.aggregateName)); return () -> { verify(requestBuilder, times(1)).pushDownAggregation(Pair.of(aggBuilders, responseParser)); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java index 5f046cdd7ee..096788f4371 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java @@ -86,6 +86,16 @@ void should_build_composite_aggregation_for_field_reference() { + " \"order\" : \"asc\"%n" + " }%n" + " }%n" + + " }, {%n" + + " \"date\" : {%n" + + " \"terms\" : {%n" + + " \"field\" : \"date\",%n" + + " \"missing_bucket\" : true,%n" + + " \"value_type\" : \"long\",%n" + + " \"missing_order\" : \"first\",%n" + + " \"order\" : \"asc\"%n" + + " }%n" + + " }%n" + " } ]%n" 
+ " },%n" + " \"aggregations\" : {%n" @@ -100,7 +110,7 @@ void should_build_composite_aggregation_for_field_reference() { buildQuery( Arrays.asList( named("avg(age)", new AvgAggregator(Arrays.asList(ref("age", INTEGER)), INTEGER))), - Arrays.asList(named("name", ref("name", STRING))))); + Arrays.asList(named("name", ref("name", STRING)), named("date", ref("date", DATE))))); } @Test @@ -120,6 +130,16 @@ void should_build_composite_aggregation_for_field_reference_with_order() { + " \"order\" : \"desc\"%n" + " }%n" + " }%n" + + " }, {%n" + + " \"date\" : {%n" + + " \"terms\" : {%n" + + " \"field\" : \"date\",%n" + + " \"missing_bucket\" : true,%n" + + " \"value_type\" : \"long\",%n" + + " \"missing_order\" : \"first\",%n" + + " \"order\" : \"asc\"%n" + + " }%n" + + " }%n" + " } ]%n" + " },%n" + " \"aggregations\" : {%n" @@ -134,8 +154,9 @@ void should_build_composite_aggregation_for_field_reference_with_order() { buildQuery( Arrays.asList( named("avg(age)", new AvgAggregator(Arrays.asList(ref("age", INTEGER)), INTEGER))), - Arrays.asList(named("name", ref("name", STRING))), - sort(ref("name", STRING), Sort.SortOption.DEFAULT_DESC))); + Arrays.asList(named("name", ref("name", STRING)), named("date", ref("date", DATE))), + sort(ref("name", STRING), Sort.SortOption.DEFAULT_DESC), + true)); } @Test @@ -219,6 +240,16 @@ void should_build_composite_aggregation_for_field_reference_of_keyword() { + " \"order\" : \"asc\"%n" + " }%n" + " }%n" + + " }, {%n" + + " \"date\" : {%n" + + " \"terms\" : {%n" + + " \"field\" : \"date\",%n" + + " \"missing_bucket\" : true,%n" + + " \"value_type\" : \"long\",%n" + + " \"missing_order\" : \"first\",%n" + + " \"order\" : \"asc\"%n" + + " }%n" + + " }%n" + " } ]%n" + " },%n" + " \"aggregations\" : {%n" @@ -241,8 +272,8 @@ void should_build_composite_aggregation_for_field_reference_of_keyword() { OpenSearchTextType.of( Map.of( "words", - OpenSearchDataType.of( - OpenSearchDataType.MappingType.Keyword)))))))); + 
OpenSearchDataType.of(OpenSearchDataType.MappingType.Keyword))))), + named("date", ref("date", DATE))))); } @Test @@ -268,24 +299,59 @@ void should_build_composite_aggregation_for_expression() { .serialize(any()); assertEquals( format( - "{%n \"composite_buckets\" : {%n \"composite\" : {%n \"size\" : 1000,%n " - + " \"sources\" : [ {%n \"age\" : {%n \"terms\" : {%n " - + " \"script\" : {%n \"source\" :" - + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"asin(age)\\\"}\",%n " - + " \"lang\" : \"opensearch_compounded_script\"%n },%n " - + " \"missing_bucket\" : true,%n \"missing_order\" : \"first\",%n " - + " \"order\" : \"asc\"%n }%n }%n } ]%n },%n " - + " \"aggregations\" : {%n \"avg(balance)\" : {%n \"avg\" : {%n " - + " \"script\" : {%n \"source\" :" - + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"abs(balance)\\\"}\",%n " - + " \"lang\" : \"opensearch_compounded_script\"%n }%n }%n " - + " }%n }%n }%n}"), + "{%n" + + " \"composite_buckets\" : {%n" + + " \"composite\" : {%n" + + " \"size\" : 1000,%n" + + " \"sources\" : [ {%n" + + " \"age\" : {%n" + + " \"terms\" : {%n" + + " \"script\" : {%n" + + " \"source\" :" + + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"asin(age)\\\"}\",%n" + + " \"lang\" : \"opensearch_compounded_script\"%n" + + " },%n" + + " \"missing_bucket\" : true,%n" + + " \"missing_order\" : \"first\",%n" + + " \"order\" : \"asc\"%n" + + " }%n" + + " }%n" + + " }, {%n" + + " \"date\" : {%n" + + " \"terms\" : {%n" + + " \"script\" : {%n" + + " \"source\" :" + + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"dayname(date)\\\"}\",%n" + + " \"lang\" : \"opensearch_compounded_script\"%n" + + " },%n" + + " \"missing_bucket\" : true,%n" + + " \"missing_order\" : \"first\",%n" + + " \"order\" : \"asc\"%n" + + " }%n" + + " }%n" + + " } ]%n" + + " },%n" + + " \"aggregations\" : {%n" + + " \"avg(balance)\" : {%n" + + " \"avg\" : {%n" + + " \"script\" : {%n" + + " \"source\" :" + + " 
\"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"abs(balance)\\\"}\",%n" + + " \"lang\" : \"opensearch_compounded_script\"%n" + + " }%n" + + " }%n" + + " }%n" + + " }%n" + + " }%n" + + "}"), buildQuery( Arrays.asList( named( "avg(balance)", new AvgAggregator(Arrays.asList(DSL.abs(ref("balance", INTEGER))), INTEGER))), - Arrays.asList(named("age", DSL.asin(ref("age", INTEGER)))))); + Arrays.asList( + named("age", DSL.asin(ref("age", INTEGER))), + named("date", DSL.dayname(ref("date", DATE)))))); } @Test @@ -332,7 +398,8 @@ void should_build_composite_aggregation_follow_with_order_by_position() { ref("name", STRING), Sort.SortOption.DEFAULT_DESC, ref("age", INTEGER), - Sort.SortOption.DEFAULT_ASC))); + Sort.SortOption.DEFAULT_ASC), + true)); } @Test @@ -408,19 +475,16 @@ void should_build_filter_aggregation_group_by() { assertEquals( format( "{%n" - + " \"composite_buckets\" : {%n" - + " \"composite\" : {%n" + + " \"gender\" : {%n" + + " \"terms\" : {%n" + + " \"field\" : \"gender\",%n" + " \"size\" : 1000,%n" - + " \"sources\" : [ {%n" - + " \"gender\" : {%n" - + " \"terms\" : {%n" - + " \"field\" : \"gender\",%n" - + " \"missing_bucket\" : true,%n" - + " \"missing_order\" : \"first\",%n" - + " \"order\" : \"asc\"%n" - + " }%n" - + " }%n" - + " } ]%n" + + " \"min_doc_count\" : 1,%n" + + " \"shard_min_doc_count\" : 0,%n" + + " \"show_term_doc_count_error\" : false,%n" + + " \"order\" : {%n" + + " \"_key\" : \"asc\"%n" + + " }%n" + " },%n" + " \"aggregations\" : {%n" + " \"avg(age) filter(where age > 34)\" : {%n" @@ -452,7 +516,8 @@ void should_build_filter_aggregation_group_by() { "avg(age) filter(where age > 34)", new AvgAggregator(Arrays.asList(ref("age", INTEGER)), INTEGER) .condition(DSL.greater(ref("age", INTEGER), literal(20))))), - Arrays.asList(named(ref("gender", OpenSearchDataType.of(STRING)))))); + Arrays.asList(named(ref("gender", OpenSearchDataType.of(STRING)))), + false)); } @Test @@ -472,20 +537,16 @@ void should_build_histogram() { assertEquals( 
format( "{%n" - + " \"composite_buckets\" : {%n" - + " \"composite\" : {%n" - + " \"size\" : 1000,%n" - + " \"sources\" : [ {%n" - + " \"SpanExpression(field=age, value=10, unit=NONE)\" : {%n" - + " \"histogram\" : {%n" - + " \"field\" : \"age\",%n" - + " \"missing_bucket\" : true,%n" - + " \"missing_order\" : \"first\",%n" - + " \"order\" : \"asc\",%n" - + " \"interval\" : 10.0%n" - + " }%n" - + " }%n" - + " } ]%n" + + " \"SpanExpression(field=age, value=10, unit=NONE)\" : {%n" + + " \"histogram\" : {%n" + + " \"field\" : \"age\",%n" + + " \"interval\" : 10.0,%n" + + " \"offset\" : 0.0,%n" + + " \"order\" : {%n" + + " \"_key\" : \"asc\"%n" + + " },%n" + + " \"keyed\" : false,%n" + + " \"min_doc_count\" : 0%n" + " },%n" + " \"aggregations\" : {%n" + " \"count(a)\" : {%n" @@ -499,7 +560,8 @@ void should_build_histogram() { buildQuery( Arrays.asList( named("count(a)", new CountAggregator(Arrays.asList(ref("a", INTEGER)), INTEGER))), - Arrays.asList(named(span(ref("age", INTEGER), literal(10), ""))))); + Arrays.asList(named(span(ref("age", INTEGER), literal(10), ""))), + false)); } @Test @@ -507,20 +569,16 @@ void should_build_histogram_two_metrics() { assertEquals( format( "{%n" - + " \"composite_buckets\" : {%n" - + " \"composite\" : {%n" - + " \"size\" : 1000,%n" - + " \"sources\" : [ {%n" - + " \"SpanExpression(field=age, value=10, unit=NONE)\" : {%n" - + " \"histogram\" : {%n" - + " \"field\" : \"age\",%n" - + " \"missing_bucket\" : true,%n" - + " \"missing_order\" : \"first\",%n" - + " \"order\" : \"asc\",%n" - + " \"interval\" : 10.0%n" - + " }%n" - + " }%n" - + " } ]%n" + + " \"SpanExpression(field=age, value=10, unit=NONE)\" : {%n" + + " \"histogram\" : {%n" + + " \"field\" : \"age\",%n" + + " \"interval\" : 10.0,%n" + + " \"offset\" : 0.0,%n" + + " \"order\" : {%n" + + " \"_key\" : \"asc\"%n" + + " },%n" + + " \"keyed\" : false,%n" + + " \"min_doc_count\" : 0%n" + " },%n" + " \"aggregations\" : {%n" + " \"count(a)\" : {%n" @@ -540,7 +598,8 @@ void 
should_build_histogram_two_metrics() { Arrays.asList( named("count(a)", new CountAggregator(Arrays.asList(ref("a", INTEGER)), INTEGER)), named("avg(b)", new AvgAggregator(Arrays.asList(ref("b", INTEGER)), INTEGER))), - Arrays.asList(named(span(ref("age", INTEGER), literal(10), ""))))); + Arrays.asList(named(span(ref("age", INTEGER), literal(10), ""))), + false)); } @Test @@ -548,20 +607,16 @@ void fixed_interval_time_span() { assertEquals( format( "{%n" - + " \"composite_buckets\" : {%n" - + " \"composite\" : {%n" - + " \"size\" : 1000,%n" - + " \"sources\" : [ {%n" - + " \"SpanExpression(field=timestamp, value=1, unit=H)\" : {%n" - + " \"date_histogram\" : {%n" - + " \"field\" : \"timestamp\",%n" - + " \"missing_bucket\" : true,%n" - + " \"missing_order\" : \"first\",%n" - + " \"order\" : \"asc\",%n" - + " \"fixed_interval\" : \"1h\"%n" - + " }%n" - + " }%n" - + " } ]%n" + + " \"SpanExpression(field=timestamp, value=1, unit=H)\" : {%n" + + " \"date_histogram\" : {%n" + + " \"field\" : \"timestamp\",%n" + + " \"fixed_interval\" : \"1h\",%n" + + " \"offset\" : 0,%n" + + " \"order\" : {%n" + + " \"_key\" : \"asc\"%n" + + " },%n" + + " \"keyed\" : false,%n" + + " \"min_doc_count\" : 0%n" + " },%n" + " \"aggregations\" : {%n" + " \"count(a)\" : {%n" @@ -575,7 +630,8 @@ void fixed_interval_time_span() { buildQuery( Arrays.asList( named("count(a)", new CountAggregator(Arrays.asList(ref("a", INTEGER)), INTEGER))), - Arrays.asList(named(span(ref("timestamp", TIMESTAMP), literal(1), "h"))))); + Arrays.asList(named(span(ref("timestamp", TIMESTAMP), literal(1), "h"))), + false)); } @Test @@ -583,20 +639,16 @@ void calendar_interval_time_span() { assertEquals( format( "{%n" - + " \"composite_buckets\" : {%n" - + " \"composite\" : {%n" - + " \"size\" : 1000,%n" - + " \"sources\" : [ {%n" - + " \"SpanExpression(field=date, value=1, unit=W)\" : {%n" - + " \"date_histogram\" : {%n" - + " \"field\" : \"date\",%n" - + " \"missing_bucket\" : true,%n" - + " \"missing_order\" : 
\"first\",%n" - + " \"order\" : \"asc\",%n" - + " \"calendar_interval\" : \"1w\"%n" - + " }%n" - + " }%n" - + " } ]%n" + + " \"SpanExpression(field=date, value=1, unit=W)\" : {%n" + + " \"date_histogram\" : {%n" + + " \"field\" : \"date\",%n" + + " \"calendar_interval\" : \"1w\",%n" + + " \"offset\" : 0,%n" + + " \"order\" : {%n" + + " \"_key\" : \"asc\"%n" + + " },%n" + + " \"keyed\" : false,%n" + + " \"min_doc_count\" : 0%n" + " },%n" + " \"aggregations\" : {%n" + " \"count(a)\" : {%n" @@ -610,7 +662,8 @@ void calendar_interval_time_span() { buildQuery( Arrays.asList( named("count(a)", new CountAggregator(Arrays.asList(ref("a", INTEGER)), INTEGER))), - Arrays.asList(named(span(ref("date", DATE), literal(1), "w"))))); + Arrays.asList(named(span(ref("date", DATE), literal(1), "w"))), + false)); } @Test @@ -618,20 +671,16 @@ void general_span() { assertEquals( format( "{%n" - + " \"composite_buckets\" : {%n" - + " \"composite\" : {%n" - + " \"size\" : 1000,%n" - + " \"sources\" : [ {%n" - + " \"SpanExpression(field=age, value=1, unit=NONE)\" : {%n" - + " \"histogram\" : {%n" - + " \"field\" : \"age\",%n" - + " \"missing_bucket\" : true,%n" - + " \"missing_order\" : \"first\",%n" - + " \"order\" : \"asc\",%n" - + " \"interval\" : 1.0%n" - + " }%n" - + " }%n" - + " } ]%n" + + " \"SpanExpression(field=age, value=1, unit=NONE)\" : {%n" + + " \"histogram\" : {%n" + + " \"field\" : \"age\",%n" + + " \"interval\" : 1.0,%n" + + " \"offset\" : 0.0,%n" + + " \"order\" : {%n" + + " \"_key\" : \"asc\"%n" + + " },%n" + + " \"keyed\" : false,%n" + + " \"min_doc_count\" : 0%n" + " },%n" + " \"aggregations\" : {%n" + " \"count(a)\" : {%n" @@ -645,7 +694,8 @@ void general_span() { buildQuery( Arrays.asList( named("count(a)", new CountAggregator(Arrays.asList(ref("a", INTEGER)), INTEGER))), - Arrays.asList(named(span(ref("age", INTEGER), literal(1), ""))))); + Arrays.asList(named(span(ref("age", INTEGER), literal(1), ""))), + false)); } @Test @@ -664,19 +714,28 @@ void invalid_unit() 
{ @SneakyThrows private String buildQuery( List namedAggregatorList, List groupByList) { - return buildQuery(namedAggregatorList, groupByList, null); + return buildQuery(namedAggregatorList, groupByList, true); + } + + @SneakyThrows + private String buildQuery( + List namedAggregatorList, + List groupByList, + boolean bucketNullable) { + return buildQuery(namedAggregatorList, groupByList, null, bucketNullable); } @SneakyThrows private String buildQuery( List namedAggregatorList, List groupByList, - List> sortList) { + List> sortList, + boolean bucketNullable) { ObjectMapper objectMapper = new ObjectMapper(); return objectMapper .readTree( queryBuilder - .buildAggregationBuilder(namedAggregatorList, groupByList, sortList) + .buildAggregationBuilder(namedAggregatorList, groupByList, sortList, bucketNullable) .getLeft() .get(0) .toString()) diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java index 79347a1f2f9..02ca2a5d312 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java @@ -14,11 +14,8 @@ import static org.opensearch.sql.expression.DSL.named; import static org.opensearch.sql.expression.DSL.ref; -import java.util.Arrays; -import java.util.List; import java.util.Map; import lombok.SneakyThrows; -import org.apache.commons.lang3.tuple.Triple; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; @@ -31,9 +28,7 @@ import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.xcontent.XContentBuilder; 
-import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder; -import org.opensearch.search.aggregations.bucket.missing.MissingOrder; -import org.opensearch.search.sort.SortOrder; +import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.DSL; @@ -61,14 +56,20 @@ void set_up() { void should_build_bucket_with_field() { assertEquals( "{\n" - + " \"terms\" : {\n" - + " \"field\" : \"age\",\n" - + " \"missing_bucket\" : true,\n" - + " \"missing_order\" : \"first\",\n" - + " \"order\" : \"asc\"\n" + + " \"age\" : {\n" + + " \"terms\" : {\n" + + " \"field\" : \"age\",\n" + + " \"size\" : 1000,\n" + + " \"min_doc_count\" : 1,\n" + + " \"shard_min_doc_count\" : 0,\n" + + " \"show_term_doc_count_error\" : false,\n" + + " \"order\" : {\n" + + " \"_key\" : \"asc\"\n" + + " }\n" + + " }\n" + " }\n" + "}", - buildQuery(Arrays.asList(asc(named("age", ref("age", INTEGER)))))); + buildQuery(named("age", ref("age", INTEGER)))); } @Test @@ -77,43 +78,52 @@ void should_build_bucket_with_literal() { when(serializer.serialize(literal)).thenReturn("mock-serialize"); assertEquals( "{\n" - + " \"terms\" : {\n" - + " \"script\" : {\n" - + " \"source\" :" + + " \"1\" : {\n" + + " \"terms\" : {\n" + + " \"script\" : {\n" + + " \"source\" :" + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"mock-serialize\\\"}\",\n" - + " \"lang\" : \"opensearch_compounded_script\"\n" - + " },\n" - + " \"missing_bucket\" : true,\n" - + " \"missing_order\" : \"first\",\n" - + " \"order\" : \"asc\"\n" + + " \"lang\" : \"opensearch_compounded_script\"\n" + + " },\n" + + " \"size\" : 1000,\n" + + " \"min_doc_count\" : 1,\n" + + " \"shard_min_doc_count\" : 0,\n" + + " \"show_term_doc_count_error\" : false,\n" + + " \"order\" : {\n" + + " \"_key\" : \"asc\"\n" + + " }\n" + + " }\n" + " }\n" + "}", - 
buildQuery(Arrays.asList(asc(named(literal))))); + buildQuery(named(literal))); } @Test void should_build_bucket_with_keyword_field() { assertEquals( "{\n" - + " \"terms\" : {\n" - + " \"field\" : \"name.keyword\",\n" - + " \"missing_bucket\" : true,\n" - + " \"missing_order\" : \"first\",\n" - + " \"order\" : \"asc\"\n" + + " \"name\" : {\n" + + " \"terms\" : {\n" + + " \"field\" : \"name.keyword\",\n" + + " \"size\" : 1000,\n" + + " \"min_doc_count\" : 1,\n" + + " \"shard_min_doc_count\" : 0,\n" + + " \"show_term_doc_count_error\" : false,\n" + + " \"order\" : {\n" + + " \"_key\" : \"asc\"\n" + + " }\n" + + " }\n" + " }\n" + "}", buildQuery( - Arrays.asList( - asc( - named( - "name", - ref( - "name", - OpenSearchTextType.of( - Map.of( - "words", - OpenSearchDataType.of( - OpenSearchDataType.MappingType.Keyword))))))))); + named( + "name", + ref( + "name", + OpenSearchTextType.of( + Map.of( + "words", + OpenSearchDataType.of(OpenSearchDataType.MappingType.Keyword))))))); } @Test @@ -123,18 +133,24 @@ void should_build_bucket_with_parse_expression() { when(serializer.serialize(parseExpression)).thenReturn("mock-serialize"); assertEquals( "{\n" - + " \"terms\" : {\n" - + " \"script\" : {\n" - + " \"source\" :" + + " \"name\" : {\n" + + " \"terms\" : {\n" + + " \"script\" : {\n" + + " \"source\" :" + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"mock-serialize\\\"}\",\n" - + " \"lang\" : \"opensearch_compounded_script\"\n" - + " },\n" - + " \"missing_bucket\" : true,\n" - + " \"missing_order\" : \"first\",\n" - + " \"order\" : \"asc\"\n" + + " \"lang\" : \"opensearch_compounded_script\"\n" + + " },\n" + + " \"size\" : 1000,\n" + + " \"min_doc_count\" : 1,\n" + + " \"shard_min_doc_count\" : 0,\n" + + " \"show_term_doc_count_error\" : false,\n" + + " \"order\" : {\n" + + " \"_key\" : \"asc\"\n" + + " }\n" + + " }\n" + " }\n" + "}", - buildQuery(Arrays.asList(asc(named("name", parseExpression))))); + buildQuery(named("name", parseExpression))); } @Test @@ -143,15 
+159,21 @@ void terms_bucket_for_opensearchdate_type_uses_long() { assertEquals( "{\n" - + " \"terms\" : {\n" - + " \"field\" : \"date\",\n" - + " \"missing_bucket\" : true,\n" - + " \"value_type\" : \"long\",\n" - + " \"missing_order\" : \"first\",\n" - + " \"order\" : \"asc\"\n" + + " \"date\" : {\n" + + " \"terms\" : {\n" + + " \"field\" : \"date\",\n" + + " \"value_type\" : \"long\",\n" + + " \"size\" : 1000,\n" + + " \"min_doc_count\" : 1,\n" + + " \"shard_min_doc_count\" : 0,\n" + + " \"show_term_doc_count_error\" : false,\n" + + " \"order\" : {\n" + + " \"_key\" : \"asc\"\n" + + " }\n" + + " }\n" + " }\n" + "}", - buildQuery(Arrays.asList(asc(named("date", ref("date", dataType)))))); + buildQuery(named("date", ref("date", dataType)))); } @Test @@ -160,14 +182,20 @@ void terms_bucket_for_opensearchdate_type_uses_long_false() { assertEquals( "{\n" - + " \"terms\" : {\n" - + " \"field\" : \"date\",\n" - + " \"missing_bucket\" : true,\n" - + " \"missing_order\" : \"first\",\n" - + " \"order\" : \"asc\"\n" + + " \"date\" : {\n" + + " \"terms\" : {\n" + + " \"field\" : \"date\",\n" + + " \"size\" : 1000,\n" + + " \"min_doc_count\" : 1,\n" + + " \"shard_min_doc_count\" : 0,\n" + + " \"show_term_doc_count_error\" : false,\n" + + " \"order\" : {\n" + + " \"_key\" : \"asc\"\n" + + " }\n" + + " }\n" + " }\n" + "}", - buildQuery(Arrays.asList(asc(named("date", ref("date", dataType)))))); + buildQuery(named("date", ref("date", dataType)))); } @ParameterizedTest(name = "{0}") @@ -177,30 +205,30 @@ void terms_bucket_for_opensearchdate_type_uses_long_false() { void terms_bucket_for_datetime_types_uses_long(ExprType dataType) { assertEquals( "{\n" - + " \"terms\" : {\n" - + " \"field\" : \"date\",\n" - + " \"missing_bucket\" : true,\n" - + " \"value_type\" : \"long\",\n" - + " \"missing_order\" : \"first\",\n" - + " \"order\" : \"asc\"\n" + + " \"date\" : {\n" + + " \"terms\" : {\n" + + " \"field\" : \"date\",\n" + + " \"value_type\" : \"long\",\n" + + " \"size\" : 1000,\n" 
+ + " \"min_doc_count\" : 1,\n" + + " \"shard_min_doc_count\" : 0,\n" + + " \"show_term_doc_count_error\" : false,\n" + + " \"order\" : {\n" + + " \"_key\" : \"asc\"\n" + + " }\n" + + " }\n" + " }\n" + "}", - buildQuery(Arrays.asList(asc(named("date", ref("date", dataType)))))); + buildQuery(named("date", ref("date", dataType)))); } @SneakyThrows - private String buildQuery( - List> groupByExpressions) { + private String buildQuery(NamedExpression groupByExpression) { XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); builder.startObject(); - CompositeValuesSourceBuilder sourceBuilder = - aggregationBuilder.build(groupByExpressions).get(0); + ValuesSourceAggregationBuilder sourceBuilder = aggregationBuilder.build(groupByExpression); sourceBuilder.toXContent(builder, EMPTY_PARAMS); builder.endObject(); return BytesReference.bytes(builder).utf8ToString(); } - - private Triple asc(NamedExpression expression) { - return Triple.of(expression, SortOrder.ASC, MissingOrder.FIRST); - } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilderTest.java new file mode 100644 index 00000000000..8f364d90f77 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilderTest.java @@ -0,0 +1,206 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.aggregation.dsl; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.when; +import static org.opensearch.core.xcontent.ToXContent.EMPTY_PARAMS; +import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static 
org.opensearch.sql.expression.DSL.literal; +import static org.opensearch.sql.expression.DSL.named; +import static org.opensearch.sql.expression.DSL.ref; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import lombok.SneakyThrows; +import org.apache.commons.lang3.tuple.Triple; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.missing.MissingOrder; +import org.opensearch.search.sort.SortOrder; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.NamedExpression; +import org.opensearch.sql.expression.parse.ParseExpression; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; +import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; +import org.opensearch.sql.opensearch.storage.serde.ExpressionSerializer; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +@ExtendWith(MockitoExtension.class) +class CompositeAggregationBuilderTest { + + @Mock private ExpressionSerializer serializer; + + private CompositeAggregationBuilder compositeBuilder; + + @BeforeEach + void set_up() { + compositeBuilder = new CompositeAggregationBuilder(serializer); + } + 
+ @Test + void should_build_bucket_with_field() { + assertEquals( + "{\n" + + " \"terms\" : {\n" + + " \"field\" : \"age\",\n" + + " \"missing_bucket\" : true,\n" + + " \"missing_order\" : \"first\",\n" + + " \"order\" : \"asc\"\n" + + " }\n" + + "}", + buildQuery(Arrays.asList(asc(named("age", ref("age", INTEGER)))))); + } + + @Test + void should_build_bucket_with_literal() { + var literal = literal(1); + when(serializer.serialize(literal)).thenReturn("mock-serialize"); + assertEquals( + "{\n" + + " \"terms\" : {\n" + + " \"script\" : {\n" + + " \"source\" :" + + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"mock-serialize\\\"}\",\n" + + " \"lang\" : \"opensearch_compounded_script\"\n" + + " },\n" + + " \"missing_bucket\" : true,\n" + + " \"missing_order\" : \"first\",\n" + + " \"order\" : \"asc\"\n" + + " }\n" + + "}", + buildQuery(Arrays.asList(asc(named(literal))))); + } + + @Test + void should_build_bucket_with_keyword_field() { + assertEquals( + "{\n" + + " \"terms\" : {\n" + + " \"field\" : \"name.keyword\",\n" + + " \"missing_bucket\" : true,\n" + + " \"missing_order\" : \"first\",\n" + + " \"order\" : \"asc\"\n" + + " }\n" + + "}", + buildQuery( + Arrays.asList( + asc( + named( + "name", + ref( + "name", + OpenSearchTextType.of( + Map.of( + "words", + OpenSearchDataType.of( + OpenSearchDataType.MappingType.Keyword))))))))); + } + + @Test + void should_build_bucket_with_parse_expression() { + ParseExpression parseExpression = + DSL.regex(ref("name.keyword", STRING), DSL.literal("(?\\w+)"), DSL.literal("name")); + when(serializer.serialize(parseExpression)).thenReturn("mock-serialize"); + assertEquals( + "{\n" + + " \"terms\" : {\n" + + " \"script\" : {\n" + + " \"source\" :" + + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"mock-serialize\\\"}\",\n" + + " \"lang\" : \"opensearch_compounded_script\"\n" + + " },\n" + + " \"missing_bucket\" : true,\n" + + " \"missing_order\" : \"first\",\n" + + " \"order\" : \"asc\"\n" + + " }\n" + + "}", + 
buildQuery(Arrays.asList(asc(named("name", parseExpression))))); + } + + @Test + void terms_bucket_for_opensearchdate_type_uses_long() { + OpenSearchDateType dataType = OpenSearchDateType.of(ExprCoreType.TIMESTAMP); + + assertEquals( + "{\n" + + " \"terms\" : {\n" + + " \"field\" : \"date\",\n" + + " \"missing_bucket\" : true,\n" + + " \"value_type\" : \"long\",\n" + + " \"missing_order\" : \"first\",\n" + + " \"order\" : \"asc\"\n" + + " }\n" + + "}", + buildQuery(Arrays.asList(asc(named("date", ref("date", dataType)))))); + } + + @Test + void terms_bucket_for_opensearchdate_type_uses_long_false() { + OpenSearchDateType dataType = OpenSearchDateType.of(STRING); + + assertEquals( + "{\n" + + " \"terms\" : {\n" + + " \"field\" : \"date\",\n" + + " \"missing_bucket\" : true,\n" + + " \"missing_order\" : \"first\",\n" + + " \"order\" : \"asc\"\n" + + " }\n" + + "}", + buildQuery(Arrays.asList(asc(named("date", ref("date", dataType)))))); + } + + @ParameterizedTest(name = "{0}") + @EnumSource( + value = ExprCoreType.class, + names = {"TIMESTAMP", "TIME", "DATE"}) + void terms_bucket_for_datetime_types_uses_long(ExprType dataType) { + assertEquals( + "{\n" + + " \"terms\" : {\n" + + " \"field\" : \"date\",\n" + + " \"missing_bucket\" : true,\n" + + " \"value_type\" : \"long\",\n" + + " \"missing_order\" : \"first\",\n" + + " \"order\" : \"asc\"\n" + + " }\n" + + "}", + buildQuery(Arrays.asList(asc(named("date", ref("date", dataType)))))); + } + + @SneakyThrows + private String buildQuery( + List> groupByExpressions) { + XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); + builder.startObject(); + CompositeValuesSourceBuilder sourceBuilder = + compositeBuilder.build(groupByExpressions).get(0); + sourceBuilder.toXContent(builder, EMPTY_PARAMS); + builder.endObject(); + return BytesReference.bytes(builder).utf8ToString(); + } + + private Triple asc(NamedExpression expression) { + return Triple.of(expression, SortOrder.ASC, MissingOrder.FIRST); + } +} 
diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 85b214e3203..6586c9c4b5c 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -108,6 +108,7 @@ DEDUP_SPLITVALUES: 'DEDUP_SPLITVALUES'; PARTITIONS: 'PARTITIONS'; ALLNUM: 'ALLNUM'; DELIM: 'DELIM'; +BUCKET_NULLABLE: 'BUCKET_NULLABLE'; CENTROIDS: 'CENTROIDS'; ITERATIONS: 'ITERATIONS'; DISTANCE_TYPE: 'DISTANCE_TYPE'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index aac83f7c7ab..eb4b17c152f 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -159,7 +159,31 @@ renameCommand ; statsCommand - : STATS (PARTITIONS EQUAL partitions = integerLiteral)? (ALLNUM EQUAL allnum = booleanLiteral)? (DELIM EQUAL delim = stringLiteral)? statsAggTerm (COMMA statsAggTerm)* (statsByClause)? (DEDUP_SPLITVALUES EQUAL dedupsplit = booleanLiteral)? + : STATS statsArgs statsAggTerm (COMMA statsAggTerm)* (statsByClause)? (dedupSplitArg)? 
+ ; + +statsArgs + : (partitionsArg | allnumArg | delimArg | bucketNullableArg)* + ; + +partitionsArg + : PARTITIONS EQUAL partitions = integerLiteral + ; + +allnumArg + : ALLNUM EQUAL allnum = booleanLiteral + ; + +delimArg + : DELIM EQUAL delim = stringLiteral + ; + +bucketNullableArg + : BUCKET_NULLABLE EQUAL bucket_nullable = booleanLiteral + ; + +dedupSplitArg + : DEDUP_SPLITVALUES EQUAL dedupsplit = booleanLiteral ; eventstatsCommand @@ -1386,6 +1410,7 @@ keywordsCanBeId | PARTITIONS | ALLNUM | DELIM + | BUCKET_NULLABLE | CENTROIDS | ITERATIONS | DISTANCE_TYPE diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 8877b21c6c0..c7d3d91790b 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -413,7 +413,7 @@ public UnresolvedPlan visitStatsCommand(StatsCommandContext ctx) { Collections.emptyList(), groupList, span, - ArgumentFactory.getArgumentList(ctx)); + ArgumentFactory.getArgumentList(ctx, settings)); return aggregation; } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index 2f64ba907fa..e1d892fdfce 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -5,6 +5,7 @@ package org.opensearch.sql.ppl.utils; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -13,16 +14,17 @@ import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.tree.Join; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.exception.SemanticCheckException; import 
org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.BooleanLiteralContext; +import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DecimalLiteralContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DedupCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.FieldsCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.IntegerLiteralContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.RareCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.SortFieldContext; -import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.StatsCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.TopCommandContext; /** Util class to get all arguments as a list from the PPL command. */ @@ -47,20 +49,41 @@ public static List getArgumentList(FieldsCommandContext ctx) { * @param ctx StatsCommandContext instance * @return the list of arguments fetched from the stats command */ - public static List getArgumentList(StatsCommandContext ctx) { - return Arrays.asList( - ctx.partitions != null - ? new Argument("partitions", getArgumentValue(ctx.partitions)) - : new Argument("partitions", new Literal(1, DataType.INTEGER)), - ctx.allnum != null - ? new Argument("allnum", getArgumentValue(ctx.allnum)) - : new Argument("allnum", new Literal(false, DataType.BOOLEAN)), - ctx.delim != null - ? new Argument("delim", getArgumentValue(ctx.delim)) - : new Argument("delim", new Literal(" ", DataType.STRING)), - ctx.dedupsplit != null - ? 
new Argument("dedupsplit", getArgumentValue(ctx.dedupsplit)) - : new Argument("dedupsplit", new Literal(false, DataType.BOOLEAN))); + public static List getArgumentList( + OpenSearchPPLParser.StatsCommandContext ctx, Settings settings) { + OpenSearchPPLParser.StatsArgsContext ctx1 = ctx.statsArgs(); + OpenSearchPPLParser.DedupSplitArgContext ctx2 = ctx.dedupSplitArg(); + List list = + new ArrayList<>( + Arrays.asList( + ctx1.partitionsArg() != null && !ctx1.partitionsArg().isEmpty() + ? new Argument("partitions", getArgumentValue(ctx1.partitionsArg(0).partitions)) + : new Argument("partitions", Literal.ONE), + ctx1.allnumArg() != null && !ctx1.allnumArg().isEmpty() + ? new Argument("allnum", getArgumentValue(ctx1.allnumArg(0).allnum)) + : new Argument("allnum", Literal.FALSE), + ctx1.delimArg() != null && !ctx1.delimArg().isEmpty() + ? new Argument("delim", getArgumentValue(ctx1.delimArg(0).delim)) + : new Argument("delim", new Literal(" ", DataType.STRING)), + ctx1.bucketNullableArg() != null && !ctx1.bucketNullableArg().isEmpty() + ? new Argument( + Argument.BUCKET_NULLABLE, + getArgumentValue(ctx1.bucketNullableArg(0).bucket_nullable)) + : new Argument( + Argument.BUCKET_NULLABLE, + legacyPreferred(settings) ? Literal.TRUE : Literal.FALSE))); + if (ctx2 != null) { + list.add(new Argument("dedupsplit", getArgumentValue(ctx2.dedupsplit))); + } else { + list.add(new Argument("dedupsplit", Literal.FALSE)); + } + return list; + } + + private static boolean legacyPreferred(Settings settings) { + return settings == null + || settings.getSettingValue(Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED) == null + || Boolean.TRUE.equals(settings.getSettingValue(Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED)); } /** @@ -149,11 +172,15 @@ public static List getArgumentList(RareCommandContext ctx) { * @return Literal */ private static Literal getArgumentValue(ParserRuleContext ctx) { - return ctx instanceof IntegerLiteralContext - ? 
new Literal(Integer.parseInt(ctx.getText()), DataType.INTEGER) - : ctx instanceof BooleanLiteralContext - ? new Literal(Boolean.valueOf(ctx.getText()), DataType.BOOLEAN) - : new Literal(StringUtils.unquoteText(ctx.getText()), DataType.STRING); + if (ctx instanceof IntegerLiteralContext) { + return new Literal(Integer.parseInt(ctx.getText()), DataType.INTEGER); + } else if (ctx instanceof DecimalLiteralContext) { + return new Literal(Double.parseDouble(ctx.getText()), DataType.DOUBLE); + } else if (ctx instanceof BooleanLiteralContext) { + return new Literal(Boolean.valueOf(ctx.getText()), DataType.BOOLEAN); + } else { + return new Literal(StringUtils.unquoteText(ctx.getText()), DataType.STRING); + } } /** diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java index 044d56ac1fa..57b3ec9952b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java @@ -62,6 +62,7 @@ public CalcitePPLAbstractTest(CalciteAssert.SchemaSpec... schemaSpecs) { public void init() { doReturn(true).when(settings).getSettingValue(Settings.Key.CALCITE_ENGINE_ENABLED); doReturn(true).when(settings).getSettingValue(Settings.Key.CALCITE_SUPPORT_ALL_JOIN_TYPES); + doReturn(true).when(settings).getSettingValue(Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED); } protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... 
schemaSpecs) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java index 1a67fb7101d..a82470c1165 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java @@ -91,6 +91,23 @@ public void testSimpleAvg() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testNonBucketNullableShouldNotImpactAggregateWithoutGroupBy() { + String ppl = "source=EMP | stats bucket_nullable=false avg(SAL)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "" + + "LogicalAggregate(group=[{}], avg(SAL)=[AVG($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "avg(SAL)=2073.214285\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = "" + "SELECT AVG(`SAL`) `avg(SAL)`\n" + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + @Test public void testMultipleAggregatesWithAliases() { String ppl = @@ -206,6 +223,34 @@ public void testAvgByField() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testAvgByFieldNonBucketNullable() { + String ppl = "source=EMP | stats bucket_nullable=false avg(SAL) by DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "" + + "LogicalProject(avg(SAL)=[$1], DEPTNO=[$0])\n" + + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + + " LogicalAggregate(group=[{0}], avg(SAL)=[AVG($1)])\n" + + " LogicalProject(DEPTNO=[$7], SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = + "" + + "avg(SAL)=2175.; DEPTNO=20\n" + + "avg(SAL)=2916.666666; DEPTNO=10\n" + + "avg(SAL)=1566.666666; DEPTNO=30\n"; + verifyResult(root, expectedResult); + + String 
expectedSparkSql = + "" + + "SELECT AVG(`SAL`) `avg(SAL)`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "GROUP BY `DEPTNO`\n" + + "HAVING `DEPTNO` IS NOT NULL"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + @Test public void testAvgByMultipleFieldsWithDiffOrder() { String ppl1 = "source=EMP | stats avg(SAL) by JOB, DEPTNO"; diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index e927293cf2b..60fc85c838c 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -55,6 +55,7 @@ import java.util.List; import java.util.Optional; import org.apache.commons.lang3.tuple.Pair; +import org.junit.Before; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; @@ -97,6 +98,11 @@ public void testDynamicSourceClauseThrowsUnsupportedException() { "Dynamic source clause with metadata filters is not supported.", exception.getMessage()); } + @Before + public void setup() { + when(settings.getSettingValue(Key.PPL_SYNTAX_LEGACY_PREFERRED)).thenReturn(true); + } + @Test public void testSearchCommand() { assertEqual( diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java index 761dbe2997b..adb9ec719e6 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java @@ -21,6 +21,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import org.junit.Test; +import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ppl.parser.AstBuilderTest; public class ArgumentFactoryTest extends AstBuilderTest { @@ -51,6 +52,7 @@ public void testStatsCommandArgument() { argument("partitions", intLiteral(1)), argument("allnum", booleanLiteral(false)), 
argument("delim", stringLiteral(",")), + argument(Argument.BUCKET_NULLABLE, booleanLiteral(true)), argument("dedupsplit", booleanLiteral(true))))); }