diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 645ee5a2f1e..744a0edb8c9 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -198,6 +198,7 @@ public enum BuiltinFunctionName { TAKE(FunctionName.of("take")), // t-digest percentile which is used in OpenSearch core by default. PERCENTILE_APPROX(FunctionName.of("percentile_approx")), + MEDIAN(FunctionName.of("median")), EARLIEST(FunctionName.of("earliest")), LATEST(FunctionName.of("latest")), DISTINCT_COUNT_APPROX(FunctionName.of("distinct_count_approx")), @@ -347,6 +348,7 @@ public enum BuiltinFunctionName { .put("take", BuiltinFunctionName.TAKE) .put("percentile", BuiltinFunctionName.PERCENTILE_APPROX) .put("percentile_approx", BuiltinFunctionName.PERCENTILE_APPROX) + .put("median", BuiltinFunctionName.MEDIAN) .put("earliest", BuiltinFunctionName.EARLIEST) .put("latest", BuiltinFunctionName.LATEST) .put("distinct_count_approx", BuiltinFunctionName.DISTINCT_COUNT_APPROX) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index beb70e41fd7..48f46307a1b 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -124,6 +124,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_PHRASE_PREFIX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MD5; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MEDIAN; import static 
org.opensearch.sql.expression.function.BuiltinFunctionName.MICROSECOND; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MIN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE; @@ -257,6 +258,7 @@ import org.apache.logging.log4j.Logger; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils; import org.opensearch.sql.exception.ExpressionEvaluationException; @@ -1040,6 +1042,7 @@ void register( } private static class AggBuilder { + private static final double MEDIAN_PERCENTILE = 50.0; private final Map> map = new HashMap<>(); @@ -1114,6 +1117,9 @@ void populate() { register( PERCENTILE_APPROX, (distinct, field, argList, ctx) -> { + if (field.getType() == null) { + throw new IllegalArgumentException("Field type cannot be null"); + } List newArgList = argList.stream().map(PlanUtils::derefMapCall).collect(Collectors.toList()); newArgList.add(ctx.rexBuilder.makeFlag(field.getType().getSqlTypeName())); @@ -1125,6 +1131,31 @@ void populate() { PERCENTILE_APPROX.name(), false)); + register( + MEDIAN, + (distinct, field, argList, ctx) -> { + if (distinct) { + throw new IllegalArgumentException("MEDIAN does not support DISTINCT"); + } + if (!argList.isEmpty()) { + throw new IllegalArgumentException("MEDIAN takes no additional arguments"); + } + if (field.getType() == null) { + throw new IllegalArgumentException("Field type cannot be null"); + } + List medianArgList = + List.of( + ctx.rexBuilder.makeExactLiteral(BigDecimal.valueOf(MEDIAN_PERCENTILE)), + ctx.rexBuilder.makeFlag(field.getType().getSqlTypeName())); + return UserDefinedFunctionUtils.makeAggregateCall( + PPLBuiltinOperators.PERCENTILE_APPROX, + List.of(field), + medianArgList, + ctx.relBuilder); + }, + wrapSqlOperandTypeChecker( 
+ PPLOperandTypes.NUMERIC.getInnerTypeChecker(), MEDIAN.name(), false)); + register( EARLIEST, (distinct, field, argList, ctx) -> { diff --git a/docs/user/dql/aggregations.rst b/docs/user/dql/aggregations.rst index d2544ce2b9b..adf933c09c1 100644 --- a/docs/user/dql/aggregations.rst +++ b/docs/user/dql/aggregations.rst @@ -389,34 +389,6 @@ Example:: | M | 36 | +--------+-----+ -Percentile Shortcut Functions ->>>>>>>>>>>>>>>>>>>>>>>>>>>>> - -For convenience, OpenSearch PPL provides shortcut functions for common percentiles: - -- ``PERC(expr)`` - Equivalent to ``PERCENTILE(expr, )`` -- ``P(expr)`` - Equivalent to ``PERCENTILE(expr, )`` - -Both integer and decimal percentiles from 0 to 100 are supported (e.g., ``PERC95``, ``P99.5``). - -Example:: - - ppl> source=accounts | stats perc99.5(age); - fetched rows / total rows = 1/1 - +---------------+ - | perc99.5(age) | - |---------------| - | 36 | - +---------------+ - - ppl> source=accounts | stats p50(age); - fetched rows / total rows = 1/1 - +---------+ - | p50(age) | - |---------| - | 32 | - +---------+ - HAVING Clause ============= diff --git a/docs/user/ppl/cmd/stats.rst b/docs/user/ppl/cmd/stats.rst index 001164860f4..dfd466a3138 100644 --- a/docs/user/ppl/cmd/stats.rst +++ b/docs/user/ppl/cmd/stats.rst @@ -51,6 +51,7 @@ stats ... [by-clause] * Description: The unit of the interval expression is the natural unit by default. If the field is a date and time type field, and the interval is in date/time units, you will need to specify the unit in the interval expression. For example, to split the field ``age`` into buckets by 10 years, it looks like ``span(age, 10)``. And here is another example of time span, the span to split a ``timestamp`` field into hourly intervals, it looks like ``span(timestamp, 1h)``. 
* Available time unit: + +----------------------------+ | Span Interval Units | +============================+ @@ -273,7 +274,7 @@ Example:: +--------------------+ DISTINCT_COUNT_APPROX ----------- +--------------------- Description >>>>>>>>>>> @@ -334,6 +335,58 @@ Example:: | 36 | M | +---------------------+--------+ +Percentile Shortcut Functions +>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + +Version: 3.3.0 + +For convenience, OpenSearch PPL provides shortcut functions for common percentiles: + +- ``PERC<percent>(expr)`` - Equivalent to ``PERCENTILE(expr, <percent>)`` +- ``P<percent>(expr)`` - Equivalent to ``PERCENTILE(expr, <percent>)`` + +Both integer and decimal percentiles from 0 to 100 are supported (e.g., ``PERC95``, ``P99.5``). + +Example:: + + ppl> source=accounts | stats perc99.5(age); + fetched rows / total rows = 1/1 + +---------------+ + | perc99.5(age) | + |---------------| + | 36 | + +---------------+ + + ppl> source=accounts | stats p50(age); + fetched rows / total rows = 1/1 + +----------+ + | p50(age) | + |----------| + | 32 | + +----------+ + +MEDIAN +------ + +Description +>>>>>>>>>>> + +Version: 3.3.0 + +Usage: MEDIAN(expr). Returns the median (50th percentile) value of `expr`. This is equivalent to ``PERCENTILE(expr, 50)``. + +Note: This function requires Calcite to be enabled (see `Configuration`_ section above). + +Example:: + + os> source=accounts | stats median(age); + fetched rows / total rows = 1/1 + +-------------+ + | median(age) | + |-------------| + | 32 | + +-------------+ + EARLIEST -------- @@ -414,7 +467,6 @@ LIST Description >>>>>>>>>>> -======= Version: 3.3.0 (Calcite engine only) Usage: LIST(expr). Collects all values from the specified expression into an array. Values are converted to strings, nulls are filtered, and duplicates are preserved. 
@@ -442,7 +494,6 @@ Example with result field rename:: |-------------------------------------| | ["Amber","Hattie","Nanette","Dale"] | +-------------------------------------+ - Example 1: Calculate the count of events ======================================== diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java index 0a39afe578d..4d12e1f9b5b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java @@ -969,4 +969,12 @@ public void testStatsCountAliasByGroupWithSort() throws IOException { rows(1, "VA"), rows(1, "WA")); } + + @Test + public void testMedian() throws IOException { + JSONObject actual = + executeQuery(String.format("source=%s | stats median(balance)", TEST_INDEX_BANK)); + verifySchema(actual, schema("median(balance)", "bigint")); + verifyDataRows(actual, rows(32838)); + } } diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 084a09751fb..a213d4609e8 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -515,6 +515,9 @@ statsFunctionName | STDDEV_POP | PERCENTILE | PERCENTILE_APPROX + | MEDIAN + | EARLIEST + | LATEST | LIST ; diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java index a7e81e1446e..1a67fb7101d 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java @@ -696,4 +696,19 @@ public void testPercentileShortcutInvalidDecimalValueAbove100() { String ppl = "source=EMP | stats perc100.1(SAL)"; getRelNode(ppl); } + + @Test + public void 
testMedian() { + String ppl = "source=EMP | stats median(SAL)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalAggregate(group=[{}], median(SAL)=[percentile_approx($0, $1, $2)])\n" + + " LogicalProject(SAL=[$5], $f1=[50.0:DECIMAL(3, 1)], $f2=[FLAG(DECIMAL)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `percentile_approx`(`SAL`, 50.0, DECIMAL) `median(SAL)`\n" + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 12eb611cca4..ad55ad10cc2 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -1298,4 +1298,16 @@ public void testPercentileShortcutFunctionInvalidDecimalValueAbove100() { SyntaxCheckException.class, () -> assertEqual("source=t | stats perc100.1(a)", (Node) null)); } + + @Test + public void testMedianAggFuncExpr() { + assertEqual( + "source=t | stats median(a)", + agg( + relation("t"), + exprList(alias("median(a)", aggregate("median", field("a")))), + emptyList(), + emptyList(), + defaultStatsArgs())); + } }