Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ public enum BuiltinFunctionName {
TAKE(FunctionName.of("take")),
// t-digest percentile which is used in OpenSearch core by default.
PERCENTILE_APPROX(FunctionName.of("percentile_approx")),
MEDIAN(FunctionName.of("median")),
EARLIEST(FunctionName.of("earliest")),
LATEST(FunctionName.of("latest")),
DISTINCT_COUNT_APPROX(FunctionName.of("distinct_count_approx")),
Expand Down Expand Up @@ -347,6 +348,7 @@ public enum BuiltinFunctionName {
.put("take", BuiltinFunctionName.TAKE)
.put("percentile", BuiltinFunctionName.PERCENTILE_APPROX)
.put("percentile_approx", BuiltinFunctionName.PERCENTILE_APPROX)
.put("median", BuiltinFunctionName.MEDIAN)
.put("earliest", BuiltinFunctionName.EARLIEST)
.put("latest", BuiltinFunctionName.LATEST)
.put("distinct_count_approx", BuiltinFunctionName.DISTINCT_COUNT_APPROX)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_PHRASE_PREFIX;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAX;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MD5;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MEDIAN;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MICROSECOND;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MIN;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE;
Expand Down Expand Up @@ -257,6 +258,7 @@
import org.apache.logging.log4j.Logger;
import org.opensearch.sql.calcite.CalcitePlanContext;
import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory;
import org.opensearch.sql.calcite.utils.PPLOperandTypes;
import org.opensearch.sql.calcite.utils.PlanUtils;
import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils;
import org.opensearch.sql.exception.ExpressionEvaluationException;
Expand Down Expand Up @@ -1040,6 +1042,7 @@ void register(
}

private static class AggBuilder {
private static final double MEDIAN_PERCENTILE = 50.0;
private final Map<BuiltinFunctionName, Pair<CalciteFuncSignature, AggHandler>> map =
new HashMap<>();

Expand Down Expand Up @@ -1114,6 +1117,9 @@ void populate() {
register(
PERCENTILE_APPROX,
(distinct, field, argList, ctx) -> {
if (field.getType() == null) {
throw new IllegalArgumentException("Field type cannot be null");
}
List<RexNode> newArgList =
argList.stream().map(PlanUtils::derefMapCall).collect(Collectors.toList());
newArgList.add(ctx.rexBuilder.makeFlag(field.getType().getSqlTypeName()));
Expand All @@ -1125,6 +1131,31 @@ void populate() {
PERCENTILE_APPROX.name(),
false));

// MEDIAN is registered as a PERCENTILE_APPROX aggregate call whose percentile argument is
// fixed to MEDIAN_PERCENTILE (50.0) instead of being supplied by the caller.
register(
MEDIAN,
(distinct, field, argList, ctx) -> {
// Validate the call shape first: no DISTINCT, no arguments besides the field itself.
if (distinct) {
throw new IllegalArgumentException("MEDIAN does not support DISTINCT");
}
if (!argList.isEmpty()) {
throw new IllegalArgumentException("MEDIAN takes no additional arguments");
}
// The field type is dereferenced below to build the type flag, so reject null here.
if (field.getType() == null) {
throw new IllegalArgumentException("Field type cannot be null");
}
// Arguments passed after the field: the percentile as an exact literal, followed by a
// flag carrying the field's SQL type name.
List<RexNode> medianArgList =
List.of(
ctx.rexBuilder.makeExactLiteral(BigDecimal.valueOf(MEDIAN_PERCENTILE)),
ctx.rexBuilder.makeFlag(field.getType().getSqlTypeName()));
// Delegate to the PERCENTILE_APPROX operator rather than a dedicated median operator.
return UserDefinedFunctionUtils.makeAggregateCall(
PPLBuiltinOperators.PERCENTILE_APPROX,
List.of(field),
medianArgList,
ctx.relBuilder);
},
// Operands are checked against the NUMERIC operand type checker under the MEDIAN name.
wrapSqlOperandTypeChecker(
PPLOperandTypes.NUMERIC.getInnerTypeChecker(), MEDIAN.name(), false));

register(
EARLIEST,
(distinct, field, argList, ctx) -> {
Expand Down
28 changes: 0 additions & 28 deletions docs/user/dql/aggregations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -389,34 +389,6 @@ Example::
| M | 36 |
+--------+-----+

Percentile Shortcut Functions
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

For convenience, OpenSearch PPL provides shortcut functions for common percentiles:

- ``PERC<percent>(expr)`` - Equivalent to ``PERCENTILE(expr, <percent>)``
- ``P<percent>(expr)`` - Equivalent to ``PERCENTILE(expr, <percent>)``

Both integer and decimal percentiles from 0 to 100 are supported (e.g., ``PERC95``, ``P99.5``).

Example::

ppl> source=accounts | stats perc99.5(age);
fetched rows / total rows = 1/1
+---------------+
| perc99.5(age) |
|---------------|
| 36 |
+---------------+

ppl> source=accounts | stats p50(age);
fetched rows / total rows = 1/1
+---------+
| p50(age) |
|---------|
| 32 |
+---------+

HAVING Clause
=============

Expand Down
57 changes: 54 additions & 3 deletions docs/user/ppl/cmd/stats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ stats <aggregation>... [by-clause]
* Description: The unit of the interval expression is the natural unit by default. If the field is a date and time type field and the interval is in date/time units, you must specify the unit in the interval expression. For example, to split the field ``age`` into buckets of 10 years, use ``span(age, 10)``. As a time span example, to split a ``timestamp`` field into hourly intervals, use ``span(timestamp, 1h)``.

* Available time unit:

+----------------------------+
| Span Interval Units |
+============================+
Expand Down Expand Up @@ -273,7 +274,7 @@ Example::
+--------------------+

DISTINCT_COUNT_APPROX
----------
---------------------

Description
>>>>>>>>>>>
Expand Down Expand Up @@ -334,6 +335,58 @@ Example::
| 36 | M |
+---------------------+--------+

Percentile Shortcut Functions
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

Version: 3.3.0

For convenience, OpenSearch PPL provides shortcut functions for common percentiles:

- ``PERC<percent>(expr)`` - Equivalent to ``PERCENTILE(expr, <percent>)``
- ``P<percent>(expr)`` - Equivalent to ``PERCENTILE(expr, <percent>)``

Both integer and decimal percentiles from 0 to 100 are supported (e.g., ``PERC95``, ``P99.5``).

Example::

ppl> source=accounts | stats perc99.5(age);
fetched rows / total rows = 1/1
+---------------+
| perc99.5(age) |
|---------------|
| 36 |
+---------------+

ppl> source=accounts | stats p50(age);
fetched rows / total rows = 1/1
+----------+
| p50(age) |
|----------|
| 32       |
+----------+

MEDIAN
------

Description
>>>>>>>>>>>

Version: 3.3.0

Usage: MEDIAN(expr). Returns the approximate median (50th percentile) value of ``expr``. This is equivalent to ``PERCENTILE(expr, 50)``.

Note: This function requires Calcite to be enabled (see `Configuration`_ section above).

Example::

os> source=accounts | stats median(age);
fetched rows / total rows = 1/1
+-------------+
| median(age) |
|-------------|
| 32 |
+-------------+

EARLIEST
--------

Expand Down Expand Up @@ -414,7 +467,6 @@ LIST
Description
>>>>>>>>>>>

=======
Version: 3.3.0 (Calcite engine only)

Usage: LIST(expr). Collects all values from the specified expression into an array. Values are converted to strings, nulls are filtered, and duplicates are preserved.
Expand Down Expand Up @@ -442,7 +494,6 @@ Example with result field rename::
|-------------------------------------|
| ["Amber","Hattie","Nanette","Dale"] |
+-------------------------------------+

Example 1: Calculate the count of events
========================================

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -969,4 +969,12 @@ public void testStatsCountAliasByGroupWithSort() throws IOException {
rows(1, "VA"),
rows(1, "WA"));
}

/**
 * Runs {@code stats median(balance)} against the bank test index and checks both the reported
 * column schema and the single returned row.
 */
@Test
public void testMedian() throws IOException {
  final String query = String.format("source=%s | stats median(balance)", TEST_INDEX_BANK);
  final JSONObject response = executeQuery(query);

  // The result column is named after the expression and typed as bigint.
  verifySchema(response, schema("median(balance)", "bigint"));
  verifyDataRows(response, rows(32838));
}
}
3 changes: 3 additions & 0 deletions ppl/src/main/antlr/OpenSearchPPLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,9 @@ statsFunctionName
| STDDEV_POP
| PERCENTILE
| PERCENTILE_APPROX
| MEDIAN
| EARLIEST
| LATEST
| LIST
;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -696,4 +696,19 @@ public void testPercentileShortcutInvalidDecimalValueAbove100() {
String ppl = "source=EMP | stats perc100.1(SAL)";
getRelNode(ppl);
}

/**
 * Checks the plan produced for {@code median(SAL)}: the logical plan and the generated Spark SQL
 * must both match the expected {@code percentile_approx} form with a 50.0 percentile.
 */
@Test
public void testMedian() {
  final String logicalPlan =
      "LogicalAggregate(group=[{}], median(SAL)=[percentile_approx($0, $1, $2)])\n"
          + " LogicalProject(SAL=[$5], $f1=[50.0:DECIMAL(3, 1)], $f2=[FLAG(DECIMAL)])\n"
          + " LogicalTableScan(table=[[scott, EMP]])\n";
  final String sparkSql =
      "SELECT `percentile_approx`(`SAL`, 50.0, DECIMAL) `median(SAL)`\n" + "FROM `scott`.`EMP`";

  RelNode plan = getRelNode("source=EMP | stats median(SAL)");

  // Logical plan first, then the Spark SQL translation, as two separate checks.
  verifyLogical(plan, logicalPlan);
  verifyPPLToSparkSQL(plan, sparkSql);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1298,4 +1298,16 @@ public void testPercentileShortcutFunctionInvalidDecimalValueAbove100() {
SyntaxCheckException.class,
() -> assertEqual("source=t | stats perc100.1(a)", (Node) null));
}

/**
 * Checks that {@code stats median(a)} parses into an aggregation over relation {@code t} with a
 * single aliased {@code median} aggregate, no sort or group-by lists, and default stats args.
 */
@Test
public void testMedianAggFuncExpr() {
  final String query = "source=t | stats median(a)";
  assertEqual(
      query,
      agg(
          relation("t"),
          // The alias name is the expression text as written in the query.
          exprList(alias("median(a)", aggregate("median", field("a")))),
          emptyList(),
          emptyList(),
          defaultStatsArgs()));
}
}
Loading