opensearch-project · dai-chen · Sep 10, 2025 · Sep 8, 2025 · Sep 9, 2025
@@ -198,6 +198,7 @@ public enum BuiltinFunctionName {
   TAKE(FunctionName.of("take")),
   // t-digest percentile which is used in OpenSearch core by default.
   PERCENTILE_APPROX(FunctionName.of("percentile_approx")),
+  MEDIAN(FunctionName.of("median")),
   EARLIEST(FunctionName.of("earliest")),
   LATEST(FunctionName.of("latest")),
   DISTINCT_COUNT_APPROX(FunctionName.of("distinct_count_approx")),
@@ -347,6 +348,7 @@ public enum BuiltinFunctionName {
           .put("take", BuiltinFunctionName.TAKE)
           .put("percentile", BuiltinFunctionName.PERCENTILE_APPROX)
           .put("percentile_approx", BuiltinFunctionName.PERCENTILE_APPROX)
+          .put("median", BuiltinFunctionName.MEDIAN)
           .put("earliest", BuiltinFunctionName.EARLIEST)
           .put("latest", BuiltinFunctionName.LATEST)
           .put("distinct_count_approx", BuiltinFunctionName.DISTINCT_COUNT_APPROX)

@@ -124,6 +124,7 @@
 import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_PHRASE_PREFIX;
 import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAX;
 import static org.opensearch.sql.expression.function.BuiltinFunctionName.MD5;
+import static org.opensearch.sql.expression.function.BuiltinFunctionName.MEDIAN;
 import static org.opensearch.sql.expression.function.BuiltinFunctionName.MICROSECOND;
 import static org.opensearch.sql.expression.function.BuiltinFunctionName.MIN;
 import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE;
@@ -257,6 +258,7 @@
 import org.apache.logging.log4j.Logger;
 import org.opensearch.sql.calcite.CalcitePlanContext;
 import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory;
+import org.opensearch.sql.calcite.utils.PPLOperandTypes;
 import org.opensearch.sql.calcite.utils.PlanUtils;
 import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils;
 import org.opensearch.sql.exception.ExpressionEvaluationException;
@@ -1040,6 +1042,7 @@ void register(
   }
 
   private static class AggBuilder {
+    private static final double MEDIAN_PERCENTILE = 50.0;
     private final Map<BuiltinFunctionName, Pair<CalciteFuncSignature, AggHandler>> map =
         new HashMap<>();
 
@@ -1114,6 +1117,9 @@ void populate() {
       register(
           PERCENTILE_APPROX,
           (distinct, field, argList, ctx) -> {
+            if (field.getType() == null) {
+              throw new IllegalArgumentException("Field type cannot be null");
+            }
             List<RexNode> newArgList =
                 argList.stream().map(PlanUtils::derefMapCall).collect(Collectors.toList());
             newArgList.add(ctx.rexBuilder.makeFlag(field.getType().getSqlTypeName()));
@@ -1125,6 +1131,31 @@ void populate() {
               PERCENTILE_APPROX.name(),
               false));
 
+      register(
+          MEDIAN, // built as a PERCENTILE_APPROX aggregate call with a fixed percentile operand
+          (distinct, field, argList, ctx) -> {
+            if (distinct) { // DISTINCT is rejected up front
+              throw new IllegalArgumentException("MEDIAN does not support DISTINCT");
+            }
+            if (!argList.isEmpty()) { // only the aggregated field itself is accepted
+              throw new IllegalArgumentException("MEDIAN takes no additional arguments");
+            }
+            if (field.getType() == null) { // the type is dereferenced below to build the flag
+              throw new IllegalArgumentException("Field type cannot be null");
+            }
+            List<RexNode> medianArgList = // extra operands: percentile literal, then type flag
+                List.of(
+                    ctx.rexBuilder.makeExactLiteral(BigDecimal.valueOf(MEDIAN_PERCENTILE)),
+                    ctx.rexBuilder.makeFlag(field.getType().getSqlTypeName()));
+            return UserDefinedFunctionUtils.makeAggregateCall(
+                PPLBuiltinOperators.PERCENTILE_APPROX, // same operator the percentile functions use
+                List.of(field),
+                medianArgList,
+                ctx.relBuilder);
+          },
+          wrapSqlOperandTypeChecker( // NOTE(review): presumably restricts MEDIAN to numeric input
+              PPLOperandTypes.NUMERIC.getInnerTypeChecker(), MEDIAN.name(), false));
+
       register(
           EARLIEST,
           (distinct, field, argList, ctx) -> {

@@ -389,34 +389,6 @@ Example::
     | M      | 36  |
     +--------+-----+
 
-Percentile Shortcut Functions
->>>>>>>>>>>>>>>>>>>>>>>>>>>>>
-
-For convenience, OpenSearch PPL provides shortcut functions for common percentiles:
-
-- ``PERC<percent>(expr)`` - Equivalent to ``PERCENTILE(expr, <percent>)``
-- ``P<percent>(expr)`` - Equivalent to ``PERCENTILE(expr, <percent>)``
-
-Both integer and decimal percentiles from 0 to 100 are supported (e.g., ``PERC95``, ``P99.5``).
-
-Example::
-
-    ppl> source=accounts | stats perc99.5(age);
-    fetched rows / total rows = 1/1
-    +---------------+
-    | perc99.5(age) |
-    |---------------|
-    | 36            |
-    +---------------+
-
-    ppl> source=accounts | stats p50(age);
-    fetched rows / total rows = 1/1
-    +---------+
-    | p50(age) |
-    |---------|
-    | 32      |
-    +---------+
-
 HAVING Clause
 =============
 

@@ -51,6 +51,7 @@ stats <aggregation>... [by-clause]
  * Description: The unit of the interval expression is the natural unit by default. If the field is a date and time type field, and the interval is in date/time units, you will need to specify the unit in the interval expression. For example, to split the field ``age`` into buckets by 10 years, it looks like ``span(age, 10)``. And here is another example of time span, the span to split a ``timestamp`` field into hourly intervals, it looks like ``span(timestamp, 1h)``.
 
 * Available time unit:
+
 +----------------------------+
 | Span Interval Units        |
 +============================+
@@ -273,7 +274,7 @@ Example::
     +--------------------+
 
 DISTINCT_COUNT_APPROX
-----------
+---------------------
 
 Description
 >>>>>>>>>>>
@@ -334,6 +335,58 @@ Example::
     | 36                  | M      |
     +---------------------+--------+
 
+Percentile Shortcut Functions
+>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+
+Version: 3.3.0
+
+For convenience, OpenSearch PPL provides shortcut functions for common percentiles:
+
+- ``PERC<percent>(expr)`` - Equivalent to ``PERCENTILE(expr, <percent>)``
+- ``P<percent>(expr)`` - Equivalent to ``PERCENTILE(expr, <percent>)``
+
+Both integer and decimal percentiles from 0 to 100 are supported (e.g., ``PERC95``, ``P99.5``).
+
+Example::
+
+    ppl> source=accounts | stats perc99.5(age);
+    fetched rows / total rows = 1/1
+    +---------------+
+    | perc99.5(age) |
+    |---------------|
+    | 36            |
+    +---------------+
+
+    ppl> source=accounts | stats p50(age);
+    fetched rows / total rows = 1/1
+    +----------+
+    | p50(age) |
+    |----------|
+    | 32       |
+    +----------+
+
+MEDIAN
+------
+
+Description
+>>>>>>>>>>>
+
+Version: 3.3.0
+
+Usage: MEDIAN(expr). Returns the approximate median (50th percentile) value of ``expr``. This is equivalent to ``PERCENTILE(expr, 50)``.
+
+Note: This function requires Calcite to be enabled (see `Configuration`_ section above).
+
+Example::
+
+    os> source=accounts | stats median(age);
+    fetched rows / total rows = 1/1
+    +-------------+
+    | median(age) |
+    |-------------|
+    | 32          |
+    +-------------+
+
 EARLIEST
 --------
 
@@ -414,7 +467,6 @@ LIST
 Description
 >>>>>>>>>>>
 
-=======
 Version: 3.3.0 (Calcite engine only)
 
 Usage: LIST(expr). Collects all values from the specified expression into an array. Values are converted to strings, nulls are filtered, and duplicates are preserved. 
@@ -442,7 +494,6 @@ Example with result field rename::
     |-------------------------------------|
     | ["Amber","Hattie","Nanette","Dale"] |
     +-------------------------------------+
-
 Example 1: Calculate the count of events
 ========================================
 

@@ -969,4 +969,12 @@ public void testStatsCountAliasByGroupWithSort() throws IOException {
         rows(1, "VA"),
         rows(1, "WA"));
   }
+
+  @Test
+  public void testMedian() throws IOException { // end-to-end check of stats median(balance)
+    JSONObject actual =
+        executeQuery(String.format("source=%s | stats median(balance)", TEST_INDEX_BANK));
+    verifySchema(actual, schema("median(balance)", "bigint")); // column is named after the call
+    verifyDataRows(actual, rows(32838)); // NOTE(review): value presumably comes from the bank fixture
+  }
 }
@@ -515,6 +515,9 @@ statsFunctionName
    | STDDEV_POP
    | PERCENTILE
    | PERCENTILE_APPROX
+   | MEDIAN
+   | EARLIEST
+   | LATEST
    | LIST
    ;
 

@@ -696,4 +696,19 @@ public void testPercentileShortcutInvalidDecimalValueAbove100() {
     String ppl = "source=EMP | stats perc100.1(SAL)";
     getRelNode(ppl);
   }
+
+  @Test
+  public void testMedian() { // checks the logical plan and Spark SQL produced for median(SAL)
+    String ppl = "source=EMP | stats median(SAL)";
+    RelNode root = getRelNode(ppl);
+    String expectedLogical = // median is expected to plan as percentile_approx(field, 50.0, flag)
+        "LogicalAggregate(group=[{}], median(SAL)=[percentile_approx($0, $1, $2)])\n"
+            + "  LogicalProject(SAL=[$5], $f1=[50.0:DECIMAL(3, 1)], $f2=[FLAG(DECIMAL)])\n"
+            + "    LogicalTableScan(table=[[scott, EMP]])\n";
+    verifyLogical(root, expectedLogical);
+
+    String expectedSparkSql = // the output column keeps the original median(SAL) name
+        "SELECT `percentile_approx`(`SAL`, 50.0, DECIMAL) `median(SAL)`\n" + "FROM `scott`.`EMP`";
+    verifyPPLToSparkSQL(root, expectedSparkSql);
+  }
 }
@@ -1298,4 +1298,16 @@ public void testPercentileShortcutFunctionInvalidDecimalValueAbove100() {
         SyntaxCheckException.class,
         () -> assertEqual("source=t | stats perc100.1(a)", (Node) null));
   }
+
+  @Test
+  public void testMedianAggFuncExpr() { // checks the AST built for a bare median(a) aggregation
+    assertEqual(
+        "source=t | stats median(a)",
+        agg(
+            relation("t"),
+            exprList(alias("median(a)", aggregate("median", field("a")))), // alias is the call text
+            emptyList(),
+            emptyList(),
+            defaultStatsArgs()));
+  }
 }