Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.calcite.udf.udaf;

import java.util.ArrayList;
import java.util.List;
import org.opensearch.sql.calcite.udf.UserDefinedAggFunction;

/**
* List aggregation function that collects values into an array preserving duplicates.
*
* <p>Behavior:
*
* <ul>
* <li>Collects up to 100 values (additional values are ignored)
* <li>Filters out null values
* <li>Preserves duplicate values
* <li>Order of values in the result is non-deterministic
* </ul>
*
* <p>Note: Similar to the TAKE function, LIST does not guarantee any specific order of values in
* the result array. The order may vary between executions and depends on the underlying query
* execution plan and optimizations.
*/
public class ListAggFunction implements UserDefinedAggFunction<ListAggFunction.ListAccumulator> {

private static final int DEFAULT_LIMIT = 100;

@Override
public ListAccumulator init() {
return new ListAccumulator();
}

@Override
public Object result(ListAccumulator accumulator) {
return accumulator.value();
}

@Override
public ListAccumulator add(ListAccumulator acc, Object... values) {
// Handle case where no values are passed
if (values == null || values.length == 0) {
return acc;
}

Object value = values[0];

// Filter out null values and enforce 100-item limit
if (value != null && acc.size() < DEFAULT_LIMIT) {
// Convert value to string, handling all types safely
String stringValue = String.valueOf(value);
acc.add(stringValue);
}

return acc;
}

public static class ListAccumulator implements Accumulator {
private final List<String> values;

public ListAccumulator() {
this.values = new ArrayList<>();
}

@Override
public Object value(Object... argList) {
return values;
}

public void add(String value) {
values.add(value);
}

public int size() {
return values.size();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -169,4 +169,31 @@ private PPLOperandTypes() {}
SqlTypeFamily.CHARACTER,
SqlTypeFamily.CHARACTER,
SqlTypeFamily.CHARACTER)));

/**
* Operand type checker that accepts any scalar type. This includes numeric types, strings,
* booleans, datetime types, and special scalar types like IP and BINARY. Excludes complex types
* like arrays, structs, and maps.
*/
public static final UDFOperandMetadata ANY_SCALAR =
UDFOperandMetadata.wrapUDT(
java.util.List.of(
// Numeric types
java.util.List.of(org.opensearch.sql.data.type.ExprCoreType.BYTE),
java.util.List.of(org.opensearch.sql.data.type.ExprCoreType.SHORT),
java.util.List.of(org.opensearch.sql.data.type.ExprCoreType.INTEGER),
java.util.List.of(org.opensearch.sql.data.type.ExprCoreType.LONG),
java.util.List.of(org.opensearch.sql.data.type.ExprCoreType.FLOAT),
java.util.List.of(org.opensearch.sql.data.type.ExprCoreType.DOUBLE),
// String type
java.util.List.of(org.opensearch.sql.data.type.ExprCoreType.STRING),
// Boolean type
java.util.List.of(org.opensearch.sql.data.type.ExprCoreType.BOOLEAN),
// Temporal types
java.util.List.of(org.opensearch.sql.data.type.ExprCoreType.DATE),
java.util.List.of(org.opensearch.sql.data.type.ExprCoreType.TIME),
java.util.List.of(org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP),
// Special scalar types
java.util.List.of(org.opensearch.sql.data.type.ExprCoreType.IP),
java.util.List.of(org.opensearch.sql.data.type.ExprCoreType.BINARY)));
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,12 @@ private PPLReturnTypes() {}
RelDataType firstArgType = argTypes.get(0);
return SqlTypeUtil.createArrayType(typeFactory, firstArgType, true);
};
public static final SqlReturnTypeInference STRING_ARRAY =
opBinding -> {
RelDataTypeFactory typeFactory = opBinding.getTypeFactory();
// Always return array of strings since multivalue functions convert everything to strings
RelDataType stringType =
typeFactory.createSqlType(org.apache.calcite.sql.type.SqlTypeName.VARCHAR);
return SqlTypeUtil.createArrayType(typeFactory, stringType, true);
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,9 @@ public enum BuiltinFunctionName {
EARLIEST(FunctionName.of("earliest")),
LATEST(FunctionName.of("latest")),
DISTINCT_COUNT_APPROX(FunctionName.of("distinct_count_approx")),

// Multivalue aggregation function
LIST(FunctionName.of("list")),
// Not always an aggregation query
NESTED(FunctionName.of("nested")),

Expand Down Expand Up @@ -347,6 +350,7 @@ public enum BuiltinFunctionName {
.put("earliest", BuiltinFunctionName.EARLIEST)
.put("latest", BuiltinFunctionName.LATEST)
.put("distinct_count_approx", BuiltinFunctionName.DISTINCT_COUNT_APPROX)
.put("list", BuiltinFunctionName.LIST)
.put("pattern", BuiltinFunctionName.INTERNAL_PATTERN)
.build();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.calcite.sql.type.SqlTypeTransforms;
import org.apache.calcite.sql.util.ReflectiveSqlOperatorTable;
import org.apache.calcite.util.BuiltInMethod;
import org.opensearch.sql.calcite.udf.udaf.ListAggFunction;
import org.opensearch.sql.calcite.udf.udaf.LogPatternAggFunction;
import org.opensearch.sql.calcite.udf.udaf.NullableSqlAvgAggFunction;
import org.opensearch.sql.calcite.udf.udaf.PercentileApproxFunction;
Expand Down Expand Up @@ -432,6 +433,9 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
"pattern",
ReturnTypes.explicit(UserDefinedFunctionUtils.nullablePatternAggList),
null);
public static final SqlAggFunction LIST =
createUserDefinedAggFunction(
ListAggFunction.class, "LIST", PPLReturnTypes.STRING_ARRAY, PPLOperandTypes.ANY_SCALAR);

/**
* Returns the PPL specific operator table, creating it if necessary.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LENGTH;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LESS;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LIKE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LIST;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LN;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LOCALTIME;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LOCALTIMESTAMP;
Expand Down Expand Up @@ -1088,6 +1089,7 @@ void populate() {
registerOperator(STDDEV_POP, PPLBuiltinOperators.STDDEV_POP_NULLABLE);
registerOperator(TAKE, PPLBuiltinOperators.TAKE);
registerOperator(INTERNAL_PATTERN, PPLBuiltinOperators.INTERNAL_PATTERN);
registerOperator(LIST, PPLBuiltinOperators.LIST);

register(
AVG,
Expand Down
52 changes: 52 additions & 0 deletions docs/user/ppl/cmd/stats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ The following table dataSources the aggregation functions and also indicates how
+----------+-------------+-------------+
| MIN | Ignore | Ignore |
+----------+-------------+-------------+
| LIST | Ignore | Ignore |
+----------+-------------+-------------+


Syntax
Expand Down Expand Up @@ -406,6 +408,41 @@ Example with custom time field::
| inactive | users |
+----------------------------+----------+

LIST
----

Description
>>>>>>>>>>>

=======
Version: 3.3.0 (Calcite engine only)

Usage: LIST(expr). Collects all values from the specified expression into an array. Values are converted to strings, nulls are filtered, and duplicates are preserved.
The function returns up to 100 values with no guaranteed ordering.

* expr: The field expression to collect values from.
* This aggregation function doesn't support Array, Struct, Object field types.

Example with string fields::

PPL> source=accounts | stats list(firstname);
fetched rows / total rows = 1/1
+-------------------------------------+
| list(firstname) |
|-------------------------------------|`
| ["Amber","Hattie","Nanette","Dale"] |
+-------------------------------------+

Example with result field rename::

PPL> source=accounts | stats list(firstname) as names;
fetched rows / total rows = 1/1
+-------------------------------------+
| names |
|-------------------------------------|
| ["Amber","Hattie","Nanette","Dale"] |
+-------------------------------------+

Example 1: Calculate the count of events
========================================

Expand Down Expand Up @@ -628,3 +665,18 @@ PPL query::
| 28 | 20 | F |
| 36 | 30 | M |
+-----+----------+--------+

Example 14: Collect all values in a field using LIST
=====================================================

The example shows how to collect all firstname values, preserving duplicates and order.

PPL query::

PPL> source=accounts | stats list(firstname);
fetched rows / total rows = 1/1
+-------------------------------------+
| list(firstname) |
|-------------------------------------|
| ["Amber","Hattie","Nanette","Dale"] |
+-------------------------------------+
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
CalciteLegacyAPICompatibilityIT.class,
CalciteLikeQueryIT.class,
CalciteMathematicalFunctionIT.class,
CalciteMultiValueStatsIT.class,
CalciteNewAddedCommandsIT.class,
CalciteNowLikeFunctionIT.class,
CalciteObjectFieldOperateIT.class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,15 @@ public void testExplainOnEarliestLatestWithCustomTimeField() throws IOException
TEST_INDEX_LOGS)));
}

@Test
public void testListAggregationExplain() throws IOException {
String expected = loadExpectedPlan("explain_list_aggregation.json");
assertJsonEqualsIgnoreId(
expected,
explainQueryToString(
"source=opensearch-sql_test_index_account | stats list(age) as age_list"));
}

/**
* Executes the PPL query and returns the result as a string with windows-style line breaks
* replaced with Unix-style ones.
Expand Down
Loading
Loading