Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ private static DefaultFunctionResolver count() {
new FunctionSignature(functionName, Collections.singletonList(type)),
type ->
(functionProperties, arguments) ->
new CountAggregator(arguments, INTEGER))));
new CountAggregator(arguments, LONG))));
return functionResolver;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public String toString() {

/** Count State. */
protected static class CountState implements AggregationState {
protected int count;
protected long count;

CountState() {
this.count = 0;
Expand All @@ -56,7 +56,7 @@ public void count(ExprValue value) {

@Override
public ExprValue result() {
return ExprValueUtils.integerValue(count);
return ExprValueUtils.longValue(count);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1283,7 +1283,7 @@ public void named_aggregator_with_condition() {
emptyList()),
DSL.named(
"count(string_value) filter(where integer_value > 1)",
DSL.ref("count(string_value) filter(where integer_value > 1)", INTEGER))),
DSL.ref("count(string_value) filter(where integer_value > 1)", LONG))),
AstDSL.project(
AstDSL.agg(
AstDSL.relation("schema"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,37 +29,37 @@ class CountAggregatorTest extends AggregationTest {
@Test
public void count_integer_field_expression() {
ExprValue result = aggregation(DSL.count(DSL.ref("integer_value", INTEGER)), tuples);
assertEquals(4, result.value());
assertEquals(4L, result.value());
}

@Test
public void count_long_field_expression() {
ExprValue result = aggregation(DSL.count(DSL.ref("long_value", LONG)), tuples);
assertEquals(4, result.value());
assertEquals(4L, result.value());
}

@Test
public void count_float_field_expression() {
ExprValue result = aggregation(DSL.count(DSL.ref("float_value", FLOAT)), tuples);
assertEquals(4, result.value());
assertEquals(4L, result.value());
}

@Test
public void count_double_field_expression() {
ExprValue result = aggregation(DSL.count(DSL.ref("double_value", DOUBLE)), tuples);
assertEquals(4, result.value());
assertEquals(4L, result.value());
}

@Test
public void count_date_field_expression() {
ExprValue result = aggregation(DSL.count(DSL.ref("date_value", DATE)), tuples);
assertEquals(4, result.value());
assertEquals(4L, result.value());
}

@Test
public void count_timestamp_field_expression() {
ExprValue result = aggregation(DSL.count(DSL.ref("timestamp_value", TIMESTAMP)), tuples);
assertEquals(4, result.value());
assertEquals(4L, result.value());
}

@Test
Expand All @@ -68,34 +68,34 @@ public void count_arithmetic_expression() {
aggregation(
DSL.count(
DSL.multiply(
DSL.ref("integer_value", INTEGER),
DSL.literal(ExprValueUtils.integerValue(10)))),
DSL.ref("long_value", LONG),
DSL.literal(ExprValueUtils.longValue(10L)))),
tuples);
assertEquals(4, result.value());
assertEquals(4L, result.value());
}

@Test
public void count_string_field_expression() {
ExprValue result = aggregation(DSL.count(DSL.ref("string_value", STRING)), tuples);
assertEquals(4, result.value());
assertEquals(4L, result.value());
}

@Test
public void count_boolean_field_expression() {
ExprValue result = aggregation(DSL.count(DSL.ref("boolean_value", BOOLEAN)), tuples);
assertEquals(1, result.value());
assertEquals(1L, result.value());
}

@Test
public void count_struct_field_expression() {
ExprValue result = aggregation(DSL.count(DSL.ref("struct_value", STRUCT)), tuples);
assertEquals(1, result.value());
assertEquals(1L, result.value());
}

@Test
public void count_array_field_expression() {
ExprValue result = aggregation(DSL.count(DSL.ref("array_value", ARRAY)), tuples);
assertEquals(1, result.value());
assertEquals(1L, result.value());
}

@Test
Expand All @@ -105,14 +105,14 @@ public void filtered_count() {
DSL.count(DSL.ref("integer_value", INTEGER))
.condition(DSL.greater(DSL.ref("integer_value", INTEGER), DSL.literal(1))),
tuples);
assertEquals(3, result.value());
assertEquals(3L, result.value());
}

@Test
public void distinct_count() {
ExprValue result =
aggregation(DSL.distinctCount(DSL.ref("integer_value", INTEGER)), tuples_with_duplicates);
assertEquals(3, result.value());
assertEquals(3L, result.value());
}

@Test
Expand All @@ -122,47 +122,47 @@ public void filtered_distinct_count() {
DSL.distinctCount(DSL.ref("integer_value", INTEGER))
.condition(DSL.greater(DSL.ref("double_value", DOUBLE), DSL.literal(1d))),
tuples_with_duplicates);
assertEquals(2, result.value());
assertEquals(2L, result.value());
}

@Test
public void distinct_count_map() {
ExprValue result =
aggregation(DSL.distinctCount(DSL.ref("struct_value", STRUCT)), tuples_with_duplicates);
assertEquals(3, result.value());
assertEquals(3L, result.value());
}

@Test
public void distinct_count_array() {
ExprValue result =
aggregation(DSL.distinctCount(DSL.ref("array_value", ARRAY)), tuples_with_duplicates);
assertEquals(3, result.value());
assertEquals(3L, result.value());
}

@Test
public void count_with_missing() {
ExprValue result =
aggregation(DSL.count(DSL.ref("integer_value", INTEGER)), tuples_with_null_and_missing);
assertEquals(2, result.value());
assertEquals(2L, result.value());
}

@Test
public void count_with_null() {
ExprValue result =
aggregation(DSL.count(DSL.ref("double_value", DOUBLE)), tuples_with_null_and_missing);
assertEquals(2, result.value());
assertEquals(2L, result.value());
}

@Test
public void count_star_with_null_and_missing() {
ExprValue result = aggregation(DSL.count(DSL.literal("*")), tuples_with_null_and_missing);
assertEquals(3, result.value());
assertEquals(3L, result.value());
}

@Test
public void count_literal_with_null_and_missing() {
ExprValue result = aggregation(DSL.count(DSL.literal(1)), tuples_with_null_and_missing);
assertEquals(3, result.value());
assertEquals(3L, result.value());
}

@Test
Expand Down
2 changes: 1 addition & 1 deletion docs/category.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
"user/ppl/cmd/subquery.rst",
"user/ppl/general/identifiers.rst",
"user/ppl/general/datatypes.rst",
"user/ppl/functions/condition.rst",
"user/ppl/functions/datetime.rst",
"user/ppl/functions/expressions.rst",
"user/ppl/functions/ip.rst",
Expand All @@ -56,6 +55,7 @@
],
"ppl_cli_calcite": [
"user/ppl/cmd/append.rst",
"user/ppl/functions/condition.rst",
"user/ppl/cmd/eventstats.rst",
"user/ppl/cmd/fields.rst",
"user/ppl/cmd/regex.rst",
Expand Down
84 changes: 59 additions & 25 deletions docs/user/ppl/functions/condition.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,26 @@ Example::
| default | null | Dale |
+---------+----------+-----------+

Nested IFNULL Pattern
>>>>>>>>>>>>>>>>>>>>>

For OpenSearch versions prior to 3.1, COALESCE-like functionality can be achieved by nesting IFNULL calls. This pattern is particularly useful in observability use cases where field names may vary across different data sources.

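Usage: ifnull(field1, ifnull(field2, ifnull(field3, default_value))) returns the first non-null value among field1, field2 and field3, in that order, or default_value if all of them are null.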

Example::

os> source=accounts | eval result = ifnull(employer, ifnull(firstname, ifnull(lastname, "unknown"))) | fields result, employer, firstname, lastname
fetched rows / total rows = 4/4
+---------+----------+-----------+----------+
| result | employer | firstname | lastname |
|---------+----------+-----------+----------|
| Pyrami | Pyrami | Amber | Duke |
| Netagy | Netagy | Hattie | Bond |
| Quility | Quility | Nanette | Bates |
| Dale | null | Dale | Adams |
+---------+----------+-----------+----------+

NULLIF
------

Expand Down Expand Up @@ -255,15 +275,28 @@ Argument type: all the supported data type. Supports mixed data types with autom
Return type: determined by the least restrictive common type among all arguments, with fallback to string if no common type can be determined

Behavior:

- Returns the first value that is not null and not missing (missing includes non-existent fields)
- Empty strings ("") and whitespace strings (" ") are considered valid values
- If all arguments are null or missing, returns null
- Automatic type coercion is applied to match the determined return type
- If type conversion fails, the value is converted to string representation
- For best results, use arguments of the same data type to avoid unexpected type conversions

Performance Considerations:

- Optimized for evaluating multiple fields; more efficient than nested IFNULL patterns
- Evaluates arguments sequentially, stopping at the first non-null value
- Consider field order based on likelihood of containing values to minimize evaluation overhead

Limitations:

- Type coercion may result in unexpected string conversions for incompatible types
- Performance may degrade with very large numbers of arguments

Example::

PPL> source=accounts | eval result = coalesce(employer, firstname, lastname) | fields result, firstname, lastname, employer
os> source=accounts | eval result = coalesce(employer, firstname, lastname) | fields result, firstname, lastname, employer
fetched rows / total rows = 4/4
+---------+-----------+----------+----------+
| result | firstname | lastname | employer |
Expand All @@ -276,7 +309,7 @@ Example::

Empty String Handling Examples::

PPL> source=accounts | eval empty_field = "" | eval result = coalesce(empty_field, firstname) | fields result, empty_field, firstname
os> source=accounts | eval empty_field = "" | eval result = coalesce(empty_field, firstname) | fields result, empty_field, firstname
fetched rows / total rows = 4/4
+--------+-------------+-----------+
| result | empty_field | firstname |
Expand All @@ -287,7 +320,7 @@ Empty String Handling Examples::
| | | Dale |
+--------+-------------+-----------+

PPL> source=accounts | eval result = coalesce(" ", firstname) | fields result, firstname
os> source=accounts | eval result = coalesce(" ", firstname) | fields result, firstname
fetched rows / total rows = 4/4
+--------+-----------+
| result | firstname |
Expand All @@ -300,20 +333,20 @@ Empty String Handling Examples::

Mixed Data Types with Auto Coercion::

PPL> source=accounts | eval result = coalesce(employer, balance, "fallback") | fields result, employer, balance
os> source=accounts | eval result = coalesce(employer, balance, "fallback") | fields result, employer, balance
fetched rows / total rows = 4/4
+---------+----------+---------+
| result | employer | balance |
|---------+----------+---------|
| Pyrami | Pyrami | 39225 |
| Netagy | Netagy | 32838 |
| Quility | Quility | 4180 |
| 5686 | null | 5686 |
| Netagy | Netagy | 5686 |
| Quility | Quility | 32838 |
| 4180 | null | 4180 |
+---------+----------+---------+

Non-existent Field Handling::

PPL> source=accounts | eval result = coalesce(nonexistent_field, firstname, "unknown") | fields result, firstname
os> source=accounts | eval result = coalesce(nonexistent_field, firstname, "unknown") | fields result, firstname
fetched rows / total rows = 4/4
+---------+-----------+
| result | firstname |
Expand All @@ -324,6 +357,7 @@ Non-existent Field Handling::
| Dale | Dale |
+---------+-----------+


ISPRESENT
---------

Expand All @@ -342,7 +376,7 @@ Synonyms: `ISNOTNULL`_

Example::

PPL> source=accounts | where ispresent(employer) | fields employer, firstname
os> source=accounts | where ispresent(employer) | fields employer, firstname
fetched rows / total rows = 3/3
+----------+-----------+
| employer | firstname |
Expand All @@ -368,7 +402,7 @@ Return type: BOOLEAN

Example::

PPL> source=accounts | eval temp = ifnull(employer, ' ') | eval `isblank(employer)` = isblank(employer), `isblank(temp)` = isblank(temp) | fields `isblank(temp)`, temp, `isblank(employer)`, employer
os> source=accounts | eval temp = ifnull(employer, ' ') | eval `isblank(employer)` = isblank(employer), `isblank(temp)` = isblank(temp) | fields `isblank(temp)`, temp, `isblank(employer)`, employer
fetched rows / total rows = 4/4
+---------------+---------+-------------------+----------+
| isblank(temp) | temp | isblank(employer) | employer |
Expand Down Expand Up @@ -396,7 +430,7 @@ Return type: BOOLEAN

Example::

PPL> source=accounts | eval temp = ifnull(employer, ' ') | eval `isempty(employer)` = isempty(employer), `isempty(temp)` = isempty(temp) | fields `isempty(temp)`, temp, `isempty(employer)`, employer
os> source=accounts | eval temp = ifnull(employer, ' ') | eval `isempty(employer)` = isempty(employer), `isempty(temp)` = isempty(temp) | fields `isempty(temp)`, temp, `isempty(employer)`, employer
fetched rows / total rows = 4/4
+---------------+---------+-------------------+----------+
| isempty(temp) | temp | isempty(employer) | employer |
Expand Down Expand Up @@ -447,15 +481,15 @@ Return type: BOOLEAN

Example::

PPL> source=accounts | eval now = utc_timestamp() | eval a = earliest("now", now), b = earliest("-2d@d", now) | fields a, b | head 1
os> source=accounts | eval now = utc_timestamp() | eval a = earliest("now", now), b = earliest("-2d@d", now) | fields a, b | head 1
fetched rows / total rows = 1/1
+-------+-------+
| a | b |
|-------+-------|
| False | True |
+-------+-------+
+-------+------+
| a | b |
|-------+------|
| False | True |
+-------+------+

PPL> source=nyc_taxi | where earliest('07/01/2014:00:30:00', timestamp) | stats COUNT() as cnt
os> source=nyc_taxi | where earliest('07/01/2014:00:30:00', timestamp) | stats COUNT() as cnt
fetched rows / total rows = 1/1
+-----+
| cnt |
Expand All @@ -479,15 +513,15 @@ Return type: BOOLEAN

Example::

PPL> source=accounts | eval now = utc_timestamp() | eval a = latest("now", now), b = latest("+2d@d", now) | fields a, b | head 1
os> source=accounts | eval now = utc_timestamp() | eval a = latest("now", now), b = latest("+2d@d", now) | fields a, b | head 1
fetched rows / total rows = 1/1
+-------+-------+
| a | b |
|-------+-------|
| False | True |
+-------+-------+
+------+------+
| a | b |
|------+------|
| True | True |
+------+------+

PPL> source=nyc_taxi | where latest('07/21/2014:04:00:00', timestamp) | stats COUNT() as cnt
os> source=nyc_taxi | where latest('07/21/2014:04:00:00', timestamp) | stats COUNT() as cnt
fetched rows / total rows = 1/1
+-----+
| cnt |
Expand Down
Loading