Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions core/src/main/java/org/opensearch/sql/ast/expression/Span.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import com.google.common.collect.ImmutableList;
import java.util.List;
import javax.annotation.Nullable;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
Expand All @@ -19,13 +20,18 @@
@RequiredArgsConstructor
@ToString
public class Span extends UnresolvedExpression {
private final UnresolvedExpression field;
/** When field is {@code null}, span implicitly refers to {@code @timestamp} field */
@Nullable private final UnresolvedExpression field;

private final UnresolvedExpression value;
private final SpanUnit unit;

@Override
public List<UnresolvedExpression> getChild() {
return ImmutableList.of(field, value);
if (field == null) {
return ImmutableList.of(value);
}
return List.of(field, value);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.opensearch.sql.ast.expression.Cast;
import org.opensearch.sql.ast.expression.Compare;
import org.opensearch.sql.ast.expression.EqualTo;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.Function;
import org.opensearch.sql.ast.expression.In;
import org.opensearch.sql.ast.expression.Interval;
Expand All @@ -67,6 +68,7 @@
import org.opensearch.sql.ast.expression.subquery.InSubquery;
import org.opensearch.sql.ast.expression.subquery.ScalarSubquery;
import org.opensearch.sql.ast.tree.UnresolvedPlan;
import org.opensearch.sql.calcite.plan.OpenSearchConstants;
import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory;
import org.opensearch.sql.calcite.utils.PlanUtils;
import org.opensearch.sql.common.utils.StringUtils;
Expand Down Expand Up @@ -355,7 +357,19 @@ public RexNode visitAlias(Alias node, CalcitePlanContext context) {

@Override
public RexNode visitSpan(Span node, CalcitePlanContext context) {
RexNode field = analyze(node.getField(), context);
RexNode field;
if (node.getField() != null) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will it be better to set field in span to be new Field(new QualifiedName(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP)) if field is null? Then we don't need any change here.

And I'm wondering if v2 works well since we only make change for calcite visitor.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I referred it here because I thought the AST level should be consistent with users' input. E.g. BY span(1h) should be parsed to Span(value=1, unit=hour, field=null). Subsequent implicit reference can be added later because this semantic of how absent field should be interpreted is in a higher level than syntax tree construction.

But it's also not bizarre to refer it while building AST tree. I'll modify the implementation accordingly.

field = analyze(node.getField(), context);
} else {
try {
field = referenceImplicitTimestampField(context);
} catch (IllegalArgumentException e) {
throw new SemanticCheckException(
"SPAN operation requires an explicit field or an implicit '@timestamp' field, but"
+ " '@timestamp' was not found in the input schema.",
e);
}
}
RexNode value = analyze(node.getValue(), context);
SpanUnit unit = node.getUnit();
RexBuilder rexBuilder = context.relBuilder.getRexBuilder();
Expand All @@ -365,6 +379,11 @@ public RexNode visitSpan(Span node, CalcitePlanContext context) {
context.rexBuilder, BuiltinFunctionName.SPAN, field, value, unitNode);
}

private RexNode referenceImplicitTimestampField(CalcitePlanContext context) {
return analyze(
new Field(new QualifiedName(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP)), context);
}

private boolean isTimeBased(SpanUnit unit) {
return !(unit == NONE || unit == UNKNOWN);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ public interface OpenSearchConstants {

String METADATA_FIELD_ROUTING = "_routing";

String IMPLICIT_FIELD_TIMESTAMP = "@timestamp";

java.util.Map<String, ExprType> METADATAFIELD_TYPE_MAP =
Map.of(
METADATA_FIELD_ID, ExprCoreType.STRING,
Expand Down
21 changes: 18 additions & 3 deletions docs/user/ppl/cmd/stats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ stats [bucket_nullable=bool] <aggregation>... [by-clause]

* span-expression: optional, at most one.

* Syntax: span(field_expr, interval_expr)
* Description: The unit of the interval expression is the natural unit by default. If the field is a date and time type field, and the interval is in date/time units, you will need to specify the unit in the interval expression. For example, to split the field ``age`` into buckets by 10 years, it looks like ``span(age, 10)``. And here is another example of time span, the span to split a ``timestamp`` field into hourly intervals, it looks like ``span(timestamp, 1h)``.
* Syntax: span([field_expr,] interval_expr)
* Description: The unit of the interval expression is the natural unit by default. If ``field_expr`` is omitted, span will use the implicit ``@timestamp`` field. An error will be thrown if this field doesn't exist. If the field is a date and time type field, and the interval is in date/time units, you will need to specify the unit in the interval expression. For example, to split the field ``age`` into buckets by 10 years, it looks like ``span(age, 10)``. And here is another example of time span, the span to split a ``timestamp`` field into hourly intervals, it looks like ``span(timestamp, 1h)``.

* Available time unit:

Expand Down Expand Up @@ -581,7 +581,7 @@ Description

Version: 3.3.0 (Calcite engine only)

Usage: LIST(expr). Collects all values from the specified expression into an array. Values are converted to strings, nulls are filtered, and duplicates are preserved.
Usage: LIST(expr). Collects all values from the specified expression into an array. Values are converted to strings, nulls are filtered, and duplicates are preserved.
The function returns up to 100 values with no guaranteed ordering.

* expr: The field expression to collect values from.
Expand Down Expand Up @@ -923,3 +923,18 @@ PPL query::
|-------------------------------------|
| ["Amber","Dale","Hattie","Nanette"] |
+-------------------------------------+

Example 17: Calculate the count by the implicit @timestamp field
================================================================

This example demonstrates that if you omit the field parameter in the span function, it will automatically use the implicit ``@timestamp`` field.

PPL query::

PPL> source=big5 | stats count() by span(1month)
fetched rows / total rows = 1/1
+---------+---------------------+
| count() | span(1month) |
|---------+---------------------|
| 1 | 2023-01-01 00:00:00 |
+---------+---------------------+
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.schema;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
import static org.opensearch.sql.util.MatcherUtils.verifyErrorMessageContains;
import static org.opensearch.sql.util.MatcherUtils.verifySchema;
import static org.opensearch.sql.util.MatcherUtils.verifySchemaInOrder;

Expand All @@ -24,6 +25,8 @@
import org.json.JSONObject;
import org.junit.jupiter.api.Test;
import org.opensearch.client.Request;
import org.opensearch.sql.common.utils.StringUtils;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.ppl.PPLIntegTestCase;

public class CalcitePPLAggregationIT extends PPLIntegTestCase {
Expand All @@ -40,6 +43,7 @@ public void init() throws Exception {
loadIndex(Index.CALCS);
loadIndex(Index.DATE_FORMATS);
loadIndex(Index.DATA_TYPE_NUMERIC);
loadIndex(Index.BIG5);
loadIndex(Index.LOGS);
loadIndex(Index.TIME_TEST_DATA);
}
Expand Down Expand Up @@ -723,6 +727,26 @@ public void testCountBySpanForCustomFormats() throws IOException {
verifyDataRows(actual, rows(1, "00:00:00"), rows(1, "12:00:00"));
}

// Only available in v3 with Calcite
@Test
public void testSpanByImplicitTimestamp() throws IOException {
JSONObject result = executeQuery("source=big5 | stats count() by span(1d) as span");
verifySchema(result, schema("count()", "bigint"), schema("span", "timestamp"));
verifyDataRows(result, rows(1, "2023-01-02 00:00:00"));

Throwable t =
assertThrowsWithReplace(
SemanticCheckException.class,
() ->
executeQuery(
StringUtils.format(
"source=%s | stats count() by span(5m)", TEST_INDEX_DATE_FORMATS)));
verifyErrorMessageContains(
t,
"SPAN operation requires an explicit field or an implicit '@timestamp' field, but"
+ " '@timestamp' was not found in the input schema.");
}

@Test
public void testCountDistinct() throws IOException {
JSONObject actual =
Expand Down
2 changes: 1 addition & 1 deletion ppl/src/main/antlr/OpenSearchPPLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,7 @@ bySpanClause
;

spanClause
: SPAN LT_PRTHS fieldExpression COMMA value = literalValue (unit = timespanUnit)? RT_PRTHS
: SPAN LT_PRTHS (fieldExpression COMMA)? value = literalValue (unit = timespanUnit)? RT_PRTHS
;

sortbyClause
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,8 @@ public UnresolvedExpression visitBySpanClause(BySpanClauseContext ctx) {
@Override
public UnresolvedExpression visitSpanClause(SpanClauseContext ctx) {
String unit = ctx.unit != null ? ctx.unit.getText() : "";
return new Span(visit(ctx.fieldExpression()), visit(ctx.value), SpanUnit.of(unit));
var field = ctx.fieldExpression() != null ? visit(ctx.fieldExpression()) : null;
return new Span(field, visit(ctx.value), SpanUnit.of(unit));
}

// Handle new syntax: span=1h
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -729,9 +729,13 @@ public String visitAggregateFunction(AggregateFunction node, String context) {

@Override
public String visitSpan(Span node, String context) {
String field = analyze(node.getField(), context);
String field = node.getField() != null ? analyze(node.getField(), context) : null;
String value = analyze(node.getValue(), context);
return StringUtils.format("span(%s, %s %s)", field, value, node.getUnit().getName());
if (field != null) {
return StringUtils.format("span(%s, %s %s)", field, value, node.getUnit().getName());
} else {
return StringUtils.format("span(%s %s)", value, node.getUnit().getName());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no test for verification?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverted this change to Span as span will always come with a field with latest implementation -- I inject @timestamp to spans without a field specified in the AST layer now.

}
}

@Override
Expand Down
Loading