diff --git a/docs/changelog/128639.yaml b/docs/changelog/128639.yaml new file mode 100644 index 0000000000000..8fbfa9927a938 --- /dev/null +++ b/docs/changelog/128639.yaml @@ -0,0 +1,6 @@ +pr: 128639 +summary: Substitue `date_trunc` with `round_to` when the pre-calculated rounding points + are available +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index e8acabe71ab41..ed15caa17ad3d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -290,12 +290,12 @@ public long count(FieldName field, BytesRef value) { } @Override - public byte[] min(FieldName field, DataType dataType) { + public Object min(FieldName field) { return null; } @Override - public byte[] max(FieldName field, DataType dataType) { + public Object max(FieldName field) { return null; } @@ -381,6 +381,27 @@ public String toString() { } } + public static class TestSearchStatsWithMinMax extends TestSearchStats { + + private final Map minValues; + private final Map maxValues; + + public TestSearchStatsWithMinMax(Map minValues, Map maxValues) { + this.minValues = minValues; + this.maxValues = maxValues; + } + + @Override + public Object min(FieldName field) { + return minValues.get(field.string()); + } + + @Override + public Object max(FieldName field) { + return maxValues.get(field.string()); + } + } + public static final TestSearchStats TEST_SEARCH_STATS = new TestSearchStats(); private static final Map> TABLES = tables(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/LocalSurrogateExpression.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/LocalSurrogateExpression.java new file mode 100644 index 0000000000000..f0401ae1d4f05 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/LocalSurrogateExpression.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression; + +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.stats.SearchStats; + +/** + * Interface signaling to the local logical plan optimizer that the declaring expression + * has to be replaced by a different form. + * Implement this on {@code Function}s when: + *
    + *
  • The expression can be rewritten to another expression on data node, with the statistics available in SearchStats. + * Like {@code DateTrunc} and {@code Bucket} could be rewritten to {@code RoundTo} with the min/max values on the date field. + *
  • + *
+ */ +public interface LocalSurrogateExpression { + /** + * Returns the expression to be replaced by or {@code null} if this cannot be replaced. + */ + Expression surrogate(SearchStats searchStats); +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java index bb6633686fc7c..01bc4dd2b4eec 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java @@ -25,6 +25,7 @@ import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.LocalSurrogateExpression; import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.FunctionType; @@ -35,10 +36,9 @@ import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Div; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mul; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.stats.SearchStats; import java.io.IOException; -import java.time.ZoneId; -import java.time.ZoneOffset; import java.util.ArrayList; import java.util.List; @@ -50,6 +50,8 @@ import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNumeric; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; import static org.elasticsearch.xpack.esql.expression.Validations.isFoldable; +import static org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc.maybeSubstituteWithRoundTo; +import static org.elasticsearch.xpack.esql.session.Configuration.DEFAULT_TZ; import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateTimeToLong; /** @@ -61,7 +63,8 @@ public class Bucket extends GroupingFunction.EvaluatableGroupingFunction implements PostOptimizationVerificationAware, - TwoOptionalArguments { + TwoOptionalArguments, + LocalSurrogateExpression { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Bucket", Bucket::new); // TODO maybe we should just cover the whole of representable dates here - like ten years, 100 years, 1000 years, all the way up. @@ -87,8 +90,6 @@ public class Bucket extends GroupingFunction.EvaluatableGroupingFunction Rounding.builder(TimeValue.timeValueMillis(10)).build(), Rounding.builder(TimeValue.timeValueMillis(1)).build(), }; - private static final ZoneId DEFAULT_TZ = ZoneOffset.UTC; // TODO: plug in the config - private final Expression field; private final Expression buckets; private final Expression from; @@ -301,15 +302,22 @@ public Rounding.Prepared getDateRoundingOrNull(FoldContext foldCtx) { } private Rounding.Prepared getDateRounding(FoldContext foldContext) { + return getDateRounding(foldContext, null, null); + } + + private Rounding.Prepared getDateRounding(FoldContext foldContext, Long min, Long max) { assert field.dataType() == DataType.DATETIME || field.dataType() == DataType.DATE_NANOS : "expected date type; got " + field; if (buckets.dataType().isWholeNumber()) { int b = ((Number) buckets.fold(foldContext)).intValue(); long f = foldToLong(foldContext, from); long t = foldToLong(foldContext, to); + if (min != null && max != null) { + return new DateRoundingPicker(b, f, t).pickRounding().prepare(min, max); + } return new DateRoundingPicker(b, f, t).pickRounding().prepareForUnknown(); } else { assert DataType.isTemporalAmount(buckets.dataType()) : "Unexpected span data type [" + buckets.dataType() + "]"; - return DateTrunc.createRounding(buckets.fold(foldContext), DEFAULT_TZ); + return DateTrunc.createRounding(buckets.fold(foldContext), DEFAULT_TZ, min, max); } } @@ -488,4 +496,17 @@ public Expression to() { public String toString() { return "Bucket{" + "field=" + field + ", buckets=" + buckets + ", from=" + from + ", to=" + to + '}'; } + + @Override + public Expression surrogate(SearchStats searchStats) { + // LocalSubstituteSurrogateExpressions should make sure this doesn't happen + assert searchStats != null : "SearchStats cannot be null"; + return maybeSubstituteWithRoundTo( + source(), + field(), + buckets(), + searchStats, + (interval, minValue, maxValue) -> getDateRounding(FoldContext.small(), minValue, maxValue) + ); + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/BinaryDateTimeFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/BinaryDateTimeFunction.java deleted file mode 100644 index 74f0dae76c425..0000000000000 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/BinaryDateTimeFunction.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.esql.expression.function.scalar.date; - -import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.function.scalar.BinaryScalarFunction; -import org.elasticsearch.xpack.esql.core.tree.Source; -import org.elasticsearch.xpack.esql.core.type.DataType; - -import java.time.ZoneId; -import java.time.ZoneOffset; -import java.util.Objects; - -public abstract class BinaryDateTimeFunction extends BinaryScalarFunction { - - protected static final ZoneId DEFAULT_TZ = ZoneOffset.UTC; - - private final ZoneId zoneId; - - protected BinaryDateTimeFunction(Source source, Expression argument, Expression timestamp) { - super(source, argument, timestamp); - zoneId = DEFAULT_TZ; - } - - @Override - public DataType dataType() { - return DataType.DATETIME; - } - - public Expression timestampField() { - return right(); - } - - public ZoneId zoneId() { - return zoneId; - } - - @Override - public int hashCode() { - return Objects.hash(super.hashCode(), zoneId()); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - if (super.equals(o) == false) { - return false; - } - BinaryDateTimeFunction that = (BinaryDateTimeFunction) o; - return zoneId().equals(that.zoneId()); - } -} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateDiff.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateDiff.java index a2ec96d1e0b34..2437fdc307415 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateDiff.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateDiff.java @@ -57,7 +57,7 @@ public class DateDiff extends EsqlScalarFunction { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "DateDiff", DateDiff::new); - public static final ZoneId UTC = ZoneId.of("Z"); + public static final ZoneId UTC = org.elasticsearch.xpack.esql.core.util.DateUtils.UTC; private final Expression unit; private final Expression startTimestamp; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateTrunc.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateTrunc.java index 6981c8e3b9d82..9b4d312e9df42 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateTrunc.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateTrunc.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.esql.expression.function.scalar.date; import org.elasticsearch.common.Rounding; +import org.elasticsearch.common.TriFunction; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -16,38 +17,53 @@ import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.FoldContext; +import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; +import org.elasticsearch.xpack.esql.expression.LocalSurrogateExpression; import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; +import org.elasticsearch.xpack.esql.expression.function.scalar.math.RoundTo; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.stats.SearchStats; import java.io.IOException; import java.time.Duration; import java.time.Period; import java.time.ZoneId; -import java.time.ZoneOffset; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_NANOS; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; +import static org.elasticsearch.xpack.esql.session.Configuration.DEFAULT_TZ; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateWithTypeToString; -public class DateTrunc extends EsqlScalarFunction { +public class DateTrunc extends EsqlScalarFunction implements LocalSurrogateExpression { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "DateTrunc", DateTrunc::new ); + private static final Logger logger = LogManager.getLogger(DateTrunc.class); + @FunctionalInterface public interface DateTruncFactoryProvider { ExpressionEvaluator.Factory apply(Source source, ExpressionEvaluator.Factory lhs, Rounding.Prepared rounding); @@ -59,7 +75,6 @@ public interface DateTruncFactoryProvider { ); private final Expression interval; private final Expression timestampField; - protected static final ZoneId DEFAULT_TZ = ZoneOffset.UTC; @FunctionInfo( returnType = { "date", "date_nanos" }, @@ -163,14 +178,23 @@ static Rounding.Prepared createRounding(final Object interval) { public static Rounding.Prepared createRounding(final Object interval, final ZoneId timeZone) { if (interval instanceof Period period) { - return createRounding(period, timeZone); + return createRounding(period, timeZone, null, null); + } else if (interval instanceof Duration duration) { + return createRounding(duration, timeZone, null, null); + } + throw new IllegalArgumentException("Time interval is not supported"); + } + + public static Rounding.Prepared createRounding(final Object interval, final ZoneId timeZone, Long min, Long max) { + if (interval instanceof Period period) { + return createRounding(period, timeZone, min, max); } else if (interval instanceof Duration duration) { - return createRounding(duration, timeZone); + return createRounding(duration, timeZone, min, max); } throw new IllegalArgumentException("Time interval is not supported"); } - private static Rounding.Prepared createRounding(final Period period, final ZoneId timeZone) { + private static Rounding.Prepared createRounding(final Period period, final ZoneId timeZone, Long min, Long max) { // Zero or negative intervals are not supported if (period == null || period.isNegative() || period.isZero()) { throw new IllegalArgumentException("Zero or negative time interval is not supported"); @@ -182,6 +206,7 @@ private static Rounding.Prepared createRounding(final Period period, final ZoneI } final Rounding.Builder rounding; + boolean tryPrepareWithMinMax = true; if (period.getDays() == 1) { rounding = new Rounding.Builder(Rounding.DateTimeUnit.DAY_OF_MONTH); } else if (period.getDays() == 7) { @@ -190,6 +215,7 @@ private static Rounding.Prepared createRounding(final Period period, final ZoneI rounding = new Rounding.Builder(Rounding.DateTimeUnit.WEEK_OF_WEEKYEAR); } else if (period.getDays() > 1) { rounding = new Rounding.Builder(new TimeValue(period.getDays(), TimeUnit.DAYS)); + tryPrepareWithMinMax = false; } else if (period.getMonths() == 3) { // java.time.Period does not have a QUARTERLY period, so a period of 3 months // returns a quarterly rounding @@ -198,19 +224,26 @@ private static Rounding.Prepared createRounding(final Period period, final ZoneI rounding = new Rounding.Builder(Rounding.DateTimeUnit.MONTH_OF_YEAR); } else if (period.getMonths() > 0) { rounding = new Rounding.Builder(Rounding.DateTimeUnit.MONTHS_OF_YEAR, period.getMonths()); + tryPrepareWithMinMax = false; } else if (period.getYears() == 1) { rounding = new Rounding.Builder(Rounding.DateTimeUnit.YEAR_OF_CENTURY); } else if (period.getYears() > 0) { rounding = new Rounding.Builder(Rounding.DateTimeUnit.YEARS_OF_CENTURY, period.getYears()); + tryPrepareWithMinMax = false; } else { throw new IllegalArgumentException("Time interval is not supported"); } rounding.timeZone(timeZone); + if (min != null && max != null && tryPrepareWithMinMax) { + // Multiple quantities calendar interval - day/week/month/quarter/year is not supported by PreparedRounding.maybeUseArray, + // which is called by prepare(min, max), as it may hit an assert. Call prepare(min, max) only for single calendar interval. + return rounding.build().prepare(min, max); + } return rounding.build().prepareForUnknown(); } - private static Rounding.Prepared createRounding(final Duration duration, final ZoneId timeZone) { + private static Rounding.Prepared createRounding(final Duration duration, final ZoneId timeZone, Long min, Long max) { // Zero or negative intervals are not supported if (duration == null || duration.isNegative() || duration.isZero()) { throw new IllegalArgumentException("Zero or negative time interval is not supported"); @@ -218,6 +251,9 @@ private static Rounding.Prepared createRounding(final Duration duration, final Z final Rounding.Builder rounding = new Rounding.Builder(TimeValue.timeValueMillis(duration.toMillis())); rounding.timeZone(timeZone); + if (min != null && max != null) { + return rounding.build().prepare(min, max); + } return rounding.build().prepareForUnknown(); } @@ -249,4 +285,56 @@ public static ExpressionEvaluator.Factory evaluator( ) { return evaluatorMap.get(forType).apply(source, fieldEvaluator, rounding); } + + @Override + public Expression surrogate(SearchStats searchStats) { + // LocalSubstituteSurrogateExpressions should make sure this doesn't happen + assert searchStats != null : "SearchStats cannot be null"; + return maybeSubstituteWithRoundTo( + source(), + field(), + interval(), + searchStats, + (interval, minValue, maxValue) -> createRounding(interval, DEFAULT_TZ, minValue, maxValue) + ); + } + + public static RoundTo maybeSubstituteWithRoundTo( + Source source, + Expression field, + Expression foldableTimeExpression, + SearchStats searchStats, + TriFunction roundingFunction + ) { + if (field instanceof FieldAttribute fa && fa.field() instanceof MultiTypeEsField == false && isDateTime(fa.dataType())) { + // Extract min/max from SearchStats + DataType fieldType = fa.dataType(); + FieldAttribute.FieldName fieldName = fa.fieldName(); + var min = searchStats.min(fieldName); + var max = searchStats.max(fieldName); + // If min/max is available create rounding with them + if (min instanceof Long minValue && max instanceof Long maxValue && foldableTimeExpression.foldable()) { + Object foldedInterval = foldableTimeExpression.fold(FoldContext.small() /* TODO remove me */); + Rounding.Prepared rounding = roundingFunction.apply(foldedInterval, minValue, maxValue); + long[] roundingPoints = rounding.fixedRoundingPoints(); + if (roundingPoints == null) { + logger.trace( + "Fixed rounding point is null for field {}, minValue {} in string format {} and maxValue {} in string format {}", + fieldName, + minValue, + dateWithTypeToString(minValue, fieldType), + maxValue, + dateWithTypeToString(maxValue, fieldType) + ); + return null; + } + // Convert to round_to function with the roundings + List points = Arrays.stream(roundingPoints) + .mapToObj(l -> new Literal(Source.EMPTY, l, fieldType)) + .collect(Collectors.toList()); + return new RoundTo(source, field, points); + } + } + return null; + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java index b9d85d191f1d2..39f37f952ae02 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java @@ -15,6 +15,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferIsNotNull; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferNonNullAggConstraint; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.LocalPropagateEmptyRelation; +import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.LocalSubstituteSurrogateExpressions; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceFieldWithConstantOrNull; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceTopNWithLimitAndSort; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; @@ -46,7 +47,8 @@ public class LocalLogicalPlanOptimizer extends ParameterizedRuleExecutor { + + @Override + public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext context) { + return context.searchStats() != null + ? plan.transformUp(Eval.class, eval -> eval.transformExpressionsOnly(Function.class, f -> substitute(f, context.searchStats()))) + : plan; + } + + /** + * Perform the actual substitution. + */ + private static Expression substitute(Expression e, SearchStats searchStats) { + if (e instanceof LocalSurrogateExpression s) { + Expression surrogate = s.surrogate(searchStats); + if (surrogate != null) { + return surrogate; + } + } + return e; + } + +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/Configuration.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/Configuration.java index 0f66a839bb429..183dccf48d5ac 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/Configuration.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/Configuration.java @@ -24,6 +24,7 @@ import java.time.Duration; import java.time.Instant; import java.time.ZoneId; +import java.time.ZoneOffset; import java.time.ZonedDateTime; import java.util.Locale; import java.util.Map; @@ -34,6 +35,7 @@ public class Configuration implements Writeable { public static final int QUERY_COMPRESS_THRESHOLD_CHARS = KB.toIntBytes(5); + public static final ZoneId DEFAULT_TZ = ZoneOffset.UTC; private final String clusterName; private final String username; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java index c308b317529ca..89aa2402248b8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java @@ -18,6 +18,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; import org.elasticsearch.index.mapper.ConstantFieldType; import org.elasticsearch.index.mapper.DocCountFieldMapper.DocCountFieldType; import org.elasticsearch.index.mapper.IdFieldMapper; @@ -29,7 +30,7 @@ import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute.FieldName; -import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.util.Holder; import java.io.IOException; import java.util.LinkedHashMap; @@ -51,7 +52,11 @@ public class SearchContextStats implements SearchStats { private final List contexts; - private record FieldConfig(boolean exists, boolean hasExactSubfield, boolean indexed, boolean hasDocValues) {} + private record FieldConfig(boolean exists, boolean hasExactSubfield, boolean indexed, boolean hasDocValues, MappedFieldType fieldType) { + FieldConfig(boolean exists, boolean hasExactSubfield, boolean indexed, boolean hasDocValues) { + this(exists, hasExactSubfield, indexed, hasDocValues, null); + } + } private static class FieldStats { private Long count; @@ -93,11 +98,18 @@ private FieldConfig makeFieldConfig(String field) { boolean hasExactSubfield = true; boolean indexed = true; boolean hasDocValues = true; + boolean mixedFieldType = false; + MappedFieldType fieldType = null; // Extract the field type, it will be used by min/max later. // even if there are deleted documents, check the existence of a field // since if it's missing, deleted documents won't change that for (SearchExecutionContext context : contexts) { if (context.isFieldMapped(field)) { - var type = context.getFieldType(field); + MappedFieldType type = context.getFieldType(field); + if (fieldType == null) { + fieldType = type; + } else if (mixedFieldType == false && fieldType.typeName().equals(type.typeName()) == false) { + mixedFieldType = true; + } exists |= true; indexed &= type.isIndexed(); hasDocValues &= type.hasDocValues(); @@ -115,7 +127,7 @@ private FieldConfig makeFieldConfig(String field) { // if it does not exist on any context, no other settings are valid return new FieldConfig(false, false, false, false); } else { - return new FieldConfig(exists, hasExactSubfield, indexed, hasDocValues); + return new FieldConfig(exists, hasExactSubfield, indexed, hasDocValues, mixedFieldType ? null : fieldType); } } @@ -185,49 +197,57 @@ public long count(FieldName field, BytesRef value) { } @Override - public byte[] min(FieldName field, DataType dataType) { + public Object min(FieldName field) { var stat = cache.computeIfAbsent(field.string(), this::makeFieldStats); + // Consolidate min for indexed date fields only, skip the others and mixed-typed fields. + MappedFieldType fieldType = stat.config.fieldType; + if (fieldType == null || stat.config.indexed == false || fieldType instanceof DateFieldType == false) { + return null; + } if (stat.min == null) { - var min = new byte[][] { null }; + var min = new long[] { Long.MAX_VALUE }; + Holder foundMinValue = new Holder<>(false); doWithContexts(r -> { - byte[] localMin = PointValues.getMinPackedValue(r, field.string()); - // TODO: how to compare with the previous min - if (localMin != null) { - if (min[0] == null) { - min[0] = localMin; - } else { - throw new EsqlIllegalArgumentException("Don't know how to compare with previous min"); + byte[] minPackedValue = PointValues.getMinPackedValue(r, field.string()); + if (minPackedValue != null && minPackedValue.length == 8) { + long minValue = NumericUtils.sortableBytesToLong(minPackedValue, 0); + if (minValue <= min[0]) { + min[0] = minValue; + foundMinValue.set(true); } } return true; }, true); - stat.min = min[0]; + stat.min = foundMinValue.get() ? min[0] : null; } - // return stat.min; - return null; + return stat.min; } @Override - public byte[] max(FieldName field, DataType dataType) { + public Object max(FieldName field) { var stat = cache.computeIfAbsent(field.string(), this::makeFieldStats); + // Consolidate max for indexed date fields only, skip the others and mixed-typed fields. + MappedFieldType fieldType = stat.config.fieldType; + if (fieldType == null || stat.config.indexed == false || fieldType instanceof DateFieldType == false) { + return null; + } if (stat.max == null) { - var max = new byte[][] { null }; + var max = new long[] { Long.MIN_VALUE }; + Holder foundMaxValue = new Holder<>(false); doWithContexts(r -> { - byte[] localMax = PointValues.getMaxPackedValue(r, field.string()); - // TODO: how to compare with the previous max - if (localMax != null) { - if (max[0] == null) { - max[0] = localMax; - } else { - throw new EsqlIllegalArgumentException("Don't know how to compare with previous max"); + byte[] maxPackedValue = PointValues.getMaxPackedValue(r, field.string()); + if (maxPackedValue != null && maxPackedValue.length == 8) { + long maxValue = NumericUtils.sortableBytesToLong(maxPackedValue, 0); + if (maxValue >= max[0]) { + max[0] = maxValue; + foundMaxValue.set(true); } } return true; }, true); - stat.max = max[0]; + stat.max = foundMaxValue.get() ? max[0] : null; } - // return stat.max; - return null; + return stat.max; } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java index ff1701104eca9..5c7ab1fdd6242 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java @@ -10,7 +10,6 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute.FieldName; -import org.elasticsearch.xpack.esql.core.type.DataType; /** * Interface for determining information about fields in the index. @@ -33,9 +32,9 @@ public interface SearchStats { long count(FieldName field, BytesRef value); - byte[] min(FieldName field, DataType dataType); + Object min(FieldName field); - byte[] max(FieldName field, DataType dataType); + Object max(FieldName field); boolean isSingleValue(FieldName field); @@ -90,12 +89,12 @@ public long count(FieldName field, BytesRef value) { } @Override - public byte[] min(FieldName field, DataType dataType) { + public Object min(FieldName field) { return null; } @Override - public byte[] max(FieldName field, DataType dataType) { + public Object max(FieldName field) { return null; } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java index 7dbd625c18455..f89c79079c876 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java @@ -818,22 +818,16 @@ private LocalRelation asEmptyRelation(Object o) { private LogicalPlan plan(String query, Analyzer analyzer) { var analyzed = analyzer.analyze(parser.createStatement(query, EsqlTestUtils.TEST_CFG)); - // System.out.println(analyzed); - var optimized = logicalOptimizer.optimize(analyzed); - // System.out.println(optimized); - return optimized; + return logicalOptimizer.optimize(analyzed); } - private LogicalPlan plan(String query) { + protected LogicalPlan plan(String query) { return plan(query, analyzer); } - private LogicalPlan localPlan(LogicalPlan plan, SearchStats searchStats) { + protected LogicalPlan localPlan(LogicalPlan plan, SearchStats searchStats) { var localContext = new LocalLogicalOptimizerContext(EsqlTestUtils.TEST_CFG, FoldContext.small(), searchStats); - // System.out.println(plan); - var localPlan = new LocalLogicalPlanOptimizer(localContext).localOptimize(plan); - // System.out.println(localPlan); - return localPlan; + return new LocalLogicalPlanOptimizer(localContext).localOptimize(plan); } private LogicalPlan localPlan(String query) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/LocalSubstituteSurrogateExpressionTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/LocalSubstituteSurrogateExpressionTests.java new file mode 100644 index 0000000000000..74ec1b71cf824 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/LocalSubstituteSurrogateExpressionTests.java @@ -0,0 +1,114 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.optimizer.rules.logical.local; + +import org.elasticsearch.xpack.esql.EsqlTestUtils; +import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.expression.function.scalar.math.RoundTo; +import org.elasticsearch.xpack.esql.optimizer.LocalLogicalPlanOptimizerTests; +import org.elasticsearch.xpack.esql.plan.logical.Aggregate; +import org.elasticsearch.xpack.esql.plan.logical.EsRelation; +import org.elasticsearch.xpack.esql.plan.logical.Eval; +import org.elasticsearch.xpack.esql.plan.logical.Limit; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.logical.Project; +import org.elasticsearch.xpack.esql.plan.logical.TopN; +import org.elasticsearch.xpack.esql.stats.SearchStats; + +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.xpack.esql.EsqlTestUtils.as; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; + +public class LocalSubstituteSurrogateExpressionTests extends LocalLogicalPlanOptimizerTests { + + public void testSubstituteDateTruncInEvalWithRoundTo() { + var plan = plan(""" + from test + | sort hire_date + | eval x = date_trunc(1 day, hire_date) + | keep emp_no, hire_date, x + | limit 5 + """); + + // create a SearchStats with min and max millis + Map minValue = Map.of("hire_date", 1697804103360L); // 2023-10-20T12:15:03.360Z + Map maxValue = Map.of("hire_date", 1698069301543L); // 2023-10-23T13:55:01.543Z + SearchStats searchStats = new EsqlTestUtils.TestSearchStatsWithMinMax(minValue, maxValue); + + LogicalPlan localPlan = localPlan(plan, searchStats); + Project project = as(localPlan, Project.class); + TopN topN = as(project.child(), TopN.class); + Eval eval = as(topN.child(), Eval.class); + List fields = eval.fields(); + assertEquals(1, fields.size()); + Alias a = fields.get(0); + assertEquals("x", a.name()); + RoundTo roundTo = as(a.child(), RoundTo.class); + FieldAttribute fa = as(roundTo.field(), FieldAttribute.class); + assertEquals("hire_date", fa.name()); + assertEquals(DATETIME, fa.dataType()); + assertEquals(4, roundTo.points().size()); // 4 days + EsRelation relation = as(eval.child(), EsRelation.class); + } + + public void testSubstituteDateTruncInAggWithRoundTo() { + var plan = plan(""" + from test + | stats count(*) by x = date_trunc(1 day, hire_date) + """); + + // create a SearchStats with min and max millis + Map minValue = Map.of("hire_date", 1697804103360L); // 2023-10-20T12:15:03.360Z + Map maxValue = Map.of("hire_date", 1698069301543L); // 2023-10-23T13:55:01.543Z + SearchStats searchStats = new EsqlTestUtils.TestSearchStatsWithMinMax(minValue, maxValue); + + LogicalPlan localPlan = localPlan(plan, searchStats); + Limit limit = as(localPlan, Limit.class); + Aggregate aggregate = as(limit.child(), Aggregate.class); + Eval eval = as(aggregate.child(), Eval.class); + List fields = eval.fields(); + assertEquals(1, fields.size()); + Alias a = fields.get(0); + assertEquals("x", a.name()); + RoundTo roundTo = as(a.child(), RoundTo.class); + FieldAttribute fa = as(roundTo.field(), FieldAttribute.class); + assertEquals("hire_date", fa.name()); + assertEquals(DATETIME, fa.dataType()); + assertEquals(4, roundTo.points().size()); // 4 days + EsRelation relation = as(eval.child(), EsRelation.class); + } + + public void testSubstituteBucketInAggWithRoundTo() { + var plan = plan(""" + from test + | stats count(*) by x = bucket(hire_date, 1 day) + """); + // create a SearchStats with min and max millis + Map minValue = Map.of("hire_date", 1697804103360L); // 2023-10-20T12:15:03.360Z + Map maxValue = Map.of("hire_date", 1698069301543L); // 2023-10-23T13:55:01.543Z + SearchStats searchStats = new EsqlTestUtils.TestSearchStatsWithMinMax(minValue, maxValue); + + LogicalPlan localPlan = localPlan(plan, searchStats); + Limit limit = as(localPlan, Limit.class); + Aggregate aggregate = as(limit.child(), Aggregate.class); + Eval eval = as(aggregate.child(), Eval.class); + List fields = eval.fields(); + assertEquals(1, fields.size()); + Alias a = fields.get(0); + assertEquals("x", a.name()); + RoundTo roundTo = as(a.child(), RoundTo.class); + FieldAttribute fa = as(roundTo.field(), FieldAttribute.class); + assertEquals("hire_date", fa.name()); + assertEquals(DATETIME, fa.dataType()); + assertEquals(4, roundTo.points().size()); // 4 days + EsRelation relation = as(eval.child(), EsRelation.class); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java index 6d8f5ca925121..308d21da05c6d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java @@ -9,7 +9,6 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute.FieldName; -import org.elasticsearch.xpack.esql.core.type.DataType; public class DisabledSearchStats implements SearchStats { @@ -49,12 +48,12 @@ public long count(FieldName field, BytesRef value) { } @Override - public byte[] min(FieldName field, DataType dataType) { + public Object min(FieldName field) { return null; } @Override - public byte[] max(FieldName field, DataType dataType) { + public Object max(FieldName field) { return null; } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/SearchContextStatsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/SearchContextStatsTests.java new file mode 100644 index 0000000000000..2d8ecd26ad230 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/SearchContextStatsTests.java @@ -0,0 +1,170 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.stats; + +import org.apache.lucene.document.DoubleField; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatField; +import org.apache.lucene.document.IntField; +import org.apache.lucene.document.LongField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.elasticsearch.core.IOUtils; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.MapperServiceTestCase; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateNanosToLong; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateTimeToLong; + +public class SearchContextStatsTests extends MapperServiceTestCase { + private final Directory directory = newDirectory(); + private SearchStats searchStats; + private List mapperServices; + private List readers; + private long minMillis, maxMillis, minNanos, maxNanos; + + @Before + public void setup() throws IOException { + int indexCount = randomIntBetween(1, 5); + List contexts = new ArrayList<>(indexCount); + mapperServices = new ArrayList<>(indexCount); + readers = new ArrayList<>(indexCount); + maxMillis = minMillis = dateTimeToLong("2025-01-01T00:00:01"); + maxNanos = minNanos = dateNanosToLong("2025-01-01T00:00:01"); + + MapperServiceTestCase mapperHelper = new MapperServiceTestCase() { + }; + // create one or more index, so that there is one or more SearchExecutionContext in SearchStats + for (int i = 0; i < indexCount; i++) { + // Start with millis/nanos, numeric and keyword types in the index mapping, more data types can be covered later if needed. + // SearchContextStats returns min/max for millis and nanos only currently, null is returned for the other types min and max. + MapperService mapperService; + if (i == 0) { + mapperService = mapperHelper.createMapperService(""" + { + "doc": { "properties": { + "byteField": { "type": "byte" }, + "shortField": { "type": "short" }, + "intField": { "type": "integer" }, + "longField": { "type": "long" }, + "floatField": { "type": "float" }, + "doubleField": { "type": "double" }, + "dateField": { "type": "date" }, + "dateNanosField": { "type": "date_nanos" }, + "keywordField": { "type": "keyword" }, + "maybeMixedField": { "type": "long" } + }} + }"""); + } else { + mapperService = mapperHelper.createMapperService(""" + { + "doc": { "properties": { + "byteField": { "type": "byte" }, + "shortField": { "type": "short" }, + "intField": { "type": "integer" }, + "longField": { "type": "long" }, + "floatField": { "type": "float" }, + "doubleField": { "type": "double" }, + "dateField": { "type": "date" }, + "dateNanosField": { "type": "date_nanos" }, + "maybeMixedField": { "type": "date" } + }} + }"""); + } + mapperServices.add(mapperService); + + int perIndexDocumentCount = randomIntBetween(1, 5); + IndexReader reader; + try (RandomIndexWriter writer = new RandomIndexWriter(random(), directory)) { + List byteValues = randomList(perIndexDocumentCount, perIndexDocumentCount, ESTestCase::randomByte); + List shortValues = randomList(perIndexDocumentCount, perIndexDocumentCount, ESTestCase::randomShort); + List intValues = randomList(perIndexDocumentCount, perIndexDocumentCount, ESTestCase::randomInt); + List longValues = randomList(perIndexDocumentCount, perIndexDocumentCount, ESTestCase::randomLong); + List floatValues = randomList(perIndexDocumentCount, perIndexDocumentCount, ESTestCase::randomFloat); + List doubleValues = randomList(perIndexDocumentCount, perIndexDocumentCount, ESTestCase::randomDouble); + List keywordValues = randomList(perIndexDocumentCount, perIndexDocumentCount, () -> randomAlphaOfLength(5)); + + for (int j = 0; j < perIndexDocumentCount; j++) { + long millis = minMillis + (j == 0 ? 0 : randomInt(1000)); + long nanos = minNanos + (j == 0 ? 0 : randomInt(1000)); + maxMillis = Math.max(millis, maxMillis); + maxNanos = Math.max(nanos, maxNanos); + minMillis = Math.min(millis, minMillis); + minNanos = Math.min(nanos, minNanos); + writer.addDocument( + List.of( + new IntField("byteField", byteValues.get(j), Field.Store.NO), + new IntField("shortField", shortValues.get(j), Field.Store.NO), + new IntField("intField", intValues.get(j), Field.Store.NO), + new LongField("longField", longValues.get(j), Field.Store.NO), + new FloatField("floatField", floatValues.get(j), Field.Store.NO), + new DoubleField("doubleField", doubleValues.get(j), Field.Store.NO), + new LongField("dateField", millis, Field.Store.NO), + new LongField("dateNanosField", nanos, Field.Store.NO), + new StringField("keywordField", keywordValues.get(j), Field.Store.NO), + new LongField("maybeMixedField", millis, Field.Store.NO) + ) + ); + } + reader = writer.getReader(); + readers.add(reader); + } + // create SearchExecutionContext for each index + SearchExecutionContext context = mapperHelper.createSearchExecutionContext(mapperService, newSearcher(reader)); + contexts.add(context); + } + // create SearchContextStats + searchStats = SearchContextStats.from(contexts); + } + + public void testMinMax() { + List fields = List.of( + "byteField", + "shortField", + "intField", + "longField", + "floatField", + "doubleField", + "dateField", + "dateNanosField", + "keywordField" + ); + for (String field : fields) { + Object min = searchStats.min(new FieldAttribute.FieldName(field)); + Object max = searchStats.max(new FieldAttribute.FieldName(field)); + if (field.startsWith("date") == false) { + assertNull(min); + assertNull(max); + } else if (field.equals("dateField")) { + assertEquals(minMillis, min); + assertEquals(maxMillis, max); + } else if (field.equals("dateNanosField")) { + assertEquals(minNanos, min); + assertEquals(maxNanos, max); + } + } + } + + @After + public void cleanup() throws IOException { + IOUtils.close(readers); + IOUtils.close(mapperServices); + IOUtils.close(directory); + } +}