diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorFactory.java index 347834f5fb825..49e0064c6e94a 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorFactory.java @@ -29,7 +29,6 @@ import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.AggregatorSupplier; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; -import org.elasticsearch.search.aggregations.support.HistogramAggregatorSupplier; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/HistogramAggregatorSupplier.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorSupplier.java similarity index 89% rename from server/src/main/java/org/elasticsearch/search/aggregations/support/HistogramAggregatorSupplier.java rename to server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorSupplier.java index 021577852c39e..1945a5bb1d4ef 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/HistogramAggregatorSupplier.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorSupplier.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.search.aggregations.support; +package org.elasticsearch.search.aggregations.bucket.histogram; import org.elasticsearch.common.Nullable; import org.elasticsearch.search.DocValueFormat; @@ -24,6 +24,8 @@ import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.AggregatorSupplier; +import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index 4cf73196f23e6..a94be4004893b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -62,7 +62,6 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory { private final TermsAggregator.BucketCountThresholds bucketCountThresholds; private final boolean showTermDocCountError; - // TODO: Registration should happen on the actual aggregator classes static void registerAggregators(ValuesSourceRegistry valuesSourceRegistry) { valuesSourceRegistry.register(TermsAggregationBuilder.NAME, List.of(CoreValuesSourceType.BYTES, CoreValuesSourceType.IP), diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/AggregatorSupplier.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/AggregatorSupplier.java index 97ca793faedb3..b646bc6b9e1d9 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/AggregatorSupplier.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/AggregatorSupplier.java @@ -18,5 +18,24 @@ */ package org.elasticsearch.search.aggregations.support; +/** + * {@link AggregatorSupplier} serves as a marker for what the {@link ValuesSourceRegistry} holds to construct aggregator instances. + * The aggregators for each aggregation should all share a signature, and that signature should be used to create an AggregatorSupplier for + * that aggregation. Alternatively, if an existing supplier has a matching signature, please re-use that. + * + * In many cases, this can be a simple wrapper over the aggregator constructor. If that is sufficient, please consider the "typecast + * lambda" syntax: + * + * {@code + * (GeoCentroidAggregatorSupplier) (name, context, parent, valuesSource, pipelineAggregators, metaData) -> + * new GeoCentroidAggregator(name, context, parent, (ValuesSource.GeoPoint) valuesSource, pipelineAggregators, metaData)); + * } + * + * The suppliers are responsible for any casting of {@link ValuesSource} that needs to happen. They must accept a base {@link ValuesSource} + * instance. The suppliers may perform additional logic to configure the aggregator as needed, such as in + * {@link org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory} deciding the execution mode. + * + * There is ongoing work to normalize aggregator constructor signatures, and thus reduce the number of AggregatorSupplier interfaces. + */ public interface AggregatorSupplier { } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java index e6064cbd1f35d..c41f65cb8b1e5 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java @@ -57,7 +57,6 @@ public ValuesSource getScript(AggregationScript.LeafFactory script, ValueType sc public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFactory script) { if ((fieldContext.indexFieldData() instanceof IndexNumericFieldData) == false) { - // TODO: Is this the correct exception type here? throw new IllegalArgumentException("Expected numeric type on field [" + fieldContext.field() + "], but got [" + fieldContext.fieldType().typeName() + "]"); } @@ -71,8 +70,9 @@ public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFa } @Override - public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) { - Number missing = docValueFormat.parseDouble(rawMissing.toString(), false, now); + public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, + LongSupplier nowSupplier) { + Number missing = docValueFormat.parseDouble(rawMissing.toString(), false, nowSupplier); return MissingValues.replaceMissing((ValuesSource.Numeric) valuesSource, missing); } }, @@ -104,7 +104,8 @@ public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFa } @Override - public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) { + public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, + LongSupplier nowSupplier) { final BytesRef missing = docValueFormat.parseBytesRef(rawMissing.toString()); if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals) { return MissingValues.replaceMissing((ValuesSource.Bytes.WithOrdinals) valuesSource, missing); @@ -127,7 +128,6 @@ public ValuesSource getScript(AggregationScript.LeafFactory script, ValueType sc @Override public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFactory script) { if (!(fieldContext.indexFieldData() instanceof IndexGeoPointFieldData)) { - // TODO: Is this the correct exception type here? throw new IllegalArgumentException("Expected geo_point type on field [" + fieldContext.field() + "], but got [" + fieldContext.fieldType().typeName() + "]"); } @@ -136,7 +136,8 @@ public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFa } @Override - public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) { + public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, + LongSupplier nowSupplier) { // TODO: also support the structured formats of geo points final GeoPoint missing = new GeoPoint(rawMissing.toString()); return MissingValues.replaceMissing((ValuesSource.GeoPoint) valuesSource, missing); @@ -150,7 +151,6 @@ public DocValueFormat getFormatter(String format, ZoneId tz) { RANGE() { @Override public ValuesSource getEmpty() { - // TODO: Is this the correct exception type here? throw new IllegalArgumentException("Can't deal with unmapped ValuesSource type " + this.value()); } @@ -164,15 +164,15 @@ public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFa MappedFieldType fieldType = fieldContext.fieldType(); if (fieldType instanceof RangeFieldMapper.RangeFieldType == false) { - // TODO: Is this the correct exception type here? - throw new IllegalStateException("Asked for range ValuesSource, but field is of type " + fieldType.name()); + throw new IllegalArgumentException("Asked for range ValuesSource, but field is of type " + fieldType.name()); } RangeFieldMapper.RangeFieldType rangeFieldType = (RangeFieldMapper.RangeFieldType) fieldType; return new ValuesSource.Range(fieldContext.indexFieldData(), rangeFieldType.rangeType()); } @Override - public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) { + public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, + LongSupplier nowSupplier) { throw new IllegalArgumentException("Can't apply missing values on a " + valuesSource.getClass()); } }, @@ -193,8 +193,9 @@ public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFa } @Override - public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) { - return BYTES.replaceMissing(valuesSource, rawMissing, docValueFormat, now); + public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, + LongSupplier nowSupplier) { + return BYTES.replaceMissing(valuesSource, rawMissing, docValueFormat, nowSupplier); } @Override @@ -219,8 +220,9 @@ public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFa } @Override - public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) { - return NUMERIC.replaceMissing(valuesSource, rawMissing, docValueFormat, now); + public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, + LongSupplier nowSupplier) { + return NUMERIC.replaceMissing(valuesSource, rawMissing, docValueFormat, nowSupplier); } @Override @@ -250,8 +252,9 @@ public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFa } @Override - public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) { - return NUMERIC.replaceMissing(valuesSource, rawMissing, docValueFormat, now); + public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, + LongSupplier nowSupplier) { + return NUMERIC.replaceMissing(valuesSource, rawMissing, docValueFormat, nowSupplier); } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValueType.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValueType.java index 068790030bd49..3df1146533238 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValueType.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValueType.java @@ -30,6 +30,7 @@ import java.time.ZoneOffset; import java.util.Set; +@Deprecated public enum ValueType implements Writeable { STRING((byte) 1, "string", "string", CoreValuesSourceType.BYTES, @@ -42,7 +43,6 @@ public enum ValueType implements Writeable { new DocValueFormat.DateTime(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, ZoneOffset.UTC, DateFieldMapper.Resolution.MILLISECONDS)), IP((byte) 6, "ip", "ip", CoreValuesSourceType.IP, DocValueFormat.IP), - // TODO: what is the difference between "number" and "numeric"? NUMERIC((byte) 7, "numeric", "numeric", CoreValuesSourceType.NUMERIC, DocValueFormat.RAW), GEOPOINT((byte) 8, "geo_point", "geo_point", CoreValuesSourceType.GEOPOINT, DocValueFormat.GEOHASH), BOOLEAN((byte) 9, "boolean", "boolean", CoreValuesSourceType.BOOLEAN, DocValueFormat.BOOLEAN), diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java index 4bd3acfeeb08f..1b1552bf0336e 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java @@ -52,6 +52,14 @@ import java.io.IOException; import java.util.function.LongUnaryOperator; +/** + * Note on subclassing ValuesSources: Generally, direct subclasses of ValuesSource should also be abstract, representing types. These + * subclasses are free to add new methods specific to that type (e.g. {@link Numeric#isFloatingPoint()}). Subclasses of these should, in + * turn, be concrete and implement specific ways of reading the given values (e.g. script and field based sources). It is also possible + * to see sub-sub-classes of ValuesSource that act as wrappers on other concrete values sources to add functionality, such as the + * anonymous subclasses returned from {@link MissingValues} or the GeoPoint to Numeric conversion logic in + * {@link org.elasticsearch.search.aggregations.bucket.geogrid.CellIdSource} + */ public abstract class ValuesSource { /** diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregationBuilder.java index f7bdecfa3e448..a7c423d658683 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregationBuilder.java @@ -334,6 +334,15 @@ protected final ValuesSourceAggregatorFactory doBuild(QueryShardContext querySha */ protected abstract ValuesSourceType defaultValueSourceType(); + /** + * Aggregations should override this if they need non-standard logic for resolving where to get values from. For example, join + * aggregations (like Parent and Child) ask the user to specify one side of the join and then look up the other field to read values + * from. + * + * The default implementation just uses the field and/or script the user provided. + * + * @return A {@link ValuesSourceConfig} configured based on the parsed field and/or script. + */ protected ValuesSourceConfig resolveConfig(QueryShardContext queryShardContext) { return ValuesSourceConfig.resolve(queryShardContext, this.userValueTypeHint, field, script, missing, timeZone, format, this.defaultValueSourceType(), this.getType()); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceType.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceType.java index 040ede186c7ab..ab995f9c53b5d 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceType.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceType.java @@ -26,10 +26,23 @@ import java.util.function.LongSupplier; /** - * ValuesSourceType wraps the creation of specific per-source instances each {@link ValuesSource} needs to provide. Every top-level - * subclass of {@link ValuesSource} should have a corresponding implementation of this interface. In general, new data types seeking - * aggregation support should create a top level {@link ValuesSource}, then implement this to return wrappers for the specific sources of - * values. + * {@link ValuesSourceType} represents a collection of fields that share a common set of operations, for example all numeric fields. + * Aggregations declare their support for a given ValuesSourceType (via {@link ValuesSourceRegistry#register}), and should then not need + * to care about the fields which use that ValuesSourceType. + * + * ValuesSourceTypes provide a set of methods to instantiate concrete {@link ValuesSource} instances, based on the actual source of the + * data for the aggregations. In general, aggregations should not call these methods, but rather rely on {@link ValuesSourceConfig} to have + * selected the correct implementation. + * + * ValuesSourceTypes should be stateless. We recommend that plugins define an enum for their ValuesSourceTypes, even if the plugin only + * intends to define one ValuesSourceType. ValuesSourceTypes are not serialized as part of the aggregations framework. + * + * Prefer reusing an existing ValuesSourceType (ideally from {@link CoreValuesSourceType}) over creating a new type. There are some cases + * where creating a new type is necessary however. In particular, consider a new ValuesSourceType if the field has custom encoding/decoding + * requirements; if the field needs to expose additional information to the aggregation (e.g. {@link ValuesSource.Range#rangeType()}); or + * if logically the type needs a more restricted use (e.g. even though dates are stored as numbers, it doesn't make sense to pass them to + * a sum aggregation). When adding a new ValuesSourceType, new aggregators should be added and registered at the same time, to add support + * for the new type to existing aggregations, as appropriate. */ public interface ValuesSourceType { /** @@ -66,11 +79,11 @@ public interface ValuesSourceType { * @param valuesSource - The original {@link ValuesSource} * @param rawMissing - The missing value we got from the parser, typically a string or number * @param docValueFormat - The format to use for further parsing the user supplied value, e.g. a date format - * @param now - Used in conjunction with the formatter, should return the current time in milliseconds + * @param nowSupplier - Used in conjunction with the formatter, should return the current time in milliseconds * @return - Wrapper over the provided {@link ValuesSource} to apply the given missing value */ ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, - LongSupplier now); + LongSupplier nowSupplier); /** * This method provides a hook for specifying a type-specific formatter. When {@link ValuesSourceConfig} can resolve a diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/package-info.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/package-info.java new file mode 100644 index 0000000000000..da5469e873133 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/package-info.java @@ -0,0 +1,77 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.support; + +/** + *
+ * This package holds shared code for the aggregations framework, especially around dealing with values. + *
+ * + *+ * These are thin wrappers which provide a unified interface to different ways of getting input data (e.g. DocValues from Lucene, or script + * output). A class hierarchy defines the type of values returned by the source. The top level sub-classes define type-specific behavior, + * such as {@link org.elasticsearch.search.aggregations.support.ValuesSource.Numeric#isFloatingPoint()}. Second level subclasses are + * then specialized based on where they read values from, e.g. script or field cases. There are also adapter classes like + * {@link org.elasticsearch.search.aggregations.bucket.geogrid.CellIdSource} which do run-time conversion from one type to another, often + * dependent on a user specified parameter (precision in that case). + *
+ * + *+ * ValuesSourceRegistry stores the mappings for what types are supported by what aggregations. It is configured at startup, when + * {@link org.elasticsearch.search.SearchModule} is configuring aggregations. It shouldn't be necessary to access the registry in most + * cases, but you can get a read copy from {@link org.elasticsearch.index.query.QueryShardContext#getValuesSourceRegistry()} if necessary. + *
+ * + *+ * ValuesSourceTypes are the quantum of support in the aggregations framework, and provide a common language between fields and + * aggregations. Fields which support aggregation override {@link org.elasticsearch.index.mapper.MappedFieldType#getValuesSourceType()} to + * return a compatible VaulesSourceType (based on how the field is stored), and aggregations register what types they support via one of the + * {@link org.elasticsearch.search.aggregations.support.ValuesSourceRegistry#register} methods. The VaulesSourceType itself holds + * information on how to with values of that type, including methods for creating + * {@link org.elasticsearch.search.aggregations.support.ValuesSource} instances and {@link org.elasticsearch.search.DocValueFormat} + * instances. + *
+ * + *+ * There are two things going on in ValuesSourceConfig. First, there is a collection of static factory methods to build valid configs for + * different situations. {@link org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder#resolveConfig} has a good + * default for what to call here and generally aggregations shouldn't need to deviate from that. + *
+ * + *+ * Once properly constructed, the ValuesSourceConfig provides access to the ValuesSource instance, as well as related information like the + * formatter. Aggregations are free to use this information as needed, such as Max and Min which inspect the field context to see if they + * can apply an optimization. + *
+ * + *+ * This class is primarily used for parsing user type hints, and is deprecated for new use. Work is ongoing to remove it from existing + * code. + *
+ * + */