Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.AggregatorSupplier;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.aggregations.support.HistogramAggregatorSupplier;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,16 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.support;
package org.elasticsearch.search.aggregations.bucket.histogram;

import org.elasticsearch.common.Nullable;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.AggregatorSupplier;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
private final boolean showTermDocCountError;

// TODO: Registration should happen on the actual aggregator classes
static void registerAggregators(ValuesSourceRegistry valuesSourceRegistry) {
valuesSourceRegistry.register(TermsAggregationBuilder.NAME,
List.of(CoreValuesSourceType.BYTES, CoreValuesSourceType.IP),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,25 @@
*/
package org.elasticsearch.search.aggregations.support;

/**
* {@link AggregatorSupplier} is a place holder interface. The {@link ValuesSourceRegistry} uses this as a common interface for tools to
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not really sure what you mean by place holder. Like, you mean that we'll replace it with another one later? Does this interface just exist to mark things that the registry can handle?

* construct aggregator instances. The aggregators for each aggregation should all share a signature, and that signature should be
* used to create an AggregatorSupplier for that aggregation. Alternatively, if an existing supplier has a matching signature, please
* re-use that.
*
* In many cases, this can be a simple wrapper over the aggregator constructor. If that is sufficient, please consider the "typecast
* lambda" syntax:
*
* {@code
* (GeoCentroidAggregatorSupplier) (name, context, parent, valuesSource, pipelineAggregators, metaData) ->
* new GeoCentroidAggregator(name, context, parent, (ValuesSource.GeoPoint) valuesSource, pipelineAggregators, metaData));
* }
*
* The suppliers are responsible for any casting of {@link ValuesSource} that needs to happen. They must accept a base {@link ValuesSource}
* instance. The suppliers may perform additional logic to configure the aggregator as needed, such as in
* {@link org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory} deciding the execution mode.
*
* There is ongoing work to normalize aggregator constructor signatures, and thus reduce the number of AggregatorSupplier interfaces.
*/
public interface AggregatorSupplier {
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ public ValuesSource getScript(AggregationScript.LeafFactory script, ValueType sc
public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFactory script) {

if ((fieldContext.indexFieldData() instanceof IndexNumericFieldData) == false) {
// TODO: Is this the correct exception type here?
throw new IllegalArgumentException("Expected numeric type on field [" + fieldContext.field() +
"], but got [" + fieldContext.fieldType().typeName() + "]");
}
Expand All @@ -71,8 +70,9 @@ public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFa
}

@Override
public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) {
Number missing = docValueFormat.parseDouble(rawMissing.toString(), false, now);
public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat,
LongSupplier nowSupplier) {
Number missing = docValueFormat.parseDouble(rawMissing.toString(), false, nowSupplier);
return MissingValues.replaceMissing((ValuesSource.Numeric) valuesSource, missing);
}
},
Expand Down Expand Up @@ -104,7 +104,8 @@ public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFa
}

@Override
public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) {
public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat,
LongSupplier nowSupplier) {
final BytesRef missing = docValueFormat.parseBytesRef(rawMissing.toString());
if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals) {
return MissingValues.replaceMissing((ValuesSource.Bytes.WithOrdinals) valuesSource, missing);
Expand All @@ -127,7 +128,6 @@ public ValuesSource getScript(AggregationScript.LeafFactory script, ValueType sc
@Override
public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFactory script) {
if (!(fieldContext.indexFieldData() instanceof IndexGeoPointFieldData)) {
// TODO: Is this the correct exception type here?
throw new IllegalArgumentException("Expected geo_point type on field [" + fieldContext.field() +
"], but got [" + fieldContext.fieldType().typeName() + "]");
}
Expand All @@ -136,7 +136,8 @@ public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFa
}

@Override
public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) {
public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat,
LongSupplier nowSupplier) {
// TODO: also support the structured formats of geo points
final GeoPoint missing = new GeoPoint(rawMissing.toString());
return MissingValues.replaceMissing((ValuesSource.GeoPoint) valuesSource, missing);
Expand All @@ -150,7 +151,6 @@ public DocValueFormat getFormatter(String format, ZoneId tz) {
RANGE(EquivalenceType.RANGE) {
@Override
public ValuesSource getEmpty() {
// TODO: Is this the correct exception type here?
throw new IllegalArgumentException("Can't deal with unmapped ValuesSource type " + this.value());
}

Expand All @@ -164,19 +164,18 @@ public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFa
MappedFieldType fieldType = fieldContext.fieldType();

if (fieldType instanceof RangeFieldMapper.RangeFieldType == false) {
// TODO: Is this the correct exception type here?
throw new IllegalStateException("Asked for range ValuesSource, but field is of type " + fieldType.name());
throw new IllegalArgumentException("Asked for range ValuesSource, but field is of type " + fieldType.name());
}
RangeFieldMapper.RangeFieldType rangeFieldType = (RangeFieldMapper.RangeFieldType) fieldType;
return new ValuesSource.Range(fieldContext.indexFieldData(), rangeFieldType.rangeType());
}

@Override
public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) {
public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat,
LongSupplier nowSupplier) {
throw new IllegalArgumentException("Can't apply missing values on a " + valuesSource.getClass());
}
},
// TODO: Ordinal Numbering sync with types from master
IP(EquivalenceType.STRING) {
@Override
public ValuesSource getEmpty() {
Expand All @@ -194,8 +193,9 @@ public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFa
}

@Override
public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) {
return BYTES.replaceMissing(valuesSource, rawMissing, docValueFormat, now);
public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat,
LongSupplier nowSupplier) {
return BYTES.replaceMissing(valuesSource, rawMissing, docValueFormat, nowSupplier);
}

@Override
Expand All @@ -220,8 +220,9 @@ public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFa
}

@Override
public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) {
return NUMERIC.replaceMissing(valuesSource, rawMissing, docValueFormat, now);
public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat,
LongSupplier nowSupplier) {
return NUMERIC.replaceMissing(valuesSource, rawMissing, docValueFormat, nowSupplier);
}

@Override
Expand Down Expand Up @@ -251,8 +252,9 @@ public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFa
}

@Override
public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) {
return NUMERIC.replaceMissing(valuesSource, rawMissing, docValueFormat, now);
public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat,
LongSupplier nowSupplier) {
return NUMERIC.replaceMissing(valuesSource, rawMissing, docValueFormat, nowSupplier);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.io.IOException;
import java.time.ZoneOffset;

@Deprecated
public enum ValueType implements Writeable {

STRING((byte) 1, "string", "string", CoreValuesSourceType.BYTES,
Expand All @@ -41,7 +42,6 @@ public enum ValueType implements Writeable {
new DocValueFormat.DateTime(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, ZoneOffset.UTC,
DateFieldMapper.Resolution.MILLISECONDS)),
IP((byte) 6, "ip", "ip", CoreValuesSourceType.IP, DocValueFormat.IP),
// TODO: what is the difference between "number" and "numeric"?
NUMERIC((byte) 7, "numeric", "numeric", CoreValuesSourceType.NUMERIC, DocValueFormat.RAW),
GEOPOINT((byte) 8, "geo_point", "geo_point", CoreValuesSourceType.GEOPOINT, DocValueFormat.GEOHASH),
BOOLEAN((byte) 9, "boolean", "boolean", CoreValuesSourceType.BOOLEAN, DocValueFormat.BOOLEAN),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,15 @@ protected final ValuesSourceAggregatorFactory doBuild(QueryShardContext querySha
*/
protected abstract ValuesSourceType defaultValueSourceType();

/**
* Aggregations should override this if they need non-standard logic for resolving where to get values from. For example, join
* aggregations (like Parent and Child) ask the user to specify one side of the join and then look up the other field to read values
* from.
*
* The default implementation just uses the field and/or script the user provided.
*
* @return A {@link ValuesSourceConfig} configured based on the parsed field and/or script.
*/
protected ValuesSourceConfig resolveConfig(QueryShardContext queryShardContext) {
return ValuesSourceConfig.resolve(queryShardContext,
this.userValueTypeHint, field, script, missing, timeZone, format, this.defaultValueSourceType(), this.getType());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,16 @@
import java.util.function.LongSupplier;

/**
* ValuesSourceType wraps the creation of specific per-source instances each {@link ValuesSource} needs to provide. Every top-level
* subclass of {@link ValuesSource} should have a corresponding implementation of this interface. In general, new data types seeking
* aggregation support should create a top level {@link ValuesSource}, then implement this to return wrappers for the specific sources of
* values.
* {@link ValuesSourceType}s are the quantum unit of aggregations support. {@link org.elasticsearch.index.mapper.MappedFieldType}s that
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe something like `{@link ValuesSourceType}s represent data types on which aggregations can operate. On data nodes they are {@link resolved} to {@link ValueSource} instances that blah blah blah"

I'm not sure it is useful to explain too much about subclasses of ValuesSource here. I think I'd prefer to read that on ValuesSource.

* allow aggregations map to exactly one ValuesSourceType, although multiple field types can map to the same ValuesSourceType. Aggregations
* in turn provide a set of ValuesSourceTypes they can operate on. ValuesSourceTypes in turn map to a single direct sub-class of
* {@link ValuesSource} (e.g. {@link ValuesSource.Numeric}; note that a given ValuesSourceType can yield different sub-sub-classes, e.g.
* {@link ValuesSource.Numeric.WithScript}, depending on the configuration). Note that it's possible that two different ValuesSourceTypes
* will yield the same ValuesSource subclass. This typically happens when the underlying representation is shared, but logically the data
* are different, such as with numbers and dates.
*
* ValuesSourceTypes should be stateless, and thus immutable. We recommend that plugins define an enum for their ValuesSourceTypes, even
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 on calling out stateless. Though I don't think you need to call out that that implies they are immutable.

* if the plugin only intends to define one ValuesSourceType. ValuesSourceTypes are not serialized as part of the aggregations framework.
*/
public interface ValuesSourceType {
/**
Expand Down Expand Up @@ -66,11 +72,11 @@ public interface ValuesSourceType {
* @param valuesSource - The original {@link ValuesSource}
* @param rawMissing - The missing value we got from the parser, typically a string or number
* @param docValueFormat - The format to use for further parsing the user supplied value, e.g. a date format
* @param now - Used in conjunction with the formatter, should return the current time in milliseconds
* @param nowSupplier - Used in conjunction with the formatter, should return the current time in milliseconds
* @return - Wrapper over the provided {@link ValuesSource} to apply the given missing value
*/
ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat,
LongSupplier now);
LongSupplier nowSupplier);

/**
* This method provides a hook for specifying a type-specific formatter. When {@link ValuesSourceConfig} can resolve a
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/**
* The Aggregations Support package holds shared code for the aggregations framework, especially around dealing with values.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/The Aggregations Support package// ? The topic of the sentence is sort of implied by its position as package javadoc.

*
* Key Classes
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You probably should add html to this, just so it renders in a sane way when I mouseover the package and the like.

*
* {@link org.elasticsearch.search.aggregations.support.ValuesSource} and its subclasses
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is in the package do you need to fully qualify it? In general I'm fine importing things only for Javadoc if it makes the javadoc easier to read.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAICT, package-info.java always requires fully qualified names in links, no idea why.

* These are thin wrappers which provide a unified interface to different ways of getting input data (e.g. DocValues from Lucene, or script
* output). A class hierarchy defines the type of values returned by the source. The top level sub-classes define type-specific behavior,
* such as {@link org.elasticsearch.search.aggregations.support.ValuesSource.Numeric#isFloatingPoint()}. Second level subclasses are
* then specialized based on where they read values from, e.g. script or field cases. There are also adapter classes like
* {@link org.elasticsearch.search.aggregations.bucket.geogrid.CellIdSource} which do run-time conversion from one type to another, often
* dependent on a user specified parameter (precision in that case).
*
* {@link org.elasticsearch.search.aggregations.support.ValuesSourceRegistry}
* ValuesSourceRegistry stores the mappings for what types are supported by what aggregations. It is configured once during startup, when
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/once during startup/at startup/ ?

* {@link org.elasticsearch.search.SearchModule} is configuring aggregations. It shouldn't be necessary to access the registry in most
* cases, but you can get a read copy from {@link org.elasticsearch.index.query.QueryShardContext#getValuesSourceRegistry()} if necessary.
*
* {@link org.elasticsearch.search.aggregations.support.ValuesSourceType}
* ValuesSourceTypes are the quantum of support in the aggregations framework, and provide a common language between fields and
* aggregations. Fields which support aggregation override {@link org.elasticsearch.index.mapper.MappedFieldType#getValuesSourceType()} to
* return a compatible VaulesSourceType (based on how the field is stored), and aggregations register what types they support via one of the
* {@link org.elasticsearch.search.aggregations.support.ValuesSourceRegistry#register} methods. The VaulesSourceType itself holds
* information on how to with values of that type, including methods for creating
* {@link org.elasticsearch.search.aggregations.support.ValuesSource} instances and {@link org.elasticsearch.search.DocValueFormat}
* instances.
*
* {@link org.elasticsearch.search.aggregations.support.ValuesSourceConfig}
* There are two things going on in ValuesSourceConfig. First, there is a collection of static factory methods to build valid configs for
* different situations. {@link org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder#resolveConfig} has a good
* default for what to call here and generally aggregations shouldn't need to deviate from that.
*
* Once properly constructed, the ValuesSourceConfig provides access to the ValuesSource instance, as well as related information like the
* formatter. Aggregations are free to use this information as needed, such as Max and Min which inspect the field context to see if they
* can apply an optimization.
*
* Classes we are trying to phase out
* {@link org.elasticsearch.search.aggregations.support.ValueType}
* This class is primarily used for parsing user type hints, and is deprecated for new use. Work is ongoing to remove it from existing
* code.
*
*/
package org.elasticsearch.search.aggregations.support;