From d3bc413f0faa17238f9843146ae24f738f545133 Mon Sep 17 00:00:00 2001 From: Zachary Tong Date: Mon, 10 Feb 2020 16:27:43 -0500 Subject: [PATCH 1/4] Convert RareTerms to new VS registry --- .../elasticsearch/search/SearchModule.java | 3 +- .../terms/RareTermsAggregationBuilder.java | 5 + .../terms/RareTermsAggregatorFactory.java | 124 +++++++++++++----- .../terms/RareTermsAggregatorSupplier.java | 27 ++++ .../terms/RareTermsAggregatorTests.java | 2 +- 5 files changed, 128 insertions(+), 33 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorSupplier.java diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index cf49a8266afc3..6c7773f0f6704 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -395,7 +395,8 @@ private void registerAggregations(List plugins) { RareTermsAggregationBuilder::parse) .addResultReader(StringRareTerms.NAME, StringRareTerms::new) .addResultReader(UnmappedRareTerms.NAME, UnmappedRareTerms::new) - .addResultReader(LongRareTerms.NAME, LongRareTerms::new)); + .addResultReader(LongRareTerms.NAME, LongRareTerms::new) + .setAggregatorRegistrar(RareTermsAggregationBuilder::registerAggregators)); registerAggregation(new AggregationSpec(SignificantTermsAggregationBuilder.NAME, SignificantTermsAggregationBuilder::new, SignificantTermsAggregationBuilder::parse) .addResultReader(SignificantStringTerms.NAME, SignificantStringTerms::new) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregationBuilder.java index a578a569550bf..bc12343996172 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregationBuilder.java @@ -34,6 +34,7 @@ import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.aggregations.support.ValuesSourceParserHelper; +import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; import org.elasticsearch.search.aggregations.support.ValuesSourceType; import java.io.IOException; @@ -66,6 +67,10 @@ public static AggregationBuilder parse(String aggregationName, XContentParser pa return PARSER.parse(parser, new RareTermsAggregationBuilder(aggregationName), null); } + public static void registerAggregators(ValuesSourceRegistry valuesSourceRegistry) { + RareTermsAggregatorFactory.registerAggregators(valuesSourceRegistry); + } + private IncludeExclude includeExclude = null; private int maxDocCount = 1; private double precision = 0.001; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorFactory.java index 4da3b1b3513ea..d4e5687031b40 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorFactory.java @@ -30,9 +30,12 @@ import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.NonCollectingAggregator; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.AggregatorSupplier; +import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; +import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -44,6 +47,89 @@ public class RareTermsAggregatorFactory extends ValuesSourceAggregatorFactory { private final int maxDocCount; private final double precision; + // TODO: Registration should happen on the actual aggregator classes + static void registerAggregators(ValuesSourceRegistry valuesSourceRegistry) { + valuesSourceRegistry.register(RareTermsAggregationBuilder.NAME, + List.of(CoreValuesSourceType.BYTES, CoreValuesSourceType.IP), + RareTermsAggregatorFactory.bytesSupplier()); + + valuesSourceRegistry.register(RareTermsAggregationBuilder.NAME, + List.of(CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN, CoreValuesSourceType.NUMERIC), + RareTermsAggregatorFactory.numericSupplier()); + } + + /** + * This supplier is used for all the field types that should be aggregated as bytes/strings, + * including those that need global ordinals + */ + private static RareTermsAggregatorSupplier bytesSupplier() { + return new RareTermsAggregatorSupplier() { + @Override + public Aggregator build(String name, + AggregatorFactories factories, + ValuesSource valuesSource, + DocValueFormat format, + int maxDocCount, + double precision, + IncludeExclude includeExclude, + SearchContext context, + Aggregator parent, + List pipelineAggregators, + Map metaData) throws IOException { + + ExecutionMode execution = ExecutionMode.MAP; //TODO global ords not implemented yet, only supports "map" + + if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) { + throw new AggregationExecutionException("Aggregation [" + name + "] cannot support " + + "regular expression style include/exclude settings as they can only be applied to string fields. " + + "Use an array of values for include/exclude clauses"); + } + + return execution.create(name, factories, valuesSource, format, + includeExclude, context, parent, pipelineAggregators, metaData, maxDocCount, precision); + + } + }; + } + + /** + * This supplier is used for all fields that expect to be aggregated as a numeric value. + * This includes floating points, and formatted types that use numerics internally for storage (date, boolean, etc) + */ + private static RareTermsAggregatorSupplier numericSupplier() { + return new RareTermsAggregatorSupplier() { + @Override + public Aggregator build(String name, + AggregatorFactories factories, + ValuesSource valuesSource, + DocValueFormat format, + int maxDocCount, + double precision, + IncludeExclude includeExclude, + SearchContext context, + Aggregator parent, + List pipelineAggregators, + Map metaData) throws IOException { + + if ((includeExclude != null) && (includeExclude.isRegexBased())) { + throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression " + + "style include/exclude settings as they can only be applied to string fields. Use an array of numeric " + + "values for include/exclude clauses used to filter numeric fields"); + } + + IncludeExclude.LongFilter longFilter = null; + if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) { + throw new AggregationExecutionException("RareTerms aggregation does not support floating point fields."); + } + if (includeExclude != null) { + longFilter = includeExclude.convertToLongFilter(format); + } + return new LongRareTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, format, + context, parent, longFilter, maxDocCount, precision, pipelineAggregators, metaData); + } + }; + } + RareTermsAggregatorFactory(String name, ValuesSourceConfig config, IncludeExclude includeExclude, QueryShardContext queryShardContext, @@ -79,40 +165,16 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, if (collectsFromSingleBucket == false) { return asMultiBucketAggregator(this, searchContext, parent); } - if (valuesSource instanceof ValuesSource.Bytes) { - ExecutionMode execution = ExecutionMode.MAP; //TODO global ords not implemented yet, only supports "map" - - DocValueFormat format = config.format(); - if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) { - throw new AggregationExecutionException("Aggregation [" + name + "] cannot support " + - "regular expression style include/exclude settings as they can only be applied to string fields. " + - "Use an array of values for include/exclude clauses"); - } - - return execution.create(name, factories, valuesSource, format, - includeExclude, searchContext, parent, pipelineAggregators, metaData, maxDocCount, precision); - } - - if ((includeExclude != null) && (includeExclude.isRegexBased())) { - throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " - + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses " + - "used to filter numeric fields"); - } - if (valuesSource instanceof ValuesSource.Numeric) { - IncludeExclude.LongFilter longFilter = null; - if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) { - throw new AggregationExecutionException("RareTerms aggregation does not support floating point fields."); - } - if (includeExclude != null) { - longFilter = includeExclude.convertToLongFilter(config.format()); - } - return new LongRareTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), - searchContext, parent, longFilter, maxDocCount, precision, pipelineAggregators, metaData); + AggregatorSupplier aggregatorSupplier = queryShardContext.getValuesSourceRegistry().getAggregator(config.valueSourceType(), + RareTermsAggregationBuilder.NAME); + if (aggregatorSupplier instanceof RareTermsAggregatorSupplier == false) { + throw new AggregationExecutionException("Registry miss-match - expected RareTermsAggregatorSupplier, found [" + + aggregatorSupplier.getClass().toString() + "]"); } - throw new AggregationExecutionException("RareTerms aggregation cannot be applied to field [" + config.fieldContext().field() - + "]. It can only be applied to numeric or string fields."); + return ((RareTermsAggregatorSupplier) aggregatorSupplier).build(name, factories, valuesSource, config.format(), + maxDocCount, precision, includeExclude, searchContext, parent, pipelineAggregators, metaData); } public enum ExecutionMode { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorSupplier.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorSupplier.java new file mode 100644 index 0000000000000..b4fb0c62d0df1 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorSupplier.java @@ -0,0 +1,27 @@ +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorFactories; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.AggregatorSupplier; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +interface RareTermsAggregatorSupplier extends AggregatorSupplier { + Aggregator build(String name, + AggregatorFactories factories, + ValuesSource valuesSource, + DocValueFormat format, + int maxDocCount, + double precision, + IncludeExclude includeExclude, + SearchContext context, + Aggregator parent, + List pipelineAggregators, + Map metaData) throws IOException; +} diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java index 9718f2cace660..f130dae96503a 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java @@ -311,7 +311,7 @@ public void testRangeField() throws Exception { IndexSearcher indexSearcher = newIndexSearcher(indexReader); RareTermsAggregationBuilder aggregationBuilder = new RareTermsAggregationBuilder("_name") .field("field"); - expectThrows(AggregationExecutionException.class, + expectThrows(IllegalArgumentException.class, () -> createAggregator(aggregationBuilder, indexSearcher, fieldType)); } } From 00c78aa306602274db00fa3c2020007aa1de092e Mon Sep 17 00:00:00 2001 From: Zachary Tong Date: Wed, 12 Feb 2020 14:14:43 -0500 Subject: [PATCH 2/4] Address review comments --- .../main/java/org/elasticsearch/search/SearchModule.java | 2 +- .../bucket/terms/RareTermsAggregatorFactory.java | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index 6c7773f0f6704..21898279355c6 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -396,7 +396,7 @@ private void registerAggregations(List plugins) { .addResultReader(StringRareTerms.NAME, StringRareTerms::new) .addResultReader(UnmappedRareTerms.NAME, UnmappedRareTerms::new) .addResultReader(LongRareTerms.NAME, LongRareTerms::new) - .setAggregatorRegistrar(RareTermsAggregationBuilder::registerAggregators)); + .setAggregatorRegistrar(RareTermsAggregationBuilder::registerAggregators)); registerAggregation(new AggregationSpec(SignificantTermsAggregationBuilder.NAME, SignificantTermsAggregationBuilder::new, SignificantTermsAggregationBuilder::parse) .addResultReader(SignificantStringTerms.NAME, SignificantStringTerms::new) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorFactory.java index d4e5687031b40..c959baad7bcd4 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorFactory.java @@ -47,7 +47,6 @@ public class RareTermsAggregatorFactory extends ValuesSourceAggregatorFactory { private final int maxDocCount; private final double precision; - // TODO: Registration should happen on the actual aggregator classes static void registerAggregators(ValuesSourceRegistry valuesSourceRegistry) { valuesSourceRegistry.register(RareTermsAggregationBuilder.NAME, List.of(CoreValuesSourceType.BYTES, CoreValuesSourceType.IP), @@ -80,7 +79,7 @@ public Aggregator build(String name, ExecutionMode execution = ExecutionMode.MAP; //TODO global ords not implemented yet, only supports "map" if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) { - throw new AggregationExecutionException("Aggregation [" + name + "] cannot support " + + throw new IllegalArgumentException("Aggregation [" + name + "] cannot support " + "regular expression style include/exclude settings as they can only be applied to string fields. " + "Use an array of values for include/exclude clauses"); } @@ -112,14 +111,14 @@ public Aggregator build(String name, Map metaData) throws IOException { if ((includeExclude != null) && (includeExclude.isRegexBased())) { - throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression " + + throw new IllegalArgumentException("Aggregation [" + name + "] cannot support regular expression " + "style include/exclude settings as they can only be applied to string fields. Use an array of numeric " + "values for include/exclude clauses used to filter numeric fields"); } IncludeExclude.LongFilter longFilter = null; if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) { - throw new AggregationExecutionException("RareTerms aggregation does not support floating point fields."); + throw new IllegalArgumentException("RareTerms aggregation does not support floating point fields."); } if (includeExclude != null) { longFilter = includeExclude.convertToLongFilter(format); From 0d9f03a7982b4d84279b12046a5a34ea6c20f646 Mon Sep 17 00:00:00 2001 From: Zachary Tong Date: Thu, 13 Feb 2020 07:38:16 -0500 Subject: [PATCH 3/4] Checkstyle --- .../terms/RareTermsAggregatorSupplier.java | 18 ++++++++++++++++++ .../bucket/terms/RareTermsAggregatorTests.java | 1 - 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorSupplier.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorSupplier.java index b4fb0c62d0df1..98e718d6e3b10 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorSupplier.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorSupplier.java @@ -1,3 +1,21 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.elasticsearch.search.aggregations.bucket.terms; import org.elasticsearch.search.DocValueFormat; diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java index f130dae96503a..1dffa9b768d9f 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java @@ -52,7 +52,6 @@ import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.aggregations.Aggregation; -import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.Aggregations; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorTestCase; From 7c637a5646798952d1a6bc53dd9478ebb7754304 Mon Sep 17 00:00:00 2001 From: Zachary Tong Date: Tue, 18 Feb 2020 11:11:44 -0500 Subject: [PATCH 4/4] Tweak YAML test assertion Makes the catch more explicit (looking for the error) while also making it bwc across versions due to exception change --- .../rest-api-spec/test/search.aggregation/280_rare_terms.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml index 04d76a987809d..3bd7d7feeb8eb 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml @@ -112,7 +112,7 @@ setup: - length: { aggregations.ip_terms.buckets: 0 } - do: - catch: request + catch: /Aggregation \[ip_terms\] cannot support regular expression style include\/exclude settings as they can only be applied to string fields\. Use an array of values for include\/exclude clauses/ search: index: test_1 body: { "size" : 0, "aggs" : { "ip_terms" : { "rare_terms" : { "field" : "ip", "exclude" : "127.*" } } } }