elastic · nik9000 · Jun 9, 2020 · Jun 4, 2020 · Jun 5, 2020 · Jun 6, 2020
diff --git a/...lClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java b/...lClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java
@@ -45,6 +45,7 @@
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.hasEntry;
 import static org.hamcrest.Matchers.notNullValue;
 
 @ESIntegTestCase.SuiteScopeTestCase
@@ -59,7 +60,12 @@ public class AggregationProfilerIT extends ESIntegTestCase {
 
     private static final String TOTAL_BUCKETS = "total_buckets";
     private static final String WRAPPED = "wrapped_in_multi_bucket_aggregator";
-    private static final Object DEFERRED = "deferred_aggregators";
+    private static final String DEFERRED = "deferred_aggregators";
+    private static final String COLLECTION_STRAT = "collection_strategy";
+    private static final String RESULT_STRAT = "result_strategy";
+    private static final String HAS_FILTER = "has_filter";
+    private static final String SEGMENTS_WITH_SINGLE = "segments_with_single_valued_ords";
+    private static final String SEGMENTS_WITH_MULTI = "segments_with_multi_valued_ords";
 
     private static final String NUMBER_FIELD = "number";
     private static final String TAG_FIELD = "tag";
@@ -73,6 +79,7 @@ protected int numberOfShards() {
     @Override
     protected void setupSuiteScopeCluster() throws Exception {
         assertAcked(client().admin().indices().prepareCreate("idx")
+                .setSettings(Map.of("number_of_shards", 1, "number_of_replicas", 0))
                 .setMapping(STRING_FIELD, "type=keyword", NUMBER_FIELD, "type=integer", TAG_FIELD, "type=keyword").get());
         List<IndexRequestBuilder> builders = new ArrayList<>();
 
@@ -90,7 +97,7 @@ protected void setupSuiteScopeCluster() throws Exception {
                         .endObject()));
         }
 
-        indexRandom(true, builders);
+        indexRandom(true, false, builders);
         createIndex("idx_unmapped");
     }
 
@@ -184,7 +191,7 @@ public void testMultiLevelProfile() {
             assertThat(termsBreakdown.get(COLLECT), greaterThan(0L));
             assertThat(termsBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(termsBreakdown.get(REDUCE), equalTo(0L));
-            assertThat(termsAggResult.getDebugInfo(), equalTo(Map.of(WRAPPED, true)));
+            assertRemapTermsDebugInfo(termsAggResult);
             assertThat(termsAggResult.getProfiledChildren().size(), equalTo(1));
 
             ProfileResult avgAggResult = termsAggResult.getProfiledChildren().get(0);
@@ -204,6 +211,18 @@ public void testMultiLevelProfile() {
         }
     }
 
+    private void assertRemapTermsDebugInfo(ProfileResult termsAggResult) {
+        assertThat(termsAggResult.getDebugInfo(), hasEntry(COLLECTION_STRAT, "remap"));
+        assertThat(termsAggResult.getDebugInfo(), hasEntry(RESULT_STRAT, "terms"));
+        assertThat(termsAggResult.getDebugInfo(), hasEntry(HAS_FILTER, false));
+        // TODO we only index single valued docs but the ordinals ends up with multi valued sometimes
+        assertThat(
+            termsAggResult.getDebugInfo().toString(),
+            (int) termsAggResult.getDebugInfo().get(SEGMENTS_WITH_SINGLE) + (int) termsAggResult.getDebugInfo().get(SEGMENTS_WITH_MULTI),
+            greaterThan(0)
+        );
+    }
+
     public void testMultiLevelProfileBreadthFirst() {
         SearchResponse response = client().prepareSearch("idx").setProfile(true)
                 .addAggregation(histogram("histo").field(NUMBER_FIELD).interval(1L).subAggregation(terms("terms")
@@ -251,7 +270,7 @@ public void testMultiLevelProfileBreadthFirst() {
             assertThat(termsBreakdown.get(COLLECT), greaterThan(0L));
             assertThat(termsBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(termsBreakdown.get(REDUCE), equalTo(0L));
-            assertThat(termsAggResult.getDebugInfo(), equalTo(Map.of(WRAPPED, true)));
+            assertRemapTermsDebugInfo(termsAggResult);
             assertThat(termsAggResult.getProfiledChildren().size(), equalTo(1));
 
             ProfileResult avgAggResult = termsAggResult.getProfiledChildren().get(0);
@@ -377,7 +396,7 @@ public void testComplexProfile() {
             assertThat(tagsBreakdown.get(COLLECT), greaterThan(0L));
             assertThat(tagsBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(tagsBreakdown.get(REDUCE), equalTo(0L));
-            assertThat(tagsAggResult.getDebugInfo(), equalTo(Map.of(WRAPPED, true)));
+            assertRemapTermsDebugInfo(tagsAggResult);
             assertThat(tagsAggResult.getProfiledChildren().size(), equalTo(2));
 
             Map<String, ProfileResult> tagsAggResultSubAggregations = tagsAggResult.getProfiledChildren().stream()
@@ -422,7 +441,7 @@ public void testComplexProfile() {
             assertThat(stringsBreakdown.get(COLLECT), greaterThan(0L));
             assertThat(stringsBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(stringsBreakdown.get(REDUCE), equalTo(0L));
-            assertThat(stringsAggResult.getDebugInfo(), equalTo(Map.of(WRAPPED, true)));
+            assertRemapTermsDebugInfo(stringsAggResult);
             assertThat(stringsAggResult.getProfiledChildren().size(), equalTo(3));
 
             Map<String, ProfileResult> stringsAggResultSubAggregations = stringsAggResult.getProfiledChildren().stream()
@@ -468,7 +487,7 @@ public void testComplexProfile() {
             assertThat(tagsBreakdown.get(COLLECT), greaterThan(0L));
             assertThat(tagsBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(tagsBreakdown.get(REDUCE), equalTo(0L));
-            assertThat(tagsAggResult.getDebugInfo(), equalTo(Map.of(WRAPPED, true)));
+            assertRemapTermsDebugInfo(tagsAggResult);
             assertThat(tagsAggResult.getProfiledChildren().size(), equalTo(2));
 
             tagsAggResultSubAggregations = tagsAggResult.getProfiledChildren().stream()

diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java
@@ -180,7 +180,7 @@ public final InternalAggregation buildTopLevel() throws IOException {
     public abstract InternalAggregation buildEmptyAggregation();
 
     /**
-     * Collect debug information to add to the profiling results.. This will
+     * Collect debug information to add to the profiling results. This will
      * only be called if the aggregation is being profiled.
      * <p>
      * Well behaved implementations will always call the superclass

diff --git a/...rc/main/java/org/elasticsearch/search/aggregations/bucket/terms/BytesKeyedBucketOrds.java b/...rc/main/java/org/elasticsearch/search/aggregations/bucket/terms/BytesKeyedBucketOrds.java
@@ -0,0 +1,220 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.bucket.terms;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.lease.Releasable;
+import org.elasticsearch.common.lease.Releasables;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BytesRefHash;
+
+/**
+ * Maps {@link BytesRef} bucket keys to bucket ordinals.
+ */
+public abstract class BytesKeyedBucketOrds implements Releasable {
+    /**
+     * Build a {@link LongKeyedBucketOrds}.
+     */
+    public static BytesKeyedBucketOrds build(BigArrays bigArrays, boolean collectsFromSingleBucket) {
+        return collectsFromSingleBucket ? new FromSingle(bigArrays) : new FromMany(bigArrays);
+    }
+
+    private BytesKeyedBucketOrds() {}
+
+    /**
+     * Add the {@code owningBucketOrd, value} pair. Return the ord for
+     * their bucket if they have yet to be added, or {@code -1-ord}
+     * if they were already present.
+     */
+    public abstract long add(long owningBucketOrd, BytesRef value);
+
+    /**
+     * Count the buckets in {@code owningBucketOrd}.
+     */
+    public abstract long bucketsInOrd(long owningBucketOrd);
+
+    /**
+     * The number of collected buckets.
+     */
+    public abstract long size();
+
+    /**
+     * Build an iterator for buckets inside {@code owningBucketOrd} in order
+     * of increasing ord.
+     * <p>
+     * When this is first returns it is "unpositioned" and you must call
+     * {@link BucketOrdsEnum#next()} to move it to the first value.
+     */
+    public abstract BucketOrdsEnum ordsEnum(long owningBucketOrd);
+
+    /**
+     * An iterator for buckets inside a particular {@code owningBucketOrd}.
+     */
+    public interface BucketOrdsEnum {
+        /**
+         * Advance to the next value.
+         * @return {@code true} if there *is* a next value,
+         *         {@code false} if there isn't
+         */
+        boolean next();
+
+        /**
+         * The ordinal of the current value.
+         */
+        long ord();
+
+        /**
+         * Read the current value.
+         */
+        void readValue(BytesRef dest);
+
+        /**
+         * An {@linkplain BucketOrdsEnum} that is empty. 
+         */
+        BucketOrdsEnum EMPTY = new BucketOrdsEnum() {
+            @Override
+            public boolean next() {
+                return false;
+            }
+
+            @Override
+            public long ord() {
+                return 0;
+            }
+
+            @Override
+            public void readValue(BytesRef dest) {}
+        };
+    }
+
+    /**
+     * Implementation that only works if it is collecting from a single bucket.
+     */
+    private static class FromSingle extends BytesKeyedBucketOrds {
+        private final BytesRefHash ords;
+
+        private FromSingle(BigArrays bigArrays) {
+            ords = new BytesRefHash(1, bigArrays);
+        }
+
+        @Override
+        public long add(long owningBucketOrd, BytesRef value) {
+            assert owningBucketOrd == 0;
+            return ords.add(value);
+        }
+
+        @Override
+        public long bucketsInOrd(long owningBucketOrd) {
+            return ords.size();
+        }
+
+        @Override
+        public long size() {
+            return ords.size();
+        }
+
+        @Override
+        public BucketOrdsEnum ordsEnum(long owningBucketOrd) {
+            return new BucketOrdsEnum() {
+                private int ord = -1;
+
+                @Override
+                public boolean next() {
+                    ord++;
+                    return ord < ords.size();
+                }
+
+                @Override
+                public long ord() {
+                    return ord;
+                }
+
+                @Override
+                public void readValue(BytesRef dest) {
+                    ords.get(ord, dest);
+                }
+            };
+        }
+
+        @Override
+        public void close() {
+            ords.close();
+        }
+    }
+
+    /**
+     * Implementation that works properly when collecting from many buckets.
+     */
+    private static class FromMany extends BytesKeyedBucketOrds {
+        // TODO we can almost certainly do better here by building something fit for purpose rather than trying to lego together stuff
+        private final BytesRefHash bytesToLong;
+        private final LongKeyedBucketOrds longToBucketOrds;
+
+        private FromMany(BigArrays bigArrays) {
+            bytesToLong = new BytesRefHash(1, bigArrays);
+            longToBucketOrds = LongKeyedBucketOrds.build(bigArrays, false);
+        }
+
+        @Override
+        public long add(long owningBucketOrd, BytesRef value) {
+            long l = bytesToLong.add(value);
+            if (l < 0) {
+                l = -1 - l;
+            }
+            return longToBucketOrds.add(owningBucketOrd, l);
+        }
+
+        @Override
+        public long bucketsInOrd(long owningBucketOrd) {
+            return longToBucketOrds.bucketsInOrd(owningBucketOrd);
+        }
+
+        @Override
+        public long size() {
+            return longToBucketOrds.size();
+        }
+
+        @Override
+        public BucketOrdsEnum ordsEnum(long owningBucketOrd) {
+            LongKeyedBucketOrds.BucketOrdsEnum delegate = longToBucketOrds.ordsEnum(owningBucketOrd);
+            return new BucketOrdsEnum() {
+                @Override
+                public boolean next() {
+                    return delegate.next();
+                }
+
+                @Override
+                public long ord() {
+                    return delegate.ord();
+                }
+
+                @Override
+                public void readValue(BytesRef dest) {
+                    bytesToLong.get(delegate.value(), dest);
+                }
+            };
+        }
+
+        @Override
+        public void close() {
+            Releasables.close(bytesToLong, longToBucketOrds);
+        }
+    }
+}