elastic · nyurik · Feb 1, 2019 · Jan 25, 2019 · Jan 24, 2019 · Jan 25, 2019
diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/RestHighLevelClient.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/RestHighLevelClient.java
@@ -95,6 +95,8 @@
 import org.elasticsearch.search.aggregations.bucket.filter.ParsedFilters;
 import org.elasticsearch.search.aggregations.bucket.geogrid.GeoHashGridAggregationBuilder;
 import org.elasticsearch.search.aggregations.bucket.geogrid.ParsedGeoHashGrid;
+import org.elasticsearch.search.aggregations.bucket.geogrid.ParsedQuadkeyGrid;
+import org.elasticsearch.search.aggregations.bucket.geogrid.QuadkeyGridAggregationBuilder;
 import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder;
 import org.elasticsearch.search.aggregations.bucket.global.ParsedGlobal;
 import org.elasticsearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder;
@@ -1759,6 +1761,7 @@ static List<NamedXContentRegistry.Entry> getDefaultNamedXContents() {
         map.put(FilterAggregationBuilder.NAME, (p, c) -> ParsedFilter.fromXContent(p, (String) c));
         map.put(InternalSampler.PARSER_NAME, (p, c) -> ParsedSampler.fromXContent(p, (String) c));
         map.put(GeoHashGridAggregationBuilder.NAME, (p, c) -> ParsedGeoHashGrid.fromXContent(p, (String) c));
+        map.put(QuadkeyGridAggregationBuilder.NAME, (p, c) -> ParsedQuadkeyGrid.fromXContent(p, (String) c));
         map.put(RangeAggregationBuilder.NAME, (p, c) -> ParsedRange.fromXContent(p, (String) c));
         map.put(DateRangeAggregationBuilder.NAME, (p, c) -> ParsedDateRange.fromXContent(p, (String) c));
         map.put(GeoDistanceAggregationBuilder.NAME, (p, c) -> ParsedGeoDistance.fromXContent(p, (String) c));

diff --git a/docs/reference/aggregations/bucket/quadkeygrid-aggregation.asciidoc b/docs/reference/aggregations/bucket/quadkeygrid-aggregation.asciidoc
@@ -0,0 +1,182 @@
+[[search-aggregations-bucket-quadkeygrid-aggregation]]
+=== Quadkey Grid Aggregation
+
+A multi-bucket aggregation that works on `geo_point` fields and groups points into buckets that represent cells in a grid.
+The resulting grid can be sparse and only contains cells that have matching data.
+Each cell corresponds to a https://en.wikipedia.org/wiki/Tiled_web_map[map tile] as used by many online map sites.
+Each cell is labeled using a "{zoom}/{x}/{y}" format, where zoom is equal to the user-specified precision.
+
+* High precision quadkeys have a larger range for x and y, and represent tiles that cover only a small area.
+* Low precision quadkeys have a smaller range for x and y, and represent tiles that each cover a large area.
+
+Quadkeys used in this aggregation can have a precision between 0 and 29.
+
+WARNING: The highest precision of 29 produces cells that cover less than a square metre of land, and
+so high-precision requests can be very costly in terms of RAM and result sizes.
+Please see the example below on how to first filter the aggregation to a smaller geographic area before requesting
+high levels of detail.
+
+The specified field must be of type `geo_point` (which can only be set explicitly in the mappings). It can also hold
+an array of `geo_point` values, in which case all points will be taken into account during aggregation.
+
+
+==== Simple low-precision request
+
+[source,js]
+--------------------------------------------------
+PUT /museums
+{
+    "mappings": {
+          "properties": {
+              "location": {
+                  "type": "geo_point"
+              }
+          }
+    }
+}
+
+POST /museums/_bulk?refresh
+{"index":{"_id":1}}
+{"location": "52.374081,4.912350", "name": "NEMO Science Museum"}
+{"index":{"_id":2}}
+{"location": "52.369219,4.901618", "name": "Museum Het Rembrandthuis"}
+{"index":{"_id":3}}
+{"location": "52.371667,4.914722", "name": "Nederlands Scheepvaartmuseum"}
+{"index":{"_id":4}}
+{"location": "51.222900,4.405200", "name": "Letterenhuis"}
+{"index":{"_id":5}}
+{"location": "48.861111,2.336389", "name": "Musée du Louvre"}
+{"index":{"_id":6}}
+{"location": "48.860000,2.327000", "name": "Musée d'Orsay"}
+
+POST /museums/_search?size=0
+{
+    "aggregations" : {
+        "large-grid" : {
+            "quadkey_grid" : {
+                "field" : "location",
+                "precision" : 8
+            }
+        }
+    }
+}
+--------------------------------------------------
+// CONSOLE
+
+Response:
+
+[source,js]
+--------------------------------------------------
+{
+    ...
+    "aggregations": {
+        "large-grid": {
+            "buckets": [
+                {
+                  "key" : "8/131/84",
+                  "doc_count" : 3
+                },
+                {
+                  "key" : "8/129/88",
+                  "doc_count" : 2
+                },
+                {
+                  "key" : "8/131/85",
+                  "doc_count" : 1
+                }
+            ]
+        }
+    }
+}
+--------------------------------------------------
+// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/]
+
+==== High-precision requests
+
+When requesting detailed buckets (typically for displaying a "zoomed in" map), a filter like <<query-dsl-geo-bounding-box-query,geo_bounding_box>> should be applied to narrow the subject area; otherwise, potentially millions of buckets will be created and returned.
+
+[source,js]
+--------------------------------------------------
+POST /museums/_search?size=0
+{
+    "aggregations" : {
+        "zoomed-in" : {
+            "filter" : {
+                "geo_bounding_box" : {
+                    "location" : {
+                        "top_left" : "52.4, 4.9",
+                        "bottom_right" : "52.3, 5.0"
+                    }
+                }
+            },
+            "aggregations":{
+                "zoom1":{
+                    "quadkey_grid" : {
+                        "field": "location",
+                        "precision": 20
+                    }
+                }
+            }
+        }
+    }
+}
+--------------------------------------------------
+// CONSOLE
+// TEST[continued]
+
+[source,js]
+--------------------------------------------------
+{
+    ...
+    "aggregations" : {
+        "zoomed-in" : {
+            "doc_count" : 3,
+            "zoom1" : {
+                "buckets" : [
+                    {
+                      "key" : "20/538603/344594",
+                      "doc_count" : 1
+                    },
+                    {
+                      "key" : "20/538596/344583",
+                      "doc_count" : 1
+                    },
+                    {
+                      "key" : "20/538564/344606",
+                      "doc_count" : 1
+                    }
+                ]
+            }
+        }
+    }
+}
+--------------------------------------------------
+// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/]
+
+
+==== Options
+
+[horizontal]
+field::         Mandatory. The name of the field indexed with GeoPoints.
+
+precision::     Optional. The integer zoom of the key used to define
+                cells/buckets in the results. Defaults to 7.
+                The precision can be defined in terms of the integer
+                precision levels mentioned above. Values outside of [0,29] will
+                be rejected.
+                Alternatively, the precision level can be approximated from a
+                distance measure like "1km", "10m". The precision level is
+                calculated such that cells will not exceed the specified
+                size (diagonal) of the required precision. When this would lead
+                to a precision level higher than the supported maximum of 29,
+                the value is rejected.
+
+size::          Optional. The maximum number of quadkey buckets to return
+                (defaults to 10,000). When results are trimmed, buckets are
+                prioritised based on the volumes of documents they contain.
+
+shard_size::    Optional. To allow for more accurate counting of the top cells
+                returned in the final result the aggregation defaults to
+                returning `max(10,(size x number-of-shards))` buckets from each
+                shard. If this heuristic is undesirable, the number considered
+                from each shard can be over-ridden using this parameter.
diff --git a/server/src/main/java/org/elasticsearch/common/geo/QuadkeyUtils.java b/server/src/main/java/org/elasticsearch/common/geo/QuadkeyUtils.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.common.geo;
+
+import org.apache.lucene.util.BitUtil;
+import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.support.XContentMapValues;
+
+import java.io.IOException;
+
+import static org.elasticsearch.common.geo.GeoUtils.normalizeLat;
+import static org.elasticsearch.common.geo.GeoUtils.normalizeLon;
+
+/**
+ * Implements quadkey hashing, the same scheme as used by map tiles.
+ * The string key is formatted as "zoom/x/y".
+ * The hash value (long) contains all three of those values.
+ */
+public class QuadkeyUtils {
+
+    /**
+     * Largest zoom level (precision) that can be used.
+     * This value cannot be more than (64-5)/2 = 29, because 5 bits of the hash are used for the zoom level itself.
+     * If the zoom were not stored inside the hash, it would be possible to use up to 32.
+     * Another consideration is that the index optimizes lat/lng storage, losing some precision.
+     * E.g. hash lng=140.74779717298918D lat=45.61884022447444D == "18/233561/93659", but shown as "18/233561/93658"
+     */
+    public static final int MAX_ZOOM = 29;
+
+    /**
+     * Bit position of the zoom value within the hash.  Must be &gt;= 2*MAX_ZOOM.
+     * Keeping it at a constant place allows MAX_ZOOM to be increased
+     * without breaking binary compatibility of the serialized hash
+     * (still, a newer version should not use a higher MAX_ZOOM in the mixed cases).
+     */
+    private static final int ZOOM_SHIFT = 29 * 2;
+
+    /**
+     * Mask selecting the low ZOOM_SHIFT bits of a hash, i.e. all the bits used by the quadkey.
+     */
+    private static final long QUADKEY_MASK = (1L << ZOOM_SHIFT) - 1;
+
+    /**
+     * Decodes a quadkey hash into an array of {zoom, xTile, yTile}.
+     * Throws IllegalArgumentException when the zoom or the tile coordinates are out of range.
+     */
+    private static int[] parseHash(final long hash) {
+        final int zoom = checkPrecisionRange((int) (hash >>> ZOOM_SHIFT));
+        final int tiles = 1 << zoom;
+
+        // decode the quadkey bits as interleaved xTile and yTile
+        final long val = hash & QUADKEY_MASK;
+        final int xTile = (int) BitUtil.deinterleave(val);
+        final int yTile = (int) BitUtil.deinterleave(val >>> 1);
+        if (xTile < 0 || yTile < 0 || xTile >= tiles || yTile >= tiles) {
+            throw new IllegalArgumentException("Invalid quadkey hash tile " + xTile + "/" + yTile + " for zoom " + zoom);
+        }
+        return new int[]{zoom, xTile, yTile};
+    }
+
+    /**
+     * Reads the precision at the parser's current token. It may be an integer zoom level,
+     * a string holding an integer, or a distance measure such as "1km" or "10m".
+     *
+     * Only a precision derived from a distance is range-checked here, against 0..MAX_ZOOM.
+     *
+     * @param parser {@link XContentParser} to read the value from
+     * @return int representing precision
+     */
+    public static int parsePrecision(XContentParser parser) throws IOException, ElasticsearchParseException {
+        // a numeric token is used as the precision level directly
+        if (parser.currentToken().equals(XContentParser.Token.VALUE_NUMBER)) {
+            return XContentMapValues.nodeIntegerValue(parser.intValue());
+        }
+        final String text = parser.text();
+        try {
+            // simple integer strings are precision levels, not distances
+            return XContentMapValues.nodeIntegerValue(text);
+        } catch (NumberFormatException notAnInteger) {
+            // not an integer: interpret the text as a distance value below
+        }
+        final int levels = GeoUtils.quadTreeLevelsForPrecision(text);
+        try {
+            return checkPrecisionRange(levels);
+        } catch (IllegalArgumentException outOfRange) {
+            // this happens when the distance is too small, so precision > max;
+            // the message carries the original string rather than the derived level
+            throw new IllegalArgumentException("precision too high [" + text + "]", outOfRange);
+        }
+    }
+
+    /** Returns {@code precision} unchanged if it lies in [0, MAX_ZOOM]; otherwise throws IllegalArgumentException. */
+    public static int checkPrecisionRange(int precision) {
+        if (precision >= 0 && precision <= MAX_ZOOM) {
+            return precision;
+        }
+        throw new IllegalArgumentException("Invalid quadkey precision of " + precision + ". Must be between 0 and " + MAX_ZOOM + ".");
+    }
+
+    /**
+     * Computes the quadkey hash (long format) of the tile containing the given lon/lat.
+     * The tile X and Y coordinates are interleaved into the low bits of the result,
+     * and the precision is stored above them, starting at bit ZOOM_SHIFT.
+     *
+     * @param longitude longitude of the point; passed through normalizeLon before use
+     * @param latitude  latitude of the point; passed through normalizeLat before use
+     * @param precision zoom level, validated by checkPrecisionRange
+     * @return the hash combining precision, tile X and tile Y
+     * @throws IllegalArgumentException if precision is outside [0, MAX_ZOOM]
+     */
+    public static long longEncode(double longitude, double latitude, int precision) {
+        // Mathematics for this code was adapted from https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#Java
+
+        // Number of tiles along X and along Y at this precision
+        final int tiles = 1 << checkPrecisionRange(precision);
+        final double lon = normalizeLon(longitude);
+        final double latRadians = Math.toRadians(normalizeLat(latitude));
+
+        // Position of the point as a fraction of the map width / height
+        final double xFraction = (lon + 180) / 360;
+        final double mercator = Math.log(Math.tan(latRadians) + 1 / Math.cos(latRadians));
+        final double yFraction = (1 - mercator / Math.PI) / 2;
+
+        final int rawX = (int) Math.floor(xFraction * tiles);
+        final int rawY = (int) Math.floor(yFraction * tiles);
+
+        // Clamp both coordinates into [0, tiles - 1]
+        final int xTile = Math.max(0, Math.min(tiles - 1, rawX));
+        final int yTile = Math.max(0, Math.min(tiles - 1, rawY));
+
+        // Zoom value is placed in front of all the bits used for the quadkey
+        // e.g. if max zoom is 26, the largest index would use 52 bits (51st..0th),
+        // leaving 12 bits unused for zoom. See MAX_ZOOM comment above.
+        return BitUtil.interleave(xTile, yTile) | ((long) precision << ZOOM_SHIFT);
+    }
+
+    /**
+     * Formats a quadkey hash (long format) as the string "zoom/x/y"
+     */
+    public static String stringEncode(long hash) {
+        final int[] zxy = parseHash(hash);
+        return zxy[0] + "/" + zxy[1] + "/" + zxy[2];
+    }
+
+    public static GeoPoint hashToGeoPoint(long hash) {
+        final int[] zxy = parseHash(hash); // zoom, x, y; parseHash rejects an invalid hash
+        return zxyToGeoPoint(zxy[0], zxy[1], zxy[2]);
+    }
+
+    /** Parses a "zoom/x/y" string into a GeoPoint; malformed strings are rejected with IllegalArgumentException. */
+    public static GeoPoint hashToGeoPoint(String hashAsString) {
+        final String[] parts = hashAsString.split("/", 4);
+        IllegalArgumentException cause = null;
+        if (parts.length == 3) {
+            try {
+                return zxyToGeoPoint(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]), Integer.parseInt(parts[2]));
+            } catch (IllegalArgumentException e) {
+                cause = e; // NumberFormatException is an IllegalArgumentException, so it lands here too
+            }
+        }
+        throw new IllegalArgumentException("Invalid quadkey hash string of " +
+            hashAsString + ". Must be three integers in a form \"zoom/x/y\".", cause);
+    }
+
+    /** Returns the center of tile zoom/xTile/yTile as a GeoPoint, or throws IllegalArgumentException if out of range. */
+    private static GeoPoint zxyToGeoPoint(int zoom, int xTile, int yTile) {
+        final int maxTiles = 1 << checkPrecisionRange(zoom);
+        if (xTile < 0 || xTile >= maxTiles || yTile < 0 || yTile >= maxTiles) {
+            throw new IllegalArgumentException("Invalid quadkey z/x/y values of " + zoom + "/" + xTile + "/" + yTile);
+        }
+        final double n = Math.PI - (2.0 * Math.PI * (yTile + 0.5)) / maxTiles; // + 0.5 selects the tile center
+        final double lat = Math.toDegrees(Math.atan(Math.sinh(n)));
+        final double lon = ((xTile + 0.5) / maxTiles * 360.0) - 180;
+        return new GeoPoint(lat, lon);
+    }
+}