Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
8848778
Refactored GeoHashGrid unit tests
nyurik Jan 25, 2019
c927e15
Quadkey implementation
nyurik Jan 24, 2019
8590403
fix rebased artifacts
nyurik Jan 25, 2019
8d63835
quadkey unit tests
nyurik Jan 25, 2019
8201303
rename createInternalGeoHashGridBucket to createInternalGeoGridBucket
nyurik Jan 25, 2019
79abc7c
Merge branch 'geohashgrid_tests-v2' into quadkey-v3
nyurik Jan 25, 2019
eebcb66
add lat/lng adjustment, declarations
nyurik Jan 25, 2019
acdb804
fix quadkey tests
nyurik Jan 25, 2019
477fc4f
Merge remote-tracking branch 'origin/master' into quadkey-v3
nyurik Jan 25, 2019
c5fee30
QuadkeyUtils tests
nyurik Jan 25, 2019
3eeadb1
Added quadkey docs, changed dflt to 8
nyurik Jan 25, 2019
9ea893e
Merge remote-tracking branch 'origin/master' into quadkey-v3
nyurik Jan 25, 2019
288178d
string.format fix
nyurik Jan 25, 2019
c1bc488
fix doc example
nyurik Jan 25, 2019
6d69245
Merge remote-tracking branch 'origin/master' into quadkey-v3
nyurik Jan 25, 2019
caf478e
fix unittest
nyurik Jan 25, 2019
5ac5c82
renamed to geotile_grid
nyurik Jan 26, 2019
01b2a3f
style fix
nyurik Jan 26, 2019
c597529
Add GeoTileGridTests to AggregationsTests
nyurik Jan 28, 2019
778452d
Add geotile_grid test to ShardReduceIT
nyurik Jan 28, 2019
347b7f4
Merge remote-tracking branch 'origin/master' into quadkey-v3
nyurik Jan 28, 2019
95f4272
Merge remote-tracking branch 'origin/master' into quadkey-v3
nyurik Jan 29, 2019
a583d5d
Merge remote-tracking branch 'upstream/master' into quadkey-v3
talevy Jan 29, 2019
9d32468
add basic REST test
talevy Jan 29, 2019
11d57df
restrict randomPrecision in GeoTileGridTests
talevy Jan 29, 2019
475e945
remove type usage from rest test
talevy Jan 30, 2019
ef0acd6
Remove support for non-integer precision
nyurik Jan 30, 2019
7bdfcb7
Merge remote-tracking branch 'origin/master' into quadkey-v3
nyurik Jan 30, 2019
01c2423
reformat asciidoc
nyurik Jan 30, 2019
be1132f
optimize geotile hashing algorithm
nyurik Jan 31, 2019
b6cd525
stringEncode test extra by Tal
nyurik Jan 31, 2019
d040f41
Docs, optimize lat/lng encoding
nyurik Jan 31, 2019
bfdd0e9
Merge remote-tracking branch 'origin/master' into quadkey-v3
nyurik Jan 31, 2019
857f105
Merge remote-tracking branch 'origin/master' into quadkey-v3
nyurik Jan 31, 2019
5369cbb
address review comments
nyurik Jan 31, 2019
8c0125c
extra test for polar coords (thx Tal)
nyurik Jan 31, 2019
680a507
address review comments
nyurik Jan 31, 2019
f007644
Merge remote-tracking branch 'origin/master' into quadkey-v3
nyurik Jan 31, 2019
880db72
test builder precision
nyurik Jan 31, 2019
a8f8183
Merge remote-tracking branch 'origin/master' into quadkey-v3
nyurik Jan 31, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@
import org.elasticsearch.search.aggregations.bucket.filter.ParsedFilters;
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoHashGridAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.geogrid.ParsedGeoHashGrid;
import org.elasticsearch.search.aggregations.bucket.geogrid.ParsedQuadkeyGrid;
import org.elasticsearch.search.aggregations.bucket.geogrid.QuadkeyGridAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.global.ParsedGlobal;
import org.elasticsearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder;
Expand Down Expand Up @@ -1759,6 +1761,7 @@ static List<NamedXContentRegistry.Entry> getDefaultNamedXContents() {
map.put(FilterAggregationBuilder.NAME, (p, c) -> ParsedFilter.fromXContent(p, (String) c));
map.put(InternalSampler.PARSER_NAME, (p, c) -> ParsedSampler.fromXContent(p, (String) c));
map.put(GeoHashGridAggregationBuilder.NAME, (p, c) -> ParsedGeoHashGrid.fromXContent(p, (String) c));
map.put(QuadkeyGridAggregationBuilder.NAME, (p, c) -> ParsedQuadkeyGrid.fromXContent(p, (String) c));
map.put(RangeAggregationBuilder.NAME, (p, c) -> ParsedRange.fromXContent(p, (String) c));
map.put(DateRangeAggregationBuilder.NAME, (p, c) -> ParsedDateRange.fromXContent(p, (String) c));
map.put(GeoDistanceAggregationBuilder.NAME, (p, c) -> ParsedGeoDistance.fromXContent(p, (String) c));
Expand Down
182 changes: 182 additions & 0 deletions docs/reference/aggregations/bucket/quadkeygrid-aggregation.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
[[search-aggregations-bucket-quadkeygrid-aggregation]]
=== Quadkey Grid Aggregation

A multi-bucket aggregation that works on `geo_point` fields and groups points into buckets that represent cells in a grid.
The resulting grid can be sparse and only contains cells that have matching data.
Each cell corresponds to a https://en.wikipedia.org/wiki/Tiled_web_map[map tile] as used by many online map sites.
Each cell is labeled using a "{zoom}/{x}/{y}" format, where zoom is equal to the user-specified precision.

* High precision quadkeys have a larger range for x and y, and represent tiles that cover only a small area.
* Low precision quadkeys have a smaller range for x and y, and represent tiles that each cover a large area.

Quadkeys used in this aggregation can have a precision between 0 and 29.

WARNING: The highest-precision quadkey (zoom level 29) produces cells that cover less than a square metre of land and
so high-precision requests can be very costly in terms of RAM and result sizes.
Please see the example below on how to first filter the aggregation to a smaller geographic area before requesting
high levels of detail.

The specified field must be of type `geo_point` (which can only be set explicitly in the mappings) and it can also hold
an array of `geo_point` fields, in which case all points will be taken into account during aggregation.


==== Simple low-precision request

[source,js]
--------------------------------------------------
PUT /museums
{
"mappings": {
"properties": {
"location": {
"type": "geo_point"
}
}
}
}

POST /museums/_bulk?refresh
{"index":{"_id":1}}
{"location": "52.374081,4.912350", "name": "NEMO Science Museum"}
{"index":{"_id":2}}
{"location": "52.369219,4.901618", "name": "Museum Het Rembrandthuis"}
{"index":{"_id":3}}
{"location": "52.371667,4.914722", "name": "Nederlands Scheepvaartmuseum"}
{"index":{"_id":4}}
{"location": "51.222900,4.405200", "name": "Letterenhuis"}
{"index":{"_id":5}}
{"location": "48.861111,2.336389", "name": "Musée du Louvre"}
{"index":{"_id":6}}
{"location": "48.860000,2.327000", "name": "Musée d'Orsay"}

POST /museums/_search?size=0
{
"aggregations" : {
"large-grid" : {
"quadkey_grid" : {
"field" : "location",
"precision" : 8
}
}
}
}
--------------------------------------------------
// CONSOLE

Response:

[source,js]
--------------------------------------------------
{
...
"aggregations": {
"large-grid": {
"buckets": [
{
"key" : "8/131/84",
"doc_count" : 3
},
{
"key" : "8/129/88",
"doc_count" : 2
},
{
"key" : "8/131/85",
"doc_count" : 1
}
]
}
}
}
--------------------------------------------------
// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/]

==== High-precision requests

When requesting detailed buckets (typically for displaying a "zoomed in" map), a filter like <<query-dsl-geo-bounding-box-query,geo_bounding_box>> should be applied to narrow the subject area; otherwise, potentially millions of buckets will be created and returned.

[source,js]
--------------------------------------------------
POST /museums/_search?size=0
{
"aggregations" : {
"zoomed-in" : {
"filter" : {
"geo_bounding_box" : {
"location" : {
"top_left" : "52.4, 4.9",
"bottom_right" : "52.3, 5.0"
}
}
},
"aggregations":{
"zoom1":{
"quadkey_grid" : {
"field": "location",
"precision": 20
}
}
}
}
}
}
--------------------------------------------------
// CONSOLE
// TEST[continued]

[source,js]
--------------------------------------------------
{
...
"aggregations" : {
"zoomed-in" : {
"doc_count" : 3,
"zoom1" : {
"buckets" : [
{
"key" : "20/538603/344594",
"doc_count" : 1
},
{
"key" : "20/538596/344583",
"doc_count" : 1
},
{
"key" : "20/538564/344606",
"doc_count" : 1
}
]
}
}
}
}
--------------------------------------------------
// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/]


==== Options

[horizontal]
field:: Mandatory. The name of the field indexed with GeoPoints.

precision:: Optional. The zoom level of the key used to define
cells/buckets in the results. Defaults to 7.
The precision can be defined as one of the integer
precision levels mentioned above. Values outside of [0,29] will
be rejected.
Alternatively, the precision level can be approximated from a
distance measure like "1km", "10m". The precision level is
calculated such that cells will not exceed the specified
size (diagonal) of the required precision. When this would lead
to precision levels higher than the supported 29 levels,
the value is rejected.

size:: Optional. The maximum number of quadkey buckets to return
(defaults to 10,000). When results are trimmed, buckets are
prioritised based on the volumes of documents they contain.

shard_size:: Optional. To allow for more accurate counting of the top cells
returned in the final result the aggregation defaults to
returning `max(10,(size x number-of-shards))` buckets from each
shard. If this heuristic is undesirable, the number considered
from each shard can be over-ridden using this parameter.
193 changes: 193 additions & 0 deletions server/src/main/java/org/elasticsearch/common/geo/QuadkeyUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.geo;

import org.apache.lucene.util.BitUtil;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.support.XContentMapValues;

import java.io.IOException;

import static org.elasticsearch.common.geo.GeoUtils.normalizeLat;
import static org.elasticsearch.common.geo.GeoUtils.normalizeLon;

/**
* Implements quad key hashing, same as used by map tiles.
* The string key is formatted as "zoom/x/y"
* The hash value (long) contains all three of those values.
*/
public class QuadkeyUtils {

/**
 * Largest zoom level (precision) supported; a zoom of {@code z} yields {@code 1 << z} tiles per axis.
 * This value cannot be more than (64-5)/2 = 29, because 5 bits are used for the zoom level itself.
 * If zoom were not stored inside the hash, it would be possible to use up to 32.
 * Another consideration is that the index optimizes lat/lng storage, losing some precision.
 * E.g. hash lng=140.74779717298918D lat=45.61884022447444D == "18/233561/93659", but shown as "18/233561/93658"
 */
public static final int MAX_ZOOM = 29;

/**
 * Bit position of the zoom value within the hash. Must be &gt;= 2*MAX_ZOOM so that the
 * zoom bits never overlap the interleaved x/y tile bits stored below it.
 * Keeping it at a constant place allows MAX_ZOOM to be increased
 * without breaking serialization binary compatibility
 * (still, the newer version should not use a higher MAX_ZOOM in the mixed cases)
 */
private static final int ZOOM_SHIFT = 29 * 2;

/**
 * Mask of all the bits used by the quadkey in a hash, i.e. every bit below ZOOM_SHIFT.
 */
private static final long QUADKEY_MASK = (1L << ZOOM_SHIFT) - 1;

/**
 * Splits a quadkey hash into its zoom, x and y components.
 *
 * The zoom level is read from the bits at and above ZOOM_SHIFT; the remaining
 * bits hold the x and y tile numbers interleaved bit by bit.
 *
 * @param hash quadkey hash in the long format
 * @return a three element array holding {zoom, xTile, yTile}
 * @throws IllegalArgumentException if the zoom is outside of [0, MAX_ZOOM] or either
 *                                  tile number does not fit into the grid of that zoom
 */
private static int[] parseHash(final long hash) {
    // zoom sits in the high bits, above everything used by the tile coordinates
    final int precision = checkPrecisionRange((int) (hash >>> ZOOM_SHIFT));
    final int tileCount = 1 << precision;

    // even bits carry x, odd bits carry y
    final long interleaved = hash & QUADKEY_MASK;
    final int x = (int) BitUtil.deinterleave(interleaved);
    final int y = (int) BitUtil.deinterleave(interleaved >>> 1);

    final boolean xInRange = x >= 0 && x < tileCount;
    final boolean yInRange = y >= 0 && y < tileCount;
    if (!(xInRange && yInRange)) {
        throw new IllegalArgumentException("hash-tile");
    }

    return new int[]{precision, x, y};
}

/**
* Parse a precision that can be expressed as an integer or a distance measure like "1km", "10m".
*
* The precision is expressed as a zoom level between 0 and MAX_ZOOM.
*
* @param parser {@link XContentParser} to parse the value from
* @return int representing precision
*/
public static int parsePrecision(XContentParser parser) throws IOException, ElasticsearchParseException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two questions:

  1. Should we support unit-less precision levels? It looks like it adds a lot of autodetect magic below, and we've consistently been moving towards requiring units everywhere in ES. I don't know enough about the algo to say, but is there something tangible that we gain by supporting unit-less precision over just 10km, etc?
  2. Is there a reason we fall back to the "old" style of xcontent parsing instead of using the newer static parsers here too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. for quadkey, most of the time it will be unitless - e.g. map visualizations will use current_zoom + 2 as the precision to get 16 dots per visualization tile.
  2. I tried to make as few changes as possible to the existing code to make reviewing easier. This code is an exact copy of the GeoUtils.parsePrecision(), adjusted for different validation and parsing. We might consider rewriting them, but I think we should do them at the same time and possibly consolidate it for reuse.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@polyfractal, per @talevy 's suggestion, I removed non-integer precision - it is not needed by our use cases, and we do not know if they will be needed by other users. This also simplifies precision parsing.

XContentParser.Token token = parser.currentToken();
if (token.equals(XContentParser.Token.VALUE_NUMBER)) {
return XContentMapValues.nodeIntegerValue(parser.intValue());
} else {
String precision = parser.text();
try {
// we want to treat simple integer strings as precision levels, not distances
return XContentMapValues.nodeIntegerValue(precision);
} catch (NumberFormatException e) {
// try to parse as a distance value
final int parsedPrecision = GeoUtils.quadTreeLevelsForPrecision(precision);
try {
return checkPrecisionRange(parsedPrecision);
} catch (IllegalArgumentException e2) {
// this happens when distance too small, so precision > max.
// We'd like to see the original string
throw new IllegalArgumentException("precision too high [" + precision + "]", e2);
}
}
}
}

/**
 * Validates that a precision (zoom level) lies within the supported range.
 *
 * @param precision zoom level to validate
 * @return the same precision value, unchanged, when it is within [0, MAX_ZOOM]
 * @throws IllegalArgumentException if the precision is negative or larger than MAX_ZOOM
 */
public static int checkPrecisionRange(int precision) {
    final boolean withinRange = precision >= 0 && precision <= MAX_ZOOM;
    if (withinRange) {
        return precision;
    }
    throw new IllegalArgumentException("Invalid quadkey precision of " +
        precision + ". Must be between 0 and " + MAX_ZOOM + ".");
}

/**
* Encode lon/lat to the quadkey based long format.
* The resulting hash contains interleaved tile X and Y coordinates.
* The precision itself is also encoded as a few high bits.
*/
public static long longEncode(double longitude, double latitude, int precision) {
// Mathematics for this code was adapted from https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#Java

// How many tiles in X and in Y
final int tiles = 1 << checkPrecisionRange(precision);
final double lon = normalizeLon(longitude);
final double lat = normalizeLat(latitude);

int xTile = (int) Math.floor((lon + 180) / 360 * tiles);
int yTile = (int) Math.floor(
(1 - Math.log(
Math.tan(Math.toRadians(lat)) + 1 / Math.cos(Math.toRadians(lat))
) / Math.PI) / 2 * tiles);
if (xTile < 0) {
xTile = 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Style nit: I think we try to avoid conditionals without braces, too easy to accidentally introduce bugs if you're not paying attention when touching the code.

Could replace with xTile = Math.max(xTile, 0), if you're looking for a concise alternative. Probably similar for the rest too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thx, refactored.

}
if (xTile >= tiles) {
xTile = tiles - 1;
}
if (yTile < 0) {
yTile = 0;
}
if (yTile >= tiles) {
yTile = tiles - 1;
}

// Zoom value is placed in front of all the bits used for the quadkey
// e.g. if max zoom is 26, the largest index would use 52 bits (51st..0th),
// leaving 12 bits unused for zoom. See MAX_ZOOM comment above.
return BitUtil.interleave(xTile, yTile) | ((long) precision << ZOOM_SHIFT);
}

/**
 * Converts a quadkey hash in the long format into its "zoom/x/y" string form.
 *
 * @param hash quadkey hash in the long format
 * @return the key formatted as three integers separated by slashes
 * @throws IllegalArgumentException if the hash does not decode to a valid zoom and tile
 */
public static String stringEncode(long hash) {
    final int[] zxy = parseHash(hash);
    final StringBuilder key = new StringBuilder();
    key.append(zxy[0]).append("/").append(zxy[1]).append("/").append(zxy[2]);
    return key.toString();
}

/**
 * Decodes a quadkey hash in the long format and converts the resulting tile to a point.
 *
 * @param hash quadkey hash in the long format
 * @return the point computed for the decoded zoom/x/y tile
 * @throws IllegalArgumentException if the hash does not decode to a valid zoom and tile
 */
public static GeoPoint hashToGeoPoint(long hash) {
    final int[] zxy = parseHash(hash);
    final int zoom = zxy[0];
    final int xTile = zxy[1];
    final int yTile = zxy[2];
    return zxyToGeoPoint(zoom, xTile, yTile);
}

/**
 * Parses a "zoom/x/y" quadkey string and converts the tile it names to a point.
 *
 * @param hashAsString key made of exactly three slash-separated integers
 * @return the point computed for the parsed zoom/x/y tile
 * @throws IllegalArgumentException if the string does not have exactly three parts, a part is
 *                                  not an integer, or the values do not describe a valid tile;
 *                                  the underlying failure, when there is one, is attached as the cause
 */
public static GeoPoint hashToGeoPoint(String hashAsString) {
    // a limit of 4 lets us detect strings with more than three parts
    final String[] tokens = hashAsString.split("/", 4);
    IllegalArgumentException failure = null;
    if (tokens.length == 3) {
        try {
            final int zoom = Integer.parseInt(tokens[0]);
            final int xTile = Integer.parseInt(tokens[1]);
            final int yTile = Integer.parseInt(tokens[2]);
            return zxyToGeoPoint(zoom, xTile, yTile);
        } catch (IllegalArgumentException e) {
            // NumberFormatException is an IllegalArgumentException, so it lands here too
            failure = e;
        }
    }
    throw new IllegalArgumentException("Invalid quadkey hash string of " +
        hashAsString + ". Must be three integers in a form \"zoom/x/y\".", failure);
}

/**
 * Converts a tile address to a point, using the middle of the tile
 * (both tile numbers are offset by 0.5 before conversion).
 *
 * @param zoom  zoom level, must be within [0, MAX_ZOOM]
 * @param xTile tile column, must be within [0, 2^zoom)
 * @param yTile tile row, must be within [0, 2^zoom)
 * @return point built from the computed latitude and longitude
 * @throws IllegalArgumentException if the zoom or either tile number is out of range
 */
private static GeoPoint zxyToGeoPoint(int zoom, int xTile, int yTile) {
    final int maxTiles = 1 << checkPrecisionRange(zoom);
    final boolean outOfBounds = xTile < 0 || xTile >= maxTiles || yTile < 0 || yTile >= maxTiles;
    if (outOfBounds) {
        throw new IllegalArgumentException(String.format("Invalid quadkey z/x/y values of %s/%s/%s", zoom, xTile, yTile));
    }

    final double tiles = Math.pow(2.0, zoom);
    // inverse of the web mercator projection used when encoding the tile row
    final double n = Math.PI - (2.0 * Math.PI * (yTile + 0.5)) / tiles;
    final double lat = Math.toDegrees(Math.atan(Math.sinh(n)));
    final double lon = ((xTile + 0.5) / tiles * 360.0) - 180;
    return new GeoPoint(lat, lon);
}
}
Loading