// ---------------------------------------------------------------------------
// Search-layer support (lives inside the map viewmodel constructor).
// Renders the current search as server-side vector tiles
// (/search-layer/{z}/{x}/{y}.pbf): clustered circles with counts below
// ZOOM_THRESHOLD, individual points/geometries at or above it.
// ZOOM_THRESHOLD must match the server's value in search_layer.py.
// ---------------------------------------------------------------------------
const ZOOM_THRESHOLD = 14;
const SEARCH_LAYER_SOURCE_ID = 'search-layer-source';
const self = this;

const searchLayerDefinitions = [
    {
        id: 'individual-points',
        type: 'circle',
        source: SEARCH_LAYER_SOURCE_ID,
        'source-layer': 'points',
        minzoom: ZOOM_THRESHOLD,
        paint: {
            'circle-color': '#fa6003',
            'circle-radius': 5,
            'circle-opacity': 1
        }
    },
    {
        id: 'individual-geometries',
        type: 'fill',
        source: SEARCH_LAYER_SOURCE_ID,
        'source-layer': 'geometries',
        // Polygons fade in one level before the points do.
        minzoom: ZOOM_THRESHOLD - 1,
        paint: {
            'fill-color': '#fa6003',
            'fill-opacity': 0.3,
            'fill-outline-color': '#fa6003'
        }
    },
    {
        id: 'clusters',
        type: 'circle',
        source: SEARCH_LAYER_SOURCE_ID,
        'source-layer': 'clusters',
        filter: ['all', ['has', 'count'], ['>', ['get', 'count'], 1]],
        paint: {
            'circle-color': '#fa6003',
            // Radius steps up with the cluster's document count.
            'circle-radius': [
                'step', ['get', 'count'],
                15,
                10, 20,
                50, 25,
                100, 30,
                500, 35,
                1000, 40
            ],
            'circle-opacity': 0.8
        },
        maxzoom: ZOOM_THRESHOLD,
        minzoom: 1
    },
    {
        id: 'cluster-count',
        type: 'symbol',
        source: SEARCH_LAYER_SOURCE_ID,
        'source-layer': 'clusters',
        filter: ['all', ['has', 'count'], ['>', ['get', 'count'], 0]],
        layout: {
            'text-field': '{count}',
            'text-font': ['DIN Offc Pro Medium', 'Arial Unicode MS Bold'],
            'text-size': 12
        },
        paint: {
            'text-color': '#ffffff'
        },
        maxzoom: ZOOM_THRESHOLD,
        minzoom: 1
    },
    {
        id: 'unclustered-point',
        type: 'circle',
        source: SEARCH_LAYER_SOURCE_ID,
        'source-layer': 'clusters',
        filter: ['all', ['has', 'count'], ['==', ['get', 'count'], 1]],
        paint: {
            'circle-color': '#fa6003',
            'circle-radius': 5,
            'circle-opacity': 1
        },
        minzoom: ZOOM_THRESHOLD
    }
];
// Derived from the definitions so add/remove can never drift out of sync
// with a hand-maintained id list.
const searchLayerIds = searchLayerDefinitions.map(function(layer) {
    return layer.id;
});

// Search id observable: shared with the search view when provided via params.
this.searchQueryId = params.searchQueryId || ko.observable();
this.searchQueryId.subscribe(function(searchId) {
    if (searchId) {
        self.addSearchLayer(searchId);
    } else if (ko.unwrap(self.map)) {
        self.removeSearchLayer();
    }
});

this.addClusterClickHandlers = function() {
    const map = self.map();

    // Clicking a multi-feature cluster zooms in two levels; a single-feature
    // cluster opens the standard feature popup instead.
    map.on('click', 'clusters', function(e) {
        const features = map.queryRenderedFeatures(e.point, { layers: ['clusters'] });
        const feature = features[0];

        if (feature.properties.count > 1) {
            map.easeTo({
                center: feature.geometry.coordinates.slice(),
                zoom: map.getZoom() + 2
            });
        } else {
            // NOTE(review): `mapboxgl` is referenced as a global here — confirm
            // it is in scope in this module.
            self.onFeatureClick(features, e.lngLat, mapboxgl);
        }
    });

    // Pointer cursor over the interactive layers.
    ['clusters', 'unclustered-point'].forEach(function(layerId) {
        map.on('mouseenter', layerId, function() {
            map.getCanvas().style.cursor = 'pointer';
        });
        map.on('mouseleave', layerId, function() {
            map.getCanvas().style.cursor = '';
        });
    });
};

// (Re)builds the search-layer source and layers for the given search id.
// Safe to call repeatedly: any previous incarnation is removed first.
this.addSearchLayer = function(searchId) {
    if (!self.map()) {
        return;
    }
    const tileUrlTemplate = `${window.location.origin}/search-layer/{z}/{x}/{y}.pbf?searchid=${encodeURIComponent(searchId)}`;

    self.removeSearchLayer();

    self.map().addSource(SEARCH_LAYER_SOURCE_ID, {
        type: 'vector',
        tiles: [tileUrlTemplate],
        minzoom: 0,
        maxzoom: 22
    });
    searchLayerDefinitions.forEach(function(mapLayer) {
        self.map().addLayer(mapLayer);
    });
    self.addClusterClickHandlers();
};

// Removes the search layers and their source if present (no-ops otherwise).
this.removeSearchLayer = function() {
    searchLayerIds.forEach(function(layerId) {
        if (self.map().getLayer(layerId)) {
            self.map().removeLayer(layerId);
        }
    });
    if (self.map().getSource(SEARCH_LAYER_SOURCE_ID)) {
        self.map().removeSource(SEARCH_LAYER_SOURCE_ID);
    }
};
this.updateSearchResultsLayers(); - } - this.mouseoverInstanceId.subscribe(updateSearchResultPointLayer); }, this); }, diff --git a/arches/app/media/js/views/components/search/standard-search-view.js b/arches/app/media/js/views/components/search/standard-search-view.js index e311bcf4382..7134caf1939 100644 --- a/arches/app/media/js/views/components/search/standard-search-view.js +++ b/arches/app/media/js/views/components/search/standard-search-view.js @@ -15,6 +15,8 @@ define([ this.selectedPopup = ko.observable(''); this.sharedStateObject.selectedPopup = this.selectedPopup; + this.searchQueryId = ko.observable(null); + this.sharedStateObject.searchQueryId = this.searchQueryId; var firstEnabledFilter = _.find(this.sharedStateObject.searchFilterConfigs, function(filter) { return filter.config.layoutType === 'tabbed'; }, this); @@ -51,6 +53,47 @@ define([ this.searchFilterVms[componentName](this); }, + doQuery: function() { + const queryObj = JSON.parse(this.queryString()); + if (self.updateRequest) { self.updateRequest.abort(); } + self.updateRequest = $.ajax({ + type: "GET", + url: arches.urls.search_results, + data: queryObj, + context: this, + success: function(response) { + _.each(this.sharedStateObject.searchResults, function(value, key, results) { + if (key !== 'timestamp') { + delete this.sharedStateObject.searchResults[key]; + } + }, this); + _.each(response, function(value, key, response) { + if (key !== 'timestamp') { + this.sharedStateObject.searchResults[key] = value; + } + }, this); + this.sharedStateObject.searchResults.timestamp(response.timestamp); + this.searchQueryId(this.sharedStateObject.searchResults.searchqueryid); + this.sharedStateObject.userIsReviewer(response.reviewer); + this.sharedStateObject.userid(response.userid); + this.sharedStateObject.total(response.total_results); + this.sharedStateObject.hits(response.results.hits.hits.length); + this.sharedStateObject.alert(false); + }, + error: function(response, status, error) { + const alert 
= new AlertViewModel('ep-alert-red', arches.translations.requestFailed.title, response.responseJSON?.message); + if(self.updateRequest.statusText !== 'abort'){ + this.alert(alert); + } + this.sharedStateObject.loading(false); + }, + complete: function(request, status) { + self.updateRequest = undefined; + window.history.pushState({}, '', '?' + $.param(queryObj).split('+').join('%20')); + this.sharedStateObject.loading(false); + } + }); + }, }); return ko.components.register(componentName, { diff --git a/arches/app/search/components/standard_search_view.py b/arches/app/search/components/standard_search_view.py index 6af783b1af6..bde6e39e45b 100644 --- a/arches/app/search/components/standard_search_view.py +++ b/arches/app/search/components/standard_search_view.py @@ -1,5 +1,5 @@ from typing import Dict, Tuple - +import hashlib from arches.app.models.system_settings import settings from arches.app.search.components.base_search_view import BaseSearchView from arches.app.search.components.base import SearchFilterFactory @@ -16,9 +16,11 @@ user_is_resource_exporter, ) from arches.app.utils.string_utils import get_str_kwarg_as_bool +from django.core.cache import cache from django.utils.translation import gettext as _ from datetime import datetime import logging +import json details = { @@ -136,6 +138,38 @@ def append_dsl(self, search_query_object, **kwargs): if load_tiles: search_query_object["query"].include("tiles") + def execute_resourceids_only_query(self, search_query_object, cache, se, **kwargs): + search_request_object = kwargs.get("search_request_object", self.request.GET) + resourceids_only_query_hash_key = create_searchresults_cache_key( + self.request, search_request_object, resourceids_only=True + ) + + hpla_idx = f"{settings.ELASTICSEARCH_PREFIX}_{RESOURCES_INDEX}" + pit_response = se.es.open_point_in_time( + index=hpla_idx, keep_alive="5m" # Adjust as needed + ) + pit_id = pit_response.get("id") + # Perform the search + 
def create_searchresults_cache_key(request, search_query, **kwargs):
    """
    Build a stable cache key for a search request.

    The key is a SHA-1 over (normalized query string + user proxy), with a
    "rids" discriminator appended when only resource ids were requested, so
    a resourceids-only query can never collide with the full query's cache
    entries.

    Arguments:
    request -- the current HttpRequest; only request.user is read
    search_query -- dict of search filters/parameters

    Keyword arguments:
    resourceids_only -- bool, append the "rids" discriminator (default False)

    Returns a 40-character hex digest string.
    """
    resourceids_only = kwargs.get("resourceids_only", False)

    # Anonymous users share one cache bucket; authenticated users are
    # keyed by their user id.
    user_proxy = (
        request.user.username
        if request.user.username == "anonymous"
        else str(request.user.id)
    )

    # Sort the filters so two requests carrying the same parameters in a
    # different order hash identically.
    search_query_string = "".join(
        [k + str(v) for k, v in sorted(search_query.items())]
    ).strip()

    hashable_string = search_query_string + user_proxy
    if resourceids_only:
        # Bug fix: the discriminator used to be appended to a variable that
        # was never hashed, so resourceids-only and full queries collided
        # on the same cache key.
        hashable_string += "rids"

    return hashlib.sha1(hashable_string.encode()).hexdigest()
class GeoTileGridAgg(Aggregation):
    """
    Buckets geo-points into map tiles (z/x/y cells) at the given precision.

    https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-geotilegrid-aggregation.html
    """

    def __init__(self, **kwargs):
        # Zoom precision defaults to 5 when the caller does not supply one.
        precision = kwargs.get("precision", 5)
        super(GeoTileGridAgg, self).__init__(type="geotile_grid", **kwargs)
        self.precision = precision
        # Ensure the precision lands in the aggregation body even though the
        # base class already copied remaining kwargs across.
        self.agg[self.name][self.type]["precision"] = precision
import math
from django.views import View

from django.core.cache import caches
from arches.app.models.system_settings import settings
from django.utils.translation import gettext as _

from arches.app.search.search_engine_factory import SearchEngineFactory
from arches.app.search.elasticsearch_dsl_builder import (
    Query,
    Bool,
    GeoShape,
    Nested,
    GeoTileGridAgg,
    NestedAgg,
    Aggregation,
)
from django.http import Http404, HttpResponse
from arches.app.utils.betterJSONSerializer import JSONDeserializer
from datetime import datetime, timedelta
from time import time
import mercantile
import mapbox_vector_tile

# Clusters are served below this zoom; individual features at or above it.
# Must match ZOOM_THRESHOLD in the map viewmodel (media/js/viewmodels/map.js).
ZOOM_THRESHOLD = 14
# Coordinate space per tile used by mapbox_vector_tile.
EXTENT = 4096


class SearchLayer(View):
    """
    Serves the current search result set as Mapbox vector tiles.

    The search view caches an Elasticsearch point-in-time id and the query
    DSL under ``<searchid>_pit`` / ``<searchid>_dsl`` (5-minute timeout);
    this view replays that query restricted to the requested tile's bounding
    box and encodes the result — geotile clusters at low zoom, individual
    points/geometries at high zoom — as an MVT payload.
    """

    def get(self, request, zoom, x, y):
        """Return the (zoom, x, y) tile for the search id in the querystring.

        Raises Http404 when the searchid is missing or its cache entries
        have expired.
        """
        searchid = request.GET.get("searchid", None)
        if not searchid:
            raise Http404(_("Missing 'searchid' query parameter."))

        cache = caches["default"]
        pit_id = cache.get(searchid + "_pit")
        query_dsl = cache.get(searchid + "_dsl")
        if pit_id is None or query_dsl is None:
            # Entries expire 300s after the search ran (see the search view).
            raise Http404(_("Missing resourceids from search cache."))

        se = SearchEngineFactory().create()
        new_query = Query(se, limit=0)
        new_query.prepare()
        new_query.dsl = JSONDeserializer().deserialize(query_dsl, indent=4)

        tile_x, tile_y, tile_z = int(x), int(y), int(zoom)
        tile_bounds = mercantile.bounds(tile_x, tile_y, tile_z)
        bbox = (
            tile_bounds.west,
            tile_bounds.south,
            tile_bounds.east,
            tile_bounds.north,
        )
        geo_bbox_query = {
            "geo_bounding_box": {
                "points.point": {
                    "top_left": {"lat": tile_bounds.north, "lon": tile_bounds.west},
                    "bottom_right": {"lat": tile_bounds.south, "lon": tile_bounds.east},
                }
            }
        }

        if tile_z < ZOOM_THRESHOLD:
            layers = self._cluster_layers(se, new_query, pit_id, tile_z, geo_bbox_query)
        else:
            layers = self._feature_layers(se, new_query, pit_id, geo_bbox_query)

        tile = mapbox_vector_tile.encode(
            layers, quantize_bounds=bbox, y_coord_down=True, extents=EXTENT
        )
        return HttpResponse(tile, content_type="application/vnd.mapbox-vector-tile")

    def _cluster_layers(self, se, new_query, pit_id, tile_z, geo_bbox_query):
        """Aggregate points into geotile-grid clusters for low zoom levels."""
        geotile_agg = GeoTileGridAgg(
            precision=tile_z, field="points.point", size=10000
        )
        geotile_agg.add_aggregation(
            Aggregation(type="geo_centroid", name="centroid", field="points.point")
        )

        # Restrict the aggregation to this tile's bbox before bucketing.
        geo_filter_agg = Aggregation(
            type="filter",
            name="geo_filter",
            filter=Nested(path="points", query=geo_bbox_query).dsl,
        )
        geo_filter_agg.add_aggregation(geotile_agg)

        nested_agg = NestedAgg(path="points", name="geo_aggs")
        nested_agg.add_aggregation(geo_filter_agg)
        new_query.add_aggregation(nested_agg)

        # PIT searches forbid an index/scroll context; hits are not needed.
        # NOTE(review): confirm the "source_includes" key — ES itself uses
        # "_source"; this may be a no-op.
        new_query.dsl["source_includes"] = []
        new_query.dsl["size"] = 0
        results = se.es.search(
            pit={"id": pit_id, "keep_alive": "5m"}, _source=False, **new_query.dsl
        )

        features = []
        # NOTE(review): "zoomed_grid" relies on GeoTileGridAgg's default
        # aggregation name — confirm against elasticsearch_dsl_builder.
        buckets = results["aggregations"]["geo_aggs"]["geo_filter"]["zoomed_grid"][
            "buckets"
        ]
        for bucket in buckets:
            centroid = bucket["centroid"]["location"]
            features.append(
                {
                    "geometry": {
                        "type": "Point",
                        "coordinates": [centroid["lon"], centroid["lat"]],
                    },
                    "properties": {"count": bucket["doc_count"]},
                }
            )

        return [
            {
                "name": "clusters",
                "features": features,
                "version": 2,
                "extent": EXTENT,
            }
        ]

    def _feature_layers(self, se, new_query, pit_id, geo_bbox_query):
        """Fetch individual point/geometry features for high zoom levels."""
        # Match documents whose nested points OR geometries fall in the bbox.
        spatial_bool_query = Bool()
        spatial_bool_query.should(Nested(path="points", query=geo_bbox_query))
        spatial_bool_query.should(Nested(path="geometries", query=geo_bbox_query))
        new_query.add_query(spatial_bool_query)

        new_query.dsl["size"] = 10000
        new_query.include("points.point")
        new_query.include("geometries.geom")

        results = se.es.search(
            pit={"id": pit_id, "keep_alive": "5m"}, **new_query.dsl
        )

        point_features = []
        geometry_features = []
        for hit in results["hits"]["hits"]:
            source = hit["_source"]
            resource_id = hit.get("_id")

            for point in source.get("points", []):
                point_geom = point.get("point")
                if not point_geom:
                    continue
                lon = point_geom.get("lon")
                lat = point_geom.get("lat")
                # Bug fix: 0.0 is a valid coordinate (equator / prime
                # meridian); the original truthiness test dropped such points.
                if lon is not None and lat is not None:
                    point_features.append(
                        {
                            "geometry": {
                                "type": "Point",
                                "coordinates": [lon, lat],
                            },
                            "properties": {
                                "resourceinstanceid": resource_id,
                                "count": 1,
                            },
                        }
                    )

            for geometry in source.get("geometries", []):
                geom = geometry.get("geom")
                if not geom:
                    continue
                coordinates = geom.get("coordinates")
                if coordinates:
                    geometry_features.append(
                        {
                            "geometry": {
                                "type": geom.get("type"),
                                "coordinates": coordinates,
                            },
                            "properties": {"resourceinstanceid": resource_id},
                        }
                    )

        layers = []
        if point_features:
            layers.append(
                {
                    "name": "points",
                    "features": point_features,
                    "version": 2,
                    "extent": EXTENT,
                }
            )
        if geometry_features:
            layers.append(
                {
                    "name": "geometries",
                    "features": geometry_features,
                    "version": 2,
                    "extent": EXTENT,
                }
            )
        return layers


def create_searchlayer_mvt_cache_key(searchid_hash, zoom, x, y, user):
    """Cache key for a rendered search-layer tile (not yet used in this view)."""
    return f"searchlayer_mvt_{searchid_hash}_{zoom}_{x}_{y}_{user}"